# Run this cell to set up packages for lecture.
from lec08_imports import *

2 / (1 / 80 + 1 / 60)

68.57142857142857

def harmonic_mean(a, b):
    '''Returns the harmonic mean of the two numbers a and b,
    i.e. 2 divided by the sum of their reciprocals.'''
    reciprocal_sum = 1 / a + 1 / b
    return 2 / reciprocal_sum

harmonic_mean(80, 60)

68.57142857142857

harmonic_mean(20, 40)

26.666666666666664

show_def()

harmonic_mean(20, 40)

26.666666666666664

harmonic_mean(79, 894)

145.17163412127442

harmonic_mean(-2, 4)

-8.0

def triple(x):
    '''Returns x multiplied by 3. For a string, that is the string
    repeated three times.'''
    tripled = x * 3
    return tripled

triple(5)

15

triple(7 + 8)

45

triple('triton')

'tritontritontriton'

def triple(x):
    '''Returns three times the argument. The name x is a parameter,
    so it only exists while the function body is running.'''
    result = x * 3
    return result

triple(7)

21

x

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[16], line 1
----> 1 x

NameError: name 'x' is not defined

x = 15

# When triple(12) is called, you can pretend
# there's an invisible line inside the body of triple
# that says x = 12.
# The x = 15 above is ignored.
triple(12)

36

def greeting():
    '''Takes no arguments and always returns the same greeting string.'''
    message = 'Hi! 👋'
    return message

greeting()

'Hi! 👋'

def where_is_the_error(something):
    '''You can describe your function within triple quotes. For example, this function 
    illustrates that errors don't occur until functions are executed (called).'''
    return (1 / 0) + something  # deliberate: 1 / 0 raises ZeroDivisionError, but only when called

where_is_the_error(5)

---------------------------------------------------------------------------
ZeroDivisionError                         Traceback (most recent call last)
Cell In[22], line 1
----> 1 where_is_the_error(5)

Cell In[21], line 4, in where_is_the_error(something)
      1 def where_is_the_error(something):
      2     '''You can describe your function within triple quotes. For example, this function 
      3     illustrates that errors don't occur until functions are executed (called).'''
----> 4     return (1 / 0) + something

ZeroDivisionError: division by zero

>>> first_name('Pradeep Khosla')
'Pradeep'

'Pradeep Khosla'.split(' ')[0]

'Pradeep'

def first_name(full_name):
    '''Returns the part of full_name that comes before the first space
    (the whole string if it contains no space).'''
    name_parts = full_name.split(' ')
    return name_parts[0]

first_name('Pradeep Khosla')

'Pradeep'

# What if there are three names?
first_name('Chancellor Pradeep Khosla')

'Chancellor'

def pythagorean(a, b):
    '''Computes the hypotenuse length of a triangle with legs a and b
    and prints it. Nothing is returned, so the call evaluates to None.'''
    sum_of_squares = a ** 2 + b ** 2
    hypotenuse = sum_of_squares ** 0.5
    print(hypotenuse)

x = pythagorean(3, 4)

5.0

# No output – why?
x

# Errors – why?
x + 10

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[30], line 2
      1 # Errors – why?
----> 2 x + 10

TypeError: unsupported operand type(s) for +: 'NoneType' and 'int'

def better_pythagorean(a, b):
    '''Returns the hypotenuse length of a triangle with legs a and b,
       so the result can be stored and used in later calculations.
    '''
    sum_of_squares = a ** 2 + b ** 2
    return sum_of_squares ** 0.5

x = better_pythagorean(3, 4)
x

5.0

x + 10

15.0

def motivational(quote):
    '''Always returns 0 and prints nothing, whatever quote is passed.

    The print is deliberately placed after the return to show that
    return ends the function immediately.
    '''
    return 0
    # Never reached: the return above has already ended the call.
    print("Here's a motivational quote:", quote)

motivational('Fall seven times and stand up eight.')

0

roster = bpd.read_csv('data/roster-anon.csv')
roster

roster

roster

roster.get('name').iloc[0]

'Jolette Obtwuz'

first_name(roster.get('name').iloc[0])

'Jolette'

first_name(roster.get('name').iloc[1])

'Ian'

roster.get('name')

0        Jolette Obtwuz
1            Ian Lmuqpm
2         Nicole Wpedyy
             ...       
250    Genevieve Cibjer
251        Devon Gncdxq
252      Allyson Hknnwt
Name: name, Length: 253, dtype: object

roster.get('name').apply(first_name)

0        Jolette
1            Ian
2         Nicole
         ...    
250    Genevieve
251        Devon
252      Allyson
Name: name, Length: 253, dtype: object

# Add a 'first' column to roster, computed by applying first_name
# to every entry of the 'name' column.
roster = roster.assign(
    first=roster.get('name').apply(first_name)
)
roster

# Count how many students have each first name, most common names first.
name_counts = (
    roster
    .groupby('first')                       # one group per distinct first name
    .count()                                # count the entries of each remaining column, per group
    .sort_values('name', ascending=False)   # largest counts at the top
    .get(['name'])                          # keep only the 'name' column, which now holds the counts
)
name_counts

...

Ellipsis

...

Ellipsis

name_counts.get('name')

first
Ryan       4
Matthew    3
Alice      2
          ..
Hrithik    1
Huiyu      1
Ziling     1
Name: name, Length: 232, dtype: int64

# Not necessarily meaningful, but doable.
name_counts.get('name').apply(np.log)

first
Ryan       1.39
Matthew    1.10
Alice      0.69
           ... 
Hrithik    0.00
Huiyu      0.00
Ziling     0.00
Name: name, Length: 232, dtype: float64

name_counts.index

Index(['Ryan', 'Matthew', 'Alice', 'Nathan', 'Richard', 'James', 'Jake',
       'Shannon', 'Yiming', 'Bryan',
       ...
       'Helena', 'Hengyu', 'Henry', 'Ho', 'Hok', 'Holden', 'Holly', 'Hrithik',
       'Huiyu', 'Ziling'],
      dtype='object', name='first', length=232)

name_counts.index.apply(max)

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In[51], line 1
----> 1 name_counts.index.apply(max)

AttributeError: 'Index' object has no attribute 'apply'

# What is the max of an individual string?
name_counts.reset_index().get('first').apply(max)

0      y
1      w
2      l
      ..
229    t
230    y
231    n
Name: first, Length: 232, dtype: object

roster

which_section = roster[roster.get('name') == 'Jake Apidos'].get('section').iloc[0]
which_section

'11AM'

first_cond = roster.get('first') == 'Jake' # A Boolean Series!
section_cond = roster.get('section') == which_section # A Boolean Series!
how_many = roster[first_cond & section_cond].shape[0]
how_many

1

def shared_first_and_section(name):
    '''Returns the number of students in roster who have the same first name
    and the same section as the student whose full name is name.

    The student themself is included in the count, so a result of 1 means
    nobody else matches. Reads the global roster DataFrame, which must have
    'name', 'first', and 'section' columns. Assumes name appears in roster
    and that full names are unique.
    '''
    # Look up the one row for this student.
    is_this_student = roster.get('name') == name
    student = roster[is_this_student]

    # Pull out the two values we want to match on.
    target_first = student.get('first').iloc[0]
    target_section = student.get('section').iloc[0]

    # Build one Boolean Series per condition, then keep rows satisfying both.
    same_first = roster.get('first') == target_first
    same_section = roster.get('section') == target_section
    matches = roster[same_first & same_section]

    # The number of matching rows is the number of such students.
    return matches.shape[0]

shared_first_and_section('Jake Apidos')

1

# This means that there is another Jennifer in the same section as Jennifer Jbgcjp.
shared_first_and_section('Jennifer Jbgcjp')

2

roster = roster.assign(shared=roster.get('name').apply(shared_first_and_section))
roster

roster[(roster.get('shared') >= 2)].sort_values('shared', ascending=False)

# Keep only the 10AM students whose 'shared' count is at least 2, i.e. those
# who share their first name with at least one other student in their section.
one_section_only = (
    roster[(roster.get('shared') >= 2) & 
           (roster.get('section') == '10AM')]
    .sort_values('shared', ascending=False)
)
one_section_only

# All of the names shared by multiple students in the 10AM section.
one_section_only.get('first').unique()

array(['Ryan', 'Bryan'], dtype=object)

...

Ellipsis

Lecture 8 – Functions and Applying¶

DSC 10, Winter 2024¶

Announcements¶

Agenda¶

Functions¶

Defining functions¶

Motivation¶

Example: Harmonic mean¶

Functions¶

Functions are "recipes"¶

Parameters and arguments¶

Scope 🩺¶

Functions can take 0 or more arguments¶

Functions don't run until you call them!¶

Example: `first_name`¶

Returning¶

Returning¶

Applying functions to DataFrames¶

DSC 10 student data¶

Example: Common first names¶

Using our `first_name` function¶

`.apply`¶

Example: Common first names¶

Activity¶

`.apply` works with built-in functions, too!¶

Aside: Resetting the index¶

Example: Shared first names and sections¶

Another function: `shared_first_and_section`¶

Sneak peek¶

Activity¶

Summary, next time¶

Summary¶

Next time¶

	name	section
0	Jolette Obtwuz	9AM
1	Ian Lmuqpm	11AM
2	Nicole Wpedyy	10AM
...	...	...
250	Genevieve Cibjer	9AM
251	Devon Gncdxq	11AM
252	Allyson Hknnwt	9AM

	name	section	first
0	Jolette Obtwuz	9AM	Jolette
1	Ian Lmuqpm	11AM	Ian
2	Nicole Wpedyy	10AM	Nicole
...	...	...	...
250	Genevieve Cibjer	9AM	Genevieve
251	Devon Gncdxq	11AM	Devon
252	Allyson Hknnwt	9AM	Allyson

	name	section	first	shared
3	Nathan Dcgnil	11AM	Nathan	2
11	Justin Sqkyny	11AM	Justin	2
23	Trevor Eqxfbf	11AM	Trevor	2
...	...	...	...	...
158	Matthew Qououb	11AM	Matthew	2
183	Justin Xisekj	11AM	Justin	2
229	Nathan Xyycyd	11AM	Nathan	2

	name	section	first	shared
68	Ryan Sbvsnc	10AM	Ryan	2
72	Ryan Ysafne	10AM	Ryan	2
110	Bryan Mulsga	10AM	Bryan	2
116	Bryan Ubueep	10AM	Bryan	2

Lecture 8 – Functions and Applying¶

DSC 10, Winter 2024¶

Announcements¶

Agenda¶

Functions¶

Defining functions¶

Motivation¶

Example: Harmonic mean¶

Functions¶

Functions are "recipes"¶

Parameters and arguments¶

Scope 🩺¶

Functions can take 0 or more arguments¶

Functions don't run until you call them!¶

Example: first_name¶

Returning¶

Returning¶

Applying functions to DataFrames¶

DSC 10 student data¶

Example: Common first names¶

Using our first_name function¶

.apply¶

Example: Common first names¶

Activity¶

.apply works with built-in functions, too!¶

Aside: Resetting the index¶

Example: Shared first names and sections¶

Another function: shared_first_and_section¶

Sneak peek¶

Activity¶

Summary, next time¶

Summary¶

Next time¶

Example: `first_name`¶

Using our `first_name` function¶

`.apply`¶

`.apply` works with built-in functions, too!¶

Another function: `shared_first_and_section`¶