# Run this cell to set up packages for lecture.
from lec08_imports import *

2 / (1 / 80 + 1 / 60)

68.57142857142857

def harmonic_mean(a, b):
    '''Returns the harmonic mean of a and b, that is, 2 divided by
    the sum of the reciprocals of a and b.'''
    sum_of_reciprocals = 1 / a + 1 / b
    return 2 / sum_of_reciprocals

harmonic_mean(80, 60)

68.57142857142857

harmonic_mean(20, 40)

26.666666666666664

show_def()

harmonic_mean(20, 40)

26.666666666666664

harmonic_mean(79, 894)

145.17163412127442

harmonic_mean(-2, 4)

-8.0

def triple(x):
    '''Returns x multiplied by 3. For a string, this is the string repeated three times.'''
    result = x * 3
    return result

triple(5)

15

triple(7 + 8)

45

triple('triton')

'tritontritontriton'

def triple(x):
    '''Returns three times x.

    x is a parameter: it stands for whatever argument is passed in when
    triple is called.
    '''
    tripled = x * 3
    return tripled

triple(7)

21

x

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[16], line 1
----> 1 x

NameError: name 'x' is not defined

x = 15

# When triple(12) is called, you can pretend
# there's an invisible line inside the body of triple
# that says x = 12.
# The x = 15 above is ignored.
triple(12)

36

def greeting():
    '''Returns a fixed greeting. This function takes no arguments.'''
    message = 'Hi! 👋'
    return message

greeting()

'Hi! 👋'

def where_is_the_error(something):
    '''You can describe your function within triple quotes. For example, this function 
    illustrates that errors don't occur until functions are executed (called).'''
    return (1 / 0) + something  # 1 / 0 raises ZeroDivisionError, but only once the function is called.

where_is_the_error(5)

---------------------------------------------------------------------------
ZeroDivisionError                         Traceback (most recent call last)
Cell In[22], line 1
----> 1 where_is_the_error(5)

Cell In[21], line 4, in where_is_the_error(something)
      1 def where_is_the_error(something):
      2     '''You can describe your function within triple quotes. For example, this function 
      3     illustrates that errors don't occur until functions are executed (called).'''
----> 4     return (1 / 0) + something

ZeroDivisionError: division by zero

>>> first_name('Pradeep Khosla')
'Pradeep'

'Pradeep Khosla'.split(' ')[0]

'Pradeep'

def first_name(full_name):
    '''Returns the part of full_name that comes before the first space.
    If full_name contains no spaces, all of full_name is returned.'''
    pieces = full_name.split(' ')
    return pieces[0]

first_name('Pradeep Khosla')

'Pradeep'

# What if there are three names?
first_name('Chancellor Pradeep Khosla')

'Chancellor'

def pythagorean(a, b):
    '''Prints the hypotenuse length of a right triangle with legs a and b.
    Nothing is returned, so the value of a call to this function is None.'''
    hypotenuse = (a ** 2 + b ** 2) ** 0.5
    print(hypotenuse)

x = pythagorean(3, 4)

5.0

# No output – why?
x

# Errors – why?
x + 10

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[30], line 2
      1 # Errors – why?
----> 2 x + 10

TypeError: unsupported operand type(s) for +: 'NoneType' and 'int'

def better_pythagorean(a, b):
    '''Returns the hypotenuse length of a right triangle with legs a and b,
       so that the result can be stored and used in later calculations.
    '''
    return (a ** 2 + b ** 2) ** 0.5

x = better_pythagorean(3, 4)
x

5.0

x + 10

15.0

def motivational(quote):
    '''Always returns 0. The quote is never printed.'''
    # return ends the function immediately, so nothing below it runs.
    return 0
    # Unreachable: the function has already returned on the line above.
    print("Here's a motivational quote:", quote)

motivational('Fall seven times and stand up eight.')

0

# Load the class roster from a CSV file, then display it.
roster = bpd.read_csv('data/roster-anon.csv')
roster

roster

roster

roster.get('name').iloc[0]

'Allie Sazhma'

first_name(roster.get('name').iloc[0])

'Allie'

first_name(roster.get('name').iloc[1])

'Amina'

roster.get('name')

0        Allie Sazhma
1        Amina Igxazd
2      Jazmine Enesxr
            ...      
219     Ismayl Gwuiij
220       Neil Dkaqgm
221     Maggie Ldfgau
Name: name, Length: 222, dtype: object

roster.get('name').apply(first_name)

0        Allie
1        Amina
2      Jazmine
        ...   
219     Ismayl
220       Neil
221     Maggie
Name: name, Length: 222, dtype: object

# Add a 'first' column to roster, computed by applying first_name
# to every value in the 'name' column.
roster = roster.assign(
    first=roster.get('name').apply(first_name)
)
roster

# Count how many students have each first name, most common names first.
name_counts = (
    roster
    .groupby('first')                      # One group per distinct first name...
    .count()                               # ...counting the rows in each group.
    .sort_values('name', ascending=False)  # Largest counts at the top.
    .get(['name'])                         # Keep only the 'name' column, which now holds the counts.
)
name_counts

...

Ellipsis

...

Ellipsis

name_counts.get('name')

first
Kevin     4
Ryan      4
Noah      3
         ..
Hongyu    1
Hriday    1
Zixuan    1
Name: name, Length: 200, dtype: int64

# Not necessarily meaningful, but doable.
name_counts.get('name').apply(np.sqrt)

first
Kevin     2.00
Ryan      2.00
Noah      1.73
          ... 
Hongyu    1.00
Hriday    1.00
Zixuan    1.00
Name: name, Length: 200, dtype: float64

name_counts.index

Index(['Kevin', 'Ryan', 'Noah', 'Kristen', 'Jimmy', 'Felix', 'Edward', 'David',
       'Olivia', 'Brandon',
       ...
       'Hailey', 'Hannah', 'Haotian', 'Harrison', 'Helen', 'Henry', 'Hongan',
       'Hongyu', 'Hriday', 'Zixuan'],
      dtype='object', name='first', length=200)

name_counts.index.apply(max)

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In[51], line 1
----> 1 name_counts.index.apply(max)

AttributeError: 'Index' object has no attribute 'apply'

# What is the max of an individual string?
name_counts.reset_index().get('first').apply(max)

0      v
1      y
2      o
      ..
197    y
198    y
199    x
Name: first, Length: 200, dtype: object

name_counts.reset_index()

name_counts

roster

# Find the row whose 'name' is 'Olivia Kcjqla' and pull out that student's section.
# .iloc[0] takes the first (presumably only) matching row.
which_section = roster[roster.get('name') == 'Olivia Kcjqla'].get('section').iloc[0]
which_section

'10AM'

# Count the students whose first name is Olivia and whose section is which_section.
first_cond = roster.get('first') == 'Olivia' # A Series of Booleans!
section_cond = roster.get('section') == which_section # A Series of Booleans!
# & keeps only the rows where both conditions hold; .shape[0] is the number of such rows.
how_many = roster[first_cond & section_cond].shape[0]
how_many

1

def shared_first_and_section(name):
    '''Returns the number of students in roster who have the same first name
    and the same section as the student whose full name is name.

    The student themself is included in the count. Full names are assumed
    to be unique; if several rows match, the first one is used.
    '''
    # Look up the student with this full name.
    student = roster[roster.get('name') == name]

    # Boolean Series marking who matches that student's first name and section.
    same_first = roster.get('first') == student.get('first').iloc[0]
    same_section = roster.get('section') == student.get('section').iloc[0]

    # The number of rows satisfying both conditions is the answer.
    matches = roster[same_first & same_section]
    return matches.shape[0]

shared_first_and_section('Olivia Kcjqla')

1

# This means that there is another Jimmy in the same section as Jimmy Xvngxm.
shared_first_and_section('Jimmy Xvngxm')

2

# Add a 'shared' column: for each student, the number of students (themself
# included) who have the same first name and section.
roster = roster.assign(shared=roster.get('name').apply(shared_first_and_section))
roster

roster[(roster.get('shared') >= 2)].sort_values('shared', ascending=False)

# Students in the 10AM section who share a first name with at least one
# other student in that section.
one_section_only = (
    roster[(roster.get('shared') >= 2) &  # shared counts the student themself, so >= 2 means someone else matches
           (roster.get('section') == '10AM')]
    .sort_values('shared', ascending=False)
)
one_section_only

# All of the names shared by multiple students in the 10AM section.
one_section_only.get('first').unique()

array(['Noah', 'John', 'Felix'], dtype=object)

...

Ellipsis

Lecture 8 – Functions and Applying¶

DSC 10, Summer 2024¶

Agenda¶

Functions¶

Defining functions¶

Motivation¶

Example: Harmonic mean¶

Functions¶

Functions are "recipes"¶

Parameters and arguments¶

Scope 🩺¶

Functions can take 0 or more arguments¶

Functions don't run until you call them!¶

Example: `first_name`¶

Returning¶

Returning¶

Applying functions to DataFrames¶

DSC 10 student data¶

Example: Common first names¶

Using our `first_name` function¶

`.apply`¶

Example: Common first names¶

Activity¶

`.apply` works with built-in functions, too!¶

Aside: Resetting the index¶

Example: Shared first names and sections¶

Another function: `shared_first_and_section`¶

Sneak peek¶

Activity¶

Summary, next time¶

Summary¶

Next time¶

	name	section
0	Allie Sazhma	11AM
1	Amina Igxazd	10AM
2	Jazmine Enesxr	9AM
...	...	...
219	Ismayl Gwuiij	10AM
220	Neil Dkaqgm	10AM
221	Maggie Ldfgau	9AM

	name	section	first
0	Allie Sazhma	11AM	Allie
1	Amina Igxazd	10AM	Amina
2	Jazmine Enesxr	9AM	Jazmine
...	...	...	...
219	Ismayl Gwuiij	10AM	Ismayl
220	Neil Dkaqgm	10AM	Neil
221	Maggie Ldfgau	9AM	Maggie

	name	section	first	shared
192	Ryan Oogwno	11AM	Ryan	4
36	Ryan Mgetat	11AM	Ryan	4
41	Ryan Nwdowi	11AM	Ryan	4
...	...	...	...	...
203	Felix Fnrqck	10AM	Felix	2
43	Jimmy Xvngxm	11AM	Jimmy	2
107	Noah Llpjpu	10AM	Noah	2

	name	section	first	shared
11	Noah Qodtvo	10AM	Noah	2
70	John Ubarsl	10AM	John	2
74	John Paqmwc	10AM	John	2
85	Felix Pgqrnv	10AM	Felix	2
107	Noah Llpjpu	10AM	Noah	2
203	Felix Fnrqck	10AM	Felix	2

Lecture 8 – Functions and Applying¶

DSC 10, Summer 2024¶

Agenda¶

Functions¶

Defining functions¶

Motivation¶

Example: Harmonic mean¶

Functions¶

Functions are "recipes"¶

Parameters and arguments¶

Scope 🩺¶

Functions can take 0 or more arguments¶

Functions don't run until you call them!¶

Example: first_name¶

Returning¶

Returning¶

Applying functions to DataFrames¶

DSC 10 student data¶

Example: Common first names¶

Using our first_name function¶

.apply¶

Example: Common first names¶

Activity¶

.apply works with built-in functions, too!¶

Aside: Resetting the index¶

Example: Shared first names and sections¶

Another function: shared_first_and_section¶

Sneak peek¶

Activity¶

Summary, next time¶

Summary¶

Next time¶

Example: `first_name`¶

Using our `first_name` function¶

`.apply`¶

`.apply` works with built-in functions, too!¶

Another function: `shared_first_and_section`¶