# Set up packages for lecture. Don't worry about understanding this code, but
# make sure to run it if you're following along.
import numpy as np
import babypandas as bpd
import pandas as pd

from matplotlib_inline.backend_inline import set_matplotlib_formats
import matplotlib.pyplot as plt
# Render inline figures as SVG.
set_matplotlib_formats("svg")
# Use the 'ggplot' style sheet and a default figure size of 10 x 5 (width, height).
plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = (10, 5)

# NumPy printing: summarize arrays with more than 20 elements, show 2 digits
# of precision, and suppress scientific notation for small values.
np.set_printoptions(threshold=20, precision=2, suppress=True)
# pandas display: show at most 7 rows and 8 columns, with 2 decimal places.
pd.set_option("display.max_rows", 7)
pd.set_option("display.max_columns", 8)
pd.set_option("display.precision", 2)

from IPython.display import display, IFrame

def show_def():
    '''Displays an embedded Google Slides presentation inline as a 960 x 569 IFrame.'''
    slides_url = "https://docs.google.com/presentation/d/e/2PACX-1vRKMMwGtrQOeLefj31fCtmbNOaJuKY32eBz1VwHi_5ui0AGYV3MoCjPUtQ_4SB1f9x4Iu6gbH0vFvmB/embed?start=false&loop=false&delayms=60000&rm=minimal"
    # IFrame takes the source URL followed by the width and height.
    frame = IFrame(slides_url, 960, 569)
    display(frame)


2 / (1 / 80 + 1 / 60)

68.57142857142857


def harmonic_mean(a, b):
    '''Returns the harmonic mean of the two numbers a and b.

    The harmonic mean is 2 divided by the sum of the reciprocals of a and b.
    Raises ZeroDivisionError if a or b is 0, or if the reciprocals sum to 0.
    '''
    sum_of_reciprocals = 1 / a + 1 / b
    return 2 / sum_of_reciprocals


harmonic_mean(80, 60)

68.57142857142857


harmonic_mean(20, 40)

26.666666666666664


show_def()


harmonic_mean(20, 40)

26.666666666666664


harmonic_mean(79, 894)

145.17163412127442


harmonic_mean(-2, 4)

-8.0


def triple(x):
    '''Returns x multiplied by 3.

    Works for anything that supports `* 3`: numbers are tripled, while
    strings and lists are repeated three times.
    '''
    tripled = x * 3
    return tripled


triple(5)

15


triple(7 + 8)

45


triple('triton')

'tritontritontriton'


def triple(x):
    '''Returns three times the argument.

    The parameter x only exists inside this function while it runs.
    '''
    result = x * 3
    return result


triple(7)

21

x

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
/var/folders/ch/hyjw6whx3g9gshnp58738jc80000gp/T/ipykernel_82186/32546335.py in <module>
----> 1 x

NameError: name 'x' is not defined


x = 15


# When triple(12) is called, you can pretend
# there's an invisible line inside the body of triple
# that says x = 12.
# The x = 15 above is ignored.
triple(12)

36


def greeting():
    '''Returns a fixed greeting string. Takes no arguments.'''
    message = 'Hi! 👋'
    return message


greeting()

'Hi! 👋'


def where_is_the_error(something):  # raises ZeroDivisionError whenever it is called
    '''You can describe your function within triple quotes. For example, this function 
    illustrates that errors don't occur until functions are executed (called).'''
    return (1 / 0) + something


where_is_the_error(5)

---------------------------------------------------------------------------
ZeroDivisionError                         Traceback (most recent call last)
/var/folders/ch/hyjw6whx3g9gshnp58738jc80000gp/T/ipykernel_82186/3423408763.py in <module>
----> 1 where_is_the_error(5)

/var/folders/ch/hyjw6whx3g9gshnp58738jc80000gp/T/ipykernel_82186/1703529954.py in where_is_the_error(something)
      2     '''You can describe your function within triple quotes. For example, this function 
      3     illustrates that errors don't occur until functions are executed (called).'''
----> 4     return (1 / 0) + something

ZeroDivisionError: division by zero

>>> first_name('Pradeep Khosla')
'Pradeep'


'Pradeep Khosla'.split(' ')[0]

'Pradeep'


def first_name(full_name):
    '''Returns everything before the first space in full_name.

    For a name like 'First Last' this is the first name. If the name starts
    with a title, the title is what gets returned.
    '''
    pieces = full_name.split(' ')
    return pieces[0]


first_name('Pradeep Khosla')

'Pradeep'


# What if there are three names?
first_name('Chancellor Pradeep Khosla')

'Chancellor'


def pythagorean(a, b):
    '''Prints the hypotenuse length of a right triangle with legs a and b.

    Note that the length is only printed, not returned, so the result of
    calling this function is None.
    '''
    sum_of_squares = a ** 2 + b ** 2
    print(sum_of_squares ** 0.5)


x = pythagorean(3, 4)

5.0


# No output – why?
x


# Errors – why?
x + 10

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
/var/folders/ch/hyjw6whx3g9gshnp58738jc80000gp/T/ipykernel_82186/3305400239.py in <module>
      1 # Errors – why?
----> 2 x + 10

TypeError: unsupported operand type(s) for +: 'NoneType' and 'int'


def better_pythagorean(a, b):
    '''Returns the hypotenuse length of a right triangle with legs a and b.

    Unlike a function that only prints, the returned value can be stored
    in a variable and used in later calculations.
    '''
    sum_of_squares = a ** 2 + b ** 2
    return sum_of_squares ** 0.5


x = better_pythagorean(3, 4)
x

5.0


x + 10

15.0


def motivational(quote):
    '''Returns 0 without ever printing the quote.

    The return statement ends the call immediately, so the print
    statement written after it is never reached.
    '''
    result = 0
    return result
    # Unreachable: the function has already returned by this point.
    print("Here's a motivational quote:", quote)


motivational('Fall seven times and stand up eight.')

0


roster = bpd.read_csv('data/roster-anon.csv')
roster


roster


roster


roster.get('name').iloc[0]

'Derrick Gernlq'


first_name(roster.get('name').iloc[0])

'Derrick'


first_name(roster.get('name').iloc[1])

'Tommy'


roster.get('name')

0      Derrick Gernlq
1        Tommy Vbpsht
2        Grace Smgsmb
            ...      
273      Norah Pcqynf
274      Harry Jwofgg
275        Zhe Ltynpn
Name: name, Length: 276, dtype: object


roster.get('name').apply(first_name)

0      Derrick
1        Tommy
2        Grace
        ...   
273      Norah
274      Harry
275        Zhe
Name: name, Length: 276, dtype: object


# Add a 'first' column computed by calling first_name on every full name.
roster = roster.assign(first=roster.get('name').apply(first_name))
roster


# Count the rows for each first name, ...
name_counts = roster.groupby('first').count()
# ... put the most common first names at the top, ...
name_counts = name_counts.sort_values('name', ascending=False)
# ... and keep only the 'name' column, which now holds the counts.
name_counts = name_counts.get(['name'])
name_counts

...

Ellipsis

...

Ellipsis


name_counts.get('name')

first
Ryan       6
Andrew     4
Grace      3
          ..
Jared      1
Jasnoor    1
Zixuan     1
Name: name, Length: 250, dtype: int64


# Not necessarily meaningful, but doable.
name_counts.get('name').apply(np.log)

first
Ryan       1.79
Andrew     1.39
Grace      1.10
           ... 
Jared      0.00
Jasnoor    0.00
Zixuan     0.00
Name: name, Length: 250, dtype: float64


name_counts.index

Index(['Ryan', 'Andrew', 'Grace', 'Ethan', 'Aaron', 'Krishna', 'Tara',
       'Danielle', 'Daniel', 'Jacob',
       ...
       'Hunter', 'Hyunwoo', 'Ibrahim', 'Isabel', 'Isaiah', 'James', 'Janayra',
       'Jared', 'Jasnoor', 'Zixuan'],
      dtype='object', name='first', length=250)


name_counts.index.apply(max)

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
/var/folders/ch/hyjw6whx3g9gshnp58738jc80000gp/T/ipykernel_82186/1905262767.py in <module>
----> 1 name_counts.index.apply(max)

AttributeError: 'Index' object has no attribute 'apply'


# What is the max of an individual string?
name_counts.reset_index().get('first').apply(max)

0      y
1      w
2      r
      ..
247    r
248    s
249    x
Name: first, Length: 250, dtype: object


roster


# Look up the section of the student whose full name is 'Grace Smgsmb'.
which_section = (
    roster[roster.get('name') == 'Grace Smgsmb']
    .get('section')
    .iloc[0]
)
which_section

'12PM'


# Boolean Series: True where the student's section equals which_section.
section_cond = roster.get('section') == which_section
# Boolean Series: True where the student's first name is 'Grace'.
first_cond = roster.get('first') == 'Grace'
# Keep only the rows where both conditions hold, then count those rows.
how_many = roster[section_cond & first_cond].shape[0]
how_many

3


def shared_first_and_section(name):
    '''Returns the number of students in roster who have the same first name
    and the same section as the student with the given full name.

    The student themselves is included in the count. Full names are assumed
    to be unique; if several rows match, the first one is used.
    '''
    # Locate the student by full name and read off their details.
    match = roster[roster.get('name') == name]
    their_first = match.get('first').iloc[0]
    their_section = match.get('section').iloc[0]

    # Build one Boolean Series per condition, then keep rows satisfying both.
    same_first = roster.get('first') == their_first
    same_section = roster.get('section') == their_section
    return roster[same_first & same_section].shape[0]


shared_first_and_section('Grace Smgsmb')

3


shared_first_and_section('Ryan Uklbnk')

4


# Call shared_first_and_section on every full name and store the results
# in a new 'shared' column.
roster = roster.assign(shared=roster.get('name').apply(shared_first_and_section))
roster


roster[(roster.get('shared') >= 2)].sort_values('shared', ascending=False)


# Students in the 12PM section who share their first name with at least one
# other student in that section, largest 'shared' values first.
one_section_only = roster[
    (roster.get('shared') >= 2) & (roster.get('section') == '12PM')
].sort_values('shared', ascending=False)
one_section_only


# All of the names shared by multiple students in the 12PM section.
one_section_only.get('first').unique()

array(['Andrew', 'Grace', 'Ethan', 'Brandon', 'Ryan', 'Andrea', 'Aaron'],
      dtype=object)

...

Ellipsis

Lecture 8 – Functions and Apply¶

DSC 10, Spring 2023¶

Announcements¶

Agenda¶

Functions¶

Defining functions¶

Motivation¶

Example: Harmonic mean¶

Functions¶

Functions are "recipes"¶

Parameters and arguments¶

Scope 🩺¶

Functions can take 0 or more arguments¶

Functions don't run until you call them!¶

Example: `first_name`¶

Returning¶

Returning¶

Applying functions to DataFrames¶

DSC 10 student data¶

Example: Common first names¶

Using our `first_name` function¶

`.apply`¶

Example: Common first names¶

Activity¶

`.apply` works with built-in functions, too!¶

Aside: Resetting the index¶

Example: Shared first names and sections¶

Another function: `shared_first_and_section`¶

Sneak peek¶

Activity¶

Summary, next time¶

Summary¶

Next time¶

	name	section
0	Derrick Gernlq	1PM
1	Tommy Vbpsht	12PM
2	Grace Smgsmb	12PM
...	...	...
273	Norah Pcqynf	12PM
274	Harry Jwofgg	1PM
275	Zhe Ltynpn	1PM

	name	section	first
0	Derrick Gernlq	1PM	Derrick
1	Tommy Vbpsht	12PM	Tommy
2	Grace Smgsmb	12PM	Grace
...	...	...	...
273	Norah Pcqynf	12PM	Norah
274	Harry Jwofgg	1PM	Harry
275	Zhe Ltynpn	1PM	Zhe

	name	section	first	shared
98	Ryan Uklbnk	1PM	Ryan	4
38	Ryan Dyrncc	1PM	Ryan	4
223	Andrew Mplcin	12PM	Andrew	4
...	...	...	...	...
3	Danielle Jhpshv	1PM	Danielle	2
78	Jacob Jhcoau	1PM	Jacob	2
34	Ashley Pkqzzd	1PM	Ashley	2

	name	section	first	shared
269	Andrew Dpkpan	12PM	Andrew	4
66	Andrew Mdvbuw	12PM	Andrew	4
223	Andrew Mplcin	12PM	Andrew	4
...	...	...	...	...
225	Brandon Jtzrkj	12PM	Brandon	2
247	Andrea Bauodp	12PM	Andrea	2
101	Ryan Vztixv	12PM	Ryan	2

Lecture 8 – Functions and Apply¶

DSC 10, Spring 2023¶

Announcements¶

Agenda¶

Functions¶

Defining functions¶

Motivation¶

Example: Harmonic mean¶

Functions¶

Functions are "recipes"¶

Parameters and arguments¶

Scope 🩺¶

Functions can take 0 or more arguments¶

Functions don't run until you call them!¶

Example: first_name¶

Returning¶

Returning¶

Applying functions to DataFrames¶

DSC 10 student data¶

Example: Common first names¶

Using our first_name function¶

.apply¶

Example: Common first names¶

Activity¶

.apply works with built-in functions, too!¶

Aside: Resetting the index¶

Example: Shared first names and sections¶

Another function: shared_first_and_section¶

Sneak peek¶

Activity¶

Summary, next time¶

Summary¶

Next time¶

Example: `first_name`¶

Using our `first_name` function¶

`.apply`¶

`.apply` works with built-in functions, too!¶

Another function: `shared_first_and_section`¶