# Set up packages for lecture. Don't worry about understanding this code, but
# make sure to run it if you're following along.
import numpy as np
import babypandas as bpd
import pandas as pd
from matplotlib_inline.backend_inline import set_matplotlib_formats
import matplotlib.pyplot as plt
%reload_ext pandas_tutor
%set_pandas_tutor_options {'projectorMode': True}
set_matplotlib_formats("svg")
plt.style.use('ggplot')

np.set_printoptions(threshold=20, precision=2, suppress=True)
pd.set_option("display.max_rows", 7)
pd.set_option("display.max_columns", 8)
pd.set_option("display.precision", 2)

from IPython.display import display, IFrame
def show_def():
    '''Displays the embedded Google Slides presentation inline in the notebook,
    in a 960-by-569 frame.'''
    embed_url = "https://docs.google.com/presentation/d/e/2PACX-1vRKMMwGtrQOeLefj31fCtmbNOaJuKY32eBz1VwHi_5ui0AGYV3MoCjPUtQ_4SB1f9x4Iu6gbH0vFvmB/embed?start=false&loop=false&delayms=60000"
    frame = IFrame(embed_url, 960, 569)
    display(frame)


2 / (1 / 80 + 1 / 60)

68.57142857142857


def harmonic_mean(a, b):
    '''Returns the harmonic mean of a and b, i.e. 2 divided by the sum of
    their reciprocals.'''
    sum_of_reciprocals = 1 / a + 1 / b
    return 2 / sum_of_reciprocals


harmonic_mean(80, 60)

68.57142857142857


harmonic_mean(20, 40)

26.666666666666664


show_def()


harmonic_mean(20, 40)

26.666666666666664


harmonic_mean(79, 894)

145.17163412127442


harmonic_mean(-2, 4)

-8.0


def triple(x):
    '''Returns x multiplied by 3. For a number this is three times its value;
    for a string it is the string repeated three times.'''
    tripled = x * 3
    return tripled


triple(5)

15


triple('triton')

'tritontritontriton'


def greeting():
    '''Returns a fixed greeting string. Takes no arguments.'''
    message = 'Hi! 👋'
    return message


greeting()

'Hi! 👋'


def where_is_the_error(something):  # No error at definition time; 1 / 0 is only evaluated when this is called.
    '''You can describe your function within triple quotes. For example, this function 
    illustrates that errors don't occur until functions are executed (called).'''
    return (1 / 0) + something


where_is_the_error(5)

---------------------------------------------------------------------------
ZeroDivisionError                         Traceback (most recent call last)
/var/folders/pd/w73mdrsj2836_7gp0brr2q7r0000gn/T/ipykernel_71400/3423408763.py in <module>
----> 1 where_is_the_error(5)

/var/folders/pd/w73mdrsj2836_7gp0brr2q7r0000gn/T/ipykernel_71400/1703529954.py in where_is_the_error(something)
      2     '''You can describe your function within triple quotes. For example, this function 
      3     illustrates that errors don't occur until functions are executed (called).'''
----> 4     return (1 / 0) + something

ZeroDivisionError: division by zero


'Pradeep Khosla'.split(' ')[0]

'Pradeep'


def first_name(full_name):
    '''Returns the part of full_name that comes before the first space.'''
    pieces = full_name.split(' ')
    return pieces[0]


first_name('Pradeep Khosla')

'Pradeep'


# What if there are three names?
first_name('Chancellor Pradeep Khosla')

'Chancellor'


def pythagorean(a, b):
    '''Prints the hypotenuse length of a triangle with legs a and b.
    Nothing is returned, so the value of a call to this function is None.'''
    sum_of_squares = a ** 2 + b ** 2
    print(sum_of_squares ** 0.5)


x = pythagorean(3, 4)

5.0


# No output – why?
x


# Errors – why?
x + 10

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
/var/folders/pd/w73mdrsj2836_7gp0brr2q7r0000gn/T/ipykernel_71400/3707561498.py in <module>
      1 # Errors – why?
----> 2 x + 10

TypeError: unsupported operand type(s) for +: 'NoneType' and 'int'


def better_pythagorean(a, b):
    '''Returns (rather than prints) the hypotenuse length of a triangle with legs a and b,
    so the result can be stored and used in later calculations.'''
    sum_of_squares = a ** 2 + b ** 2
    return sum_of_squares ** 0.5


x = better_pythagorean(3, 4)
x

5.0


x + 10

15.0


def motivational(quote):
    '''Always returns 0, no matter what quote is. Illustrates that a function
    stops running as soon as it reaches a return statement.'''
    return 0
    # Never runs: the return above has already ended the function.
    print("Here's a motivational quote:", quote)


motivational('Fall seven times and stand up eight.')

0


def what_is_awesome(s):
    '''Returns the string s with " is awesome!" added to the end.
    The name s is a parameter, so it only exists inside this function.'''
    praise = s + ' is awesome!'
    return praise


what_is_awesome('data science')

'data science is awesome!'

s

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
/var/folders/pd/w73mdrsj2836_7gp0brr2q7r0000gn/T/ipykernel_71400/1028141915.py in <module>
----> 1 s

NameError: name 's' is not defined


s = 'DSC 10'


what_is_awesome('data science')

'data science is awesome!'


roster = bpd.read_csv('data/roster-anon.csv')
roster


roster


roster


roster.get('name').iloc[0]

'Levy Dmxsqj'


first_name(roster.get('name').iloc[0])

'Levy'


first_name(roster.get('name').iloc[1])

'Aiden'


roster.get('name').apply(first_name)

0       Levy
1      Aiden
2      Sruti
       ...  
408     Leni
409     Dory
410    Laura
Name: name, Length: 411, dtype: object


%%pt

roster.get('name').apply(first_name)


# Add a 'first' column holding each student's first name, computed by
# applying first_name to every entry of the 'name' column.
with_first = roster.assign(
    first=roster.get('name').apply(first_name)
)
with_first


# Tally the students with each first name, most common name first.
# After grouping and counting, the 'name' column holds the number of rows
# for each first name, so it is the only column kept.
first_counts = with_first.groupby('first').count().sort_values('name', ascending=False).get(['name'])
first_counts


with_first


with_first.get('first').apply(len)

0      4
1      5
2      5
      ..
408    4
409    4
410    5
Name: first, Length: 411, dtype: int64


indexed_by_name = roster.set_index('name')
indexed_by_name


indexed_by_name.index.apply(first_name)

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
/var/folders/pd/w73mdrsj2836_7gp0brr2q7r0000gn/T/ipykernel_71400/1621495788.py in <module>
----> 1 indexed_by_name.index.apply(first_name)

AttributeError: 'Index' object has no attribute 'apply'


indexed_by_name.reset_index()


indexed_by_name.reset_index().get('name').apply(first_name)

0       Levy
1      Aiden
2      Sruti
       ...  
408     Leni
409     Dory
410    Laura
Name: name, Length: 411, dtype: object


with_first


# Look up the section of the student whose full name is 'Ryan Ufhwdl'.
# The query keeps the matching row(s); .iloc[0] takes the first one's section.
what_section = with_first[with_first.get('name') == 'Ryan Ufhwdl'].get('section').iloc[0]
what_section

'1PM'


# Count the students in that same section whose first name is 'Ryan'.
# Ryan Ufhwdl satisfies both conditions, so the count includes them.
how_many = with_first[(with_first.get('section') == what_section) & (with_first.get('first') == 'Ryan')].shape[0]
how_many

2


def shared_first_and_section(name):
    '''Returns the number of students in with_first (including the student
    themselves) who have the same first name and the same section as the
    student whose full name is name.'''
    # Locate this student's row. Full names are assumed to be unique; if they
    # are not, the first matching row is the one used.
    is_this_student = with_first.get('name') == name
    student = with_first[is_this_student]

    # Pull out the two values we need to match on.
    their_section = student.get('section').iloc[0]
    their_first = student.get('first').iloc[0]

    # Keep only the rows that agree on both first name and section.
    same_first = with_first.get('first') == their_first
    same_section = with_first.get('section') == their_section
    matches = with_first[same_first & same_section]

    # The number of rows left is the number of such students.
    return matches.shape[0]


shared_first_and_section('Ryan Ufhwdl')

2


shared_first_and_section('Dory Xaghsk')

1


# For every student, record how many students (themselves included) share
# both their first name and their section, in a new 'shared' column.
with_first = with_first.assign(shared=with_first.get('name').apply(shared_first_and_section))
with_first


with_first[(with_first.get('shared') > 1)].sort_values('shared', ascending=False)


# Keep only the 10AM students who share a first name with at least one other
# student in their section, listing the largest groups first.
one_section_only = with_first[(with_first.get('shared') > 1) & 
                              (with_first.get('section') == '10AM')].sort_values('shared', ascending=False)
one_section_only


one_section_only.get('first').unique()

array(['Andrew', 'Kevin'], dtype=object)

...

Ellipsis

	name
first
Ethan	5
Steven	4
Jason	4
...	...
Huanchang	1
Housheng	1
Zoya	1

Lecture 9 – Functions and Apply¶

DSC 10, Fall 2022¶

Announcements¶

Agenda¶

Functions¶

Defining functions¶

Motivation¶

Example: Harmonic mean¶

Functions¶

Functions are "recipes"¶

Parameters and arguments¶

Functions can take 0 or more arguments¶

Functions don't run until you call them!¶

Example: `first_name`¶

Returning¶

Returning¶

Scope 🩺¶

Applying functions to DataFrames¶

DSC 10 student data¶

Example: Common first names¶

Using our `first_name` function¶

`.apply`¶

Example: Common first names¶

Activity¶

Note: `.apply` works with built-in functions, too!¶

Aside: what if names are in the index?¶

Solution: `.reset_index()`¶

Example: Shared first names and sections¶

Another function: `shared_first_and_section`¶

Sneak peek¶

Activity¶

Summary, next time¶

Summary¶

Next time¶

	name	section
0	Levy Dmxsqj	11AM
1	Aiden Nyozzx	1PM
2	Sruti Fivolq	12PM
...	...	...
408	Leni Hlfjhh	11AM
409	Dory Xaghsk	1PM
410	Laura Xfqwzu	11AM

	name	section	first
0	Levy Dmxsqj	11AM	Levy
1	Aiden Nyozzx	1PM	Aiden
2	Sruti Fivolq	12PM	Sruti
...	...	...	...
408	Leni Hlfjhh	11AM	Leni
409	Dory Xaghsk	1PM	Dory
410	Laura Xfqwzu	11AM	Laura

	name	section	first	shared
39	Ethan Dpcred	1PM	Ethan	3
352	Andrew Aspfmf	10AM	Andrew	3
80	Samuel Vwwdmu	1PM	Samuel	3
...	...	...	...	...
82	Ryan Ufhwdl	1PM	Ryan	2
40	Justin Plbevg	11AM	Justin	2
375	Kevin Sgywid	10AM	Kevin	2

	name	section	first	shared
88	Andrew Qgvdmn	10AM	Andrew	3
117	Andrew Klhlht	10AM	Andrew	3
352	Andrew Aspfmf	10AM	Andrew	3
28	Kevin Wphdws	10AM	Kevin	2
375	Kevin Sgywid	10AM	Kevin	2

Lecture 9 – Functions and Apply¶

DSC 10, Fall 2022¶

Announcements¶

Agenda¶

Functions¶

Defining functions¶

Motivation¶

Example: Harmonic mean¶

Functions¶

Functions are "recipes"¶

Parameters and arguments¶

Functions can take 0 or more arguments¶

Functions don't run until you call them!¶

Example: first_name¶

Returning¶

Returning¶

Scope 🩺¶

Applying functions to DataFrames¶

DSC 10 student data¶

Example: Common first names¶

Using our first_name function¶

.apply¶

Example: Common first names¶

Activity¶

Note: .apply works with built-in functions, too!¶

Aside: what if names are in the index?¶

Solution: .reset_index()¶

Example: Shared first names and sections¶

Another function: shared_first_and_section¶

Sneak peek¶

Activity¶

Summary, next time¶

Summary¶

Next time¶

Example: `first_name`¶

Using our `first_name` function¶

`.apply`¶

Note: `.apply` works with built-in functions, too!¶

Solution: `.reset_index()`¶

Another function: `shared_first_and_section`¶