# Run this cell to set up packages for lecture.
from lec08_imports import *

multiples_of_10 = np.arange(10, 130, 10)
multiples_of_10

array([ 10,  20,  30,  40,  50,  60,  70,  80,  90, 100, 110, 120])

multiples_of_8 = np.arange(8, 13*8, 8)
multiples_of_8

array([ 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96])

multiples_of_5 = ...
multiples_of_5

Ellipsis

def multiples(k):
    '''Return the first twelve multiples of k as a NumPy array.

    The array is built by scaling the integers 1 through 12 by k instead
    of stepping from k up to 13*k with np.arange. The result therefore
    always has exactly twelve elements: a floating-point k cannot gain or
    lose an element through rounding, and k = 0 gives twelve zeros rather
    than failing on a zero step size.
    '''
    return k * np.arange(1, 13)

multiples(8)

array([ 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96])

multiples(5)

array([ 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60])

show_def()

multiples(7)

array([ 7, 14, 21, 28, 35, 42, 49, 56, 63, 70, 77, 84])

multiples(-2)

array([ -2,  -4,  -6,  -8, -10, -12, -14, -16, -18, -20, -22, -24])

def triple(x):
    '''Return x multiplied by 3.

    Works for anything that supports * with an integer: numbers are
    scaled, while strings are repeated three times.
    '''
    tripled = x * 3
    return tripled

triple(5)

15

triple(7 + 8)

45

triple('triton')

'tritontritontriton'

def triple(x):
    '''Return three times x.

    The parameter x is local to this function: it only exists while a
    call is running and is unrelated to any variable named x outside.
    '''
    result = x * 3
    return result

triple(7)

21

x

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[34], line 1
----> 1 x

NameError: name 'x' is not defined

x = 15

# When triple(12) is called, you can pretend
# there's an invisible line at the top of the body of triple
# that says x = 12.
# The x = 15 above is ignored.
triple(12)

def greeting():
    '''Return a fixed greeting string; takes no arguments.'''
    message = 'Hi! 👋'
    return message

greeting()

def custom_multiples(k, how_many):
    '''Return the first how_many multiples of k as a NumPy array.

    The integers 1 through how_many are scaled by k instead of stepping
    from k with np.arange. The result therefore has exactly how_many
    elements even when k is a float (no rounding-induced extra or missing
    element) or when k is 0 (an array of zeros rather than a zero-step
    error). A how_many of 0 or less gives an empty array.
    '''
    return k * np.arange(1, how_many + 1)

custom_multiples(10, 7)

custom_multiples(2, 100)

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[35], line 1
----> 1 custom_multiples(2, 100)

NameError: name 'custom_multiples' is not defined

def where_is_the_error(something):
    '''A function to illustrate that errors don't occur
    until functions are executed (called).

    Defining this function succeeds. Calling it always raises
    ZeroDivisionError, whatever something is.
    '''
    # 1 / 0 is evaluated only when the function is called; the error is
    # raised here, before something is ever added.
    return (1 / 0) + something

where_is_the_error(5)

---------------------------------------------------------------------------
ZeroDivisionError                         Traceback (most recent call last)
Cell In[39], line 1
----> 1 where_is_the_error(5)

Cell In[37], line 4, in where_is_the_error(something)
      1 def where_is_the_error(something):
      2     '''A function to illustrate that errors don't occur 
      3     until functions are executed (called).'''
----> 4     return (1 / 0) + something

ZeroDivisionError: division by zero

>>> first_name('Pradeep Khosla')
'Pradeep'

'Pradeep Khosla'.split(' ')[0]

'Pradeep'

def first_name(full_name):
    '''Returns the first name given a full name.

    The first name is the first whitespace-separated word of full_name.
    Splitting on any run of whitespace (instead of on a single space
    character) means leading spaces or tabs no longer produce an empty
    first name. An empty or all-whitespace full_name returns ''.
    '''
    # maxsplit=1: only the first word is needed, so stop after one split.
    words = full_name.split(maxsplit=1)
    return words[0] if words else ''

first_name('Pradeep Khosla')

'Pradeep'

# What if there are three names?
first_name('Chancellor Pradeep Khosla')

'Chancellor'

def pythagorean(a, b):
    '''Prints the hypotenuse length of a right triangle with legs a and b.

    The length is only printed, not returned, so the value of a call to
    this function is None.
    '''
    sum_of_squares = a ** 2 + b ** 2
    print(sum_of_squares ** 0.5)

x = pythagorean(3, 4)

5.0

# No output – why?
x

# Errors – why?
x + 10

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[50], line 2
      1 # Errors – why?
----> 2 x + 10

TypeError: unsupported operand type(s) for +: 'NoneType' and 'int'

def better_pythagorean(a, b):
    '''Returns the hypotenuse length of a right triangle with legs a and b.

    The length is returned rather than printed, so the result can be
    stored in a variable and used in later calculations.
    '''
    return (a ** 2 + b ** 2) ** 0.5

x = better_pythagorean(3, 4)
x

5.0

x + 10

15.0

def motivational(quote):
    '''Illustrates that return ends a function call immediately.

    Always returns 0; quote is never printed.
    '''
    return 0
    # Unreachable on purpose: the return above has already ended the call,
    # so this print never runs.
    print("Here's a motivational quote:", quote)

motivational('Fall seven times and stand up eight.')

0

roster = bpd.read_csv('data/roster-anon.csv')
roster

roster

roster

roster.get('name').iloc[0]

'Shawn Hhnxoq'

first_name(roster.get('name').iloc[0])

'Shawn'

first_name(roster.get('name').iloc[1])

'Tom'

roster.get('name')

0       Shawn Hhnxoq
1         Tom Egzuaz
2      Jiahao Zvwwyb
           ...      
237     Jason Eglntp
238     Renee Fhlaos
239     Vivek Tbedny
Name: name, Length: 240, dtype: object

roster.get('name').apply(first_name)

0       Shawn
1         Tom
2      Jiahao
        ...  
237     Jason
238     Renee
239     Vivek
Name: name, Length: 240, dtype: object

roster = roster.assign(
    first=roster.get('name').apply(first_name)
)
roster

name_counts = (
    roster
    .groupby('first')
    .count()
    .sort_values('name', ascending=False)
    .get(['name'])
)
name_counts

...

Ellipsis

...

Ellipsis

name_counts.get('name')

first
Ryan      4
Andrew    4
Nathan    3
         ..
Jiahao    1
Jimbo     1
Zora      1
Name: name, Length: 212, dtype: int64

# Not necessarily meaningful, but doable.
name_counts.get('name').apply(np.log)

first
Ryan      1.39
Andrew    1.39
Nathan    1.10
          ... 
Jiahao    0.00
Jimbo     0.00
Zora      0.00
Name: name, Length: 212, dtype: float64

name_counts.index

Index(['Ryan', 'Andrew', 'Nathan', 'Vanessa', 'Anthony', 'Andy', 'David',
       'Katherine', 'Noah', 'William',
       ...
       'Ishaan', 'Izabella', 'Jaden', 'Janelle', 'Jared', 'Jennifer',
       'Jeremiah', 'Jiahao', 'Jimbo', 'Zora'],
      dtype='object', name='first', length=212)

name_counts.index.apply(max)

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In[88], line 1
----> 1 name_counts.index.apply(max)

AttributeError: 'Index' object has no attribute 'apply'

# What is the max of an individual string?
name_counts.reset_index().get('first').apply(max)

0      y
1      w
2      t
      ..
209    o
210    o
211    r
Name: first, Length: 212, dtype: object

roster

which_section = roster[roster.get('name') == 'Jason Eglntp'].get('section').iloc[0]
which_section

'11AM'

first_cond = roster.get('first') == 'Jason' # A Boolean Series!
section_cond = roster.get('section') == which_section # A Boolean Series!
how_many = roster[first_cond & section_cond].shape[0]
how_many

1

def shared_first_and_section(name):
    '''Returns the number of students in roster who have the same first
    name and the same section as the student whose full name is name.

    The count includes the student themselves. Full names are assumed
    to be unique; if they are not, the first matching row is used.
    '''
    # Look up the student's own row by full name.
    student = roster[roster.get('name') == name]

    # The two values every matching student must share.
    target_first = student.get('first').iloc[0]
    target_section = student.get('section').iloc[0]

    # One Boolean Series per condition; a row counts only if both hold.
    same_first = roster.get('first') == target_first
    same_section = roster.get('section') == target_section

    return roster[same_first & same_section].shape[0]

shared_first_and_section('Jason Eglntp')

1

roster = roster.assign(shared=roster.get('name').apply(shared_first_and_section))
roster

roster[(roster.get('shared') >= 2)].sort_values('shared', ascending=False)

one_section_only = (
    roster[(roster.get('shared') >= 2) & 
           (roster.get('section') == '10AM')]
    .sort_values('shared', ascending=False)
)
one_section_only

# All of the names shared by multiple students in the 10AM section.
one_section_only.get('first').unique()

array(['Andy', 'Anthony', 'Nathan', 'Sophie', 'Noah', 'Andrew', 'Vanessa',
       'Amelia', 'Daniel'], dtype=object)

...

Ellipsis

Lecture 8 – Functions and Applying¶

DSC 10, Winter 2025¶

Agenda¶

Functions¶

Defining functions¶

Motivation¶

More generally¶

Functions¶

Functions are "recipes"¶

Parameters and arguments¶

Scope 🩺¶

Functions can take 0 or more arguments¶

Functions don't run until you call them!¶

Example: `first_name`¶

Returning¶

Returning¶

Applying functions to DataFrames¶

DSC 10 student data¶

Example: Common first names¶

Using our `first_name` function¶

`.apply`¶

Example: Common first names¶

Activity¶

`.apply` works with built-in functions, too!¶

Aside: Resetting the index¶

Example: Shared first names and sections¶

Another function: `shared_first_and_section`¶

Sneak peek¶

Activity¶

Summary, next time¶

Summary¶

Next time¶

	name	section
0	Shawn Hhnxoq	10AM
1	Tom Egzuaz	11AM
2	Jiahao Zvwwyb	11AM
...	...	...
237	Jason Eglntp	11AM
238	Renee Fhlaos	11AM
239	Vivek Tbedny	11AM

	name	section	first
0	Shawn Hhnxoq	10AM	Shawn
1	Tom Egzuaz	11AM	Tom
2	Jiahao Zvwwyb	11AM	Jiahao
...	...	...	...
237	Jason Eglntp	11AM	Jason
238	Renee Fhlaos	11AM	Renee
239	Vivek Tbedny	11AM	Vivek

	name	section	first	shared
88	Ryan Nvrosl	11AM	Ryan	3
133	Andy Vnalqe	10AM	Andy	3
55	Andy Caktll	10AM	Andy	3
...	...	...	...	...
39	Vanessa Mqyyub	10AM	Vanessa	2
34	Amelia Grfclp	10AM	Amelia	2
220	Nathan Sbyzyi	10AM	Nathan	2

Lecture 8 – Functions and Applying¶

DSC 10, Winter 2025¶

Agenda¶

Functions¶

Defining functions¶

Motivation¶

More generally¶

Functions¶

Functions are "recipes"¶

Parameters and arguments¶

Scope 🩺¶

Functions can take 0 or more arguments¶

Functions don't run until you call them!¶

Example: first_name¶

Returning¶

Returning¶

Applying functions to DataFrames¶

DSC 10 student data¶

Example: Common first names¶

Using our first_name function¶

.apply¶

Example: Common first names¶

Activity¶

.apply works with built-in functions, too!¶

Aside: Resetting the index¶

Example: Shared first names and sections¶

Another function: shared_first_and_section¶

Sneak peek¶

Activity¶

Summary, next time¶

Summary¶

Next time¶

Example: `first_name`¶

Using our `first_name` function¶

`.apply`¶

`.apply` works with built-in functions, too!¶

Another function: `shared_first_and_section`¶