# Run this cell to set up packages for lecture.
from lec09_imports import *

# Read the class roster CSV into a DataFrame.
# NOTE(review): `bpd` is not imported explicitly; presumably it comes from the
# star import of lec09_imports above — confirm.
roster = bpd.read_csv('data/roster-anon.csv')
# Bare expression: shows the DataFrame when this is run as a notebook cell.
roster

def first_name(full_name):
    '''Returns the first name given a full name.

    The first name is the first whitespace-delimited word of full_name.
    Leading whitespace and separators other than a single space (tabs,
    repeated spaces) are tolerated. Returns an empty string when full_name
    is empty or contains only whitespace.
    '''
    # split() with no argument splits on any run of whitespace and never
    # yields empty pieces; split(' ') would return '' as the first piece
    # for a name that starts with a space.
    words = full_name.split()
    return words[0] if words else ''

# Add a 'first' column: first_name applied to every entry of the 'name' column.
roster = roster.assign(first=roster.get('name').apply(first_name))
roster

# Count the rows that share each first name...
name_counts = roster.groupby('first').count()
# ...order the first names from most to least frequent...
name_counts = name_counts.sort_values('name', ascending=False)
# ...and keep only the 'name' column, which now holds those counts.
name_counts = name_counts.get(['name'])
name_counts

# Rows per section among students whose first name is 'Ryan'.
(
    roster[roster.get('first') == 'Ryan']
    .groupby('section')
    .count()
)

# Same query for 'Vanessa'.
(
    roster[roster.get('first') == 'Vanessa']
    .groupby('section')
    .count()
)

# Same query for 'Nathan'.
(
    roster[roster.get('first') == 'Nathan']
    .groupby('section')
    .count()
)

# Grouping by 'first' gives one row for each distinct first name.
(
    roster
    .groupby('first')
    .count()
    .get(['name'])
)

# Grouping by 'section' gives one row for each distinct section.
(
    roster
    .groupby('section')
    .count()
    .get(['name'])
)

roster

# Grouping on a list of columns: one group per (section, first) combination.
(
    roster
    .groupby(['section', 'first'])
    .count()
)

# reset_index turns the group labels from the index back into regular columns.
(
    roster
    .groupby(['section', 'first'])
    .count()
    .reset_index()
)

(
    roster
    .groupby(['section', 'first'])
    .count()
    .reset_index()
)

# The same grouping with the two columns listed in the opposite order.
(
    roster
    .groupby(['first', 'section'])
    .count()
    .reset_index()
)

# Keep the section-then-first version for later use.
counts = (
    roster
    .groupby(['section', 'first'])
    .count()
    .reset_index()
)
counts

# Read the sea temperature CSV into a DataFrame and show it.
sea_temp = bpd.read_csv('data/sea_temp.csv')
sea_temp

# NOTE(review): bare `...` placeholder — presumably a cell left blank for an
# in-class exercise; confirm.
...

# NOTE(review): `Ellipsis` is the same object as `...`; this line looks like
# the echoed output of the placeholder above rather than authored code —
# confirm before relying on it.
Ellipsis

# Average every column within each MONTH value, then draw the monthly mean
# of SURFACE_TEMP as a line plot. The trailing semicolon is kept from the
# original cell.
sea_temp.groupby('MONTH').mean().plot(kind='line', y='SURFACE_TEMP');

# Why is there a sudden drop at the end? Look at the dates of data collection!
# The same pipeline, grouped by YEAR instead of MONTH.
sea_temp.groupby('YEAR').mean().plot(kind='line', y='SURFACE_TEMP');

# Resale offer table: one row per clothing type, built one column at a time.
offer_percentage = (
    bpd.DataFrame()
    .assign(clothing_type=['Shirt', 'Pants', 'Dress', 'Shorts', 'Shoes'])
    .assign(offer_percentage=[20, 30, 50, 30, 50])
)

# Collection table: one row per item owned ('Shoes' appears twice).
clothes = (
    bpd.DataFrame()
    .assign(item=['Dress', 'Shirt', 'Shoes', 'Pants', 'Shoes'])
    .assign(retail_price=[150, 30, 90, 50, 70])
)

# Share of the retail price (in percent) earned when reselling each clothing type.
offer_percentage

# Items to be sold, with their retail prices.
clothes

# Combine the two tables, matching offer_percentage's 'clothing_type' values
# against clothes' 'item' values.
clothes_merged = offer_percentage.merge(
    clothes,
    left_on='clothing_type',
    right_on='item',
)
clothes_merged

# Step through the slideshow that this call displays.
merging_animation()

# General shape of a merge on two columns that have different names.
# NOTE(review): left_df, right_df and the two column names are not defined
# anywhere in the visible code, so this looks like an illustrative template
# rather than a runnable cell — confirm.
left_df.merge(
    right_df, 
    left_on='left_col_name',
    right_on='right_col_name'
)

# Rebuild the merged table: offers matched to the items in the collection.
clothes_merged = offer_percentage.merge(
    clothes,
    left_on='clothing_type',
    right_on='item',
)
clothes_merged

# Total earnings if every item in the collection is sold: convert each offer
# from a percentage to a fraction, scale the item's retail price by it, and
# add up the results.
(
    clothes_merged.get('offer_percentage') / 100
    * clothes_merged.get('retail_price')
).sum()

# With the tables defined above the total works out to 6 + 15 + 75 + 45 + 35.
176.0

# offer_percentage on the left, clothes on the right.
offer_percentage.merge(
    clothes,
    left_on='clothing_type',
    right_on='item',
)

# The two DataFrames (and their key columns) swapped.
clothes.merge(
    offer_percentage,
    left_on='item',
    right_on='clothing_type',
)

offer_percentage

# Copy 'item' into a new 'clothing_type' column and drop the original, so
# both tables use the same name for the key column.
clothes_relabeled = (
    clothes
    .assign(clothing_type=clothes.get('item'))
    .drop(columns=['item'])
)
clothes_relabeled

# With a shared column name, a single `on` argument names the key.
offer_percentage.merge(
    clothes_relabeled,
    on='clothing_type',
)

# Move 'clothing_type' into the index of the offers table.
offers_by_item = offer_percentage.set_index(
    'clothing_type',
)
offers_by_item

clothes

# Match the left table's index against the right table's 'item' column.
offers_by_item.merge(
    clothes,
    left_index=True,
    right_on='item',
)

concept_check()

	name	section
0	Shawn Hhnxoq	10AM
1	Tom Egzuaz	11AM
2	Jiahao Zvwwyb	11AM
...	...	...
237	Jason Eglntp	11AM
238	Renee Fhlaos	11AM
239	Vivek Tbedny	11AM

	name	section	first
0	Shawn Hhnxoq	10AM	Shawn
1	Tom Egzuaz	11AM	Tom
2	Jiahao Zvwwyb	11AM	Jiahao
...	...	...	...
237	Jason Eglntp	11AM	Jason
238	Renee Fhlaos	11AM	Renee
239	Vivek Tbedny	11AM	Vivek

	name
first
Aarav	1
Abduboriyjon	1
Achintya	1
...	...
Zixuan	1
Ziyang	1
Zora	1

	name
section
10AM	125
11AM	115

	name	section	first
0	Shawn Hhnxoq	10AM	Shawn
1	Tom Egzuaz	11AM	Tom
2	Jiahao Zvwwyb	11AM	Jiahao
...	...	...	...
237	Jason Eglntp	11AM	Jason
238	Renee Fhlaos	11AM	Renee
239	Vivek Tbedny	11AM	Vivek

Lecture 9 – Grouping on Multiple Columns, Merging¶

DSC 10, Winter 2025¶

Agenda¶

Grouping on multiple columns¶

DSC 10 student data¶

How many students named Ryan are in each section?¶

How many students with each first name does each lecture section have?¶

Grouping on multiple columns¶

Grouping on multiple columns¶

Notice the index... 🤔¶

Does order matter?¶

Activity¶

Example: Sea temperatures 🌊¶

Concept Check ✅ – Answer at cc.dsc10.com¶

Plots of monthly and yearly average surface temperature 📈¶

Summary: Grouping on multiple columns¶

Merging 🚙¶

Example: Clothing Resale 👕¶

If I sell all of the clothes in my collection, how much will I earn?¶

What just happened!? 🤯¶

`.merge`¶

If I sell all of the clothes in my collection, how much will I earn?¶

Does it matter which DataFrame is the left or right DataFrame? 🤔¶

Special cases¶

What if the names of the columns we want to merge on are both the same?¶

What if we want to merge using an index instead of a column?¶

Concept Check ✅ – Answer at cc.dsc10.com¶

Followup activity¶

More practice!¶

Summary, next time¶

Summary¶

Next time¶

	YEAR	MONTH	DAY	SURFACE_TEMP
0	1916	8	22	19.5
1	1916	8	23	19.9
2	1916	8	24	19.7
...	...	...	...	...
38088	2024	6	27	21.3
38089	2024	6	28	20.8
38090	2024	6	30	21.5

	clothing_type	offer_percentage	item	retail_price
0	Shirt	20	Shirt	30
1	Pants	30	Pants	50
2	Dress	50	Dress	150
3	Shoes	50	Shoes	90
4	Shoes	50	Shoes	70

Lecture 9 – Grouping on Multiple Columns, Merging¶

DSC 10, Winter 2025¶

Agenda¶

Grouping on multiple columns¶

DSC 10 student data¶

How many students named Ryan are in each section?¶

How many students with each first name does each lecture section have?¶

Grouping on multiple columns¶

Grouping on multiple columns¶

Notice the index... 🤔¶

Does order matter?¶

Activity¶

Example: Sea temperatures 🌊¶

Concept Check ✅ – Answer at cc.dsc10.com¶

Plots of monthly and yearly average surface temperature 📈¶

Summary: Grouping on multiple columns¶

Merging 🚙¶

Example: Clothing Resale 👕¶

If I sell all of the clothes in my collection, how much will I earn?¶

What just happened!? 🤯¶

`.merge`¶

If I sell all of the clothes in my collection, how much will I earn?¶

Does it matter which DataFrame is the left or right DataFrame? 🤔¶

Special cases¶

What if the names of the columns we want to merge on are both the same?¶

What if we want to merge using an index instead of a column?¶

Concept Check ✅ – Answer at cc.dsc10.com¶

Followup activity¶

More practice!¶

Summary, next time¶

Summary¶

Next time¶

Concept Check ✅ – Answer at cc.dsc10.com¶

`.merge`¶

Concept Check ✅ – Answer at cc.dsc10.com¶