# Run this cell if you're following along – it just helps make the lectures appear prettier.
import pandas as pd
import numpy as np

# Keep printed arrays and DataFrames compact: few rows/columns, two decimal places.
np.set_printoptions(precision=2, suppress=True, threshold=20)
pd.set_option(
    "display.max_rows", 7,
    "display.max_columns", 8,
    "display.precision", 2,
)


import babypandas as bpd
import numpy as np


# Read the CSV file of Get It Done service requests into a DataFrame, then display it.
apr_08 = bpd.read_csv('data/get-it-done-apr-08.csv')
apr_08


# This DataFrame has 1089 rows and 7 columns.
apr_08


# set_index returns a new DataFrame that uses 'service_request_id' as its index;
# the result here is only displayed, not saved.
apr_08.set_index('service_request_id')


# apr_08 itself was not changed by the call above.
apr_08


# To keep the new index, assign the result back to apr_08.
apr_08 = apr_08.set_index('service_request_id')
apr_08


# There were 7 columns before, but one of them became the index, and the index is not a column!
apr_08.shape

(1089, 6)


# Number of rows.
apr_08.shape[0]

1089


# Number of columns.
apr_08.shape[1]

6


# Read the summary of requests into a DataFrame; its columns are
# 'neighborhood', 'service', 'closed', and 'open'.
requests = bpd.read_csv('data/get-it-done-requests.csv')
requests


requests


requests


# .get with a column name returns that column as a Series.
requests.get('closed')

0        11
1         2
2       215
3         8
4         2
       ... 
1416      0
1417      0
1418     11
1419      0
1420      1
Name: closed, Length: 1421, dtype: int64


requests.get('closed')

0        11
1         2
2       215
3         8
4         2
       ... 
1416      0
1417      0
1418     11
1419      0
1420      1
Name: closed, Length: 1421, dtype: int64


type(requests.get('closed'))

babypandas.bpd.Series


requests.get('open')

0        0
1        0
2       20
3        1
4        1
        ..
1416     1
1417     3
1418     7
1419     1
1420     0
Name: open, Length: 1421, dtype: int64


# Adding two Series adds them element by element: each row's 'closed' count plus its 'open' count.
requests.get('closed') + requests.get('open')

0        11
1         2
2       235
3         9
4         3
       ... 
1416      1
1417      3
1418     18
1419      1
1420      1
Length: 1421, dtype: int64


# assign returns a new DataFrame with an added 'total' column;
# the result here is only displayed, not saved.
requests.assign(
    total=requests.get('closed') + requests.get('open')
)


# Because the result above was not saved, requests is unchanged.
requests


# To keep the 'total' column, assign the result back to requests.
requests = requests.assign(
    total=requests.get('closed') + requests.get('open')
)
requests


requests.get('total').max()

4970


requests.get('total').mean()

82.88529204785362


requests.get('total').median()

18.0


requests.get('open').mean()

16.58972554539057


requests.get('open').median()

3.0


# Lots of information at once!
requests.get('total').describe()

count    1421.00
mean       82.89
std       258.37
min         1.00
25%         5.00
50%        18.00
75%        54.00
max      4970.00
Name: total, dtype: float64


# sort_values orders the rows by the 'total' column, smallest first by default.
requests.sort_values(by='total')


# ascending=False reverses the order so the largest totals come first.
ordered_requests = requests.sort_values(by='total', ascending=False)
ordered_requests


# We must pass False by keyword, as ascending=False. Python does not allow a
# positional argument after a keyword argument, so this line is a SyntaxError.
requests.sort_values(by='total', False)

  File "/var/folders/ch/hyjw6whx3g9gshnp58738jc80000gp/T/ipykernel_2092/3731718929.py", line 3
    requests.sort_values(by='total', False)
                                          ^
SyntaxError: positional argument follows keyword argument


ordered_requests


ordered_requests.get('neighborhood')

207                         Downtown
404            Mid-City:City Heights
1213          Southeastern San Diego
147                  Clairemont Mesa
1389                          Uptown
                    ...             
391           Los Penasquitos Canyon
346                         La Jolla
274     Fairbanks Ranch Country Club
243                     East Elliott
1420                 Via De La Valle
Name: neighborhood, Length: 1421, dtype: object


# iloc[0] gets the element at integer position 0, i.e. the neighborhood in the
# first row of the sorted DataFrame (whose row label is 207, not 0).
ordered_requests.get('neighborhood').iloc[0]

'Downtown'


ordered_requests.get('service').iloc[0]

'Encampment'


apr_08


apr_08.get('status')

service_request_id
4183116      open
4183117      open
4183118      open
4183119      open
4183120      open
            ...  
4184220    closed
4184221    closed
4184223    closed
4184225    closed
4184294    closed
Name: status, Length: 1089, dtype: object


# loc looks up an element by its index label; here the labels are service_request_id values.
apr_08.get('status').loc[4183848]

'open'

...

Ellipsis


bpd.read_csv('data/get-it-done-apr-08.csv')


bpd.read_csv('data/get-it-done-apr-08.csv').get('public_description').loc[561]

'Abandoned van'


# A freshly read DataFrame has the default index 0, 1, 2, ..., so position 561
# and label 561 are the same row; iloc and loc give the same answer here.
bpd.read_csv('data/get-it-done-apr-08.csv').get('public_description').iloc[561]

'Abandoned van'


requests


# This DataFrame only contains rows where the 'service' is 'Pothole'!
only_potholes = requests[requests.get('service') == 'Pothole']
only_potholes


# Sort the pothole rows by 'total', largest first, then take the neighborhood of the top row.
only_potholes.sort_values('total', ascending=False).get('neighborhood').iloc[0]

'Clairemont Mesa'


5 == 6

False


type(5 == 6)

bool


9 + 10 < 21

True


'zebra' == 'zeb' + 'ra'

True


requests


# Comparing a Series to a value compares every element, producing a Series of Booleans.
requests.get('service') == 'Pothole'

0       False
1       False
2       False
3       False
4       False
        ...  
1416    False
1417    False
1418     True
1419    False
1420    False
Name: service, Length: 1421, dtype: bool


# Putting a Boolean Series in square brackets keeps only the rows where it is True.
requests[requests.get('service') == 'Pothole']


requests


requests.get('open') > 100

0       False
1       False
2       False
3       False
4       False
        ...  
1416    False
1417    False
1418    False
1419    False
1420    False
Name: open, Length: 1421, dtype: bool


requests[requests.get('open') > 100]

	service_request_id	date_requested	neighborhood	service	status	street_address	public_description
0	4183116	2023-04-08T00:32:00	Downtown	Traffic Signal Issue	open	2ND AVE & G ST	Signal, 2nd & G
1	4183117	2023-04-08T00:44:00	Mid-City:Eastern Area	Missed Collection	open	4791 Seminole Dr, San Diego, CA 92115, USA	My Blue Recycle Bin was not collected. I'm th...
2	4183118	2023-04-08T00:49:00	Navajo	Parking	open	4728 Allied Rd, San Diego, CA 92120, USA	White van parked across my driveway. I will n...
3	4183119	2023-04-08T01:04:00	Encanto Neighborhoods	Missed Collection	open	6066 Tempas Ct	Green container
4	4183120	2023-04-08T01:10:00	Navajo	Pothole	open	7961?7979 Topaz Lake Ave	Potholes \|\| LOCATION: Topaz Lake between Pear...
...	...	...	...	...	...	...	...
1084	4184220	2023-04-08T19:53:00	Barrio Logan	Other	closed	3718 Dalbergia St	Prostitution
1085	4184221	2023-04-08T19:53:00	Barrio Logan	Other	closed	3743 Dalbergia St	Prostitution
1086	4184223	2023-04-08T19:54:00	Barrio Logan	Other	closed	2120 Woden St	Prostitution
1087	4184225	2023-04-08T19:54:00	Barrio Logan	Other	closed	2005 Vesta St	Prostitution
1088	4184294	2023-04-08T23:36:00	Southeastern San Diego	Other	closed	3762 Cottonwood St	Loud music

	service_request_id	date_requested	neighborhood	service	status	street_address	public_description
0	4183116	2023-04-08T00:32:00	Downtown	Traffic Signal Issue	open	2ND AVE & G ST	Signal, 2nd & G
1	4183117	2023-04-08T00:44:00	Mid-City:Eastern Area	Missed Collection	open	4791 Seminole Dr, San Diego, CA 92115, USA	My Blue Recycle Bin was not collected. I'm th...
2	4183118	2023-04-08T00:49:00	Navajo	Parking	open	4728 Allied Rd, San Diego, CA 92120, USA	White van parked across my driveway. I will n...
3	4183119	2023-04-08T01:04:00	Encanto Neighborhoods	Missed Collection	open	6066 Tempas Ct	Green container
4	4183120	2023-04-08T01:10:00	Navajo	Pothole	open	7961?7979 Topaz Lake Ave	Potholes \|\| LOCATION: Topaz Lake between Pear...
...	...	...	...	...	...	...	...
1084	4184220	2023-04-08T19:53:00	Barrio Logan	Other	closed	3718 Dalbergia St	Prostitution
1085	4184221	2023-04-08T19:53:00	Barrio Logan	Other	closed	3743 Dalbergia St	Prostitution
1086	4184223	2023-04-08T19:54:00	Barrio Logan	Other	closed	2120 Woden St	Prostitution
1087	4184225	2023-04-08T19:54:00	Barrio Logan	Other	closed	2005 Vesta St	Prostitution
1088	4184294	2023-04-08T23:36:00	Southeastern San Diego	Other	closed	3762 Cottonwood St	Loud music

	date_requested	neighborhood	service	status	street_address	public_description
service_request_id
4183116	2023-04-08T00:32:00	Downtown	Traffic Signal Issue	open	2ND AVE & G ST	Signal, 2nd & G
4183117	2023-04-08T00:44:00	Mid-City:Eastern Area	Missed Collection	open	4791 Seminole Dr, San Diego, CA 92115, USA	My Blue Recycle Bin was not collected. I'm th...
4183118	2023-04-08T00:49:00	Navajo	Parking	open	4728 Allied Rd, San Diego, CA 92120, USA	White van parked across my driveway. I will n...
4183119	2023-04-08T01:04:00	Encanto Neighborhoods	Missed Collection	open	6066 Tempas Ct	Green container
4183120	2023-04-08T01:10:00	Navajo	Pothole	open	7961?7979 Topaz Lake Ave	Potholes \|\| LOCATION: Topaz Lake between Pear...
...	...	...	...	...	...	...
4184220	2023-04-08T19:53:00	Barrio Logan	Other	closed	3718 Dalbergia St	Prostitution
4184221	2023-04-08T19:53:00	Barrio Logan	Other	closed	3743 Dalbergia St	Prostitution
4184223	2023-04-08T19:54:00	Barrio Logan	Other	closed	2120 Woden St	Prostitution
4184225	2023-04-08T19:54:00	Barrio Logan	Other	closed	2005 Vesta St	Prostitution
4184294	2023-04-08T23:36:00	Southeastern San Diego	Other	closed	3762 Cottonwood St	Loud music

	service_request_id	date_requested	neighborhood	service	status	street_address	public_description
0	4183116	2023-04-08T00:32:00	Downtown	Traffic Signal Issue	open	2ND AVE & G ST	Signal, 2nd & G
1	4183117	2023-04-08T00:44:00	Mid-City:Eastern Area	Missed Collection	open	4791 Seminole Dr, San Diego, CA 92115, USA	My Blue Recycle Bin was not collected. I'm th...
2	4183118	2023-04-08T00:49:00	Navajo	Parking	open	4728 Allied Rd, San Diego, CA 92120, USA	White van parked across my driveway. I will n...
3	4183119	2023-04-08T01:04:00	Encanto Neighborhoods	Missed Collection	open	6066 Tempas Ct	Green container
4	4183120	2023-04-08T01:10:00	Navajo	Pothole	open	7961?7979 Topaz Lake Ave	Potholes \|\| LOCATION: Topaz Lake between Pear...
...	...	...	...	...	...	...	...
1084	4184220	2023-04-08T19:53:00	Barrio Logan	Other	closed	3718 Dalbergia St	Prostitution
1085	4184221	2023-04-08T19:53:00	Barrio Logan	Other	closed	3743 Dalbergia St	Prostitution
1086	4184223	2023-04-08T19:54:00	Barrio Logan	Other	closed	2120 Woden St	Prostitution
1087	4184225	2023-04-08T19:54:00	Barrio Logan	Other	closed	2005 Vesta St	Prostitution
1088	4184294	2023-04-08T23:36:00	Southeastern San Diego	Other	closed	3762 Cottonwood St	Loud music

	date_requested	neighborhood	service	status	street_address	public_description
service_request_id
4183116	2023-04-08T00:32:00	Downtown	Traffic Signal Issue	open	2ND AVE & G ST	Signal, 2nd & G
4183117	2023-04-08T00:44:00	Mid-City:Eastern Area	Missed Collection	open	4791 Seminole Dr, San Diego, CA 92115, USA	My Blue Recycle Bin was not collected. I'm th...
4183118	2023-04-08T00:49:00	Navajo	Parking	open	4728 Allied Rd, San Diego, CA 92120, USA	White van parked across my driveway. I will n...
4183119	2023-04-08T01:04:00	Encanto Neighborhoods	Missed Collection	open	6066 Tempas Ct	Green container
4183120	2023-04-08T01:10:00	Navajo	Pothole	open	7961?7979 Topaz Lake Ave	Potholes \|\| LOCATION: Topaz Lake between Pear...
...	...	...	...	...	...	...
4184220	2023-04-08T19:53:00	Barrio Logan	Other	closed	3718 Dalbergia St	Prostitution
4184221	2023-04-08T19:53:00	Barrio Logan	Other	closed	3743 Dalbergia St	Prostitution
4184223	2023-04-08T19:54:00	Barrio Logan	Other	closed	2120 Woden St	Prostitution
4184225	2023-04-08T19:54:00	Barrio Logan	Other	closed	2005 Vesta St	Prostitution
4184294	2023-04-08T23:36:00	Southeastern San Diego	Other	closed	3762 Cottonwood St	Loud music

	neighborhood	service	closed	open
0	Balboa Park	Dead Animal	11	0
1	Balboa Park	Development Services - Code Enforcement	2	0
2	Balboa Park	Encampment	215	20
3	Balboa Park	Environmental Services Code Compliance	8	1
4	Balboa Park	Graffiti - Code Enforcement	2	1
...	...	...	...	...
1416	Via De La Valle	Encampment	0	1
1417	Via De La Valle	Pavement Maintenance	0	3
1418	Via De La Valle	Pothole	11	7
1419	Via De La Valle	Sidewalk Repair Issue	0	1
1420	Via De La Valle	Street Sweeping	1	0

	neighborhood	service	closed	open	total
207	Downtown	Encampment	4321	649	4970
404	Mid-City:City Heights	Illegal Dumping	3997	51	4048
1213	Southeastern San Diego	Illegal Dumping	2965	15	2980
147	Clairemont Mesa	Pothole	1847	164	2011
1389	Uptown	Graffiti - Public	1422	384	1806
...	...	...	...	...	...
391	Los Penasquitos Canyon	Environmental Services Code Compliance	1	0	1
346	La Jolla	Right-of-Way Code Enforcement	0	1	1
274	Fairbanks Ranch Country Club	Stormwater	1	0	1
243	East Elliott	Tree Maintenance	1	0	1
1420	Via De La Valle	Street Sweeping	1	0	1

symbol	meaning
`==`	equal to
`!=`	not equal to
`<`	less than
`<=`	less than or equal to
`>`	greater than
`>=`	greater than or equal to

Lecture 4 – DataFrames: Accessing, Sorting, and Querying¶

DSC 10, Spring 2023¶

Announcements¶

Agenda¶

Note:¶

DataFrames¶

pandas¶

But pandas is not so cute...¶

Enter babypandas!¶

DataFrames in babypandas 🐼¶

About the Data: Get It Done 👷¶

Reading data from a file 📖¶

Structure of a DataFrame¶

Setting a new index¶

Shape of a DataFrame¶

Annual summary of Get It Done requests¶

Example 1: Total requests¶

Finding total requests¶

Step 1 – Getting the column of closed requests¶

Digression: Series¶

Steps 2 and 3 – Getting the column of open requests and calculating the total¶

Step 4 – Adding the totals to the DataFrame as a new column¶

Example 2: Analyzing requests¶

Questions¶

Example 3: What and where is the most frequently requested service?¶

Step 1 – Sorting the DataFrame¶

Step 1 – Sorting the DataFrame in descending order¶

Step 2 – Extracting the neighborhood and service¶

Example 4: Status of a request¶

Status of a request¶

Accessing using the row label¶

Activity 🚔¶

Summary: Accessing elements in a Series¶

Note¶

Reflection¶

Questions we can answer right now...¶

Questions we can't yet answer...¶

Example 5: Which neighborhood has the most 'Pothole' requests? 🕳¶

Selecting rows¶

The solution¶

Aside: Booleans¶

Comparison operators¶

What is a query? 🤔¶

How do we query a DataFrame?¶

Another query¶

Summary¶

Summary¶

Next time¶

`pandas`¶

But `pandas` is not so cute...¶

Enter `babypandas`!¶

DataFrames in `babypandas` 🐼¶

Example 5: Which neighborhood has the most `'Pothole'` requests? 🕳¶