# You'll start seeing this cell in most lectures.
# It exists to hide all of the import statements and other setup
# code we need in lecture notebooks.
from dsc80_utils import *

# np.arange(10) builds a 1-D array of the integers 0 through 9
# (like range, the stop value 10 is excluded).
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

# The shape (10,) means that the array only has a single dimension,
# of size 10.
arr.shape

(10,)

# Arithmetic on an array is applied elementwise: this computes
# 2 ** 0, 2 ** 1, ..., 2 ** 9 and returns a new array.
2 ** arr

array([  1,   2,   4,   8,  16,  32,  64, 128, 256, 512])

(2 ** arr).sum()

1023

(2 ** arr).mean()

102.3

(2 ** arr).max()

512

# argmax returns the position (index) of the largest element,
# not the element itself: the maximum, 512, is at index 9.
(2 ** arr).argmax()

9

%%timeit
# Deliberately slow baseline: build the squares of 0 through 999,999
# one element at a time with a pure-Python loop, to compare against
# the vectorized numpy version timed in the next cell.
squares = []
for i in range(1_000_000):
    squares.append(i * i)

47.6 ms ± 526 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)

%%timeit
# Vectorized version: square all 1,000,000 integers with a single array
# operation instead of a Python-level loop.
# dtype=np.int64 is explicit because 999_999 ** 2 does not fit in 32 bits;
# on platforms where numpy's default integer is 32-bit (e.g. Windows with
# numpy < 2.0) the squares would otherwise silently overflow.
squares = np.arange(1_000_000, dtype=np.int64) ** 2

1.46 ms ± 77.1 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)

# Passing a list of equal-length lists to np.array creates a 2D array;
# each inner list becomes one row.
nums = np.array([
    [5, 1, 9, 7],
    [9, 8, 2, 3],
    [2, 5, 0, 4]
])

nums

array([[5, 1, 9, 7],
       [9, 8, 2, 3],
       [2, 5, 0, 4]])

# nums has 3 rows and 4 columns.
nums.shape

(3, 4)

# np.arange(1, 7) is the 1-D array [1, 2, 3, 4, 5, 6].
# Here, we're asking to reshape it so that it has 2 rows and 3 columns;
# the 6 values are filled in row by row, and the new shape must hold
# exactly the same number of elements as the original.
a = np.arange(1, 7).reshape((2, 3))
a

array([[1, 2, 3],
       [4, 5, 6]])

a

array([[1, 2, 3],
       [4, 5, 6]])

# axis=0 collapses the rows: the sum is taken down each column,
# giving one total per column.
a.sum(axis=0)

array([5, 7, 9])

# axis=1 collapses the columns: the sum is taken across each row,
# giving one total per row.
a.sum(axis=1)

array([ 6, 15])

a

array([[1, 2, 3],
       [4, 5, 6]])

# Accesses row 0 and all columns.
a[0, :]

array([1, 2, 3])

# Same as a[0, :]: indexing with a single integer selects that entire row.
a[0]

array([1, 2, 3])

# Accesses all rows and column 1.
a[:, 1]

array([2, 5])

# Accesses row 0 and columns 1 and onwards.
a[0, 1:]

array([2, 3])

# Exercise: predict what the commented-out expression at the bottom
# evaluates to.
# np.ones(s) is a 5x3 array of 1.0s, so grid is a 5x3 float array holding
# 2 times the integers 1 through 15, filled in row by row.
s = (5, 3)
grid = np.ones(s) * 2 * np.arange(1, 16).reshape(s)
# grid[-1, 1:].sum()

import pandas as pd
import numpy as np

# You'll see the Path(...) / ... syntax a lot.
# The / operator joins path components, producing the correct path to
# your file whether you're using Windows, macOS, or Linux.
# (macOS and Linux separate folders in a path with /,
# while Windows uses \.)
dog_path = Path('data') / 'dogs43.csv'
dogs = pd.read_csv(dog_path)
dogs

dogs.head(3)

dogs.tail(2)

dogs.shape

(43, 7)

# The default index of a DataFrame is 0, 1, 2, 3, ...
dogs.index

RangeIndex(start=0, stop=43, step=1)

# get with a single column name returns that column as a Series.
dogs.get('breed')

0                   Brittany
1              Cairn Terrier
2     English Cocker Spaniel
               ...          
40               Bullmastiff
41                   Mastiff
42             Saint Bernard
Name: breed, Length: 43, dtype: object

# get with a list of column names returns a DataFrame
# containing just those columns, in the order listed.
dogs.get(['breed', 'kind', 'longevity'])

# sort_values returns a sorted copy; with ascending=False,
# the tallest breeds come first.
# Each row keeps its original index label, so
# note that the index is no longer 0, 1, 2, ...!
dogs.sort_values('height', ascending=False)

# This sorts by 'height',
# then breaks ties by 'longevity'.
# The single ascending=False applies to both columns, so rows with
# equal 'height' are ordered from largest to smallest 'longevity'.
# Note the difference in the last three rows between
# this DataFrame and the one above.
dogs.sort_values(['height', 'longevity'],
                 ascending=False)

dogs

	breed	kind	lifetime_cost	longevity	size	weight	height
0	Brittany	sporting	22589.0	12.92	medium	35.0	19.0
1	Cairn Terrier	terrier	21992.0	13.84	small	14.0	10.0
2	English Cocker Spaniel	sporting	18993.0	11.66	medium	30.0	16.0
...	...	...	...	...	...	...	...
40	Bullmastiff	working	13936.0	7.57	large	115.0	25.5
41	Mastiff	working	13581.0	6.50	large	175.0	30.0
42	Saint Bernard	working	20022.0	7.78	large	155.0	26.5

	breed	kind	lifetime_cost	longevity	size	weight	height
0	Brittany	sporting	22589.0	12.92	medium	35.0	19.0
1	Cairn Terrier	terrier	21992.0	13.84	small	14.0	10.0
2	English Cocker Spaniel	sporting	18993.0	11.66	medium	30.0	16.0

	breed	kind	lifetime_cost	longevity	size	weight	height
41	Mastiff	working	13581.0	6.50	large	175.0	30.0
42	Saint Bernard	working	20022.0	7.78	large	155.0	26.5

	breed	kind	lifetime_cost	longevity	size	weight	height
41	Mastiff	working	13581.0	6.50	large	175.0	30.0
36	Borzoi	hound	16176.0	9.08	large	82.5	28.0
34	Newfoundland	working	19351.0	9.32	large	125.0	27.0
...	...	...	...	...	...	...	...
29	Dandie Dinmont Terrier	terrier	21633.0	12.17	small	21.0	9.0
14	Maltese	toy	19084.0	12.25	small	5.0	9.0
8	Chihuahua	toy	26250.0	16.50	small	5.5	5.0

	breed	kind	lifetime_cost	longevity	size	weight	height
41	Mastiff	working	13581.0	6.50	large	175.0	30.0
36	Borzoi	hound	16176.0	9.08	large	82.5	28.0
34	Newfoundland	working	19351.0	9.32	large	125.0	27.0
...	...	...	...	...	...	...	...
14	Maltese	toy	19084.0	12.25	small	5.0	9.0
29	Dandie Dinmont Terrier	terrier	21633.0	12.17	small	21.0	9.0
8	Chihuahua	toy	26250.0	16.50	small	5.5	5.0

Pre-Lecture Reading for Lecture 2 – DataFrame Fundamentals¶

DSC 80, Winter 2024¶

`numpy` arrays¶

`numpy` overview¶

⚠️ The dangers of `for`-loops¶

Multi-dimensional arrays¶

Operations along axes¶

Selecting rows and columns from 2D arrays¶

Exercise

From `babypandas` to `pandas` 🐼¶

`babypandas`¶

`pandas`¶

`pandas`¶

`pandas` data structures¶

Example: Dog Breeds (woof!) 🐶¶

Review: `head`, `tail`, `shape`, `index`, `get`, and `sort_values`¶

Pre-Lecture Reading for Lecture 2 – DataFrame Fundamentals¶

DSC 80, Winter 2024¶

numpy arrays¶

numpy overview¶

⚠️ The dangers of for-loops¶

Multi-dimensional arrays¶

Operations along axes¶

Selecting rows and columns from 2D arrays¶

Exercise

From babypandas to pandas 🐼¶

babypandas¶

pandas¶

pandas¶

pandas data structures¶

Importing pandas and related libraries¶

Example: Dog Breeds (woof!) 🐶¶

Review: head, tail, shape, index, get, and sort_values¶

`numpy` arrays¶

`numpy` overview¶

⚠️ The dangers of `for`-loops¶

From `babypandas` to `pandas` 🐼¶

`babypandas`¶

`pandas`¶

`pandas`¶

`pandas` data structures¶

Importing `pandas` and related libraries¶

Review: `head`, `tail`, `shape`, `index`, `get`, and `sort_values`¶