In [3]:
from dsc106_utils import *
In [4]:
# Load seaborn's `mpg` dataset and drop every row that has a missing value.
mpg = (
    sns.load_dataset('mpg')
    .dropna()
)
mpg
Out[4]:
mpg cylinders displacement horsepower ... acceleration model_year origin name
0 18.0 8 307.0 130.0 ... 12.0 70 usa chevrolet chevelle malibu
1 15.0 8 350.0 165.0 ... 11.5 70 usa buick skylark 320
2 18.0 8 318.0 150.0 ... 11.0 70 usa plymouth satellite
... ... ... ... ... ... ... ... ... ...
395 32.0 4 135.0 84.0 ... 11.6 82 usa dodge rampage
396 28.0 4 120.0 79.0 ... 18.6 82 usa ford ranger
397 31.0 4 119.0 82.0 ... 19.4 82 usa chevy s-10

392 rows × 9 columns

In [5]:
# Count the rows for each origin, then move the index into ordinary columns.
(
    mpg['origin']
    .value_counts()
    .reset_index()
)
Out[5]:
origin count
0 usa 245
1 japan 79
2 europe 68

Nimble design moves

In [6]:
# Bar mark: origin on the x axis, per-origin row count on the y axis.
px.bar(data_frame=mpg['origin'].value_counts().reset_index(), x='origin', y='count')
In [7]:
# Same bar mark and counts with the axes swapped: count on x, origin on y.
px.bar(data_frame=mpg['origin'].value_counts().reset_index(), x='count', y='origin')
In [8]:
# Point mark instead of bars: origin on the x axis, per-origin row count on y.
px.scatter(data_frame=mpg['origin'].value_counts().reset_index(), x='origin', y='count')
In [9]:
# Point mark with the axes swapped: count on x, origin on y.
px.scatter(data_frame=mpg['origin'].value_counts().reset_index(), x='count', y='origin')

1 Nominal, 1 Quantitative

In [10]:
# One nominal column (origin) against one quantitative column (count), as bars.
px.bar(data_frame=mpg['origin'].value_counts().reset_index(), x='origin', y='count')
In [11]:
# The same nominal/quantitative pair, drawn as points instead of bars.
px.scatter(data_frame=mpg['origin'].value_counts().reset_index(), x='origin', y='count')
In [12]:
# Should we do this?
# Same origin/count table as the bar and scatter cells, drawn with a line mark.
px.line(data_frame=mpg['origin'].value_counts().reset_index(), x='origin', y='count')

Encoding 3 variables

In [15]:
# Two-variable baseline: horsepower on x, mpg on y, one point per row.
px.scatter(data_frame=mpg, x='horsepower', y='mpg')
In [18]:
# Third variable on the color channel; acceleration is quantitative (Q-ratio).
px.scatter(data_frame=mpg, x='horsepower', y='mpg', color='acceleration')
In [19]:
# Third variable on the color channel; origin is nominal.
px.scatter(data_frame=mpg, x='horsepower', y='mpg', color='origin')
In [26]:
# Number of rows for every (origin, cylinders) pair, as a flat three-column frame.
occ = (
    mpg
    .groupby(['origin', 'cylinders'])
    .size()
    .reset_index(name='count')
)
occ
Out[26]:
origin cylinders count
0 europe 4 61
1 europe 5 3
2 europe 6 4
... ... ... ...
6 usa 4 69
7 usa 6 73
8 usa 8 103

9 rows × 3 columns

In [30]:
# Bars of count per origin, with cylinders mapped to the color channel.
px.bar(data_frame=occ, x='count', y='origin', color='cylinders')
In [36]:
# Number of rows for every (origin, model_year) pair, as a flat three-column frame.
omc = (
    mpg
    .groupby(['origin', 'model_year'])
    .size()
    .reset_index(name='count')
)
omc
Out[36]:
origin model_year count
0 europe 70 5
1 europe 71 4
2 europe 72 5
... ... ... ...
36 usa 80 6
37 usa 81 13
38 usa 82 19

39 rows × 3 columns

In [37]:
# One line per origin: model_year on x, per-year row count on y.
px.line(data_frame=omc, x='model_year', y='count', color='origin')

What about 4 channels?

In [43]:
# Display the `mpg` DataFrame again.
mpg
Out[43]:
mpg cylinders displacement horsepower ... acceleration model_year origin name
0 18.0 8 307.0 130.0 ... 12.0 70 usa chevrolet chevelle malibu
1 15.0 8 350.0 165.0 ... 11.5 70 usa buick skylark 320
2 18.0 8 318.0 150.0 ... 11.0 70 usa plymouth satellite
... ... ... ... ... ... ... ... ... ...
395 32.0 4 135.0 84.0 ... 11.6 82 usa dodge rampage
396 28.0 4 120.0 79.0 ... 18.6 82 usa ford ranger
397 31.0 4 119.0 82.0 ... 19.4 82 usa chevy s-10

392 rows × 9 columns

In [49]:
# Largest mpg value within each (origin, model_year) group, flattened to columns.
# NOTE(review): despite the name `means`, the aggregation used here is max --
# confirm which statistic is intended before relying on the name.
means = (
    mpg
    .groupby(by=['origin', 'model_year'])['mpg']
    .agg('max')
    .reset_index()
)
means
Out[49]:
origin model_year mpg
0 europe 70 26.0
1 europe 71 30.0
2 europe 72 26.0
... ... ... ...
36 usa 80 32.1
37 usa 81 39.0
38 usa 82 38.0

39 rows × 3 columns

In [53]:
# Four channels: x = model_year, y = origin, size = mpg, and color = origin.
# Color repeats what the y position already shows -- a redundant encoding!
px.scatter(data_frame=means, x='model_year', y='origin', color='origin', size='mpg')