import pandas as pd
import numpy as np
import os
import util
import matplotlib.pyplot as plt
plt.style.use('seaborn-white')
plt.rc('figure', dpi=100, figsize=(10, 5))
plt.rc('font', size=12)
heights = pd.read_csv(os.path.join('data', 'heights.csv'))
heights = (
heights
.rename(columns={'childHeight': 'child', 'childNum': 'number'})
.drop('midparentHeight', axis=1)
)
heights.head()
|   | family | father | mother | children | number | gender | child |
|---|---|---|---|---|---|---|---|
| 0 | 1 | 78.5 | 67.0 | 4 | 1 | male | 73.2 |
| 1 | 1 | 78.5 | 67.0 | 4 | 2 | female | 69.2 |
| 2 | 1 | 78.5 | 67.0 | 4 | 3 | female | 69.0 |
| 3 | 1 | 78.5 | 67.0 | 4 | 4 | female | 69.0 |
| 4 | 2 | 75.5 | 66.5 | 4 | 1 | male | 73.5 |
np.random.seed(42) # So that we get the same results each time (for lecture)
heights_mcar = util.make_mcar(heights, 'child', pct=0.50)
heights_mcar.head()
|   | family | father | mother | children | number | gender | child |
|---|---|---|---|---|---|---|---|
| 0 | 1 | 78.5 | 67.0 | 4 | 1 | male | 73.2 |
| 1 | 1 | 78.5 | 67.0 | 4 | 2 | female | 69.2 |
| 2 | 1 | 78.5 | 67.0 | 4 | 3 | female | NaN |
| 3 | 1 | 78.5 | 67.0 | 4 | 4 | female | NaN |
| 4 | 2 | 75.5 | 66.5 | 4 | 1 | male | 73.5 |
The 'child' column has missing values.
- If 'child' is MCAR, then fill in each of the missing values using the mean of the observed values.
- If 'child' is MAR dependent on a categorical column, then fill in each of the missing values using the mean of the observed values in each category. For instance, if 'child' is MAR dependent on 'gender', we can fill in:
  - missing 'child' heights with the observed mean for female children, and
  - missing 'child' heights with the observed mean for male children (a sketch of this group-wise approach appears after the plot below).
- If 'child' is MAR dependent on a numerical column, then bin the numerical column to make it categorical, then follow the procedure above. See Lab 5, Question 3!
heights_mcar_mfilled = heights_mcar.fillna(heights_mcar['child'].mean())
heights_mcar_mfilled['child'].head()
0 73.200000 1 69.200000 2 66.640685 3 66.640685 4 73.500000 Name: child, dtype: float64
plt.hist([heights['child'], heights_mcar['child'].dropna(), heights_mcar_mfilled['child']])
plt.legend(['full data', 'missing (mcar)', 'imputed']);
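For instance, if we believe 'child' is MAR dependent on 'gender', group-wise mean imputation can be done with a groupby. The sketch below is for illustration only (heights_mcar_cond is a name chosen here, not from the lecture):
# A sketch of conditional (group-wise) mean imputation, assuming 'child' is MAR dependent on 'gender'.
# Within each gender group, missing 'child' values are replaced by that group's observed mean.
heights_mcar_cond = heights_mcar.assign(
    child=heights_mcar.groupby('gender')['child'].transform(lambda s: s.fillna(s.mean()))
)
heights_mcar_cond['child'].head()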
The 'child' column has missing values.
- If 'child' is MCAR, then fill in each of the missing values with randomly selected observed 'child' heights. For example, to fill in 5 missing 'child' values, pick 5 of the not-missing 'child' values.
- If 'child' is MAR dependent on a categorical column, sample from the observed values separately for each category.
# Figure out the number of missing values
num_null = heights_mcar['child'].isna().sum()
# Sample that number of values from the observed dataset
fill_values = heights_mcar['child'].dropna().sample(num_null, replace=True)
# Find the positions where values in heights_mcar are missing
fill_values.index = heights_mcar.loc[heights_mcar['child'].isna()].index
# Fill in the missing values
heights_mcar_dfilled = heights_mcar.fillna({'child': fill_values.to_dict()}) # fill the vals
plt.hist([heights['child'], heights_mcar['child'], heights_mcar_dfilled['child']], density=True);
plt.legend(['full data','missing (mcar)', 'distr imputed']);
No spikes!
If a value was never observed in the dataset, it will never be used to fill in a missing value.
Solution? Create a histogram (with np.histogram) to bin the data, then sample from the histogram. See Lab 5, Question 4.
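A rough sketch of that idea (not Lab 5's exact solution; the choice of 10 bins is arbitrary):
# Bin the observed 'child' heights with np.histogram, then sample a bin in proportion
# to its count and draw a value uniformly from within the chosen bin.
observed = heights_mcar['child'].dropna()
counts, bin_edges = np.histogram(observed, bins=10)
def sample_from_hist(n):
    chosen = np.random.choice(len(counts), size=n, p=counts / counts.sum())  # pick bins by frequency
    return np.random.uniform(bin_edges[chosen], bin_edges[chosen + 1])       # draw within each bin
sample_from_hist(5)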
Steps:
1. Create several imputed versions of the data through a probabilistic procedure.
2. Then, estimate the parameters of interest for each imputed dataset.
Let's try this procedure out on the heights_mcar dataset.
heights_mcar.head()
|   | family | father | mother | children | number | gender | child |
|---|---|---|---|---|---|---|---|
| 0 | 1 | 78.5 | 67.0 | 4 | 1 | male | 73.2 |
| 1 | 1 | 78.5 | 67.0 | 4 | 2 | female | 69.2 |
| 2 | 1 | 78.5 | 67.0 | 4 | 3 | female | NaN |
| 3 | 1 | 78.5 | 67.0 | 4 | 4 | female | NaN |
| 4 | 2 | 75.5 | 66.5 | 4 | 1 | male | 73.5 |
# This function implements the 3-step process we studied earlier
def create_imputed(col):
num_null = col.isna().sum()
fill_values = col.dropna().sample(num_null, replace=True)
fill_values.index = col.loc[col.isna()].index
return col.fillna(fill_values.to_dict())
Each time we run the following cell, it generates a new imputed version of the 'child' column.
create_imputed(heights_mcar['child']).head()
0 73.2 1 69.2 2 67.7 3 66.0 4 73.5 Name: child, dtype: float64
Let's run the above procedure 100 times.
mult_imp = pd.concat([create_imputed(heights_mcar['child']).rename(k) for k in range(100)], axis=1)
mult_imp.head()
|   | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 73.2 | 73.2 | 73.2 | 73.2 | 73.2 | 73.2 | 73.2 | 73.2 | 73.2 | 73.2 | ... | 73.2 | 73.2 | 73.2 | 73.2 | 73.2 | 73.2 | 73.2 | 73.2 | 73.2 | 73.2 |
| 1 | 69.2 | 69.2 | 69.2 | 69.2 | 69.2 | 69.2 | 69.2 | 69.2 | 69.2 | 69.2 | ... | 69.2 | 69.2 | 69.2 | 69.2 | 69.2 | 69.2 | 69.2 | 69.2 | 69.2 | 69.2 |
| 2 | 64.5 | 72.0 | 67.0 | 69.0 | 69.0 | 70.0 | 69.0 | 67.0 | 67.0 | 64.0 | ... | 65.7 | 65.0 | 65.7 | 70.7 | 73.0 | 65.0 | 66.0 | 63.0 | 71.0 | 69.0 |
| 3 | 63.5 | 64.5 | 61.7 | 67.2 | 61.0 | 64.0 | 66.0 | 56.0 | 60.0 | 62.0 | ... | 67.2 | 64.5 | 68.0 | 62.0 | 64.0 | 62.0 | 66.0 | 65.5 | 69.0 | 64.5 |
| 4 | 73.5 | 73.5 | 73.5 | 73.5 | 73.5 | 73.5 | 73.5 | 73.5 | 73.5 | 73.5 | ... | 73.5 | 73.5 | 73.5 | 73.5 | 73.5 | 73.5 | 73.5 | 73.5 | 73.5 | 73.5 |
5 rows × 100 columns
Let's plot some of the imputed columns above.
# Random sample of 15 imputed columns
mult_imp.sample(15, axis=1).plot(kind='kde', alpha=0.5, legend=False);
Let's look at the distribution of means across the imputed columns.
mult_imp.mean().plot(kind='hist', bins=20, ec='w', density=True);
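A common final step, not shown above, is to pool the per-imputation estimates into a single number and use their spread as a rough measure of the extra uncertainty that imputation introduces. A minimal sketch:
# Pool the 100 imputed means into one estimate; their standard deviation reflects
# the variability introduced by the random imputation procedure.
imputed_means = mult_imp.mean()
imputed_means.mean(), imputed_means.std()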
See the end of Lecture 13 for a detailed summary of all imputation techniques that we've seen so far.
Data doesn't always come in clean .csv files; often, we need to collect it from the internet. UCSD was a node in ARPANET, the predecessor to the modern internet (source).
HTTP follows the request-response model.
The request methods you will use most often are GET and POST.
- GET is used to request data from a specified resource.
- POST is used to send data to the server.
See Mozilla's web docs for a detailed list of request methods.
GET request
Below is an example GET HTTP request made by a browser when accessing datascience.ucsd.edu.
GET / HTTP/1.1
Host: datascience.ucsd.edu
User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36
Connection: keep-alive
Accept-Language: en-US,en;q=0.9
The first line (GET / HTTP/1.1) is called the "request line", and the lines afterwards are called "header fields". We could also provide a "body" after the header fields.
GET response
The response below was generated by executing the request on the previous slide.
HTTP/1.1 200 OK
Date: Fri, 29 Apr 2022 02:54:41 GMT
Server: Apache
Link: <https://datascience.ucsd.edu/wp-json/>; rel="https://api.w.org/"
Link: <https://datascience.ucsd.edu/wp-json/wp/v2/pages/2427>; rel="alternate"; type="application/json"
Link: <https://datascience.ucsd.edu/>; rel=shortlink
Content-Type: text/html; charset=UTF-8
<!DOCTYPE html>
<html lang="en-US">
<head>
<meta charset="UTF-8">
<link rel="profile" href="https://gmpg.org/xfn/11">
<style media="all">img.wp-smiley,img.emoji{display:inline !important;border:none
...
Read Inside a viral website, an account of what it's like to run a site that gained 50 million+ views in 5 days.
There are (at least) two ways to make HTTP requests:
- From the command line, with curl.
- From Python, with the requests package.
curl
curl is a command-line tool that sends HTTP requests, like a browser. When you provide curl a URL, it sends an HTTP request (you can specify the request method, e.g. GET or POST).
GET requests via curl
The following curl command issues a GET request. (To run shell commands in a Jupyter notebook, we put a ! before them.)
curl -v https://httpbin.org/html
# (`-v` is short for verbose)
!curl -v https://httpbin.org/html
In a GET request, a ? in the URL begins a query. For instance:
https://www.google.com/search?q=ucsd+dsc+80+hard&client=safari
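As a sketch, the request below sends a query string to httpbin.org/get, an endpoint (used here only for illustration) that echoes the query parameters back in its response:
!curl 'https://httpbin.org/get?name=King+Triton'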
POST requests via curl
In curl, the -d flag specifies data to send; including it makes curl issue a POST request. Below is an example curl POST request that sends 'King Triton' as the parameter 'name'.
curl -d 'name=King Triton' https://httpbin.org/post
!curl -d 'name=King Triton' https://httpbin.org/post
!curl -d 'name=King Triton' https://youtube.com
requests
requests is a Python package that allows you to use Python to interact with the internet! There are other packages that work similarly (e.g. urllib), but requests is arguably the easiest to use.
import requests
GET requests via requests
To access the source code of the UCSD home page, all we need to run is the following:
text = requests.get('https://ucsd.edu').text
url = 'https://ucsd.edu'
resp = requests.get(url)
resp is now a Response object.
resp
<Response [200]>
The text attribute of resp is a string containing the entire response.
type(resp.text)
str
len(resp.text)
43692
print(resp.text[:1000])
<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta content="IE=edge" http-equiv="X-UA-Compatible"/> <meta content="width=device-width, initial-scale=1" name="viewport"/> <title>University of California San Diego</title> <meta content="University of California, San Diego" name="ORGANIZATION"/> <meta content="index,follow,noarchive" name="robots"/> <meta content="UCSD" name="SITE"/> <meta content="University of California San Diego" name="PAGETITLE"/> <meta content="The University California San Diego is one of the world's leading public research universities, located in beautiful La Jolla, California" name="DESCRIPTION"/> <link href="favicon.ico" rel="icon"/> <!-- Site-specific CSS files --> <link href="https://www.ucsd.edu/_resources/css/vendor/brix_sans.css" rel="stylesheet" type="text/css"/> <!-- CSS complied from style overrides --> <link href="https://www.ucsd.edu/_resources/css/s
The request's url attribute contains the URL that we accessed.
resp.request.url
'https://ucsd.edu/'
POST requests via requests
post_response = requests.post('https://httpbin.org/post',
                              data={'name': 'King Triton'})
post_response
<Response [200]>
print(post_response.text)
{ "args": {}, "data": "", "files": {}, "form": { "name": "King Triton" }, "headers": { "Accept": "*/*", "Accept-Encoding": "gzip, deflate, br", "Content-Length": "16", "Content-Type": "application/x-www-form-urlencoded", "Host": "httpbin.org", "User-Agent": "python-requests/2.26.0", "X-Amzn-Trace-Id": "Root=1-626b9412-7d96f8fc5ad980b61f34ae79" }, "json": null, "origin": "70.95.172.151", "url": "https://httpbin.org/post" }
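Since the body of this response is JSON, we can parse it into a Python dictionary with the Response object's json method. A quick sketch (not from the original notebook):
# Parse the JSON response body; the 'form' field contains the data we sent
post_response.json()['form']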
A status code of 200 means there were no issues. Other status codes indicate errors or other events, e.g. 404: page not found; 500: internal server error. You have likely run into a 404 before.
r = requests.get('https://httpstat.us/503')
print(r.status_code)
503
r.text
'503 Service Unavailable'
Response objects have an ok attribute, which returns a bool indicating whether the status code was successful. (When making many requests, or retrying failed ones, it can help to pause between them, e.g. with time.sleep.)
status_codes = [200, 201, 403, 404, 503]
for code in status_codes:
    r = requests.get(f'https://httpstat.us/{code}')
    print(f'{code} ok: {r.ok}')
200 ok: True 201 ok: True 403 ok: False 404 ok: False 503 ok: False
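As a sketch of the time.sleep idea above (the retry count and delay are arbitrary choices, not from the lecture):
import time
# Retry a request a few times, pausing between attempts; returns the last response received
def get_with_retries(url, retries=3, delay=1):
    for _ in range(retries):
        r = requests.get(url)
        if r.ok:
            break
        time.sleep(delay)  # wait before trying again
    return r
get_with_retries('https://httpstat.us/200').ok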
The raise_for_status method of a Response raises an exception when the status code is not ok.
requests.get('https://httpstat.us/400').raise_for_status()
--------------------------------------------------------------------------- HTTPError Traceback (most recent call last) /var/folders/pd/w73mdrsj2836_7gp0brr2q7r0000gn/T/ipykernel_8477/2094305732.py in <module> ----> 1 requests.get('https://httpstat.us/400').raise_for_status() ~/opt/anaconda3/lib/python3.9/site-packages/requests/models.py in raise_for_status(self) 951 952 if http_error_msg: --> 953 raise HTTPError(http_error_msg, response=self) 954 955 def close(self): HTTPError: 400 Client Error: Bad Request for url: https://httpstat.us/400
The internet currently relies on two key data formats – HTML and JSON.
- The response body of a GET request is usually either JSON (when using an API) or HTML (when accessing a webpage).
- The response body of a POST request is usually JSON.
JSON data types include strings, numbers, Booleans (written true and false), arrays (written with []), objects, and null.
.See json-schema.org for more details.
import json
f = open(os.path.join('data', 'family.json'), 'r')
family_tree = json.load(f)
family_tree
{'name': 'Grandma', 'age': 94, 'children': [{'name': 'Dad', 'age': 60, 'children': [{'name': 'Me', 'age': 23}, {'name': 'Brother', 'age': 21}]}, {'name': 'My Aunt', 'children': [{'name': 'Cousin 1', 'age': 34}, {'name': 'Cousin 2', 'age': 36, 'children': [{'name': 'Cousin 2 Jr.', 'age': 2}]}]}]}
family_tree['children'][0]['children'][0]['age']
23
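As a small, made-up illustration of the JSON data types listed earlier, here is a JSON string that uses each of them, parsed with json.loads:
# Strings, numbers, Booleans, arrays, objects, and null, all in one document
json.loads('{"name": "Triton", "age": 2, "is_mascot": true, "colors": ["blue", "gold"], "advisor": null}')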
eval
eval, which stands for "evaluate", is a function built into Python.
x = 4
eval('x + 5')
9
It appears that eval can do the same thing that json.load does...
f = open(os.path.join('data', 'family.json'), 'r')
eval(f.read())
{'name': 'Grandma', 'age': 94, 'children': [{'name': 'Dad', 'age': 60, 'children': [{'name': 'Me', 'age': 23}, {'name': 'Brother', 'age': 21}]}, {'name': 'My Aunt', 'children': [{'name': 'Cousin 1', 'age': 34}, {'name': 'Cousin 2', 'age': 36, 'children': [{'name': 'Cousin 2 Jr.', 'age': 2}]}]}]}
...but you should never use eval like this. The next slide demonstrates why.
eval gone wrong
Below, we run eval on a string representation of a JSON object:
f_other = open(os.path.join('data', 'evil_family.json'))
eval(f_other.read())
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) /var/folders/pd/w73mdrsj2836_7gp0brr2q7r0000gn/T/ipykernel_8477/3392341705.py in <module> 1 f_other = open(os.path.join('data', 'evil_family.json')) ----> 2 eval(f_other.read()) <string> in <module> ~/Desktop/80/private/lectures/sp22/lec14/util.py in err() 132 # For JSON evaluation example 133 def err(): --> 134 raise ValueError('i just deleted all your files lol 😂') ValueError: i just deleted all your files lol 😂
Since evil_family.json, which could have been downloaded from the internet, contained malicious code, we now lost all of our files.
eval evaluates all parts of the input string as if it were Python code. You never need to do this – instead, use the json library.
- json.load loads a JSON object from a file.
- json.loads loads a JSON object from a string.
loads a JSON file from a string.f_other = open(os.path.join('data', 'evil_family.json'))
s = f_other.read()
s
'{\n "name": "Grandma",\n "age": 94,\n "children": [\n {\n "name": util.err(),\n "age": 60,\n "children": [{"name": "Me", "age": 23}, \n {"name": "Brother", "age": 21}]\n },\n {\n "name": "My Aunt",\n "children": [{"name": "Cousin 1", "age": 34}, \n {"name": "Cousin 2", "age": 36, "children": \n [{"name": "Cousin 2 Jr.", "age": 2}]\n }\n ]\n }\n ]\n}'
json.loads(s)
--------------------------------------------------------------------------- JSONDecodeError Traceback (most recent call last) /var/folders/pd/w73mdrsj2836_7gp0brr2q7r0000gn/T/ipykernel_8477/1938830664.py in <module> ----> 1 json.loads(s) ~/opt/anaconda3/lib/python3.9/json/__init__.py in loads(s, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw) 344 parse_int is None and parse_float is None and 345 parse_constant is None and object_pairs_hook is None and not kw): --> 346 return _default_decoder.decode(s) 347 if cls is None: 348 cls = JSONDecoder ~/opt/anaconda3/lib/python3.9/json/decoder.py in decode(self, s, _w) 335 336 """ --> 337 obj, end = self.raw_decode(s, idx=_w(s, 0).end()) 338 end = _w(s, end).end() 339 if end != len(s): ~/opt/anaconda3/lib/python3.9/json/decoder.py in raw_decode(self, s, idx) 353 obj, end = self.scan_once(s, idx) 354 except StopIteration as err: --> 355 raise JSONDecodeError("Expecting value", s, err.value) from None 356 return obj, end JSONDecodeError: Expecting value: line 6 column 17 (char 84)
Since util.err() is not a string in JSON (there are no quotes around it), json.loads is not able to parse it as a JSON object. Never use eval on "raw" data that you didn't create!
To summarize:
- We use GET HTTP requests to ask for information and POST HTTP requests to send information.
- We can use curl in the command line or the requests Python package to make HTTP requests.
- When working with JSON files, use the json package to parse them, not eval.