# Set up packages for lecture. Don't worry about understanding this code, but
# make sure to run it if you're following along.
import numpy as np
import babypandas as bpd
import pandas as pd
from matplotlib_inline.backend_inline import set_matplotlib_formats
import matplotlib.pyplot as plt
%reload_ext pandas_tutor
%set_pandas_tutor_options {'projectorMode': True}
set_matplotlib_formats("svg")
plt.style.use('fivethirtyeight')
np.set_printoptions(threshold=20, precision=2, suppress=True)
pd.set_option("display.max_rows", 7)
pd.set_option("display.max_columns", 8)
pd.set_option("display.precision", 2)
The Midterm Exam is in one week, on Friday, 10/28 during your assigned lecture.
Simulation.
np.random.choice(options)
.options
, is a list or array to choose from.options
. By default, all elements are equally likely to be chosen.# Simulate a fair coin flip
np.random.choice(['Heads', 'Tails'])
'Heads'
# Simulate a roll of a die
np.random.choice(np.arange(1, 7))
5
np.random.choice(options, n)
will return an array of n
randomly selected elements from options
.
# Simulate 10 fair coin flips
np.random.choice(['Heads', 'Tails'], 10)
array(['Tails', 'Heads', 'Tails', 'Tails', 'Heads', 'Tails', 'Heads', 'Heads', 'Heads', 'Tails'], dtype='<U5')
np.random.choice
selects with replacement.replace=False
.# Choose three colleges to win free HDH swag
colleges = ['Revelle', 'John Muir', 'Thurgood Marshall',
'Earl Warren', 'Eleanor Roosevelt', 'Sixth', 'Seventh']
np.random.choice(colleges, 3, replace=False)
array(['Earl Warren', 'Eleanor Roosevelt', 'Seventh'], dtype='<U17')
What is the probability of getting 60 or more heads if we flip 100 coins?
Strategy:
np.random.choice
to flip 100 coins.np.count_nonzero
to count the number of heads.np.count_nonzero(array)
returns the number of entries in array
that are True
.coins = np.random.choice(['Heads', 'Tails'], 100)
coins
array(['Tails', 'Tails', 'Heads', ..., 'Heads', 'Heads', 'Tails'], dtype='<U5')
(coins == 'Heads').sum()
49
np.count_nonzero(coins == 'Heads') # counts the number of Trues in sequence
49
count_nonzero
?True == 1
and False == 0
, so counting the non-zero elements counts the number of True
s.It's a good idea to do this, as it makes it easier to run the experiment repeatedly.
def coin_experiment():
coins = np.random.choice(['Heads', 'Tails'], 100)
return np.count_nonzero(coins == 'Heads')
coin_experiment()
59
for
-loop!np.append
!head_counts = np.array([])
head_counts
array([], dtype=float64)
head_counts = np.append(head_counts, 15)
head_counts
array([15.])
head_counts = np.append(head_counts, 25)
head_counts
array([15., 25.])
# Specify the number of repetitions
repetitions = 10000
# Create an empty array to store the results
head_counts = np.array([])
for i in np.arange(repetitions):
# For each repetition, run the experiment and add the result to head_counts
head_count = coin_experiment()
head_counts = np.append(head_counts, head_count)
len(head_counts)
10000
head_counts
array([46., 59., 51., ..., 44., 49., 45.])
# In how many experiments was the number of heads >= 60?
at_least_60 = np.count_nonzero(head_counts >= 60)
at_least_60
289
# What is this as a proportion?
at_least_60 / repetitions
0.0289
# Can also use np.mean()! Why?
np.mean(head_counts >= 60)
0.0289
This is quite close to the true theoretical answer!
# The theoretical answer – don't worry about how or why this code works
import math
sum([math.comb(100, i) * (1 / 2) ** 100 for i in np.arange(60, 101)])
0.028443966820490392
bpd.DataFrame().assign(
Number_of_Heads=head_counts
).plot(kind='hist', bins=np.arange(30, 70), density=True, ec='w', figsize=(10, 5));
plt.axvline(60, color='C1');
Suppose you’re on a game show, and you’re given the choice of three doors: behind one door is a car 🚗; behind the others, goats 🐐🐐.
You pick a door, say No. 2, and the host, who knows what’s behind the doors, opens another door, say No. 3, which has a goat.
He then says to you, “Do you want to pick door No. 1?”
Question: Is it to your advantage to switch your choice?
(The question was originally posed in Parade magazine’s "Ask Marilyn" column. It is called the "Monty Hall problem" because Monty Hall was the host of the game show in question, "Let's Make a Deal.")
You originally selected door #2. The host reveals door #3 to have a goat behind it. What should you do?
A. Might as well stick with door number #2; it has just as high a chance of winning as door #1. It doesn't matter whether you switch or not.
B. Switch to door number #1; it has a higher chance of winning than door #2.
Let's simulate the Monty Hall problem many times to estimate the probability of winning.
When a contestant picks their door, there are three equally-likely outcomes:
behind_picked_door = np.random.choice(['Car', 'Goat #1', 'Goat #2'])
behind_picked_door
'Goat #2'
Suppose we can see what is behind their door (but the contestant can't).
# Determine winning_strategy ('Stay' or 'Switch') based on what behind_picked_door is.
if behind_picked_door == 'Car':
winning_strategy = 'Stay'
else:
# A goat was behind the picked door. Monty will reveal the other goat.
# Switching wins:
winning_strategy = 'Switch'
Let's turn this into a function to make it easier to repeat:
def simulate_monty_hall():
behind_picked_door = np.random.choice(['Car', 'Goat #1', 'Goat #2'])
if behind_picked_door == 'Car':
winning_strategy = 'Stay'
else:
winning_strategy = 'Switch'
# print(behind_picked_door, 'was behind the door. Winning strategy:', winning_strategy)
return winning_strategy
simulate_monty_hall()
'Switch'
We should save the winning strategies. To do so, let's use np.append
:
repetitions = 10000
winning_strategies = np.array([])
for i in np.arange(repetitions):
winning_strategy = simulate_monty_hall()
winning_strategies = np.append(winning_strategies, winning_strategy)
winning_strategies
array(['Stay', 'Stay', 'Stay', ..., 'Stay', 'Switch', 'Stay'], dtype='<U32')
winning_strategies
array(['Stay', 'Stay', 'Stay', ..., 'Stay', 'Switch', 'Stay'], dtype='<U32')
np.count_nonzero(winning_strategies == 'Switch')
6696
np.count_nonzero(winning_strategies == 'Switch') / repetitions
0.6696
np.count_nonzero(winning_strategies == 'Stay') / repetitions
0.3304
'Switch'
(or 'Stay'
).'Switch'
. That is, initialize switch_count
to 0, and add 1 to it each time the winning strategy is 'Switch'
.switch_count = 0
for i in np.arange(repetitions):
winning_strategy = simulate_monty_hall()
if winning_strategy == 'Switch':
switch_count = switch_count + 1
switch_count / repetitions
0.6717
1 - switch_count / repetitions
0.32830000000000004
No arrays needed! This strategy won't always work; it depends on the goal of the simulation.
To estimate the probability of an event through simulation:
for
-loop, and save the results in an array with np.append
.np.count_nonzero
.