# Set up packages for lecture. Don't worry about understanding this code, but
# make sure to run it if you're following along.
import numpy as np
import babypandas as bpd
import pandas as pd
from matplotlib_inline.backend_inline import set_matplotlib_formats
import matplotlib.pyplot as plt
set_matplotlib_formats("svg")
plt.style.use('ggplot')
np.set_printoptions(threshold=20, precision=2, suppress=True)
pd.set_option("display.max_rows", 7)
pd.set_option("display.max_columns", 8)
pd.set_option("display.precision", 2)
The Midterm Exam is in four days, on Friday 2/10 during your assigned lecture.
Simulation.
np.random.choice(options)
.options
, is a list or array to choose from.options
. By default, all elements are equally likely to be chosen.# Simulate a fair coin flip
np.random.choice(['Heads', 'Tails'])
'Heads'
# Simulate a roll of a die
np.random.choice(np.arange(1, 7))
3
np.random.choice(options, n)
will return an array of n
randomly selected elements from options
.
# Simulate 10 fair coin flips
np.random.choice(['Heads', 'Tails'], 10)
array(['Heads', 'Heads', 'Tails', 'Heads', 'Heads', 'Heads', 'Heads', 'Tails', 'Tails', 'Tails'], dtype='<U5')
np.random.choice
selects with replacement.replace=False
.# Choose three colleges to win free HDH swag
colleges = ['Revelle', 'John Muir', 'Thurgood Marshall',
'Earl Warren', 'Eleanor Roosevelt', 'Sixth', 'Seventh']
np.random.choice(colleges, 3, replace=False)
array(['Seventh', 'Revelle', 'Eleanor Roosevelt'], dtype='<U17')
What is the probability of getting 60 or more heads if we flip 100 coins?
Strategy:
np.random.choice
to flip 100 coins.np.count_nonzero
to count the number of heads.np.count_nonzero(array)
returns the number of entries in array
that are True
.coins = np.random.choice(['Heads', 'Tails'], 100)
coins
array(['Tails', 'Tails', 'Tails', ..., 'Heads', 'Tails', 'Tails'], dtype='<U5')
(coins == 'Heads').sum()
44
np.count_nonzero(coins == 'Heads') # counts the number of Trues in sequence
44
count_nonzero
?True == 1
and False == 0
, so counting the non-zero elements counts the number of True
s.It's a good idea to do this, as it makes it easier to run the experiment repeatedly.
def coin_experiment():
coins = np.random.choice(['Heads', 'Tails'], 100)
return np.count_nonzero(coins == 'Heads')
coin_experiment()
52
for
-loop!np.append
!head_counts = np.array([])
head_counts
array([], dtype=float64)
head_counts = np.append(head_counts, 15)
head_counts
array([15.])
head_counts = np.append(head_counts, 25)
head_counts
array([15., 25.])
# Specify the number of repetitions
repetitions = 10000
# Create an empty array to store the results
head_counts = np.array([])
for i in np.arange(repetitions):
# For each repetition, run the experiment and add the result to head_counts
head_count = coin_experiment()
head_counts = np.append(head_counts, head_count)
len(head_counts)
10000
head_counts
array([57., 44., 55., ..., 48., 44., 49.])
# In how many experiments was the number of heads >= 60?
at_least_60 = np.count_nonzero(head_counts >= 60)
at_least_60
278
# What is this as a proportion?
at_least_60 / repetitions
0.0278
# Can also use np.mean()! Why?
np.mean(head_counts >= 60)
0.0278
This is quite close to the true theoretical answer!
# The theoretical answer – don't worry about how or why this code works
import math
sum([math.comb(100, i) * (1 / 2) ** 100 for i in np.arange(60, 101)])
0.028443966820490392
bpd.DataFrame().assign(
Number_of_Heads=head_counts
).plot(kind='hist', bins=np.arange(30, 70), density=True, ec='w', figsize=(10, 5));
plt.axvline(60, color='C1');
Suppose you’re on a game show, and you’re given the choice of three doors: behind one door is a car 🚗; behind the others, goats 🐐🐐.
You pick a door, say No. 2, and the host, who knows what’s behind the doors, opens another door, say No. 3, which has a goat.
He then says to you, “Do you want to pick door No. 1?”
Question: Is it to your advantage to switch your choice?
(The question was posed in Parade magazine’s "Ask Marilyn" column in 1990. It is called the "Monty Hall problem" because Monty Hall hosted a similar game show called "Let's Make a Deal.")
You originally selected door #2. The host reveals door #3 to have a goat behind it. What should you do?
A. Might as well stick with door number #2; it has just as high a chance of winning as door #1. It doesn't matter whether you switch or not.
B. Switch to door number #1; it has a higher chance of winning than door #2.
Let's simulate the Monty Hall problem many times to estimate the probability of winning if we switch.
Then we'll repeat the process for staying each time.
When you pick a door, there are three equally-likely outcomes:
behind_picked_door = np.random.choice(['Car', 'Goat #1', 'Goat #2'])
behind_picked_door
'Goat #2'
When Monty opens a different door, he always reveals a goat.
if behind_picked_door == 'Goat #1':
revealed = 'Goat #2'
elif behind_picked_door == 'Goat #2':
revealed = 'Goat #1'
else:
revealed = np.random.choice(['Goat #1', 'Goat #2'])
revealed
'Goat #1'
If you always switch, you'll end up winning the prize that is neither behind_picked_door
nor revealed
.
for prize in ['Car', 'Goat #1', 'Goat #2']:
if prize != behind_picked_door and prize != revealed:
your_prize = prize
your_prize
'Car'
Let's turn this into a function to make it easier to repeat:
def simulate_switch_strategy():
behind_picked_door = np.random.choice(['Car', 'Goat #1', 'Goat #2'])
if behind_picked_door == 'Goat #1':
revealed = 'Goat #2'
elif behind_picked_door == 'Goat #2':
revealed = 'Goat #1'
else:
revealed = np.random.choice(['Goat #1', 'Goat #2'])
for prize in ['Car', 'Goat #1', 'Goat #2']:
if prize != behind_picked_door and prize != revealed:
your_prize = prize
#print(behind_picked_door, 'was behind the door.', revealed, 'was revealed by the host. Your prize was:', your_prize)
return your_prize
simulate_switch_strategy()
'Goat #1'
We should save your prize in each game. To do so, let's use np.append
:
repetitions = 10000
your_prizes = np.array([])
for i in np.arange(repetitions):
your_prize = simulate_switch_strategy()
your_prizes = np.append(your_prizes, your_prize)
your_prizes
array(['Goat #2', 'Car', 'Goat #2', ..., 'Goat #1', 'Car', 'Car'], dtype='<U32')
your_prizes
array(['Goat #2', 'Car', 'Goat #2', ..., 'Goat #1', 'Car', 'Car'], dtype='<U32')
np.count_nonzero(your_prizes == 'Car')
6759
np.count_nonzero(your_prizes == 'Car') / repetitions
0.6759
This is quite close to the true probability of winning if you switch, $\frac{2}{3}$.
car_count
to 0, and add 1 to it each time your prize is a car.car_count = 0
for i in np.arange(repetitions):
your_prize = simulate_switch_strategy()
if your_prize == 'Car':
car_count = car_count + 1
car_count / repetitions
0.6681
No arrays needed! This strategy won't always work; it depends on the goal of the simulation.
car_count = 0
for i in np.arange(repetitions):
behind_picked_door = np.random.choice(['Car', 'Goat #1', 'Goat #2'])
your_prize = behind_picked_door
if your_prize == 'Car':
car_count = car_count + 1
car_count / repetitions
0.3326
To estimate the probability of an event through simulation:
for
-loop, and save the results in an array with np.append
.np.count_nonzero
.