Monday, February 9, 2015

py13. Central Limit Theorem and Confidence Level in Python

A normal sample of size 500 is used. We create 5000 such samples and record their mean values.


Each sample mean will differ slightly from the population mean, typically by no more than the margin of error (moe).


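For each confidence level from 5% to 95%, the moe is computed and the proportion of sample means falling inside the range (mean ± moe) is recorded. Plotting that proportion against the confidence level, we expect a straight line with a slope of 1 (y = x).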

# ex13.py
from __future__ import print_function, division
import numpy as np
from scipy.stats import norm
import matplotlib.pyplot as plt
# Simulation parameters.
N = 5000  # number of independent samples drawn (one mean recorded per sample)
n = 500   # number of observations in each sample
p = 0.6   # only used to set the population mean as p * n

# Population standard deviation: a single uniform draw from [10, 25).
pop_sd = 15 * np.random.sample() + 10
pop_mn = p * n

# Draw all N samples in one call (an N x n float64 array, about 20 MB) and
# reduce every row to its mean, rather than looping N times in Python.
val = np.random.normal(loc=pop_mn, scale=pop_sd, size=(N, n)).mean(axis=1)

# In general, population values are not known and only one sample is used
# in practice. Many samples are drawn here to illustrate the CLT.
#   use_sample = True  -> centre and spread are taken from the simulated means
#   use_sample = False -> population mean and standard error pop_sd / sqrt(n)
use_sample = True
if use_sample:
    mn1 = val.mean()
    sd1 = val.std()
else:
    mn1 = pop_mn
    sd1 = pop_sd / np.sqrt(n)

# For each confidence level (in percent), record the fraction of sample means
# that fall strictly inside mn1 +/- moe.
conf_range = range(5, 100, 5)
v = np.zeros(len(conf_range))
for i, per in enumerate(conf_range):
    # Two-sided interval: the upper quantile is 0.5 + per/200. The float
    # literal keeps this a true division even without the __future__ import.
    z = norm.ppf((100 + per) / 200.0)
    moe = z * sd1
    ins = (val > mn1 - moe) & (val < mn1 + moe)
    # Mean of a boolean array is the proportion of True entries.
    v[i] = ins.mean()

# Observed coverage (in percent) against the nominal confidence level.
pct_inside = 100 * v
plt.plot(conf_range, pct_inside, 'b')
plt.xlabel('Confidence level')
plt.ylabel('Proportion mean within moe')
plt.show()

Output:

No comments:

Post a Comment