-
Notifications
You must be signed in to change notification settings - Fork 0
/
sampling_uncertainty.py
83 lines (71 loc) · 2.56 KB
/
sampling_uncertainty.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import numpy as np
import pandas as pd
import random
from matplotlib import pyplot as plt
from scipy.stats import norm
import funcs_for_stats as myst
# --- Synthetic population ---------------------------------------------------
# The population is drawn with np.random.normal using its default loc/scale,
# i.e. a standard normal; the histogram windows below are centred accordingly
# (0 for the sample mean, 1 for the sample standard deviation).
population_size = 50000
population_df = pd.DataFrame(
    {'observations': np.random.normal(size=population_size)})

# --- Histogram settings -----------------------------------------------------
# Note: `bins` is given as an explicit array of n_bins edge values, which
# yields n_bins - 1 bars spanning +/- `width` around the window centre.
n_bins = 50
width = 0.3
kwargs_mean = {
    'alpha': 0.5,
    'bins': np.linspace(-width, width, n_bins),
    'density': True,
    'histtype': 'bar',
    'stacked': True,
}
kwargs_std = {
    'alpha': 0.5,
    'bins': np.linspace(1 - width, 1 + width, n_bins),
    'density': True,
    'histtype': 'bar',
    'stacked': True,
}
# Indexed by statistic: 0 -> sample mean, 1 -> sample standard deviation.
kwargs_list = [kwargs_mean, kwargs_std]

# --- Sampling ---------------------------------------------------------------
n_samples = 5000
# NOTE(review): a factor of 9 presumably targets a 3x narrower sampling
# distribution (1/sqrt(N) scaling) -- confirm intent.
multiplier = 9
sample_size = 300
sample_sizes = [sample_size, multiplier * sample_size]
# One result per sample size. Presumably each result holds per-sample
# 'average' and 'st_dev' columns with n_samples rows -- verify against
# funcs_for_stats.get_sampled.
samples = [myst.get_sampled(n_samples, size, population_df)
           for size in sample_sizes]
# Draw a 2x2 grid of histograms: one row per entry in `samples` (i.e. per
# sample size), one column per statistic (column 0: 'average', column 1:
# 'st_dev'). Each panel also gets a dashed normal curve fitted to the
# plotted values and a text box with that fit's mean and standard deviation.
fig, axes = plt.subplots(2, 2)

# (column name in each sampled result, mathtext symbol used in the labels).
# The position in this list matches the index into `kwargs_list`.
features = [('average', r'\mu'), ('st_dev', r'\sigma')]

for i, (feature, symbol) in enumerate(features):
    hist_kwargs = kwargs_list[i]
    bins = hist_kwargs['bins']
    for j, data_sample in enumerate(samples):
        ax = axes[j][i]
        values = data_sample[feature]
        ax.hist(values, **hist_kwargs)

        # Rounded once so the overlaid curve uses exactly the numbers shown
        # in the text box.
        mean = round(values.mean(), 4)
        sigma = round(values.std(), 4)

        # Normal density evaluated at the histogram bin edges.
        ax.plot(bins, norm.pdf(bins, mean, sigma), 'b--', linewidth=1)

        # Raw f-strings keep the LaTeX backslashes literal; a plain f-string
        # would treat '\m' and '\s' as (invalid) escape sequences.
        ax.set_title(rf'P(${symbol}$, N = {sample_sizes[j]})')

        # The line break is joined in from a normal string because '\n'
        # inside a raw string would not be a newline.
        stats_label = '\n'.join((
            rf'$\mu_{symbol}$ = {mean}',
            rf'$\sigma_{symbol}$ = {sigma}',
        ))
        ax.text(0.05, 0.9,
                stats_label,
                horizontalalignment='left',
                verticalalignment='top',
                transform=ax.transAxes)

plt.tight_layout()
plt.show()