A study of the standard normal distribution.
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon
from IPython.display import display, HTML
import numpy as np
import pandas as pd
# Tell NumPy to ignore floating-point divide-by-zero conditions instead of warning.
np.seterr(divide='ignore')
# Default size (width, height in inches) for the figures created below.
plt.rcParams['figure.figsize'] = (16, 8)
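The probability density function of a normal distribution is $$ \large f(x)=\frac{1}{\sigma\sqrt{2\pi}}e^{-\frac{1}{2}\left(\frac{x-\mu}{\sigma}\right)^2} $$ where $\mu$ is the mean and $\sigma$ is the standard deviation.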
def normalDistribution(x, mu, sigma):
    '''
    Probability density of the normal distribution evaluated at x.

    x may be a scalar or a NumPy array; mu is the mean and sigma the
    standard deviation. The result has the same shape as x.
    '''
    # Distance from the mean measured in standard deviations.
    standardized = (x - mu)/sigma
    # Normalising constant sigma*sqrt(2*pi).
    scale = sigma*(2*np.pi)**0.5
    return 1/scale*np.exp(-1/2*standardized**2)
# Sample the density with mu=0, sigma=1 on [-4, 4]; `x` and `p` are reused by later cells.
x = np.linspace(-4, 4, 101)
p = normalDistribution(x, 0, 1)

plt.plot(x, p)
plt.ylim(0, 1)

# One double-headed arrow plus caption per band:
# (half-width in sigmas, arrow height, caption height, caption text).
bands = (
    (1, 0.5, 0.51, '68% within\n1 standard deviation'),
    (2, 0.65, 0.66, '95% within\n2 standard deviation'),
    (3, 0.8, 0.81, '99.7% within\n3 standard deviation'),
)
for half_width, arrow_y, caption_y, caption in bands:
    plt.annotate(
        "",
        xy=(-half_width, arrow_y), xycoords='data',
        xytext=(half_width, arrow_y), textcoords='data',
        arrowprops=dict(arrowstyle="<|-|>")
    )
    plt.text(0, caption_y, caption, ha='center', va='bottom', size=12)

plt.grid(alpha=0.25, linestyle='-.')

# Label the x-axis in units of sigma; the two outermost ticks stay blank.
sigma_labels = [
    '', r'$\mu-3\sigma$', r'$\mu-2\sigma$',
    r'$\mu-\sigma$', r'$\mu$', r'$\mu+\sigma$',
    r'$\mu+2\sigma$', r'$\mu+3\sigma$', ''
]
plt.xticks(range(-4, 5), sigma_labels)
# Keep the horizontal grid lines but hide the y tick labels.
plt.yticks(np.linspace(0, 1, 6), ['']*6)
plt.title('Probability density')
plt.show()
Special case of the probability density, when $\mu=0$ and $\sigma=1$ (the standard normal distribution): $$ \large f(x)=\frac{1}{\sqrt{2\pi}}e^{-\frac{1}{2}x^2} $$
def stdNormalDistribution(x):
    '''
    Density of the standard normal distribution (mu=0, sigma=1) at x.
    '''
    return normalDistribution(x, mu=0, sigma=1)
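The cumulative distribution $\Phi(z)$ is the area under the density curve from $-\infty$ to $z$. Over the whole range $(-\infty, \infty)$ the value of $\Phi$ should be $1$.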
# Draw the density curve and hatch the area under it for the first 65 grid points.
fig, ax = plt.subplots()
ax.plot(x, p, c='red', linewidth=4)

x1 = x[:65]
y1 = p[:65]
# Drop to the x-axis at both ends so the polygon encloses the area under the curve.
verts = [(x1[0], 0)] + list(zip(x1, y1)) + [(x1[-1], 0)]
P = Polygon(verts, facecolor='0.8', edgecolor='0.6', hatch='x')
ax.add_patch(P)

ax.set_xlim([-4, 4])
ax.grid()
plt.show()
Probability from $0$ to $z$ (for $z \ge 0$): $$ \large P(0 \le Z \le z)=\Phi(z) - \frac{1}{2} $$
# Draw the density curve and hatch the area under it for grid points 50..64.
fig, ax = plt.subplots()
ax.plot(x, p, c='red', linewidth=4)

x1 = x[50:65]
y1 = p[50:65]
# Drop to the x-axis at both ends so the polygon encloses the area under the curve.
verts = [(x1[0], 0)] + list(zip(x1, y1)) + [(x1[-1], 0)]
P = Polygon(verts, facecolor='0.8', edgecolor='0.6', hatch='x')
ax.add_patch(P)

ax.set_xlim([-4, 4])
ax.grid()
plt.show()
def definiteIntegral(f, a, b, N=10000):
    '''
    Approximate the definite integral of f from a to b with the midpoint rule.

    Parameters:
        f => callable of one numeric argument
        a => lower limit of integration
        b => upper limit of integration
        N => number of equal-width sub-intervals (positive integer)

    Returns the sum of f(midpoint)*dx over exactly N sub-intervals.
    Reversed limits (a > b) give the negated integral, following the usual
    convention; a == b gives 0.

    Raises ValueError when N is smaller than 1.
    '''
    n = int(N)
    if n < 1:
        raise ValueError('N must be a positive integer')
    if a == b:
        return 0
    # Signed width: negative when the limits are reversed.
    dx = (b - a)/n
    # Index the midpoints instead of accumulating `a += dx`: the accumulated
    # value drifts in floating point and `while a < b` could then evaluate
    # one slab too many (or too few).
    return sum(f(a + (i + 0.5)*dx) for i in range(n))*dx
def zScore(z, option=0):
    '''
    Cumulative standard normal distribution
    Option:
    0 => one-tailed under *default
    1 => one-tailed above
    2 => two-tailed inside
    3 => two-tailed outside

    Any other option value is treated like option 0.

    The area p between 0 and |z| is obtained by numerically integrating
    stdNormalDistribution; the requested probability is then derived from
    p using the symmetry of the density around 0.
    '''
    Z = abs(z)
    if Z == 0:
        # No area lies between 0 and z. Fall through to the formulas below so
        # that each option yields its proper limit (0.5, 0.5, 0 and 1) instead
        # of a blanket 0.0.
        sign = 1.0
        p = 0.0
    else:
        sign = z/Z
        p = definiteIntegral(stdNormalDistribution, 0, Z)
    if option == 1:
        return 0.5 - p*sign
    if option == 2:
        return 2*p
    if option == 3:
        return 1 - 2*p
    return 0.5 + p*sign
def _shadeRegion(axes, xs, ys, mask):
    '''Hatch the area between the curve (xs, ys) and the x-axis where mask is True.'''
    xm = xs[mask]
    ym = ys[mask]
    # Drop to the x-axis at both ends so the polygon encloses the area under the curve.
    outline = [(xm[0], 0), *zip(xm, ym), (xm[-1], 0)]
    axes.add_patch(Polygon(outline, facecolor='0.8', edgecolor='0.7', hatch='xx'))


# One panel per zScore option, each shading the region that option measures
# for a cut-off of 1.5.
fig, [[axA, axB], [axC, axD]] = plt.subplots(2, 2, sharex='col', sharey='row')
panels = [
    (axA, 'one-tailed under', [x <= 1.5]),
    (axB, 'one-tailed above', [x >= 1.5]),
    (axC, 'two-tailed inside', [abs(x) <= 1.5]),
    (axD, 'two-tailed outside', [x >= 1.5, x <= -1.5]),
]
# The boolean selections are called `mask` so the builtin `slice` is not rebound.
for panel, title, masks in panels:
    panel.plot(x, p, c='red', linewidth=2)
    for mask in masks:
        _shadeRegion(panel, x, p, mask)
    panel.set_title(title)
plt.show()
def zTable(option=0, digits=5, hsteps=0.01, zmin=-3.4, zmax=3.4):
    '''
    Build a z table as a pandas DataFrame.

    Rows are labelled with z values from zmin to zmax in steps of 0.1 and
    columns with offsets from 0 up to (but excluding) 0.1 in steps of
    hsteps; each cell holds zScore(row + column, option) rounded to
    `digits` decimals. The column axis is named '+=>'.

    Parameters:
        option => passed to zScore; for option >= 2 a non-positive zmin
                  is replaced by 0.0
        digits => number of decimals kept in each cell
        hsteps => column step
        zmin   => first row label
        zmax   => last row label

    NOTE(review): zmax - zmin is expected to be a multiple of 0.1 and 0.1
    a multiple of hsteps; otherwise the reshape below raises ValueError.
    '''
    if option >= 2 and zmin <= 0:
        zmin = 0.0
    # Round, do not truncate: quotients such as 0.3/0.1 come out as
    # 2.9999999999999996 and int() alone would lose a whole row.
    ncols = int(round(0.1/hsteps))
    nrows = int(round((zmax - zmin)/0.1)) + 1
    # A float-step arange may emit one element more than expected; cut it to
    # the exact cell count so the reshape cannot fail because of that.
    steps = np.arange(zmin, zmax + 0.1, hsteps)[:nrows*ncols]
    steps = steps.reshape(nrows, ncols)
    df = pd.DataFrame(data=steps)
    # Decimals needed to print a column offset.
    d = int(np.ceil(abs(np.log10(hsteps))))
    cols = ['{0:.{1}f}'.format(hsteps*k, d) for k in range(ncols)]
    # Strip float noise from the row labels (e.g. 4.4e-16 -> 0.0); adding 0.0
    # turns a possible -0.0 into 0.0.
    idxs = np.round(np.linspace(zmin, zmax, nrows), 10) + 0.0
    df.index = idxs
    df.columns = cols
    ztable = df.rename_axis('+=>', axis=1)
    # DataFrame.applymap is deprecated in recent pandas in favour of
    # DataFrame.map; use whichever this pandas version provides.
    elementwise = ztable.map if hasattr(pd.DataFrame, 'map') else ztable.applymap
    return elementwise(lambda z: round(zScore(z, option=option), digits))
# Display z table
# Lift the pandas row/column display limits so the whole table is rendered.
ztable = zTable(zmin=1, zmax=3)
display_all = ('display.max_rows', None, 'display.max_columns', None)
with pd.option_context(*display_all):
    display(ztable)
+=> | 0.00 | 0.01 | 0.02 | 0.03 | 0.04 | 0.05 | 0.06 | 0.07 | 0.08 | 0.09 |
---|---|---|---|---|---|---|---|---|---|---|
1.0 | 0.84137 | 0.84375 | 0.84614 | 0.84852 | 0.85085 | 0.85314 | 0.85543 | 0.85771 | 0.85995 | 0.86214 |
1.1 | 0.86433 | 0.86652 | 0.86867 | 0.87076 | 0.87288 | 0.87495 | 0.87698 | 0.87902 | 0.88102 | 0.88300 |
1.2 | 0.88495 | 0.88686 | 0.88879 | 0.89065 | 0.89251 | 0.89437 | 0.89617 | 0.89798 | 0.89975 | 0.90147 |
1.3 | 0.90322 | 0.90490 | 0.90658 | 0.90824 | 0.90988 | 0.91151 | 0.91311 | 0.91466 | 0.91623 | 0.91776 |
1.4 | 0.91924 | 0.92075 | 0.92220 | 0.92364 | 0.92509 | 0.92647 | 0.92788 | 0.92924 | 0.93056 | 0.93191 |
1.5 | 0.93319 | 0.93448 | 0.93576 | 0.93699 | 0.93824 | 0.93945 | 0.94062 | 0.94181 | 0.94295 | 0.94408 |
1.6 | 0.94522 | 0.94630 | 0.94740 | 0.94847 | 0.94950 | 0.95055 | 0.95154 | 0.95254 | 0.95354 | 0.95449 |
1.7 | 0.95543 | 0.95638 | 0.95728 | 0.95820 | 0.95909 | 0.95994 | 0.96081 | 0.96164 | 0.96246 | 0.96329 |
1.8 | 0.96407 | 0.96487 | 0.96563 | 0.96638 | 0.96713 | 0.96784 | 0.96856 | 0.96927 | 0.96995 | 0.97063 |
1.9 | 0.97130 | 0.97193 | 0.97258 | 0.97320 | 0.97381 | 0.97442 | 0.97500 | 0.97558 | 0.97616 | 0.97670 |
2.0 | 0.97726 | 0.97780 | 0.97831 | 0.97883 | 0.97932 | 0.97982 | 0.98031 | 0.98077 | 0.98125 | 0.98170 |
2.1 | 0.98214 | 0.98258 | 0.98300 | 0.98341 | 0.98383 | 0.98422 | 0.98462 | 0.98500 | 0.98537 | 0.98575 |
2.2 | 0.98610 | 0.98645 | 0.98680 | 0.98713 | 0.98746 | 0.98778 | 0.98809 | 0.98840 | 0.98870 | 0.98899 |
2.3 | 0.98928 | 0.98956 | 0.98983 | 0.99010 | 0.99036 | 0.99062 | 0.99087 | 0.99111 | 0.99135 | 0.99158 |
2.4 | 0.99181 | 0.99203 | 0.99224 | 0.99245 | 0.99266 | 0.99286 | 0.99305 | 0.99324 | 0.99343 | 0.99361 |
2.5 | 0.99379 | 0.99396 | 0.99413 | 0.99430 | 0.99446 | 0.99462 | 0.99477 | 0.99492 | 0.99506 | 0.99520 |
2.6 | 0.99534 | 0.99547 | 0.99560 | 0.99573 | 0.99585 | 0.99598 | 0.99609 | 0.99621 | 0.99632 | 0.99643 |
2.7 | 0.99654 | 0.99664 | 0.99674 | 0.99684 | 0.99693 | 0.99702 | 0.99711 | 0.99720 | 0.99728 | 0.99737 |
2.8 | 0.99744 | 0.99752 | 0.99760 | 0.99767 | 0.99774 | 0.99782 | 0.99788 | 0.99795 | 0.99801 | 0.99808 |
2.9 | 0.99813 | 0.99819 | 0.99825 | 0.99831 | 0.99836 | 0.99841 | 0.99846 | 0.99851 | 0.99856 | 0.99861 |
3.0 | 0.99865 | 0.99870 | 0.99874 | 0.99878 | 0.99882 | 0.99886 | 0.99889 | 0.99893 | 0.99897 | 0.99900 |
%%time
# save z table as HTML
# The page is collected as fragments and joined once. The fragments live in
# `html_parts` so the `HTML` class imported from IPython.display is not
# overwritten by a string.
html_parts = ['''
<!DOCTYPE html>
<html>
<head>
<link rel="stylesheet" href="z-table.css">
</head>
<body>
''']
html_parts.append('\n<h1>Z Tables</h1>\n')
# (zScore option, section heading), in the order the sections appear on the page.
sections = [
    (0, 'One-tailed under'),
    (1, 'One-tailed above'),
    (2, 'Two-tailed inside'),
    (3, 'Two-tailed outside'),
]
for table_option, heading in sections:
    html_parts.append('\n<h2>{}</h2>\n'.format(heading))
    ztable = zTable(option=table_option, hsteps=0.005)
    html_parts.append(ztable.to_html())
html_parts.append('''
</body>
</html>
''')
# `with` closes the file even when the write raises.
with open('output/z-table.html', 'w', encoding='utf-8') as ztable_html:
    ztable_html.write(''.join(html_parts))
Wall time: 1min 14s