%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

import numpy as np
import pandas as pd

# Silence NumPy's divide-by-zero / invalid-operation warnings: some of the
# densities below are evaluated at points where the formula divides by zero
# (e.g. the F density at x = 0).
np.seterr(invalid="ignore", divide="ignore")
# Default size (in inches) for every figure created afterwards.
plt.rcParams.update({"figure.figsize": (16, 8)})

def normalDistribution(x, mu, sigma):
    """Evaluate the normal (Gaussian) probability density.

    Parameters
    ----------
    x : scalar or array
        Point(s) at which the density is evaluated.
    mu : scalar
        Mean of the distribution.
    sigma : scalar
        Standard deviation of the distribution.

    Returns
    -------
    Density value(s), with the same shape as ``x``.
    """
    # Standardized distance of x from the mean.
    z = (x - mu)/sigma
    # 1/(sigma*sqrt(2*pi)) makes the density integrate to one.
    prefactor = 1/(sigma*(2*np.pi)**0.5)
    return prefactor*np.exp(-1/2*z**2)

# Sample grid shared by all normal density curves.
x = np.linspace(-10, 10, 1000)

# Distributions A, B and C: one curve per (mu, sigma) pair.
for mu, sigma in [(0, 1), (5, 1), (0, 2)]:
    plt.plot(
        x, normalDistribution(x, mu, sigma),
        # Raw f-string so the LaTeX backslashes (\mu, \sigma, \quad) are
        # taken literally instead of being parsed as (invalid) escapes.
        label=rf'$\mu={mu} \quad|\quad \sigma={sigma}$'
    )

# Visualization
plt.xlabel(r'$x$')
plt.ylabel(r'$f(x)$')
plt.legend()
plt.show()

def Phi(x):
    """Accumulate the standard normal density along the grid ``x``.

    Entry ``i`` of the result is a rectangle-rule sum of
    ``exp(-t**2/2)/sqrt(2*pi)`` over ``x[0] .. x[i]`` (inclusive).  It
    therefore approximates the standard normal CDF only when ``x[0]`` lies
    far in the left tail.

    Parameters
    ----------
    x : 1-D array with at least two samples
        Integration grid.  The spacing is taken from the first two samples
        and applied to every sample, so the grid is assumed to be uniform.

    Returns
    -------
    Array with the same length as ``x`` holding the running integral.
    """
    t = x
    dt = t[1] - t[0]
    ft = np.exp(-t**2/2)
    norm = 1/((2*np.pi)**0.5)
    # A running sum gives every prefix total in one O(n) pass.
    return norm*np.cumsum(ft)*dt

def cumulativeNormalDistribution(x, mu, sigma):
    """Approximate the CDF of a normal distribution on the grid ``x``.

    ``Phi`` integrates the standard normal density itself (it is not the
    error function), so the CDF of N(mu, sigma) is simply ``Phi`` evaluated
    at the standardized grid ``(x - mu)/sigma``.  The erf-based expression
    ``1/2*(1 + erf(z/sqrt(2)))`` must not be applied on top of it: doing so
    yields values between 0.5 and 1 instead of between 0 and 1.

    Parameters
    ----------
    x : 1-D array
        Uniform grid; ``Phi`` accumulates from ``x[0]``, so the first point
        should lie far enough in the left tail for the result to start
        near zero.
    mu : scalar
        Mean of the distribution.
    sigma : scalar
        Standard deviation of the distribution.

    Returns
    -------
    Array of cumulative probabilities, one per grid point.
    """
    return Phi((x - mu)/sigma)

# Sample grid shared by all normal CDF curves.
x = np.linspace(-10, 10, 1000)

# Distributions A, B and C: one curve per (mu, sigma) pair.
for mu, sigma in [(0, 1), (5, 1), (0, 2)]:
    plt.plot(
        x, cumulativeNormalDistribution(x, mu, sigma),
        # Raw f-string so the LaTeX backslashes (\mu, \sigma, \quad) are
        # taken literally instead of being parsed as (invalid) escapes.
        label=rf'$\mu={mu} \quad|\quad \sigma={sigma}$'
    )

# Visualization
plt.xlabel(r'$x$')
plt.ylabel(r'$F(x)$')
plt.legend()
plt.show()

def Gamma(s, infty=100, dt=1e-4):
    """Numerically approximate the gamma function for a scalar ``s``.

    The defining integral of ``t**(s - 1)*exp(-t)`` is summed with a
    rectangle rule on ``[0, infty)``.  For ``s < 1`` the integrand is
    singular at ``t = 0``, so the argument is first shifted upwards with the
    recurrence ``Gamma(s) = Gamma(s + 1)/s`` until it is at least 1, and the
    accumulated divisor is applied to the integral afterwards.

    Parameters
    ----------
    s : scalar
        Argument of the gamma function.  At ``s = 0`` (and at negative
        integers) the divisor becomes zero and the result is non-finite.
    infty : scalar, optional
        Upper cut-off standing in for the infinite integration limit.
    dt : scalar, optional
        Step of the integration grid.

    Returns
    -------
    Approximate value of Gamma(s).
    """
    # Product of the shifted arguments: Gamma(s) = Gamma(s + m)/(s*(s+1)*...).
    divisor = 1.0
    while s < 1:
        divisor *= s
        s = s + 1
    t = np.arange(0, infty, dt)
    ft = t**(s - 1)*np.exp(-t)
    return ft.sum()*dt/divisor

def chiSquaredDistribution(x, k):
    """Evaluate the chi-squared probability density with ``k`` degrees of freedom.

    Parameters
    ----------
    x : array
        Points at which the density is evaluated.
    k : scalar
        Degrees of freedom.

    Returns
    -------
    Array of density values; entries where ``x`` is negative are set to 0.
    """
    half_k = k/2
    # Normalization 1/(2**(k/2)*Gamma(k/2)).
    scale = 1/(2**half_k*Gamma(half_k))
    density = scale*x**(half_k - 1)*np.exp(-x/2)
    # The density is only defined on the non-negative half-line.
    return np.where(x >= 0, density, 0)

# Sample grid on [0, 20] for the chi-squared density curves.
x = np.linspace(0, 20, num=1000)

# One density curve per degrees-of-freedom value k.
for k in (2, 3, 5, 9, 15):
    plt.plot(x, chiSquaredDistribution(x, k), label='$k={}$'.format(k))

# Axis labels, clipped y-range, legend, render.
plt.xlabel('$x$')
plt.ylabel('$f(x)$')
plt.ylim(0, 0.5)
plt.legend()
plt.show()

def li_gamma(x, s):
    """Running rectangle-rule integral of ``t**(s - 1)*exp(-t)`` along ``x``.

    Entry ``i`` of the result sums the integrand over ``x[0] .. x[i]``
    (inclusive) and multiplies by the grid step, approximating the lower
    incomplete gamma function when the grid starts at 0.

    Parameters
    ----------
    x : 1-D array with at least two samples
        Integration grid.  The step is taken from the first two samples and
        applied to all of them, so the grid is assumed to be uniform.
    s : scalar
        Shape parameter.  With ``s < 1`` the integrand is not finite at
        ``t = 0``, and that value propagates into every running total.

    Returns
    -------
    Array with the same length as ``x`` holding the running integral.
    """
    t = x
    dt = t[1] - t[0]
    ft = t**(s - 1)*np.exp(-t)
    # A running sum gives every prefix total in one O(n) pass.
    return np.cumsum(ft)*dt

def cumulativeChiSquaredDistribution(x, k):
    """Approximate the chi-squared CDF with ``k`` degrees of freedom.

    Computed as the ratio of the running integral ``li_gamma(x/2, k/2)`` to
    the complete ``Gamma(k/2)``.

    Parameters
    ----------
    x : 1-D array
        Grid on which the CDF is evaluated; it is passed (halved) to
        ``li_gamma`` as the integration grid.
    k : scalar
        Degrees of freedom.

    Returns
    -------
    Array of cumulative probabilities, one per grid point.
    """
    half_k = k/2
    inverse_gamma = 1/Gamma(half_k)
    return inverse_gamma*li_gamma(x/2, half_k)

# Sample grid on [0, 20] for the chi-squared CDF curves.
x = np.linspace(0, 20, num=1000)

# One CDF curve per degrees-of-freedom value k.
for k in (2, 3, 5, 9, 15):
    plt.plot(x, cumulativeChiSquaredDistribution(x, k), label='$k={}$'.format(k))

# Axis labels, legend, render.
plt.xlabel('$x$')
plt.ylabel('$F(x)$')

plt.legend()
plt.show()

def studentsTDistribution(x, v):
    """Evaluate the Student's t probability density with ``v`` degrees of freedom.

    Parameters
    ----------
    x : scalar or array
        Point(s) at which the density is evaluated.
    v : scalar
        Degrees of freedom.

    Returns
    -------
    Density value(s), with the same shape as ``x``.
    """
    half_next = (v + 1)/2
    # Normalization Gamma((v+1)/2)/(sqrt(v*pi)*Gamma(v/2)).
    coeff = Gamma(half_next)/((v*np.pi)**0.5*Gamma(v/2))
    return coeff*(1 + x**2/v)**(-half_next)

# Sample grid on [-5, 5] for the Student's t density curves.
x = np.linspace(-5, 5, num=1000)

# One density curve per degrees-of-freedom value v.
for v in (2, 5, 20, 100):
    plt.plot(x, studentsTDistribution(x, v), label='$v={}$'.format(v))

# Axis labels, legend, render.
plt.xlabel('$x$')
plt.ylabel('$f(x)$')

plt.legend()
plt.show()

def factorial(n):
    """Return ``n!``.

    Returns the integer 1 for ``n <= 1``; otherwise the product
    ``1*2*...*n`` accumulated in float64.
    """
    return 1 if n <= 1 else np.prod(np.arange(1, n + 1), dtype=np.float64)

def risingFactorial(x, n):
    """Return the rising factorial ``x*(x + 1)*...*(x + n - 1)`` in float64.

    For ``n = 0`` the product is empty and the result is 1.0.
    """
    offsets = np.arange(n)
    # Pair every offset with x, then multiply all the shifted terms together.
    shifted = np.add.outer(offsets, x)
    return np.prod(shifted, dtype=np.float64)

def hypergeometric(a, b, c, z, infty=50):
    """Evaluate the Gauss hypergeometric function 2F1(a, b; c; z).

    The defining power series
    ``sum_n (a)_n*(b)_n/(c)_n * z**n/n!`` (with ``(q)_n`` the rising
    factorial) only converges for ``|z| < 1``.  To cover arguments outside
    that disc (analytic continuation), the value is delegated to
    ``scipy.special.hyp2f1``.

    Parameters
    ----------
    a, b, c : scalar
        Parameters of the hypergeometric function.
    z : scalar or array
        Argument(s) at which the function is evaluated.
    infty : int, optional
        Number of series terms for a hand-rolled evaluation.  Kept only so
        existing calls remain valid; it does not affect the SciPy result.

    Returns
    -------
    Value(s) of 2F1 with the same shape as ``z``.
    """
    # Imported locally: SciPy is not among the file-level imports.
    import scipy.special as sc
    return sc.hyp2f1(a, b, c, z)

def cumulativeStudentsTDistribution(x, v):
    """Evaluate the Student's t CDF with ``v`` degrees of freedom.

    Uses the closed form
    ``1/2 + x*C*2F1(1/2, (v+1)/2; 3/2; -x**2/v)`` where ``C`` is the same
    normalization constant as in the density.

    Parameters
    ----------
    x : scalar or array
        Point(s) at which the CDF is evaluated.
    v : scalar
        Degrees of freedom.

    Returns
    -------
    Cumulative probability value(s), with the same shape as ``x``.
    """
    half_next = (v + 1)/2
    coeff = Gamma(half_next)/((v*np.pi)**0.5*Gamma(v/2))
    series = hypergeometric(1/2, half_next, 3/2, -x**2/v)
    return 1/2 + x*coeff*series

# Sample grid on [-5, 5] for the Student's t CDF curves.
x = np.linspace(-5, 5, num=1000, dtype=np.float64)

# One CDF curve per degrees-of-freedom value v.
for v in (2, 5, 20, 100):
    plt.plot(x, cumulativeStudentsTDistribution(x, v), label='$v={}$'.format(v))

# Axis labels, legend, render.
plt.xlabel('$x$')
plt.ylabel('$F(x)$')

plt.legend()
plt.show()

def Beta(z1, z2, dt=1e-4):
    """Numerically approximate the beta function B(z1, z2).

    The integral of ``t**(z1 - 1)*(1 - t)**(z2 - 1)`` over ``[0, 1]`` is
    summed with a rectangle rule.  When an argument lies strictly between 0
    and 1 the integrand is singular at the matching endpoint; zeroing that
    sample loses a noticeable part of the integral.  Such arguments are
    therefore first raised by one with the recurrences
    ``B(a, b) = B(a + 1, b)*(a + b)/a`` and
    ``B(a, b) = B(a, b + 1)*(a + b)/b``, which makes the integrand finite.

    Parameters
    ----------
    z1, z2 : scalar
        Arguments of the beta function.  Negative values are clamped to 0;
        for an argument of 0 no recurrence is applied and the non-finite
        endpoint sample is simply dropped.
    dt : scalar, optional
        Step of the integration grid on ``[0, 1]``.

    Returns
    -------
    Approximate value of B(z1, z2).
    """
    z1, z2 = max(z1, 0), max(z2, 0)
    # Factor that undoes the argument shifts applied below.
    scale = 1.0
    if 0 < z1 < 1:
        scale *= (z1 + z2)/z1
        z1 = z1 + 1
    if 0 < z2 < 1:
        scale *= (z1 + z2)/z2
        z2 = z2 + 1
    t = np.linspace(0, 1, int(1/dt) + 1)
    ft = t**(z1 - 1)*(1 - t)**(z2 - 1)
    # Remaining non-finite endpoint samples (arguments clamped to 0).
    ft[~np.isfinite(ft)] = 0
    return scale*ft.sum()*dt

def fDistribution(x, d1, d2):
    """Evaluate the F-distribution probability density.

    Parameters
    ----------
    x : scalar or array
        Point(s) at which the density is evaluated.  The formula divides by
        ``x``, so ``x = 0`` does not produce a regular finite value.
    d1, d2 : scalar
        Degrees of freedom of the distribution.

    Returns
    -------
    Density value(s), with the same shape as ``x``.
    """
    scaled = d1*x
    # (d1*x)**d1 * d2**d2 / (d1*x + d2)**(d1 + d2)
    ratio = scaled**d1*d2**d2/(scaled + d2)**(d1 + d2)
    return ratio**0.5/(x*Beta(d1/2, d2/2))

# Sample grid on [0, 3] for the F density curves.
x = np.linspace(0, 3, 500)

# Distributions at d1 and d2
for d1, d2 in [(1, 1), (2, 1), (5, 2), (10, 3), (100, 5)]:
    plt.plot(
        x, fDistribution(x, d1, d2),
        # Raw f-string so the LaTeX \quad is taken literally instead of
        # being parsed as an (invalid) escape sequence.
        label=rf'$d_1={d1} \quad | \quad d_2={d2}$'
    )

# Visualization
plt.xlabel(r'$x$')
plt.ylabel(r'$f(x)$')
plt.ylim([0, 2])

plt.legend()
plt.show()

def i_beta(x, a, b):
    """Running rectangle-rule integral of ``t**(a-1)*(1-t)**(b-1)`` along ``x``.

    Entry ``i`` of the result sums ``integrand(x[j])*width[j]`` for
    ``j <= i``.  ``width[j]`` is the distance from the previous grid point
    (the first sample reuses the first spacing), so the grid may be
    non-uniform -- as is the grid ``d1*x/(d1*x + d2)`` that
    ``cumulativeFDistribution`` builds.  On a uniform grid every width
    equals ``x[1] - x[0]``.

    Parameters
    ----------
    x : 1-D array with at least two samples
        Increasing integration grid.
    a, b : scalar
        Shape parameters of the integrand.

    Returns
    -------
    Array with the same length as ``x`` holding the running integral.
    """
    t = x
    ft = t**(a - 1)*(1 - t)**(b - 1)
    # Drop non-finite samples (endpoint singularities for a < 1 or b < 1).
    ft[~np.isfinite(ft)] = 0
    # Per-sample widths; prepending t[0] - (t[1] - t[0]) repeats the first gap.
    widths = np.diff(t, prepend=t[0] - (t[1] - t[0]))
    # A running sum gives every prefix total in one O(n) pass.
    return np.cumsum(ft*widths)

def ri_beta(x, a, b):
    """Evaluate the regularized incomplete beta function I_x(a, b).

    The value comes from ``scipy.special.betainc``.  A hand-rolled ratio
    ``i_beta(x, a, b)/Beta(a, b)`` did not give the expected results, so it
    is not evaluated here; revisit it before swapping the SciPy call out.

    Parameters
    ----------
    x : scalar or array
        Upper integration limit(s).
    a, b : scalar
        Shape parameters.

    Returns
    -------
    Value(s) of I_x(a, b), with the same shape as ``x``.
    """
    # Imported locally: SciPy is not among the file-level imports.
    import scipy.special as sc
    return sc.betainc(a, b, x)

def cumulativeFDistribution(x, d1, d2):
    """Evaluate the F-distribution CDF.

    Computed as the regularized incomplete beta function
    ``ri_beta(d1*x/(d1*x + d2), d1/2, d2/2)``.

    Parameters
    ----------
    x : scalar or array
        Point(s) at which the CDF is evaluated.
    d1, d2 : scalar
        Degrees of freedom of the distribution.

    Returns
    -------
    Whatever ``ri_beta`` returns for the transformed argument.
    """
    scaled = d1*x
    return ri_beta(scaled/(scaled + d2), d1/2, d2/2)

# Sample grid on [0, 3] for the F CDF curves.
x = np.linspace(0, 3, 500)

# Distributions at d1 and d2
for d1, d2 in [(1, 1), (2, 1), (5, 2), (10, 3), (100, 5)]:
    plt.plot(
        x, cumulativeFDistribution(x, d1, d2),
        # Raw f-string so the LaTeX \quad is taken literally instead of
        # being parsed as an (invalid) escape sequence.
        label=rf'$d_1={d1} \quad | \quad d_2={d2}$'
    )

# Visualization
plt.xlabel(r'$x$')
plt.ylabel(r'$F(x)$')
plt.ylim([0, 1])

plt.legend()
plt.show()

Probability Distribution - Continuous

Introduction

Probability density function

Cumulative distribution function

Normal distribution

Chi-squared distribution

Student's t-distribution

F-distribution