Cơ bản về xác suất thống kê

Các khái niệm xác suất thống kê

Các khái niệm cơ bản

Bernoulli distribution và categorical distribution

import numpy as np
class bernoulli():
    """Helper functions for a Bernoulli random variable with success probability p.

    All methods are static, so they can be called on the class
    (``bernoulli.mean(p)``) or on an instance (``bernoulli().mean(p)``).
    """

    @staticmethod
    def pmf(x, p):
        """Probability mass function: p for x == 1 and (1 - p) for x == 0."""
        return p**x * (1 - p)**(1 - x)

    @staticmethod
    def mean(p):
        """Expected value of a Bernoulli random variable."""
        return p

    @staticmethod
    def var(p):
        """Variance of a Bernoulli random variable."""
        return p * (1 - p)

    @staticmethod
    def std(p):
        """Standard deviation of a Bernoulli random variable."""
        return bernoulli.var(p)**(1.0/2)

    @staticmethod
    def rvs(p, size = 1):
        """Draw `size` random variates and return them as a float array of 0s and 1s."""
        # np.random.rand draws from [0, 1), so the strict comparison makes
        # p == 0 never produce a success and p == 1 always produce one.
        draws = np.random.rand(size)
        return (draws < p).astype(float)

# Print the mean, variance and standard deviation for success probability p.
p = 0.2
for statistic in (bernoulli.mean, bernoulli.var, bernoulli.std):
    print(statistic(p))

# Each execution produces a fresh random draw of 11 samples.
print(bernoulli.rvs(p, size=11))

Univariate và Multivariate normal distribution

Univariate normal distribution

%matplotlib inline
%config InlineBackend.figure_formats = ['svg']

import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import cm

# Use seaborn's 'darkgrid' theme for the figures below.
sns.set_style('darkgrid')
# Seed NumPy's global random generator so random draws are repeatable.
np.random.seed(42)

def univariate_normal(x, mean, variance):
    """Evaluate the normal density with the given mean and variance at x.

    x may be a scalar or a NumPy array; the density is computed elementwise.
    """
    normalizer = 1.0 / np.sqrt(2 * np.pi * variance)
    exponent = -(x - mean)**2 / (2 * variance)
    return normalizer * np.exp(exponent)

# Plot three univariate normal densities, plus the line y = 2x - 1, on one figure.
x = np.linspace(-3, 5, num = 100)
y = 2 * x - 1

fig = plt.figure(figsize=(5, 3))
# Raw strings keep the LaTeX backslashes literal; '\m' is an invalid escape
# sequence in a normal string literal and triggers a warning on modern Python.
plt.plot(x, univariate_normal(x, mean = 0, variance = 1), label = r'$\mathcal{N}(0, 1)$')
plt.plot(x, univariate_normal(x, mean = 2, variance = 3), label = r'$\mathcal{N}(2, 3)$')
plt.plot(x, univariate_normal(x, mean = 0, variance = .2), label = r'$\mathcal{N}(0, 0.2)$')
plt.plot(x, y, label = r'$\mathcal{y = 2x - 1}$')

plt.xlabel('$x$', fontsize = 13)
plt.ylabel('density: $p(x)$', fontsize = 13)

plt.title('Univariate normal distributions')
# Restrict the view to the sampled x range and to y values in [0, 1].
plt.ylim([0, 1])
plt.xlim([-3, 5])
plt.legend(loc = 1)
fig.subplots_adjust(bottom=0.15)
plt.show()

Multivariate normal distribution

def multivariate_normal(x, d, mean, covariance):
    """Evaluate the d-dimensional normal density at x.

    x and mean must have matching shapes and covariance must be a d x d
    matrix. With column-vector (d x 1) inputs the result is a 1 x 1 array;
    with 1-D inputs it is a scalar.
    """
    centered = x - mean
    normalizer = 1.0 / (np.sqrt((2 * np.pi)**d * np.linalg.det(covariance)))
    # Solve covariance * z = centered instead of forming the inverse explicitly.
    quadratic_form = np.linalg.solve(covariance, centered).T.dot(centered)
    return normalizer * np.exp(-0.5 * quadratic_form)

# plot bivariate distribution
def generate_surface(mean, covariance, d):
    """Evaluate the normal density on a 50 x 50 grid over [-5, 5] x [-5, 5].

    Returns the two meshgrid coordinate arrays and the matching array of
    density values, in that order.
    """
    nb_of_x = 50
    x1s = np.linspace(-5, 5, num=nb_of_x)
    x2s = np.linspace(-5, 5, num=nb_of_x)
    x1, x2 = np.meshgrid(x1s, x2s) # generate grid
    pdf = np.zeros((nb_of_x, nb_of_x))
    # Fill the density matrix for each grid point.
    for i in range(nb_of_x):
        for j in range(nb_of_x):
            point = np.array([[x1[i, j]], [x2[i, j]]])
            density = multivariate_normal(point, d, mean, covariance)
            # The density comes back as a 1 x 1 array for column-vector input;
            # extract the scalar explicitly rather than relying on NumPy's
            # deprecated implicit array-to-scalar conversion on assignment.
            pdf[i, j] = np.asarray(density).item()

    return x1, x2, pdf

fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))
d = 2

# One entry per panel: axes, mean, covariance, axis limits, title.
panels = [
    (ax1, [[0.], [0.]], [[1.0, 0.0], [0.0, 1.0]],
     [-2.5, 2.5, -2.5, 2.5], 'Independent variables'),
    (ax2, [[0.], [1.]], [[1.0, 0.8], [0.8, 1.0]],
     [-2.5, 2.5, -1.5, 3.5], 'Correlated variables'),
]

for ax, mean_rows, covariance_rows, limits, title in panels:
    bivariate_mean = np.matrix(mean_rows)
    bivariate_covariance = np.matrix(covariance_rows)

    x1, x2, p = generate_surface(bivariate_mean, bivariate_covariance, d)

    # Filled contour plot of the density with 33 levels.
    con = ax.contourf(x1, x2, p, 33, cmap=cm.YlGnBu)
    ax.set_xlabel('$x_1$', fontsize=13)
    ax.set_ylabel('$x_2$', fontsize=13)
    ax.axis(limits)
    ax.set_aspect('equal')
    ax.set_title(title, fontsize=12)

# Add a colorbar in its own axes on the right, then the figure title.
# NOTE(review): `con` is the contour set of the last panel only, so the
# colorbar reflects that panel's levels; confirm this is intended.
fig.subplots_adjust(right=0.8)
cbar_ax = fig.add_axes([0.85, 0.15, 0.02, 0.7])
cbar = fig.colorbar(con, cax=cbar_ax)
cbar.ax.set_ylabel('$p(x_1, x_2)$', fontsize=13)
plt.suptitle('Bivariate normal distributions', fontsize=13, y=0.95)
plt.show()

Dirichlet distribution

Link tham khảo

Full ipynb

Tài liệu tham khảo

Machine learning cơ bản

Hết.

Cơ bản về xác suất thống kê

Cơ bản về xác suất thống kê

Các khái niệm cơ bản

Bernoulli distribution và categorical distribution

Univariate và Multivariate normal distribution

Univariate normal distribution

Multivariate normal distribution

Dirichlet distribution

Link tham khảo

Tài liệu tham khảo

CATALOG

FEATURED TAGS

LINKS