9. Stochastic Methods: Exercise Solutions

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

1. Integration

When an explicit function for a region's boundary, like \(x_n = g(x_1,...,x_{n-1})\), is not available, you can use a binary function that takes 1 inside and 0 outside the region to measure its volume.

  1. Measure the volume of an n-dimensional sphere using a binary function
\[ f(x) = \left\{ \begin{array}{cc} 1 & \mbox{if } \sum_{i=1}^n x_i^2 \le 1 \\ 0 & \mbox{otherwise} \end{array}\right. \]

from scipy.special import gamma
m = 10000
N = 15
x = np.random.random((N, m))  # uniform samples in the positive orthant
vol = np.zeros((N+1, 3))  # two estimates and the theoretical value
for n in range(2, N+1):
    # by height
    h2 = 1 - np.sum(x[:n-1]**2, axis=0)
    vol[n,0] = 2**n*np.sum(np.sqrt((h2>0)*h2))/m
    # by in/out
    r = np.sum(x[:n]**2, axis=0)
    vol[n,1] = 2**n*np.sum(r<=1)/m
    # theoretical value
    vol[n,2] = 2*np.pi**(n/2)/gamma(n/2)/n
print(vol)
[[0.         0.         0.        ]
 [0.         0.         0.        ]
 [3.15243994 3.1488     3.14159265]
 [4.21682594 4.2512     4.1887902 ]
 [5.02534608 5.0272     4.9348022 ]
 [5.40758711 5.504      5.26378901]
 [5.45715134 5.4272     5.16771278]
 [5.08352633 4.928      4.72476597]
 [4.36761871 4.2496     4.05871213]
 [3.27062382 3.328      3.2985089 ]
 [2.41358503 2.1504     2.55016404]
 [1.39438297 1.024      1.88410388]
 [0.81346759 0.4096     1.33526277]
 [0.41747283 0.         0.91062875]
 [0.         0.         0.59926453]
 [0.         0.         0.38144328]]
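The in/out estimate collapses to zero for large \(n\) because the hit probability, i.e., the fraction \(V_n/2^n\) of the enclosing cube occupied by the sphere, becomes tiny. A quick look, as a sketch using the theoretical volume formula above:

for n in [2, 5, 10, 15]:
    Vn = 2*np.pi**(n/2)/gamma(n/2)/n  # volume of the n-ball
    print(n, Vn/2**n)  # hit probability per uniform sample in [-1,1]^n

For \(n=15\) this probability is about \(10^{-5}\), so \(m=10000\) samples yield essentially no hits.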
  2. Measure the area or volume of a shape of your interest by a sampling method.

A donut (torus) with hole radius \(r_0\) and outer radius \(r_1\).

m = 10000
r0 = 2  # hole radius
r1 = 6  # outer radius 
R = (r0+r1)/2  # radius of the center of the tube
r = (r1-r0)/2  # radius of the tube
x = np.random.random((3, m))  # to be scaled to [0,r1]x[0,r1]x[0,r], one octant
d = np.sqrt((r1*x[0])**2 + (r1*x[1])**2)  # horizontal distance from the center
vol = 8*r1*r1*r*np.sum((d-R)**2 + (r*x[2])**2 <= r**2)/m  # fraction within the tube times the box volume
print(vol)
316.6272
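As a sanity check, this can be compared with the analytic volume of a torus, \(V = 2\pi^2 R r^2\):

print(2*np.pi**2*R*r**2)  # analytic torus volume; about 315.8 for r0=2, r1=6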

2. Rejection Sampling

Let us generate samples from the Gamma distribution
\[ p(x; k, \theta) = \frac{1}{\Gamma(k)\theta^k}x^{k-1}e^{-\frac{x}{\theta}} \]
with the shape parameter \(k>0\) and the scaling parameter \(\theta\), using the gamma function \(\Gamma(k)\) available in scipy.special.

from scipy.special import gamma
from scipy.special import factorial

Here is what the gamma function looks like, together with the factorial at integer values.

kmax = 5
x = np.linspace(0, kmax, 50)
plt.plot(x, gamma(x))
plt.plot(range(1,kmax+1), [factorial(k-1) for k in range(1,kmax+1)], 'o')
plt.xlabel('x'); plt.ylabel('gamma(x)');
[Figure: gamma(x) with factorial values marked at integers]
  1. Define the Gamma density function with arbitrary \(k\) and \(\theta\).

def p_gamma(x, k=1, theta=1):
    """density function of gamma distribution"""
    return(x**(k-1)*np.exp(-x/theta)/(gamma(k)*theta**k)) 
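A quick numerical check that the density integrates to one (a sketch; the values of \(k\) and \(\theta\) are arbitrary):

xx = np.linspace(0, 50, 5001)
dx = xx[1] - xx[0]
print(np.sum(p_gamma(xx, k=2.5, theta=1.5))*dx)  # should be close to 1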

Consider the exponential distribution \(q(x;\mu)=\frac{1}{\mu}e^{-\frac{x}{\mu}}\) as the proposal distribution.

def p_exp(x, mu=1):
    """density function of exponential distribution"""
    return(np.exp(-x/mu)/mu)
def x_exp(n=1, mu=1):
    """sample from exponential distribution"""
    y = np.random.random(n)  # uniform in [0,1]
    return(-mu*np.log(1 - y))
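A quick visual check that x_exp (inverse-transform sampling) indeed follows the exponential density (a sketch; mu and the sample size are arbitrary choices):

mu = 2
xs = x_exp(10000, mu)
plt.hist(xs, bins=50, density=True)  # sampled values
xx = np.linspace(0, 15, 100)
plt.plot(xx, p_exp(xx, mu))  # density for comparison
plt.xlabel('x'); plt.ylabel('q(x)');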

The ratio of the target and proposal distributions is
\[ \frac{p(x;k,\theta)}{q(x;\mu)} = \frac{\mu}{\Gamma(k)\theta^k}x^{k-1}e^{(\frac{1}{\mu}-\frac{1}{\theta})x}. \]
Setting
\[ \frac{d}{dx}\frac{p(x;k,\theta)}{q(x;\mu)} = 0 \]
gives
\[ \left\{(k-1)x^{k-2}+\left(\frac{1}{\mu}-\frac{1}{\theta}\right)x^{k-1}\right\}e^{(\frac{1}{\mu}-\frac{1}{\theta})x} = 0. \]
Thus at
\[ x = \frac{(k-1)\mu\theta}{\mu-\theta} \]
the ratio \(\frac{p(x;k,\theta)}{q(x;\mu)}\) takes its maximum
\[ \frac{\mu^k}{\Gamma(k)\theta}\left(\frac{k-1}{\mu-\theta}\right)^{k-1}e^{1-k}. \]

  2. What is a good choice of \(\mu\) to satisfy \(p(x)\le cq(x)\), and what is the value of \(c\) for that choice?

Minimizing this maximum over \(\mu\) by setting its derivative with respect to \(\mu\) to zero gives \(k(\mu-\theta)=(k-1)\mu\), i.e., \(\mu=k\theta\). With this choice, \(p(x)\le cq(x)\) holds with \(c=\frac{k^k}{\Gamma(k)}e^{1-k}\).

  3. Verify that \(cq(x)\) covers \(p(x)\) by plotting them for some \(k\) and \(\theta\).

k = 2.5
theta = 1.5
c = (k**k)/gamma(k)*np.exp(1-k)
x = np.linspace(0, 10, 100)
plt.plot(x, p_gamma(x, k, theta), label='p(x)')
plt.plot(x, c*p_exp(x, k*theta), label='c*q(x)')
plt.xlabel('x'); plt.ylabel('p(x)'); plt.legend();
[Figure: p(x) and the envelope c*q(x)]
  4. Implement a function to generate samples from the Gamma distribution with arbitrary \(k\) and \(\theta\) using rejection sampling from the exponential distribution.

def x_gamma(n=1, k=1, theta=1):
    """sample from gamma distribution by rejection sampling"""
    c = (k**k)/gamma(k)*np.exp(1-k)
    #print("c =", c)
    xe = x_exp(n, k*theta)
    paccept = p_gamma(xe, k, theta)/(c*p_exp(xe, k*theta))
    accept = np.random.random(n)<paccept
    xg = xe[accept]  # rejection sampling
    #print("accept rate =", len(xg)/n)
    return(xg)
k = 3.5
theta = 1.5
xs = x_gamma(1000, k, theta)
plt.hist(xs, bins=20, density=True)
x = np.linspace(0, 10, 100)
plt.plot(x, p_gamma(x, k, theta))
plt.xlabel('x'); plt.ylabel('p(x)');
[Figure: histogram of rejection samples with the Gamma density overlaid]
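The expected acceptance rate of rejection sampling equals \(1/c\); a quick empirical check (sketch):

n = 10000
c = (k**k)/gamma(k)*np.exp(1-k)
print("1/c =", 1/c)  # theoretical acceptance rate
print("acceptance rate =", len(x_gamma(n, k, theta))/n)  # empirical rate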

3. Importance Sampling

You have \(m\) sample values \(f(x_i)\) \((i=1,...,m)\) at \(x_i\) following a normal distribution
\[ q(x;\mu_0,\sigma_0) = \frac{1}{\sqrt{2\pi \sigma_0^2}}e^{-\frac{(x-\mu_0)^2}{2\sigma_0^2}}. \]

Consider estimating the mean of \(f(x)\) for samples from a different normal distribution \(p(x;\mu_1, \sigma_1)\) by importance sampling:
\[ E_p[f(x)] = E_q\left[\frac{p(x)}{q(x)}f(x)\right] \]

  1. What is the importance weight \(\frac{p(x)}{q(x)}\)?

\[\frac{\sigma_0}{\sigma_1}e^{\frac{(x-\mu_0)^2}{2\sigma_0^2}-\frac{(x-\mu_1)^2}{2\sigma_1^2}} \]
  2. Let us consider the simplest case of \(f(x)=x\).

Generate \(m=100\) samples with \(\mu_0=100\) and \(\sigma_0=20\) and take the sample mean \(E_q[f(x)]\).

def f(x):
    return x
def normal(x, mu=0, sig=1):
    """normal distribution function"""
    return np.exp(-((x-mu)/sig)**2/2)/(np.sqrt(2*np.pi)*sig)
m = 100
mu0 = 100
sig0 = 20
xs = mu0 + sig0*np.random.randn(m)
plt.hist(xs, bins=20, density=True)
print(np.mean(f(xs)))
x = np.linspace(mu0-3*sig0, mu0+3*sig0)
plt.plot(x, normal(x, mu0, sig0))
plt.xlabel('x'); plt.ylabel('p(x)');
101.6337139046687
[Figure: histogram of samples with q(x; mu0, sig0) overlaid]
  3. Estimate the mean \(E_p[f(x)]\) for \(\mu_1=120\) and \(\sigma_1=10\) by importance sampling.

mu1 = 120
sig1 = 10
importance = sig0/sig1*np.exp((xs-mu0)**2/(2*sig0**2)-(xs-mu1)**2/(2*sig1**2))
mean1 = np.mean(importance*f(xs))
print(mean1)
plt.hist(xs, bins=20, density=True)
plt.plot(x, normal(x, mu0, sig0))
plt.plot(x, normal(x, mu1, sig1))
plt.plot(xs, importance/100, '*');  # importance weights, scaled down for display
plt.xlabel('x'); plt.ylabel('p(x)');
108.93161620571482
[Figure: q(x), p(x), and the scaled importance weights at the samples]
  4. See how the result changes with different settings of \(\mu_1\), \(\sigma_1\), and the sample size \(m\).

# more samples
m = 1000
xs = mu0 + sig0*np.random.randn(m)
plt.hist(xs, bins=20, density=True)
print(np.mean(f(xs)))
importance = sig0/sig1*np.exp((xs-mu0)**2/(2*sig0**2)-(xs-mu1)**2/(2*sig1**2))
mean1 = np.mean(importance*f(xs))
print(mean1)
plt.plot(x, normal(x, mu0, sig0))
plt.plot(x, normal(x, mu1, sig1))
plt.plot(xs, importance/100, '*');
plt.xlabel('x'); plt.ylabel('p(x)');
100.90415821909802
124.2821528459725
[Figure: histogram, q(x), p(x), and scaled importance weights for m=1000]
# closer distribution
m = 1000
mu1 = 110
xs = mu0 + sig0*np.random.randn(m)
plt.hist(xs, bins=20, density=True)
print(np.mean(f(xs)))
importance = sig0/sig1*np.exp((xs-mu0)**2/(2*sig0**2)-(xs-mu1)**2/(2*sig1**2))
mean1 = np.mean(importance*f(xs))
print(mean1)
plt.plot(x, normal(x, mu0, sig0))
plt.plot(x, normal(x, mu1, sig1))
plt.plot(xs, importance/100, '*');
plt.xlabel('x'); plt.ylabel('p(x)');
100.50218317020335
109.87788154478456
[Figure: histogram, q(x), p(x), and scaled importance weights for mu1=110]

(Optional) Try with a different function \(f(x)\).
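For example, with \(f(x)=x^2\) the true mean under \(p\) is \(E_p[x^2]=\mu_1^2+\sigma_1^2\), which the same weights can be checked against (a sketch using the settings above):

def f2(x):  # a different test function: f(x) = x^2
    return x**2
importance = sig0/sig1*np.exp((xs-mu0)**2/(2*sig0**2)-(xs-mu1)**2/(2*sig1**2))
print(np.mean(importance*f2(xs)))  # importance-sampling estimate
print(mu1**2 + sig1**2)  # true value: 12200 for mu1=110, sig1=10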

4. MCMC

Try applying Metropolis sampling to your own (unnormalized) distribution.

def metropolis(p, x0, sig=0.1, m=1000):
    """metropolis: Metropolis sampling
    p:unnormalized probability, x0:initial point,
sig: sd of proposal distribution, m: number of sampling steps"""
    n = len(x0)  # dimension
    p0 = p(x0)
    x = []
    for i in range(m):
        x1 = x0 + sig*np.random.randn(n)
        p1 = p(x1)
        pacc = min(1, p1/p0)  # Metropolis acceptance probability
        if np.random.rand()<pacc:
            x.append(x1)
            x0 = x1
            p0 = p1
        # note: rejected proposals are simply dropped here; standard
        # Metropolis would record the current state x0 again on rejection
    return np.array(x)
def donut(x, r0=1, r1=2):
    """donut-like distribution: r0:hole radius, r1:outer radius"""
    R = (r0+r1)/2  # radius of the center of the tube
    r = (r1-r0)/2  # radius of the tube
    d = np.sqrt(x[0]**2 + x[1]**2)  # horizontal distance from center
    #return np.sqrt((d>r0)*(d<r1)*(r**2-(d-R)**2))
    return np.exp(-((d-R)/r)**2)
rx = 3  # plot range
x = np.linspace(-rx, rx)
X, Y = np.meshgrid(x, x)
Z = donut(np.array([X,Y]))
plt.contour(X, Y, Z)
plt.axis('square')
(-3.0, 3.0, -3.0, 3.0)
[Figure: contour plot of the donut distribution]
x = metropolis(donut, [0,0], m=5000)
s = len(x); print(s)
plt.contour(X, Y, Z)
plt.scatter(x[:,0], x[:,1], c=np.arange(s), marker='.');
plt.axis('square')
4542
(-3.0, 3.0, -3.0, 3.0)
[Figure: Metropolis samples, colored by step, over the donut contour]
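A simple diagnostic (a sketch): the radial marginal of the samples should roughly follow \(d\,e^{-((d-R)/r)^2}\), with \(R=1.5\) and \(r=0.5\) for the defaults \(r_0=1\), \(r_1=2\) (only roughly, since this implementation drops rejected steps).

d = np.sqrt(x[:,0]**2 + x[:,1]**2)  # radii of the Metropolis samples
plt.hist(d, bins=30, density=True)
dd = np.linspace(0, 3, 100)
pd = dd*np.exp(-((dd-1.5)/0.5)**2)  # unnormalized radial density
pd /= np.sum(pd)*(dd[1]-dd[0])  # normalize for comparison with the histogram
plt.plot(dd, pd)
plt.xlabel('radius'); plt.ylabel('density');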