├── AM_Sampling.py
├── Adaptive_MCMC_for_Bayesian_Inference.pdf
├── CMA_Sampling.py
├── Experiments.ipynb
├── FileHandling.py
├── GaA_Sampling.py
├── L_AM_Sampling.py
├── MH_Sampling.py
├── README.md
├── TestSuite.py
└── Visualization.py

--------------------------------------------------------------------------------
/AM_Sampling.py:
--------------------------------------------------------------------------------

# coding: utf-8

# # *Adaptive* MH
#
# See the 1999 and 2001 papers of Haario et al.

# In[1]:

import numpy as np
import numpy.linalg as la
import matplotlib.pyplot as plt
import scipy as sp
import scipy.stats as ss
import math
import random
from collections import namedtuple


from MH_Sampling import acceptance_decision
from FileHandling import save_state
from TestSuite import generate_state_space, generate_iid_samples, get_distribution, get_samples


# In[2]:

# save Adaptive Metropolis parameters in Named Tuple

AM_Pars = namedtuple('AM_Pars',
                     ['Origin', 'Id',
                      'sigma_0', 'sigma_opt',
                      'C_0', 'C_opt',
                      'z_samples'])

# initialize parameters
def init_AM_pars(sp):
    dim, origin, idty = sp['dim'], sp['Origin'], sp['Id']
    sigma_0, sigma_opt = 0.1/np.sqrt(dim), sp['sigma_opt']
    cov_0, cov_opt = sigma_0**2*idty, sigma_opt**2*idty
    return AM_Pars(Origin=origin, Id=idty,
                   sigma_0=sigma_0, sigma_opt=sigma_opt,
                   C_0=cov_0, C_opt=cov_opt,
                   z_samples=get_samples(sp=sp, name='Z'))


# # Adaptive MH algorithm *AM*

# ## Generate the candidate next sample
#
# We consider a version of the *Adaptive Metropolis* (*AM*) sampler of Haario
# et al. (2001). We want to sample from the $d$-dimensional target distribution $\pi(\mathbf{x})$.
#
# We perform a Metropolis algorithm with proposal distribution $\mathbf{Q}_n$ at iteration $n$ given by
#
# $$\mathbf{Q}_n(\mathbf{x}, ·) = N(\mathbf{x}, \sigma_{0}^2 \mathbb{1}_d)$$
#
# for $n \leq 2d$, while for $n > 2d$
#
# $$\mathbf{Q}_n(\mathbf{x}, ·) = (1 − \beta) N(\mathbf{x}, \sigma_{opt}^2 \mathbf{C}_n) +
# \beta N(\mathbf{x}, \sigma_{0}^2 \mathbb{1}_d)$$
#
# where $\mathbf{C}_n$ is the current empirical estimate of the covariance of the target distribution
# based on the samples so far, $\sigma_{0}^2 = \frac{0.1^2}{d}$ and $\sigma_{opt}^2 = \frac{2.38^2}{d}$ are the initial and optimal scale, respectively, and $\beta$ is a small positive constant; we use $\beta = 0.05$.
#
# In other words, the next candidate is sampled from
#
# $$\mathbf{x}^{*} \sim \mathbf{Q}_n(\mathbf{x}, ·)$$
#
# The text above is adapted from Section 2 of Gareth O. Roberts and Jeffrey S. Rosenthal (2008),
# *Examples of Adaptive MCMC*.

# ## The proposal covariance used at step $n$
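#
# With probability $\beta$ (or during the initialization period $n \leq 2d$) the proposal
# uses the fixed covariance $\sigma_0^2 \mathbb{1}_d$; otherwise it uses the scaled
# empirical covariance $\sigma_{opt}^2 \mathbf{C}_n$ with $\mathbf{C}_n = M2/(n-1)$,
# where $M2$ is the running sum of squared deviations maintained by the Welford
# update further below.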
# In[3]:

def get_proposal_cov(M2, n, pars, beta=0.05):
    d, _ = M2.shape
    init_period = 2*d
    s_0, s_opt, C_0 = pars.sigma_0, pars.sigma_opt, pars.C_0
    if np.random.rand() <= beta or n <= init_period:
        return C_0
    else:
        # We can always divide M2 by n-1 since n > init_period.
        # sigma_opt is a scale, so the variance factor is sigma_opt**2,
        # giving sigma_opt^2 * C_n with C_n = M2/(n-1).
        return (s_opt**2/(n - 1))*M2


# In[4]:

def generate_AM_candidate(current, M2, n, pars):
    prop_cov = get_proposal_cov(M2, n, pars)
    candidate = ss.multivariate_normal(mean=current, cov=prop_cov).rvs()
    return candidate


# ## Update the mean $\mathbf{m}$ and the covariance $\mathbf{C}$
#
# In the *AM*-algorithm, the **mean** is updated as
#
# $$\mathbf{m}_{n+1} = \mathbf{m}_{n} + \frac{1}{n+1}\left(\mathbf{x}_{n+1} - \mathbf{m}_{n}\right) = \frac{n}{n+1}\mathbf{m}_{n} + \frac{1}{n+1}\mathbf{x}_{n+1}$$
#
# and the **covariance** as
#
# $$\mathbf{C}_{n+1} = \mathbf{C}_{n} + \frac{1}{n+1}\left(
# \left(\mathbf{x}_{n+1} - \mathbf{m}_{n}\right)\left(\mathbf{x}_{n+1} - \mathbf{m}_{n}\right)^\top - \mathbf{C}_{n} \right)$$
#
# where $\mathbf{x}_{n+1}$ is the sample generated at step $n+1$.

# In the Welford algorithm,
#
# $$M_n \triangleq \sum_{i=1}^{n} {(x_i - \overline{x}_n)}^2$$
#
# or in other words
#
# $$s_n^2 = \frac{M_n}{n-1}$$
#
# It is easier to update $M_n$ in a numerically stable way,
#
# $$M_n = M_{n-1} + (x_n - \overline{x}_{n-1})(x_n - \overline{x}_n)^\top$$

# In[5]:

def update_moments(mean, M2, sample, n):
    next_n = n + 1
    w = 1/next_n
    new_mean = mean + w*(sample - mean)
    delta_bf, delta_af = sample - mean, sample - new_mean
    new_M2 = M2 + np.outer(delta_bf, delta_af)
    return new_mean, new_M2, next_n


# In[6]:

def multiple_of_10000(n):
    return n % 10000 == 0


# In[7]:

def AM_sampler(pars, target, initial_state, run_data):
    ds, N = run_data.DataStore, run_data.N
    target_pdf = target['pdf']

    current = initial_state
    mean, M2 = pars.Origin, np.zeros_like(pars.Id)
    accepted = True

    for n in range(0, N):
        save_state(data_store=ds, step=n,
                   state=current, value=target_pdf(current),
                   mean=mean, covariance=M2, accepted_p=accepted)

        # generate new candidate
        candidate = generate_AM_candidate(current=current, M2=M2, n=n, pars=pars)

        # run the Metropolis-Hastings acceptance criterion
        accepted = acceptance_decision(current=current, proposed=candidate, pdf=target_pdf)

        # the accepted candidate becomes the new state
        if accepted:
            current = candidate
        # We always update M2, where S^2 = M2/(n-1),
        # whether the proposed sample is accepted or not.
        mean, M2, _ = update_moments(mean, M2, current, n)
    return run_data

--------------------------------------------------------------------------------
/Adaptive_MCMC_for_Bayesian_Inference.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/philipk01/Optimization_and_Sampling_for_Bayesian_Inference/8236469751b6c23f2ce25ef8122e7824f9b91696/Adaptive_MCMC_for_Bayesian_Inference.pdf
--------------------------------------------------------------------------------
/CMA_Sampling.py:
--------------------------------------------------------------------------------

# coding: utf-8
# # CMA *Sampling*

# In[22]:

import numpy as np
import numpy.linalg as la
import matplotlib.pyplot as plt
import scipy as sp
import scipy.stats as ss
import math
import random
from collections import namedtuple
#%precision 4
#%matplotlib inline


# In[23]:

# project python modules
from MH_Sampling import acceptance_decision
from FileHandling import save_state
from TestSuite import generate_state_space, generate_iid_samples, get_samples


# # CMA Sampler
#
# Both the CMA-ES optimization and sampling algorithms have a number of
# **strategy parameters** that do not change during the execution of the algorithm.
# We use the default values recommended in the paper *C. Igel, T. Suttorp, and N. Hansen, A Computational Efficient Covariance Matrix Update and a $(1+1)$-CMA for Evolution Strategies*, henceforth called *the paper*.
#
# In the update of the **global scale** $\sigma$, the following parameters with given initial values are used:
#
# - **damping parameter** $$k = 1 + \frac{d}{2}$$ where $d$ is the dimension of the state space. (**Note**: In the paper, $d$ is used for
# the damping parameter instead of $k$, and $n$ for the dimension instead of $d$.)
# - **target success rate** $$p_{s}^{succ} = \frac{2}{11}$$
# (**Remark BM**: compare $p_{s}^{succ}$ to the optimal acceptance ratio in Metropolis-Hastings.)
# - **learning rate** $$c_p = \frac{1}{12}$$ used in the update of the average success rate
# $\bar{p}_s \in \left[ 0, 1 \right]$, cf. the procedure *update_scale* below for more information.
#
# For the **covariance matrix adaptation**, they are
#
# - **evolution point weight** $$\lambda_{\mathbf{p}} = \frac{2}{d+2}$$
# - **covariance matrix weight** $$\lambda_{\mathbf{C}} = \frac{2}{d^2 + 6}$$
# - **threshold** $$\theta_p$$ for the *average success rate* $\bar{p}_s$. The updates of the evolution point $\mathbf{p}_c$ and the covariance matrix $\mathbf{C}$ depend on the test $\bar{p}_s < \theta_p$, cf. the procedure *update_cov* below for more information.
#
# **Note**: in the CMA-ES literature, step size is used instead of global scale. In order to be consistent
# with the MCMC literature we prefer and use the latter.

# # Initial values
#
# The initial values are set as follows
#
# - **average success rate** $\bar{p}_s = p_{s}^{succ}$ where $p_{s}^{succ}$ is the *target success rate*.
# - **evolution point** $\mathbf{p}_c = \mathbf{0}$
# - **covariance matrix** $\mathbf{C} = \mathbb{1}_d$
#
# The choice of the initial candidate $\mathbf{x}_0$ and the initial global scale $\sigma$ is problem dependent.
# Here, we initialize $\mathbf{x}_0$ with a random point in a hypercube centered at the origin, whose side length can vary, and we set $\sigma = 1$.
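# Worked example (illustration only, not used by the sampler): for a d = 2
# state space the defaults below evaluate to
#
#   k     = 1 + 2/2      = 2.0    (damping parameter)
#   c_c   = 2/(2+2)      = 0.5    (evolution point weight)
#   c_cov = 2/(2**2 + 6) = 0.2    (covariance matrix weight)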
# In[24]:

# save covariance matrix adaptation (CMA) parameters in Named Tuple

CMA_Parameters = namedtuple('CMA_Parameters',
                            ['z_samples',
                             # Parameters used in the global scale control
                             's', 'k', 't_succ', 'c_p',
                             # Parameters used in the covariance adaptation
                             'c_c', 'c_cov', 'p_thres'])

def init_CMA_pars(sp):
    dim = sp['dim']
    return CMA_Parameters(z_samples=get_samples(sp=sp, name='Z'),
                          s=1,
                          k=1+dim/2,
                          t_succ=2/11,
                          c_p=1/12,
                          c_c=2/(dim+2),
                          c_cov=2/(dim**2+6),
                          p_thres=0.44)


# # Generation of the candidate $\mathbf{x}^{*}$
#
# Let $\mathbf{C} = \mathbf{Q}\mathbf{Q}^{\top}$ be the Cholesky decomposition of the covariance matrix $\mathbf{C}$. The candidate next state $\mathbf{x}^{*}$ is generated as
#
# $$\mathbf{x}^{*} = \mathbf{x}_n + \sigma_n \mathbf{Q}_n \mathbf{z}_n$$
#
# where $\sigma_n$ is the global scale, $\mathbf{Q}_n$ the Cholesky factor of $\mathbf{C}_n$,
# and $\mathbf{z}_n \sim N(\mathbf{0}, \mathbb{1}_d)$ at time step $n$, respectively.

# In[25]:

def generate_CMA_candidate(current, scale, cov, z_sample):
    # Use the positive definite square root of C, cf. Tutorial, p. 6.
    Q = la.cholesky(cov)
    y = Q @ z_sample
    candidate = current + scale*y
    return (candidate, y)


# ## Update of the global scale $\sigma$
#
# At **each time step**, the global scale $\sigma$ is updated; the update consists of two steps.
# First, the average success rate $\bar{p}_{s}$
# is updated, and this depends on the acceptance of the generated candidate. Next,
# $\sigma$ itself is updated.
#
# The **average success rate** $\bar{p}_{s}$ and the **global scale**
# $\sigma$ are updated as
#
# \begin{align}
# \bar{p}_{s} & \leftarrow (1 - c_p) \bar{p}_{s} + c_p \textit{ accepted_p}\\
# \sigma & \leftarrow \sigma ~ \exp \left( \frac{1}{k} \left( \bar{p}_{s} - \frac{p_{s}^{target}}{1 - p_{s}^{target}} (1- \bar{p}_{s}) \right) \right)
# \end{align}

# In[26]:

def update_scale(p_succ, sigma, accepted_p, pars):
    # Parameters used in the global scale control
    k, t_succ, c_p = pars.k, pars.t_succ, pars.c_p

    # Update the average success rate.
    p_succ = (1 - c_p)*p_succ + c_p*accepted_p

    # Update the global scale.
    w = t_succ/(1 - t_succ)
    sigma = sigma * np.exp(1/k*(p_succ - w*(1 - p_succ)))
    return p_succ, sigma


# In[27]:

def f(d):
    k = 1 + d/2
    t_succ = 2/11
    w = t_succ/(1 - t_succ)
    return lambda x: np.exp(1/k*(x - w*(1 - x)))


# In[28]:

f_d = f(d=50)
f_d(2/11)


# In[29]:

D = 2
k = 1 + D/2
f_d = f(d=D)
x_range = np.linspace(start=0, stop=1, num=50, endpoint=True)
f_range = f_d(x_range)
t_succ = 2/11
coords = (t_succ, f_d(t_succ))
plt.title(r'$f(x) = e^{\frac{1}{k} ( x - w (1 - x ))}$')
plt.annotate('target threshold', xy=coords, xytext=(0.2, 1.2),
             arrowprops=dict(facecolor='black', shrink=0.05))
plt.plot(x_range, f_range);


# # Update of the covariance matrix $\mathbf{C}$
#
# The covariance matrix is updated only **when** the generated candidate $\mathbf{x}^{*}$
# is **accepted**.
# This update also consists of two steps.
# First, the evolution point $\mathbf{p}_{c}$
# is updated, followed by the update of the covariance matrix itself.
# These updates depend on whether $\bar{p}_s < \theta_p$ or not.
#
# The **evolution point** $\mathbf{p}_{c}$ and the **covariance matrix**
# $\mathbf{C}$ are updated as
#
# \begin{align}
# \mathbf{p}_{c} &\leftarrow
# \begin{cases}
# \left(1-\lambda_\mathbf{p} \right) \mathbf{p}_{c} + \sqrt{ \lambda_\mathbf{p}(2 - \lambda_\mathbf{p})}
# ~\mathbf{y} & \mbox{if $\bar{p_{s}} < \theta_p$}\\
# \left (1-\lambda_\mathbf{p} \right) \mathbf{p}_{c} & \mbox{otherwise}
# \end{cases} \\
# \mathbf{C} &\leftarrow
# \begin{cases}
# \left( 1- \lambda_\mathbf{C} \right) \mathbf{C} + \lambda_\mathbf{C}
# \mathbf{p}_{c}\mathbf{p}_{c}^{\top} & \mbox{if $\bar{p_{s}} < \theta_p$}\\
# \left ( 1-\lambda_\mathbf{C} \right) \mathbf{C} + \lambda_\mathbf{C} \left (\mathbf{p}_{c}\mathbf{p}_{c}^{\top} + \lambda_\mathbf{p}(2-\lambda_\mathbf{p}) \mathbf{C} \right) & \mbox{otherwise}
# \end{cases}
# \end{align}
#
# where $\mathbf{y} = \mathbf{Q} \mathbf{z}$ with $\mathbf{z} \sim N(\mathbf{0}, \mathbb{1}_d)$ and $\mathbf{C} =
# \mathbf{Q}\mathbf{Q}^{\top}$, cf. above for more information about how samples are generated.

# In[30]:

def update_cov(cov, evol_point, y, avg_success_rate, pars):
    # Parameters used in the covariance matrix adaptation
    c_c, c_cov, p_thres = pars.c_c, pars.c_cov, pars.p_thres
    if avg_success_rate < p_thres:
        evol_point = (1-c_c)*evol_point + np.sqrt(c_c*(2-c_c))*y
        cov = (1-c_cov)*cov + c_cov*np.outer(evol_point, evol_point)
    else:
        evol_point = (1-c_c)*evol_point
        cov = (1-c_cov)*cov + c_cov*(np.outer(evol_point, evol_point) + c_c*(2-c_c)*cov)
    return evol_point, cov


# # (1+1)-CMA ES Sampler
#
# The sampler operates as follows
#
# Initialize $\mathbf{x}$, $\sigma = 1$, $\mathbf{C} = \mathbb{1}_d$,
# $\bar{p}_s = p_s^{target}$, and $\mathbf{p}_c = \mathbf{0}$
#
# **repeat**
# > 1. determine $\mathbf{Q}$ such that $\mathbf{C} = \mathbf{Q} \mathbf{Q}^\top$
# > 2. $\mathbf{z} \sim N(\mathbf{0}, ~ \mathbb{1}_d)$
# > 3. $\mathbf{x}^{*} = \mathbf{x} + \sigma ~ \mathbf{Q} ~ \mathbf{z}$
# > 4. *accepted_p* = *acceptance_decision*($\mathbf{x}, \mathbf{x}^{*}, \pi$) where $\pi$ is the target distribution
# > 5. **if** *accepted_p* **then**
# >> * $\mathbf{x} \leftarrow \mathbf{x}^{*}$
# >> * *updateCov*($\mathbf{C}, ~ \mathbf{Q} \mathbf{z}, ~ \bar{p}_s, ~ \mathbf{p}_c$)
#
# **until** the stopping criterion is met

# In[31]:

def CMA_sampler(pars, target, initial_state, run_data):
    target_pdf, sp = target['pdf'], target['State Space']
    Origin, Id = sp['Origin'], sp['Id']
    s, p_succ, p_c = pars.s, pars.t_succ, Origin
    ds, N = run_data.DataStore, run_data.N
    z_samples = pars.z_samples

    x_current = initial_state
    C = Id

    save_state(data_store=ds,
               step=0,
               state=x_current,
               value=target_pdf(x_current),
               accepted_p=True,
               mean=p_c,
               covariance=C,
               scale=s,
               threshold=p_succ)

    for n in range(1, N):
        # generate new candidate sample
        x_new, delta = generate_CMA_candidate(current=x_current, scale=s, cov=C, z_sample=z_samples[n])

        # run the Metropolis-Hastings acceptance criterion
        accepted_p = acceptance_decision(current=x_current, proposed=x_new, pdf=target_pdf)
        p_succ, s = update_scale(p_succ=p_succ, sigma=s, accepted_p=accepted_p, pars=pars)

        if accepted_p:
            # the accepted candidate becomes the new state
            x_current = x_new
            p_c, C = update_cov(evol_point=p_c, cov=C, y=delta, avg_success_rate=p_succ, pars=pars)

        # save accepted and non-accepted states in the data store
        save_state(data_store=ds,
                   step=n,
                   state=x_current,
                   value=target_pdf(x_current),
                   accepted_p=accepted_p,
                   mean=p_c,
                   covariance=C,
                   scale=s,
                   threshold=p_succ)
    return run_data

--------------------------------------------------------------------------------
/FileHandling.py:
--------------------------------------------------------------------------------

# coding: utf-8

# # Save and Inspect the state

# In[66]:


import os
import numpy as np


# In[67]:


def save_state(data_store, step, state, value, accepted_p,
               mean=None, covariance=None, scale=None, threshold=None, C_evol_pt=None):
    data_store['States'][step] = state
    data_store['Densities'][step] = value
    data_store['Accepted_p'][step] = accepted_p
    data_store['Means'][step] = mean
    data_store['Covariances'][step] = covariance
    data_store['Scales'][step] = scale
    data_store['Thresholds'][step] = threshold

def inspect_state(data_store, step):
    state = data_store['States'][step]
    value = data_store['Densities'][step]
    accepted_p = data_store['Accepted_p'][step]
    mean = data_store['Means'][step]
    covariance = data_store['Covariances'][step]
    scale = data_store['Scales'][step]
    threshold = data_store['Thresholds'][step]
    print("State: ", state, "\nIts value: ", value,
          "\nAccepted: ", accepted_p,
          "\nScale: ", scale, "\nThreshold: ", threshold,
          "\nMean: ", mean, "\nCovariance: ", covariance)


# # Save in the file format used by *PyMC3*

# ## Structure of the data directory
#
# The directory structure of 'Data' is as follows
#
# 1. for each dimension $d$ of the state space, 'Data' contains a folder 'Dim d'
# 2. for each target, 'Dim d' contains a folder 'Target k' where $k$ is the index of
# that target in the test suite
# 3. for each sampler, 'Target k' contains a folder named after that sampler
# 4. for each run given the dimension of the state space, the target and the sampler,
# a file 'chain_i' is generated where $i$ is the index of the run.
#
#
# The global variable PARENT_FOLDER contains the parent folder, i.e. the folder where the experimental data will be stored, e.g.
#
# PARENT_FOLDER = '/Users/BM/Documents/Programming/Python/Notebooks/MCMC/To execute a run'
#
# The functions below assume that the parent folder is correctly set.

# In[68]:


def relative_path_to_chain(dim, t_name, s_name):
    data_folder = 'Data'
    dim_folder = 'Dimension_{}'.format(dim)
    target_folder = t_name
    sampler_folder = s_name
    return './'+'/'.join([data_folder, dim_folder, target_folder, sampler_folder])

class ChDir(object):
    """
    Step into a directory temporarily.
    """
    def __init__(self, path):
        self.old_dir = os.getcwd()
        self.new_dir = path

    def __enter__(self):
        os.chdir(self.new_dir)

    def __exit__(self, *args):
        os.chdir(self.old_dir)

def save_chain(chain, idx, individual_components_p=True):
    """Save a single-chain trace with index 'idx'. PyMC3 uses the labels x__0, x__1, x__2, etc.
    for a vector when they are regarded as COMPONENTS of that vector.
    If we want to treat them INDIVIDUALLY, the labels x_0, x_1, x_2, etc. have to be used.
    That is, we use double versus single underscore.
    """
    chain_name = 'chain-{}.csv'.format(idx)
    _, nbcols = chain.shape
    underscore = '_' if individual_components_p else '__'
    varnames = ['x{}{}'.format(underscore, index) for index in range(nbcols)]
    header = ','.join(varnames)
    np.savetxt(fname=chain_name, X=chain, header=header, comments='', delimiter=',')

def save_run_data(run_data, parent_folder):
    warning = 'Parent Folder \'%s\' does NOT exist' % (parent_folder)
    if not os.path.exists(parent_folder):
        return warning
    chain = run_data.DataStore['States']
    chain_folder = relative_path_to_chain(dim=run_data.StateSpace['dim'],
                                          t_name=run_data.Target['Name'],
                                          s_name=run_data.Sampler['Name'])
    if not os.path.exists(chain_folder):
        os.makedirs(chain_folder)
    with ChDir(chain_folder):
        nbfiles = len(os.listdir())
        save_chain(chain=chain, idx=nbfiles)

def save_comparison(combined_data, parent_folder):
    for i, run_data in enumerate(combined_data):
        save_run_data(run_data, parent_folder)


# In[75]:


def read_states(f_name, dim, t_name, s_name):
    chains_folder = relative_path_to_chain(dim=dim, t_name=t_name, s_name=s_name)
    with ChDir(chains_folder):
        return np.loadtxt(fname=f_name, skiprows=1, delimiter=',')

--------------------------------------------------------------------------------
/GaA_Sampling.py:
--------------------------------------------------------------------------------

# coding: utf-8

# # Gaussian Adaptation *Sampling*

# In[8]:

import numpy as np
import numpy.linalg as la
import matplotlib.pyplot as plt
import scipy as sp
import scipy.stats as ss
import math
import random
from collections import namedtuple


# In[3]:

from MH_Sampling import acceptance_decision
from FileHandling import save_state
from TestSuite import generate_state_space, generate_iid_samples, get_distribution, get_samples


# # *Maximum Entropy* Principle
# To be done.
#
# The entropy of the multivariate normal distribution $N(\mathbf{m}, \mathbf{C})$ with mean $\mathbf{m}$ and covariance matrix $\mathbf{C}$ is
#
# $$ H(N) = \ln \sqrt{(2 \pi e)^d \det \mathbf{C}}$$

# In[4]:

def entropy(cov):
    dim1, dim2 = cov.shape
    assert dim1 == dim2
    return math.log(np.sqrt((2*math.pi*math.e)**dim1 * la.det(cov)))


# ## Check Covariance Matrix

# In[5]:

def analyse_cov(cov):
    eigenvals, eigenvecs = la.eig(cov)
    print('Covariance Matrix:\n', cov)
    print('Determinant:', la.det(cov))
    print('Eigenvalues:', eigenvals)
    print('Eigenvectors:', eigenvecs)
    print('Symmetric:', np.allclose(cov, cov.T))


# # Gaussian Adaptation according to Mueller's Matlab code
#
# This notebook is based on Mueller's Matlab code and the paper *"Gaussian Adaptation as a unifying framework for black-box optimization and adaptive Monte Carlo sampling"* by *Christian L. Mueller* and *Ivo F. Sbalzarini*.

# ## Strategy Parameters
#
# The **strategy parameters** are:
# - the **acceptance ratio** $P$
# - the **expansion factor** $f_e > 1$ and **contraction factor** $f_c < 1$ used to update the global scale $\sigma$
# - the **weights** $\lambda_{\mathbf{m}}$, $\lambda_{\mathbf{C}}$, and $\lambda_{\theta}$
# used to update the mean $\mathbf{m}$, the covariance matrix $\mathbf{C}$, and the threshold $\theta$, respectively.
#
# They are **initialized** as follows, cf. p.2 of the MATLAB code of Mueller:
#
# - **acceptance ratio** $P = \frac{1}{e}$. **REMARK BM**: Check the explanation regarding $P = \frac{s}{s+f}$ where $s$ and $f$ are the number of successes and failures so far.
#
# - **expansion factor** $f_e = 1 + \beta (1 - P)$ and **contraction factor**
# $f_c = 1 - \beta P$ where $\beta = \lambda_{\mathbf{C}}$
#
# - the **weights** are initialized as follows
#
#   - $\lambda_{\mathbf{C}} = \frac{\ln (d+1)}{(d+1)^2}$
#   - $\lambda_{\mathbf{m}} = \frac{1}{ed}$
#   - $\lambda_{\theta} = \frac{1}{ed}$ without restart, cf. the end of Section II.B of the paper for what to do in case of restart.
#
# Here, $d$ is the dimension of the **search space** in case of **optimization** or of the **state space**
# in case of **sampling**.

# ### Initializing *strategy parameters*
# Cf. above for their initial values.
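#
# Worked example (illustration only): for a d = 2 state space the formulas
# above give
#
#   l_C = ln(3)/9        ~ 0.1221
#   l_m = 1/(2e)         ~ 0.1839
#   P   = 1/e            ~ 0.3679
#   f_e = 1 + l_C*(1-P)  ~ 1.0772   (expansion factor)
#   f_c = 1 - l_C*P      ~ 0.9551   (contraction factor)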
# In[30]:

GaA_Pars = namedtuple('GaA_Pars',
                      ['l_C', 'l_m', 'b', 'P',
                       'f_e', 'f_c', 'max_scale', 'max_cond',
                       'Origin', 'Id'])


# In[6]:

def init_GaA_pars(sp):
    D, origin, identity = sp['dim'], sp['Origin'], sp['Id']
    tmp_l_c = math.log(D+1)/(D + 1)**2
    tmp_P = 1/math.e
    return GaA_Pars(l_C=tmp_l_c,
                    l_m=1/(math.e*D),
                    b=tmp_l_c,
                    P=tmp_P,
                    f_e=1 + tmp_l_c*(1-tmp_P),
                    f_c=1 - tmp_l_c*tmp_P,
                    max_scale=1000,
                    max_cond=80,  # The value used by Mueller is 1e6*D but this results in errors.
                    Origin=origin,
                    Id=identity)


# In[10]:

def display_parameters(pars):
    str_1 = "l_C: {:1.4f}\nl_m: {:1.4f}\nb: {:1.4f}\nP: {:1.4f}"
    str_2 = "\nf_e: {:1.4f}\nf_c: {:1.4f}\nmax_scale: {:1.4f}\nmax_cond: {:1.4f}"
    pars_info_1 = str_1.format(pars.l_C, pars.l_m, pars.b, pars.P)
    pars_info_2 = str_2.format(pars.f_e, pars.f_c, pars.max_scale, pars.max_cond)
    print(pars_info_1, pars_info_2)


# # Generate the next sample using $\mathbf{Q}$
#
# The new state $\mathbf{x}_{n+1}$ is generated as follows
#
# $$\mathbf{x}_{n+1} = \mathbf{m}_{n} + \sigma_n \mathbf{Q}_{n} \mathbf{z}_{n}$$
#
# where $\sigma_n$ is the global scale, $\mathbf{Q}_{n}$ is the "square root" of the covariance matrix $\mathbf{C}_{n}$ as defined below, and
# $\mathbf{z}_{n}$ is a sample of the multivariate standard normal distribution $N(\mathbf{0}, \mathbb{1}_d)$ generated at step $n$.

# In[11]:

def Q_generate_GaA_candidate(mean, scale, Q, z_sample):
    # This function uses the normalized square root Q of the covariance matrix C,
    # cf. p.7 of the MATLAB code of Mueller.
    x = mean + scale*(Q @ z_sample)
    return x


# # Generate the next sample using $C$

# In[12]:

def C_generate_GaA_candidate(mean, C, z_sample):
    # Here C itself is applied to the standard normal sample,
    # cf. the C-variant of the sampler below.
    return mean + C @ z_sample


# # Updates of the *scale* $\sigma$, the *mean* $\mathbf{m}$, and the *threshold* $\theta$

# ## Update of the **scale** $\sigma$
#
# The **scale** is *updated at each step*:
#
# When the new sample is **accepted**, the scale is **increased**:
#
# $$\sigma_{n+1} = f_e \sigma_n$$
# where $f_e > 1$ is the *expansion factor*, one of the strategy parameters of Gaussian Adaptation.
#
# When the sample is **rejected**, the scale is **decreased**:
# $$\sigma_{n+1} = f_c \sigma_n$$
# where $f_c < 1$ is the *contraction factor*, another strategy parameter.

# In[13]:

def contract(scale, pars):
    return pars.f_c*scale

def expand(scale, pars):
    # cf. p.10 of the MATLAB code of Mueller
    f_e, max_scale = pars.f_e, pars.max_scale
    next_scale = f_e*scale
    if next_scale <= max_scale:
        return next_scale
    else:
        return max_scale


# ## Update of the *mean* $\mathbf{m}$
#
# The **mean** is **only updated** when the new sample $\mathbf{x}_{n+1}$ is **accepted**. The new mean is
#
# $$\mathbf{m}_{n+1} = (1 - \lambda_{\mathbf{m}}) \mathbf{m}_{n} + \lambda_{\mathbf{m}} \mathbf{x}_{n+1}$$
#
# Here, $\lambda_{\mathbf{m}}$ is a *strategy parameter* of *Gaussian Adaptation*.

# In[14]:

# This code is for global optimization, NOT sampling.
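# (Kept for reference: with l_m < 1 the mean moves only part of the way towards
# the accepted sample. The sampler below uses GaA_mean_update instead, which
# corresponds to l_m = 1.)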
def GaA_mean_update_2(mean, sample, pars):
    l_m = pars.l_m
    return (1-l_m)*mean + l_m*sample


# In[15]:

# In case of sampling l_m = 1; in other words, the new sample becomes the next mean.
def GaA_mean_update(mean, sample, pars):
    return sample


# # Update of the covariance matrices $\mathbf{C}$ and $\mathbf{Q}$

# $\mathbf{C}$ and $\mathbf{Q}$ are covariance matrices and therefore positive definite and symmetric.
# Symmetry might get lost due to round-off errors in the update process. After each update
# we make sure that the result is still symmetric.
#
# The first way to do this uses the *NumPy* function *triu* that returns the upper triangular part of a matrix.
# The second one uses the *transpose* of a matrix. Recall that $\mathbf{S} = \mathbf{S}^\top$ for a symmetric matrix
# $\mathbf{S}$.

# In[16]:

def triu_enforce_symmetry(cov):
    dim1, dim2 = cov.shape
    assert dim1 == dim2
    return np.triu(cov, 0) + np.triu(cov, 1).T

def transpose_enforce_symmetry(cov):
    dim1, dim2 = cov.shape
    assert dim1 == dim2
    return 1/2*(cov + cov.T)


# # Update of the "*square root*" $\mathbf{Q}$ of the *covariance* matrix $\mathbf{C}$

# First, we calculate $\Delta \mathbf{C}_{n+1}$ as follows
#
# $$\Delta \mathbf{C}_{n+1} = (1-\lambda_\mathbf{C})\mathbb{1}_d + \lambda_\mathbf{C} \mathbf{z}_n \mathbf{z}_n^\top$$
#
# where $\mathbb{1}_d$ is the identity matrix, $\mathbf{z}_n$ is the $n$th sample of the multivariate standard Gaussian distribution, and $\lambda_\mathbf{C}$ is the strategy parameter used in the update of the covariance matrix $\mathbf{C}$.

# In[17]:

def delta_C(z_sample, pars):
    l_C = pars.l_C
    identity = pars.Id
    deltaC = (1-l_C)*identity + l_C*np.outer(z_sample, z_sample)
    #return transpose_enforce_symmetry(deltaC)
    return deltaC


# Next, we define $\Delta \mathbf{Q}_{n+1}$ as
#
# $$\Delta \mathbf{Q}_{n+1} \triangleq \sqrt{\Delta \mathbf{C}_{n+1}}$$

# In[18]:

def sqrtm(cov):
    D, B = la.eigh(cov)
    sqrtD = np.diag(np.sqrt(D))
    # Return the square root Q of the matrix C.
    return B @ sqrtD @ B.T


# Finally, we calculate $\mathbf{Q}_{n+1}$ as
# $$\mathbf{Q}_{n+1} = \mathbf{Q}_n \Delta \mathbf{Q}_{n+1}$$

# In[19]:

def normalize(cov):
    D, _ = cov.shape
    normalization_constant = la.det(cov)**(1/D)
    normalized_cov = cov/normalization_constant
    #det = la.det(normalized_cov)
    #np.testing.assert_almost_equal(det, 1.0)
    return normalized_cov


# In[20]:

def GaA_Q_update(z_sample, Q, pars):
    max_cond = pars.max_cond
    deltaC = delta_C(z_sample, pars)
    deltaQ = sqrtm(deltaC)
    Q_next = normalize(transpose_enforce_symmetry(Q @ deltaQ))
    if la.cond(Q_next) <= max_cond:
        return Q_next
    else:
        return Q


# # Update of the *covariance* matrix $\mathbf{C}$

# In[21]:

def GaA_C_update(C, mean, sample, pars):
    # Cf. p.10 of the MATLAB code of Mueller.
    l_C, max_cond = pars.l_C, pars.max_cond
    delta = mean - sample
    C_next = (1 - l_C)*C + l_C*np.outer(delta, delta)
    if la.cond(C_next) <= max_cond:
        return C_next
    else:
        return C


# # Gaussian Adaptation Sampling

# In[22]:

def Q_GaA_sampler(pars, target, initial_state, run_data):
    target_pdf, sp = target['pdf'], target['State Space']
    Origin, Id = sp['Origin'], sp['Id']

    ds, N = run_data.DataStore, run_data.N
    z_samples = get_samples(sp=sp, name='Z')

    # Set up and save the initial state
    m = x_current = initial_state
    sigma = 1
    Q = Id
    save_state(data_store=ds,
               step=0,
               state=x_current,
               value=target_pdf(x_current),
               accepted_p=True,
               mean=m,
               covariance=Q,
               scale=sigma,
               threshold=None)

    # Sample and save the state
    for n in range(1, N):
        z_sample = z_samples[n]
        x_proposed = Q_generate_GaA_candidate(mean=x_current,
                                              scale=sigma,
                                              Q=Q,
                                              z_sample=z_sample)
        accepted = acceptance_decision(x_current, x_proposed, target_pdf)
        if accepted:
            x_current = x_proposed
            sigma = expand(sigma, pars=pars)
            m = GaA_mean_update(mean=m, sample=x_proposed, pars=pars)
            Q = GaA_Q_update(Q=Q, z_sample=z_sample, pars=pars)
        else:
            sigma = contract(sigma, pars=pars)
        save_state(data_store=ds,
                   step=n,
                   state=x_current,
                   value=target_pdf(x_current),
                   accepted_p=accepted,
                   mean=m,
                   covariance=Q,
                   scale=sigma,
                   threshold=None)
    return run_data


# In[23]:

def C_GaA_sampler(pars, target, initial_state, run_data):
    target_pdf, sp = target['pdf'], target['State Space']
    Origin, Id = sp['Origin'], sp['Id']

    ds, N = run_data.DataStore, run_data.N
    z_samples = get_samples(sp=sp, name='Z')

    # Set up and save the initial state
    m = x_current = initial_state
    sigma = 1
    C = Id

    save_state(data_store=ds,
               step=0,
               state=x_current,
               value=target_pdf(x_current),
               accepted_p=True,
               mean=m,
               covariance=C,
               scale=sigma,
               threshold=None)

    # Sample and save the state
    for n in range(1, N):
        z_sample = z_samples[n]
        x_proposed = C_generate_GaA_candidate(mean=x_current,
                                              C=C,
                                              z_sample=z_sample)
        accepted = acceptance_decision(x_current, x_proposed, target_pdf)
        if accepted:
            x_current = x_proposed
            sigma = expand(sigma, pars=pars)
            # Update C with the deviation of the accepted sample from the OLD
            # mean, before the mean itself is moved to that sample; updating
            # the mean first would make the deviation zero since l_m = 1.
            C = GaA_C_update(C=C, mean=m, sample=x_proposed, pars=pars)
            m = GaA_mean_update(mean=m, sample=x_proposed, pars=pars)
        else:
            sigma = contract(sigma, pars=pars)
        save_state(data_store=ds,
                   step=n,
                   state=x_current,
                   value=target_pdf(x_current),
                   accepted_p=accepted,
                   mean=m,
                   covariance=C,
                   scale=sigma,
                   threshold=None)
    return run_data

--------------------------------------------------------------------------------
/L_AM_Sampling.py:
--------------------------------------------------------------------------------

# coding: utf-8

# # *Adaptive* MH using *Cholesky decomposition* of the covariance
#
# See the 1999 and 2001 papers of Haario et al.
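#
# This module is a variant of AM_Sampling.py: instead of the full empirical
# covariance matrix C it maintains the lower Cholesky factor L with C = L L^T,
# and L is adapted directly with rank-one updates (see rank_1_update below),
# so no factorization of C is needed when generating a candidate.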
# In[1]:

import numpy as np
import numpy.linalg as la
import matplotlib.pyplot as plt
import scipy as sp
import scipy.stats as ss
import math
import random
import numba
from collections import namedtuple
get_ipython().run_line_magic('precision', '4')
get_ipython().run_line_magic('matplotlib', 'inline')


# from importlib import reload
# reload(ut)

# In[2]:

from MH_Sampling import acceptance_decision
from FileHandling import save_state
from TestSuite import generate_state_space, generate_iid_samples, get_samples


# In[3]:

AM_Pars = namedtuple('AM_Pars',
                     ['Origin', 'Id',
                      'sigma_0', 'sigma_opt', 'L_0',
                      'z_samples'])

def init_AM_pars(sp):
    dim, origin, idty = sp['dim'], sp['Origin'], sp['Id']
    sigma_0, sigma_opt = 0.1/np.sqrt(dim), sp['sigma_opt']
    L_0 = idty
    return AM_Pars(Origin=origin, Id=idty,
                   sigma_0=sigma_0, sigma_opt=sigma_opt, L_0=L_0,
                   z_samples=get_samples(sp=sp, name='Z'))


# # Adaptive MH algorithm *AM*

# ## Generate the candidate next sample
#
# We consider a version of the *Adaptive Metropolis* (*AM*) sampler of Haario
# et al. (2001). We want to sample from the $d$-dimensional target distribution $\pi(\mathbf{x})$.
#
# We perform a Metropolis algorithm with proposal distribution $\mathbf{Q}_n$ at iteration $n$ given by
#
# $$\mathbf{Q}_n(\mathbf{x}, ·) = N(\mathbf{x}, \sigma_{0}^2 \mathbb{1}_d)$$
#
# for $n \leq 2d$, while for $n > 2d$
#
# $$\mathbf{Q}_n(\mathbf{x}, ·) = (1 − \beta) N(\mathbf{x}, \sigma_{opt}^2 \mathbf{C}_n) +
# \beta N(\mathbf{x}, \sigma_{0}^2 \mathbb{1}_d)$$
#
# where $\mathbf{C}_n$ is the current empirical estimate of the covariance of the target distribution
# based on the samples so far, $\sigma_{0}^2 = \frac{0.1^2}{d}$ and $\sigma_{opt}^2 = \frac{2.38^2}{d}$ are the initial and optimal scale, respectively, and $\beta$ is a small positive constant; we use $\beta = 0.05$.
#
# In other words, the next candidate is sampled from
#
# $$\mathbf{x}^{*} \sim \mathbf{Q}_n(\mathbf{x}, ·)$$
#
# The text above is adapted from Section 2 of Gareth O. Roberts and Jeffrey S. Rosenthal (2008),
# *Examples of Adaptive MCMC*.

# ## The proposal Cholesky factor and scale used at step $n$

# In[4]:

Factors = namedtuple('Factors',
                     ['Chol', 'Scale'])


# In[5]:

def get_prop_data(L, n, pars):
    beta = 0.05
    d, _ = L.shape
    sigma_0, sigma_opt, L_0 = pars.sigma_0, pars.sigma_opt, pars.L_0
    init, current = Factors(Chol=L_0, Scale=sigma_0), Factors(Chol=L, Scale=sigma_opt)
    init_period = 2*d
    if n <= init_period:
        return init
    else:
        return current if np.random.binomial(n=1, p=1-beta) else init


# # Generation of the candidate
#
# If the proposal distribution is the $d$-dimensional multivariate normal distribution $N(\pmb{m}, \pmb{C})$ then
# the next candidate $\pmb{x}^{*}$ is generated according to that distribution, i.e.
#
# $$\pmb{x}^{*} \sim N(\pmb{m}, \pmb{C})$$
#
# If $L$ is the lower Cholesky factor of $C$, i.e. $C = L L^\top$, this can be rewritten as
#
# $$\pmb{x}^{*} = \pmb{m} + L \pmb{z}$$
#
# where $\pmb{z} \sim N(\pmb{0}, \mathbb{1}_d)$ is a sample of the $d$-dimensional standard normal distribution.
#
# In case of $$\pmb{x}^{*} \sim N(\pmb{m}, \sigma^2 \pmb{C})$$ this becomes
#
# $$\pmb{x}^{*} = \pmb{m} + \sigma L \pmb{z}$$

# In[6]:

def C_generate_candidate(m, C, s):
    # Generate a candidate directly from N(m, s^2 C).
    return ss.multivariate_normal(mean=m, cov=s**2*C).rvs()


# In[7]:

def L_generate_candidate(m, L, s, z):
    return m + s*L@z


# In[8]:

# see "A More Efficient Rank-one Covariance Matrix Update for Evolution Strategies", Igel, Krause 2015,
# adapted slightly to incorporate alpha, beta != 1
@numba.jit(nopython=True)
def rank_1_update(L, u, alpha, beta):
    assert alpha > 0, 'Argument alpha should be positive'
    assert beta > 0, 'Argument beta should be positive'
    d = len(u)
    L = np.sqrt(alpha)*L  # Added
    b = 1
    nL = np.zeros_like(L)
    v = np.copy(u)  # Added
    for j in np.arange(d):
        nL[j,j] = np.sqrt(L[j,j]**2 + (beta/b)*(v[j]**2))
        gamma = b*L[j,j]**2 + beta*v[j]**2
        for k in range(j+1, d):
            v[k] = v[k] - (v[j]/L[j,j])*L[k,j]
            nL[k,j] = (nL[j,j]/L[j,j])*L[k,j] + (nL[j,j]*beta*v[j]/gamma)*v[k]
        b = b + beta*(v[j]**2/L[j,j]**2)
    return nL


# In[9]:

def update_moments(mean, L, sample, n):
    next_n = n + 1
    w = 1/next_n
    new_mean = mean + w*(sample - mean)
    # Rank-one update of the Cholesky factor with the deviation of the
    # sample from the current mean, cf. update_L below.
    new_L = rank_1_update(L=L, u=sample - mean, alpha=1-w, beta=w)
    return new_mean, new_L, next_n


# In[10]:

@numba.jit
def update_L(samples):
    N, d = samples.shape
    initial_period = 2*d
    initial_cov = np.cov(samples[:initial_period], rowvar=False)
    initial_mean = np.mean(samples[:initial_period], axis=0)
    C = initial_cov
    L = la.cholesky(initial_cov)
    mean = initial_mean
    for n in range(initial_period, len(samples)):
        sample = samples[n]
        w = 1/(n+1)
        L = rank_1_update(L, sample-mean, alpha=(n-1)/n, beta=w)
        mean = (1-w)*mean + w*sample
    return L@L.T


# In[11]:

def AM_sampler(pars, target, initial_state, run_data):
    ds, N = run_data.DataStore, run_data.N

    target_pdf = target['pdf']
    z_samples = pars.z_samples

    current = initial_state
    mean, L, sigma_0 = pars.Origin, pars.L_0, pars.sigma_0
    accepted = True
    d = len(initial_state)
    init_period = 2*d
    samples = []
    for n in range(init_period):
        save_state(data_store=ds, step=n,
                   state=current, value=target_pdf(current),
                   mean=mean, covariance=L, accepted_p=accepted)
        candidate = L_generate_candidate(m=current, L=L, s=sigma_0, z=z_samples[n])
        accepted = acceptance_decision(current=current, proposed=candidate, pdf=target_pdf)
        if accepted:
            current = candidate
        samples.append(current)
    # Calculate the first two moments at the end of the initial period.
    initial_cov = np.cov(samples, rowvar=False)
    initial_mean = np.mean(samples, axis=0)
    C = initial_cov
    L = la.cholesky(initial_cov)
    mean = initial_mean


    # Once the initial period is finished we start to adapt.
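    # From here on the proposal mixes the adapted pair (sigma_opt, L) with the
    # fixed initial pair (sigma_0, L_0), cf. get_prop_data above. The moments
    # are updated with the current state of the chain at every step, whether
    # the candidate was accepted or not.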
    for n in range(init_period, N):
        #if n%1000 == 0:
        #    print('n:', n)
        save_state(data_store=ds, step=n,
                   state=current, value=target_pdf(current),
                   mean=mean, covariance=L, accepted_p=accepted)

        p_L, p_sigma = get_prop_data(L=L, n=n, pars=pars)
        candidate = L_generate_candidate(m=current, L=p_L, s=p_sigma, z=z_samples[n])
        accepted = acceptance_decision(current=current, proposed=candidate, pdf=target_pdf)
        if accepted:
            current = candidate
        mean, L, _ = update_moments(mean, L, current, n)
    return run_data

--------------------------------------------------------------------------------
/MH_Sampling.py:
--------------------------------------------------------------------------------

# coding: utf-8

# # *Metropolis-Hastings* Sampling

# In[ ]:

import numpy as np
import numpy.linalg as la
import matplotlib.pyplot as plt
import scipy as sp
import scipy.stats as ss
import math
import random
from collections import namedtuple
import numba
#%precision 4
#%matplotlib inline


# In[ ]:

from FileHandling import save_state
from TestSuite import get_samples


# In[ ]:

def acceptance_decision(current, proposed, pdf):
    # Remark: 'accepted_p' includes the case where p_proposed > p_current,
    # since u, a random number between 0 and 1, is then
    # always less than the ratio p_proposed/p_current.
    # But for readability we make a distinction between the
    # two cases below.

    p_current, p_proposed = pdf(current), pdf(proposed)
    if p_current <= p_proposed:
        return True
    else:
        u = np.random.rand()
        return u <= p_proposed/p_current


# In[ ]:

# computing the Metropolis-Hastings acceptance in log space

def compose2(f, g):
    return lambda x: f(g(x))

def likelihood_acceptance_decision(current, proposed, log_pdf):
    # Work with log densities: accept when log u <= log p_proposed - log p_current,
    # which is equivalent to u <= p_proposed/p_current.
    # The first branch is redundant but kept for readability.

    p_current, p_proposed = log_pdf(current), log_pdf(proposed)
    if p_current <= p_proposed:
        return True
    else:
        u = np.random.rand()
        return np.log(u) <= p_proposed - p_current


# In[ ]:

## Proposal Distribution
# Samples are generated when a run is initialized.

MH_Pars = namedtuple('MH_Pars', ['Proposal'])

def init_MH_pars(sp):
    proposal = sp['Test Suite']['Proposal']
    return MH_Pars(Proposal=proposal)


# In[ ]:

def generate_candidate(center, delta):
    return center + delta


# # Metropolis-Hastings algorithm

# In[ ]:

def MH_sampler(pars, target, initial_state, run_data, C_generation=False, likelihood=True):
    ds, N = run_data.DataStore, run_data.N

    target_pdf = target['pdf']
    proposal_samples = pars.Proposal['Samples']

    current = initial_state
    accepted = True

    # The integration of the C- and L-variants still has to be done.
    #if C_generation:
    #    generation_function = generate_candidate
    #else:
    #    generation_function = L_generate_candidate

    if likelihood:
        decision_function, comparison_function = likelihood_acceptance_decision, compose2(np.log, target['pdf'])
    else:
        decision_function, comparison_function = acceptance_decision, target['pdf']

    for n in range(1, N):
        save_state(data_store=ds, step=n,
                   state=current, value=target_pdf(current),
                   accepted_p=accepted)
        proposed = generate_candidate(center=current, delta=proposal_samples[n])
        # Use the matching (log-)density in the acceptance decision.
        accepted = decision_function(current, proposed, comparison_function)
        if accepted:
            current = proposed
        else:  # The else clause is redundant but added for readability.
            current = current
    return run_data


# # Metropolis-Hastings using the Cholesky factor $L$ instead of the full covariance matrix $C$
#

# In[ ]:

def L_generate_candidate(center, L, scale, z_sample):
    return center + scale*L@z_sample


# In[ ]:

def L_MH_sampler(pars, target, initial_state, run_data, likelihood=True):
    ds, N = run_data.DataStore, run_data.N
    sp = target['State Space']
    opt_scale, L = sp['sigma_opt'], sp['Id']

    if likelihood:
        decision_function, comparison_function = likelihood_acceptance_decision, compose2(np.log, target['pdf'])
    else:
        decision_function, comparison_function = acceptance_decision, target['pdf']

    target_pdf = target['pdf']
    current = initial_state
    accepted = True

    z_samples = get_samples(sp=sp, name='Z')
    for n in range(1, N):
        save_state(data_store=ds, step=n,
                   state=current, value=target_pdf(current),
                   accepted_p=accepted)
        proposed = L_generate_candidate(center=current,
                                        L=L, scale=opt_scale,
                                        z_sample=z_samples[n])
        accepted = decision_function(current, proposed, comparison_function)
        if accepted:
            current = proposed
        else:  # The else clause is redundant but added for readability.
            current = current
    return run_data

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Adaptive Sampling

The project is created to distribute code written as part of my master thesis entitled ```Adaptive Markov Chain Monte Carlo for Bayesian Inference```.

Adaptive Markov Chain Monte Carlo (MCMC) and stochastic optimization methods are techniques for evaluating intractable integrals. Code for sampling and optimization algorithms, as found in their original papers, has been written in Python and tested against established benchmarks. In addition, an improvement has been made by incorporating adaptation into stochastic optimization methods, thereby transforming them into samplers. Namely, the Gaussian Adaptation (GaA) and Covariance Matrix Adaptation Evolution Strategy (CMA-ES) optimizers have been transformed into Metropolis-GaA and (1+1)-CMA samplers, respectively. Performance is quantified using existing convergence and performance measuring tools. Results show adaptive MCMCs with better convergence, mixing, and acceptance ratios.

## Getting Started
You will need Jupyter notebook with Python 3 and the modules listed below. For detailed information and examples of experiment runs, see ```Adaptive_MCMC_for_Bayesian_Inference.pdf```, Chapter 6: Experiments.
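Each sampler is also importable as a plain Python module. For example, a minimal sketch (assuming the repository root is on your Python path):

```
from TestSuite import generate_state_space
from AM_Sampling import init_AM_pars, AM_sampler
```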
### Python modules:

#### Five sampler modules
* Adaptive Metropolis: ```AM_Sampling.py```
* Covariance Matrix Adaptation: ```CMA_Sampling.py```
* Gaussian Adaptation: ```GaA_Sampling.py```
* Metropolis Hastings: ```MH_Sampling.py```
* Adaptive MH using the Cholesky decomposition of the covariance: ```L_AM_Sampling.py```

#### Test suite module
* Test suites found in Haario et al. (1999): ```TestSuite.py```


#### Supporting modules
* ```FileHandling.py```
* ```Visualization.py```


### Comparing samplers via autocorrelation
![autocorrelation](https://user-images.githubusercontent.com/16397101/39100333-195ac932-463d-11e8-87a2-b5a0bc12e4e9.png)


## Running the tests
The notebook
```Experiments.ipynb```
offers an easy way to run any of the five samplers and plot their results.

Open the Experiments notebook in Jupyter. You will also need the supporting and test modules imported, along with the required libraries as specified in ```Experiments.ipynb```.

### Example
To run the **Adaptive Metropolis** sampler, type ```AM``` instead of the current sampler ```CMA```. Sampler names are found in ```Experiments.ipynb``` under **The Samplers**.

In this case, the code:
```
SPEC = specify_run(dim=2, N=10000, name_target='Pi_4', name_sampler='CMA', run_idx=0)
DATA = execute_run(SPEC)
```

becomes:
```
SPEC = specify_run(dim=2, N=10000, name_target='Pi_4', name_sampler='AM', run_idx=0)
DATA = execute_run(SPEC)
```

Also, you might want to change the target distribution. To do so, you need to change ```name_target```.
For reference, see ```Adaptive_MCMC_for_Bayesian_Inference.pdf```, Chapter 6.

#### Biased coin example
![trials5](https://user-images.githubusercontent.com/16397101/39100228-bc3842bc-463b-11e8-84c7-393fccb19fd9.png)

## Built With
* [Jupyter](http://jupyter.org/) - Jupyter Notebooks
* [PyMC3](https://docs.pymc.io/) - Bayesian statistical modeling and Probabilistic Machine Learning focusing on advanced MCMC

## Authors

* **Prof. Dr. Bernard Manderik** - *Initial work*


## Acknowledgments

* Hat tip to Nixon Kipkorir Ronoh and Edna Chelangat Milgo

--------------------------------------------------------------------------------
/TestSuite.py:
--------------------------------------------------------------------------------

# coding: utf-8

# In[3]:


import numpy as np
import numpy.linalg as la
import matplotlib.pyplot as plt
import scipy as sp
import scipy.stats as ss
import math
import random
from collections import namedtuple
#get_ipython().magic('precision 4')
#get_ipython().magic('matplotlib inline')


# # State Space
# The dictionary *State Space* contains the dimension of the state space, the origin and identity matrix,
# and the optimal scale $\sigma_{opt}$ according to Gelman et al.: $\sigma_{opt} = 2.38/\sqrt{d}$,
# where $d$ is the dimension of the state space and $\sigma_{opt}$ is the scale of the isotropic proposal distribution used in the Metropolis-Hastings sampler.
#
# Later on we will add the target distributions of the test suite used in our experiments.
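# For example (illustration only): for dimension d = 4 the optimal scale
# defined below evaluates to sigma_opt = 2.38/sqrt(4) = 1.19.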
# In[2]:


def calculate_optimal_sigma(dim):
    return 2.38/np.sqrt(dim)

def state_space(dim):
    return {'dim': dim, 'Origin': np.zeros(dim), 'Id': np.eye(dim),
            'sigma_opt': calculate_optimal_sigma(dim)}


# ## A random state of the state space

# In[3]:


def generate_random_state(sp, min_range=-10, max_range=10):
    """Generates a random state in the state space that fits in the area to be plotted.
    """
    return np.random.uniform(low=min_range, high=max_range, size=sp['dim'])


# In[4]:


#random.seed(10) #REMARK BM: Random seed doesn't seem to work.
# Note: the states are generated with np.random, so np.random.seed would have
# to be used to make the runs reproducible.


# In[5]:


def generate_initial_states(sp, nb_runs):
    initial_states = {i: generate_random_state(sp) for i in np.arange(nb_runs)}
    # Only update if the key does not exist yet. Check out how to do this.
    sp.update({'Initial States': initial_states})


# # Test Suite of Target Distributions
#
# ## Uncorrelated and Correlated Gaussian Distributions
#
# $\pi_1$ is the uncorrelated Gaussian distribution with covariance matrix
#
# $$
# C_u=
# \begin{pmatrix}
# 100 & 0 \\
# 0 & 1
# \end{pmatrix}
# $$
#
# and $\pi_2$ is the correlated Gaussian distribution with
# covariance matrix
#
# $$
# C_c=
# \begin{pmatrix}
# 50.5 & 49.5 \\
# 49.5 & 50.5
# \end{pmatrix}
# $$

# # Covariance Matrix

# In[6]:


def generate_rotation_matrix(theta):
    # The rotation matrix is 2-dimensional
    return np.array([[np.cos(theta), -np.sin(theta)],
                     [np.sin(theta), np.cos(theta)]])

def generate_correlated_cov(uncorrelated_cov, theta):
    correlated_cov = np.copy(uncorrelated_cov)
    R = generate_rotation_matrix(theta)
    R_inv = la.inv(R)
    # Rotate the first 2 dimensions only and leave the other dimensions
    # of the covariance matrix intact.
    correlated_cov[:2, :2] = R @ uncorrelated_cov[:2, :2] @ R_inv
    return correlated_cov


# ### We could also use the fact that the transpose of a rotation is also its inverse.

# def alt_generate_correlated_cov(uncorrelated_cov, theta):
112 | # correlated_cov = np.copy(uncorrelated_cov) 113 | # R = generate_rotation_matrix(theta) 114 | # correlated_cov[:2, :2] = R @ uncorrelated_cov[:2,:2] @ R.T 115 | # return correlated_cov 116 | 117 | # ## Contour Functions corresponding with the Target Distributions 118 | # 119 | # ### Standard Ellipse and General Ellipse 120 | # 121 | # When the ellips with equation 122 | # 123 | # $$\left( \frac{x_1}{a} \right)^2 + \left( \frac{x_2}{b} \right)^2 = 1$$ 124 | # 125 | # is rotated over an angle $\theta$ then the equation of that ellips becomes 126 | # 127 | # $$\underbrace{\left(\frac{\cos^2\theta}{a^2} + \frac{\sin^2\theta}{b^2}\right)}_\text{A} x_1^ 2 + 128 | # \underbrace{\left( \frac{\sin^2\theta}{a^2} + \frac{\cos^2\theta}{b^2} \right)}_\text{C} x_2^2 129 | # + 2 \underbrace{\cos \theta \sin \theta \left( \frac{1}{a^2} - \frac{1}{b^2} \right)}_\text{B} x_1 x_2 = 1$$ 130 | # 131 | # or 132 | # 133 | # $$A x_1^2 + 2 B x_1 x_2 + C x_2^2 = 1$$ 134 | # 135 | # where 136 | # 137 | # $$B^2 - A C < 0$$ 138 | # actually 139 | # 140 | # $$B^2 - A C = -1/(ab)^2$$ 141 | 142 | # In[7]: 143 | 144 | 145 | def get_ellipse_parameters(cov): 146 | 147 | """Get the first 2 eigenvalues and their angle of covariance matrix. 148 | The eigenvalues are returned in descending order together with 149 | the angle of rotation (in radians). The eigenvalues correspond with 150 | half the length, a and b, of these two main axes of 151 | the general ellipse. 152 | If the angle is small enough, meaning that the covariance matrix 153 | can be considered diagonal, 0.0 is returned.""" 154 | 155 | e, v = la.eig(cov) 156 | e_1, e_2, *_ = e 157 | a, b = np.sqrt(e_1), np.sqrt(e_2) 158 | v_a, v_b, *_ = v 159 | # a must be at least b 160 | if a < b: 161 | a, b = b, a 162 | v_a, v_b = v_b, v_a 163 | cos, *_ = v_a 164 | theta = np.arccos(cos) 165 | if np.isclose(theta, 0): 166 | theta = 0.0 167 | return a, b, theta 168 | 169 | 170 | # In[8]: 171 | 172 | 173 | def calculate_ellipse_coefficients(a, b, theta): 174 | sin, cos = np.sin(theta), np.cos(theta) 175 | cos_sqd, sin_sqd = cos**2, sin**2 176 | a_sqd, b_sqd = a**2, b**2 177 | A = cos_sqd/a_sqd + sin_sqd/b_sqd 178 | C = sin_sqd/a_sqd + cos_sqd/b_sqd 179 | B = (1/a_sqd - 1/b_sqd)*sin*cos 180 | return A, B, C 181 | 182 | 183 | # In[9]: 184 | 185 | 186 | def get_Gaussian_contour(cov): 187 | a, b, theta = get_ellipse_parameters(cov) 188 | A, B, C = calculate_ellipse_coefficients(a, b, theta) 189 | return lambda x1, x2: A*x1**2 + 2*B*x1*x2 + C*x2**2 190 | 191 | 192 | # # Distribution 193 | # 194 | # We have three kind of distributions in the test suite 195 | # 1. Gaussian distributions 196 | # 2. mixture of Gaussians 197 | # 3. transformed Gaussians, the so called twist distributions 198 | # 199 | # The second kind is not implemented yet. 200 | # 201 | # 202 | # The dictionary *Gaussian* contains the following fields 203 | # * its *Name* 204 | # * the *State Space* on which the probability distribution is defined 205 | # * its *probabibility density function* or *pdf* 206 | # * *Samples* that are *independent and identically distributed*. These samples will be compared to 207 | # the samples generated by the MCMC samplers studied. These samples are added at run time. 208 | # * the *Contour Function* used to plot the 209 | # * *Contour Levels* corresponding to the preset confidence levels, cfr. the global variable 210 | # CONFIDENCE_LEVELS for the values used. The values of the $\chi^2$ distribution corresponding to 211 | # the confidence levels used: 67, 90, 95 and 99 percent. 
# # Distribution
#
# We have three kinds of distributions in the test suite:
# 1. Gaussian distributions
# 2. mixtures of Gaussians
# 3. transformed Gaussians, the so-called twist distributions
#
# The second kind is not implemented yet.
#
#
# The dictionary *Gaussian* contains the following fields
# * its *Name*
# * the *State Space* on which the probability distribution is defined
# * its *probability density function* or *pdf*
# * *Samples* that are *independent and identically distributed*. These samples will be compared to
# the samples generated by the MCMC samplers studied. They are added at run time.
# * the *Contour Function* used to plot the contour lines of the distribution
# * *Contour Levels* corresponding to the preset confidence levels, cf. the default
# confidence_levels argument of get_chi2s below: the values of the $\chi^2$ distribution
# corresponding to the 67, 90, 95 and 99 percent confidence levels.
#
# In addition to the fields of the Gaussian dictionary, a *non-Gaussian* distribution contains
# the fields *Generator* and *Transformation*: the latter is the function that generates its
# i.i.d. samples from the samples of the generating Gaussian.

# ## Gaussian Distributions in the Test Suite

# ### Draw the contour lines corresponding to preset *confidence levels*

# In[10]:


def get_chi2s(df, confidence_levels=[0.67, 0.90, 0.95, 0.99]):
    """ppf stands for the percent point function (the inverse of the cdf, i.e. percentiles)."""
    contour_levels = [ss.chi2.ppf(conf, df) for conf in confidence_levels]
    return contour_levels


# ### Generate the Gaussians given their covariances

# In[11]:


def generate_Gaussian(sp, name, mean, cov):
    rv = ss.multivariate_normal(mean=mean, cov=cov)
    return {'Name': name,
            'State Space': sp,
            'pdf': rv.pdf,
            'Mean': mean,
            'Covariance': cov,
            'Contour Function': get_Gaussian_contour(cov),
            # The contours are plotted in a 2-dimensional projection,
            # hence 2 degrees of freedom. The i.i.d. 'Samples' are
            # added at run time.
            'Contour Levels': get_chi2s(df=2),
            }


# In[12]:


def generate_covs(sp):
    # The standard normal Z has the identity matrix as covariance.
    identity = sp['Id']

    # The optimal isotropic proposal has covariance $\sigma_{opt}^2 \mathbb{1}_d$.
    var_opt = sp['sigma_opt']**2
    prop_cov = var_opt*identity

    # Pi_1
    Pi_1_cov = np.copy(identity)
    Pi_1_cov[0, 0] = 100

    # Pi_2
    Pi_2_cov = generate_correlated_cov(Pi_1_cov, np.pi/4)

    # Pi_rnd: M @ M.T is symmetric and positive semi-definite,
    # so it is a valid (random) covariance matrix.
    d = sp['dim']
    M = np.random.normal(size=(d, d))
    Pi_rnd = M@M.T
    return {'Z': identity, 'Proposal': prop_cov, 'Pi_1': Pi_1_cov,
            'Pi_2': Pi_2_cov, 'Pi_rnd': Pi_rnd}


def generate_all_Gaussians(sp):
    named_covs = generate_covs(sp)
    gaussians = {name: generate_Gaussian(sp=sp, name=name, mean=sp['Origin'], cov=cov)
                 for name, cov in named_covs.items()}
    return gaussians
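# An illustrative (commented) use of the generators above, mirroring the test
# cells at the end of this file; `sp2`, `gaussians` and `pi_1` are hypothetical names.

# sp2 = state_space(dim=2)
# gaussians = generate_all_Gaussians(sp2)
# pi_1 = gaussians['Pi_1']
# print(pi_1['Covariance'])          # expected: diag(100, 1)
# print(pi_1['pdf'](sp2['Origin']))  # the density at the mean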
# ## Proposal Generator
#
# The **isotropic** (radially symmetric) proposal generator used by the Metropolis-Hastings sampler.
# Its *mean* is the origin and its *spread* is $\sigma$.

# In[13]:


def generate_isotropic_Gaussian(sp, sigma):
    origin, identity = sp['Origin'], sp['Id']
    diagonal = sigma**2 * identity
    return generate_Gaussian(sp=sp, name='Isotropic', mean=origin, cov=diagonal)


# In[14]:


def generate_random_Gaussian(sp):
    d, origin = sp['dim'], sp['Origin']
    M = np.random.normal(size=(d, d))
    random_cov = M@M.T
    return generate_Gaussian(sp=sp, name='Random', mean=origin, cov=random_cov)


# ## Twisted Distributions in the Test Suite
#
# The twist $\phi_b$ bends the second coordinate by a parabola in the first one.
# Since its Jacobian determinant is 1 (the map is volume-preserving), composing
# a Gaussian pdf with $\phi_b$ again yields a properly normalized density.

# In[15]:


def f_twist(b):
    def phi_b(x):
        """Argument and the value returned are d-dimensional numpy arrays."""
        y = np.copy(x)
        x1, x2 = x[:2]
        y[0], y[1] = x1, x2 + b*x1**2 - 100*b
        return y

    def phi_b_inv(y):
        """Argument and the value returned are d-dimensional numpy arrays."""
        x = np.copy(y)
        y1, y2 = y[:2]
        x[0], x[1] = y1, y2 - b*y1**2 + 100*b
        return x
    return phi_b, phi_b_inv

def compose2(f, g):
    return lambda x: f(g(x))


# In[16]:


def apply_to(transformation, pts):
    """Used to generate samples of a twist distribution given samples of a Gaussian one.
    The argument transformation, e.g. phi_b, maps d-dimensional vectors to
    d-dimensional vectors; it is applied to every point in pts. The result is
    an array of the transformed points.
    """
    transformed_pts = np.zeros_like(pts)
    for i, pt in enumerate(pts):
        transformed_pts[i] = transformation(pt)
    return transformed_pts


# In[17]:


def apply(transformation):
    return lambda pts: apply_to(transformation, pts)


# In[18]:


def get_twisted_contour(gaussian, b):
    cov = gaussian['Covariance']
    f = get_Gaussian_contour(cov)
    return lambda x1, x2: f(x1, x2 + b*x1**2 - 100*b)


# In[19]:


def generate_twist(gaussian, b, name):
    # The twisted distribution is a transformation of
    # the uncorrelated Gaussian distribution 'gaussian'.
    transformed_distr = gaussian.copy()
    twist_function, inverse_twist_function = f_twist(b=b)
    transformed_pdf = compose2(gaussian['pdf'], twist_function)
    contour_function = get_twisted_contour(gaussian=gaussian, b=b)
    transformed_distr.update({'Name': name,
                              'Generator': gaussian,
                              'pdf': transformed_pdf,
                              'Contour Function': contour_function,
                              'Transformation': apply(inverse_twist_function)})
    return transformed_distr


# In[20]:


def generate_all_twists(gaussian, b_values, names):
    twists = {name: generate_twist(gaussian, b, name)
              for b, name in zip(b_values, names)}
    return twists
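# A quick (commented) check of the twist defined above, not part of the
# original file: phi_b and phi_b_inv should compose to the identity, and the
# twist leaves the first coordinate untouched. The names below are hypothetical.

# phi, phi_inv = f_twist(b=0.03)
# x = np.array([1.0, 2.0])
# print(np.allclose(phi_inv(phi(x)), x))  # expected: True
# print(phi(x)[0] == x[0])                # expected: True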
# In[21]:


def generate_test_suite(sp):
    gaussians = generate_all_Gaussians(sp)
    twists = generate_all_twists(gaussian=gaussians['Pi_1'],
                                 b_values=[0.03, 0.1],
                                 names=['Pi_3', 'Pi_4'])
    sp.update({'Test Suite': {**gaussians, **twists}})


# In[22]:


def generate_state_space(dim, nb_runs=100, N=None):
    # N is accepted for compatibility with existing callers but is not used
    # here; i.i.d. samples are generated separately with generate_iid_samples.
    sp = state_space(dim=dim)
    generate_test_suite(sp)
    generate_initial_states(sp=sp, nb_runs=nb_runs)
    return sp


# ### Generate independent and identically distributed (i.i.d.) samples
#
# These samples will be generated when we initialize a run. They are compared to the correlated samples generated by an MCMC sampler.

# In[23]:


def iid_samples_Gaussian(gaussian, N):
    mean, cov = gaussian['Mean'], gaussian['Covariance']
    rv = ss.multivariate_normal(mean=mean, cov=cov)
    samples = rv.rvs(size=N)
    gaussian.update({'Samples': samples})


# ### Generate i.i.d. samples of a transformed Gaussian distribution.
# These samples will be generated when we initialize a run. They are compared to the correlated samples generated by an MCMC sampler.

# In[24]:


def iid_samples_transformed_Gaussian(distr, N):
    # Samples are generated by transforming the random samples of
    # the generating Gaussian distribution.
    generator = distr['Generator']
    transformation = distr['Transformation']
    if 'Samples' not in generator:
        iid_samples_Gaussian(generator, N)
    transformed_samples = transformation(generator['Samples'])
    distr.update({'Samples': transformed_samples})


# ## Generate i.i.d. samples for the whole Test Suite

# In[25]:


def generate_iid_samples(sp, N):
    test_suite = sp['Test Suite']
    for name, distr in test_suite.items():
        if 'Generator' not in distr:
            iid_samples_Gaussian(gaussian=distr, N=N)
        else:
            iid_samples_transformed_Gaussian(distr=distr, N=N)


# ## Getter functions for the samples of a distribution

# In[26]:


def get_distribution(sp, name):
    return sp['Test Suite'][name]

def get_samples(sp, name):
    return get_distribution(sp, name)['Samples']


# # Time to test

# In[27]:


def inspect(sp, field):
    test_suite = sp['Test Suite']
    for key, distr in test_suite.items():
        print(key, distr[field])

#inspect(SP, 'Covariance')


# In[28]:


def inspect_Gaussian(sp, name_gaussian):
    gaussian = sp['Test Suite'][name_gaussian]
    print(gaussian['Name'])
    print(gaussian['Mean'])
    print(gaussian['Covariance'])
    print(gaussian['Samples'][:5])

def inspect_transformed_Gaussian(sp, name_distr):
    distr = sp['Test Suite'][name_distr]
    print(distr['Name'])
    print(distr['Mean'])
    print(distr['Covariance'])
    inspect_Gaussian(sp, distr['Generator']['Name'])
    print(distr['Samples'][:5])

#inspect_transformed_Gaussian(SP, 'Pi_4')


# SP = generate_state_space(dim=2, nb_runs=10)
# generate_iid_samples(SP, N=1000)
# TESTSUITE = SP['Test Suite']

# Z_samples = get_samples(SP, name='Z')
#
# prop = SP['Test Suite']['Proposal']
# prop_cov = prop['Covariance']
# prop_samples = prop['Samples']
# # To give standard normal samples the proposal covariance, colour them with
# # a matrix square root of the covariance (its Cholesky factor), not with the
# # covariance itself. Here prop_cov is sigma_opt^2 * Id, so this amounts to
# # rescaling by sigma_opt.
# samples = Z_samples @ la.cholesky(prop_cov).T
#
# samples[:10], prop_samples[:10]
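# A follow-up check (illustrative, not in the original file): the empirical
# covariance of the coloured samples should be close to prop_cov.

# empirical_cov = np.cov(samples, rowvar=False)
# print(empirical_cov)
# print(prop_cov)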
--------------------------------------------------------------------------------
/Visualization.py:
--------------------------------------------------------------------------------

# coding: utf-8

# In[1]:


import numpy as np
import numpy.linalg as la
import matplotlib.pyplot as plt
import scipy as sp
import scipy.stats as ss
import math
import random
from collections import namedtuple
#get_ipython().magic('precision 4')
#get_ipython().magic('matplotlib inline')


# # *Visualize* the results of an *MCMC* run
#
# ## Set Up the Grid
#
# The values of $x_{min}, x_{max}, {nb}_{x}, y_{min}, y_{max}, \text{ and } {nb}_{y}$ depend on where the **probability mass** of the **target distribution** is located, i.e. where the probability density function is sufficiently 'large'.

# In[2]:


Grid = namedtuple('Grid', ['x_min', 'x_max', 'y_min', 'y_max', 'X', 'Y'])

def make_grid(x_min=-30.0, x_max=30.0, nb_x=100,
              y_min=-30.0, y_max=30.0, nb_y=100):
    x_list = np.linspace(x_min, x_max, nb_x)
    y_list = np.linspace(y_min, y_max, nb_y)
    x, y = np.meshgrid(x_list, y_list)
    return Grid(x_min=x_min, x_max=x_max, y_min=y_min, y_max=y_max, X=x, Y=y)

GRID = make_grid()


# In[3]:


# In all functions below, the precision matrix is the inverse of the
# covariance matrix. For 1-dimensional arrays the transpose is a no-op,
# so delta @ precision @ delta suffices.

def Mahalanobis_distance(mean, point, precision):
    delta = mean - point
    return np.sqrt(delta @ precision @ delta)

def squared_Mahalanobis_distance(mean, point, precision):
    # The mean must be passed in explicitly; the original version
    # referred to an undefined global `mean`.
    delta = mean - point
    return delta @ precision @ delta

def Mahalanobis_distance_to_origin(point, precision):
    return np.sqrt(point @ precision @ point)

def squared_Mahalanobis_distance_to_origin(point, precision):
    return point @ precision @ point

def calculate_fractions(distribution, samples, burnin_pct=0):
    precision = la.inv(distribution['Covariance'])
    end_burnin = burnin_pct*len(samples)//100
    samples_at_equilibrium = samples[end_burnin:]
    # Collect the squared distances in an array so that the comparison
    # with a contour level broadcasts elementwise.
    d_sq = np.array([squared_Mahalanobis_distance_to_origin(sample, precision)
                     for sample in samples_at_equilibrium])
    return [np.mean(d_sq <= contour_level)
            for contour_level in distribution['Contour Levels']]


# # the histogram of the distances
# # (`Distances` is assumed to hold the sampled distances; note that
# # matplotlib's old `normed` argument has been replaced by `density`)
# n, bins, patches = plt.hist(Distances, 50, density=True, facecolor='green', alpha=0.75)
#
# plt.xlabel('Distance to the Mean')
# plt.ylabel('Relative Frequency')
# plt.title(r'$\mathrm{Histogram\ of\ Sample\ Distance\ to\ the\ Mean}$')
# plt.grid(True);

# # cumulative distribution of the distances
# values, base = np.histogram(Distances, bins=100)
# # evaluate the cumulative
# cumulative = np.cumsum(values)
# # plot the cumulative function
# plt.plot(base[:-1], cumulative, c='blue');

# fig = plt.figure("i.i.d.", figsize=(7, 7))
# ax = fig.add_subplot(1, 1, 1)
# subplot(ax, Pi_2, Pi_2['Samples'][::1000], dim1=0, dim2=1,
#         title='Distribution of i.i.d. generated samples.')

# ## Contour Lines corresponding to given Confidence Levels
#
# Next we plot the contour lines corresponding to the 67, 90, 95 and 99 percent confidence levels, using the corresponding values of the $\chi^2$-distribution. In case of a bivariate distribution we have 2 **degrees of freedom**. The values of this distribution can be found at the webpage https://people.richland.edu/james/lecture/m170/tbl-chi.html for instance.
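# A small (commented) check of the fraction machinery, not part of the
# original file, assuming a 2-dimensional standard normal: the fraction of
# squared Mahalanobis distances below the 95 percent chi-square quantile
# should be close to 0.95.

# zs = ss.multivariate_normal(mean=np.zeros(2), cov=np.eye(2)).rvs(size=10000)
# precision = np.eye(2)
# d_sq = np.array([squared_Mahalanobis_distance_to_origin(z, precision) for z in zs])
# level_95 = ss.chi2.ppf(0.95, df=2)
# print(np.mean(d_sq <= level_95))  # expected: approximately 0.95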
# In[4]:


def plot_contour_lines(ax, distribution, dim1, dim2):
    global GRID
    X, Y = GRID.X, GRID.Y
    # Plot the contour lines. Since we project onto a 2-dimensional subspace,
    # we use 2 degrees of freedom instead of the dimension of the state space.
    # dim1 and dim2 are accepted for symmetry with scatter_samples; the
    # contour function itself is defined in the first two dimensions.
    contour_function = distribution['Contour Function']
    contour_levels = distribution['Contour Levels']
    Z = contour_function(X, Y)
    ax.contour(X, Y, Z, contour_levels)

def scatter_samples(ax, samples, dim1, dim2):
    ax.scatter(samples[:, dim1], samples[:, dim2])

def subplot(ax, distribution, samples, dim1, dim2, title):
    ax.set_title(title, fontweight='bold', color='blue', fontsize=14)
    ax.axis([GRID.x_min, GRID.x_max, GRID.y_min, GRID.y_max])
    ax.set_xlabel('Dimension ' + str(dim1))
    ax.set_ylabel('Dimension ' + str(dim2))
    plot_contour_lines(ax, distribution, dim1, dim2)
    scatter_samples(ax, samples, dim1, dim2)


# In[5]:


def compare_to_iid_samples(run_data, nb_samples, dim1=0, dim2=1, burnin_pct=50):
    global GRID
    fig, (ax_left, ax_right) = plt.subplots(nrows=1, ncols=2, figsize=(15, 7))
    target = run_data.Target
    # Data to be plotted.
    step = run_data.N//nb_samples
    mcmc_samples = run_data.DataStore['States']
    iid_samples = target['Samples']
    mcmc_samples_2_display = mcmc_samples[::step]
    iid_samples_2_display = iid_samples[::step]
    mcmc_fractions = calculate_fractions(target, mcmc_samples, burnin_pct)
    iid_fractions = calculate_fractions(target, iid_samples, burnin_pct)

    # Information to be shown.
    s_name = run_data.Sampler['Name']
    burnin_str = 'Burn in used is {:d} percent of the generated samples.'
    burnin_info = burnin_str.format(burnin_pct)
    mcmc_str = '{:s} Fractions: {:1.5f}, {:1.5f}, {:1.5f}, and {:1.5f}'
    mcmc_info = mcmc_str.format(s_name, *mcmc_fractions)
    iid_str = 'IID Fractions: {:1.5f}, {:1.5f}, {:1.5f}, and {:1.5f}'
    iid_info = iid_str.format(*iid_fractions)
    title_mcmc = '{:s} Generated'.format(s_name)
    title_iid = 'IID Generated'
    suptitle_str = 'Comparison of the {:s} (left) vs. the IID (right) sample distribution'
    suptitle = suptitle_str.format(s_name)

    # Display everything.
    print(burnin_info)
    print(mcmc_info)
    print(iid_info)
    fig.suptitle(suptitle, fontweight='bold', color='red', fontsize=18)
    subplot(ax_left, target, mcmc_samples_2_display, dim1, dim2, title=title_mcmc)
    subplot(ax_right, target, iid_samples_2_display, dim1, dim2, title=title_iid)
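# A hypothetical usage sketch: `run_data` would come from one of the samplers
# in this repository (e.g. AM_sampler); none of the names below are defined
# in this file.

# run_data = AM_sampler(pars=pars, target=target, initial_state=x0, run_data=run_data)
# compare_to_iid_samples(run_data, nb_samples=1000, burnin_pct=50)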
# In[6]:


def plot_samples(run_data, nb_samples, dim1=0, dim2=1, burnin_pct=50):
    global GRID
    # New figure window for the current sampling method.
    s_name = run_data.Sampler['Name']
    fig = plt.figure(s_name, figsize=(7, 7))
    ax = fig.add_subplot(1, 1, 1)
    # Data to be plotted. (burnin_pct is accepted for API symmetry with
    # compare_to_iid_samples but is not used here.)
    target = run_data.Target
    step = run_data.N//nb_samples
    mcmc_samples_2_display = run_data.DataStore['States'][::step]
    # Information to be shown.
    fig_title_str = 'Distribution of samples generated by {:s}'
    fig_title = fig_title_str.format(s_name)
    # Plot everything.
    subplot(ax, target, mcmc_samples_2_display, dim1, dim2, title=fig_title)

def subplot_2(ax, samples, dim1, dim2, title, color):
    ax.set_title(title, fontweight='bold', fontsize=14)
    ax.set_xlabel('Dimension ' + str(dim1))
    ax.set_ylabel('Dimension ' + str(dim2))
    ax.scatter(samples[:, dim1], samples[:, dim2], color=color)

def compare_sample_spread(dim1, dim2, list_of_samples, titles, colors):
    # Ensure that dim1 and dim2 are less than the dimension of the state space.
    _, dim = list_of_samples[0].shape
    assert dim1 < dim, "dim1 should be less than %r" % dim
    assert dim2 < dim, "dim2 should be less than %r" % dim

    # Generate the subplots.
    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(15, 7), sharex='col', sharey='row')
    for ax, samples, title, color in zip(axes, list_of_samples, titles, colors):
        subplot_2(ax=ax, samples=samples, dim1=dim1, dim2=dim2, title=title, color=color)

--------------------------------------------------------------------------------