├── AM_Sampling.py
├── Adaptive_MCMC_for_Bayesian_Inference.pdf
├── CMA_Sampling.py
├── Experiments.ipynb
├── FileHandling.py
├── GaA_Sampling.py
├── L_AM_Sampling.py
├── MH_Sampling.py
├── README.md
├── TestSuite.py
└── Visualization.py

--------------------------------------------------------------------------------
/AM_Sampling.py:
--------------------------------------------------------------------------------

# coding: utf-8

# # *Adaptive* MH
#
# See the 1999 and 2001 papers of Haario et al.

# In[1]:

import numpy as np
import numpy.linalg as la
import matplotlib.pyplot as plt
import scipy as sp
import scipy.stats as ss
import math
import random
from collections import namedtuple


from MH_Sampling import acceptance_decision
from FileHandling import save_state
from TestSuite import generate_state_space, generate_iid_samples, get_distribution, get_samples


# In[2]:

# save Adaptive Metropolis parameters in Named Tuple

AM_Pars = namedtuple('AM_Pars',
                     ['Origin', 'Id',
                      'sigma_0', 'sigma_opt',
                      'C_0', 'C_opt',
                      'z_samples'])

# initialize parameters
def init_AM_pars(sp):
    dim, origin, idty = sp['dim'], sp['Origin'], sp['Id']
    sigma_0, sigma_opt = 0.1/np.sqrt(dim), sp['sigma_opt']
    cov_0, cov_opt = sigma_0**2*idty, sigma_opt**2*idty
    return AM_Pars(Origin=origin, Id=idty,
                   sigma_0=sigma_0, sigma_opt=sigma_opt,
                   C_0=cov_0, C_opt=cov_opt,
                   z_samples=get_samples(sp=sp, name='Z'))


# # Adaptive MH algorithm *AM*

# ## Generate the candidate next sample
#
# We consider a version of the *Adaptive Metropolis* (*AM*) sampler of Haario
# et al. (2001). We want to sample from the $d$-dimensional target distribution $\pi(\mathbf{x})$.
#
# We perform a Metropolis algorithm with proposal distribution $\mathbf{Q}_n$ at iteration $n$ given by
#
# $$\mathbf{Q}_n(\mathbf{x}, ·) = N(\mathbf{x}, \sigma_{0}^2 \mathbb{1}_d)$$
#
# for $n \leq 2d$, while for $n > 2d$
#
# $$\mathbf{Q}_n(\mathbf{x}, ·) = (1 − \beta) N(\mathbf{x}, \sigma_{opt}^2 \mathbf{C}_n) +
# \beta N(\mathbf{x}, \sigma_{0}^2 \mathbb{1}_d)$$
#
# where $\mathbf{C}_n$ is the current empirical estimate of the covariance of the target distribution
# based on the samples so far, $\sigma_{0}^2 = \frac{0.1^2}{d}$ and $\sigma_{opt}^2 = \frac{2.38^2}{d}$ are the initial and optimal scale, respectively, and $\beta$ is a small positive constant; we use $\beta = 0.05$.
#
# In other words, the next candidate is sampled from
#
# $$\mathbf{x}^{*} \sim \mathbf{Q}_n(\mathbf{x}, ·)$$
#
# The text above is adapted from Section 2 of Gareth O. Roberts and Jeffrey S. Rosenthal (2008),
# *Examples of Adaptive MCMC*.

# ## The proposal covariance used at step $n$
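#
# With probability $\beta$ (or during the initialization period $n \leq 2d$) the proposal
# uses the fixed covariance $\sigma_0^2 \mathbb{1}_d$; otherwise it uses the scaled
# empirical covariance $\sigma_{opt}^2 \mathbf{C}_n$ with $\mathbf{C}_n = M2/(n-1)$,
# where $M2$ is the running sum of squared deviations maintained by the Welford
# update further below.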
# In[3]:

def get_proposal_cov(M2, n, pars, beta=0.05):
    d, _ = M2.shape
    init_period = 2*d
    s_0, s_opt, C_0 = pars.sigma_0, pars.sigma_opt, pars.C_0
    if np.random.rand() <= beta or n <= init_period:
        return C_0
    else:
        # We can always divide M2 by n-1 since n > init_period.
        # sigma_opt is a scale, so the variance factor is sigma_opt**2,
        # giving sigma_opt^2 * C_n with C_n = M2/(n-1).
        return (s_opt**2/(n - 1))*M2


# In[4]:

def generate_AM_candidate(current, M2, n, pars):
    prop_cov = get_proposal_cov(M2, n, pars)
    candidate = ss.multivariate_normal(mean=current, cov=prop_cov).rvs()
    return candidate


# ## Update the mean $\mathbf{m}$ and the covariance $\mathbf{C}$
#
# In the *AM*-algorithm, the **mean** is updated as
#
# $$\mathbf{m}_{n+1} = \mathbf{m}_{n} + \frac{1}{n+1}\left(\mathbf{x}_{n+1} - \mathbf{m}_{n}\right) = \frac{n}{n+1}\mathbf{m}_{n} + \frac{1}{n+1}\mathbf{x}_{n+1}$$
#
# and the **covariance** as
#
# $$\mathbf{C}_{n+1} = \mathbf{C}_{n} + \frac{1}{n+1}\left(
# \left(\mathbf{x}_{n+1} - \mathbf{m}_{n}\right)\left(\mathbf{x}_{n+1} - \mathbf{m}_{n}\right)^\top - \mathbf{C}_{n} \right)$$
#
# where $\mathbf{x}_{n+1}$ is the sample generated at step $n+1$.

# In the Welford algorithm,
#
# $$M_n \triangleq \sum_{i=1}^{n} {(x_i - \overline{x}_n)}^2$$
#
# or in other words
#
# $$s_n^2 = \frac{M_n}{n-1}$$
#
# It is easier to update $M_n$ in a numerically stable way,
#
# $$M_n = M_{n-1} + (x_n - \overline{x}_{n-1})(x_n - \overline{x}_n)^\top$$

# In[5]:

def update_moments(mean, M2, sample, n):
    next_n = n + 1
    w = 1/next_n
    new_mean = mean + w*(sample - mean)
    delta_bf, delta_af = sample - mean, sample - new_mean
    new_M2 = M2 + np.outer(delta_bf, delta_af)
    return new_mean, new_M2, next_n


# In[6]:

def multiple_of_10000(n):
    return n % 10000 == 0


# In[7]:

def AM_sampler(pars, target, initial_state, run_data):
    ds, N = run_data.DataStore, run_data.N
    target_pdf = target['pdf']

    current = initial_state
    mean, M2 = pars.Origin, np.zeros_like(pars.Id)
    accepted = True

    for n in range(0, N):
        save_state(data_store=ds, step=n,
                   state=current, value=target_pdf(current),
                   mean=mean, covariance=M2, accepted_p=accepted)

        # generate new candidate
        candidate = generate_AM_candidate(current=current, M2=M2, n=n, pars=pars)

        # run the Metropolis-Hastings acceptance criterion
        accepted = acceptance_decision(current=current, proposed=candidate, pdf=target_pdf)

        # the accepted candidate becomes the new state
        if accepted:
            current = candidate
        # We always update M2, where S^2 = M2/(n-1),
        # whether the proposed sample is accepted or not.
        mean, M2, _ = update_moments(mean, M2, current, n)
    return run_data

--------------------------------------------------------------------------------
/Adaptive_MCMC_for_Bayesian_Inference.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/philipk01/Optimization_and_Sampling_for_Bayesian_Inference/8236469751b6c23f2ce25ef8122e7824f9b91696/Adaptive_MCMC_for_Bayesian_Inference.pdf
--------------------------------------------------------------------------------
/CMA_Sampling.py:
--------------------------------------------------------------------------------

# coding: utf-8
# # CMA *Sampling*

# In[22]:

import numpy as np
import numpy.linalg as la
import matplotlib.pyplot as plt
import scipy as sp
import scipy.stats as ss
import math
import random
from collections import namedtuple
#%precision 4
#%matplotlib inline


# In[23]:

# project python modules
from MH_Sampling import acceptance_decision
from FileHandling import save_state
from TestSuite import generate_state_space, generate_iid_samples, get_samples


# # CMA Sampler
#
# Both the CMA-ES optimization and sampling algorithms have a number of
# **strategy parameters** that do not change during the execution of the algorithm.
# We use the default values recommended in the paper *C. Igel, T. Suttorp, and N. Hansen, A Computational Efficient Covariance Matrix Update and a $(1+1)$-CMA for Evolution Strategies*, henceforth called *the paper*.
#
# In the update of the **global scale** $\sigma$, the following parameters with given initial values are used:
#
# - **damping parameter** $$k = 1 + \frac{d}{2}$$ where $d$ is the dimension of the state space. (**Note**: In the paper, $d$ is used for
# the damping parameter instead of $k$, and $n$ for the dimension instead of $d$.)
# - **target success rate** $$p_{s}^{succ} = \frac{2}{11}$$
# (**Remark BM**: compare $p_{s}^{succ}$ to the optimal acceptance ratio in Metropolis-Hastings.)
# - **learning rate** $$c_p = \frac{1}{12}$$ used in the update of the average success rate
# $\bar{p}_s \in \left[ 0, 1 \right]$, cf. the procedure *update_scale* below for more information.
#
# For the **covariance matrix adaptation**, they are
#
# - **evolution point weight** $$\lambda_{\mathbf{p}} = \frac{2}{d+2}$$
# - **covariance matrix weight** $$\lambda_{\mathbf{C}} = \frac{2}{d^2 + 6}$$
# - **threshold** $$\theta_p$$ for the *average success rate* $\bar{p}_s$. The updates of the evolution point $\mathbf{p}_c$ and the covariance matrix $\mathbf{C}$ depend on the test $\bar{p}_s < \theta_p$, cf. the procedure *update_cov* below for more information.
#
# **Note**: in the CMA-ES literature, step size is used instead of global scale. In order to be consistent
# with the MCMC literature we prefer and use the latter.

# # Initial values
#
# The initial values are set as follows
#
# - **average success rate** $\bar{p}_s = p_{s}^{succ}$ where $p_{s}^{succ}$ is the *target success rate*.
# - **evolution point** $\mathbf{p}_c = \mathbf{0}$
# - **covariance matrix** $\mathbf{C} = \mathbb{1}_d$
#
# The choice of the initial candidate $\mathbf{x}_0$ and the initial global scale $\sigma$ is problem dependent.
# Here, we initialize $\mathbf{x}_0$ with a random point in a hypercube centered at the origin, whose side length can vary, and we set $\sigma = 1$.
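# Worked example (illustration only, not used by the sampler): for a d = 2
# state space the defaults below evaluate to
#
#   k     = 1 + 2/2      = 2.0    (damping parameter)
#   c_c   = 2/(2+2)      = 0.5    (evolution point weight)
#   c_cov = 2/(2**2 + 6) = 0.2    (covariance matrix weight)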
# In[24]:

# save covariance matrix adaptation (CMA) parameters in Named Tuple

CMA_Parameters = namedtuple('CMA_Parameters',
                            ['z_samples',
                             # Parameters used in the global scale control
                             's', 'k', 't_succ', 'c_p',
                             # Parameters used in the covariance adaptation
                             'c_c', 'c_cov', 'p_thres'])

def init_CMA_pars(sp):
    dim = sp['dim']
    return CMA_Parameters(z_samples=get_samples(sp=sp, name='Z'),
                          s=1,
                          k=1+dim/2,
                          t_succ=2/11,
                          c_p=1/12,
                          c_c=2/(dim+2),
                          c_cov=2/(dim**2+6),
                          p_thres=0.44)


# # Generation of the candidate $\mathbf{x}^{*}$
#
# Let $\mathbf{C} = \mathbf{Q}\mathbf{Q}^{\top}$ be the Cholesky decomposition of the covariance matrix $\mathbf{C}$. The candidate next state $\mathbf{x}^{*}$ is generated as
#
# $$\mathbf{x}^{*} = \mathbf{x}_n + \sigma_n \mathbf{Q}_n \mathbf{z}_n$$
#
# where $\sigma_n$ is the global scale, $\mathbf{Q}_n$ the Cholesky factor of $\mathbf{C}_n$,
# and $\mathbf{z}_n \sim N(\mathbf{0}, \mathbb{1}_d)$ at time step $n$, respectively.

# In[25]:

def generate_CMA_candidate(current, scale, cov, z_sample):
    # Use the positive definite square root of C, cf. Tutorial, p. 6.
    Q = la.cholesky(cov)
    y = Q @ z_sample
    candidate = current + scale*y
    return (candidate, y)


# ## Update of the global scale $\sigma$
#
# At **each time step**, the global scale $\sigma$ is updated; the update consists of two steps.
# First, the average success rate $\bar{p}_{s}$
# is updated, and this depends on the acceptance of the generated candidate. Next,
# $\sigma$ itself is updated.
#
# The **average success rate** $\bar{p}_{s}$ and the **global scale**
# $\sigma$ are updated as
#
# \begin{align}
# \bar{p}_{s} & \leftarrow (1 - c_p) \bar{p}_{s} + c_p \textit{ accepted_p}\\
# \sigma & \leftarrow \sigma ~ \exp \left( \frac{1}{k} \left( \bar{p}_{s} - \frac{p_{s}^{target}}{1 - p_{s}^{target}} (1- \bar{p}_{s}) \right) \right)
# \end{align}

# In[26]:

def update_scale(p_succ, sigma, accepted_p, pars):
    # Parameters used in the global scale control
    k, t_succ, c_p = pars.k, pars.t_succ, pars.c_p

    # Update the average success rate.
    p_succ = (1 - c_p)*p_succ + c_p*accepted_p

    # Update the global scale.
    w = t_succ/(1 - t_succ)
    sigma = sigma * np.exp(1/k*(p_succ - w*(1 - p_succ)))
    return p_succ, sigma


# In[27]:

def f(d):
    k = 1 + d/2
    t_succ = 2/11
    w = t_succ/(1 - t_succ)
    return lambda x: np.exp(1/k*(x - w*(1 - x)))


# In[28]:

f_d = f(d=50)
f_d(2/11)


# In[29]:

D = 2
k = 1 + D/2
f_d = f(d=D)
x_range = np.linspace(start=0, stop=1, num=50, endpoint=True)
f_range = f_d(x_range)
t_succ = 2/11
coords = (t_succ, f_d(t_succ))
plt.title(r'$f(x) = e^{\frac{1}{k} ( x - w (1 - x ))}$')
plt.annotate('target threshold', xy=coords, xytext=(0.2, 1.2),
             arrowprops=dict(facecolor='black', shrink=0.05))
plt.plot(x_range, f_range);


# # Update of the covariance matrix $\mathbf{C}$
#
# The covariance matrix is updated only **when** the generated candidate $\mathbf{x}^{*}$
# is **accepted**.
# This update also consists of two steps.
# First, the evolution point $\mathbf{p}_{c}$
# is updated, followed by the update of the covariance matrix itself.
# These updates depend on whether $\bar{p}_s < \theta_p$ or not.
#
# The **evolution point** $\mathbf{p}_{c}$ and the **covariance matrix**
# $\mathbf{C}$ are updated as
#
# \begin{align}
# \mathbf{p}_{c} &\leftarrow
# \begin{cases}
# \left(1-\lambda_\mathbf{p} \right) \mathbf{p}_{c} + \sqrt{ \lambda_\mathbf{p}(2 - \lambda_\mathbf{p})}
# ~\mathbf{y} & \mbox{if $\bar{p_{s}} < \theta_p$}\\
# \left (1-\lambda_\mathbf{p} \right) \mathbf{p}_{c} & \mbox{otherwise}
# \end{cases} \\
# \mathbf{C} &\leftarrow
# \begin{cases}
# \left( 1- \lambda_\mathbf{C} \right) \mathbf{C} + \lambda_\mathbf{C}
# \mathbf{p}_{c}\mathbf{p}_{c}^{\top} & \mbox{if $\bar{p_{s}} < \theta_p$}\\
# \left ( 1-\lambda_\mathbf{C} \right) \mathbf{C} + \lambda_\mathbf{C} \left (\mathbf{p}_{c}\mathbf{p}_{c}^{\top} + \lambda_\mathbf{p}(2-\lambda_\mathbf{p}) \mathbf{C} \right) & \mbox{otherwise}
# \end{cases}
# \end{align}
#
# where $\mathbf{y} = \mathbf{Q} \mathbf{z}$ with $\mathbf{z} \sim N(\mathbf{0}, \mathbb{1}_d)$ and $\mathbf{C} =
# \mathbf{Q}\mathbf{Q}^{\top}$, cf. above for more information about how samples are generated.

# In[30]:

def update_cov(cov, evol_point, y, avg_success_rate, pars):
    # Parameters used in the covariance matrix adaptation
    c_c, c_cov, p_thres = pars.c_c, pars.c_cov, pars.p_thres
    if avg_success_rate < p_thres:
        evol_point = (1-c_c)*evol_point + np.sqrt(c_c*(2-c_c))*y
        cov = (1-c_cov)*cov + c_cov*np.outer(evol_point, evol_point)
    else:
        evol_point = (1-c_c)*evol_point
        cov = (1-c_cov)*cov + c_cov*(np.outer(evol_point, evol_point) + c_c*(2-c_c)*cov)
    return evol_point, cov


# # (1+1)-CMA ES Sampler
#
# The sampler operates as follows
#
# Initialize $\mathbf{x}$, $\sigma = 1$, $\mathbf{C} = \mathbb{1}_d$,
# $\bar{p}_s = p_s^{target}$, and $\mathbf{p}_c = \mathbf{0}$
#
# **repeat**
# > 1. determine $\mathbf{Q}$ such that $\mathbf{C} = \mathbf{Q} \mathbf{Q}^\top$
# > 2. $\mathbf{z} \sim N(\mathbf{0}, ~ \mathbb{1}_d)$
# > 3. $\mathbf{x}^{*} = \mathbf{x} + \sigma ~ \mathbf{Q} ~ \mathbf{z}$
# > 4. *accepted_p* = *acceptance_decision*($\mathbf{x}, \mathbf{x}^{*}, \pi$) where $\pi$ is the target distribution
# > 5. **if** *accepted_p* **then**
# >> * $\mathbf{x} \leftarrow \mathbf{x}^{*}$
# >> * *updateCov*($\mathbf{C}, ~ \mathbf{Q} \mathbf{z}, ~ \bar{p}_s, ~ \mathbf{p}_c$)
#
# **until** the stopping criterion is met

# In[31]:

def CMA_sampler(pars, target, initial_state, run_data):
    target_pdf, sp = target['pdf'], target['State Space']
    Origin, Id = sp['Origin'], sp['Id']
    s, p_succ, p_c = pars.s, pars.t_succ, Origin
    ds, N = run_data.DataStore, run_data.N
    z_samples = pars.z_samples

    x_current = initial_state
    C = Id

    save_state(data_store=ds,
               step=0,
               state=x_current,
               value=target_pdf(x_current),
               accepted_p=True,
               mean=p_c,
               covariance=C,
               scale=s,
               threshold=p_succ)

    for n in range(1, N):
        # generate new candidate sample
        x_new, delta = generate_CMA_candidate(current=x_current, scale=s, cov=C, z_sample=z_samples[n])

        # run the Metropolis-Hastings acceptance criterion
        accepted_p = acceptance_decision(current=x_current, proposed=x_new, pdf=target_pdf)
        p_succ, s = update_scale(p_succ=p_succ, sigma=s, accepted_p=accepted_p, pars=pars)

        if accepted_p:
            # the accepted candidate becomes the new state
            x_current = x_new
            p_c, C = update_cov(evol_point=p_c, cov=C, y=delta, avg_success_rate=p_succ, pars=pars)

        # save accepted and non-accepted states in the data store
        save_state(data_store=ds,
                   step=n,
                   state=x_current,
                   value=target_pdf(x_current),
                   accepted_p=accepted_p,
                   mean=p_c,
                   covariance=C,
                   scale=s,
                   threshold=p_succ)
    return run_data

--------------------------------------------------------------------------------
/FileHandling.py:
--------------------------------------------------------------------------------

# coding: utf-8

# # Save and Inspect the state

# In[66]:


import os
import numpy as np


# In[67]:


def save_state(data_store, step, state, value, accepted_p,
               mean=None, covariance=None, scale=None, threshold=None, C_evol_pt=None):
    data_store['States'][step] = state
    data_store['Densities'][step] = value
    data_store['Accepted_p'][step] = accepted_p
    data_store['Means'][step] = mean
    data_store['Covariances'][step] = covariance
    data_store['Scales'][step] = scale
    data_store['Thresholds'][step] = threshold

def inspect_state(data_store, step):
    state = data_store['States'][step]
    value = data_store['Densities'][step]
    accepted_p = data_store['Accepted_p'][step]
    mean = data_store['Means'][step]
    covariance = data_store['Covariances'][step]
    scale = data_store['Scales'][step]
    threshold = data_store['Thresholds'][step]
    print("State: ", state, "\nIts value: ", value,
          "\nAccepted: ", accepted_p,
          "\nScale: ", scale, "\nThreshold: ", threshold,
          "\nMean: ", mean, "\nCovariance: ", covariance)


# # Save in the file format used by *PyMC3*

# ## Structure of the data directory
#
# The directory structure of 'Data' is as follows
#
# 1. for each dimension $d$ of the state space, 'Data' contains a folder 'Dim d'
# 2. for each target, 'Dim d' contains a folder 'Target k' where $k$ is the index of
# that target in the test suite
# 3. for each sampler, 'Target k' contains a folder named after that sampler
# 4. for each run given the dimension of the state space, the target and the sampler,
# a file 'chain_i' is generated where $i$ is the index of the run.
#
#
# The global variable PARENT_FOLDER contains the parent folder, i.e. the folder where the experimental data will be stored, e.g.
#
# PARENT_FOLDER = '/Users/BM/Documents/Programming/Python/Notebooks/MCMC/To execute a run'
#
# The functions below assume that the parent folder is correctly set.

# In[68]:


def relative_path_to_chain(dim, t_name, s_name):
    data_folder = 'Data'
    dim_folder = 'Dimension_{}'.format(dim)
    target_folder = t_name
    sampler_folder = s_name
    return './'+'/'.join([data_folder, dim_folder, target_folder, sampler_folder])

class ChDir(object):
    """
    Step into a directory temporarily.
    """
    def __init__(self, path):
        self.old_dir = os.getcwd()
        self.new_dir = path

    def __enter__(self):
        os.chdir(self.new_dir)

    def __exit__(self, *args):
        os.chdir(self.old_dir)

def save_chain(chain, idx, individual_components_p=True):
    """Save a single-chain trace with index 'idx'. PyMC3 uses the labels x__0, x__1, x__2, etc.
    for a vector when they are regarded as COMPONENTS of that vector.
    If we want to treat them INDIVIDUALLY, the labels x_0, x_1, x_2, etc. have to be used.
    That is, we use double versus single underscore.
    """
    chain_name = 'chain-{}.csv'.format(idx)
    _, nbcols = chain.shape
    underscore = '_' if individual_components_p else '__'
    varnames = ['x{}{}'.format(underscore, index) for index in range(nbcols)]
    header = ','.join(varnames)
    np.savetxt(fname=chain_name, X=chain, header=header, comments='', delimiter=',')

def save_run_data(run_data, parent_folder):
    warning = 'Parent Folder \'%s\' does NOT exist' % (parent_folder)
    if not os.path.exists(parent_folder):
        return warning
    chain = run_data.DataStore['States']
    chain_folder = relative_path_to_chain(dim=run_data.StateSpace['dim'],
                                          t_name=run_data.Target['Name'],
                                          s_name=run_data.Sampler['Name'])
    if not os.path.exists(chain_folder):
        os.makedirs(chain_folder)
    with ChDir(chain_folder):
        nbfiles = len(os.listdir())
        save_chain(chain=chain, idx=nbfiles)

def save_comparison(combined_data, parent_folder):
    for i, run_data in enumerate(combined_data):
        save_run_data(run_data, parent_folder)


# In[75]:


def read_states(f_name, dim, t_name, s_name):
    chains_folder = relative_path_to_chain(dim=dim, t_name=t_name, s_name=s_name)
    with ChDir(chains_folder):
        return np.loadtxt(fname=f_name, skiprows=1, delimiter=',')

--------------------------------------------------------------------------------
/GaA_Sampling.py:
--------------------------------------------------------------------------------

# coding: utf-8

# # Gaussian Adaptation *Sampling*

# In[8]:

import numpy as np
import numpy.linalg as la
import matplotlib.pyplot as plt
import scipy as sp
import scipy.stats as ss
import math
import random
from collections import namedtuple


# In[3]:

from MH_Sampling import acceptance_decision
from FileHandling import save_state
from TestSuite import generate_state_space, generate_iid_samples, get_distribution, get_samples


# # *Maximum Entropy* Principle
# To be done.
#
# The entropy of the multivariate normal distribution $N(\mathbf{m}, \mathbf{C})$ with mean $\mathbf{m}$ and covariance matrix $\mathbf{C}$ is
#
# $$ H(N) = \ln \sqrt{(2 \pi e)^d \det \mathbf{C}}$$

# In[4]:

def entropy(cov):
    dim1, dim2 = cov.shape
    assert dim1 == dim2
    return math.log(np.sqrt((2*math.pi*math.e)**dim1 * la.det(cov)))


# ## Check Covariance Matrix

# In[5]:

def analyse_cov(cov):
    eigenvals, eigenvecs = la.eig(cov)
    print('Covariance Matrix:\n', cov)
    print('Determinant:', la.det(cov))
    print('Eigenvalues:', eigenvals)
    print('Eigenvectors:', eigenvecs)
    print('Symmetric:', np.allclose(cov, cov.T))


# # Gaussian Adaptation according to Mueller's Matlab code
#
# This notebook is based on Mueller's Matlab code and the paper *"Gaussian Adaptation as a unifying framework for black-box optimization and adaptive Monte Carlo sampling"* by *Christian L. Mueller* and *Ivo F. Sbalzarini*.

# ## Strategy Parameters
#
# The **strategy parameters** are:
# - the **acceptance ratio** $P$
# - the **expansion factor** $f_e > 1$ and **contraction factor** $f_c < 1$ used to update the global scale $\sigma$
# - the **weights** $\lambda_{\mathbf{m}}$, $\lambda_{\mathbf{C}}$, and $\lambda_{\theta}$
# used to update the mean $\mathbf{m}$, the covariance matrix $\mathbf{C}$, and the threshold $\theta$, respectively.
#
# They are **initialized** as follows, cf. p.2 of the MATLAB code of Mueller:
#
# - **acceptance ratio** $P = \frac{1}{e}$. **REMARK BM**: Check the explanation regarding $P = \frac{s}{s+f}$ where $s$ and $f$ are the number of successes and failures so far.
#
# - **expansion factor** $f_e = 1 + \beta (1 - P)$ and **contraction factor**
# $f_c = 1 - \beta P$ where $\beta = \lambda_{\mathbf{C}}$
#
# - the **weights** are initialized as follows
#
#   - $\lambda_{\mathbf{C}} = \frac{\ln (d+1)}{(d+1)^2}$
#   - $\lambda_{\mathbf{m}} = \frac{1}{ed}$
#   - $\lambda_{\theta} = \frac{1}{ed}$ without restart, cf. the end of Section II.B of the paper for what to do in case of restart.
#
# Here, $d$ is the dimension of the **search space** in case of **optimization** or of the **state space**
# in case of **sampling**.

# ### Initializing *strategy parameters*
# Cf. above for their initial values.
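#
# Worked example (illustration only): for a d = 2 state space the formulas
# above give
#
#   l_C = ln(3)/9        ~ 0.1221
#   l_m = 1/(2e)         ~ 0.1839
#   P   = 1/e            ~ 0.3679
#   f_e = 1 + l_C*(1-P)  ~ 1.0772   (expansion factor)
#   f_c = 1 - l_C*P      ~ 0.9551   (contraction factor)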
# In[30]:

GaA_Pars = namedtuple('GaA_Pars',
                      ['l_C', 'l_m', 'b', 'P',
                       'f_e', 'f_c', 'max_scale', 'max_cond',
                       'Origin', 'Id'])


# In[6]:

def init_GaA_pars(sp):
    D, origin, identity = sp['dim'], sp['Origin'], sp['Id']
    tmp_l_c = math.log(D+1)/(D + 1)**2
    tmp_P = 1/math.e
    return GaA_Pars(l_C=tmp_l_c,
                    l_m=1/(math.e*D),
                    b=tmp_l_c,
                    P=tmp_P,
                    f_e=1 + tmp_l_c*(1-tmp_P),
                    f_c=1 - tmp_l_c*tmp_P,
                    max_scale=1000,
                    max_cond=80,  # The value used by Mueller is 1e6*D but this results in errors.
                    Origin=origin,
                    Id=identity)


# In[10]:

def display_parameters(pars):
    str_1 = "l_C: {:1.4f}\nl_m: {:1.4f}\nb: {:1.4f}\nP: {:1.4f}"
    str_2 = "\nf_e: {:1.4f}\nf_c: {:1.4f}\nmax_scale: {:1.4f}\nmax_cond: {:1.4f}"
    pars_info_1 = str_1.format(pars.l_C, pars.l_m, pars.b, pars.P)
    pars_info_2 = str_2.format(pars.f_e, pars.f_c, pars.max_scale, pars.max_cond)
    print(pars_info_1, pars_info_2)


# # Generate the next sample using $\mathbf{Q}$
#
# The new state $\mathbf{x}_{n+1}$ is generated as follows
#
# $$\mathbf{x}_{n+1} = \mathbf{m}_{n} + \sigma_n \mathbf{Q}_{n} \mathbf{z}_{n}$$
#
# where $\sigma_n$ is the global scale, $\mathbf{Q}_{n}$ is the "square root" of the covariance matrix $\mathbf{C}_{n}$ as defined below, and
# $\mathbf{z}_{n}$ is a sample of the multivariate standard normal distribution $N(\mathbf{0}, \mathbb{1}_d)$ generated at step $n$.

# In[11]:

def Q_generate_GaA_candidate(mean, scale, Q, z_sample):
    # This function uses the normalized square root Q of the covariance matrix C,
    # cf. p.7 of the MATLAB code of Mueller.
    x = mean + scale*(Q @ z_sample)
    return x


# # Generate the next sample using $C$

# In[12]:

def C_generate_GaA_candidate(mean, C, z_sample):
    # Here C itself is applied to the standard normal sample,
    # cf. the C-variant of the sampler below.
    return mean + C @ z_sample


# # Updates of the *scale* $\sigma$, the *mean* $\mathbf{m}$, and the *threshold* $\theta$

# ## Update of the **scale** $\sigma$
#
# The **scale** is *updated at each step*:
#
# When the new sample is **accepted**, the scale is **increased**:
#
# $$\sigma_{n+1} = f_e \sigma_n$$
# where $f_e > 1$ is the *expansion factor*, one of the strategy parameters of Gaussian Adaptation.
#
# When the sample is **rejected**, the scale is **decreased**:
# $$\sigma_{n+1} = f_c \sigma_n$$
# where $f_c < 1$ is the *contraction factor*, another strategy parameter.

# In[13]:

def contract(scale, pars):
    return pars.f_c*scale

def expand(scale, pars):
    # cf. p.10 of the MATLAB code of Mueller
    f_e, max_scale = pars.f_e, pars.max_scale
    next_scale = f_e*scale
    if next_scale <= max_scale:
        return next_scale
    else:
        return max_scale


# ## Update of the *mean* $\mathbf{m}$
#
# The **mean** is **only updated** when the new sample $\mathbf{x}_{n+1}$ is **accepted**. The new mean is
#
# $$\mathbf{m}_{n+1} = (1 - \lambda_{\mathbf{m}}) \mathbf{m}_{n} + \lambda_{\mathbf{m}} \mathbf{x}_{n+1}$$
#
# Here, $\lambda_{\mathbf{m}}$ is a *strategy parameter* of *Gaussian Adaptation*.

# In[14]:

# This code is for global optimization, NOT sampling.
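# (Kept for reference: with l_m < 1 the mean moves only part of the way towards
# the accepted sample. The sampler below uses GaA_mean_update instead, which
# corresponds to l_m = 1.)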
def GaA_mean_update_2(mean, sample, pars):
    l_m = pars.l_m
    return (1-l_m)*mean + l_m*sample


# In[15]:

# In case of sampling l_m = 1; in other words, the new sample becomes the next mean.
def GaA_mean_update(mean, sample, pars):
    return sample


# # Update of the covariance matrices $\mathbf{C}$ and $\mathbf{Q}$

# $\mathbf{C}$ and $\mathbf{Q}$ are covariance matrices and therefore positive definite and symmetric.
# Symmetry might get lost due to round-off errors in the update process. After each update
# we make sure that the result is still symmetric.
#
# The first way to do this uses the *NumPy* function *triu* that returns the upper triangular part of a matrix.
# The second one uses the *transpose* of a matrix. Recall that $\mathbf{S} = \mathbf{S}^\top$ for a symmetric matrix
# $\mathbf{S}$.

# In[16]:

def triu_enforce_symmetry(cov):
    dim1, dim2 = cov.shape
    assert dim1 == dim2
    return np.triu(cov, 0) + np.triu(cov, 1).T

def transpose_enforce_symmetry(cov):
    dim1, dim2 = cov.shape
    assert dim1 == dim2
    return 1/2*(cov + cov.T)


# # Update of the "*square root*" $\mathbf{Q}$ of the *covariance* matrix $\mathbf{C}$

# First, we calculate $\Delta \mathbf{C}_{n+1}$ as follows
#
# $$\Delta \mathbf{C}_{n+1} = (1-\lambda_\mathbf{C})\mathbb{1}_d + \lambda_\mathbf{C} \mathbf{z}_n \mathbf{z}_n^\top$$
#
# where $\mathbb{1}_d$ is the identity matrix, $\mathbf{z}_n$ is the $n$th sample of the multivariate standard Gaussian distribution, and $\lambda_\mathbf{C}$ is the strategy parameter used in the update of the covariance matrix $\mathbf{C}$.

# In[17]:

def delta_C(z_sample, pars):
    l_C = pars.l_C
    identity = pars.Id
    deltaC = (1-l_C)*identity + l_C*np.outer(z_sample, z_sample)
    #return transpose_enforce_symmetry(deltaC)
    return deltaC


# Next, we define $\Delta \mathbf{Q}_{n+1}$ as
#
# $$\Delta \mathbf{Q}_{n+1} \triangleq \sqrt{\Delta \mathbf{C}_{n+1}}$$

# In[18]:

def sqrtm(cov):
    D, B = la.eigh(cov)
    sqrtD = np.diag(np.sqrt(D))
    # Return the square root Q of the matrix C.
    return B @ sqrtD @ B.T


# Finally, we calculate $\mathbf{Q}_{n+1}$ as
# $$\mathbf{Q}_{n+1} = \mathbf{Q}_n \Delta \mathbf{Q}_{n+1}$$

# In[19]:

def normalize(cov):
    D, _ = cov.shape
    normalization_constant = la.det(cov)**(1/D)
    normalized_cov = cov/normalization_constant
    #det = la.det(normalized_cov)
    #np.testing.assert_almost_equal(det, 1.0)
    return normalized_cov


# In[20]:

def GaA_Q_update(z_sample, Q, pars):
    max_cond = pars.max_cond
    deltaC = delta_C(z_sample, pars)
    deltaQ = sqrtm(deltaC)
    Q_next = normalize(transpose_enforce_symmetry(Q @ deltaQ))
    if la.cond(Q_next) <= max_cond:
        return Q_next
    else:
        return Q


# # Update of the *covariance* matrix $\mathbf{C}$

# In[21]:

def GaA_C_update(C, mean, sample, pars):
    # Cf. p.10 of the MATLAB code of Mueller.
    l_C, max_cond = pars.l_C, pars.max_cond
    delta = mean - sample
    C_next = (1 - l_C)*C + l_C*np.outer(delta, delta)
    if la.cond(C_next) <= max_cond:
        return C_next
    else:
        return C


# # Gaussian Adaptation Sampling

# In[22]:

def Q_GaA_sampler(pars, target, initial_state, run_data):
    target_pdf, sp = target['pdf'], target['State Space']
    Origin, Id = sp['Origin'], sp['Id']

    ds, N = run_data.DataStore, run_data.N
    z_samples = get_samples(sp=sp, name='Z')

    # Set up and save the initial state
    m = x_current = initial_state
    sigma = 1
    Q = Id
    save_state(data_store=ds,
               step=0,
               state=x_current,
               value=target_pdf(x_current),
               accepted_p=True,
               mean=m,
               covariance=Q,
               scale=sigma,
               threshold=None)

    # Sample and save the state
    for n in range(1, N):
        z_sample = z_samples[n]
        x_proposed = Q_generate_GaA_candidate(mean=x_current,
                                              scale=sigma,
                                              Q=Q,
                                              z_sample=z_sample)
        accepted = acceptance_decision(x_current, x_proposed, target_pdf)
        if accepted:
            x_current = x_proposed
            sigma = expand(sigma, pars=pars)
            m = GaA_mean_update(mean=m, sample=x_proposed, pars=pars)
            Q = GaA_Q_update(Q=Q, z_sample=z_sample, pars=pars)
        else:
            sigma = contract(sigma, pars=pars)
        save_state(data_store=ds,
                   step=n,
                   state=x_current,
                   value=target_pdf(x_current),
                   accepted_p=accepted,
                   mean=m,
                   covariance=Q,
                   scale=sigma,
                   threshold=None)
    return run_data


# In[23]:

def C_GaA_sampler(pars, target, initial_state, run_data):
    target_pdf, sp = target['pdf'], target['State Space']
    Origin, Id = sp['Origin'], sp['Id']

    ds, N = run_data.DataStore, run_data.N
    z_samples = get_samples(sp=sp, name='Z')

    # Set up and save the initial state
    m = x_current = initial_state
    sigma = 1
    C = Id

    save_state(data_store=ds,
               step=0,
               state=x_current,
               value=target_pdf(x_current),
               accepted_p=True,
               mean=m,
               covariance=C,
               scale=sigma,
               threshold=None)

    # Sample and save the state
    for n in range(1, N):
        z_sample = z_samples[n]
        x_proposed = C_generate_GaA_candidate(mean=x_current,
                                              C=C,
                                              z_sample=z_sample)
        accepted = acceptance_decision(x_current, x_proposed, target_pdf)
        if accepted:
            x_current = x_proposed
            sigma = expand(sigma, pars=pars)
            # Update C with the deviation of the accepted sample from the OLD
            # mean, before the mean itself is moved to that sample; updating
            # the mean first would make the deviation zero since l_m = 1.
            C = GaA_C_update(C=C, mean=m, sample=x_proposed, pars=pars)
            m = GaA_mean_update(mean=m, sample=x_proposed, pars=pars)
        else:
            sigma = contract(sigma, pars=pars)
        save_state(data_store=ds,
                   step=n,
                   state=x_current,
                   value=target_pdf(x_current),
                   accepted_p=accepted,
                   mean=m,
                   covariance=C,
                   scale=sigma,
                   threshold=None)
    return run_data

--------------------------------------------------------------------------------
/L_AM_Sampling.py:
--------------------------------------------------------------------------------

# coding: utf-8

# # *Adaptive* MH using *Cholesky decomposition* of the covariance
#
# See the 1999 and 2001 papers of Haario et al.
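#
# This module is a variant of AM_Sampling.py: instead of the full empirical
# covariance matrix C it maintains the lower Cholesky factor L with C = L L^T,
# and L is adapted directly with rank-one updates (see rank_1_update below),
# so no factorization of C is needed when generating a candidate.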
# In[1]:

import numpy as np
import numpy.linalg as la
import matplotlib.pyplot as plt
import scipy as sp
import scipy.stats as ss
import math
import random
import numba
from collections import namedtuple
get_ipython().run_line_magic('precision', '4')
get_ipython().run_line_magic('matplotlib', 'inline')


# from importlib import reload
# reload(ut)

# In[2]:

from MH_Sampling import acceptance_decision
from FileHandling import save_state
from TestSuite import generate_state_space, generate_iid_samples, get_samples


# In[3]:

AM_Pars = namedtuple('AM_Pars',
                     ['Origin', 'Id',
                      'sigma_0', 'sigma_opt', 'L_0',
                      'z_samples'])

def init_AM_pars(sp):
    dim, origin, idty = sp['dim'], sp['Origin'], sp['Id']
    sigma_0, sigma_opt = 0.1/np.sqrt(dim), sp['sigma_opt']
    L_0 = idty
    return AM_Pars(Origin=origin, Id=idty,
                   sigma_0=sigma_0, sigma_opt=sigma_opt, L_0=L_0,
                   z_samples=get_samples(sp=sp, name='Z'))


# # Adaptive MH algorithm *AM*

# ## Generate the candidate next sample
#
# We consider a version of the *Adaptive Metropolis* (*AM*) sampler of Haario
# et al. (2001). We want to sample from the $d$-dimensional target distribution $\pi(\mathbf{x})$.
#
# We perform a Metropolis algorithm with proposal distribution $\mathbf{Q}_n$ at iteration $n$ given by
#
# $$\mathbf{Q}_n(\mathbf{x}, ·) = N(\mathbf{x}, \sigma_{0}^2 \mathbb{1}_d)$$
#
# for $n \leq 2d$, while for $n > 2d$
#
# $$\mathbf{Q}_n(\mathbf{x}, ·) = (1 − \beta) N(\mathbf{x}, \sigma_{opt}^2 \mathbf{C}_n) +
# \beta N(\mathbf{x}, \sigma_{0}^2 \mathbb{1}_d)$$
#
# where $\mathbf{C}_n$ is the current empirical estimate of the covariance of the target distribution
# based on the samples so far, $\sigma_{0}^2 = \frac{0.1^2}{d}$ and $\sigma_{opt}^2 = \frac{2.38^2}{d}$ are the initial and optimal scale, respectively, and $\beta$ is a small positive constant; we use $\beta = 0.05$.
#
# In other words, the next candidate is sampled from
#
# $$\mathbf{x}^{*} \sim \mathbf{Q}_n(\mathbf{x}, ·)$$
#
# The text above is adapted from Section 2 of Gareth O. Roberts and Jeffrey S. Rosenthal (2008),
# *Examples of Adaptive MCMC*.

# ## The proposal Cholesky factor and scale used at step $n$

# In[4]:

Factors = namedtuple('Factors',
                     ['Chol', 'Scale'])


# In[5]:

def get_prop_data(L, n, pars):
    beta = 0.05
    d, _ = L.shape
    sigma_0, sigma_opt, L_0 = pars.sigma_0, pars.sigma_opt, pars.L_0
    init, current = Factors(Chol=L_0, Scale=sigma_0), Factors(Chol=L, Scale=sigma_opt)
    init_period = 2*d
    if n <= init_period:
        return init
    else:
        return current if np.random.binomial(n=1, p=1-beta) else init


# # Generation of the candidate
#
# If the proposal distribution is the $d$-dimensional multivariate normal distribution $N(\pmb{m}, \pmb{C})$ then
# the next candidate $\pmb{x}^{*}$ is generated according to that distribution, i.e.
#
# $$\pmb{x}^{*} \sim N(\pmb{m}, \pmb{C})$$
#
# If $L$ is the lower Cholesky factor of $C$, i.e. $C = L L^\top$, this can be rewritten as
#
# $$\pmb{x}^{*} = \pmb{m} + L \pmb{z}$$
#
# where $\pmb{z} \sim N(\pmb{0}, \mathbb{1}_d)$ is a sample of the $d$-dimensional standard normal distribution.
#
# In case of $$\pmb{x}^{*} \sim N(\pmb{m}, \sigma^2 \pmb{C})$$ this becomes
#
# $$\pmb{x}^{*} = \pmb{m} + \sigma L \pmb{z}$$

# In[6]:

def C_generate_candidate(m, C, s):
    # Generate a candidate directly from N(m, s^2 C).
    return ss.multivariate_normal(mean=m, cov=s**2*C).rvs()


# In[7]:

def L_generate_candidate(m, L, s, z):
    return m + s*L@z


# In[8]:

# see "A More Efficient Rank-one Covariance Matrix Update for Evolution Strategies", Igel, Krause 2015,
# adapted slightly to incorporate alpha, beta != 1
@numba.jit(nopython=True)
def rank_1_update(L, u, alpha, beta):
    assert alpha > 0, 'Argument alpha should be positive'
    assert beta > 0, 'Argument beta should be positive'
    d = len(u)
    L = np.sqrt(alpha)*L  # Added
    b = 1
    nL = np.zeros_like(L)
    v = np.copy(u)  # Added
    for j in np.arange(d):
        nL[j,j] = np.sqrt(L[j,j]**2 + (beta/b)*(v[j]**2))
        gamma = b*L[j,j]**2 + beta*v[j]**2
        for k in range(j+1, d):
            v[k] = v[k] - (v[j]/L[j,j])*L[k,j]
            nL[k,j] = (nL[j,j]/L[j,j])*L[k,j] + (nL[j,j]*beta*v[j]/gamma)*v[k]
        b = b + beta*(v[j]**2/L[j,j]**2)
    return nL


# In[9]:

def update_moments(mean, L, sample, n):
    next_n = n + 1
    w = 1/next_n
    new_mean = mean + w*(sample - mean)
    # Rank-one update of the Cholesky factor with the deviation of the
    # sample from the current mean, cf. update_L below.
    new_L = rank_1_update(L=L, u=sample - mean, alpha=1-w, beta=w)
    return new_mean, new_L, next_n


# In[10]:

@numba.jit
def update_L(samples):
    N, d = samples.shape
    initial_period = 2*d
    initial_cov = np.cov(samples[:initial_period], rowvar=False)
    initial_mean = np.mean(samples[:initial_period], axis=0)
    C = initial_cov
    L = la.cholesky(initial_cov)
    mean = initial_mean
    for n in range(initial_period, len(samples)):
        sample = samples[n]
        w = 1/(n+1)
        L = rank_1_update(L, sample-mean, alpha=(n-1)/n, beta=w)
        mean = (1-w)*mean + w*sample
    return L@L.T


# In[11]:

def AM_sampler(pars, target, initial_state, run_data):
    ds, N = run_data.DataStore, run_data.N

    target_pdf = target['pdf']
    z_samples = pars.z_samples

    current = initial_state
    mean, L, sigma_0 = pars.Origin, pars.L_0, pars.sigma_0
    accepted = True
    d = len(initial_state)
    init_period = 2*d
    samples = []
    for n in range(init_period):
        save_state(data_store=ds, step=n,
                   state=current, value=target_pdf(current),
                   mean=mean, covariance=L, accepted_p=accepted)
        candidate = L_generate_candidate(m=current, L=L, s=sigma_0, z=z_samples[n])
        accepted = acceptance_decision(current=current, proposed=candidate, pdf=target_pdf)
        if accepted:
            current = candidate
        samples.append(current)
    # Calculate the first two moments at the end of the initial period.
    initial_cov = np.cov(samples, rowvar=False)
    initial_mean = np.mean(samples, axis=0)
    C = initial_cov
    L = la.cholesky(initial_cov)
    mean = initial_mean


    # Once the initial period is finished we start to adapt.
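    # From here on the proposal mixes the adapted pair (sigma_opt, L) with the
    # fixed initial pair (sigma_0, L_0), cf. get_prop_data above. The moments
    # are updated with the current state of the chain at every step, whether
    # the candidate was accepted or not.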
    for n in range(init_period, N):
        #if n%1000 == 0:
        #    print('n:', n)
        save_state(data_store=ds, step=n,
                   state=current, value=target_pdf(current),
                   mean=mean, covariance=L, accepted_p=accepted)

        p_L, p_sigma = get_prop_data(L=L, n=n, pars=pars)
        candidate = L_generate_candidate(m=current, L=p_L, s=p_sigma, z=z_samples[n])
        accepted = acceptance_decision(current=current, proposed=candidate, pdf=target_pdf)
        if accepted:
            current = candidate
        mean, L, _ = update_moments(mean, L, current, n)
    return run_data

--------------------------------------------------------------------------------
/MH_Sampling.py:
--------------------------------------------------------------------------------

# coding: utf-8

# # *Metropolis-Hastings* Sampling

# In[ ]:

import numpy as np
import numpy.linalg as la
import matplotlib.pyplot as plt
import scipy as sp
import scipy.stats as ss
import math
import random
from collections import namedtuple
import numba
#%precision 4
#%matplotlib inline


# In[ ]:

from FileHandling import save_state
from TestSuite import get_samples


# In[ ]:

def acceptance_decision(current, proposed, pdf):
    # Remark: 'accepted_p' includes the case where p_proposed > p_current,
    # since u, a random number between 0 and 1, is then
    # always less than the ratio p_proposed/p_current.
    # But for readability we make a distinction between the
    # two cases below.

    p_current, p_proposed = pdf(current), pdf(proposed)
    if p_current <= p_proposed:
        return True
    else:
        u = np.random.rand()
        return u <= p_proposed/p_current


# In[ ]:

# computing the Metropolis-Hastings acceptance in log space

def compose2(f, g):
    return lambda x: f(g(x))

def likelihood_acceptance_decision(current, proposed, log_pdf):
    # Work with log densities: accept when log u <= log p_proposed - log p_current,
    # which is equivalent to u <= p_proposed/p_current.
    # The first branch is redundant but kept for readability.

    p_current, p_proposed = log_pdf(current), log_pdf(proposed)
    if p_current <= p_proposed:
        return True
    else:
        u = np.random.rand()
        return np.log(u) <= p_proposed - p_current


# In[ ]:

## Proposal Distribution
# Samples are generated when a run is initialized.

MH_Pars = namedtuple('MH_Pars', ['Proposal'])

def init_MH_pars(sp):
    proposal = sp['Test Suite']['Proposal']
    return MH_Pars(Proposal=proposal)


# In[ ]:

def generate_candidate(center, delta):
    return center + delta


# # Metropolis-Hastings algorithm

# In[ ]:

def MH_sampler(pars, target, initial_state, run_data, C_generation=False, likelihood=True):
    ds, N = run_data.DataStore, run_data.N

    target_pdf = target['pdf']
    proposal_samples = pars.Proposal['Samples']

    current = initial_state
    accepted = True

    # The integration of the C- and L-variants still has to be done.
    #if C_generation:
    #    generation_function = generate_candidate
    #else:
    #    generation_function = L_generate_candidate

    if likelihood:
        decision_function, comparison_function = likelihood_acceptance_decision, compose2(np.log, target['pdf'])
    else:
        decision_function, comparison_function = acceptance_decision, target['pdf']

    for n in range(1, N):
        save_state(data_store=ds, step=n,
                   state=current, value=target_pdf(current),
                   accepted_p=accepted)
        proposed = generate_candidate(center=current, delta=proposal_samples[n])
        # Use the matching (log-)density in the acceptance decision.
        accepted = decision_function(current, proposed, comparison_function)
        if accepted:
            current = proposed
        else:  # The else clause is redundant but added for readability.
            current = current
    return run_data


# # Metropolis-Hastings using the Cholesky factor $L$ instead of the full covariance matrix $C$
#

# In[ ]:

def L_generate_candidate(center, L, scale, z_sample):
    return center + scale*L@z_sample


# In[ ]:

def L_MH_sampler(pars, target, initial_state, run_data, likelihood=True):
    ds, N = run_data.DataStore, run_data.N
    sp = target['State Space']
    opt_scale, L = sp['sigma_opt'], sp['Id']

    if likelihood:
        decision_function, comparison_function = likelihood_acceptance_decision, compose2(np.log, target['pdf'])
    else:
        decision_function, comparison_function = acceptance_decision, target['pdf']

    target_pdf = target['pdf']
    current = initial_state
    accepted = True

    z_samples = get_samples(sp=sp, name='Z')
    for n in range(1, N):
        save_state(data_store=ds, step=n,
                   state=current, value=target_pdf(current),
                   accepted_p=accepted)
        proposed = L_generate_candidate(center=current,
                                        L=L, scale=opt_scale,
                                        z_sample=z_samples[n])
        accepted = decision_function(current, proposed, comparison_function)
        if accepted:
            current = proposed
        else:  # The else clause is redundant but added for readability.
            current = current
    return run_data

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Adaptive Sampling

The project is created to distribute code written as part of my master thesis entitled ```Adaptive Markov Chain Monte Carlo for Bayesian Inference```.

Adaptive Markov Chain Monte Carlo (MCMC) and stochastic optimization methods are techniques for evaluating intractable integrals. Code for sampling and optimization algorithms, as found in their original papers, has been written in Python and tested against established benchmarks. In addition, an improvement has been made by incorporating adaptation into stochastic optimization methods, thereby transforming them into samplers. Namely, the Gaussian Adaptation (GaA) and Covariance Matrix Adaptation Evolution Strategy (CMA-ES) optimizers have been transformed into Metropolis-GaA and (1+1)-CMA samplers, respectively. Performance is quantified using existing convergence and performance measuring tools. Results show adaptive MCMCs with better convergence, mixing, and acceptance ratios.

## Getting Started
You will need Jupyter notebook with Python 3 and the modules listed below. For detailed information and examples of experiment runs, see ```Adaptive_MCMC_for_Bayesian_Inference.pdf```, Chapter 6: Experiments.
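Each sampler is also importable as a plain Python module. For example, a minimal sketch (assuming the repository root is on your Python path):

```
from TestSuite import generate_state_space
from AM_Sampling import init_AM_pars, AM_sampler
```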
### Python modules:

#### Five sampler modules
* Adaptive Metropolis: ```AM_Sampling.py```
* Covariance Matrix Adaptation: ```CMA_Sampling.py```
* Gaussian Adaptation: ```GaA_Sampling.py```
* Metropolis Hastings: ```MH_Sampling.py```
* Adaptive MH using the Cholesky decomposition of the covariance: ```L_AM_Sampling.py```

#### Test suite module
* Test suites found in Haario et al. (1999): ```TestSuite.py```


#### Supporting modules
* ```FileHandling.py```
* ```Visualization.py```


### Comparing samplers via autocorrelation
![autocorrelation](https://user-images.githubusercontent.com/16397101/39100333-195ac932-463d-11e8-87a2-b5a0bc12e4e9.png)


## Running the tests
The notebook
```Experiments.ipynb```
offers an easy way to run any of the five samplers and plot their results.

Open the Experiments notebook in Jupyter. You will also need the supporting and test modules imported, along with the required libraries as specified in ```Experiments.ipynb```.

### Example
To run the **Adaptive Metropolis** sampler, type ```AM``` instead of the current sampler ```CMA```. Sampler names are found in ```Experiments.ipynb``` under **The Samplers**.

In this case, the code:
```
SPEC = specify_run(dim=2, N=10000, name_target='Pi_4', name_sampler='CMA', run_idx=0)
DATA = execute_run(SPEC)
```

becomes:
```
SPEC = specify_run(dim=2, N=10000, name_target='Pi_4', name_sampler='AM', run_idx=0)
DATA = execute_run(SPEC)
```

Also, you might want to change the target distribution. To do so, you need to change ```name_target```.
For reference, see ```Adaptive_MCMC_for_Bayesian_Inference.pdf```, Chapter 6.

#### Biased coin example
![trials5](https://user-images.githubusercontent.com/16397101/39100228-bc3842bc-463b-11e8-84c7-393fccb19fd9.png)

## Built With
* [Jupyter](http://jupyter.org/) - Jupyter Notebooks
* [PyMC3](https://docs.pymc.io/) - Bayesian statistical modeling and Probabilistic Machine Learning focusing on advanced MCMC

## Authors

* **Prof. Dr. Bernard Manderik** - *Initial work*


## Acknowledgments

* Hat tip to Nixon Kipkorir Ronoh and Edna Chelangat Milgo

--------------------------------------------------------------------------------
/TestSuite.py:
--------------------------------------------------------------------------------

# coding: utf-8

# In[3]:


import numpy as np
import numpy.linalg as la
import matplotlib.pyplot as plt
import scipy as sp
import scipy.stats as ss
import math
import random
from collections import namedtuple
#get_ipython().magic('precision 4')
#get_ipython().magic('matplotlib inline')


# # State Space
# The dictionary *State Space* contains the dimension of the state space, the origin and identity matrix,
# and the optimal scale $\sigma_{opt}$ according to Gelman et al.: $\sigma_{opt} = 2.38/\sqrt{d}$,
# where $d$ is the dimension of the state space and $\sigma_{opt}$ is the scale of the isotropic proposal distribution used in the Metropolis-Hastings sampler.
#
# Later on we will add the target distributions of the test suite used in our experiments.
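# For example (illustration only): for dimension d = 4 the optimal scale
# defined below evaluates to sigma_opt = 2.38/sqrt(4) = 1.19.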
# In[2]:


def calculate_optimal_sigma(dim):
    return 2.38/np.sqrt(dim)

def state_space(dim):
    return {'dim': dim, 'Origin': np.zeros(dim), 'Id': np.eye(dim),
            'sigma_opt': calculate_optimal_sigma(dim)}


# ## A random state of the state space

# In[3]:


def generate_random_state(sp, min_range=-10, max_range=10):
    """Generates a random state in the state space that fits in the area to be plotted.
    """
    return np.random.uniform(low=min_range, high=max_range, size=sp['dim'])


# In[4]:


#random.seed(10) #REMARK BM: Random seed doesn't seem to work.
# Note: the states are generated with np.random, so np.random.seed would have
# to be used to make the runs reproducible.


# In[5]:


def generate_initial_states(sp, nb_runs):
    initial_states = {i: generate_random_state(sp) for i in np.arange(nb_runs)}
    # Only update if the key does not exist yet. Check out how to do this.
    sp.update({'Initial States': initial_states})


# # Test Suite of Target Distributions
#
# ## Uncorrelated and Correlated Gaussian Distributions
#
# $\pi_1$ is the uncorrelated Gaussian distribution with covariance matrix
#
# $$
# C_u=
# \begin{pmatrix}
# 100 & 0 \\
# 0 & 1
# \end{pmatrix}
# $$
#
# and $\pi_2$ is the correlated Gaussian distribution with
# covariance matrix
#
# $$
# C_c=
# \begin{pmatrix}
# 50.5 & 49.5 \\
# 49.5 & 50.5
# \end{pmatrix}
# $$

# # Covariance Matrix

# In[6]:


def generate_rotation_matrix(theta):
    # The rotation matrix is 2-dimensional
    return np.array([[np.cos(theta), -np.sin(theta)],
                     [np.sin(theta), np.cos(theta)]])

def generate_correlated_cov(uncorrelated_cov, theta):
    correlated_cov = np.copy(uncorrelated_cov)
    R = generate_rotation_matrix(theta)
    R_inv = la.inv(R)
    # Rotate the first 2 dimensions only and leave the other dimensions
    # of the covariance matrix intact.
    correlated_cov[:2, :2] = R @ uncorrelated_cov[:2, :2] @ R_inv
    return correlated_cov


# ### We could also use the fact that the transpose of a rotation is also its inverse.

# def alt_generate_correlated_cov(uncorrelated_cov, theta):
112 | # correlated_cov = np.copy(uncorrelated_cov) 113 | # R = generate_rotation_matrix(theta) 114 | # correlated_cov[:2, :2] = R @ uncorrelated_cov[:2,:2] @ R.T 115 | # return correlated_cov 116 | 117 | # ## Contour Functions corresponding with the Target Distributions 118 | # 119 | # ### Standard Ellipse and General Ellipse 120 | # 121 | # When the ellips with equation 122 | # 123 | # $$\left( \frac{x_1}{a} \right)^2 + \left( \frac{x_2}{b} \right)^2 = 1$$ 124 | # 125 | # is rotated over an angle $\theta$ then the equation of that ellips becomes 126 | # 127 | # $$\underbrace{\left(\frac{\cos^2\theta}{a^2} + \frac{\sin^2\theta}{b^2}\right)}_\text{A} x_1^ 2 + 128 | # \underbrace{\left( \frac{\sin^2\theta}{a^2} + \frac{\cos^2\theta}{b^2} \right)}_\text{C} x_2^2 129 | # + 2 \underbrace{\cos \theta \sin \theta \left( \frac{1}{a^2} - \frac{1}{b^2} \right)}_\text{B} x_1 x_2 = 1$$ 130 | # 131 | # or 132 | # 133 | # $$A x_1^2 + 2 B x_1 x_2 + C x_2^2 = 1$$ 134 | # 135 | # where 136 | # 137 | # $$B^2 - A C < 0$$ 138 | # actually 139 | # 140 | # $$B^2 - A C = -1/(ab)^2$$ 141 | 142 | # In[7]: 143 | 144 | 145 | def get_ellipse_parameters(cov): 146 | 147 | """Get the first 2 eigenvalues and their angle of covariance matrix. 148 | The eigenvalues are returned in descending order together with 149 | the angle of rotation (in radians). The eigenvalues correspond with 150 | half the length, a and b, of these two main axes of 151 | the general ellipse. 152 | If the angle is small enough, meaning that the covariance matrix 153 | can be considered diagonal, 0.0 is returned.""" 154 | 155 | e, v = la.eig(cov) 156 | e_1, e_2, *_ = e 157 | a, b = np.sqrt(e_1), np.sqrt(e_2) 158 | v_a, v_b, *_ = v 159 | # a must be at least b 160 | if a < b: 161 | a, b = b, a 162 | v_a, v_b = v_b, v_a 163 | cos, *_ = v_a 164 | theta = np.arccos(cos) 165 | if np.isclose(theta, 0): 166 | theta = 0.0 167 | return a, b, theta 168 | 169 | 170 | # In[8]: 171 | 172 | 173 | def calculate_ellipse_coefficients(a, b, theta): 174 | sin, cos = np.sin(theta), np.cos(theta) 175 | cos_sqd, sin_sqd = cos**2, sin**2 176 | a_sqd, b_sqd = a**2, b**2 177 | A = cos_sqd/a_sqd + sin_sqd/b_sqd 178 | C = sin_sqd/a_sqd + cos_sqd/b_sqd 179 | B = (1/a_sqd - 1/b_sqd)*sin*cos 180 | return A, B, C 181 | 182 | 183 | # In[9]: 184 | 185 | 186 | def get_Gaussian_contour(cov): 187 | a, b, theta = get_ellipse_parameters(cov) 188 | A, B, C = calculate_ellipse_coefficients(a, b, theta) 189 | return lambda x1, x2: A*x1**2 + 2*B*x1*x2 + C*x2**2 190 | 191 | 192 | # # Distribution 193 | # 194 | # We have three kind of distributions in the test suite 195 | # 1. Gaussian distributions 196 | # 2. mixture of Gaussians 197 | # 3. transformed Gaussians, the so called twist distributions 198 | # 199 | # The second kind is not implemented yet. 200 | # 201 | # 202 | # The dictionary *Gaussian* contains the following fields 203 | # * its *Name* 204 | # * the *State Space* on which the probability distribution is defined 205 | # * its *probabibility density function* or *pdf* 206 | # * *Samples* that are *independent and identically distributed*. These samples will be compared to 207 | # the samples generated by the MCMC samplers studied. These samples are added at run time. 208 | # * the *Contour Function* used to plot the 209 | # * *Contour Levels* corresponding to the preset confidence levels, cfr. the global variable 210 | # CONFIDENCE_LEVELS for the values used. The values of the $\chi^2$ distribution corresponding to 211 | # the confidence levels used: 67, 90, 95 and 99 percent. 
# # Distribution
#
# We have three kinds of distributions in the test suite:
# 1. Gaussian distributions
# 2. mixtures of Gaussians
# 3. transformed Gaussians, the so-called twist distributions
#
# The second kind is not implemented yet.
#
#
# The dictionary *Gaussian* contains the following fields
# * its *Name*
# * the *State Space* on which the probability distribution is defined
# * its *probability density function* or *pdf*
# * *Samples* that are *independent and identically distributed*. These samples will be compared to
# the samples generated by the MCMC samplers studied. They are added at run time.
# * the *Contour Function* used to plot the contour lines of the distribution
# * *Contour Levels* corresponding to the preset confidence levels, cf. the default
# confidence_levels argument of get_chi2s below: the values of the $\chi^2$ distribution
# corresponding to the 67, 90, 95 and 99 percent confidence levels.
#
# In addition to the fields of the Gaussian dictionary, a *non-Gaussian* distribution contains
# the fields *Generator* and *Transformation*: the latter is the function that generates its
# i.i.d. samples from the samples of the generating Gaussian.

# ## Gaussian Distributions in the Test Suite

# ### Draw the contour lines corresponding to preset *confidence levels*

# In[10]:


def get_chi2s(df, confidence_levels=[0.67, 0.90, 0.95, 0.99]):
    """ppf stands for the percent point function (the inverse of the cdf, i.e. percentiles)."""
    contour_levels = [ss.chi2.ppf(conf, df) for conf in confidence_levels]
    return contour_levels


# ### Generate the Gaussians given their covariances

# In[11]:


def generate_Gaussian(sp, name, mean, cov):
    rv = ss.multivariate_normal(mean=mean, cov=cov)
    return {'Name': name,
            'State Space': sp,
            'pdf': rv.pdf,
            'Mean': mean,
            'Covariance': cov,
            'Contour Function': get_Gaussian_contour(cov),
            # The contours are plotted in a 2-dimensional projection,
            # hence 2 degrees of freedom. The i.i.d. 'Samples' are
            # added at run time.
            'Contour Levels': get_chi2s(df=2),
            }


# In[12]:


def generate_covs(sp):
    # The standard normal Z has the identity matrix as covariance.
    identity = sp['Id']

    # The optimal isotropic proposal has covariance $\sigma_{opt}^2 \mathbb{1}_d$.
    var_opt = sp['sigma_opt']**2
    prop_cov = var_opt*identity

    # Pi_1
    Pi_1_cov = np.copy(identity)
    Pi_1_cov[0, 0] = 100

    # Pi_2
    Pi_2_cov = generate_correlated_cov(Pi_1_cov, np.pi/4)

    # Pi_rnd: M @ M.T is symmetric and positive semi-definite,
    # so it is a valid (random) covariance matrix.
    d = sp['dim']
    M = np.random.normal(size=(d, d))
    Pi_rnd = M@M.T
    return {'Z': identity, 'Proposal': prop_cov, 'Pi_1': Pi_1_cov,
            'Pi_2': Pi_2_cov, 'Pi_rnd': Pi_rnd}


def generate_all_Gaussians(sp):
    named_covs = generate_covs(sp)
    gaussians = {name: generate_Gaussian(sp=sp, name=name, mean=sp['Origin'], cov=cov)
                 for name, cov in named_covs.items()}
    return gaussians
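# An illustrative (commented) use of the generators above, mirroring the test
# cells at the end of this file; `sp2`, `gaussians` and `pi_1` are hypothetical names.

# sp2 = state_space(dim=2)
# gaussians = generate_all_Gaussians(sp2)
# pi_1 = gaussians['Pi_1']
# print(pi_1['Covariance'])          # expected: diag(100, 1)
# print(pi_1['pdf'](sp2['Origin']))  # the density at the mean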
# ## Proposal Generator
#
# The **isotropic** (radially symmetric) proposal generator used by the Metropolis-Hastings sampler.
# Its *mean* is the origin and its *spread* is $\sigma$.

# In[13]:


def generate_isotropic_Gaussian(sp, sigma):
    origin, identity = sp['Origin'], sp['Id']
    diagonal = sigma**2 * identity
    return generate_Gaussian(sp=sp, name='Isotropic', mean=origin, cov=diagonal)


# In[14]:


def generate_random_Gaussian(sp):
    d, origin = sp['dim'], sp['Origin']
    M = np.random.normal(size=(d, d))
    random_cov = M@M.T
    return generate_Gaussian(sp=sp, name='Random', mean=origin, cov=random_cov)


# ## Twisted Distributions in the Test Suite
#
# The twist $\phi_b$ bends the second coordinate by a parabola in the first one.
# Since its Jacobian determinant is 1 (the map is volume-preserving), composing
# a Gaussian pdf with $\phi_b$ again yields a properly normalized density.

# In[15]:


def f_twist(b):
    def phi_b(x):
        """Argument and the value returned are d-dimensional numpy arrays."""
        y = np.copy(x)
        x1, x2 = x[:2]
        y[0], y[1] = x1, x2 + b*x1**2 - 100*b
        return y

    def phi_b_inv(y):
        """Argument and the value returned are d-dimensional numpy arrays."""
        x = np.copy(y)
        y1, y2 = y[:2]
        x[0], x[1] = y1, y2 - b*y1**2 + 100*b
        return x
    return phi_b, phi_b_inv

def compose2(f, g):
    return lambda x: f(g(x))


# In[16]:


def apply_to(transformation, pts):
    """Used to generate samples of a twist distribution given samples of a Gaussian one.
    The argument transformation, e.g. phi_b, maps d-dimensional vectors to
    d-dimensional vectors; it is applied to every point in pts. The result is
    an array of the transformed points.
    """
    transformed_pts = np.zeros_like(pts)
    for i, pt in enumerate(pts):
        transformed_pts[i] = transformation(pt)
    return transformed_pts


# In[17]:


def apply(transformation):
    return lambda pts: apply_to(transformation, pts)


# In[18]:


def get_twisted_contour(gaussian, b):
    cov = gaussian['Covariance']
    f = get_Gaussian_contour(cov)
    return lambda x1, x2: f(x1, x2 + b*x1**2 - 100*b)


# In[19]:


def generate_twist(gaussian, b, name):
    # The twisted distribution is a transformation of
    # the uncorrelated Gaussian distribution 'gaussian'.
    transformed_distr = gaussian.copy()
    twist_function, inverse_twist_function = f_twist(b=b)
    transformed_pdf = compose2(gaussian['pdf'], twist_function)
    contour_function = get_twisted_contour(gaussian=gaussian, b=b)
    transformed_distr.update({'Name': name,
                              'Generator': gaussian,
                              'pdf': transformed_pdf,
                              'Contour Function': contour_function,
                              'Transformation': apply(inverse_twist_function)})
    return transformed_distr


# In[20]:


def generate_all_twists(gaussian, b_values, names):
    twists = {name: generate_twist(gaussian, b, name)
              for b, name in zip(b_values, names)}
    return twists
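# A quick (commented) check of the twist defined above, not part of the
# original file: phi_b and phi_b_inv should compose to the identity, and the
# twist leaves the first coordinate untouched. The names below are hypothetical.

# phi, phi_inv = f_twist(b=0.03)
# x = np.array([1.0, 2.0])
# print(np.allclose(phi_inv(phi(x)), x))  # expected: True
# print(phi(x)[0] == x[0])                # expected: True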
# In[21]:


def generate_test_suite(sp):
    gaussians = generate_all_Gaussians(sp)
    twists = generate_all_twists(gaussian=gaussians['Pi_1'],
                                 b_values=[0.03, 0.1],
                                 names=['Pi_3', 'Pi_4'])
    sp.update({'Test Suite': {**gaussians, **twists}})


# In[22]:


def generate_state_space(dim, nb_runs=100, N=None):
    # N is accepted for compatibility with existing callers but is not used
    # here; i.i.d. samples are generated separately with generate_iid_samples.
    sp = state_space(dim=dim)
    generate_test_suite(sp)
    generate_initial_states(sp=sp, nb_runs=nb_runs)
    return sp


# ### Generate independent and identically distributed (i.i.d.) samples
#
# These samples will be generated when we initialize a run. They are compared to the correlated samples generated by an MCMC sampler.

# In[23]:


def iid_samples_Gaussian(gaussian, N):
    mean, cov = gaussian['Mean'], gaussian['Covariance']
    rv = ss.multivariate_normal(mean=mean, cov=cov)
    samples = rv.rvs(size=N)
    gaussian.update({'Samples': samples})


# ### Generate i.i.d. samples of a transformed Gaussian distribution.
# These samples will be generated when we initialize a run. They are compared to the correlated samples generated by an MCMC sampler.

# In[24]:


def iid_samples_transformed_Gaussian(distr, N):
    # Samples are generated by transforming the random samples of
    # the generating Gaussian distribution.
    generator = distr['Generator']
    transformation = distr['Transformation']
    if 'Samples' not in generator:
        iid_samples_Gaussian(generator, N)
    transformed_samples = transformation(generator['Samples'])
    distr.update({'Samples': transformed_samples})


# ## Generate i.i.d. samples for the whole Test Suite

# In[25]:


def generate_iid_samples(sp, N):
    test_suite = sp['Test Suite']
    for name, distr in test_suite.items():
        if 'Generator' not in distr:
            iid_samples_Gaussian(gaussian=distr, N=N)
        else:
            iid_samples_transformed_Gaussian(distr=distr, N=N)


# ## Getter functions for the samples of a distribution

# In[26]:


def get_distribution(sp, name):
    return sp['Test Suite'][name]

def get_samples(sp, name):
    return get_distribution(sp, name)['Samples']


# # Time to test

# In[27]:


def inspect(sp, field):
    test_suite = sp['Test Suite']
    for key, distr in test_suite.items():
        print(key, distr[field])

#inspect(SP, 'Covariance')


# In[28]:


def inspect_Gaussian(sp, name_gaussian):
    gaussian = sp['Test Suite'][name_gaussian]
    print(gaussian['Name'])
    print(gaussian['Mean'])
    print(gaussian['Covariance'])
    print(gaussian['Samples'][:5])

def inspect_transformed_Gaussian(sp, name_distr):
    distr = sp['Test Suite'][name_distr]
    print(distr['Name'])
    print(distr['Mean'])
    print(distr['Covariance'])
    inspect_Gaussian(sp, distr['Generator']['Name'])
    print(distr['Samples'][:5])

#inspect_transformed_Gaussian(SP, 'Pi_4')


# SP = generate_state_space(dim=2, nb_runs=10)
# generate_iid_samples(SP, N=1000)
# TESTSUITE = SP['Test Suite']

# Z_samples = get_samples(SP, name='Z')
#
# prop = SP['Test Suite']['Proposal']
# prop_cov = prop['Covariance']
# prop_samples = prop['Samples']
# # To give standard normal samples the proposal covariance, colour them with
# # a matrix square root of the covariance (its Cholesky factor), not with the
# # covariance itself. Here prop_cov is sigma_opt^2 * Id, so this amounts to
# # rescaling by sigma_opt.
# samples = Z_samples @ la.cholesky(prop_cov).T
#
# samples[:10], prop_samples[:10]
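# A follow-up check (illustrative, not in the original file): the empirical
# covariance of the coloured samples should be close to prop_cov.

# empirical_cov = np.cov(samples, rowvar=False)
# print(empirical_cov)
# print(prop_cov)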
--------------------------------------------------------------------------------
/Visualization.py:
--------------------------------------------------------------------------------

# coding: utf-8

# In[1]:


import numpy as np
import numpy.linalg as la
import matplotlib.pyplot as plt
import scipy as sp
import scipy.stats as ss
import math
import random
from collections import namedtuple
#get_ipython().magic('precision 4')
#get_ipython().magic('matplotlib inline')


# # *Visualize* the results of an *MCMC* run
#
# ## Set Up the Grid
#
# The values of $x_{min}, x_{max}, {nb}_{x}, y_{min}, y_{max}, \text{ and } {nb}_{y}$ depend on where the **probability mass** of the **target distribution** is located, i.e. where the probability density function is sufficiently 'large'.

# In[2]:


Grid = namedtuple('Grid', ['x_min', 'x_max', 'y_min', 'y_max', 'X', 'Y'])

def make_grid(x_min=-30.0, x_max=30.0, nb_x=100,
              y_min=-30.0, y_max=30.0, nb_y=100):
    x_list = np.linspace(x_min, x_max, nb_x)
    y_list = np.linspace(y_min, y_max, nb_y)
    x, y = np.meshgrid(x_list, y_list)
    return Grid(x_min=x_min, x_max=x_max, y_min=y_min, y_max=y_max, X=x, Y=y)

GRID = make_grid()


# In[3]:


# In all functions below, the precision matrix is the inverse of the
# covariance matrix. For 1-dimensional arrays the transpose is a no-op,
# so delta @ precision @ delta suffices.

def Mahalanobis_distance(mean, point, precision):
    delta = mean - point
    return np.sqrt(delta @ precision @ delta)

def squared_Mahalanobis_distance(mean, point, precision):
    # The mean must be passed in explicitly; the original version
    # referred to an undefined global `mean`.
    delta = mean - point
    return delta @ precision @ delta

def Mahalanobis_distance_to_origin(point, precision):
    return np.sqrt(point @ precision @ point)

def squared_Mahalanobis_distance_to_origin(point, precision):
    return point @ precision @ point

def calculate_fractions(distribution, samples, burnin_pct=0):
    precision = la.inv(distribution['Covariance'])
    end_burnin = burnin_pct*len(samples)//100
    samples_at_equilibrium = samples[end_burnin:]
    # Collect the squared distances in an array so that the comparison
    # with a contour level broadcasts elementwise.
    d_sq = np.array([squared_Mahalanobis_distance_to_origin(sample, precision)
                     for sample in samples_at_equilibrium])
    return [np.mean(d_sq <= contour_level)
            for contour_level in distribution['Contour Levels']]


# # the histogram of the distances
# # (`Distances` is assumed to hold the sampled distances; note that
# # matplotlib's old `normed` argument has been replaced by `density`)
# n, bins, patches = plt.hist(Distances, 50, density=True, facecolor='green', alpha=0.75)
#
# plt.xlabel('Distance to the Mean')
# plt.ylabel('Relative Frequency')
# plt.title(r'$\mathrm{Histogram\ of\ Sample\ Distance\ to\ the\ Mean}$')
# plt.grid(True);

# # cumulative distribution of the distances
# values, base = np.histogram(Distances, bins=100)
# # evaluate the cumulative
# cumulative = np.cumsum(values)
# # plot the cumulative function
# plt.plot(base[:-1], cumulative, c='blue');

# fig = plt.figure("i.i.d.", figsize=(7, 7))
# ax = fig.add_subplot(1, 1, 1)
# subplot(ax, Pi_2, Pi_2['Samples'][::1000], dim1=0, dim2=1,
#         title='Distribution of i.i.d. generated samples.')

# ## Contour Lines corresponding to given Confidence Levels
#
# Next we plot the contour lines corresponding to the 67, 90, 95 and 99 percent confidence levels, using the corresponding values of the $\chi^2$-distribution. In case of a bivariate distribution we have 2 **degrees of freedom**. The values of this distribution can be found at the webpage https://people.richland.edu/james/lecture/m170/tbl-chi.html for instance.
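# A small (commented) check of the fraction machinery, not part of the
# original file, assuming a 2-dimensional standard normal: the fraction of
# squared Mahalanobis distances below the 95 percent chi-square quantile
# should be close to 0.95.

# zs = ss.multivariate_normal(mean=np.zeros(2), cov=np.eye(2)).rvs(size=10000)
# precision = np.eye(2)
# d_sq = np.array([squared_Mahalanobis_distance_to_origin(z, precision) for z in zs])
# level_95 = ss.chi2.ppf(0.95, df=2)
# print(np.mean(d_sq <= level_95))  # expected: approximately 0.95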
# In[4]:


def plot_contour_lines(ax, distribution, dim1, dim2):
    global GRID
    X, Y = GRID.X, GRID.Y
    # Plot the contour lines. Since we project onto a 2-dimensional subspace,
    # we use 2 degrees of freedom instead of the dimension of the state space.
    # dim1 and dim2 are accepted for symmetry with scatter_samples; the
    # contour function itself is defined in the first two dimensions.
    contour_function = distribution['Contour Function']
    contour_levels = distribution['Contour Levels']
    Z = contour_function(X, Y)
    ax.contour(X, Y, Z, contour_levels)

def scatter_samples(ax, samples, dim1, dim2):
    ax.scatter(samples[:, dim1], samples[:, dim2])

def subplot(ax, distribution, samples, dim1, dim2, title):
    ax.set_title(title, fontweight='bold', color='blue', fontsize=14)
    ax.axis([GRID.x_min, GRID.x_max, GRID.y_min, GRID.y_max])
    ax.set_xlabel('Dimension ' + str(dim1))
    ax.set_ylabel('Dimension ' + str(dim2))
    plot_contour_lines(ax, distribution, dim1, dim2)
    scatter_samples(ax, samples, dim1, dim2)


# In[5]:


def compare_to_iid_samples(run_data, nb_samples, dim1=0, dim2=1, burnin_pct=50):
    global GRID
    fig, (ax_left, ax_right) = plt.subplots(nrows=1, ncols=2, figsize=(15, 7))
    target = run_data.Target
    # Data to be plotted.
    step = run_data.N//nb_samples
    mcmc_samples = run_data.DataStore['States']
    iid_samples = target['Samples']
    mcmc_samples_2_display = mcmc_samples[::step]
    iid_samples_2_display = iid_samples[::step]
    mcmc_fractions = calculate_fractions(target, mcmc_samples, burnin_pct)
    iid_fractions = calculate_fractions(target, iid_samples, burnin_pct)

    # Information to be shown.
    s_name = run_data.Sampler['Name']
    burnin_str = 'Burn in used is {:d} percent of the generated samples.'
    burnin_info = burnin_str.format(burnin_pct)
    mcmc_str = '{:s} Fractions: {:1.5f}, {:1.5f}, {:1.5f}, and {:1.5f}'
    mcmc_info = mcmc_str.format(s_name, *mcmc_fractions)
    iid_str = 'IID Fractions: {:1.5f}, {:1.5f}, {:1.5f}, and {:1.5f}'
    iid_info = iid_str.format(*iid_fractions)
    title_mcmc = '{:s} Generated'.format(s_name)
    title_iid = 'IID Generated'
    suptitle_str = 'Comparison of the {:s} (left) vs. the IID (right) sample distribution'
    suptitle = suptitle_str.format(s_name)

    # Display everything.
    print(burnin_info)
    print(mcmc_info)
    print(iid_info)
    fig.suptitle(suptitle, fontweight='bold', color='red', fontsize=18)
    subplot(ax_left, target, mcmc_samples_2_display, dim1, dim2, title=title_mcmc)
    subplot(ax_right, target, iid_samples_2_display, dim1, dim2, title=title_iid)
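# A hypothetical usage sketch: `run_data` would come from one of the samplers
# in this repository (e.g. AM_sampler); none of the names below are defined
# in this file.

# run_data = AM_sampler(pars=pars, target=target, initial_state=x0, run_data=run_data)
# compare_to_iid_samples(run_data, nb_samples=1000, burnin_pct=50)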
# In[6]:


def plot_samples(run_data, nb_samples, dim1=0, dim2=1, burnin_pct=50):
    global GRID
    # New figure window for the current sampling method.
    s_name = run_data.Sampler['Name']
    fig = plt.figure(s_name, figsize=(7, 7))
    ax = fig.add_subplot(1, 1, 1)
    # Data to be plotted. (burnin_pct is accepted for API symmetry with
    # compare_to_iid_samples but is not used here.)
    target = run_data.Target
    step = run_data.N//nb_samples
    mcmc_samples_2_display = run_data.DataStore['States'][::step]
    # Information to be shown.
    fig_title_str = 'Distribution of samples generated by {:s}'
    fig_title = fig_title_str.format(s_name)
    # Plot everything.
    subplot(ax, target, mcmc_samples_2_display, dim1, dim2, title=fig_title)

def subplot_2(ax, samples, dim1, dim2, title, color):
    ax.set_title(title, fontweight='bold', fontsize=14)
    ax.set_xlabel('Dimension ' + str(dim1))
    ax.set_ylabel('Dimension ' + str(dim2))
    ax.scatter(samples[:, dim1], samples[:, dim2], color=color)

def compare_sample_spread(dim1, dim2, list_of_samples, titles, colors):
    # Ensure that dim1 and dim2 are less than the dimension of the state space.
    _, dim = list_of_samples[0].shape
    assert dim1 < dim, "dim1 should be less than %r" % dim
    assert dim2 < dim, "dim2 should be less than %r" % dim

    # Generate the subplots.
    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(15, 7), sharex='col', sharey='row')
    for ax, samples, title, color in zip(axes, list_of_samples, titles, colors):
        subplot_2(ax=ax, samples=samples, dim1=dim1, dim2=dim2, title=title, color=color)

--------------------------------------------------------------------------------