├── .gitignore
├── LICENSE
├── PMCMC-tutorial.Rproj
├── R
│   ├── ReedFrost.R
│   └── Volatility_SIS.R
├── README.md
├── assets
│   ├── andre_estimates_21_02.txt
│   └── references.bib
├── dureauLibbi.Rmd
├── dureauLibbi.html
├── pmcmcTutorial.Rmd
├── pmcmcTutorial.html
├── rwmodel.Rmd
└── rwmodel.html
/.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 akira-endo 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /PMCMC-tutorial.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | -------------------------------------------------------------------------------- /R/ReedFrost.R: -------------------------------------------------------------------------------- 1 | #' A function to simulate epidemics using the Reed-Frost model 2 | #' 3 | #' This function allows you to simulate epidemics. 4 | #' @param Nsim Number of epidemics to simulate.
5 | #' @keywords Reed-Frost 6 | #' @export 7 | #' @examples 8 | #' simulateReedFrost() 9 | 10 | simulateReedFrost <- function(Nsim=100000, Npop=10000, p=0.00015, I0=1, Nit=50, fig=T) 11 | { 12 | S0=Npop-I0 13 | I=t(matrix(rep(c(I0,rep(0,Nit-1)),Nsim),ncol = Nsim)) 14 | S=t(matrix(rep(c(S0,rep(0,Nit-1)),Nsim),ncol = Nsim)) 15 | for(t in 2:Nit) 16 | { 17 | I[,t]=rbinom(Nsim,size=S[,t-1],1-(1-p)^I[,t-1]) 18 | S[,t]=S[,t-1]-I[,t] 19 | } 20 | 21 | if(fig) 22 | hist(apply(I,1,max),breaks = 100, freq = T) 23 | 24 | return(list(S=S,I=I)) 25 | } 26 | 27 | RF_with_obs<-function(Npop=10000, p=0.00015, Np = 1, p_obs = 0.05, size = 2) 28 | { 29 | initial_pop= Npop -rpois(n= Np, lambda = 5)#Npop-rpois(n = 1, lambda = 20) 30 | initial_I=Npop- initial_pop #rpois(n=1, lambda = 5) 31 | simu_epi=simulateReedFrost(Nsim=1,Npop=initial_pop, p=p, I0=initial_I, fig=F) 32 | X<-cbind(simu_epi$S[1,],simu_epi$I[1,]) #the "hidden" Markov chain 33 | Y<-rnbinom(n=length(X[,1]),mu=p_obs*X[,2], size=size) #the observed process 34 | 35 | return(list(X=X,Y=Y)) 36 | } 37 | 38 | RF_SIR <- function(Y, Np=1000, Npop=10000, p=0.00015, p_obs = 0.05, size = 2) 39 | { 40 | N=length(Y) 41 | 42 | #Sequential importance sampling 43 | Xp<-array(rep(0,2*N*Np),dim=c(Np,N,2)) #particles 44 | gammap<-matrix(rep(0,N*Np),ncol=N) #partial likelihood 45 | wp<-matrix(rep(0,N*Np),ncol=N) #unnormalised importance weights 46 | Wp<-matrix(rep(0,N*Np),ncol=N) #normalised importance weights 47 | #Xrp<-matrix(rep(0,N*Np),ncol=N) #resampled particles 48 | A<-matrix(rep(0,N*Np),ncol=N) #Ancestry of particle 49 | 50 | #Initialisation of the algorithm 51 | Xp[,1,1]<-Npop -rpois(n= Np, lambda = 5)#-rpois(n = Np, lambda = 20) #initialisation of the size of susceptible population for each particle 52 | Xp[,1,2]<-Npop- Xp[,1,1] # rpois(n= Np, lambda = 5) #initialisation of the size of the infectious population for each particle 53 | gammap[,1]<-dpois(Npop-Xp[,1,1], lambda = 20)*dpois(Xp[,1,2],lambda=5)*dnbinom(Y[1],mu=p_obs*Xp[,1,2],size=size) 54 | 55 | wp[,1]<-gammap[,1]/(dpois(Npop-Xp[,1,1], lambda = 20)*dpois(Xp[,1,2],lambda=5)) 56 | Wp[,1]<-wp[,1]/sum(wp[,1]) 57 | 58 | #Sequential calculation of particles and importance weights 59 | for(i in 2:N) 60 | { 61 | #Resampling step -using systematic resampling 62 | U1<-runif(1,min = 0,max = 1/Np) 63 | cumWj<-0 64 | lU<-U1 65 | index<-1 66 | for(j in 1:Np) 67 | { 68 | cumWjminus<-cumWj 69 | cumWj<-cumWj+Wp[j,i-1] 70 | if(lU% 70 | rowSums() 71 | y <- data.frame(value = v) %>% 72 | mutate(time = seq(7, by = 7, length.out = n())) %>% 73 | dplyr::select(time, value) 74 | ncores <- 8 75 | minParticles <- max(ncores, 16) 76 | ``` 77 | 78 | ```{r model, echo = T} 79 | model_str <- " 80 | model dureau { 81 | obs y 82 | 83 | state S 84 | state E 85 | state I 86 | state R 87 | state x 88 | 89 | state Z 90 | 91 | input N 92 | param k 93 | param gamma 94 | param sigma // Noise driver 95 | param E0 96 | param I0 97 | param R0 98 | param x0 99 | param tau 100 | 101 | sub parameter { 102 | k ~ truncated_gaussian(1.59, 0.02, lower = 0) // k is the period here, not the rate, i.e.
1/k is the rate 103 | gamma ~ truncated_gaussian(1.08, 0.075, lower = 0) // gamma is the period, not the rate 104 | sigma ~ uniform(0,1) 105 | x0 ~ uniform(-5,2) 106 | I0 ~ uniform(-16, -9) 107 | E0 ~ uniform(-16, -9) 108 | R0 ~ truncated_gaussian(0.15, 0.15, lower = 0, upper = 1) 109 | tau ~ uniform(0, 1) 110 | } 111 | 112 | sub initial { 113 | S <- N 114 | R <- R0*S 115 | S <- S - R 116 | 117 | E <- exp(E0 + log(S)) 118 | S <- S - E 119 | I <- exp(I0 + log(S)) 120 | S <- S - I 121 | x <- x0 122 | Z <- 0 123 | } 124 | 125 | sub transition(delta = 1) { 126 | Z <- ((t_now) % 7 == 0 ? 0 : Z) 127 | noise e 128 | e ~ wiener() 129 | ode(alg = 'RK4(3)', h = 1.0, atoler = 1.0e-3, rtoler = 1.0e-8) { 130 | dx/dt = sigma*e 131 | dS/dt = -exp(x)*S*I/N 132 | dE/dt = exp(x)*S*I/N - E/k 133 | dI/dt = E/k-I/gamma 134 | dR/dt = I/gamma 135 | dZ/dt = E/k 136 | } 137 | } 138 | 139 | sub observation { 140 | y ~ log_normal(log(max(Z/10.0, 0)), tau) 141 | } 142 | 143 | sub proposal_parameter { 144 | k ~ gaussian(k, 0.005) 145 | sigma ~ gaussian(sigma, 0.01) 146 | gamma ~ gaussian(gamma, 0.01) 147 | x0 ~ gaussian(x0, 0.05) 148 | E0 ~ gaussian(E0, 0.05) 149 | I0 ~ gaussian(I0, 0.05) 150 | R0 ~ gaussian(R0, 0.05) 151 | tau ~ gaussian(tau, 0.05) 152 | } 153 | }" 154 | ``` 155 | 156 | # Results 157 | 158 | Run the inference (note this can take some time): 159 | 160 | ```{r, echo = T} 161 | model <- bi_model(lines = stringi::stri_split_lines(model_str)[[1]]) 162 | bi_model <- libbi(model) 163 | input_lst <- list(N = 52196381) 164 | end_time <- max(y$time) 165 | obs_lst <- list(y = y %>% dplyr::filter(time <= end_time)) 166 | 167 | bi <- sample(bi_model, end_time = end_time, input = input_lst, obs = obs_lst, nsamples = 1000, nparticles = minParticles, nthreads = ncores, proposal = 'prior') %>% 168 | adapt_particles(min = minParticles, max = minParticles*200) %>% 169 | adapt_proposal(min = 0.05, max = 0.4) %>% 170 | sample(nsamples = 5000, thin = 5) %>% # burn in 171 | sample(nsamples = 5000, thin = 5) 172 | 173 | bi_lst <- bi_read(bi %>% sample_obs) 174 | ``` 175 | 176 | ```{r figDatafit, dependson = c("load_results"), fig.cap="Model inference results. Top panel shows the GP consultation results, with the points showing the actual data points. The ribbons represent the 95%% and 50%% confidence interval in incidence and the black line shows the median. 
The middle panel shows the transmissibility over time and the bottom panel the change in transmissibility relative to the starting transmissibility.", echo = F, fig.height = 8} 177 | fitY <- bi_lst$y %>% 178 | group_by(time) %>% 179 | mutate( 180 | q025 = quantile(value, 0.025), 181 | q25 = quantile(value, 0.25), 182 | q50 = quantile(value, 0.5), 183 | q75 = quantile(value, 0.75), 184 | q975 = quantile(value, 0.975) 185 | ) %>% ungroup() %>% 186 | left_join(y %>% rename(Y = value)) 187 | 188 | g1 <- ggplot(data = fitY) + 189 | geom_ribbon(aes(x = time, ymin = q25, ymax = q75), alpha = 0.3) + 190 | geom_ribbon(aes(x = time, ymin = q025, ymax = q975), alpha = 0.3) + 191 | geom_line(aes(x = time, y = q50)) + 192 | geom_point(aes(x = time, y = Y), colour = "Red") + 193 | ylab("Incidence") + 194 | xlab("Time") 195 | 196 | plot_df <- bi_lst$x %>% mutate(value = exp(value)) %>% 197 | group_by(time) %>% 198 | mutate( 199 | q025 = quantile(value, 0.025), 200 | q25 = quantile(value, 0.25), 201 | q50 = quantile(value, 0.5), 202 | q75 = quantile(value, 0.75), 203 | q975 = quantile(value, 0.975) 204 | ) %>% ungroup() 205 | 206 | g2 <- ggplot(data = plot_df) + 207 | geom_ribbon(aes(x = time, ymin = q25, ymax = q75), alpha = 0.3) + 208 | geom_ribbon(aes(x = time, ymin = q025, ymax = q975), alpha = 0.3) + 209 | geom_line(aes(x = time, y = q50)) + 210 | ylab(TeX("Transmissibility ($\\beta(t)$)")) + 211 | xlab("Time") 212 | 213 | plot_df <- bi_lst$x %>% mutate(value = exp(value)) %>% 214 | group_by(np) %>% mutate(value = value - value[1]) %>% 215 | group_by(time) %>% 216 | mutate( 217 | q025 = quantile(value, 0.025), 218 | q25 = quantile(value, 0.25), 219 | q50 = quantile(value, 0.5), 220 | q75 = quantile(value, 0.75), 221 | q975 = quantile(value, 0.975) 222 | ) %>% ungroup() 223 | 224 | g3 <- ggplot(data = plot_df) + 225 | geom_ribbon(aes(x = time, ymin = q25, ymax = q75), alpha = 0.3) + 226 | geom_ribbon(aes(x = time, ymin = q025, ymax = q975), alpha = 0.3) + 227 | geom_line(aes(x = time, y = q50)) + 228 | ylab(TeX("Relative trans. ($\\beta(t)-\\beta(0)$)")) + 229 | xlab("Time") 230 | 231 | 232 | ggarrange(g1, g2, g3, ncol = 1, nrow = 3, align = "v") 233 | ``` 234 | 235 | Figure \@ref(fig:figDatafit) shows the results of the data fitting. The top panel shows the incidence data (red dots), with the two distinct epidemiological waves and the model predictions. As shown, the model is able to reproduce both waves. The middle panel shows transmissibility over time, with an apparent dip between day 50 and 100. This dip can be confirmed by comparing the transmissibility to the transmissibility at time 0 (bottom panel), which shows that between day 50 and 100 the transmissibility is below the starting transmissibility in all cases. The dip in transmissibility coincides with the time period in which the first epidemiological wave started to slow down.
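To explore the fit further, the posterior samples of the fitted parameters can be summarised from the same `bi_lst` object. The chunk below is a sketch rather than part of the original analysis; it assumes that, as for the states above, `bi_read()` returns one data frame per parameter with an `np` (sample index) and a `value` column:

```{r, echo = T, eval = FALSE}
# Sketch (not part of the original analysis): posterior medians and 95% intervals
# for the fitted parameters, assuming each element of bi_lst is a data frame
# with columns `np` and `value`.
lapply(bi_lst[c("k", "gamma", "sigma", "R0", "tau")], function(df) {
  quantile(df$value, probs = c(0.025, 0.5, 0.975))
})
```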
236 | 237 | # References 238 | -------------------------------------------------------------------------------- /pmcmcTutorial.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "A PMCMC Tutorial for infectious disease modellers" 3 | author: "Marc Baguelin" 4 | output: 5 | html_document: default 6 | html_notebook: default 7 | pdf_document: 8 | toc: yes 9 | always_allow_html: yes 10 | --- 11 | 12 | ```{r} 13 | #Files with the code of the functions used in the tutorial 14 | source(file = "R/Volatility_SIS.R") 15 | source(file = "R/ReedFrost.R") 16 | ``` 17 | 18 | #Introduction 19 | This is a tutorial on PMCMC with special attention to infectious disease modellers. 20 | 21 | This tutorial will guide you through the basic concepts necessary to understand particle Markov chain Monte Carlo (PMCMC) and will let you build an algorithm to perform PMCMC on the Reed-Frost model, one of the simplest stochastic epidemic models. 22 | 23 | This tutorial has been built by taking material from different references: 24 | Andrieu et al. 25 | Chopin & Papaspiliopoulos 26 | Robert & Casella 27 | 28 | #Background concepts 29 | 30 | PMCMC is built out of several key concepts from statistics. One of the difficulties for mathematical modellers getting into particle MCMC is, in my opinion, that we might not be familiar with some of these (elementary) statistical concepts. It is important to understand each of these elementary concepts well. 31 | 32 | Another difficulty arises because many of these key concepts have connections in different disciplines and have sometimes been developed concurrently under different names. 33 | 34 | ```{r, echo=F} 35 | DiagrammeR::grViz(" 36 | digraph rmarkdown{ 37 | 'Monte Carlo' -> 'Importance Sampling' -> 'Sequential IS' -> 'SMC'; 38 | 'Monte Carlo' -> 'MCMC'; 39 | {'MCMC' 'SMC'} -> 'Particle MCMC' 40 | }", height=300) 41 | ``` 42 | 43 | ##Monte Carlo approximation 44 | 45 | Monte Carlo methods originated in the 1940s, during the effort by Allied physicists to develop a nuclear bomb at Los Alamos. One of the most common problems in statistical inference is to compute integrals. Monte Carlo is a numerical method which allows integrals to be calculated easily (or at least relatively easily compared with other methods). The method uses the definition of expectation as an integral over a probability measure, 46 | $$\mathbb{E}_{f} [h(X) ] = \int_{\mathcal{X}} h(x)f(x)dx$$ 47 | which can be approximated by the empirical average of $h$ over $N$ independent draws from $f$, 48 | $$\frac{1}{N} \sum \limits^{N}_{n=1}h(X^{n}), \space X^{n} \sim f.$$ 49 | 50 | ##Bayesian inference 51 | 52 | Very often, we want to relate a model of a phenomenon to some observations of this phenomenon. Most of the time the model is defined as equations describing the mechanisms of the phenomenon (a mechanistic model). We thus usually have an idea of plausible values for the model parameters before seeing the data. The fundamental idea behind Bayesian inference is to use Bayes' theorem to derive the probability of the parameters given the observations (the posterior distribution). 53 | 54 | ##Importance sampling 55 | 56 | The idea behind importance sampling is simple. Monte Carlo approximation relies on the ability to draw from a given distribution $f$. In many cases, this might be complicated or inefficient. The idea is to sample from a simpler distribution $g$ and then calculate "weights" associated with these samples in order to be able to derive a Monte Carlo approximation from them.
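Before detailing the algorithm, here is a quick illustration of the plain Monte Carlo estimator that importance sampling generalises (a toy example added for illustration, not taken from the references): approximating $\mathbb{E}[X^2]=1$ for $X \sim \mathcal{N}(0,1)$ by the empirical average of $10^4$ draws.

```{r}
#Plain Monte Carlo estimate of E[X^2] for X ~ N(0,1); the true value is 1
Nmc<-10^4
x<-rnorm(Nmc)
mean(x^2)
```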
57 | 58 | ###Basic algorithm 59 | 60 | Importance sampling is based on the simple identity 61 | $$\mathbb{E}_{f} [h(X) ] = \int_{\mathcal{X}} h(x)\frac{f(x)}{g(x)}g(x)dx=\mathbb{E}_{g}\left [\frac{h(X)f(X)}{g(X)} \right ].$$ 62 | And thus $\mathbb{E}_{f} [h(X) ]$ can be estimated as 63 | $$\frac{1}{n} \sum\limits_{j=1}^{n}\frac{f(X_{j})}{g(X_{j})}h(X_j)\rightarrow\mathbb{E}_{f} [h(X) ]$$ 64 | with $X_{j}$ drawn from $g$. $g$ can be chosen arbitrarily as long as supp($g$) $\supset$ supp($h \times f$) with supp$(f)=\{x \in X | f(x) \neq 0 \}$ for $f: X \rightarrow \mathbb{R}$. 65 | 66 | ###Example. Estimating the tail of a normal distribution 67 | 68 | Let's assume that we are interested in computing $P(Z>4.5)$ for $Z \sim \mathcal{N}(0,1)$: 69 | 70 | ```{r} 71 | pnorm(-4.5) 72 | ``` 73 | 74 | Computing $P(Z>4.5)$ using a naive Monte Carlo estimate is therefore very inefficient, as a hit is produced on average only once every 3 million draws or so. To get a stable estimate we would need a huge number of simulations. 75 | Using importance sampling, we can choose another distribution, supported on $[ 4.5 , +\infty)$, to calculate $P(Z>4.5)$. A natural choice is 76 | $$g(y)=\frac{e^{-y}}{\int_{4.5}^{\infty}e^{-x}dx }=e^{-(y-4.5)}$$ 77 | which leads to the following importance sampling estimator 78 | $$\frac{1}{n} \sum\limits_{i=1}^{n} \frac{f(Y_i)}{g(Y_i)}=\frac{1}{n}\sum\limits_{i=1}^{n} \frac{e^{-Y_i^2/2+Y_i-4.5}}{\sqrt{2 \pi}}$$ 79 | 80 | ```{r} 81 | #Number of Monte Carlo samples 82 | Nsim=10^3 83 | 84 | #Draw samples from the truncated exponential 85 | y=rexp(Nsim)+4.5 86 | 87 | #Calculation of the importance weights 88 | weit=dnorm(y)/dexp(y-4.5) 89 | 90 | #Plot the Monte Carlo estimator, with the true value in red 91 | plot(cumsum(weit)/1:Nsim,type="l") 92 | abline(a=pnorm(-4.5),b=0,col="red") 93 | ``` 94 | 95 | ###Importance sampling resampling 96 | 97 | ##State space models 98 | 99 | ###Example 1. Volatility model 100 | 101 | $$\begin{array}{rcl} 102 | X_{n} & = & \alpha X_{n-1} + \sigma V_{n}\\ 103 | Y_{n} & = & \beta \exp \left ( \frac{X_{n}}{2}\right) W_{n} 104 | \end{array}$$ 105 | 106 | ```{r} 107 | simu<-SV_simulate(N=500,alpha=0.91,sigma=1,beta=0.5) #beta changed to 0.5 from the original 0.91 108 | 109 | plot(simu$X,type="l",col="blue",ylim=c(-10,10),xlab="Time") 110 | points(simu$Y,pch="*",col="red") 111 | 112 | ``` 113 | 114 | ###Example 2. Reed-Frost model with Negative Binomial observation 115 | 116 | The Reed-Frost model is a discrete-time stochastic model, with each time step representing a new generation of infectious agents. During the period of time between two timesteps, each of the susceptible individuals has a probability $p$ of getting infected by each infectious individual. The probability of escaping infection from all the infectious individuals is $(1-p)^{I_{n-1}}$ and thus the number of infectious individuals at the next time step (i.e. the ones who did not escape infection) can be drawn from a binomial distribution with probability $1-(1-p)^{I_{n-1}}$. On top of this we assume that we can only detect a fraction of the cases. 117 | 118 | $$\begin{array}{rcl} 119 | I_{n} & \sim & Bin \left (S_{n-1}, 1-(1-p)^{I_{n-1}} \right )\\ 120 | S_{n} & = & S_{n-1} - I_{n} 121 | \end{array}$$ 122 | 123 | The Reed-Frost model can be made into a state space model by using $(S_{n},I_{n})$ as the state space and writing an observation function.
For example, we can assume that on average only a proportion $p_{obs}$ of the infected individuals are observed and that the observed cases follow a negative binomial distribution of size $s$: 124 | 125 | $$\begin{array}{rcl} 126 | X_{n}&=&(S_{n},I_{n})\\ 127 | Y_{n} &\sim& NegBin(\mu=p_{obs}*I_{n}, size=s) 128 | \end{array}$$ 129 | 130 | ```{r} 131 | #Simulate a Reed-Frost model with negative binomial observation 132 | simuRF<-RF_with_obs() 133 | 134 | #Plot of the observations 135 | plot(simuRF$Y, pch=18, col="red", xlab="Number of generations", ylab="Number of cases") 136 | 137 | #Blue line representing the actual underlying epidemic scaled by the proportion detected 138 | lines(simuRF$X[,2]*.05,col="#0000ff77", lwd=4) 139 | ``` 140 | 141 | ##Filtering and inference 142 | 143 | When confronting an SSM with data, two common problems pop up. We might want to know the actual hidden trajectory of the system; this is particularly important in situations where we want to implement some sort of control/reactive intervention. This type of question is called "filtering", i.e. recovering the actual state of the system through the noise and distortion created by the observation process. The filtering problem assumes that the parameters $\theta$ of the model are fully known, with all the uncertainty about the state coming from the stochasticity of the process and the observation noise; the quantity of interest is 144 | 145 | $$p_{\theta}(X_{1:T}|Y_{1:T})$$ 146 | 147 | Another problem is to estimate which parameter values are likely given the observations. In the case of an SSM, this means jointly inferring 148 | 149 | $$p(\theta,X_{1:T}|Y_{1:T}) \propto p_{\theta}(X_{1:T},Y_{1:T})p(\theta)$$ 150 | 151 | #Sequential Monte Carlo 152 | 153 | Sequential Monte Carlo (SMC) is a methodology which exploits the sequential time structure of the model to calculate some quantities efficiently. In particular, it is useful for inferring the hidden states of the model (filtering), but also for calculating the overall probability (i.e. marginalising over the possible hidden states) that the model generates the data (the marginal likelihood). It is this property which allows SMC to be integrated with other algorithms from computational statistics (such as MCMC) to perform joint inference of hidden states and model parameters.
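Concretely, the marginal likelihood factorises over time, and each factor can be estimated by the average of the unnormalised importance weights of the particles (a standard result, stated here for reference):
$$p_{\theta}(Y_{1:T})=\prod_{t=1}^{T} p_{\theta}(Y_{t}|Y_{1:t-1}) \approx \prod_{t=1}^{T}\frac{1}{N_{p}}\sum_{k=1}^{N_{p}} w_{t}^{(k)},$$
where $w_{t}^{(k)}$ is the unnormalised weight of particle $k$ at time $t$ and $N_{p}$ is the number of particles. This is what `prod(colSums(filterParticles$wp)/Np)` computes in the code below.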
154 | 155 | ##Sequential importance sampling 156 | 157 | 158 | 159 | ##Sequential sampling resampling 160 | 161 | ###Example 1 Volatility 162 | 163 | ```{r} 164 | #Plot the hidden state of the trajectory 165 | plot(simu$X,type="l",col="blue") 166 | 167 | #Run the particle filter 168 | filterParticles<-SV_SIR(simu$Y) 169 | 170 | #Calculate the mean path at each time point 171 | mu_Xp<-computeMeanPath(filterParticles) 172 | 173 | #Plot the filtered hidden states 174 | points(mu_Xp,pch=19,col="#00aa0066") 175 | ``` 176 | 177 | ###Example 2 Reed-Frost epidemic model 178 | 179 | 180 | 181 | ```{r} 182 | #SMC on the observed epidemic 183 | filter_particlesRF <- RF_SIR(simuRF$Y) 184 | summary_PF_RF <- computeMeanPathRF(filter_particlesRF) 185 | plot(NULL,xlim=c(0,50),ylim=c(0,max(summary_PF_RF$mu_Xp[,3],simuRF$Y/0.05)) ,col="white",xlab="Time",ylab="Number of cases") 186 | 187 | #95% confidence interval 188 | polygon(c(1:50,50:1),c(summary_PF_RF$mu_Xp[,2],summary_PF_RF$mu_Xp[50:1,3]),col="#00bb0033",border=NA) 189 | 190 | #Hidden states (true trajectory) 191 | lines(simuRF$X[,2],col="blue",lwd=2) 192 | 193 | #Mean of particles 194 | lines(summary_PF_RF$mu_Xp[,1],col="#00bb0066",lwd=2) 195 | 196 | #Observations 197 | points(simuRF$Y/0.05, pch=18, col="red") 198 | legend("topright", legend = c("Scaled observations","95% CI of particles","Mean particles","Hidden states"), bty="n", col=c("red","#00bb0033","#00bb0066","blue"), pch=c(18,15,NA,NA), pt.cex=c(1,2,1,1),lty=c(0,0,1,1), lwd=2) 199 | 200 | ``` 201 | 202 | In this last section we will use the particle marginal Metropolis–Hastings (PMMH) sampler from Andrieu et al. First, we use the particle filter to estimate the marginal likelihood over a grid of values of the transmission probability $p$, to see how it varies around the true value (red line). 203 | 204 | ```{r} 205 | Na<-50 206 | Np<-5000 207 | p_estimate <- seq(from=0.00014,to=0.00017,length.out = Na) 208 | marginal_likelihood <- rep(0,Na) 209 | 210 | for(i in 1:Na) 211 | { 212 | filterParticles<-RF_SIR(simuRF$Y, Np=Np, p=p_estimate[i]) 213 | 214 | marginal_likelihood[i]<-prod(colSums(filterParticles$wp)/Np) 215 | } 216 | 217 | plot(p_estimate, log(marginal_likelihood), xlab="probability of transmission", ylab="Marginal log-likelihood") 218 | abline(v=0.00015, col="red") 219 | ``` 220 | 221 | #Particle MCMC 222 | 223 | ```{r} 224 | #Run a particle Marginal Metropolis Hastings algorithm 225 | inference_results<-RF_PMMH(simuRF$Y, PBar=F, p0=0.0001) 226 | ``` 227 | 228 | 229 | ```{r} 230 | plot(inference_results$p, ylim=c(0,0.00020), ylab = "Probability of transmission") 231 | abline(h=0.00015,col="red") 232 | ``` 233 | 234 | 235 | -------------------------------------------------------------------------------- /rwmodel.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Random-walk model" 3 | author: "Akira Endo" 4 | date: "12 December 2018" 5 | output: 6 | html_document: default 7 | pdf_document: default 8 | --- 9 | 10 | ```{r setup, include=FALSE} 11 | knitr::opts_chunk$set(echo = TRUE) 12 | ``` 13 | 14 | ## Random-walk model 15 | This is a simple toy model (random-walk model) used as an example in the main text. We assume that \(x_t\) evolves following a simple Gaussian kernel \(x_t\sim \mathcal N(x_{t-1},\sigma)\) with an initial state \(x_1=0\). The data are assumed to be obtained as rounded integers with a Gaussian measurement error, \(y_t=round(\mathcal N(x_{t},0.1))\). Consider that a five-step observation (\(T=5\)) with \(\sigma=0.5\) yielded \(y_{1:5}=(0,1,1,1,2)\). Without any observations, the possible trajectories of \(x_{1:5}\) are very diverse (Figure 1, Left panel).
Trajectories \(x_{1:5}\) consistent with the observation are only a fraction of those (Figure 1, Right panel), and they need to be efficiently sampled to compute the marginal likelihood \(p(y_{1:T}|\sigma)\). 16 | 17 | ```{r prep} 18 | ## Preparation 19 | yt<-c(0,1,1,1,2) #observed data 20 | 21 | set.seed(122018) 22 | 23 | #SMC by the bootstrap filter 24 | BF_sim<-function(yt,sig,merror,size){ 25 | tlen<-length(yt) 26 | xt<-matrix(0,tlen,size) 27 | xt_prop<-matrix(0,tlen,size) 28 | avg_wt<-numeric(tlen) 29 | avg_wt[1]=1 30 | for(t in 2:tlen){ 31 | xt_prop[t,]<-rnorm(size,xt[t-1,],sig) #proposal particles 32 | weights=pnorm(yt[t]+0.5,xt_prop[t,],merror)-pnorm(yt[t]-0.5,xt_prop[t,],merror) #weighted by p(yt|xt_prop) 33 | avg_wt[t]=sum(weights)/size 34 | if(sum(weights)==0)weights=weights+1 #marginal likelihood 0: avoid error in sample() below due to weights=0 35 | ind<-sample(size,size,replace=T,prob=weights) #resample 36 | xt[1:t,]=rbind(xt[1:(t-1),ind],xt_prop[t,ind]) 37 | 38 | } 39 | return(list(xt=xt,xt_prop=xt_prop,avg_wt=avg_wt)) 40 | } 41 | 42 | sig=0.5 43 | merror=0.1 44 | xt_prior<-apply(rbind(0,matrix(rnorm(10000,0,sig),4)),2,cumsum) #prior samples of xt 45 | xt_BF<-BF_sim(yt,sig,merror,10000) 46 | xt_posterior<-xt_BF$xt #posterior samples of x_t 47 | xt_prop<-xt_BF$xt_prop 48 | ``` 49 | 50 | ```{r plot-traj, fig.width=8,fig.height=4} 51 | ## Figure 1. Trajectories of x_t 52 | par(mfrow=c(1,2)) 53 | matplot(xt_prior,type="l",lty=1,ylim=c(-3,3),ylab="hidden state",xlab="time") 54 | matplot(xt_posterior[,1:500],type="l",lty=1,ylim=c(-3,3),main="",ylab="hidden state",xlab="time") 55 | ``` 56 | ```{r plot-pos, fig.width=12,fig.height=3} 57 | ## Figure 2. Posterior distribution p(x|y) 58 | par(mfrow=c(1,4),cex=1) 59 | for(i in 2:5){ 60 | hist(xt_posterior[i,],xlab=paste0("x",i),main="",breaks=15,col="gray",freq=F,ylim=c(0,2)) 61 | abline(v=c(0,1,1,1,2)[i],col="red") 62 | } 63 | 64 | 65 | ``` 66 | 67 | ## SMC 68 | The bootstrap filter is one of the most popular algorithms for sequential Monte Carlo (SMC). At each time step, candidate particles are generated from the time-evolution process \(f_\theta(x_t|x_{t-1})\). Particles are then filtered by importance sampling resampling (ISR) based on \(g_\theta(y_t|x_t)\) to yield samples from the target distribution \(p_\theta(x_t|y_{1:t})\propto g_\theta(y_t|x_t)f_\theta(x_t|x_{t-1},y_{1:t-1})\). 69 | 70 | ```{r smc} 71 | set.seed(122018) 72 | xt_BF<-BF_sim(yt,sig,merror=0.1,200) #SMC 73 | xt_posterior<-xt_BF$xt #posterior samples of xt 74 | xt_prop<-xt_BF$xt_prop #proposed particles at each time step 75 | ``` 76 | ```{r plot-smc, fig.width=12, fig.height=6} 77 | ## Figure 3. Sequential sampling of trajectories x 78 | par(mfrow=c(2,4)) 79 | for(t in 2:5){ #plot trajectories 80 | matplot(rbind(xt_posterior[1:(t-1),],xt_prop[t,],matrix(NA,5-t,ncol(xt_posterior))),type="l",lty=1,ylim=c(-1,3),ylab="hidden state",xlab="time") 81 | matplot(rbind(xt_posterior[1:t,],matrix(NA,5-t,ncol(xt_posterior))),type="l",lty=1,ylim=c(-1,3),ylab="hidden state",xlab="time") 82 | } 83 | 84 | ``` 85 | 86 | ##PMCMC 87 | Particle Markov-chain Monte Carlo is a method to estimate the parameters \(\theta\) of a hidden Markov process by efficiently sampling the hidden variable \(x\) with SMC. SMC is used as a sub-algorithm that returns the approximated marginal likelihood \(\hat p(y_{1:T}|\theta)\). Here, the parameter in the Gaussian kernel \(\sigma\) is estimated with an improper prior over the positive real line \(p(\sigma)=\mathcal U(0,\infty)\).
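In the standard particle marginal Metropolis–Hastings sampler, a proposed value \(\sigma'\) is accepted with probability
\[\min\left(1,\ \frac{\hat p(y_{1:T}|\sigma')\,p(\sigma')\,q(\sigma|\sigma')}{\hat p(y_{1:T}|\sigma)\,p(\sigma)\,q(\sigma'|\sigma)}\right),\]
where \(q\) denotes the proposal density. With the flat prior and the reflected (symmetric) random-walk proposal used below, this reduces to the ratio of the SMC estimates of the marginal likelihood, which is what `min(1,exp(ll_prop-ll[n-1]))` implements in the code.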
88 | 89 | ```{r pmcmc} 90 | set.seed(122018) 91 | tlen<-5 # total time steps 92 | mcmclen<-5000 # Iterations in MCMC 93 | nparticle<-200 # Particles used in SMC 94 | merror=0.1 #measurement error 95 | 96 | # Initialise MCMC 97 | sig_mcmc<-numeric(mcmclen) #sigma 98 | sig_mcmc[1]=1 99 | ll<-numeric(mcmclen) #log-likelihood 100 | ll[1]=-Inf 101 | x_mcmc<-matrix(0,mcmclen,tlen) #x_t 102 | prop_sd<-0.5 #sd of proposal distribution 103 | 104 | # PMCMC 105 | for(n in 2:mcmclen){ 106 | sig_prop<-sig_mcmc[n-1]+rnorm(1,0,prop_sd) 107 | sig_prop=abs(sig_prop) 108 | BF<-BF_sim(yt,sig_prop,merror,nparticle) 109 | ll_prop<-sum(log(BF$avg_wt)) 110 | prob_update<-min(1,exp(ll_prop-ll[n-1])) 111 | if(runif(1)