├── .Rbuildignore
├── .Rprofile
├── .gitignore
├── DESCRIPTION
├── NAMESPACE
├── R
    ├── RcppExports.R
    ├── create_lagmatrix.R
    ├── fast_dmvnorm.R
    ├── fast_rmvnorm.R
    ├── gandkinversecdf.R
    ├── get_my_colors.R
    ├── hilbert_order.R
    ├── levydriventransition.R
    ├── metrics.R
    ├── metropolishastings.R
    ├── mmd.R
    ├── model_get_autoregressive.R
    ├── model_get_cosine.R
    ├── model_get_gandk.R
    ├── model_get_levydriven.R
    ├── model_get_mgandk.R
    ├── model_get_multivariatenormal.R
    ├── model_get_normal.R
    ├── model_get_pz_4param.R
    ├── model_get_queue.R
    ├── model_get_ricker.R
    ├── model_get_toggleswitch.R
    ├── particle_filter.R
    ├── particlemetropolishastings.R
    ├── plot_functions.R
    ├── proposals.R
    ├── pz_transition.R
    ├── setmytheme.R
    ├── systematic_resampling.R
    ├── wasserstein.R
    ├── wcovariance.R
    ├── winference-package.R
    ├── wmean.R
    ├── wsmc.R
    └── wsmc_to_dataframe.R
├── README.md
├── WInference.Rproj
├── inst
    ├── reproduceabc
    │   ├── README.R
    │   ├── ar1
    │   │   ├── .Rapp.history
    │   │   ├── README.R
    │   │   ├── ar1_generate_data.R
    │   │   ├── ar1_plots.R
    │   │   ├── ar1_wsmc_delay1.R
    │   │   └── ar1_wsmc_marginal.R
    │   ├── cosine
    │   │   ├── README.R
    │   │   ├── cosine_generate_data.R
    │   │   ├── cosine_mcmc.R
    │   │   ├── cosine_plots.R
    │   │   ├── cosine_wsmc_curvematching_wasserstein.R
    │   │   └── cosine_wsmc_euclidean.R
    │   ├── gandk
    │   │   ├── README.R
    │   │   ├── gandk_abctools.R
    │   │   ├── gandk_generate_data.R
    │   │   ├── gandk_mcmc.R
    │   │   ├── gandk_plots_compare.R
    │   │   ├── gandk_plots_convergence.R
    │   │   ├── gandk_plots_ncomputed.R
    │   │   ├── gandk_plots_threshold.R
    │   │   ├── gandk_plots_w_to_posterior.R
    │   │   └── gandk_wsmc.R
    │   ├── levydriven
    │   │   ├── README.R
    │   │   ├── levydriven_generate_data.R
    │   │   ├── levydriven_is_correction.R
    │   │   ├── levydriven_mh.R
    │   │   ├── levydriven_plots.R
    │   │   ├── levydriven_timings.R
    │   │   ├── levydriven_wsmc_hilbert.R
    │   │   └── levydriven_wsmc_with_summary.R
    │   ├── mgandk
    │   │   ├── README.R
    │   │   ├── mgandk_generate_data.R
    │   │   ├── mgandk_mcmc.R
    │   │   ├── mgandk_plots.R
    │   │   ├── mgandk_plots_ncomputed.R
    │   │   ├── mgandk_plots_threshold.R
    │   │   ├── mgandk_plots_wdist_to_posterior.R
    │   │   ├── mgandk_timings.R
    │   │   ├── mgandk_wsmc_hilbert.R
    │   │   ├── mgandk_wsmc_mmd.R
    │   │   ├── mgandk_wsmc_swap.R
    │   │   └── mgandk_wsmc_wasserstein.R
    │   ├── mvnormal
    │   │   ├── README.R
    │   │   ├── mvnormal_generate_data.R
    │   │   ├── mvnormal_plots.R
    │   │   ├── mvnormal_rejection_summary.R
    │   │   ├── mvnormal_rejection_wasserstein.R
    │   │   ├── mvnormal_timings.R
    │   │   ├── mvnormal_wsmc_euclidean.R
    │   │   ├── mvnormal_wsmc_summary.R
    │   │   └── mvnormal_wsmc_wasserstein.R
    │   ├── queue
    │   │   ├── 50.intermediateobs.neal.Rdata
    │   │   ├── README.R
    │   │   ├── queue_abctools.R
    │   │   ├── queue_plots_compare.R
    │   │   ├── queue_pmmh_intermediate.R
    │   │   └── queue_wsmc_marginal_intermediate.R
    │   ├── supplementary
    │   │   ├── README.R
    │   │   ├── check_assumption_plot.R
    │   │   ├── gandk.checkassumption.R
    │   │   ├── multivariate.transportdistancecomparison.R
    │   │   ├── multivariate.transportdistancecomparison.plots.R
    │   │   ├── wasserstein_clt_mvtnorm_mean.R
    │   │   └── wasserstein_clt_mvtnorm_mean_plots.R
    │   └── toggleswitch
    │   │   ├── README.R
    │   │   ├── toggle_switch_generate.R
    │   │   ├── toggle_switch_load_summary.R
    │   │   ├── toggle_switch_plots.R
    │   │   ├── toggle_switch_summary.R
    │   │   ├── toggle_switch_summary_smc.R
    │   │   └── toggle_switch_wsmc.R
    ├── reproducepointestimation
    │   ├── README.R
    │   ├── cauchydata_normalfit_fixedmk_diffn.R
    │   ├── cauchydata_normalfit_fixedmk_diffn_KL.R
    │   ├── gamma_normal_bootstrap.R
    │   ├── gamma_normal_fixedmk_diffn.R
    │   ├── gamma_normal_fixedn_diffmk.R
    │   ├── gamma_normal_functions.R
    │   ├── gandk.cluster.script.R
    │   ├── gandk_correlated.R
    │   ├── gandk_coverage.R
    │   ├── gandk_functions.R
    │   ├── gandk_plots.R
    │   ├── lognormal.cluster.script.R
    │   ├── lognormal_correlated.R
    │   ├── lognormal_coverage.R
    │   ├── lognormal_functions.R
    │   └── lognormal_plots.R
    └── tutorials
    │   ├── tutorial_cosine.Rmd
    │   ├── tutorial_cosine.pdf
    │   ├── tutorial_distances.Rmd
    │   ├── tutorial_distances.pdf
    │   ├── tutorial_gandk.Rmd
    │   ├── tutorial_gandk.pdf
    │   ├── tutorial_normal.Rmd
    │   └── tutorial_normal.pdf
├── man
    ├── .Rapp.history
    ├── cost_matrix_L1.Rd
    ├── cost_matrix_L2.Rd
    ├── cost_matrix_Lp.Rd
    ├── create_lagmatrix.Rd
    ├── fast_dmvnorm.Rd
    ├── fast_rmvnorm.Rd
    ├── get_autoregressive.Rd
    ├── get_cosine.Rd
    ├── get_gandk.Rd
    ├── get_levydriven.Rd
    ├── get_mgandk.Rd
    ├── get_multivariate_normal.Rd
    ├── get_normal.Rd
    ├── get_pz_4param.Rd
    ├── get_ricker.Rd
    ├── get_toggleswitch.Rd
    ├── hilbert_order.Rd
    ├── pz_transition.Rd
    ├── setmytheme.Rd
    ├── systematic_resampling.Rd
    ├── wasserstein.Rd
    ├── wcovariance.Rd
    ├── winference-package.Rd
    └── wmean.Rd
├── src
    ├── HilbertCode.cpp
    ├── HilbertCode.h
    ├── Makevars
    ├── RcppExports.cpp
    ├── compute_cost.cpp
    ├── gandkinversecdf.cpp
    ├── hilbert_order.cpp
    ├── levydriven_.cpp
    ├── median.cpp
    ├── median.h
    ├── mmd.cpp
    ├── mvnorm.cpp
    ├── mvnorm.h
    ├── pz_functions.cpp
    ├── resampling.cpp
    ├── resampling.h
    ├── swapsweep.cpp
    ├── systematic.cpp
    ├── toggle_switch.cpp
    ├── variouswasserstein.cpp
    ├── wasserstein.cpp
    ├── wasserstein_auto.cpp
    ├── wasserstein_semi_discrete.cpp
    ├── weighted_averages.cpp
    └── weighted_averages.h
└── winference.Rproj


/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | 


--------------------------------------------------------------------------------
/.Rprofile:
--------------------------------------------------------------------------------
1 | .First <- function(){
2 |   print("hello")
3 |   Sys.setenv(MAKEFLAGS = " -j4")
4 | }
5 | 
6 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.txt
 2 | *.tex
 3 | .*.swp
 4 | .Rproj.user
 5 | .Rhistory
 6 | .RData
 7 | *.RData
 8 | inst/reproducepointestimation/*.RData
 9 | inst/reproducepointestimation/*.Rdata
10 | inst/reproducepointestimation/*.pdf
11 | inst/reproducepointestimation/*.png
12 | .Ruserdata
13 | .DS_Store
14 | src/*.o
15 | src/*.so
16 | src/*.dll
17 | TODO
18 | inst/tutorials/*cache*/
19 | inst/tutorials/*files*/
20 | inst/reproduce-old/
21 | 
22 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: winference
 2 | Type: Package
 3 | Title: Approximate Bayesian Computation with the Wasserstein distance
 4 | Version: 0.1.2
 5 | Author: Espen Bernton, Mathieu Gerber, Pierre E. Jacob
 6 | Maintainer: Pierre E. Jacob <pierre.jacob@fas.harvard.edu>
 7 | Description: Parameter inference for generative models, by using an approximate Bayesian 
 8 |   computation approach where summary statistics are replaced by the Wasserstein distance
 9 |   between synthetic and observed data.
10 | License: GPL (>= 2)
11 | LazyData: TRUE
12 | Depends:
13 |     Rcpp,RcppEigen,doParallel,doRNG,foreach,ggplot2,ggthemes,dplyr,reshape2,BH,transport
14 | Imports:
15 |     Rcpp (>= 0.11.6),RcppEigen,BH
16 | LinkingTo: Rcpp,RcppEigen,BH
17 | RoxygenNote: 6.1.1
18 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(cost_matrix_L1)
 4 | export(cost_matrix_L2)
 5 | export(cost_matrix_Lp)
 6 | export(create_lagmatrix)
 7 | export(exact_lsap_distance)
 8 | export(exact_lsap_given_C)
 9 | export(exact_transport_distance)
10 | export(exact_transport_given_C)
11 | export(fast_dmvnorm)
12 | export(fast_rmvnorm)
13 | export(gandkcdf)
14 | export(gandkinversecdf)
15 | export(gandkinversecdf_givennormals)
16 | export(get_autoregressive)
17 | export(get_cosine)
18 | export(get_gandk)
19 | export(get_hilbert_to_y)
20 | export(get_levydriven)
21 | export(get_lsap_to_y)
22 | export(get_mgandk)
23 | export(get_mmd_to_y)
24 | export(get_multivariate_normal)
25 | export(get_my_colors)
26 | export(get_normal)
27 | export(get_pz_4param)
28 | export(get_queue)
29 | export(get_ricker)
30 | export(get_sinkhorn_to_y)
31 | export(get_toggleswitch)
32 | export(get_transport_to_y)
33 | export(hilbert_distance)
34 | export(hilbert_order)
35 | export(independent_proposal)
36 | export(levydriven_rtransition_rand)
37 | export(metropolishastings)
38 | export(mhchainlist_to_dataframe)
39 | export(mixture_mclust)
40 | export(mixture_rmixmod)
41 | export(mmd)
42 | export(move_step)
43 | export(move_step_onetheta)
44 | export(particle_filter)
45 | export(plot_bivariate)
46 | export(plot_bivariate_polygon)
47 | export(plot_marginal)
48 | export(plot_marginal_time)
49 | export(plot_ncomputed)
50 | export(plot_threshold)
51 | export(plot_threshold_time)
52 | export(pmmh)
53 | export(pz_transition)
54 | export(randomwalk_proposal)
55 | export(setmytheme)
56 | export(sinkhorn_distance)
57 | export(sinkhorn_given_C)
58 | export(std_move_step_onetheta)
59 | export(swap_distance)
60 | export(swap_kernel_distance)
61 | export(systematic_resampling)
62 | export(systematic_resampling_given_u)
63 | export(systematic_resampling_n)
64 | export(update_proposal)
65 | export(wasserstein)
66 | export(wcovariance)
67 | export(wmean)
68 | export(wsmc)
69 | export(wsmc_continue)
70 | export(wsmc_one_step)
71 | export(wsmc_to_dataframe)
72 | importFrom(Rcpp,sourceCpp)
73 | useDynLib(winference)
74 | 


--------------------------------------------------------------------------------
/R/create_lagmatrix.R:
--------------------------------------------------------------------------------
 1 | #'@rdname create_lagmatrix
 2 | #'@title create_lagmatrix
 3 | #'@description This function creates the delay reconstruction, i.e. a matrix
 4 | #' where the first row contains y_1, ..., y_T (the given univariate time series)
 5 | #' the second row contains NA, y_2, ..., y_T .... etc
 6 | #' the k-th row contains NA, NA, ..., y_{k+1}, ..., y_T
 7 | #'@return a matrix with k rows and n-k columns, where n is the length of the provided time series
 8 | #'@export
 9 | create_lagmatrix <- function(timeseries, k){
10 |   if (k == 0){
11 |     return(matrix(timeseries, nrow = 1))
12 |   }
13 |   res <- matrix(NA, nrow = k+1, ncol = ncol(timeseries))
14 |   res[1,] <- timeseries
15 |   for (lagvalue in 1:k){
16 |     res[lagvalue+1,] <- lag(timeseries[1,], lagvalue)
17 |   }
18 |   return(res[,(k+1):ncol(res),drop=F])
19 | }
20 | 


--------------------------------------------------------------------------------
/R/fast_dmvnorm.R:
--------------------------------------------------------------------------------
1 | #'@rdname fast_dmvnorm
2 | #'@title fast_dmvnorm
3 | #'@description evaluate multivariate Normal log-densities
4 | #'@export
5 | 
6 | fast_dmvnorm <- function(x, mean, covariance){
7 |   return(dmvnorm(x, mean, covariance))
8 | }
9 | 


--------------------------------------------------------------------------------
/R/fast_rmvnorm.R:
--------------------------------------------------------------------------------
1 | #'@rdname fast_rmvnorm
2 | #'@title fast_rmvnorm
3 | #'@description generate multivariate Normal draws
4 | #'@export
5 | 
6 | fast_rmvnorm <- function(nparticles, mean, covariance){
7 |   return(rmvnorm(nparticles, mean, covariance))
8 | }
9 | 


--------------------------------------------------------------------------------
/R/gandkinversecdf.R:
--------------------------------------------------------------------------------
 1 | #'@export
 2 | gandkinversecdf <- function(uniforms, theta){
 3 |   return(gandkinversecdf_(uniforms, theta))
 4 | }
 5 | 
 6 | #'@export
 7 | gandkinversecdf_givennormals <- function(normals, theta){
 8 |   return(gandkinversecdf_givennormals_(normals, theta))
 9 | }
10 | 
11 | #'@export
12 | gandkcdf <- function(y, theta, maxsteps = 1000, tolerance = 1e-10, lower = 1e-20, upper = 1-1e-20){
13 |   return(gandkcdf_(y, theta, maxsteps, tolerance, lower, upper))
14 | }
15 | 


--------------------------------------------------------------------------------
/R/get_my_colors.R:
--------------------------------------------------------------------------------
 1 | #'@export
 2 | get_my_colors <- function(){
 3 |   return(c("Hilbert" = "orange", "marginal" = "red", "Curve matching" = "darkblue",
 4 |            "MMD"="009E73", "Posterior"="black",
 5 |            "Summary"="orange",
 6 |            "Semi-auto"="orange",
 7 |            "SA + constraint"="red",
 8 |            "Wasserstein"="darkblue",
 9 |            "Euclidean"="#009E73",
10 |            "Swap" = "purple",
11 |            "Rej. Summary" = "purple",
12 |            "W + constraint" = "darkgreen"
13 |   ))
14 | }
15 | 


--------------------------------------------------------------------------------
/R/hilbert_order.R:
--------------------------------------------------------------------------------
 1 | #'@rdname hilbert_order
 2 | #'@title hilbert_order
 3 | #'@description This function returns the "Hilbert order" of a sample of n
 4 | #'points of dimension d, stored in a matrix with d rows and n columns
 5 | #' where d is the dimension of each sample and n the number of samples.
 6 | #' The function essentially calls Hilbert_Sort_CGAL, from the CGAL library.
 7 | #'@return a vector of index corresponding to the ordered samples
 8 | #'@export
 9 | hilbert_order <- function(x){
10 |   return(hilbert_order_(x) + 1)
11 | }
12 | 


--------------------------------------------------------------------------------
/R/levydriventransition.R:
--------------------------------------------------------------------------------
1 | #'@export
2 | levydriven_rtransition_rand <- function(nparticles, theta){
3 |   levydriven_rtransition_rand_cpp(nparticles, theta)
4 | }
5 | 


--------------------------------------------------------------------------------
/R/metropolishastings.R:
--------------------------------------------------------------------------------
 1 | # Function to perform Metropolis-Hastings
 2 | #'@export
 3 | metropolishastings <- function(observations, target, tuning_parameters, savefile = NULL, verbose = FALSE){
 4 |   # Posterior density function (log)
 5 |   posterior <- function(thetas){
 6 |     logdens <- target$dprior(thetas, target$parameters)
 7 |     which.ok <- which(is.finite(logdens))
 8 |     if (length(which.ok) > 0){
 9 |       theta.ok <- thetas[which.ok,,drop=FALSE]
10 |       logdens[which.ok] <- logdens[which.ok] + target$loglikelihood(theta.ok, observations, target$parameters)
11 |     }
12 |     return(logdens)
13 |   }
14 | 
15 |   niterations <- tuning_parameters$niterations
16 |   nchains <- tuning_parameters$nchains
17 |   cov_proposal <- tuning_parameters$cov_proposal
18 |   p <- ncol(tuning_parameters$init_chains)
19 | 
20 |   # store whole chains
21 |   chains <- rep(list(matrix(nrow = niterations, ncol = p)), nchains)
22 |   # current states of the chains
23 |   current_chains <- matrix(nrow = nchains, ncol = p)
24 |   # initialization of the chains
25 |   current_chains <- matrix(tuning_parameters$init_chains, nrow = nchains, ncol = p)
26 |   for (ichain in 1:nchains){
27 |     chains[[ichain]][1,] <- current_chains[ichain,]
28 |   }
29 |   # log target density values associated with the current states of the chains
30 |   current_dtarget <-  posterior(current_chains)
31 |   #
32 |   naccepts <- 0
33 |   # run the chains
34 |   for (iteration in 2:niterations){
35 |     if ((iteration %% max(1, floor(niterations/100)) == 1) && (verbose)){
36 |       cat("iteration ", iteration, "/", niterations, "\n")
37 |       cat("average acceptance:", naccepts / (iteration*nchains) * 100, "%\n")
38 |     }
39 |     if (iteration > 250 && tuning_parameters$adaptation > 0  && (iteration %% tuning_parameters$adaptation) == 0){
40 |       # adapt the proposal covariance matrix based on the last < 50,000 samples of all chains
41 |       mcmc_samples <- foreach(ichain = 1:nchains, .combine = rbind) %do% {
42 |         matrix(chains[[ichain]][max(1, iteration - tuning_parameters$adaptation):(iteration-1),], ncol = p)
43 |       }
44 |       cov_proposal <- cov(mcmc_samples) / p
45 |     }
46 |     # proposals
47 |     proposals <- current_chains + fast_rmvnorm(nchains, rep(0, p), cov_proposal)
48 |     # proposals' target density
49 |     proposal_dtarget <- posterior(proposals)
50 |     # log Metropolis Hastings ratio
51 |     acceptance_ratios <- (proposal_dtarget - current_dtarget)
52 |     # uniforms for the acceptance decisions
53 |     uniforms <- runif(n = nchains)
54 |     # acceptance decisions
55 |     accepts <- (log(uniforms) < acceptance_ratios)
56 |     naccepts <- naccepts + sum(accepts)
57 |     # make the appropriate replacements
58 |     current_chains[accepts,] <- proposals[accepts,]
59 |     if (is.null(dim(current_chains))) current_chains <- matrix(current_chains, ncol = p)
60 |     current_dtarget[accepts] <- proposal_dtarget[accepts]
61 |     # book keeping
62 |     for (ichain in 1:nchains){
63 |       chains[[ichain]][iteration,] <- current_chains[ichain,]
64 |     }
65 |     if (!is.null(savefile) && iteration %% 1000 == 1){
66 |       mh_results <- list(chains = chains, naccepts = naccepts, cov_proposal = cov_proposal, iteration = iteration)
67 |       save(mh_results, file = savefile)
68 |     }
69 |   }
70 |   cat("average acceptance:", naccepts / (niterations*nchains) * 100, "%\n")
71 |   return(list(chains = chains, naccepts = naccepts, cov_proposal = cov_proposal))
72 | }
73 | 
74 | #'@export
75 | mhchainlist_to_dataframe <- function(chains_list){
76 |   nchains <- length(chains_list)
77 |   niterations <- nrow(chains_list[[1]])
78 |   chaindf <- foreach (i = 1:nchains, .combine = rbind) %do% {
79 |     data.frame(ichain = rep(i, niterations), iteration = 1:niterations, X = chains_list[[i]])
80 |   }
81 |   return(chaindf)
82 | }
83 | 


--------------------------------------------------------------------------------
/R/mmd.R:
--------------------------------------------------------------------------------
 1 | # returns a function that computes MMD between y and z, for fixed y
 2 | #'@export
 3 | get_mmd_to_y <- function(y){
 4 |   nobs <- ncol(y)
 5 |   Cy1 <- cost_matrix_L1(y, y)
 6 |   Cy2 <- cost_matrix_L2(y, y)^2
 7 |   eps <- median(as.numeric(Cy1))
 8 |   k_y <- exp(-Cy2/(2*(eps^2)))
 9 |   first_term <- sum(k_y) / (nobs*nobs)
10 |   f <- function(z){
11 |     return(mmd_c(first_term, eps, z, y))
12 |   }
13 |   return(f)
14 | }
15 | # compute MMD between y and z
16 | #'@export
17 | mmd <- function(y, z){
18 |   nobs <- ncol(y)
19 |   Cy1 <- cost_matrix_L1(y, y)
20 |   Cy2 <- cost_matrix_L2(y, y)^2
21 |   eps <- median(as.numeric(Cy1))
22 |   k_y <- exp(-Cy2/(2*(eps^2)))
23 |   first_term <- sum(k_y) / (nobs*nobs)
24 |   return(mmd_c(first_term, eps, z, y))
25 | }
26 | 


--------------------------------------------------------------------------------
/R/model_get_autoregressive.R:
--------------------------------------------------------------------------------
 1 | # Autoregressive model
 2 | # theta = (phi, logsigma)
 3 | #'@rdname get_autoregressive
 4 | #'@title Autoregressive model
 5 | #'@description This function returns a list representing an auto-regressive
 6 | #'model of order 1.
 7 | #'@return The list contains rprior, dprior (generate and evaluate the density of prior distribution),
 8 | #' generate_randomness (generate data-generating variables), robservation (create synthetic
 9 | #' data sets), parameter_names (useful for plotting), thetadim (dimension of parameter),
10 | #' ydim (dimension of observations), parameters (list of hyperparameters,
11 | #' to be passed to rprior,dprior,robservation)
12 | #'@export
13 | get_autoregressive <- function(){
14 |   # generate phi ~ Unif(-1,1) and log-sigma is normally distributed
15 |   rprior <- function(nparticles, parameters){
16 |     phis <- 2*runif(nparticles) - 1
17 |     logsigmas <- rnorm(nparticles, 0, 1)
18 |     return(cbind(phis, logsigmas))
19 |   }
20 | 
21 |   # evaluate the log-density of the prior, for each particle
22 |   dprior <- function(thetas, parameters){
23 |     logdensities <- dnorm(thetas[,2], 0, 1, log = TRUE)
24 |     logdensities[thetas[,1] > 1] <- -Inf
25 |     logdensities[thetas[,1] < -1] <- -Inf
26 |     return(logdensities)
27 |   }
28 |   #
29 |   generate_randomness <- function(nobservations){
30 |     return(rnorm(nobservations))
31 |   }
32 |   # function to generate a dataset for each theta value
33 |   # X_0 is Normal(0,sigma^2/(1-phi^2))
34 |   # X_t is Normal(phi X_t-1,sigma^2)
35 |   robservation <- function(nobservations, theta, parameters, randomness){
36 |     observations <- rep(0, nobservations)
37 |     observations[1] <- randomness[1] * exp(theta[2]) / sqrt(1 - theta[1]^2)
38 |     for (idata in 2:nobservations){
39 |       observations[idata] <- theta[1] * observations[idata-1] + randomness[idata] * exp(theta[2])
40 |     }
41 |     return(observations)
42 |   }
43 |   #
44 |   loglikelihood <- function(thetaparticles, observations, parameters){
45 |     init_sd <- exp(thetaparticles[,2]) / sqrt(1 - thetaparticles[,1]^2)
46 |     ll <- dnorm(observations[1], mean = 0, sd = init_sd, log = TRUE)
47 |     for (i in 2:length(observations)){
48 |       ll <- ll + dnorm(observations[i], mean = observations[i-1] * thetaparticles[,1], sd = exp(thetaparticles[,2]), log = TRUE)
49 |     }
50 |     return(ll)
51 |   }
52 | 
53 |   #
54 |   model <- list(rprior = rprior,
55 |                 dprior = dprior, loglikelihood = loglikelihood,
56 |                 generate_randomness = generate_randomness,
57 |                 robservation = robservation,
58 |                 parameter_names = c("rho", "logsigma"),
59 |                 thetadim = 2, ydim = 1,
60 |                 parameters = list())
61 |   return(model)
62 | }
63 | 


--------------------------------------------------------------------------------
/R/model_get_cosine.R:
--------------------------------------------------------------------------------
 1 | # cosine model
 2 | # theta = (phi, logsigma)
 3 | #'@rdname get_cosine
 4 | #'@title Cosine model
 5 | #'@description This function returns a list representing a cosine trend model.
 6 | #'@return The list contains rprior, dprior (generate and evaluate the density of prior distribution),
 7 | #' generate_randomness (generate data-generating variables), robservation (create synthetic
 8 | #' data sets), parameter_names (useful for plotting), thetadim (dimension of parameter),
 9 | #' ydim (dimension of observations), parameters (list of hyperparameters,
10 | #' to be passed to rprior,dprior,robservation)
11 | #'@export
12 | get_cosine <- function(){
13 |   # generate phi ~ Unif(0,2pi) and log-sigma is normally distributed
14 |   rprior <- function(nparticles, parameters){
15 |     omegas <- runif(nparticles, min = 0, max = 1/10)
16 |     phis <- runif(nparticles, min = 0, max = 2*pi)
17 |     logsigma <- rnorm(nparticles)
18 |     logA <- rnorm(nparticles)
19 |     return(cbind(omegas, phis, logsigma, logA))
20 |   }
21 | 
22 |   # evaluate the log-density of the prior, for each particle
23 |   dprior <- function(thetas, parameters){
24 |     logdensities <- dnorm(thetas[,3], 0, 1, log = TRUE)
25 |     logdensities <- logdensities + dnorm(thetas[,4], 0, 1, log = TRUE)
26 |     logdensities[thetas[,1] > 1/10] <- -Inf
27 |     logdensities[thetas[,1] < 0] <- -Inf
28 |     logdensities[thetas[,2] > 2*pi] <- -Inf
29 |     logdensities[thetas[,2] < 0] <- -Inf
30 |     return(logdensities)
31 |   }
32 |   #
33 |   generate_randomness <- function(nobservations){
34 |     return(rnorm(nobservations))
35 |   }
36 |   # function to generate a dataset for each theta value
37 |   robservation <- function(nobservations, theta, parameters, randomness){
38 |     observations <- exp(theta[4]) * cos(2*pi*theta[1]*(1:nobservations) + theta[2]) + exp(theta[3]) * randomness
39 |     return(observations)
40 |   }
41 |   #
42 |   model <- list(rprior = rprior,
43 |                 dprior = dprior,
44 |                 generate_randomness = generate_randomness,
45 |                 robservation = robservation,
46 |                 parameter_names = c("omega", "phi", "logsigma", "logA"),
47 |                 thetadim = 4, ydim = 1,
48 |                 parameters = list())
49 |   return(model)
50 | }
51 | 


--------------------------------------------------------------------------------
/R/model_get_gandk.R:
--------------------------------------------------------------------------------
 1 | #'@rdname get_gandk
 2 | #'@title G and k model
 3 | #'@description This function returns a list representing the g-and-k
 4 | #' quantile distribution.
 5 | #'@return The list contains rprior, dprior (generate and evaluate the density of prior distribution),
 6 | #' generate_randomness (generate data-generating variables), robservation (create synthetic
 7 | #' data sets), parameter_names (useful for plotting), thetadim (dimension of parameter),
 8 | #' ydim (dimension of observations), parameters (list of hyperparameters,
 9 | #' to be passed to rprior,dprior,robservation)
10 | #'@export
11 | get_gandk <- function(){
12 |   rprior <- function(nparticles, parameters){
13 |     return(matrix(runif(nparticles*4, min = 0, max = 10), ncol = 4))
14 |   }
15 |   # evaluate the log-density of the prior, for each particle
16 |   dprior <- function(thetaparticles, parameters){
17 |     densities <- rep(0, nrow(thetaparticles))
18 |     for (i in 1:nrow(thetaparticles)){
19 |       if (any(thetaparticles[i,] > 10) || any(thetaparticles[i,] < 0)){
20 |         densities[i] <- -Inf
21 |       }
22 |     }
23 |     return(densities)
24 |   }
25 |   # generate random variables used to compute a synthetic dataset
26 |   generate_randomness <- function(nobservations){
27 |     return(rnorm(nobservations))
28 |   }
29 |   # function to compute a dataset for each theta value
30 |   robservation <- function(nobservations, theta, parameters, randomness){
31 |     observations <- gandkinversecdf_givennormals(randomness, theta)
32 |     return(observations)
33 |   }
34 |   loglikelihood <- function(thetas, ys, ...){
35 |     n <- length(ys)
36 |     evals <- rep(0, nrow(thetas))
37 |     for (itheta in 1:nrow(thetas)){
38 |       ll <- function(ys, h = 1e-5, tolerance = 1e-10){
39 |         all_ys <- c(ys-h, ys+h)
40 |         o <- order(all_ys)
41 |         x <- rep(0, length(all_ys))
42 |         x[o[1]] <- gandkcdf(y = all_ys[o[1]], theta = thetas[itheta,], tolerance = tolerance)
43 |         for (i in 2:length(all_ys)){
44 |           x[o[i]] <- gandkcdf(y = all_ys[o[i]], theta = thetas[itheta,], tolerance = tolerance, lower = x[o[i-1]])
45 |         }
46 |         return(sum(log((x[(n+1):(2*n)] - x[1:n])/(2*h))))
47 |       }
48 |       evals[itheta] <- ll(ys)
49 |     }
50 |     return(evals)
51 |   }
52 |   parameters <- list()
53 |   #
54 |   model <- list(rprior = rprior,
55 |                 dprior = dprior,
56 |                 generate_randomness = generate_randomness,
57 |                 robservation = robservation,
58 |                 loglikelihood = loglikelihood,
59 |                 parameter_names = c("A", "B", "g", "k"),
60 |                 parameters = parameters,
61 |                 thetadim = 4, ydim = 1)
62 |   return(model)
63 | }
64 | 


--------------------------------------------------------------------------------
/R/model_get_levydriven.R:
--------------------------------------------------------------------------------
 1 | #'@rdname get_levydriven
 2 | #'@title Levy driven stochastic volatility model
 3 | #'@description This function returns a list representing a Levy driven stochastic volatility model.
 4 | #'@return The list contains rprior, dprior (generate and evaluate the density of prior distribution),
 5 | #' generate_randomness (generate data-generating variables), robservation (create synthetic
 6 | #' data sets), parameter_names (useful for plotting), thetadim (dimension of parameter),
 7 | #' ydim (dimension of observations), parameters (list of hyperparameters,
 8 | #' to be passed to rprior,dprior,robservation)
 9 | #'@export
10 | get_levydriven <- function(){
11 |   ## Stochastic volatility : one-factor model
12 |   #
13 |   # Y_t = mu + beta * v_t + v_t^(0.5) * epsilon_t
14 |   # X_t = (v_t, z_t)
15 |   # v_t+1 = lambda^(-1) ( z_t - z_{t+1} + sum_j=1^k e_j )
16 |   # z_t+1 = e^(-lambda) * z_t + sum_j=1^k exp(-lambda(t + 1 - c_j)) e_j
17 |   # k ~ Poisson(lambda * xi^2 / omega^2)
18 |   # c_{1:k} ~ Uniform(t, t + 1)
19 |   # e_{1:k} ~ Exp(xi / omega^2) (rate parameter)
20 |   #
21 |   # v_0 does not matter
22 |   # z_0 ~ Gamma(xi^2 / omega^2, xi / omega^2) (rate parameter)
23 |   #
24 |   # theta <- c(0, 0, 0.5, 0.0625, 0.01)
25 |   rprior <- function(nparticles, ...){
26 |     theta1 <- rnorm(nparticles, 0, sd = sqrt(2))
27 |     theta2 <- rnorm(nparticles, 0, sd = sqrt(2))
28 |     theta3 <- rexp(nparticles, rate = 0.2)
29 |     theta4 <- rexp(nparticles, rate = 0.2)
30 |     theta5 <- rexp(nparticles, rate = 1)
31 |     return(cbind(theta1, theta2, theta3, theta4, theta5))
32 |   }
33 |   dprior <- function(thetas, ...){
34 |     if (is.null(dim(thetas))) thetas <- matrix(thetas, nrow = 1)
35 |     density_evals <- dnorm(thetas[,1], mean = 0, sd = sqrt(2), log = TRUE)
36 |     density_evals <- density_evals + dnorm(thetas[,2], mean = 0, sd = sqrt(2), log = TRUE)
37 |     density_evals <- density_evals + dexp(thetas[,3], rate = 0.2, log = TRUE)
38 |     density_evals <- density_evals + dexp(thetas[,4], rate = 0.2, log = TRUE)
39 |     density_evals <- density_evals + dexp(thetas[,5], rate = 1, log = TRUE)
40 |     return(density_evals)
41 |   }
42 | 
43 |   generate_randomness <- function(nobservations){
44 |     return(list())
45 |   }
46 |   robservation <- function(nobservations, theta, parameters, randomness){
47 |     obs <- rep(0, nobservations)
48 |     state <- rgamma(2, shape = theta[3] * theta[3] / theta[4], scale = theta[4]/theta[3])
49 |     for (t in 1:nobservations){
50 |       rtransition_r <- levydriven_rtransition_rand(1, theta)
51 |       new_z <- exp(-theta[5]) * state[2] + rtransition_r$sum_weighted_e
52 |       new_v <- (1/theta[5]) * (state[2] - new_z + rtransition_r$sum_e)
53 |       state[1] <- new_v
54 |       state[2] <- new_z
55 |       obs[t] <- rnorm(1, mean = theta[1] + theta[2] * state[1], sd = sqrt(state[1]))
56 |     }
57 |     return(obs)
58 |   }
59 |   model <- list(rprior = rprior,
60 |                 dprior = dprior,
61 |                 generate_randomness = generate_randomness,
62 |                 robservation = robservation,
63 |                 parameter_names = c("mu", "beta", "xi", "omega2", "lambda"),
64 |                 thetadim = 5, ydim = 1)
65 |   return(model)
66 | }
67 | 


--------------------------------------------------------------------------------
/R/model_get_mgandk.R:
--------------------------------------------------------------------------------
 1 | #'@rdname get_mgandk
 2 | #'@title multivariate G and k model
 3 | #'@description This function returns
 4 | #'@return The list contains rprior, dprior (generate and evaluate the density of prior distribution),
 5 | #' generate_randomness (generate data-generating variables), robservation (create synthetic
 6 | #' data sets), parameter_names (useful for plotting), thetadim (dimension of parameter),
 7 | #' ydim (dimension of observations), parameters (list of hyperparameters,
 8 | #' to be passed to rprior,dprior,robservation)
 9 | #'@export
10 | get_mgandk <- function(){
11 |   rprior <- function(nparticles, parameters){
12 |     thetas <- matrix(runif(nparticles*8, min = 0, max = 10), ncol = 8)
13 |     thetas <- cbind(thetas, runif(nparticles, min = -1, max = 1))
14 |     return(thetas)
15 |   }
16 |   # evaluate the log-density of the prior, for each particle
17 |   dprior <- function(thetas, parameters){
18 |     densities <- rep(0, nrow(thetas))
19 |     for (i in 1:nrow(thetas)){
20 |       if (any(thetas[i,1:8] > 10) || any(thetas[i,1:8] < 0)){
21 |         densities[i] <- -Inf
22 |       }
23 |       densities[i] <- densities[i] + dunif(thetas[i,9], min = -1, max = 1, log = TRUE)
24 |     }
25 |     return(densities)
26 |   }
27 |   # function to compute a dataset for each theta value
28 |   robservation <- function(nobservations, theta, ...){
29 |     theta_1 <- theta[1:4]
30 |     theta_2 <- theta[5:8]
31 |     rho <- theta[9]
32 |     covariance <- matrix(c(1, rho, rho, 1), ncol = 2, nrow = 2)
33 |     normals <- fast_rmvnorm(nobservations, rep(0, 2), covariance)
34 |     y_1 <- gandkinversecdf_givennormals(normals[,1], theta_1)
35 |     y_2 <- gandkinversecdf_givennormals(normals[,2], theta_2)
36 |     return(rbind(y_1, y_2))
37 |   }
38 |   gandk_loglikelihood <- function(thetas, ys, ...){
39 |     n <- length(ys)
40 |     evals <- rep(0, nrow(thetas))
41 |     for (itheta in 1:nrow(thetas)){
42 |       ll <- function(ys, h = 1e-5, tolerance = 1e-10){
43 |         all_ys <- c(ys-h, ys+h)
44 |         o <- order(all_ys)
45 |         x <- rep(0, length(all_ys))
46 |         x[o[1]] <- gandkcdf(y = all_ys[o[1]], theta = thetas[itheta,], tolerance = tolerance)
47 |         for (i in 2:length(all_ys)){
48 |           x[o[i]] <- gandkcdf(y = all_ys[o[i]], theta = thetas[itheta,], tolerance = tolerance, lower = x[o[i-1]])
49 |         }
50 |         return(sum(log((x[(n+1):(2*n)] - x[1:n])/(2*h))))
51 |       }
52 |       evals[itheta] <- ll(ys)
53 |     }
54 |     return(evals)
55 |   }
56 |   #
57 |   loglikelihood <- function(thetas, ys, ...){
58 |     lls <- rep(0, nrow(thetas))
59 |     for (itheta in 1:nrow(thetas)){
60 |       Fy1 <- sapply(ys[1,], function(v) gandkcdf(v, thetas[itheta,1:4]))
61 |       Fy2 <- sapply(ys[2,], function(v) gandkcdf(v, thetas[itheta,5:8]))
62 |       x1 <- qnorm(Fy1)
63 |       x2 <- qnorm(Fy2)
64 |       covariance <- matrix(c(1, thetas[itheta,9], thetas[itheta,9], 1), ncol = 2, nrow = 2)
65 |       lls[itheta] <- sum(fast_dmvnorm(cbind(x1, x2), rep(0, 2), covariance))
66 |       lls[itheta] <- lls[itheta] - sum(dnorm(qnorm(Fy1), log = T)) - sum(dnorm(qnorm(Fy2), log = T))
67 |     }
68 |     lls <- lls + gandk_loglikelihood(thetas[,1:4,drop=F], ys[1,])
69 |     lls <- lls + gandk_loglikelihood(thetas[,5:8,drop=F], ys[2,])
70 |     return(lls)
71 |   }
72 |   parameters <- list()
73 |   #
74 |   model <- list(rprior = rprior,
75 |                 dprior = dprior,
76 |                 robservation = robservation,
77 |                 loglikelihood = loglikelihood,
78 |                 parameter_names = c("a1", "b1", "g1", "k1", "a2", "b2", "g2", "k2", "rho"),
79 |                 parameters = parameters,
80 |                 thetadim = 9, ydim = 2)
81 |   return(model)
82 | }
83 | 


--------------------------------------------------------------------------------
/R/model_get_multivariatenormal.R:
--------------------------------------------------------------------------------
 1 | # The observations are i.i.d. Normal with mean mu (parameters)
 2 | # and covariance matrix Sigma, defined
 3 | # as the identity matrix, and +0.5 on the upper and lower diagonals.
 4 | #'@rdname get_multivariate_normal
 5 | #'@title Multivariate Normal model
 6 | #'@description This function returns a list representing
 7 | #' a Normal location model, in a dimension specified by the user as an argument.
 8 | #'@return The list contains rprior, dprior (generate and evaluate the density of prior distribution),
 9 | #' generate_randomness (generate data-generating variables), robservation (create synthetic
10 | #' data sets), parameter_names (useful for plotting), thetadim (dimension of parameter),
11 | #' ydim (dimension of observations), parameters (list of hyperparameters,
12 | #' to be passed to rprior,dprior,robservation)
13 | #'@export
14 | get_multivariate_normal <- function(dimension){
15 |   target <- list()
16 |   target$rprior <- function(nparticles, parameters){
17 |     return(fast_rmvnorm(nparticles, rep(parameters$mu_0, dimension), diag(parameters$tau^2, dimension, dimension)))
18 |     # particles <- matrix(nrow = nparticles, ncol = dimension)
19 |     # for (id in 1:dimension){
20 |     #   particles[,id] <- rnorm(nparticles, mean = parameters$mu_0, sd = parameters$tau)
21 |     # }
22 |     # return(particles)
23 |   }
24 |   # evaluate the log-density of the prior, for each particle
25 |   # parameters is a list containing mu_0, nu, alpha, beta
26 |   target$dprior <- function(thetas, parameters){
27 |     # logdensities <- rep(0, nrow(thetas))
28 |     return(fast_dmvnorm(thetas, rep(parameters$mu_0, dimension), diag(parameters$tau^2, dimension, dimension)))
29 |     # for (id in 1:dimension){
30 |       # logdensities <- logdensities + dnorm(thetas[,id],  mean = parameters$mu_0, sd = parameters$tau, log = TRUE)
31 |     # }
32 |     # return(logdensities)
33 |   }
34 | 
35 |   # generate random variables used to compute a synthetic dataset
36 |   target$generate_randomness <- function(nobservations){
37 |     return(fast_rmvnorm(nobservations, rep(0, dimension), diag(1, dimension, dimension)))
38 |   }
39 | 
40 |   S <- diag(1, dimension, dimension)
41 |   if (dimension > 1){
42 |     for (i in 1:(dimension-1)){
43 |       S[i,i+1] <- S[i+1,i] <- 0.5
44 |     }
45 |   }
46 |   target$parameters <- list(S = S, mu_0 = 0, tau = 10)
47 |   # function to compute a dataset for each theta value
48 |   target$robservation <- function(nobservations, theta, parameters, ...){
49 |     return(t(fast_rmvnorm(nobservations, theta, parameters$S)))
50 |   }
51 |   target$loglikelihood <- function(thetaparticles, observations, parameters){
52 |     logdensities <- rep(0, nrow(thetaparticles))
53 |     for (i in 1:nrow(thetaparticles)){
54 |       logdensities[i] <- sum(fast_dmvnorm(t(observations), thetaparticles[i,], parameters$S))
55 |     }
56 |     return(logdensities)
57 |   }
58 |   target$thetadim <- dimension
59 |   target$ydim <- dimension
60 |   target$parameter_names <- paste0("X", 1:dimension)
61 |   return(target)
62 | }
63 | 


--------------------------------------------------------------------------------
/R/model_get_normal.R:
--------------------------------------------------------------------------------
 1 | # Normal Gamma model
 2 | # theta = (mu, tau)
 3 | #'@rdname get_normal
 4 | #'@title Normal model
 5 | #'@description This function returns a list representing
 6 | #' a Normal location model.
 7 | #' The prior is mu ~ Normal(mu_0, nu^{-1}), where nu is precision
 8 | #' and  tau ~ Gamma(alpha, beta), where beta is rate (1/scale).
 9 | #'  The likelihood is Y ~ Normal(mu, tau^2) where tau^2 is the variance.
10 | #'@return The list contains rprior, dprior (generate and evaluate the density of prior distribution),
11 | #' generate_randomness (generate data-generating variables), robservation (create synthetic
12 | #' data sets), parameter_names (useful for plotting), thetadim (dimension of parameter),
13 | #' ydim (dimension of observations), parameters (list of hyperparameters,
14 | #' to be passed to rprior,dprior,robservation)
15 | #'@export
16 | get_normal <- function(){
17 |   rprior <- function(nparticles, parameters){
18 |     particles <- matrix(nrow = nparticles, ncol = 2)
19 |     particles[,1] <- rnorm(nparticles, mean = parameters$mu_0, sd = 1/sqrt(parameters$nu))
20 |     particles[,2] <- rgamma(nparticles, shape = parameters$alpha, rate = parameters$beta)
21 |     return(particles)
22 |   }
23 |   # evaluate the log-density of the prior, for each particle
24 |   # parameters is a list containing mu_0, nu, alpha, beta
25 |   dprior <- function(thetaparticles, parameters){
26 |     logdensities <- dnorm(thetaparticles[,1], mean = parameters$mu_0, sd = 1/sqrt(parameters$nu), log = TRUE)
27 |     logdensities <- logdensities + dgamma(thetaparticles[,2], shape = parameters$alpha, rate = parameters$beta, log = TRUE)
28 |     return(logdensities)
29 |   }
30 |   # log-likelihood, available here and used to run MCMC
31 |   loglikelihood <- function(thetaparticles, observation, ...){
32 |     logdensities <- dnorm(observation, mean = thetaparticles[,1],
33 |                           sd = thetaparticles[,2], log = TRUE)
34 |     return(logdensities)
35 |   }
36 |   # generate random variables used to compute a synthetic dataset
37 |   generate_randomness <- function(nobservations){
38 |     return(rnorm(nobservations))
39 |   }
40 |   # function to compute a dataset for each theta value, given fixed randomness
41 |   robservation_given_randomness <- function(nobservations, theta, parameters, randomness){
42 |     observations <- theta[1] + randomness * theta[2]
43 |     return(observations)
44 |   }
45 |   # function to generate a dataset
46 |   robservation <- function(nobservations, theta, parameters){
47 |     observations <- theta[1] + rnorm(nobservations) * theta[2]
48 |     return(observations)
49 |   }
50 | 
51 |   parameters <- list(mu_0 = 0, nu = 1, alpha = 2, beta = 1)
52 |   #
53 |   model <- list(rprior = rprior,
54 |                 dprior = dprior,
55 |                 loglikelihood = loglikelihood,
56 |                 generate_randomness = generate_randomness,
57 |                 robservation_given_randomness = robservation_given_randomness,
58 |                 robservation = robservation,
59 |                 parameter_names = c("mu", "sigma"),
60 |                 parameters = parameters,
61 |                 thetadim = 2, ydim = 1)
62 |   return(model)
63 | }
64 | 


--------------------------------------------------------------------------------
/R/model_get_pz_4param.R:
--------------------------------------------------------------------------------
 1 | #'@name get_pz_4param
 2 | #'@title Phytoplankton-zooplankton model
 3 | #'@description This function returns a list representing
 4 | #' a Lotka-Volterra type model for plankton. See
 5 | #' Jones, E., Parslow, J., and Murray, L. (2010). A Bayesian approach to state and parameter estimation in a phytoplankton-zooplankton model. Australian Meteorological and Oceanographic Journal, 59:7-16.
 6 | #'@return The list contains rprior, dprior (generate and evaluate the density of prior distribution),
 7 | #' generate_randomness (generate data-generating variables), robservation (create synthetic
 8 | #' data sets), parameter_names (useful for plotting), thetadim (dimension of parameter),
 9 | #' ydim (dimension of observations), parameters (list of hyperparameters,
10 | #' to be passed to rprior,dprior,robservation)
11 | #'@export
12 | get_pz_4param <- function(){
13 |   rprior <- function(nparticles, ...){
14 |     ## evaluate prior density on the transformed parameter
15 |     theta1 <- runif(nparticles)
16 |     theta2 <- runif(nparticles)
17 |     # set the other parameters deterministically
18 |     theta3 <- runif(nparticles)
19 |     theta4 <- runif(nparticles)
20 |     return(cbind(theta1, theta2, theta3, theta4))
21 |   }
22 |   # prior, on the transformed parameters
23 |   dprior <- function(thetas, ...){
24 |     ## evaluate prior density on the transformed parameter
25 |     if (is.null(dim(thetas))) thetas <- matrix(thetas, nrow = 1)
26 |     density_evals <- dunif(thetas[,1], log = TRUE) + dunif(thetas[,2], log = TRUE)
27 |     density_evals <- density_evals + dunif(thetas[,3], log = TRUE) + dunif(thetas[,4], log = TRUE)
28 |     return(density_evals)
29 |   }
30 | 
31 |   #
32 |   generate_randomness <- function(nobservations){
33 |     return(list(x_0 = rnorm(2), x = rnorm(nobservations), y = rnorm(nobservations)))
34 |   }
35 |   robservation <- function(nobservations, theta, parameters, randomness){
36 |     # untr_theta <- untransform_theta(theta)
37 |     state <- matrix(exp(log(2) + randomness$x_0), nrow = 2)
38 |     states <- matrix(nrow = 2, ncol = nobservations+1)
39 |     states[,1] <- state
40 |     log_obs <- rep(0, nobservations)
41 |     for (t in 1:nobservations){
42 |       alpha <- theta[2] * randomness$x[t] + theta[1]
43 |       state <- pz_transition(state, alpha, t-1, c(theta[3:4], 0.1, 0.1))
44 |       states[,t+1] <- state
45 |       log_obs[t] <- 0.2*randomness$y[t] + log(state[1,1])
46 |     }
47 |     return(log_obs)
48 |   }
49 |   ### additional functions to run PMMH
50 |   rinit <- function(nparticles, theta, rand, ...){
51 |     return(exp(matrix(log(2) + rand[1:(2*nparticles)], nrow = 2)))
52 |   }
53 |   rtransition <- function(xparticles, theta, time, rand, ...){
54 |     nparticles <- ncol(xparticles)
55 |     ra <- rand[((2+time-1)*nparticles + 1):((2+time)*nparticles)]
56 |     alphas <- theta[1] + theta[2] * ra
57 |     xparticles <- pz_transition(xparticles, alphas, time-1, c(theta[3:4], 0.1, 0.1))
58 |     return(xparticles)
59 |   }
60 |   dmeasurement <- function(xparticles, theta, observation, ...) {
61 |     return(dnorm(x = observation, mean = log(xparticles[1,]), sd = 0.2, log = TRUE))
62 |   }
63 |   generate_randomness_pmmh <- function(nparticles, datalength){
64 |     return(pz_generate_randomness_cpp(nparticles, datalength))
65 |   }
66 |   precompute <- function(...){
67 |     return(list())
68 |   }
69 | 
70 |   #
71 |   model <- list(rprior = rprior,
72 |                 dprior = dprior,
73 |                 generate_randomness = generate_randomness,
74 |                 robservation = robservation,
75 |                 parameter_names = c("mu_alpha", "sigma_alpha", "c", "e"),
76 |                 thetadim = 4, ydim = 1,
77 |                 rinit = rinit, rtransition = rtransition, dmeasurement = dmeasurement, generate_randomness_pmmh = generate_randomness_pmmh, precompute = precompute)
78 |   return(model)
79 | }
80 | 


--------------------------------------------------------------------------------
/R/model_get_queue.R:
--------------------------------------------------------------------------------
 1 | #'@export
 2 | get_queue <- function(){
 3 |   rprior <- function(ntheta, parameters){
 4 |     theta1 <- runif(n = ntheta, min = 0, max = 10)
 5 |     theta2minus1 <- runif(n = ntheta, min = 0, max = 10)
 6 |     theta3 <- runif(n = ntheta, min = 0, max = 1/3)
 7 |     return(cbind(theta1, theta2minus1, theta3))
 8 |   }
 9 | 
10 |   dprior <- function(thetas, parameters){
11 |     evals <- dunif(thetas[,1], min = 0, max = 10, log = TRUE)
12 |     evals <- evals + dunif(thetas[,2], min = 0, max = 10, log = TRUE)
13 |     evals <- evals + dunif(thetas[,3], min = 0, max = 1/3, log = TRUE)
14 |     return(evals)
15 |   }
16 | 
17 |   robservation <- function(nobservations, theta, parameters, ...){
18 |     theta1 <- theta[1]
19 |     theta2 <- theta[2] + theta[1]
20 |     theta3 <- theta[3]
21 |     u <- runif(nobservations, theta1, theta2)
22 |     v <- rep(0, nobservations)
23 |     y <- rep(0, nobservations)
24 |     x <- rep(0, nobservations)
25 |     v[1] <- rexp(n = 1, rate = theta3)
26 |     y[1] <- u[1] + v[1]
27 |     x[1] <- y[1]
28 |     for (t in 2:nobservations){
29 |       v[t] <- v[t-1] + rexp(n = 1, rate = theta3)
30 |       y[t] <- u[t] + max(0, v[t] - x[t-1])
31 |       x[t] <- x[t-1] + y[t]
32 |     }
33 |     return(y)
34 |   }
35 |   model <- list(rprior = rprior,
36 |                 dprior = dprior,
37 |                 robservation = robservation,
38 |                 parameter_names = c("theta1", "theta2minus1", "theta3"),
39 |                 parameters = list(),
40 |                 thetadim = 3, ydim = 1)
41 |   return(model)
42 | }
43 | 


--------------------------------------------------------------------------------
/R/model_get_ricker.R:
--------------------------------------------------------------------------------
 1 | #'@rdname get_ricker
 2 | #'@title Ricker model
 3 | #'@description This function returns a list representing
 4 | #' the Ricker model in
 5 | #' Wood (2010) Statistical inference for noisy nonlinear ecological dynamic systems
 6 | #'@return The list contains rprior, dprior (generate and evaluate the density of prior distribution),
 7 | #' generate_randomness (generate data-generating variables), robservation (create synthetic
 8 | #' data sets), parameter_names (useful for plotting), thetadim (dimension of parameter),
 9 | #' ydim (dimension of observations), parameters (list of hyperparameters,
10 | #' to be passed to rprior,dprior,robservation)
11 | #'@export
12 | get_ricker <- function(){
13 |   rprior <- function(nparticles, ...){
14 |     theta1 <- runif(nparticles, min = 0, max = 10)
15 |     theta2 <- runif(nparticles, min = 0, max = 20)
16 |     theta3 <- runif(nparticles, min = 0, max = 2)
17 |     return(cbind(theta1, theta2, theta3))
18 |   }
19 |   dprior <- function(thetas, ...){
20 |     if (is.null(dim(thetas))) thetas <- matrix(thetas, nrow = 1)
21 |     density_evals <- dunif(thetas[,1], min = 0, max = 10, log = TRUE)
22 |     density_evals <- density_evals + dunif(thetas[,2], min = 0, max = 20, log = TRUE)
23 |     density_evals <- density_evals + dunif(thetas[,3], min = 0, max = 2, log = TRUE)
24 |     return(density_evals)
25 |   }
26 |   #
27 |   generate_randomness <- function(nobservations){
28 |     return(list())
29 |   }
30 |   robservation <- function(nobservations, theta, parameters, randomness){
31 |     obs <- rep(0, nobservations)
32 |     r <- exp(theta[1])
33 |     phi <- theta[2]
34 |     sigma_e <- theta[3]
35 |     state <- 1
36 |     for (t in 1:nobservations){
37 |       state = r * state * exp(-state + sigma_e * rnorm(1))
38 |       obs[t] <- rpois(1, phi*state)
39 |     }
40 |     return(obs)
41 |   }
42 |   #
43 |   model <- list(rprior = rprior,
44 |                 dprior = dprior,
45 |                 generate_randomness = generate_randomness,
46 |                 robservation = robservation,
47 |                 parameters = NULL,
48 |                 parameter_names = c("logr", "phi", "sigma_e"),
49 |                 thetadim = 3, ydim = 1)
50 |   return(model)
51 | }
52 | 


--------------------------------------------------------------------------------
/R/model_get_toggleswitch.R:
--------------------------------------------------------------------------------
 1 | #' @name get_toggleswitch
 2 | #' @title Toggle switch model
 3 | #' @description This function returns a list representing the toggle switch model
 4 | #' of Bonassi, F. V., West, M., et al. (2015).
 5 | #' Sequential Monte Carlo with adaptive weights for approximate Bayesian computation. Bayesian Analysis, 10(1):171-187.
 6 | #' @return The list contains rprior, dprior (generate and evaluate the density of prior distribution),
 7 | #' generate_randomness (generate data-generating variables), robservation (create synthetic
 8 | #' data sets), parameter_names (useful for plotting), thetadim (dimension of parameter),
 9 | #' ydim (dimension of observations), parameters (list of hyperparameters,
10 | #' to be passed to rprior,dprior,robservation)
11 | #' @export
12 | get_toggleswitch <- function(){
13 |   library(truncnorm)
14 |   rprior <- function(nparticles, parameters){
15 |     particles <- matrix(nrow = nparticles, ncol = 7)
16 |     particles[,1] <- runif(nparticles, 0, 50)
17 |     particles[,2] <- runif(nparticles, 0, 50)
18 |     particles[,3] <- runif(nparticles, 0, 5)
19 |     particles[,4] <- runif(nparticles, 0, 5)
20 |     particles[,5] <- runif(nparticles, 250, 450)
21 |     particles[,6] <- runif(nparticles, 0, 0.5)
22 |     particles[,7] <- runif(nparticles, 0, 0.4)
23 |     return(particles)
24 |   }
25 |   # evaluate the log-density of the prior, for each particle
26 |   dprior <- function(thetaparticles, parameters){
27 |     logdensities <- rep(0, nrow(thetaparticles))
28 |     logdensities <- dunif(thetaparticles[,1], min = 0, max = 50, log = TRUE)
29 |     logdensities <- logdensities + dunif(thetaparticles[,2], min = 0, max = 50, log = TRUE)
30 |     logdensities <- logdensities + dunif(thetaparticles[,3], min = 0, max = 5, log = TRUE)
31 |     logdensities <- logdensities + dunif(thetaparticles[,4], min = 0, max = 5, log = TRUE)
32 |     logdensities <- logdensities + dunif(thetaparticles[,5], min = 250, max = 450, log = TRUE)
33 |     logdensities <- logdensities + dunif(thetaparticles[,6], min = 0, max = 0.5, log = TRUE)
34 |     logdensities <- logdensities + dunif(thetaparticles[,7], min = 0, max = 0.4, log = TRUE)
35 |     return(logdensities)
36 |   }
37 |   # generate random variables used to compute a synthetic dataset
38 |   generate_randomness <- function(nobservations){
39 |     return(list())
40 |   }
41 |   # function to compute a dataset for each theta value
42 |   robservation <- function(nobservations, theta, parameters, randomness){
43 |     # constants used in the data generating process
44 |     h <- 1
45 |     tau <- 300
46 |     u0 <- 10
47 |     v0 <- 10
48 |     #
49 |     u <- matrix(0, nrow = nobservations, ncol = tau+1)
50 |     v <- matrix(0, nrow = nobservations, ncol = tau+1)
51 |     u[,1] <- u0
52 |     v[,1] <- v0
53 |     # noise <- array(randomness[(nobservations+1):length(randomness)], dim = c(nobservations, tau, 2))
54 |     for (t in 1:tau){
55 |       u[,t+1] <- u[,t] + h * theta[1] / (1 + v[,t]^theta[3]) - h * (1 + 0.03 * u[,t])
56 |       u[,t+1] <- u[,t+1] + h * 0.5 * rtruncnorm(nobservations, a = - u[,t+1]/(h*0.5))
57 |       v[,t+1] <- v[,t] + h * theta[2] / (1 + u[,t]^theta[4]) - h * (1 + 0.03 * v[,t])
58 |       v[,t+1] <- v[,t+1] + h * 0.5 * rtruncnorm(nobservations, a = - v[,t+1]/(h*0.5))
59 |     }
60 |     lb = -(u[,tau+1] + theta[5]) / (theta[5] * theta[6]) * (u[,tau+1]^theta[7])
61 |     y <- u[,tau+1] + theta[5] + theta[5] * theta[6] * rtruncnorm(nobservations, a = lb) / (u[,tau+1]^theta[7])
62 |     return(y)
63 |   }
64 |   parameters <- list()
65 |   #
66 |   model <- list(rprior = rprior,
67 |                 dprior = dprior,
68 |                 generate_randomness = generate_randomness,
69 |                 robservation = robservation,
70 |                 parameter_names = c("alpha_1", "alpha_2", "beta_1", "beta_2", "mu", "sigma", "gamma"),
71 |                 parameters = parameters,
72 |                 thetadim = 7, ydim = 1)
73 |   return(model)
74 | }
75 | 


--------------------------------------------------------------------------------
/R/particle_filter.R:
--------------------------------------------------------------------------------
 1 | # requires model$rinit, model$rtransition, and model$dobs
 2 | # rinit specifies x_1 and the first observation is y_1
 3 | #'@export
 4 | particle_filter <- function(nparticles, model, theta, observations, storex = FALSE){
 5 |   datalength <- ncol(observations)
 6 |   # initialization
 7 |   xparticles <- model$rinit(nparticles, theta)
 8 |   logw <- model$dobs(observations, 1, xparticles, theta)
 9 |   if (all(is.infinite(logw))){
10 |     return(NA)
11 |   }
12 |   maxlw <- max(logw)
13 |   w <- exp(logw - maxlw)
14 |   # update log likelihood estimate
15 |   ll <- maxlw + log(mean(w))
16 |   normweights <- w / sum(w)
17 |   #
18 |   if (storex){
19 |     xhistory <- rep(list(matrix(ncol = nparticles, nrow = nrow(xparticles))), datalength)
20 |     whistory <- rep(list(rep(0, nparticles)), datalength + 1)
21 |     ahistory <- rep(list(rep(0, nparticles)), datalength)
22 |     xhistory[[1]] <- xparticles
23 |     whistory[[1]] <- normweights
24 |   }
25 |   # step t > 1
26 |   for (time in 2:datalength){
27 |     ancestors <- systematic_resampling_given_u(normweights, runif(1))
28 |     xparticles <- xparticles[ancestors,,drop=F]
29 |     xparticles <- model$rtransition(xparticles, time, theta)
30 |     logw <- model$dobs(observations, time, xparticles, theta)
31 |     if (all(is.infinite(logw))){
32 |       return(NA)
33 |     }
34 |     maxlw <- max(logw)
35 |     w <- exp(logw - maxlw)
36 |     # update log likelihood estimate
37 |     ll <- ll + maxlw + log(mean(w))
38 |     normweights <- w / sum(w)
39 | 
40 |     #
41 |     if (storex){
42 |       ahistory[[time]] <- ancestors
43 |       xhistory[[time+1]] <- xparticles
44 |       whistory[[time+1]] <- normweights
45 |     }
46 |   }
47 |   if (storex){
48 |     return(list(ll = ll, xhistory = xhistory, whistory = whistory, ahistory = ahistory))
49 |   } else {
50 |     return(ll)
51 |   }
52 | }
53 | 


--------------------------------------------------------------------------------
/R/plot_functions.R:
--------------------------------------------------------------------------------
 1 | #'@export
 2 | plot_threshold <- function(wsmcresults){
 3 |   gthreshold <- qplot(x = 1:(length(wsmcresults$threshold_history)), y = wsmcresults$threshold_history, geom = "line")
 4 |   gthreshold <- gthreshold + xlab("step") + ylab("threshold")
 5 |   return(gthreshold)
 6 | }
 7 | 
 8 | #'@export
 9 | plot_threshold_time <- function(wsmcresults){
10 |   gthreshold <- qplot(x = wsmcresults$compute_times, y = wsmcresults$threshold_history, geom = "line")
11 |   gthreshold <- gthreshold + xlab("time (s)") + ylab("threshold")
12 |   return(gthreshold)
13 | }
14 | 
15 | #'@export
16 | plot_ncomputed <- function(wsmcresults){
17 |   ncomputed <- qplot(x = 1:(length(wsmcresults$ncomputed)), y = wsmcresults$ncomputed, geom = "line")
18 |   ncomputed <- ncomputed + xlab("step") + ylab("# distances computed")
19 |   return(ncomputed)
20 | }
21 | 
22 | #'@export
23 | plot_bivariate <- function(wsmcresults, i1, i2, from_step = 0){
24 |   wsmc.df <- wsmc_to_dataframe(wsmcresults)
25 |   nsteps <- max(wsmc.df$step)
26 |   parameter_names <- names(wsmc.df)[c(i1, i2)]
27 |   names(wsmc.df)[c(i1,i2)] <- c("temp_name_1", "temp_name_2")
28 |   g <- ggplot(wsmc.df %>% filter(step > from_step), aes(x = temp_name_1, y = temp_name_2, colour = step, group = step))
29 |   g <- g + geom_point(alpha = 0.5)
30 |   g <- g + scale_colour_gradient2(midpoint = from_step+floor((nsteps-from_step)/2)) + theme(legend.position = "none")
31 |   g <- g + xlab(parameter_names[i1]) + ylab(parameter_names[i2])
32 |   return(g)
33 | }
34 | 
35 | #'@export
36 | plot_bivariate_polygon <- function(wsmcresults, i1, i2, from_step = 0){
37 |   wsmc.df <- wsmc_to_dataframe(wsmcresults)
38 |   nsteps <- max(wsmc.df$step)
39 |   parameter_names <- names(wsmc.df)[c(i1, i2)]
40 |   names(wsmc.df)[c(i1,i2)] <- c("temp_name_1", "temp_name_2")
41 |   g <- ggplot(wsmc.df %>% filter(step > from_step), aes(x = temp_name_1, y = temp_name_2, colour = step, group = step))
42 |   g <- g + stat_density_2d(aes(fill = step), geom = "polygon")
43 |   g <- g + scale_colour_gradient2(midpoint = from_step+floor((nsteps-from_step)/2)) + theme(legend.position = "none")
44 |   g <- g + scale_fill_gradient2(midpoint = from_step+floor((nsteps-from_step)/2))
45 |   g <- g + xlab(parameter_names[i1]) + ylab(parameter_names[i2])
46 |   return(g)
47 | }
48 | 
49 | #'@export
50 | plot_marginal <- function(wsmcresults, i, from_step = 0){
51 |   wsmc.df <- wsmc_to_dataframe(wsmcresults)
52 |   parameter_name <- names(wsmc.df)[i]
53 |   names(wsmc.df)[i] <- c("temp_name_i")
54 |   g <- ggplot(wsmc.df %>% filter(step > from_step), aes(x = temp_name_i, colour = step, group = step)) + geom_density(aes(y = ..density..))
55 |   g <- g + theme(legend.position = "none") + xlab(parameter_name)
56 |   g <- g + scale_color_gradient(low = rgb(1,0.5,0.5), high = "darkblue")
57 |   return(g)
58 | }
59 | 
60 | #'@export
61 | plot_marginal_time <- function(wsmcresults, i, from_step = 0){
62 |   wsmc.df <- wsmc_to_dataframe(wsmcresults)
63 |   parameter_name <- names(wsmc.df)[i]
64 |   names(wsmc.df)[i] <- c("temp_name_i")
65 |   g <- ggplot(wsmc.df %>% filter(step > from_step), aes(x = temp_name_i, colour = time, group = factor(time))) + geom_density(aes(y = ..density..))
66 |   g <- g + xlab(parameter_name) # +  theme(legend.position = "none")
67 |   g <- g + scale_color_gradient(name = "time (s)", low = rgb(1,0.5,0.5), high = "darkblue")
68 |   return(g)
69 | }
70 | 
71 | 


--------------------------------------------------------------------------------
/R/pz_transition.R:
--------------------------------------------------------------------------------
 1 | #'@rdname pz_transition
 2 | #'@title pz_transition
 3 | #'@description Solve PZ ODE for each particle, given each alpha, from time to time + 1,
 4 | #' and given the parameters (c, e, ml, mq).
 5 | #'@export
 6 | #'
 7 | pz_transition <- function(xparticles, alphas, time, parameters){
 8 |   return(one_step_pz_vector(xparticles, alphas, time, parameters))
 9 | }
10 | 


--------------------------------------------------------------------------------
/R/setmytheme.R:
--------------------------------------------------------------------------------
 1 | #'@rdname setmytheme
 2 | #'@title Set My Theme
 3 | #'@description set theme for ggplot, to create consistent figures
 4 | #'@export
 5 | setmytheme <- function(){
 6 |   theme_set(theme_bw())
 7 |   theme_update(axis.text.x = element_text(size = 20),
 8 |                axis.text.y = element_text(size = 20),
 9 |                axis.title.x = element_text(size = 25, margin=margin(20,0,0,0)),
10 |                axis.title.y = element_text(size = 25, angle = 90, margin = margin(0,20,0,0)),
11 |                legend.text = element_text(size = 20),
12 |                legend.title = element_text(size = 20),
13 |                title = element_text(size = 30),
14 |                strip.text = element_text(size = 25),
15 |                strip.background = element_rect(fill="white"),
16 |                panel.spacing = unit(2, "lines"),
17 |                legend.position = "bottom")
18 | }
19 | 


--------------------------------------------------------------------------------
/R/systematic_resampling.R:
--------------------------------------------------------------------------------
 1 | #'@rdname systematic_resampling
 2 | #'@title systematic_resampling
 3 | #'@description systematic_resampling
 4 | #'@export
 5 | systematic_resampling <- function(normalized_weights){
 6 |   return(systematic_resampling_(normalized_weights))
 7 | }
 8 | 
 9 | #'@export
10 | systematic_resampling_given_u <- function(normalized_weights, u){
11 |   return(systematic_resampling_n_(normalized_weights, length(normalized_weights), u))
12 | }
13 | 
14 | #'@export
15 | systematic_resampling_n <- function(normalized_weights, ndraws){
16 |   return(systematic_resampling_n_(normalized_weights, ndraws, runif(1)))
17 | }
18 | 
19 | # systematic_resampling <- function(normalized_weights){
20 | #   N <- length(normalized_weights)
21 | #   indices <- rep(0, N)
22 | #   normalized_weights <- N * normalized_weights
23 | #   j <- 1
24 | #   csw <- normalized_weights[1]
25 | #   u <- runif(1, min = 0, max = 1)
26 | #   for (k in 1:N){
27 | #     while (csw < u){
28 | #       j <- j + 1
29 | #       csw <- csw + normalized_weights[j]
30 | #     }
31 | #     indices[k] <- j
32 | #     u <- u + 1
33 | #   }
34 | #   return(indices)
35 | # }
36 | 


--------------------------------------------------------------------------------
/R/wasserstein.R:
--------------------------------------------------------------------------------
 1 | #'@name wasserstein
 2 | #'@title wasserstein
 3 | #'@description Compute regularized Wasserstein distance between two empirical distributions,
 4 | #' p and q, specified as vector of probabilities summing to one.
 5 | #' The third argument is the cost matrix, i.e. a matrix of pair-wise distances,
 6 | #' the fourth argument is the regularization parameter, e.g. 0.05*median(cost_matrix),
 7 | #' and the last argument is the number of Sinkhorn iterations to perform, e.g. 100.
 8 | #' Important references are
 9 | #'
10 | #' - Cuturi, M. (2013). Sinkhorn distances: Lightspeed computation of optimal transport. In Advances in Neural Information Processing Systems (NIPS), pages 2292-2300.
11 | #'
12 | #' - Cuturi, M. and Doucet, A. (2014). Fast computation of Wasserstein barycenters. In Proceedings of the 31st International Conference on Machine Learning (ICML), pages 685-693.
13 | #'
14 | #'@return a list with "distances", "transportmatrix", "u" and "v"
15 | #'@export
16 | wasserstein <- function(p, q, cost_matrix, epsilon, niterations){
17 |   return(wasserstein_(p, q, cost_matrix, epsilon, niterations))
18 | }
19 | 


--------------------------------------------------------------------------------
/R/wcovariance.R:
--------------------------------------------------------------------------------
1 | #'@rdname wcovariance
2 | #'@title wcovariance
3 | #'@description wcovariance
4 | #'@export
5 | wcovariance <- function(xparticles, normweights, mean){
6 |   return(wcovariance_(xparticles, normweights, mean))
7 | }
8 | 


--------------------------------------------------------------------------------
/R/winference-package.R:
--------------------------------------------------------------------------------
 1 | #'@name winference-package
 2 | #'@aliases winference
 3 | #'@docType package
 4 | #'@title winference
 5 | #'@author anonymous <anonymous@@anonymous.com>
 6 | #'@description ...
 7 | #'@details ...
 8 | #'@keywords package
 9 | #'@useDynLib winference
10 | #'@importFrom Rcpp sourceCpp
11 | NULL
12 | 


--------------------------------------------------------------------------------
/R/wmean.R:
--------------------------------------------------------------------------------
1 | #'@rdname wmean
2 | #'@title wmean
3 | #'@description wmean
4 | #'@export
5 | wmean <- function(xparticles, normweights){
6 |   return(wmean_(xparticles, normweights))
7 | }
8 | 


--------------------------------------------------------------------------------
/R/wsmc_to_dataframe.R:
--------------------------------------------------------------------------------
 1 | #'@export
 2 | wsmc_to_dataframe <- function(results, ...){
 3 |   th <- results$thetas_history
 4 |   if (!is.null(results$target$parameter_names)){
 5 |     parameter_names <- results$target$parameter_names
 6 |   } else {
 7 |     parameter_names <- paste0("X", 1:ncol(results$thetas_history[[1]]))
 8 |   }
 9 |   nsteps <- length(th)
10 |   df <- data.frame()
11 |   for (i in 1:(nsteps)){
12 |     df_ <- data.frame(cbind(th[[i]], rep(i, nrow(th[[i]])), rep(as.numeric(results$compute_times)[i], nrow(th[[i]]))))
13 |     names(df_) <- c(parameter_names, "step", "time")
14 |     df <- rbind(df, df_)
15 |   }
16 |   names(df) <- c(parameter_names, "step", "time")
17 |   return(df)
18 | }
19 | 
20 | 
21 | # wsmc_to_dataframe <- function(results, parameter_names){
22 | #   th <- results$thetas_history
23 | #   nsteps <- length(th)
24 | #   df <- data.frame()
25 | #   for (i in 1:(nsteps)){
26 | #     df_ <- data.frame(cbind(th[[i]], rep(i, nrow(th[[i]]))))
27 | #     names(df_) <- c(parameter_names, "step")
28 | #     df <- rbind(df, df_)
29 | #   }
30 | #   names(df) <- c(parameter_names, "step")
31 | #   return(df)
32 | # }
33 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # winference
 2 | R package to perform approximate Bayesian computation with the Wasserstein distance
 3 | 
 4 | joint work with Espen Bernton, Mathieu Gerber, Christian P. Robert
 5 | 
 6 | The inst folder contains tutorial files, and files to reproduce the figures of the article.
 7 | 
 8 | The package requires R packages: Rcpp,RcppEigen,doParallel,doRNG,foreach,ggplot2,ggthemes,dplyr,reshape2,BH,transport
 9 | as well as the CGAL library which can be installed following the instructions
10 | on https://doc.cgal.org/latest/Manual/installation.html.
11 | 


--------------------------------------------------------------------------------
/WInference.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | PackageRoxygenize: rd,collate,namespace,vignette
22 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/README.R:
--------------------------------------------------------------------------------
 1 | Reproduce results of the article
 2 | ===
 3 | 
 4 | This folder contains files to reproduce the figures in the article:
 5 | "Inference in generative models using the Wasserstein distance"
 6 | by Espen Bernton, Pierre E. Jacob, Mathieu Gerber, Christian P. Robert
 7 | 
 8 | They are part of the 'winference' package:
 9 | https://github.com/pierrejacob/winference
10 | 
11 | The files are organized by name: e.g. all files relating to the queueing model
12 | start with 'queue_'. They have to be executed in a certain order, starting with
13 | '_generate_data', then '_wsmc', and finally '_plots'.
14 | 
15 | Each subfolder contains a README file detailing the role of each file.
16 | 
17 | The files will save the results in the current working directory, which you might want to change
18 | with the 'setwd' function. The scripts might take hours to run, depending on the machine; if this is a problem,
19 | make sure you take a look at what you run before you run it!
20 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/ar1/.Rapp.history:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pierrejacob/winference/bba0e89a019ccfeb403159f480cf2ac57e577f9d/inst/reproduceabc/ar1/.Rapp.history


--------------------------------------------------------------------------------
/inst/reproduceabc/ar1/README.R:
--------------------------------------------------------------------------------
 1 | Reproduce results for the AR(1) model in Section 4.2 of the article.
 2 | ===
 3 | 
 4 | ar1_generate_data.R: generate a time series from an AR(1) model
 5 | 
 6 | ar1_wsmc_marginal.R: loads data, take first 1,000 observations,
 7 | and compute WABC posterior based on Wasserstein distance between
 8 | the marginal distributions of synthetic and observed data sets.
 9 | 
10 | ar1_wsmc_delay1.R: loads data, take first 1,000 observations,
11 | and compute WABC posterior based on Wasserstein distance between
12 | the delay reconstructions of synthetic and observed data sets, with a lag of one.
13 | 
14 | ar1_plots.R: loads results from above scripts and creates the two plots
15 | of Figure 3 (a) and (b).
16 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/ar1/ar1_generate_data.R:
--------------------------------------------------------------------------------
 1 | library(winference)
 2 | rm(list = ls())
 3 | setmytheme()
 4 | set.seed(11)
 5 | prefix = ""
 6 | target <- get_autoregressive()
 7 | # number of observations
 8 | nobservations <- 10000
 9 | # parameter of data-generating process
10 | true_theta <- c(0.7, 0.9)
11 | obs <- target$robservation(nobservations, true_theta,
12 |                            target$parameters, target$generate_randomness(nobservations))
13 | 
14 | save(true_theta, obs, file = paste0(prefix, "ar1data.RData"))
15 | # plot the observations
16 | plot(obs, type = "l")
17 | acf(obs)
18 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/ar1/ar1_wsmc_delay1.R:
--------------------------------------------------------------------------------
 1 | library(winference)
 2 | registerDoParallel(cores = detectCores())
 3 | rm(list = ls())
 4 | setmytheme()
 5 | set.seed(11)
 6 | # model
 7 | target <- get_autoregressive()
 8 | #
 9 | # number of observations
10 | nobservations <- 1000
11 | nparticles = 2048
12 | prefix <- ""
13 | load(file = paste0(prefix, "ar1data.RData"))
14 | obs <- obs[1:nobservations]
15 | 
16 | #
17 | lagvalue <- 1
18 | lag_obs <- create_lagmatrix(matrix(obs, nrow = 1), lagvalue)
19 | lag_obs <- lag_obs[,seq(from=1, to=ncol(lag_obs), by=2)]
20 | 
21 | 
22 | compute_d <- get_transport_to_y(lag_obs)
23 | compute_distance <- function(y_sim){
24 |   lag_y_sim <- create_lagmatrix(matrix(y_sim, nrow = 1), lagvalue)
25 |   lag_y_sim <- lag_y_sim[,seq(from=1, to=ncol(lag_y_sim), by=2)]
26 |   compute_d(lag_y_sim)
27 | }
28 | 
29 | target$simulate <- function(theta){
30 |   return(matrix(target$robservation(nobservations, theta, target$parameters, target$generate_randomness(nobservations)), nrow = 1))
31 | }
32 | 
33 | thetas <- target$rprior(1, target$parameters)
34 | y_sim <- target$simulate(thetas[1,])
35 | compute_distance(y_sim)
36 | 
37 | param_algo <- list(nthetas = nparticles, nmoves = 1, proposal = mixture_rmixmod(),
38 |                    minimum_diversity = 0.5, R = 2, maxtrials = 1000)
39 | 
40 | filename <- paste0(prefix, "ar1.n", nobservations, ".wsmc_delay1.RData")
41 | results <- wsmc(compute_distance, target, param_algo, savefile = filename, maxsim = 10^6)
42 | load(filename)
43 | # results <- wsmc_continue(results, savefile = filename, maxtime = 10*60*60)
44 | #
45 | # load(filename)
46 | # wsmc.df <- wsmc_to_dataframe(results)
47 | # nsteps <- max(wsmc.df$step)
48 | # plot_bivariate_polygon(results, 1, 2)
49 | # plot_bivariate(results, 1, 2)
50 | # plot_threshold_time(results)
51 | 
52 | 
53 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/ar1/ar1_wsmc_marginal.R:
--------------------------------------------------------------------------------
 1 | library(winference)
 2 | registerDoParallel(cores = detectCores())
 3 | rm(list = ls())
 4 | setmytheme()
 5 | set.seed(11)
 6 | # model
 7 | target <- get_autoregressive()
 8 | #
 9 | # number of observations
10 | nobservations = 1000
11 | nparticles = 2048
12 | prefix <- ""
13 | load(file = paste0(prefix, "ar1data.RData"))
14 | obs <- obs[1:nobservations]
15 | obs_sorted = sort(obs)
16 | compute_d = function(y){
17 |   sort_y = sort(y)
18 |   mean(abs(sort_y-obs_sorted))
19 | }
20 | 
21 | target$simulate <- function(theta){
22 |   return(matrix(target$robservation(nobservations, theta, target$parameters, target$generate_randomness(nobservations)), nrow = 1))
23 | }
24 | 
25 | thetas <- target$rprior(1, target$parameters)
26 | y_sim <- target$simulate(thetas[1,])
27 | compute_d(y_sim)
28 | 
29 | param_algo <- list(nthetas = nparticles, nmoves = 1, proposal = mixture_rmixmod(),
30 |                    minimum_diversity = 0.5, R = 2, maxtrials = 1000)
31 | 
32 | filename <- paste0(prefix, "ar1.n", nobservations, ".wsmc_marginal.RData")
33 | results <- wsmc(compute_d, target, param_algo, savefile = filename, maxtime = 60*60)
34 | # load(filename)
35 | # results <- wsmc_continue(results, savefile = filename, maxtime = 2*60*60)
36 | #
37 | # load(filename)
38 | # wsmc.df <- wsmc_to_dataframe(results)
39 | # nsteps <- max(wsmc.df$step)
40 | # plot_bivariate_polygon(results, 1, 2)
41 | # plot_bivariate(results, 1, 2)
42 | 
43 | 
44 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/cosine/README.R:
--------------------------------------------------------------------------------
 1 | Reproduce results for the cosine model in Section 4.1 of the article.
 2 | ===
 3 | 
 4 | cosine_generate_data.R: generate a time series from a cosine trend + noise model
 5 | 
 6 | cosine_mcmc.R: loads data, take first 100 observations,
 7 | and runs a Metropolis--Hastings algorithm to approximate the posterior distribution.
 8 | 
 9 | cosine_wsmc_euclidean.R: loads data, take first 100 observations,
10 | and compute ABC posterior based on Euclidean distances between
11 | synthetic and observed data sets.
12 | 
13 | cosine_wsmc_curvematching_wasserstein.R: loads data, take first 100 observations,
14 | and compute WABC posterior based on curve matching, with lambda = 1,
15 | and Wasserstein distance computed exactly using the "transport" package
16 | (which needs to be installed, i.e. install.package("transport"))
17 | 
18 | cosine_plots.R: loads results from above scripts and creates the four plots
19 | of Figure 2 (a,b,c,d).
20 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/cosine/cosine_generate_data.R:
--------------------------------------------------------------------------------
 1 | library(winference)
 2 | registerDoParallel(cores = detectCores())
 3 | rm(list = ls())
 4 | setmytheme()
 5 | set.seed(11)
 6 | 
 7 | target <- get_cosine()
 8 | 
 9 | prefix = ""
10 | 
11 | # number of observations
12 | nobservations <- 10000
13 | # parameter of data-generating process
14 | true_theta <- c(1/80, pi/4, 0, log(2))
15 | obs <- target$robservation(nobservations, true_theta,
16 |                            target$parameters, target$generate_randomness(nobservations))
17 | 
18 | save(true_theta, obs, file = paste0(prefix,"cosinedata.RData"))
19 | 
20 | plot(obs[1:100], type = "l")
21 | # hist(obs)
22 | # acf(obs, 100)
23 | # mean(obs)
24 | # sd(obs)
25 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/cosine/cosine_mcmc.R:
--------------------------------------------------------------------------------
 1 | library(winference)
 2 | registerDoParallel(cores = detectCores())
 3 | rm(list = ls())
 4 | setmytheme()
 5 | set.seed(11)
 6 | prefix <- ""
 7 | target <- get_cosine()
 8 | 
 9 | nobservations <- 100
10 | 
11 | load(paste0(prefix, "cosinedata.RData"))
12 | obs <- matrix(obs[1:nobservations], nrow = 1)
13 | loglikelihood <- function(thetas, ys, ...){
14 |   evals <- rep(0, nrow(thetas))
15 |   for (itheta in 1:nrow(thetas)){
16 |     backbone <- exp(thetas[itheta,4]) * cos(2 * pi * thetas[itheta,1] * (1:nobservations) + thetas[itheta,2])
17 |     evals[itheta] <- sum(dnorm(ys, mean = backbone, sd = exp(thetas[itheta,3]), log = TRUE))
18 |   }
19 |   return(evals)
20 | }
21 | 
22 | 
23 | target$loglikelihood <- loglikelihood
24 | theta_init <- target$rprior(8, target$parameters)
25 | 
26 | tuning_parameters <- list(niterations = 100000, nchains = nrow(theta_init),
27 |                           cov_proposal = diag(0.1, nrow = target$thetadim, ncol = target$thetadim),
28 |                           adaptation = 10000, init_chains = theta_init)
29 | mhfile <- paste0(prefix, "cosine.mcmc.n", nobservations, ".RData")
30 | mh <- metropolishastings(obs, target, tuning_parameters)
31 | save(mh, file = mhfile)
32 | load(mhfile)
33 | 
34 | burnin <- 50000
35 | chain.df <- mhchainlist_to_dataframe(mh$chains)
36 | chain.df %>% head
37 | g <- ggplot(chain.df %>% filter(iteration > burnin, iteration %% 100 == 1), aes(x = iteration, y = X.1, group = ichain, colour = factor(ichain))) + geom_line()
38 | g +  geom_hline(yintercept = true_theta[1], col = "red")
39 | 
40 | g <- ggplot(chain.df %>% filter(iteration > burnin, iteration %% 100 == 1), aes(x = iteration, y = X.2, group = ichain, colour = factor(ichain))) + geom_line()
41 | g +  geom_hline(yintercept = true_theta[2], col = "red")
42 | 
43 | g <- ggplot(chain.df %>% filter(iteration > burnin, iteration %% 100 == 1), aes(x = iteration, y = X.3, group = ichain, colour = factor(ichain))) + geom_line()
44 | g +  geom_hline(yintercept = true_theta[3], col = "red")
45 | 
46 | g <- ggplot(chain.df %>% filter(iteration > burnin, iteration %% 100 == 1), aes(x = iteration, y = X.4, group = ichain, colour = factor(ichain))) + geom_line()
47 | g +  geom_hline(yintercept = true_theta[4], col = "red")
48 | 
49 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/cosine/cosine_wsmc_curvematching_wasserstein.R:
--------------------------------------------------------------------------------
 1 | library(winference)
 2 | registerDoParallel(cores = detectCores())
 3 | rm(list = ls())
 4 | setmytheme()
 5 | set.seed(11)
 6 | 
 7 | prefix = ""
 8 | 
 9 | target <- get_cosine()
10 | 
11 | nobservations <- 100
12 | 
13 | load(paste0(prefix, "cosinedata.RData"))
14 | obs <- matrix(obs[1:nobservations], nrow = 1)
15 | target$simulate <- function(theta){
16 |   return(matrix(target$robservation(nobservations, theta, target$parameters, target$generate_randomness(nobservations)), nrow = 1))
17 | }
18 | 
19 | param_algo <- list(nthetas = 2048, nmoves = 1, proposal = mixture_rmixmod(),
20 |                    minimum_diversity = 0.5, R = 2, maxtrials = 1e5)
21 | 
22 | lambda <- 1
23 | multiplier <- lambda*(max(obs[1,]) - min(obs[1,]))
24 | augment <- function(series) rbind(multiplier * (1:length(series))/length(series), series)
25 | augmented_obs <- augment(obs)
26 | plot(augmented_obs[1,],augmented_obs[2,])
27 | sorted_augmented_obs <- augmented_obs[,hilbert_order(augmented_obs)]
28 | 
29 | compute_d <- function(y_fake){
30 |   augmented_y_fake <- augment(y_fake)
31 |   sink("/dev/null")
32 |   z <- exact_transport_distance(augmented_obs, augmented_y_fake, p = 1, ground_p = 2)
33 |   sink(NULL)
34 |   return(z)
35 | }
36 | 
37 | # library(microbenchmark)
38 | # microbenchmark(y_sim <- target$simulate(true_theta),compute_d(y_sim))
39 | # y_sim <- target$simulate(true_theta)
40 | # compute_d(y_sim)
41 | 
42 | filename <- paste0(prefix, "cosine_wsmc_curvematching.wasserstein.lambda", lambda, ".n", nobservations, ".RData")
43 | # results <- wsmc(compute_d, target, param_algo, savefile = filename, maxsimulation = 1e6)
44 | load(filename)
45 | # results$compute_d = compute_d
46 | # ncomp = sum(results$ncomputed)
47 | # results <- wsmc_continue(results, savefile = filename, maxsimulation = 1e6)
48 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/cosine/cosine_wsmc_euclidean.R:
--------------------------------------------------------------------------------
 1 | library(winference)
 2 | registerDoParallel(cores = detectCores())
 3 | rm(list = ls())
 4 | setmytheme()
 5 | set.seed(11)
 6 | 
 7 | prefix = ""
 8 | 
 9 | target <- get_cosine()
10 | 
11 | nobservations <- 100
12 | 
13 | load(paste0(prefix, "cosinedata.RData"))
14 | obs <- matrix(obs[1:nobservations], nrow = 1)
15 | target$simulate <- function(theta){
16 |   return(matrix(target$robservation(nobservations, theta, target$parameters, target$generate_randomness(nobservations)), nrow = 1))
17 | }
18 | 
19 | param_algo <- list(nthetas = 2048, nmoves = 1, proposal = mixture_rmixmod(),
20 |                    minimum_diversity = 0.5, R = 2, maxtrials = 1e5)
21 | 
22 | compute_d <- function(z){
23 |   return(sqrt(sum((z[1,] - obs[1,])^2)))
24 | }
25 | 
26 | 
27 | filename <- paste0(prefix, "cosine_wsmc_euclidean.n", nobservations, ".RData")
28 | results <- wsmc(compute_d, target, param_algo, savefile = filename, maxsimulation = 1e7)
29 | load(filename)
30 | # results <- wsmc_continue(results, savefile = filename, maxsimulation = 1e6)
31 | 
32 | # plot_marginal(results, 1)
33 | # plot_marginal(results, 2)
34 | # plot_marginal(results, 3)
35 | # plot_marginal(results, 4)
36 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/gandk/README.R:
--------------------------------------------------------------------------------
 1 | Reproduce results for the univariate g-and-k model in Section 5.1.1 of the WABC article.
 2 | ===
 3 |   
 4 | gandk_generate_data.R: generate and save a data set from the model
 5 | 
 6 | gandk_wsmc.R: loads a data set of size 250 and runs SMC to approximate the WABC
 7 | posterior using the exact Wasserstein distance with a budget of 2.4*10^6 model simulations.
 8 | Can be continued for a long run (e.g. 10^8 simulations in as Fig 5 of the paper),
 9 | using wsmc_continue.
10 | 
11 | gandk_abctools.R: loads the abctools package, loads the data. Runs the semi-automatic
12 | ABC procedure of Fearnhead and Prangle (2012) using 2.4*10^6 model simulations. Might 
13 | require a lot of memory.
14 | 
15 | gandk_mcmc.R: load the data and run MCMC to approximate the posterior distribution,
16 | using numerical approximations of the likelihood. Proposal adapted to target using a
17 | previous run of the algorithm. Performs 75,000 iterations.
18 | 
19 | gandk_plots_compare.R: loads the data and the output of the scripts above. Plots 
20 | the marginal distributions corresponding to Fig 4 of the paper. 
21 | 
22 | gandk_plots_convergence.R: loads the data and output from gandk_wsmc.R and gandk_mcmc.R.
23 | Plots correspond WABC marginal posteriors in Fig 5 a)-d).
24 | 
25 | gandk_plots_ncomputed.R: loads the data and output from gandk_wsmc.R and gandk_mcmc.R.
26 | Plot corresponds to Fig 5 e). number of sims vs number of smc steps
27 | 
28 | gandk_plots_threshold.R: loads the data and output from gandk_wsmc.R and gandk_mcmc.R.
29 | Plot corresponds to Fig 5 f). threshold vs number of model sims.
30 | 
31 | gandk_plots_w_to_posterior.R: loads the data and output from gandk_wsmc.R and gandk_mcmc.R.
32 | Approximates the W1 distance between the posterior and the WABC approximation, plots W1
33 | distance as function of model simulations. Plot corresponds to Fig 5 g).


--------------------------------------------------------------------------------
/inst/reproduceabc/gandk/gandk_abctools.R:
--------------------------------------------------------------------------------
 1 | library(abctools)
 2 | library(winference)
 3 | rm(list = ls())
 4 | registerDoParallel(cores = detectCores())
 5 | setmytheme()
 6 | set.seed(11)
 7 | target <- get_gandk()
 8 | 
 9 | prefix = ""
10 | 
11 | nobservations <- 250
12 | load(paste0(prefix, "gandkdata.RData"))
13 | obs <- obs[1:nobservations]
14 | 
15 | target$simulate <- function(theta){
16 |   return(matrix(target$robservation(nobservations, theta, target$parameters, target$generate_randomness(nobservations)), nrow = 1))
17 | }
18 | 
19 | 
20 | # load(paste0(prefix, "gandkwsmc.n", nobservations, ".RData"))
21 | # results_was = results
22 | # ndists = cumsum(results$ncomputed)
23 | # nsim = ndists[34]
24 | 
25 | # nsim_approx = 10^6
26 | # nsim = ndists[which.min(abs(nsim_approx - ndists))]
27 | 
28 | nsim = 2.4*10^6
29 | 
30 | m = 10 #determines number of order stats in initial summary stat
31 | l = 4 #determines how many powers of the order stats are included in the initial summary stat
32 | 
33 | t = proc.time()
34 | thetas = target$rprior(nsim, target$parameters)
35 | 
36 | obs_subset = sort(obs)[c(1,(1:nobservations)[(nobservations/m)*(1:(m-1))],nobservations)]
37 | 
38 | sumstats = apply(thetas, MARGIN = 1, function(theta) {
39 |   y_fake = target$simulate(theta)
40 |   sort_y_fake = sort(y_fake)
41 |   subset_y_fake = sort_y_fake[c(1,(1:nobservations)[(nobservations/m)*(1:(m-1))],nobservations)]
42 |   })
43 | sumstats = t(sumstats)
44 | 
45 | # sumstats = foreach(i = 1:nsim, .combine = rbind) %dorng%{
46 | #     y_fake = target$simulate(theta)
47 | #     sort_y_fake = sort(y_fake)
48 | #     subset_y_fake = sort_y_fake[c(1,(1:nobservations)[(nobservations/m)*(1:(m-1))],nobservations)]
49 | #     return(subset_y_fake)
50 | # }
51 | 
52 | filename_sumstats = paste0(prefix, "sumstats.gandk.abctools.n", nobservations,".m",m,".l",l,".nsim",nsim,".RData")
53 | save(list(sumstats,thetas), file = filename_sumstats)
54 | 
55 | load(filename_sumstats)
56 | N = results_was$param_algo$nthetas
57 | tol = N/nsim
58 | #tol = 1000/nsim
59 | #tol = 500/nsim
60 | #tol = 100/nsim
61 | 
62 | 
63 | tfs <- list(function(x){cbind(x, x^2, x^3, x^4)})
64 | saabc <- semiauto.abc(obs = obs_subset, param = thetas, sumstats = sumstats,
65 |                       satr = tfs, overlap = TRUE, saprop = 1,
66 |                       abcprop = 1, tol = tol, method = "rejection",
67 |                       final.dens = TRUE)
68 | 
69 | t = proc.time() - t
70 | t
71 | results = list(thetas = saabc$post.sample, compute_time = t, threshold = tol, norder = m+1, npower = l, nsim = nsim)
72 | filename = paste0(prefix, "gandk.abctools.n", nobservations,".m",m,".l",l,".nsim",nsim,".RData")
73 | save(results, file = filename)
74 | load(filename)
75 | 
76 | # hist(saabc$post.sample[,1],prob=T)
77 | # hist(saabc$post.sample[,2],prob=T)
78 | # hist(saabc$post.sample[,3],prob=T)
79 | # hist(saabc$post.sample[,4],prob=T)
80 | 
81 | 
82 | 
83 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/gandk/gandk_generate_data.R:
--------------------------------------------------------------------------------
 1 | #+ presets, echo = FALSE, warning = FALSE, message = FALSE
 2 | library(winference)
 3 | registerDoParallel(cores = detectCores())
 4 | rm(list = ls())
 5 | setmytheme()
 6 | 
 7 | set.seed(11)
 8 | 
 9 | prefix = ""
10 | filename = paste0(prefix,"gandkdata.Rdata")
11 | 
12 | target <- get_gandk()
13 | 
14 | # number of observations
15 | nobservations <- 10000
16 | # parameter of data-generating process
17 | true_theta <- c(3, 1, 2, 0.5)
18 | obs <- target$robservation(nobservations, true_theta,
19 |                            target$parameters, target$generate_randomness(nobservations))
20 | 
21 | save(true_theta, obs, file = filename)
22 | 
23 | hist(obs)
24 | plot(obs[1:1000], type = "l")
25 | mean(obs)
26 | sd(obs)
27 | 
28 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/gandk/gandk_plots_convergence.R:
--------------------------------------------------------------------------------
 1 | library(winference)
 2 | registerDoParallel(cores = detectCores())
 3 | rm(list = ls())
 4 | setmytheme()
 5 | set.seed(11)
 6 | target <- get_gandk()
 7 | 
 8 | fig.height <- 5
 9 | fig.width <- 5
10 | 
11 | my_colors <- get_my_colors()
12 | 
13 | prefix <- ""
14 | 
15 | nobservations = 250
16 | load(paste0(prefix, "gandkdata.RData"))
17 | obs <- obs[1:nobservations]
18 | 
19 | # Wasserstein SMC
20 | filename = paste0(prefix, "gandkwsmc.n", nobservations, ".RData")
21 | load(filename)
22 | results_was = results
23 | wsmc.df = wsmc_to_dataframe(results_was)
24 | nsteps = tail(wsmc.df$step,n=1)
25 | #step = nsteps
26 | step = 20
27 | wsmc.df = wsmc.df[wsmc.df$step>=step,]
28 | 
29 | 
30 | # MCMC
31 | mhfile <- paste0(prefix, "gandkmcmc.n", nobservations, "mh.RData")
32 | load(mhfile)
33 | mcmc.df <- mhchainlist_to_dataframe(mh$chains)
34 | names(mcmc.df) <- c("ichain", "iteration", target$parameter_names)
35 | burnin = 50000
36 | mcmc.df = mcmc.df[mcmc.df$iteration>burnin,3:6]
37 | 
38 | 
39 | g <- ggplot(mcmc.df, aes(x = A)) + geom_density(aes(y = ..density.., fill = "Posterior"), alpha = 0.5)
40 | g <- g + geom_density(data = wsmc.df, aes(x = A, colour = step, group = step), alpha = 0.5)
41 | g <- g + scale_color_gradient(low = rgb(1,0.5,0.5), high = "darkblue", name = "")
42 | g <- g + scale_fill_manual(name = "", values = my_colors) + xlab("a") + theme(legend.position = "none")
43 | g <- g + geom_vline(xintercept = true_theta[1])
44 | g
45 | ggsave(filename = paste0(prefix, "conv.gandk_marginal1.pdf"), plot = g, width = fig.width, height = fig.height)
46 | 
47 | g <- ggplot(mcmc.df, aes(x = B)) + geom_density(aes(y = ..density.., fill = "Posterior"), alpha = 0.5)
48 | g <- g + geom_density(data = wsmc.df, aes(x = B, colour = step, group = step), alpha = 0.5)
49 | g <- g + scale_color_gradient(low = rgb(1,0.5,0.5), high = "darkblue", name = "")
50 | g <- g + scale_fill_manual(name = "", values = my_colors) + xlab("b") + theme(legend.position = "none")
51 | g <- g + geom_vline(xintercept = true_theta[2])
52 | g
53 | ggsave(filename = paste0(prefix, "conv.gandk_marginal2.pdf"), plot = g, width = fig.width, height = fig.height)
54 | 
55 | g <- ggplot(mcmc.df, aes(x = g)) + geom_density(aes(y = ..density.., fill = "Posterior"), alpha = 0.5)
56 | g <- g + geom_density(data = wsmc.df, aes(x = g, colour = step, group = step), alpha = 0.5)
57 | g <- g + scale_color_gradient(low = rgb(1,0.5,0.5), high = "darkblue", name = "")
58 | g <- g + scale_fill_manual(name = "", values = my_colors) + xlab("g") + theme(legend.position = "none")
59 | g <- g + geom_vline(xintercept = true_theta[3])
60 | g <- g + coord_cartesian(xlim = c(1, 6))
61 | g
62 | ggsave(filename = paste0(prefix, "conv.gandk_marginal3.pdf"), plot = g, width = fig.width, height = fig.height)
63 | 
64 | g <- ggplot(mcmc.df, aes(x = k)) + geom_density(aes(y = ..density.., fill = "Posterior"), alpha = 0.5)
65 | g <- g + geom_density(data = wsmc.df, aes(x = k, colour = step, group = step), alpha = 0.5)
66 | g <- g + scale_color_gradient(low = rgb(1,0.5,0.5), high = "darkblue", name = "")
67 | g <- g + scale_fill_manual(name = "", values = my_colors) + xlab("k") + theme(legend.position = "none")
68 | g <- g + geom_vline(xintercept = true_theta[4])
69 | g
70 | ggsave(filename = paste0(prefix, "conv.gandk_marginal4.pdf"), plot = g, width = fig.width, height = fig.height)
71 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/gandk/gandk_plots_ncomputed.R:
--------------------------------------------------------------------------------
 1 | library(winference)
 2 | registerDoParallel(cores = detectCores())
 3 | rm(list = ls())
 4 | setmytheme()
 5 | set.seed(11)
 6 | target <- get_gandk()
 7 | 
 8 | fig.height <- 5
 9 | fig.width <- 5
10 | 
11 | my_colors <- get_my_colors()
12 | 
13 | prefix = ""
14 | 
15 | nobservations = 250
16 | load(paste0(prefix, "gandkdata.RData"))
17 | obs <- obs[1:nobservations]
18 | 
19 | # Wasserstein SMC
20 | filename = paste0(prefix, "gandkwsmc.n", nobservations, ".RData")
21 | load(filename)
22 | results_was = results
23 | wsmc.df = wsmc_to_dataframe(results_was)
24 | nsteps = tail(wsmc.df$step,n=1)
25 | ncomp = results_was$ncomputed
26 | 
27 | df = data.frame(steps = 1:nsteps, ncomp = ncomp)
28 | g = ggplot(df, aes(x = steps, y = ncomp)) + geom_line(aes(colour = "Wasserstein")) + geom_point(aes(colour = "Wasserstein"))
29 | g = g + scale_color_manual(name = "", values = my_colors) + xlab("steps") + ylab("# model simulations")
30 | g = g + scale_y_log10(breaks = c(1e4,1e5,1e6,1e7,1e8)) + scale_x_continuous(breaks = c(10,20,40,30,50))
31 | g <- g + geom_label(data = data.frame(x = c(34), y = c(6e7), method = c("Wasserstein")),
32 |                     aes(x = x, y = y, colour = method, label = method), size = 7) + theme(legend.position = "none")
33 | g
34 | ggsave(filename = paste0(prefix, "gandk.n", nobservations, ".ncomp_vs_step.pdf"), plot = g, width = fig.width, height = fig.height)
35 | 
36 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/gandk/gandk_plots_threshold.R:
--------------------------------------------------------------------------------
 1 | library(winference)
 2 | registerDoParallel(cores = detectCores())
 3 | rm(list = ls())
 4 | setmytheme()
 5 | set.seed(11)
 6 | target <- get_gandk()
 7 | 
 8 | fig.height <- 5
 9 | fig.width <- 5
10 | 
11 | my_colors <- get_my_colors()
12 | 
13 | prefix = ""
14 | 
15 | nobservations = 250
16 | load(paste0(prefix, "gandkdata.RData"))
17 | obs <- obs[1:nobservations]
18 | 
19 | # Wasserstein SMC
20 | filename = paste0(prefix, "gandkwsmc.n", nobservations, ".RData")
21 | load(filename)
22 | results_was = results
23 | #step = 20
24 | #thresholds = results_was$threshold_history[-(1:(step-1))]
25 | step = 1
26 | thresholds = results_was$threshold_history
27 | ncomp = cumsum(results_was$ncomputed)
28 | 
29 | # g = qplot(x = step:(step+length(thresholds)-1), y = thresholds, geom = "line")
30 | # g = g + xlab("step") + ylab("threshold")
31 | # g = g + scale_y_log10(breaks = c(1e4,100,10,1,0.1,0.01))
32 | # g
33 | # ggsave(filename = paste0(prefix, "gandk.n", nobservations, ".threshold.pdf"), plot = g, width = fig.width, height = fig.height)
34 | 
35 | df = data.frame(thresholds = thresholds, ncomp = ncomp)
36 | g = ggplot(df, aes(x = ncomp, y = thresholds)) + geom_line(aes(colour = "Wasserstein")) + geom_point(aes(colour = "Wasserstein"))
37 | g = g + scale_color_manual(name = "", values = my_colors) + xlab("# model simulations") + ylab("threshold")
38 | g = g + scale_y_log10(breaks = c(1e4,1e3,100,10,1,0.1,0.01)) + scale_x_log10(breaks = c(1e4,1e6,1e8))
39 | g <- g + geom_label(data = data.frame(x = c(1e7), y = c(0.4), method = c("Wasserstein")),
40 |                     aes(x = x, y = y, colour = method, label = method), size = 7) + theme(legend.position = "none")
41 | g
42 | ggsave(filename = paste0(prefix, "gandk.n", nobservations, ".threshold_vs_ncomp.pdf"), plot = g, width = fig.width, height = fig.height)
43 | 
44 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/gandk/gandk_plots_w_to_posterior.R:
--------------------------------------------------------------------------------
 1 | library(winference)
 2 | registerDoParallel(cores = detectCores())
 3 | rm(list = ls())
 4 | setmytheme()
 5 | set.seed(11)
 6 | target <- get_gandk()
 7 | 
 8 | fig.height <- 5
 9 | fig.width <- 5
10 | 
11 | my_colors <- get_my_colors()
12 | 
13 | prefix = ""
14 | 
15 | nobservations = 250
16 | load(paste0(prefix, "gandkdata.RData"))
17 | obs <- obs[1:nobservations]
18 | 
19 | # Wasserstein SMC
20 | filename = paste0(prefix, "gandkwsmc.n", nobservations, ".RData")
21 | load(filename)
22 | results_was = results
23 | 
24 | 
25 | # MCMC
26 | mhfile <- paste0(prefix, "gandkmcmc.n", nobservations, "mh.RData")
27 | load(mhfile)
28 | mcmc.df <- mhchainlist_to_dataframe(mh$chains)
29 | names(mcmc.df) <- c("ichain", "iteration", target$parameter_names)
30 | burnin = 50000
31 | mcmc.df = mcmc.df[mcmc.df$iteration>burnin,]
32 | 
33 | 
34 | #Calculate Wasserstein of WABC to posterior
35 | wasserstein_to_posterior <- function(results, posterior_sample, nmax){
36 |   nsteps <- length(results$thetas_history)
37 |   if (nsteps > nmax){
38 |     steps <- floor(seq(from = 1, to = nsteps, length.out = nmax))
39 |   } else {
40 |     steps <- 1:nsteps
41 |   }
42 |   w_to_post_ <- as.numeric(foreach (istep = steps, .combine = c) %dorng% {
43 |     thetas <- results$thetas_history[[istep]]
44 |     x <- try(exact_transport_distance(t(thetas), t(posterior_sample), p = 1, ground_p = 2))
45 |     if (inherits(x, "try-error")){
46 |       x <- sinkhorn_distance(t(thetas), t(posterior_sample), p = 1, ground_p = 2, eps = 0.1, niterations = 1000)$uncorrected
47 |     }
48 |     x
49 |   })
50 |   return(data.frame(steps = steps, ncomputed = cumsum(results$ncomputed)[steps], times = results$compute_times[steps], w = w_to_post_))
51 | }
52 | 
53 | 
54 | npost <- 2048
55 | mcmcpostburnin <- mcmc.df %>% filter(iteration > burnin)
56 | posterior_sample <- mcmcpostburnin[sample(x = 1:nrow(mcmcpostburnin), size = npost, replace = TRUE),3:ncol(mcmcpostburnin)]
57 | 
58 | filename <- paste0(prefix, "gandk.n", nobservations, ".w1posterior.RData")
59 | n_w_comp <- 50
60 | w_to_post_wasserstein <- wasserstein_to_posterior(results_was, posterior_sample, n_w_comp)
61 | save(w_to_post_wasserstein, file = filename)
62 | load(file = filename)
63 | 
64 | g <- ggplot(w_to_post_wasserstein, aes(x = ncomputed, y = w)) + geom_line(aes(colour = "Wasserstein")) + geom_point(aes(colour = "Wasserstein"))
65 | g <- g + scale_color_manual(name = "", values = my_colors) + xlab("# model simulations") + ylab("W-distance to posterior")
66 | g <- g + scale_x_log10(breaks = c(1e4,1e5,1e6,1e7,1e8)) + scale_y_log10(breaks = c(10,5,2,1,0.5,0.25,0.1,0.06))
67 | g <- g + geom_label(data = data.frame(x = c(8e6), y = c(0.1), method = c("Wasserstein")),
68 |                     aes(x = x, y = y, colour = method, label = method), size = 7) + theme(legend.position = "none")
69 | g
70 | ggsave(filename = paste0(prefix, "gandk.n", nobservations, ".w1post.pdf"), plot = g, width = 2*fig.width, height = fig.height)
71 | 
72 | 
73 | 
74 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/gandk/gandk_wsmc.R:
--------------------------------------------------------------------------------
 1 | #+ presets, echo = FALSE, warning = FALSE, message = FALSE
 2 | library(winference)
 3 | registerDoParallel(cores = detectCores())
 4 | rm(list = ls())
 5 | setmytheme()
 6 | set.seed(11)
 7 | target <- get_gandk()
 8 | # number of observations
 9 | nobservations <- 250
10 | prefix = ""
11 | load(paste0(prefix, "gandkdata.RData"))
12 | obs <- obs[1:nobservations]
13 | sort_obs = sort(obs)
14 | 
15 | 
16 | #compute_d <- get_hilbert_to_y(matrix(obs, nrow = 1))
17 | 
18 | compute_d = function(y){
19 |   sort_y = sort(y)
20 |   mean(abs(sort_y-sort_obs))
21 | }
22 | 
23 | target$simulate <- function(theta){
24 |   return(matrix(target$robservation(nobservations, theta, target$parameters, target$generate_randomness(nobservations)), nrow = 1))
25 | }
26 | 
27 | # M=10000
28 | # ts = rep(0,M)
29 | # for(i in 1:M){
30 | #   y_sim = target$simulate(target$rprior(1, target$parameters))
31 | #   t=proc.time()
32 | #   compute_d_alt(y_sim)
33 | #   t=proc.time() -t
34 | #   ts[i] = t[3]
35 | # }
36 | 
37 | param_algo <- list(nthetas = 2048, nmoves = 1, proposal = mixture_rmixmod(),
38 |                    minimum_diversity = 0.5, R = 2, maxtrials = 100000)
39 | #t = proc.time()
40 | filename <- paste0(prefix, "gandkwsmc.n", nobservations, ".RData")
41 | results <- wsmc(compute_d, target, param_algo, savefile = filename, maxsim = 2.4e6)
42 | #t = proc.time() - t
43 | load(filename)
44 | #results <- wsmc_continue(results, savefile = filename, maxtime = 14*60*60)
45 | 
46 | 
47 | # load(filename)
48 | # wsmc.df <- wsmc_to_dataframe(results)
49 | # nsteps <- max(wsmc.df$step)
50 | #
51 | # # plot_bivariate_polygon(results, 1, 2)
52 | # # plot_bivariate_polygon(results, 3, 4)
53 | #
54 | # library(gridExtra)
55 | # grid.arrange(plot_marginal_time(results, 1),
56 | #   plot_marginal_time(results, 2),
57 | #   plot_marginal_time(results, 3),
58 | #   plot_marginal_time(results, 4), nrow = 2)
59 | 
60 | 
61 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/levydriven/README.R:
--------------------------------------------------------------------------------
 1 | Reproduce results for the Levy-driven stochastic volatility model in Section 5.4 of the article.
 2 | ===
 3 | 
 4 | levydriven_generate_data.R: generate a time series from the model.
 5 | 
 6 | levydriven_wsmc_hilbert.R: loads data, takes first 10,000 observations,
 7 | approximates WABC with delay reconstruction (lag of one), using the Hilbert
 8 | distance.
 9 | 
10 | levydriven_wsmc_with_summary.R: loads results from above scripts,
11 | and defines new distance that uses both the Hilbert-delay reconstruction one,
12 | and a distance on summary statistics, and then approximates the corresponding
13 | WABC posterior.
14 | 
15 | levydriven_plots.R: loads results from above scripts and creates the three plots
16 | of Figure 9 (a,b,c) and the three plots of Figure 10 (a,b,c).
17 | 
18 | ===
19 | Additionally, the folder contains files that could be useful but not necessary
20 | to reproduce the figures:
21 | 
22 | levydriven_timings.R: runs particle filters and compute variance of likelihood
23 | estimator as well as record the timings; this is to convince oneself that a
24 | classic PMMH approach to this problem would be time consuming, due to the length
25 | of the time series.
26 | 
27 | levydriven_mh.R: implements PMMH on a subset of the data.
28 | 
29 | levydriven_is_correction.R: implements an IS correction step to
30 | go from the WABC posterior to the actual posterior.
31 | 
32 | 
33 | 
34 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/levydriven/levydriven_generate_data.R:
--------------------------------------------------------------------------------
 1 | library(winference)
 2 | registerDoParallel(cores = detectCores())
 3 | rm(list = ls())
 4 | setmytheme()
 5 | 
 6 | set.seed(11)
 7 | 
 8 | target <- get_levydriven()
 9 | 
10 | prefix <- ""
11 | 
12 | # number of observations
13 | nobservations <- 50000
14 | # parameter of data-generating process
15 | true_theta <- c(0, 0, 0.5, 0.0625, 0.01)
16 | obs <- target$robservation(nobservations, true_theta,
17 |                            target$parameters, target$generate_randomness(nobservations))
18 | 
19 | save(true_theta, obs, file = paste0(prefix,"levydrivendata.RData"))
20 | 
21 | # hist(obs)
22 | plot(obs, type = "l")
23 | # plot(obs[1:250], type = "l")
24 | # acf(obs)
25 | # mean(obs)
26 | # sd(obs)
27 | # approx equal to exp(0.9)/sqrt(1-0.7^2)
28 | 
29 | library(microbenchmark)
30 | microbenchmark(
31 | obs <- target$robservation(1e4, true_theta,
32 |                            target$parameters, target$generate_randomness(1e4)),
33 | times = 100)
34 | 
35 | 
36 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/levydriven/levydriven_timings.R:
--------------------------------------------------------------------------------
  1 | library(winference)
  2 | registerDoParallel(cores = detectCores())
  3 | rm(list = ls())
  4 | setmytheme()
  5 | set.seed(13)
  6 | target <- get_levydriven()
  7 | prefix <- ""
  8 | load(file = paste0(prefix, "levydrivendata.RData"))
  9 | nobservations <- 10000
 10 | obs <- obs[1:nobservations]
 11 | filename2 <- paste0(prefix, "levydriven.n", nobservations, ".lag1.wsmc.hilbert.summary.RUN2.RData")
 12 | load(filename2)
 13 | 
 14 | thetas <- results$thetas_history[[results$thetas_history %>% length]]
 15 | 
 16 | post_cov <- cov(thetas)
 17 | post_mean <- colMeans(thetas)
 18 | 
 19 | variancescaling <- 1
 20 | 
 21 | rproposal <- function(n){
 22 |   return(fast_rmvnorm(n, post_mean, post_cov))
 23 | }
 24 | 
 25 | dproposal <- function(thetas){
 26 |   fast_dmvnorm(thetas, post_mean, post_cov)
 27 | }
 28 | 
 29 | model <- list()
 30 | model$rinit <- function(nparticles, theta){
 31 |   x <- matrix(nrow = nparticles, ncol = 2)
 32 |   for (i in 1:nparticles){
 33 |     x[i,] <- rgamma(2, shape = theta[3] * theta[3]/theta[4], scale = theta[4]/theta[3])
 34 |   }
 35 |   return(x)
 36 | }
 37 | 
 38 | model$rtransition <- function(xparticles, theta){
 39 |   rtransition_r <- levydriven_rtransition_rand(dim(xparticles)[1], theta)
 40 |   new_z <- exp(-theta[5]) * xparticles[,2] + rtransition_r$sum_weighted_e
 41 |   new_v <- (1/theta[5]) * (xparticles[,2] - new_z + rtransition_r$sum_e)
 42 |   xparticles[,1] <- new_v
 43 |   xparticles[,2] <- new_z
 44 |   return(xparticles)
 45 | }
 46 | 
 47 | model$dobs <- function(observations, time, xparticles, theta){
 48 |   return(dnorm(observations[time], mean = theta[1] + theta[2] * xparticles[,1], sd = sqrt(xparticles[,1]), log = TRUE))
 49 | }
 50 | 
 51 | ### particle filter estimates of the log-likelihood
 52 | particle_filter <- function(nparticles, model, theta, observations){
 53 |   datalength <- nobservations
 54 |   # initialization
 55 |   xparticles <- model$rinit(nparticles, theta)
 56 |   logw <- rep(0, nparticles)
 57 |   # logw <- model$dobs(observations, 1, xparticles, theta)
 58 |   if (all(is.infinite(logw))){
 59 |     return(NA)
 60 |   }
 61 |   maxlw <- max(logw)
 62 |   w <- exp(logw - maxlw)
 63 |   # update log likelihood estimate
 64 |   ll <- maxlw + log(mean(w))
 65 |   normweights <- w / sum(w)
 66 |   #
 67 |   # step t > 1
 68 |   for (time in 1:datalength){
 69 |     if (time > 1){
 70 |       ancestors <- systematic_resampling_given_u(normweights, runif(1))
 71 |       xparticles <- xparticles[ancestors,,drop=F]
 72 |     }
 73 |     xparticles <- model$rtransition(xparticles, theta)
 74 |     logw <- model$dobs(observations, time, xparticles, theta)
 75 |     if (all(is.infinite(logw))){
 76 |       return(NA)
 77 |     }
 78 |     maxlw <- max(logw)
 79 |     w <- exp(logw - maxlw)
 80 |     # update log likelihood estimate
 81 |     ll <- ll + maxlw + log(mean(w))
 82 |     normweights <- w / sum(w)
 83 |     #
 84 |   }
 85 |   return(ll)
 86 | }
 87 | 
 88 | # nparticles <- 2^10
 89 | # res <- particle_filter(nparticles, model, post_mean, obs)
 90 | 
 91 | 
 92 | library(foreach)
 93 | library(doParallel)
 94 | registerDoParallel(cores = detectCores())
 95 | nparticles <- 4096
 96 | proposal <- rproposal(10)
 97 | dproposals <- dproposal(proposal)
 98 | prior_eval <- target$dprior(proposal)
 99 | nrep <- dim(proposal)[1]
100 | nrep_per_theta <- 50
101 | results.df <- foreach(irep = 1:nrep, .combine = rbind) %dorng% {
102 |   lls <- rep(0, nrep_per_theta)
103 |   times <- rep(0, nrep_per_theta)
104 |   if (is.finite(prior_eval[irep])){
105 |     for (irep_per_theta in 1:nrep_per_theta){
106 |       pct <- proc.time()
107 |       lls[irep_per_theta] <- particle_filter(nparticles, model, proposal[irep,], obs)
108 |       times[irep_per_theta] <- as.numeric((proc.time() - pct)[3])
109 |     }
110 |   }
111 |   data.frame(irep = irep, irep_per_theta = 1:nrep_per_theta, lls = lls, times = times)
112 | }
113 | 
114 | results.df %>% group_by(irep) %>% summarise(sdlls = sd(lls), meantime = mean(times))
115 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/levydriven/levydriven_wsmc_hilbert.R:
--------------------------------------------------------------------------------
 1 | library(winference)
 2 | registerDoParallel(cores = detectCores())
 3 | rm(list = ls())
 4 | setmytheme()
 5 | 
 6 | set.seed(13)
 7 | 
 8 | target <- get_levydriven()
 9 | 
10 | # number of observations
11 | prefix <- ""
12 | load(file = paste0(prefix, "levydrivendata.RData"))
13 | 
14 | nparticles = 2048
15 | nobservations <- 10000
16 | obs <- obs[1:nobservations]
17 | plot(obs, type = "l")
18 | 
19 | lagvalue <- 1
20 | # #
21 | lag_obs <- create_lagmatrix(matrix(obs, nrow = 1), lagvalue)
22 | lag_obs <- lag_obs[,seq(from=1, to=ncol(lag_obs), by=2)]
23 | 
24 | compute_hilbert <- get_hilbert_to_y(lag_obs)
25 | 
26 | compute_d <- function(z){
27 |   fake_obs <- create_lagmatrix(matrix(z, nrow = 1), lagvalue)
28 |   fake_obs <- fake_obs[,seq(from=1, to=ncol(fake_obs), by=2)]
29 |   return(compute_hilbert(fake_obs))
30 | }
31 | 
32 | target$simulate <- function(theta){
33 |   r <- target$generate_randomness(nobservations)
34 |   return(target$robservation(nobservations, theta, list(), r))
35 | }
36 | 
37 | param_algo <- list(nthetas = nparticles, nmoves = 1, proposal = mixture_rmixmod(),
38 |                    minimum_diversity = 0.5, R = 2, maxtrials = 1000)
39 | 
40 | # filename <- paste0(prefix, "levydriven.n", nobservations, ".lag", lagvalue, ".wsmc.hilbert.RUN2.RData")
41 | # # results <- wsmc(compute_d, target, param_algo, savefile = filename, maxsimulation = 4e5)
42 | # load(file = filename)
43 | # results <- wsmc_continue(results, savefile = filename, maxsimulation = 2e5)
44 | 
45 | # library(gridExtra)
46 | # grid.arrange(plot_threshold_time(results) + scale_y_log10(), plot_ncomputed(results))
47 | # plot_bivariate(results, 1, 2)
48 | # plot_bivariate(results, 3, 4)
49 | # plot_bivariate(results, 4, 5)
50 | # #
51 | # ## timings
52 | library(microbenchmark)
53 | microbenchmark(target$simulate(true_theta), times = 1000)
54 | 
55 | dummy = target$simulate(true_theta)
56 | f <- function(dataset){
57 |   return(sum(acf(dataset^2, plot = F, lag.max = 50)$acf[,,1][1:50]))
58 | }
59 | 
60 | microbenchmark(compute_d(dummy), times = 1000)
61 | microbenchmark(f(dummy), times = 1000)
62 | 
63 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/levydriven/levydriven_wsmc_with_summary.R:
--------------------------------------------------------------------------------
 1 | library(winference)
 2 | registerDoParallel(cores = detectCores())
 3 | rm(list = ls())
 4 | setmytheme()
 5 | 
 6 | set.seed(13)
 7 | 
 8 | target <- get_levydriven()
 9 | 
10 | # number of observations
11 | prefix <- ""
12 | 
13 | load(file = paste0(prefix, "levydrivendata.RData"))
14 | 
15 | nobservations <- 10000
16 | obs <- obs[1:nobservations]
17 | 
18 | lagvalue <- 1
19 | # #
20 | lag_obs <- create_lagmatrix(matrix(obs, nrow = 1), lagvalue)
21 | lag_obs <- lag_obs[,seq(from=1, to=ncol(lag_obs), by=2)]
22 | 
23 | compute_hilbert <- get_hilbert_to_y(lag_obs)
24 | 
25 | filename <- paste0(prefix, "levydriven.n", nobservations, ".lag", lagvalue, ".wsmc.hilbert.RUN2.RData")
26 | # filename <- paste0(prefix, "levydriven.n", nobservations, ".lag", lagvalue, ".wsmc.swap.RData")
27 | load(file = filename)
28 | 
29 | 
30 | f <- function(dataset){
31 |   return(sum(acf(dataset^2, plot = F, lag.max = 50)$acf[,,1][1:50]))
32 | }
33 | f_obs <- f(obs)
34 | fs <- as.numeric(foreach(i = 1:results$param_algo$nthetas, .combine = c) %dorng% {
35 |   f(results$latest_y[[i]])
36 | })
37 | hist(fs, nclass = 30)
38 | abline(v = f_obs)
39 | 
40 | thetas <- tail(results$thetas_history, 1)[[1]]
41 | plot(thetas[,5], fs)
42 | summary(tail(results$distances_history, 1)[[1]])
43 | threshold1 <- tail(results$threshold_history, 2)[[1]]
44 | 
45 | compute_d1 <- results$compute_d
46 | compute_d_summary <- function(z){
47 |   first_part <- compute_d1(z)
48 |   if (first_part > threshold1){
49 |     return(Inf)
50 |   } else {
51 |     return(abs(f(z) - f_obs))
52 |   }
53 | }
54 | 
55 | filename2 <- paste0(prefix, "test-levydriven.n", nobservations, ".lag", lagvalue, ".wsmc.hilbert.summary.RUN2.RData")
56 | # filename2 <- paste0(prefix, "levydriven.n", nobservations, ".lag", lagvalue, ".wsmc.hilbert.summary.RUN2.RData")
57 | 
58 | ds <- as.numeric(foreach(i = 1:results$param_algo$nthetas, .combine = c) %dorng% {
59 |   compute_d_summary(results$latest_y[[i]])
60 | })
61 | results2 <- results
62 | results2$compute_d <- compute_d_summary
63 | results2$param_algo$threshold <- max(ds)
64 | results2$distances_history[[length(results2$distances_history)]] <- ds
65 | results2$param_algo$proposal <- mixture_rmixmod()
66 | #
67 | results2 <- wsmc_continue(results2, savefile = filename2, maxsim = 8e5)
68 | # load(filename2)
69 | # results2 <- wsmc_continue(results, savefile = filename2, maxsim = 8e5)
70 | #
71 | # library(gridExtra)
72 | # grid.arrange(plot_threshold_time(results2) + scale_y_log10(), plot_ncomputed(results2))
73 | # #
74 | # plot_bivariate(results2, 4, 5)
75 | # plot_marginal(results2, 1)
76 | # plot_marginal(results2, 2)
77 | # plot_marginal(results2, 4) + scale_x_log10()
78 | # plot_marginal(results2, 5) + scale_x_log10()
79 | #
80 | # # names(results2)
81 | # # results2$param_algo$proposal$param_update()
82 | # # results2$thetas_history %>% length
83 | # thetas <- results2$thetas_history[[results2$thetas_history %>% length]]
84 | # fit <- mixmodCluster(data = data.frame(thetas), nbCluster = 5, dataType = "quantitative")
85 | # results2$param_algo$proposal$r
86 | # results2$param_algo$proposal$d
87 | #
88 | #
89 | # ##
90 | #
91 | # ## timings
92 | # library(microbenchmark)
93 | # microbenchmark(target$simulate(true_theta), times = 100)
94 | # microbenchmark(compute_d_summary(target$simulate(true_theta)), times = 100)
95 | 
96 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/mgandk/README.R:
--------------------------------------------------------------------------------
 1 | Reproduce results for the multivariate g-and-k model in Section 5.1.2 of the WABC article.
 2 | ===
 3 |   
 4 | mgandk_generate_data.R: generate and save a data set from the model
 5 | 
 6 | mgandk_wsmc_wasserstein.R: loads a data set of size 500 and runs SMC to approximate the WABC
 7 | posterior using the exact Wasserstein distance with a budget of 2*10^6 model simulations.
 8 | 
 9 | mgandk_wsmc_swap.R: loads the data set and runs SMC to approximate the WABC
10 | posterior using the swapping distance with a budget of 2*10^6 model simulations.
11 | 
12 | mgandk_wsmc_hilbert.R: loads the data set and runs SMC to approximate the WABC
13 | posterior using the Hilbert distance with a budget of 2*10^6 model simulations.
14 | 
15 | mgandk_wsmc_mmd.R: loads the data set and runs SMC to approximate the WABC
16 | posterior using an approximation of  the MMD distance with a budget of 2*10^6
17 | model simulations.
18 | 
19 | mgandk_mcmc.R: loads the data the WABC using the Hilbert approximation to tune
20 | the MCMC proposal used to approximate the posterior distribution. Uses numerical
21 | approximations of the likelihood. Performs 150,000 iterations.
22 | 
23 | mgandk_plots_ncomputed.R: loads the data and output from the scripts above.
24 | Plot corresponds to Fig 6 a). number of sims vs number of smc steps
25 | 
26 | mgandk_plots_threshold.R: loads the data and output from the scripts above.
27 | Plot corresponds to Fig 6 b). threshold vs number of sims.
28 | 
29 | mgandk_plots_w_to_posterior.R: loads the data and output from the scripts above.
30 | Approximates the W1 distance between the posterior and the different WABC approximations,
31 | plots W1 distance to posterior as function of model simulations. Corresponds to Fig 6 c).
32 | 
33 | mgandk_timings.R: estimate the average time it takes to compute the different distances
34 | 
35 | mgandk_plots.R: produces plots of WABC marginals.


--------------------------------------------------------------------------------
/inst/reproduceabc/mgandk/mgandk_generate_data.R:
--------------------------------------------------------------------------------
 1 | library(winference)
 2 | registerDoParallel(cores = detectCores())
 3 | rm(list = ls())
 4 | setmytheme()
 5 | set.seed(11)
 6 | target <- get_mgandk()
 7 | prefix = ""
 8 | 
 9 | nobservations <- 10000
10 | true_theta <- c(3, 1, 1, 0.5, 4, 0.5, 2, 0.4, 0.6)
11 | 
12 | obs <- target$robservation(nobservations, true_theta)
13 | 
14 | save(true_theta, obs, file = paste0(prefix, "mgandkdata.RData"))
15 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/mgandk/mgandk_mcmc.R:
--------------------------------------------------------------------------------
 1 | library(winference)
 2 | registerDoParallel(cores = detectCores())
 3 | rm(list = ls())
 4 | setmytheme()
 5 | set.seed(11)
 6 | target <- get_mgandk()
 7 | 
 8 | prefix = ""
 9 | 
10 | nobservations <- 500
11 | load(paste0(prefix, "mgandkdata.RData"))
12 | obs <- obs[,1:nobservations]
13 | # load ABC posterior
14 | load(paste0(prefix, "mgandk.wsmc.n",nobservations,".hilbert.RData"))
15 | thetas <- tail(results$thetas_history, 1)[[1]]
16 | theta_init <- thetas[sample(x = 1:nrow(thetas), 8, replace = TRUE),]
17 | colMeans(thetas)
18 | cov <- cov(thetas)
19 | # test log-likelihood
20 | target$loglikelihood(theta_init, obs)
21 | tuning_parameters <- list(niterations = 150000, nchains = nrow(theta_init),
22 |                           cov_proposal = cov,
23 |                           adaptation = 1000, init_chains = theta_init)
24 | mhfile <- paste0(prefix, "mgandk.mcmc.n", nobservations, ".mh.initfromABC.RData")
25 | mh <- metropolishastings(obs, target, tuning_parameters, savefile = mhfile)
26 | save(mh, file = mhfile)
27 | load(mhfile)
28 | # if (exists("mh_results")){
29 | #   mcmc.df <- mhchainlist_to_dataframe(mh_results$chains)
30 | #   mcmc.df <- mcmc.df %>% filter(iteration < mh_results$iteration)
31 | # } else {
32 | #   mcmc.df <- mhchainlist_to_dataframe(mh$chains)
33 | # }
34 | 
35 | # mcmc.df %>% head
36 | # burnin <- 5000
37 | # # burnin <- 0
38 | # ggplot(mcmc.df %>% filter(iteration > burnin, iteration %% 100 == 1), aes(x = iteration, y = X.7, group = ichain, colour = factor(ichain))) + geom_line(alpha = 0.5)
39 | # ggplot(mcmc.df %>% filter(iteration > burnin, iteration %% 100 == 1), aes(x = iteration, y = X.9, group = ichain, colour = factor(ichain))) + geom_line(alpha = 0.5)
40 | # ggplot(mcmc.df %>% filter(iteration > burnin), aes(x = X.1, group = ichain, colour = factor(ichain))) + geom_density() + geom_vline(xintercept = true_theta[1])
41 | # ggplot(mcmc.df %>% filter(iteration > burnin), aes(x = X.2, group = ichain, colour = factor(ichain))) + geom_density() + geom_vline(xintercept = true_theta[2])
42 | # ggplot(mcmc.df %>% filter(iteration > burnin), aes(x = X.3, group = ichain, colour = factor(ichain))) + geom_density() + geom_vline(xintercept = true_theta[3])
43 | # ggplot(mcmc.df %>% filter(iteration > burnin), aes(x = X.4, group = ichain, colour = factor(ichain))) + geom_density() + geom_vline(xintercept = true_theta[4])
44 | # ggplot(mcmc.df %>% filter(iteration > burnin), aes(x = X.5, group = ichain, colour = factor(ichain))) + geom_density() + geom_vline(xintercept = true_theta[5])
45 | # ggplot(mcmc.df %>% filter(iteration > burnin), aes(x = X.6, group = ichain, colour = factor(ichain))) + geom_density() + geom_vline(xintercept = true_theta[6])
46 | # ggplot(mcmc.df %>% filter(iteration > burnin), aes(x = X.7, group = ichain, colour = factor(ichain))) + geom_density() + geom_vline(xintercept = true_theta[7])
47 | # ggplot(mcmc.df %>% filter(iteration > burnin), aes(x = X.8, group = ichain, colour = factor(ichain))) + geom_density() + geom_vline(xintercept = true_theta[8])
48 | # ggplot(mcmc.df %>% filter(iteration > burnin), aes(x = X.9, group = ichain, colour = factor(ichain))) + geom_density() + geom_vline(xintercept = true_theta[9])
49 | #
50 | 
51 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/mgandk/mgandk_plots_ncomputed.R:
--------------------------------------------------------------------------------
 1 | library(winference)
 2 | registerDoParallel(cores = detectCores())
 3 | rm(list = ls())
 4 | setmytheme()
 5 | set.seed(11)
 6 | target <- get_gandk()
 7 | 
 8 | fig.height <- 5
 9 | fig.width <- 5
10 | 
11 | my_colors <- get_my_colors()
12 | 
13 | prefix <- ""
14 | 
15 | nobservations = 500
16 | load(paste0(prefix, "mgandkdata.RData"))
17 | obs <- obs[1:nobservations]
18 | 
19 | # Wasserstein SMC
20 | filename = paste0(prefix, "mgandk.wsmc.n", nobservations, ".wasserstein.RData")
21 | load(filename)
22 | results_was = results
23 | wsmc.df = wsmc_to_dataframe(results_was)
24 | nsteps = tail(wsmc.df$step,n=1)
25 | ncomp = results_was$ncomputed
26 | 
27 | # Swapping SMC
28 | filename = paste0(prefix, "mgandk.wsmc.n", nobservations, ".swap.RData")
29 | load(filename)
30 | results_swap = results
31 | swap.df = wsmc_to_dataframe(results_swap)
32 | swap.nsteps = tail(swap.df$step,n=1)
33 | swap.ncomp = results_swap$ncomputed
34 | 
35 | # Hilbert SMC
36 | filename = paste0(prefix, "mgandk.wsmc.n", nobservations, ".hilbert.RData")
37 | load(filename)
38 | results_hilbert = results
39 | hilbert.df = wsmc_to_dataframe(results_hilbert)
40 | hilbert.nsteps = tail(hilbert.df$step,n=1)
41 | hilbert.ncomp = results_hilbert$ncomputed
42 | 
43 | # MMD SMC
44 | filename = paste0(prefix, "mgandk.wsmc.n", nobservations, ".mmd.RData")
45 | load(filename)
46 | results_mmd = results
47 | mmd.df = wsmc_to_dataframe(results_mmd)
48 | mmd.nsteps = tail(mmd.df$step,n=1)
49 | mmd.ncomp = results_mmd$ncomputed
50 | 
51 | 
52 | w.df = data.frame(steps = 1:nsteps, ncomp = ncomp)
53 | s.df = data.frame(steps = 1:swap.nsteps, ncomp = swap.ncomp)
54 | h.df = data.frame(steps = 1:hilbert.nsteps, ncomp = hilbert.ncomp)
55 | m.df = data.frame(steps = 1:mmd.nsteps, ncomp = mmd.ncomp)
56 | 
57 | g = ggplot(w.df, aes(x = steps, y = ncomp)) + geom_line(aes(colour = "Wasserstein")) + geom_point(aes(colour = "Wasserstein"))
58 | g = g + geom_line(data = s.df, aes(colour = "Swap")) + geom_point(data = s.df, aes(colour = "Swap"))
59 | g = g + geom_line(data = h.df, aes(colour = "Hilbert")) + geom_point(data = h.df, aes(colour = "Hilbert"))
60 | g = g + geom_line(data = m.df, aes(colour = "MMD")) + geom_point(data = m.df, aes(colour = "MMD"))
61 | g = g + scale_color_manual(name = "", values = my_colors) + xlab("steps") + ylab("# model simulations")
62 | g = g + scale_y_log10(breaks = c(0.5e4,1e4,2e4,4e4,1e5,2e5)) + scale_x_continuous(breaks = c(0,20,40,60,80))
63 | g = g + geom_label(data = data.frame(x = c(60,70,60,17), y = c(7e3,1.5e4,1.7e5,1.9e4), method = c("Wasserstein","Swap","Hilbert","MMD")),
64 |                     aes(x = x, y = y, colour = method, label = method), size = 7) + theme(legend.position = "none")
65 | g
66 | ggsave(filename = paste0(prefix, "mgandk.n", nobservations, ".ncomp_vs_step.pdf"), plot = g, width = fig.width, height = fig.height)
67 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/mgandk/mgandk_plots_threshold.R:
--------------------------------------------------------------------------------
 1 | library(winference)
 2 | registerDoParallel(cores = detectCores())
 3 | rm(list = ls())
 4 | setmytheme()
 5 | set.seed(11)
 6 | target <- get_gandk()
 7 | 
 8 | fig.height <- 5
 9 | fig.width <- 5
10 | 
11 | my_colors <- get_my_colors()
12 | 
13 | prefix <- ""
14 | 
15 | nobservations = 500
16 | load(paste0(prefix, "mgandkdata.RData"))
17 | obs <- obs[1:nobservations]
18 | 
19 | # Wasserstein SMC
20 | filename = paste0(prefix, "mgandk.wsmc.n", nobservations, ".wasserstein.RData")
21 | load(filename)
22 | results_was = results
23 | was.thresholds = results_was$threshold_history
24 | was.ncomp = cumsum(results_was$ncomputed)
25 | 
26 | # Swapping SMC
27 | filename = paste0(prefix, "mgandk.wsmc.n", nobservations, ".swap.RData")
28 | load(filename)
29 | results_swap = results
30 | swap.thresholds = results_swap$threshold_history
31 | swap.ncomp = cumsum(results_swap$ncomputed)
32 | 
33 | # Hilbert SMC
34 | filename = paste0(prefix, "mgandk.wsmc.n", nobservations, ".hilbert.RData")
35 | load(filename)
36 | results_hilbert = results
37 | hilbert.thresholds = results_hilbert$threshold_history
38 | hilbert.ncomp = cumsum(results_hilbert$ncomputed)
39 | 
40 | # MMD SMC
41 | filename = paste0(prefix, "mgandk.wsmc.n", nobservations, ".mmd.RData")
42 | load(filename)
43 | results_mmd = results
44 | mmd.thresholds = results_mmd$threshold_history
45 | mmd.ncomp = cumsum(results_mmd$ncomputed)
46 | 
47 | 
48 | w.df = data.frame(threshold = was.thresholds, ncomp = was.ncomp)
49 | s.df = data.frame(threshold = swap.thresholds, ncomp = swap.ncomp)
50 | h.df = data.frame(threshold = hilbert.thresholds, ncomp = hilbert.ncomp)
51 | m.df = data.frame(threshold = mmd.thresholds, ncomp = mmd.ncomp)
52 | 
53 | g = ggplot(w.df, aes(x = ncomp, y = threshold)) + geom_line(aes(colour = "Wasserstein")) + geom_point(aes(colour = "Wasserstein"))
54 | g = g + geom_line(data = s.df, aes(colour = "Swap")) + geom_point(data = s.df, aes(colour = "Swap"))
55 | g = g + geom_line(data = h.df, aes(colour = "Hilbert")) + geom_point(data = h.df, aes(colour = "Hilbert"))
56 | g = g + geom_line(data = m.df, aes(colour = "MMD")) + geom_point(data = m.df, aes(colour = "MMD"))
57 | g = g + scale_color_manual(name = "", values = my_colors) + xlab("# model simulations") + ylab("threshold")
58 | g = g + scale_y_log10(breaks = c(1e-2,1,1e2,1e4,1e6)) + scale_x_log10()
59 | g = g + geom_label(data = data.frame(x = c(3e5,5e4,7e5,1e4), y = c(5e2,4e4,7,0.07), method = c("Wasserstein","Swap","Hilbert","MMD")),
60 |                    aes(x = x, y = y, colour = method, label = method), size = 7) + theme(legend.position = "none")
61 | g
62 | ggsave(filename = paste0(prefix, "mgandk.n", nobservations, ".threshold_vs_ncomp.pdf"), plot = g, width = fig.width, height = fig.height)
63 | 
64 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/mgandk/mgandk_timings.R:
--------------------------------------------------------------------------------
 1 | library(winference)
 2 | registerDoParallel(cores = detectCores())
 3 | rm(list = ls())
 4 | setmytheme()
 5 | set.seed(11)
 6 | target <- get_mgandk()
 7 | prefix <- ""
 8 | 
 9 | nobservations <- 500
10 | load(paste0(prefix, "mgandkdata.RData"))
11 | obs <- obs[,1:nobservations]
12 | target$simulate <- function(theta) target$robservation(nobservations, theta)
13 | 
14 | data_sets <- list()
15 | 
16 | x <- proc.time()
17 | target$loglikelihood(target$rprior(1000, target$parameters), obs)
18 | newx <- proc.time()
19 | elapsed <- (newx - x)[3]
20 | cat("time to compute likelihood:", elapsed/nrep, "\n")
21 | 
22 | nrep <- 1000
23 | x <- proc.time()
24 | for (irep in 1:nrep){
25 |   data_sets[[irep]] <- target$simulate(true_theta)
26 | }
27 | newx <- proc.time()
28 | elapsed <- (newx - x)[3]
29 | cat("time to simulate data sets:", elapsed/nrep, "\n")
30 | x <- proc.time()
31 | for (irep in 1:nrep){
32 |   sinkhorn_distance(obs, data_sets[[irep]])
33 | }
34 | newx <- proc.time()
35 | elapsed <- (newx - x)[3]
36 | cat("time to compute Sinkhorn distance:", elapsed/nrep, "\n")
37 | 
38 | x <- proc.time()
39 | for (irep in 1:nrep){
40 |   exact_transport_distance(obs, data_sets[[irep]])
41 | }
42 | newx <- proc.time()
43 | elapsed <- (newx - x)[3]
44 | cat("time to compute Wasserstein distance:", elapsed/nrep, "\n")
45 | 
46 | x <- proc.time()
47 | for (irep in 1:nrep){
48 |   hilbert_distance(obs, data_sets[[irep]])
49 | }
50 | newx <- proc.time()
51 | elapsed <- (newx - x)[3]
52 | cat("time to compute Hilbert distance:", elapsed/nrep, "\n")
53 | 
54 | 
55 | cd <- get_mmd_to_y(obs)
56 | x <- proc.time()
57 | for (irep in 1:nrep){
58 |   cd(data_sets[[irep]])
59 | }
60 | newx <- proc.time()
61 | elapsed <- (newx - x)[3]
62 | cat("time to compute MMD distance:", elapsed/nrep, "\n")
63 | 
64 | x <- proc.time()
65 | for (irep in 1:nrep){
66 |   swap_distance(obs, data_sets[[irep]], tolerance = 1e-5)
67 | }
68 | newx <- proc.time()
69 | elapsed <- (newx - x)[3]
70 | cat("time to compute Swap distance:", elapsed/nrep, "\n")
71 | 
72 | 
73 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/mgandk/mgandk_wsmc_hilbert.R:
--------------------------------------------------------------------------------
 1 | library(winference)
 2 | registerDoParallel(cores = detectCores())
 3 | rm(list = ls())
 4 | setmytheme()
 5 | set.seed(11)
 6 | target <- get_mgandk()
 7 | 
 8 | prefix = ""
 9 | 
10 | nobservations <- 500
11 | load(paste0(prefix, "mgandkdata.RData"))
12 | obs <- obs[,1:nobservations]
13 | target$simulate <- function(theta) target$robservation(nobservations, theta)
14 | 
15 | compute_d <- get_hilbert_to_y(obs)
16 | y_sim <- target$simulate(target$rprior(1, target$parameters)[1,])
17 | compute_d(y_sim)
18 | 
19 | param_algo <- list(nthetas = 2048, nmoves = 1, proposal = mixture_rmixmod(),
20 |                    minimum_diversity = 0.5, R = 2, maxtrials = 100000)
21 | 
22 | filename <- paste0(prefix, "mgandk.wsmc.n", nobservations, ".hilbert.RData")
23 | results <- wsmc(compute_d, target, param_algo, savefile = filename, maxsimulation = 2e6)
24 | load(filename)
25 | # results <- wsmc_continue(results, savefile = filename, maxsimulation = 5e6)
26 | # plot_marginal_time(results, 7, from_step = 50) + geom_vline(xintercept = true_theta[7])
27 | # plot_marginal_time(results, 5, from_step = 50) + geom_vline(xintercept = true_theta[5])
28 | # qplot(x = cumsum(results$ncomputed), y = results$threshold_history, geom = "line") + scale_x_log10() + scale_y_log10()
29 | # qplot(x = tail(results$distances_history, 1)[[1]], geom = "histogram")
30 | #
31 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/mgandk/mgandk_wsmc_mmd.R:
--------------------------------------------------------------------------------
 1 | library(winference)
 2 | registerDoParallel(cores = detectCores())
 3 | rm(list = ls())
 4 | setmytheme()
 5 | set.seed(11)
 6 | target <- get_mgandk()
 7 | 
 8 | prefix = ""
 9 | 
10 | nobservations <- 500
11 | load(paste0(prefix, "mgandkdata.RData"))
12 | obs <- obs[,1:nobservations]
13 | target$simulate <- function(theta) target$robservation(nobservations, theta)
14 | 
15 | compute_d <- get_mmd_to_y(obs)
16 | y_sim <- target$simulate(target$rprior(1, target$parameters)[1,])
17 | compute_d(y_sim)
18 | 
19 | param_algo <- list(nthetas = 2048, nmoves = 1, proposal = mixture_rmixmod(),
20 |                    minimum_diversity = 0.5, R = 2, maxtrials = 100000)
21 | 
22 | filename <- paste0(prefix, "mgandk.wsmc.n", nobservations, ".mmd.RData")
23 | results <- wsmc(compute_d, target, param_algo, savefile = filename, maxsimulation = 2e6)
24 | load(filename)
25 | # results <- wsmc_continue(results, savefile = filename, maxsimulation = 1e6)
26 | # plot_marginal_time(results, 9, from_step = 50)
27 | # qplot(x = cumsum(results$ncomputed), y = results$threshold_history, geom = "line") + scale_x_log10() + scale_y_log10()
28 | # qplot(x = tail(results$distances_history, 1)[[1]], geom = "histogram")
29 | 
30 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/mgandk/mgandk_wsmc_swap.R:
--------------------------------------------------------------------------------
 1 | library(winference)
 2 | registerDoParallel(cores = detectCores())
 3 | rm(list = ls())
 4 | setmytheme()
 5 | set.seed(11)
 6 | target <- get_mgandk()
 7 | 
 8 | prefix = ""
 9 | 
10 | nobservations <- 500
11 | load(paste0(prefix, "mgandkdata.RData"))
12 | obs <- obs[,1:nobservations]
13 | target$simulate <- function(theta) target$robservation(nobservations, theta)
14 | 
15 | compute_d <- function(y_sim) swap_distance(obs, y_sim, tolerance = 1e-5)$distance
16 | y_sim <- target$simulate(target$rprior(1, target$parameters)[1,])
17 | compute_d(y_sim)
18 | 
19 | param_algo <- list(nthetas = 2048, nmoves = 1, proposal = mixture_rmixmod(),
20 |                    minimum_diversity = 0.5, R = 2, maxtrials = 100000)
21 | 
22 | filename <- paste0(prefix, "mgandk.wsmc.n", nobservations, ".swap.RData")
23 | results <- wsmc(compute_d, target, param_algo, savefile = filename, maxsimulation = 2e6)
24 | load(filename)
25 | # results <- wsmc_continue(results, savefile = filename, maxsimulation = 3e6)
26 | # plot_marginal_time(results, 7, from_step = 50)
27 | # qplot(x = cumsum(results$ncomputed), y = results$threshold_history, geom = "line") + scale_x_log10() + scale_y_log10()
28 | # qplot(x = tail(results$distances_history, 1)[[1]], geom = "histogram")
29 | #
30 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/mgandk/mgandk_wsmc_wasserstein.R:
--------------------------------------------------------------------------------
 1 | library(winference)
 2 | registerDoParallel(cores = detectCores()-2)
 3 | rm(list = ls())
 4 | setmytheme()
 5 | set.seed(11)
 6 | target <- get_mgandk()
 7 | 
 8 | prefix = ""
 9 | 
10 | nobservations <- 500
11 | load(paste0(prefix, "mgandkdata.RData"))
12 | obs <- obs[,1:nobservations]
13 | target$simulate <- function(theta) target$robservation(nobservations, theta)
14 | 
15 | library(transport)
16 | compute_d <- function(z){
17 |   sink(file = paste0(prefix,"tmp"))
18 |   wdistance <- exact_transport_distance(obs, z, 1, 2)
19 |   sink(NULL)
20 |   if (wdistance > 1e10){
21 |     wdistance <- 1e10
22 |   }
23 |   return(wdistance)
24 | }
25 | 
26 | # set.seed(11)
27 | # y_sim = target$simulat(target$rprior(1))
28 | # compute_d(y_sim)
29 | 
30 | # thetas_ <- target$rprior(100, target$parameters)
31 | # y_sim <- list()
32 | # for (itheta in 1:nrow(thetas_)){
33 | #   y_sim[[itheta]] <- target$simulate(thetas_[itheta,])
34 | # }
35 | #
36 | # ds <- sapply(y_sim, compute_d)
37 | # ds
38 | #
39 | # yy <- target$simulate(thetas_[1,])
40 | # plot(yy[1,], yy[2,])
41 | #
42 | # plot(obs[1,], obs[2,])
43 | # thetas_[1,]
44 | # true_theta
45 | 
46 | param_algo <- list(nthetas = 2048, nmoves = 1, proposal = mixture_rmixmod(),
47 |                    minimum_diversity = 0.5, R = 2, maxtrials = 100000)
48 | 
49 | savefile <- paste0(prefix, "mgandk.wsmc.n", nobservations, ".wasserstein.RData")
50 | maxsimulation <- 2e6
51 | results <- wsmc(compute_d, target, param_algo, savefile = savefile, maxsimulation = maxsimulation)
52 | load(savefile)
53 | # results$param_algo$maxtrials <- 10000
54 | # results <- wsmc_continue(results, savefile = savefile, maxsimulation = 1e6)
55 | # results$ncomputed %>% sum
56 | # from_step <- 0
57 | # wsmc.df <- wsmc_to_dataframe(results)
58 | # index <- 7
59 | # g <- ggplot(wsmc.df %>% filter(step > from_step), aes_string(x = target$parameter_names[index], colour = "time", group = "factor(time)")) + geom_density(aes(y = ..density..))
60 | # g <- g + xlab(target$parameter_names[index]) # +  theme(legend.position = "none")
61 | # g <- g + scale_color_gradient(name = "time (s)", low = rgb(1,0.5,0.5), high = "darkblue")
62 | # g + geom_vline(xintercept = true_theta[index])
63 | #
64 | # qplot(x = cumsum(results$ncomputed), y = results$threshold_history, geom = "line") + scale_x_log10() + scale_y_log10()
65 | # qplot(x = tail(results$distances_history, 1)[[1]], geom = "histogram")
66 | 
67 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/mvnormal/README.R:
--------------------------------------------------------------------------------
 1 | Reproduce results for the multivariate model in Section 2.3 of the WABC article.
 2 | ===
 3 | 
 4 | mvnormal_generate_data.R: generate and save a data set from the model
 5 | 
 6 | mvnormal_wsmc_wasserstein.R: loads a data set of size 100 and runs SMC to approximate the WABC
 7 | posterior using the exact Wasserstein distance with a budget of 10^6 model simulations.
 8 | 
 9 | mvnormal_wsmc_summary.R: loads the data set and runs SMC to approximate the ABC
10 | posterior based on the sample mean with a budget of 10^6 model simulations.
11 | 
12 | mvnormal_wsmc_euclidean.R: loads the data set and runs SMC to approximate the ABC
13 | posterior based on the Euclidean distance with a budget of 10^6 model simulations.
14 | 
15 | mvnormal_rejection_summary.R: loads the data set and runs a rejection sampler to approximate the ABC
16 | posterior based on the sample mean with a budget of 10^6 model simulations.
17 | 
18 | mvnormal_rejection_wasserstein.R: loads the data set and runs a rejection sampler to approximate the WABC
19 | posterior based on the exact Wasserstein distance with a budget of 10^6 model simulations.
20 | 
21 | mvnormal_plots.R: loads the data and the output from the scripts above to plot the marginal ABC
22 | posteriors as well as their W1 distances to the posterior. Corresponds to Fig 1 of the paper.
23 | 
24 | mvnormal_timings.R: estimates the average time it takes to compute the different distances
25 | and simulate data
26 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/mvnormal/mvnormal_generate_data.R:
--------------------------------------------------------------------------------
 1 | library(winference)
 2 | registerDoParallel(cores = detectCores())
 3 | rm(list = ls())
 4 | setmytheme()
 5 | 
 6 | set.seed(11)
 7 | 
 8 | doRun <- FALSE
 9 | max_time <- 30*60
10 | d <- 2
11 | target <- get_multivariate_normal(d)
12 | target$parameters$tau <- 5
13 | nobservations <- 100
14 | p <- 1
15 | true_theta <- rnorm(d)
16 | prefix <- ""
17 | 
18 | obsfile <- paste0(prefix, "mvnormaldata.d", d, ".n", nobservations, ".RData")
19 | obs <- target$robservation(nobservations, true_theta, target$parameters)
20 | save(true_theta, obs, file = obsfile)
21 | 
22 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/mvnormal/mvnormal_rejection_summary.R:
--------------------------------------------------------------------------------
 1 | library(winference)
 2 | rm(list = ls())
 3 | registerDoParallel(cores = detectCores())
 4 | setmytheme()
 5 | 
 6 | set.seed(11)
 7 | 
 8 | doRun <- FALSE
 9 | max_time <- 30*60
10 | d <- 2
11 | target <- get_multivariate_normal(d)
12 | target$parameters$tau <- 5
13 | nobservations <- 100
14 | nparticles <- 2048
15 | maxsim = 10^6
16 | p <- 1
17 | prefix <- ""
18 | 
19 | obsfile <- paste0(prefix, "mvnormaldata.d", d, ".n", nobservations, ".RData")
20 | load(obsfile)
21 | 
22 | # function to simulate data
23 | target$simulate <- function(theta){
24 |   return(target$robservation(nobservations, theta, target$parameters))
25 | }
26 | # distance between summary
27 | summary_obs <- rowMeans(obs)
28 | dsummary <- function(z){
29 |   summary_z <- rowMeans(z)
30 |   return(mean(abs(summary_z - summary_obs)))
31 | }
32 | 
33 | 
34 | 
35 | filename <- paste0(prefix, "mvnormalrejection.d", d, ".n", nobservations, ".summary.RData")
36 | 
37 | naccept = 2048
38 | t = proc.time()
39 | prior_theta = matrix(rnorm(d*maxsim,0,5), nrow=maxsim, ncol=d)
40 | distances = rep(0,maxsim)
41 | for(i in 1:maxsim){
42 |   ysim = target$simulate(prior_theta[i,])
43 |   distances[i] = dsummary(ysim)
44 | }
45 | # distances = foreach(i = 1:maxsim, .combine = c) %dorng% {
46 | #   ysim = target$simulate(prior_theta[i,])
47 | #   return(dsummary(ysim))
48 | # }
49 | sort_distances = sort(distances)
50 | results = prior_theta[distances <= sort_distances[naccept],]
51 | t = proc.time() - t
52 | t
53 | 
54 | save(results, t, file = filename)
55 | 
56 | 
57 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/mvnormal/mvnormal_rejection_wasserstein.R:
--------------------------------------------------------------------------------
 1 | library(winference)
 2 | rm(list = ls())
 3 | registerDoParallel(cores = detectCores())
 4 | setmytheme()
 5 | 
 6 | set.seed(11)
 7 | 
 8 | doRun <- FALSE
 9 | max_time <- 30*60
10 | d <- 2
11 | target <- get_multivariate_normal(d)
12 | target$parameters$tau <- 5
13 | nobservations <- 100
14 | nparticles <- 2048
15 | maxsim = 10^6
16 | p <- 1
17 | prefix <- ""
18 | 
19 | obsfile <- paste0(prefix, "mvnormaldata.d", d, ".n", nobservations, ".RData")
20 | load(obsfile)
21 | 
22 | # function to simulate data
23 | target$simulate <- function(theta){
24 |   return(target$robservation(nobservations, theta, target$parameters))
25 | }
26 | # distance function
27 | wdistance <- get_transport_to_y(obs, p = p)
28 | 
29 | filename <- paste0(prefix, "mvnormalrejection.d", d, ".n", nobservations, ".wasserstein.RData")
30 | 
31 | naccept = 2048
32 | t = proc.time()
33 | prior_theta = matrix(rnorm(d*maxsim,0,5), nrow=maxsim, ncol=d)
34 | distances = rep(0,maxsim)
35 | for(i in 1:maxsim){
36 |   ysim = target$simulate(prior_theta[i,])
37 |   {sink("/dev/null"); distances[i] = wdistance(ysim); sink();}
38 | }
39 | # distances = foreach(i = 1:maxsim, .combine = c) %dorng% {
40 | #   ysim = target$simulate(prior_theta[i,])
41 | #   return(wdistance(ysim))
42 | # }
43 | sort_distances = sort(distances)
44 | results = prior_theta[distances <= sort_distances[naccept],]
45 | t = proc.time() - t
46 | t
47 | 
48 | save(results, t, file = filename)
49 | 
50 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/mvnormal/mvnormal_timings.R:
--------------------------------------------------------------------------------
 1 | library(microbenchmark)
 2 | library(winference)
 3 | rm(list = ls())
 4 | setmytheme()
 5 | 
 6 | set.seed(11)
 7 | 
 8 | doRun <- FALSE
 9 | max_time <- 30*60
10 | d <- 2
11 | target <- get_multivariate_normal(d)
12 | target$parameters$tau <- 5
13 | nobservations <- 100
14 | p <- 1
15 | prefix <- ""
16 | 
17 | obsfile <- paste0(prefix, "mvnormaldata.d", d, ".n", nobservations, ".RData")
18 | load(obsfile)
19 | 
20 | # function to simulate data
21 | target$simulate <- function(theta){
22 |   return(target$robservation(nobservations, theta, target$parameters))
23 | }
24 | 
25 | # wasserstein distance
26 | wdistance <- get_transport_to_y(obs, p = p)
27 | 
28 | # euclidean distance
29 | ground_p <- 2
30 | deuclidean <- function(z){
31 |   return(mean(apply(abs(z - obs), 2, function(v) (sum(v^ground_p))^(1/ground_p))^p)^(1/p))
32 | }
33 | 
34 | # distance between summary
35 | summary_obs <- rowMeans(obs)
36 | dsummary <- function(z){
37 |   summary_z <- rowMeans(z)
38 |   return(mean(abs(summary_z - summary_obs)))
39 | }
40 | 
41 | {sink("/dev/null"); wtime = invisible(microbenchmark(wdistance(target$simulate(true_theta)), times = 10000)); sink();}
42 | wtime
43 | 
44 | etime = microbenchmark(deuclidean(target$simulate(true_theta)), times = 10000)
45 | etime
46 | 
47 | stime = microbenchmark(dsummary(target$simulate(true_theta)), times = 10000)
48 | stime
49 | 
50 | simtime = microbenchmark(target$simulate(true_theta), times = 10000)
51 | simtime
52 | 
53 | # # #result from timing (average values in seconds)
54 | # wtime = 0.0123
55 | # etime = 0.000380
56 | # stime = 0.000064
57 | # simtime = 0.000040
58 | #
59 | # wtime/etime
60 | # wtime/stime
61 | #
62 | # #load simulation results
63 | # filename <- paste0(prefix, "mvnormalwsmc.d", d, ".n", nobservations, ".wasserstein.RData")
64 | # load(filename)
65 | # results_wasserstein <- results
66 | # tail(results_wasserstein$compute_times,n=1)
67 | #
68 | #
69 | # filename <- paste0(prefix, "mvnormalwsmc.d", d, ".n", nobservations, ".euclidean.RData")
70 | # load(filename)
71 | # results_euclidean <- results
72 | # tail(results_euclidean$compute_times,n=1)
73 | #
74 | #
75 | # filename <- paste0(prefix, "mvnormalwsmc.d", d, ".n", nobservations, ".summary.RData")
76 | # load(filename)
77 | # results_summary <- results
78 | # tail(results_summary$compute_times,n=1)
79 | #
80 | #
81 | # filename <- paste0(prefix, "mvnormalrejection.d", d, ".n", nobservations, ".summary.RData")
82 | # load(filename)
83 | # results_rejection_summary <- results
84 | # t
85 | 
86 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/mvnormal/mvnormal_wsmc_euclidean.R:
--------------------------------------------------------------------------------
 1 | library(winference)
 2 | registerDoParallel(cores = detectCores())
 3 | rm(list = ls())
 4 | setmytheme()
 5 | 
 6 | set.seed(11)
 7 | 
 8 | doRun <- FALSE
 9 | max_time <- 30*60
10 | d <- 2
11 | target <- get_multivariate_normal(d)
12 | target$parameters$tau <- 5
13 | nobservations <- 100
14 | nparticles <- 2048
15 | p <- 1
16 | prefix <- ""
17 | 
18 | obsfile <- paste0(prefix, "mvnormaldata.d", d, ".n", nobservations, ".RData")
19 | load(obsfile)
20 | 
21 | # function to simulate data
22 | target$simulate <- function(theta){
23 |   return(target$robservation(nobservations, theta, target$parameters))
24 | }
25 | # euclidean distance
26 | # and also Euclidean distance
27 | ground_p <- 2
28 | deuclidean <- function(z){
29 |   return(mean(apply(abs(z - obs), 2, function(v) (sum(v^ground_p))^(1/ground_p))^p)^(1/p))
30 | }
31 | 
32 | # common algorithmic parameters
33 | param_algo <- list(nthetas = nparticles, nmoves = 1, proposal = mixture_rmixmod(),
34 |                    minimum_diversity = 0.5, R = 2, maxtrials = 1e5)
35 | 
36 | filename <- paste0(prefix, "mvnormalwsmc.d", d, ".n", nobservations, ".euclidean.RData")
37 | results <- wsmc(deuclidean, target, param_algo, maxsimulation = 10^6, savefile = filename)
38 | load(filename)
39 | # results <- wsmc_continue(results, savefile = filename, maxtime = 10*60)
40 | 
41 | #
42 | # load(filename)
43 | # plot_threshold_time(results) + geom_point()
44 | # mle <- rowMeans(obs)
45 | # plot_bivariate(results, 1, 2, from = 10) + geom_vline(xintercept = mle[1]) + geom_hline(yintercept = mle[2])
46 | # plot_marginal(results, 1, from = 10)
47 | 
48 | # library(microbenchmark)
49 | # microbenchmark(deuclidean(target$simulate(true_theta)), times = 1000)
50 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/mvnormal/mvnormal_wsmc_summary.R:
--------------------------------------------------------------------------------
 1 | library(winference)
 2 | registerDoParallel(cores = detectCores())
 3 | rm(list = ls())
 4 | setmytheme()
 5 | 
 6 | set.seed(11)
 7 | 
 8 | doRun <- FALSE
 9 | max_time <- 30*60
10 | d <- 2
11 | target <- get_multivariate_normal(d)
12 | target$parameters$tau <- 5
13 | nobservations <- 100
14 | nparticles <- 2048
15 | p <- 1
16 | prefix <- ""
17 | 
18 | obsfile <- paste0(prefix, "mvnormaldata.d", d, ".n", nobservations, ".RData")
19 | load(obsfile)
20 | 
21 | # function to simulate data
22 | target$simulate <- function(theta){
23 |   return(target$robservation(nobservations, theta, target$parameters))
24 | }
25 | # distance between summary
26 | summary_obs <- rowMeans(obs)
27 | dsummary <- function(z){
28 |   summary_z <- rowMeans(z)
29 |   return(mean(abs(summary_z - summary_obs)))
30 | }
31 | 
32 | # common algorithmic parameters
33 | param_algo <- list(nthetas = nparticles, nmoves = 1, proposal = mixture_rmixmod(),
34 |                    minimum_diversity = 0.5, R = 2, maxtrials = 1e5)
35 | 
36 | filename <- paste0(prefix, "mvnormalwsmc.d", d, ".n", nobservations, ".summary.RData")
37 | results <- wsmc(dsummary, target, param_algo, maxsimulation = 10^6, savefile = filename)
38 | # load(filename)
39 | 
40 | # results <- wsmc(dsummary, target, param_algo, maxsimulation = 10^6, parallel = FALSE)
41 | # results <- wsmc_continue(results, savefile = filename, maxtime = 10*60*60)
42 | #
43 | # plot_threshold_time(results) + geom_point()
44 | # mle <- rowMeans(obs)
45 | # plot_bivariate(results, 1, 2, from = 10) + geom_vline(xintercept = mle[1]) + geom_hline(yintercept = mle[2])
46 | # plot_marginal(results, 1, from = 10)
47 | #
48 | # library(microbenchmark)
49 | # microbenchmark(dsummary(target$simulate(true_theta)), times = 1000)
50 | #
51 | # library(microbenchmark)
52 | # microbenchmark(target$simulate(true_theta), times = 1000)
53 | #
54 | 
55 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/mvnormal/mvnormal_wsmc_wasserstein.R:
--------------------------------------------------------------------------------
 1 | library(winference)
 2 | registerDoParallel(cores = detectCores())
 3 | rm(list = ls())
 4 | setmytheme()
 5 | 
 6 | set.seed(11)
 7 | 
 8 | doRun <- FALSE
 9 | max_time <- 30*60
10 | d <- 2
11 | target <- get_multivariate_normal(d)
12 | target$parameters$tau <- 5
13 | nobservations <- 100
14 | nparticles <- 2048
15 | p <- 1
16 | prefix <- ""
17 | 
18 | obsfile <- paste0(prefix, "mvnormaldata.d", d, ".n", nobservations, ".RData")
19 | load(obsfile)
20 | 
21 | # function to simulate data
22 | target$simulate <- function(theta){
23 |   return(target$robservation(nobservations, theta, target$parameters))
24 | }
25 | # wasserstein distance
26 | wdistance <- get_transport_to_y(obs, p = p)
27 | #
28 | param_algo <- list(nthetas = nparticles, nmoves = 1, proposal = mixture_rmixmod(),
29 |                    minimum_diversity = 0.5, R = 2, maxtrials = 1e5)
30 | 
31 | filename <- paste0(prefix, "mvnormalwsmc.d", d, ".n", nobservations, ".wasserstein.RData")
32 | results <- wsmc(wdistance, target, param_algo, maxsimulation = 10^6, savefile = filename)
33 | load(filename)
34 | # results <- wsmc_continue(results, savefile = filename, maxsimulation = 800000)
35 | #
36 | # load(filename)
37 | # plot_threshold_time(results) + geom_point()
38 | # mle <- rowMeans(obs)
39 | # plot_bivariate(results, 1, 2, from = 10) + geom_vline(xintercept = mle[1]) + geom_hline(yintercept = mle[2])
40 | # plot_marginal(results, 1, from = 10)
41 | 
42 | # library(microbenchmark)
43 | # microbenchmark(wdistance(target$simulate(true_theta)), times = 1000)
44 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/queue/50.intermediateobs.neal.Rdata:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pierrejacob/winference/bba0e89a019ccfeb403159f480cf2ac57e577f9d/inst/reproduceabc/queue/50.intermediateobs.neal.Rdata


--------------------------------------------------------------------------------
/inst/reproduceabc/queue/README.R:
--------------------------------------------------------------------------------
 1 | Reproduce results for the queueing model in Section 5.3 of the article.
 2 | ===
 3 | 
 4 | queue_wsmc_marginal_intermediate.R: load data set (RData file in the folder),
 5 | and approximates WABC posterior based on Wasserstein distance between marginal
 6 | distributions of synthetic and observed data sets.
 7 | 
 8 | queue_pmmh_intermediate.R: loads results from above script, uses them to tune the parameters
 9 | of a PMMH algorithm to target the exact posterior distribution, and runs PMMH.
10 | 
11 | queue_abctools.R: runs semi-automatic ABC using abctools package
12 | (install.packages("abctools"))
13 | 
14 | queue_plots_compare.R: loads results from above scripts,
15 | and creates the three plots in Figure 8 (a,b,c).
16 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/queue/queue_wsmc_marginal_intermediate.R:
--------------------------------------------------------------------------------
 1 | library(winference)
 2 | registerDoParallel(cores = detectCores())
 3 | rm(list = ls())
 4 | setmytheme()
 5 | 
 6 | set.seed(11)
 7 | 
 8 | prefix = ""
 9 | 
10 | load(paste0(prefix, "50.intermediateobs.neal.Rdata"))
11 | obs <- matrix(obs, nrow = 1)
12 | obs_sorted = sort(obs)
13 | nobs <- ncol(obs)
14 | #
15 | target <- get_queue()
16 | target$simulate <- function(theta){
17 |   return(matrix(target$robservation(nobs, theta, target$parameters), nrow = 1))
18 | }
19 | 
20 | #x_obs <- cumsum(obs[1,])
21 | #
22 | param_algo <- list(nthetas = 2048, nmoves = 1, proposal = mixture_rmixmod(),
23 |                    minimum_diversity = 0.5, R = 2, maxtrials = 100000)
24 | #compute_d <- get_hilbert_to_y(obs)
25 | compute_d = function(y){
26 |   sort_y = sort(y)
27 |   mean(abs(sort_y-obs_sorted))
28 | }
29 | 
30 | 
31 | filename <- paste0(prefix, "queue_intermediate_wsmc_marginal.RData")
32 | #results <- wsmc(compute_d, target, param_algo, savefile = filename, maxsim = 10^7)
33 | load(filename)
34 | results <- wsmc_continue(results, savefile = filename, maxsim = 2*10^7)
35 | # load(filename)
36 | 
37 | 
38 | target$rprior <- function(ntheta, parameters){
39 |   theta1 <- runif(n = ntheta, min = 0, max = min(obs[1,]))
40 |   theta2minus1 <- runif(n = ntheta, min = 0, max = 10)
41 |   theta3 <- runif(n = ntheta, min = 0, max = 1/3)
42 |   return(cbind(theta1, theta2minus1, theta3))
43 | }
44 | #
45 | target$dprior <- function(thetas, parameters){
46 |   evals <- dunif(thetas[,1], min = 0, max = min(obs[1,]), log = TRUE)
47 |   evals <- evals + dunif(thetas[,2], min = 0, max = 10, log = TRUE)
48 |   evals <- evals + dunif(thetas[,3], min = 0, max = 1/3, log = TRUE)
49 |   return(evals)
50 | }
51 | 
52 | filename <- paste0(prefix, "queue_intermediate_wsmc_marginal_constraints.RData")
53 | #results <- wsmc(compute_d, target, param_algo, savefile = filename, maxsim = 10^7)
54 | load(filename)
55 | results <- wsmc_continue(results, savefile = filename, maxsim = 2*10^7)
56 | # load(filename)
57 | 
58 | 
59 | #
60 | # plot_marginal(results, 1)
61 | # plot_marginal(results, 2)
62 | # plot_marginal(results, 3)
63 | # #
64 | # # get posterior mean and variance
65 | # thetas <- tail(results$thetas_history, 1)[[1]]
66 | # thetas[,3] <- log(thetas[,3])
67 | # theta_mean <- colMeans(thetas)
68 | # theta_cov <- cov(thetas)
69 | #
70 | # theta_mean
71 | # theta_cov
72 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/supplementary/README.R:
--------------------------------------------------------------------------------
 1 | Reproduce results for the supplementary materials
 2 | ===
 3 | 
 4 | check_assumption_plot.R: produces two plots for Figure 1 (a,b) of the supplementary,
 5 | in the Section "2.1.2. Misspecified location model"
 6 | 
 7 | gandk.checkassumption.R: produces 5 plots for Figure 2 (a,b,c,d) and Figure 3
 8 | of the supplementary, in the Section "2.3 Checking Assumption 5 for the g-and-k example"
 9 | 
10 | multivariate.transportdistancecomparison.R: computes different transport distances
11 | between samples generated from multivariate Normal distributions, with different
12 | parameters and different dimensions.
13 | 
14 | multivariate.transportdistancecomparison.plots.R: loads results of above script
15 | to create Figure 4 of the supplementary, in the Section "4. Approximation of the Wasserstein distance
16 | ".
17 | 
18 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/supplementary/check_assumption_plot.R:
--------------------------------------------------------------------------------
 1 | library(winference)
 2 | registerDoParallel(cores = detectCores())
 3 | rm(list = ls())
 4 | setmytheme()
 5 | set.seed(13)
 6 | 
 7 | 
 8 | dgpname <- "gamma"
 9 | modelname <- "normal"
10 | 
11 | # dgp
12 | if (dgpname == "gamma"){
13 |    robs <- function(n) rgamma(n, 10, 5)
14 | } else {
15 |   robs <- function(n) rnorm(n, mean = 2, sd = 1)
16 | }
17 | nobs <- 500000
18 | 
19 | # model
20 | if (modelname == "gamma"){
21 |   rmodel <- function(n, theta) rgamma(n, theta, 5)
22 | } else {
23 |   rmodel <- function(n, theta) rnorm(n, mean = theta, sd = 1)
24 | }
25 | prefix <- ""
26 | savefile <- paste0(prefix, "check.as6.dgp", dgpname, ".model", modelname, ".RData")
27 | 
28 | nseq <- 100
29 | if (modelname == "gamma"){
30 |   theta_seq <- seq(from = 9.5, to = 10.5, length.out = nseq)
31 | } else {
32 |   theta_seq <- seq(from = 1.5, to = 2.5, length.out = nseq)
33 | }
34 | 
35 | # Wasserstein distance with p = 2
36 | y_obs <- robs(nobs)
37 | compute_d2 <- get_hilbert_to_y(matrix(y_obs, nrow = 1), p = 2, ground_p = 2)
38 | results <- foreach(i = 1:nseq) %dorng% {
39 | 
40 |   theta <- theta_seq[i]
41 |   y_fake <- rmodel(nobs, theta)
42 |   # hilbert_distance(matrix(y_obs, nrow = 1), matrix(y_fake, nrow = 1))
43 |   compute_d2(matrix(y_fake, nrow = 1))
44 | }
45 | save(results, file = savefile)
46 | load(file = savefile)
47 | 
48 | wp2 <- sapply(results, function(v) v)
49 | 
50 | theta_star <- theta_seq[which.min(wp2)]
51 | x <- abs(theta_seq - 2)
52 | ysqrt <- sqrt(abs(wp2 - wp2[which.min(wp2)]))
53 | y <- abs(wp2 - wp2[which.min(wp2)])
54 | 
55 | figurefile <- paste0(prefix, "check.as6.dgp", dgpname, ".model", modelname, ".theta-vs-wasserstein.pdf")
56 | g <- qplot(x = theta_seq, y = wp2, geom = "point") + xlab(expression(theta)) + ylab("Wasserstein distance")
57 | print(g)
58 | ggsave(filename = figurefile, plot = g, width = 7, height = 5)
59 | figurefile <- paste0(prefix, "check.as6.dgp", dgpname, ".model", modelname, ".theta-vs-wasserstein.png")
60 | ggsave(filename = figurefile, plot = g, width = 7, height = 5, dpi = 150)
61 | 
62 | if (modelname == dgpname){
63 |   figurefile <- paste0(prefix, "check.as6.dgp", dgpname, ".model", modelname, ".distancetheta.pdf")
64 |   g <- qplot(x = x, y = y, geom = "point") + xlab("distance to theta star") + ylab("Wasserstein distance") + geom_smooth(method = "lm", se = FALSE)
65 |   print(g)
66 |   ggsave(filename = figurefile, plot = g, width = 7, height = 5)
67 | } else {
68 |   figurefile <- paste0(prefix, "check.as6.dgp", dgpname, ".model", modelname, ".distancetheta.pdf")
69 |   g <- qplot(x = x, y = ysqrt, geom = "point") + geom_smooth(method = "lm", se = FALSE) + xlab(expression(abs(theta-2))) + ylab("sqrt(Wasserstein distance)")
70 |   print(g)
71 |   ggsave(filename = figurefile, plot = g, width = 7, height = 5)
72 |   figurefile <- paste0(prefix, "check.as6.dgp", dgpname, ".model", modelname, ".distancetheta.png")
73 |   ggsave(filename = figurefile, plot = g, width = 7, height = 5, dpi = 150)
74 | }
75 | 
76 | 
77 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/supplementary/multivariate.transportdistancecomparison.plots.R:
--------------------------------------------------------------------------------
 1 | library(winference)
 2 | library(doParallel)
 3 | library(doRNG)
 4 | library(dplyr)
 5 | library(ggthemes)
 6 | registerDoParallel(cores = 10)
 7 | rm(list = ls())
 8 | set.seed(1)
 9 | setmytheme()
10 | my_colors <- get_my_colors()
11 | # my_colors <- c(my_colors, c("Sinkhorn" = "cornflowerblue"))
12 | # my_colors['Swap'] <- "009E73"
13 | timings_all <- data.frame()
14 | distances.df_all <- data.frame()
15 | 
16 | for (d in 2:5){
17 |   load(paste0("transportdistances.n500.d", d, ".i2.RData"))
18 |   timings_all <- rbind(timings_all, timings %>% group_by(expr) %>% summarise(m = median(time)/1e9) %>% mutate(dimension = d))
19 |   distances.df$dimension <- d
20 |   distances.df_all <- rbind(distances.df_all, distances.df)
21 | }
22 | 
23 | timings_all
24 | # timings %>% group_by(expr, dimension) %>% summarise(m = median(time)/1e9)
25 | distances.df_all$dimension <- factor(distances.df_all$dimension, levels = 2:5, labels = paste0("d = ", 2:5))
26 | head(distances.df_all)
27 | g <- ggplot(distances.df_all, aes(x = thetas, y = e)) + geom_point(aes(colour = "Wasserstein"))
28 | g <- g + geom_point(aes(y = sw, colour = "Swap"))
29 | g <- g + geom_point(aes(y = h, colour = "Hilbert"))
30 | g <- g + scale_color_manual(name = "", values = my_colors) + xlab(expression(sigma^2)) + ylab("distance") + facet_wrap(~ dimension, nrow = 1)
31 | g <- g +  guides(colour = guide_legend(override.aes = list(size=5))) + theme(legend.text=element_text(size=20))
32 | g <- g + scale_x_continuous(breaks = c(1,2,3,4,5,6,7,8))
33 | # if (icomponent==2){
34 | #   g <- g + geom_vline(xintercept = var(y[1,]), linetype = 3) + geom_vline(xintercept = theta_dgp[icomponent], linetype = 2)
35 | # }
36 | g
37 | ggsave(filename = "transportdistances.n500.i2.png", plot = g, width = 15, height = 5, dpi = 500)
38 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/supplementary/wasserstein_clt_mvtnorm_mean.R:
--------------------------------------------------------------------------------
 1 | library(winference)
 2 | registerDoParallel(cores = detectCores())
 3 | rm(list = ls())
 4 | setmytheme()
 5 | set.seed(11)
 6 | 
 7 | fig.height <- 5
 8 | fig.width <- 5
 9 | 
10 | my_colors <- get_my_colors()
11 | 
12 | d = 10
13 | 
14 | #obs
15 | n = 10^2
16 | obs = fast_rmvnorm(n,rep(2,d),diag(1,d))
17 | obs_mean = apply(obs,2,mean)
18 | 
19 | #fake
20 | m = 10^2
21 | 
22 | compute_d = function(fake_obs){
23 |   swap_distance(t(obs), t(fake_obs), tolerance = 1e-5)$distance
24 | }
25 | #test
26 | fake_obs = fast_rmvnorm(n,rep(1,d),diag(1,d))
27 | compute_d(fake_obs)
28 | 
29 | 
30 | 
31 | #Grid
32 | grid = seq(obs_mean[1]-1.5,obs_mean[1]+1.5,length.out = 20)
33 | 
34 | #Number of data sets per grid point
35 | N = 100
36 | 
37 | #Variance of fake data sets
38 | sigma2 = 1
39 | 
40 | dists = foreach(k = 1:length(grid), .combine = rbind) %dorng% {
41 |   store_distances = rep(0,N)
42 |   store_meandiffs = rep(0,N)
43 |   for(i in 1:N){
44 |     #fake_obs = fast_rmvnorm(m,c(grid[k],obs_mean[2:d]),diag(sigma2,d))
45 |     fake_obs = fast_rmvnorm(m,c(grid[k],rep(1,d-1)),diag(sigma2,d))
46 |     fake_mean = apply(fake_obs,2,mean)
47 |     store_distances[i] = compute_d(fake_obs)
48 |     store_meandiffs[i] = sum((obs_mean-fake_mean)^2)^0.5
49 |   }
50 |   out = cbind(rep(grid[k],N),store_distances,store_meandiffs)
51 |   return(out)
52 | }
53 | 
54 | dists.df = data.frame(theta = dists[,1], Wasserstein = dists[,2], xbardiff = dists[,3])
55 | # save(dists.df,file = paste0(prefix,"was_vs_xbar_mvtnorm_d",d,"_n",n,"_var",sigma2,".Rdata"))
56 | # load(paste0(prefix,"was_vs_xbar_mvtnorm_d",d,"_n",n,"_var",sigma2,".Rdata"))
57 | 
58 | wmax = max(dists.df$Wasserstein)
59 | wmin = min(dists.df$Wasserstein)
60 | wmax - wmin
61 | 
62 | xmax = max(dists.df$xbardiff)
63 | xmin = min(dists.df$xbardiff)
64 | xmax - xmin
65 | 
66 | ll = max(wmax-wmin,xmax-xmin)
67 | 
68 | # g = ggplot(dists.df, aes(theta, Wasserstein, group = theta)) + geom_boxplot()
69 | # g = g + coord_cartesian(ylim = c(wmin-ll/20,wmin+ll))
70 | # g
71 | #
72 | # g = ggplot(dists.df, aes(theta, xbardiff, group = theta)) + geom_boxplot()
73 | # g = g + coord_cartesian(ylim = c(xmin-ll/20,xmin+ll))
74 | # g
75 | 
76 | g = ggplot(data = dists.df, aes(theta, Wasserstein, group = theta)) + geom_boxplot(aes(fill = "Wasserstein"),alpha=0.7)
77 | g = g + geom_boxplot(data = dists.df, aes(theta, xbardiff, group = theta, fill = "Summary"),alpha=0.7)
78 | g = g + scale_fill_manual(name = "", values = my_colors) + xlab(expression(theta)) + ylab("distance")
79 | g
80 | #ggsave(filename = paste0(prefix,"was_vs_xbar_mvtnorm_d",d,"_n",n,"_var",sigma2,".pdf"), plot = g, width = fig.width, height = fig.height)
81 | 
82 | 
83 | 
84 | 
85 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/supplementary/wasserstein_clt_mvtnorm_mean_plots.R:
--------------------------------------------------------------------------------
 1 | library(winference)
 2 | registerDoParallel(cores = detectCores())
 3 | rm(list = ls())
 4 | setmytheme()
 5 | set.seed(11)
 6 | 
 7 | fig.height <- 5
 8 | fig.width <- 5
 9 | 
10 | my_colors <- get_my_colors()
11 | 
12 | dset = c(2,5,10)
13 | nset = c(10,100,1000)
14 | 
15 | for(dd in 1:3){
16 |   for(nn in 1:3){
17 | 
18 |     d = dset[dd]
19 | 
20 |     #obs
21 |     n = nset[nn]
22 |     obs = fast_rmvnorm(n,rep(2,d),diag(1,d))
23 |     obs_mean = apply(obs,2,mean)
24 | 
25 |     #fake
26 |     m = nset[nn]
27 | 
28 |     compute_d = function(fake_obs){
29 |       swap_distance(t(obs), t(fake_obs), tolerance = 1e-5)$distance
30 |     }
31 |     #test
32 |     fake_obs = fast_rmvnorm(n,rep(1,d),diag(1,d))
33 |     compute_d(fake_obs)
34 | 
35 | 
36 | 
37 |     #Grid
38 |     grid = seq(obs_mean[1]-1.5,obs_mean[1]+1.5,length.out = 20)
39 | 
40 |     #Number of data sets per grid point
41 |     N = 100
42 | 
43 |     #Variance of fake data sets
44 |     sigma2 = 1
45 | 
46 |     # dists = foreach(k = 1:length(grid), .combine = rbind) %dorng% {
47 |     #   store_distances = rep(0,N)
48 |     #   store_meandiffs = rep(0,N)
49 |     #   for(i in 1:N){
50 |     #     #fake_obs = fast_rmvnorm(m,c(grid[k],obs_mean[2:d]),diag(sigma2,d))
51 |     #     fake_obs = fast_rmvnorm(m,c(grid[k],rep(1,d-1)),diag(sigma2,d))
52 |     #     fake_mean = apply(fake_obs,2,mean)
53 |     #     store_distances[i] = compute_d(fake_obs)
54 |     #     store_meandiffs[i] = sum((obs_mean-fake_mean)^2)^0.5
55 |     #   }
56 |     #   out = cbind(rep(grid[k],N),store_distances,store_meandiffs)
57 |     #   return(out)
58 |     # }
59 |     #
60 |     # dists.df = data.frame(theta = dists[,1], Wasserstein = dists[,2], xbardiff = dists[,3])
61 |     # save(dists.df,file = paste0(prefix,"was_vs_xbar_mvtnorm_d",d,"_n",n,"_var",sigma2,".Rdata"))
62 |     load(paste0(prefix,"was_vs_xbar_mvtnorm_d",d,"_n",n,"_var",sigma2,".Rdata"))
63 | 
64 |     wmax = max(dists.df$Wasserstein)
65 |     wmin = min(dists.df$Wasserstein)
66 |     wmax - wmin
67 | 
68 |     xmax = max(dists.df$xbardiff)
69 |     xmin = min(dists.df$xbardiff)
70 |     xmax - xmin
71 | 
72 |     ll = max(wmax-wmin,xmax-xmin)
73 | 
74 |     # g = ggplot(dists.df, aes(theta, Wasserstein, group = theta)) + geom_boxplot()
75 |     # g = g + coord_cartesian(ylim = c(wmin-ll/20,wmin+ll))
76 |     # g
77 |     #
78 |     # g = ggplot(dists.df, aes(theta, xbardiff, group = theta)) + geom_boxplot()
79 |     # g = g + coord_cartesian(ylim = c(xmin-ll/20,xmin+ll))
80 |     # g
81 | 
82 |     g = ggplot(data = dists.df, aes(theta, Wasserstein, group = theta)) + geom_boxplot(aes(fill = "Wasserstein"),alpha=0.7)
83 |     g = g + geom_boxplot(data = dists.df, aes(theta, xbardiff, group = theta, fill = "Summary"),alpha=0.7)
84 |     g = g + scale_fill_manual(name = "", values = my_colors) + xlab(expression(theta[1])) + ylab("distance")
85 |     #g
86 |     ggsave(filename = paste0(prefix,"was_vs_xbar_mvtnorm_d",d,"_n",n,"_var",sigma2,".pdf"), plot = g, width = fig.width, height = fig.height)
87 |   }
88 | }
89 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/toggleswitch/README.R:
--------------------------------------------------------------------------------
 1 | Reproduce results for the toggle switch model in Section 5.2 of the WABC article.
 2 | ===
 3 |   
 4 | toggle_switch_generate.R: generate and save a data set from the model
 5 | 
 6 | toggle_switch_wsmc.R: loads a data set of size 2000 and runs SMC to approximate the WABC
 7 | posterior using the exact Wasserstein distance with a budget of 10^6 model simulations.
 8 | 
 9 | toggle_switch_summary.R: construct the summary statistic from Bonassi et al.
10 | 
11 | toggle_switch_load_summary.R: after toggle_switch_summary.R has been run, use this
12 | script to load the summary statistic.
13 | 
14 | toggle_switch_summary_smc.R: loads the data set and the summary statistic and runs SMC to
15 | approximate the ABC posterior based on the summary statistic with a budget of 10^6 model
16 | simulations.
17 | 
18 | toggle_switch_plots.R: loads the data and the output from the SMC runs and plots the marginal 
19 | distributions of the resulting ABC posteriors. Also plots a histogram of the data. Corresponds
20 | to Fig 7 of the article.


--------------------------------------------------------------------------------
/inst/reproduceabc/toggleswitch/toggle_switch_generate.R:
--------------------------------------------------------------------------------
 1 | library(winference)
 2 | registerDoParallel(cores = detectCores())
 3 | rm(list = ls())
 4 | setmytheme()
 5 | 
 6 | prefix = ""
 7 | 
 8 | set.seed(11)
 9 | target <- get_toggleswitch()
10 | 
11 | # number of observations
12 | nobservations <- 2000
13 | # parameter of data-generating process
14 | # true_theta <- c(alpha_1, alpha_2, beta_1, beta_2, mu, sigma, gamma)
15 | true_theta <- c(22, 12, 4, 4.5, 325, 0.25, 0.15)
16 | 
17 | obs <- target$robservation(nobservations, true_theta,
18 |                            target$parameters, target$generate_randomness(nobservations))
19 | #save(true_theta, obs, file = "toggleswitchdata.RData")
20 | 
21 | hist(obs)
22 | # plot(obs[1:1000], type = "l")
23 | # mean(obs)
24 | # sd(obs)
25 | 
26 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/toggleswitch/toggle_switch_load_summary.R:
--------------------------------------------------------------------------------
 1 | #Loads the summary statistic provided the
 2 | prefix = ""
 3 | 
 4 | #Parameters
 5 | p = 25000 #Number of data sets drawn from prior predictive
 6 | nobservations = 2000
 7 | R = 100
 8 | 
 9 | upper.bound = 2000
10 | nbins = 50
11 | breaks = seq(0, upper.bound, length.out = nbins)
12 | 
13 | #Set bandwith so as to mimic the histogram
14 | bandwidth = upper.bound/nbins
15 | #mids from histogram
16 | mids = breaks[-1] - (breaks[-1]-breaks[-nbins])/2
17 | 
18 | filename = paste0(prefix,"toggle.binned.p",p,".nobs", nobservations, ".RData")
19 | 
20 | load(filename)
21 | 
22 | binned.data.matrix = t(sapply(binned.data, function(x) x[[1]]))
23 | 
24 | binned.data.svd = svd(binned.data.matrix)
25 | 
26 | first.col.A = binned.data.svd$u[,1]
27 | 
28 | #Choose references as the R percentiles of first column of A
29 | reference.order = order(first.col.A)
30 | reference.index = reference.order[c(1,(1:(R-1))*(p/R),p)]
31 | 
32 | prob.mat = binned.data.matrix[reference.index,]/nobservations
33 | 
34 | # #Check call
35 | # summary_full(y,bandwidth,prob.mat,mids)
36 | 
37 | data.matrix = t(sapply(binned.data, function(x) x[[2]]))
38 | 
39 | #Load full summary stat for all R references
40 | filename2 = paste0(prefix,"summary.full.p",p,".nobs", nobservations, ".RData")
41 | load(filename2)
42 | 
43 | summary.full.matrix = t(sapply(summary.stat.full, function(x) x))
44 | #replace -Inf with -1000
45 | summary.full.matrix = pmax(summary.full.matrix,-1000)
46 | 
47 | summary.full.svd = svd(summary.full.matrix)
48 | 
49 | #Number of principal components
50 | r = 11
51 | M = rbind(diag(1,r),matrix(0,R+1-r,r))
52 | H = summary.full.svd$v%*%M
53 | 
54 | summary.stat = function(y){
55 |   S = summary_full(y,bandwidth,prob.mat,mids)
56 |   S = pmax(S,-1000)
57 |   S.red = S%*%H
58 |   return(S.red)
59 | }
60 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/toggleswitch/toggle_switch_summary.R:
--------------------------------------------------------------------------------
  1 | #Constructs the summary statistic in Bonassi
  2 | library(winference)
  3 | registerDoParallel(cores = detectCores())
  4 | rm(list = ls())
  5 | setmytheme()
  6 | set.seed(11)
  7 | prefix = ""
  8 | 
  9 | target <- get_toggleswitch()
 10 | 
 11 | #Parameters
 12 | p = 25000 #Number of data sets drawn from prior predictive
 13 | nobservations = 2000
 14 | R = 100
 15 | 
 16 | upper.bound = 2000
 17 | nbins = 50
 18 | breaks = seq(0, upper.bound, length.out = nbins)
 19 | 
 20 | target$simulate <- function(theta) matrix(target$robservation(nobservations, theta, target$parameters, target$generate_randomness(nobservations)), nrow = 1)
 21 | 
 22 | prior_draws = target$rprior(p, target$parameters)
 23 | 
 24 | filename = paste0(prefix,"toggle.binned.p",p,".nobs", nobservations, ".RData")
 25 | 
 26 | t = proc.time()
 27 | binned.data = foreach(i = 1:p) %dorng% {
 28 | 
 29 |   #simulate data set given draw from prior
 30 |   ysim = target$simulate(prior_draws[i,])
 31 | 
 32 |   #sets all values greater than R to R-0.00001
 33 |   ysim = sapply(ysim, function(x) min(x,upper.bound-0.00001))
 34 | 
 35 |   #bin the data
 36 |   binned = tabulate(findInterval(ysim, vec=breaks),nbins = nbins-1)
 37 | 
 38 |   return(list(binned,ysim))
 39 | }
 40 | t = proc.time() - t
 41 | save(binned.data,file = filename)
 42 | 
 43 | load(filename)
 44 | 
 45 | binned.data.matrix = t(sapply(binned.data, function(x) x[[1]]))
 46 | 
 47 | binned.data.svd = svd(binned.data.matrix)
 48 | 
 49 | first.col.A = binned.data.svd$u[,1]
 50 | 
 51 | #Choose references as the R percentiles of first column of A
 52 | reference.order = order(first.col.A)
 53 | reference.index = reference.order[c(1,(1:(R-1))*(p/R),p)]
 54 | 
 55 | #Set bandwith so as to mimic the histogram
 56 | bandwidth = upper.bound/nbins
 57 | 
 58 | #Continuous histograms
 59 | mids = breaks[-1] - (breaks[-1]-breaks[-nbins])/2
 60 | 
 61 | prob.mat = binned.data.matrix[reference.index,]/nobservations
 62 | 
 63 | #Check call
 64 | y = data.matrix[2,]
 65 | summary_full(y,bandwidth,prob.mat,mids)
 66 | 
 67 | data.matrix = t(sapply(binned.data, function(x) x[[2]]))
 68 | 
 69 | filename2 = paste0(prefix,"summary.full.p",p,".nobs", nobservations, ".RData")
 70 | t = proc.time()
 71 | summary.stat.full = foreach(i = 1:p) %dorng% {
 72 |   y = data.matrix[i,]
 73 |   out = summary_full(y,bandwidth,prob.mat,mids)
 74 |   return(out)
 75 | }
 76 | t = proc.time() - t
 77 | save(summary.stat.full,file = filename2)
 78 | load(filename2)
 79 | 
 80 | summary.full.matrix = t(sapply(summary.stat.full, function(x) x))
 81 | #replaces -Inf with -1000 (number on log scale)
 82 | summary.full.matrix = pmax(summary.full.matrix,-1000)
 83 | 
 84 | summary.full.svd = svd(summary.full.matrix)
 85 | 
 86 | #Number of principal components
 87 | r = 11
 88 | M = rbind(diag(1,r),matrix(0,R+1-r,r))
 89 | H = summary.full.svd$v%*%M
 90 | summary.reduced.matrix = summary.full.matrix%*%H
 91 | # filename3 = paste0("summary.H.p",p,".nobs", nobservations, ".RData")
 92 | # save(H,file = filename3)
 93 | 
 94 | summary.stat = function(y){
 95 |   S = summary_full(y,bandwidth,prob.mat,mids)
 96 |   S = pmax(S,-1000)
 97 |   S.red = S%*%H
 98 |   return(S.red)
 99 | }
100 | 
101 | # #Try
102 | # t = proc.time()
103 | # y = target$simulate(target$rprior(target$truetheta, target$parameters))
104 | # t = proc.time()-t
105 | # t
106 | #
107 | # t = proc.time()
108 | # ss = summary.stat(y)
109 | # t = proc.time()-t
110 | # t
111 | #
112 | # t = proc.time()
113 | # ss = sort(y)
114 | # t = proc.time()-t
115 | # t
116 | 
117 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/toggleswitch/toggle_switch_summary_smc.R:
--------------------------------------------------------------------------------
 1 | library(winference)
 2 | registerDoParallel(cores = detectCores()-2)
 3 | rm(list = ls())
 4 | setmytheme()
 5 | set.seed(11)
 6 | 
 7 | prefix = ""
 8 | 
 9 | target <- get_toggleswitch()
10 | # number of observations
11 | nobservations <- 2000
12 | load(file = paste0(prefix,"toggleswitchdata.RData"))
13 | obs <- obs[1:nobservations]
14 | 
15 | #Load summary stat. Requires toggle_switch_summary.R to have been run first.
16 | source(paste0(prefix,"toggle_switch_load_summary.R"))
17 | 
18 | s_obs = summary.stat(obs)
19 | 
20 | # function to compute distance between observed data and data generated given theta
21 | compute_d <- function(y){
22 |   s_fake = summary.stat(y)
23 |   dist = sqrt(sum((s_obs-s_fake)^2))
24 |   return(dist)
25 | }
26 | 
27 | target$simulate <- function(theta) matrix(target$robservation(nobservations, theta, target$parameters, target$generate_randomness(nobservations)), nrow = 1)
28 | 
29 | y_sim <- target$simulate(target$rprior(1, target$parameters))
30 | compute_d(y_sim)
31 | 
32 | param_algo <- list(nthetas = 2048, nmoves = 1, proposal = mixture_rmixmod(),
33 |                    minimum_diversity = 0.5, R = 2, maxtrials = 1000)
34 | 
35 | filename <- paste0(prefix,"toggleswitchw.summary.smc.n", nobservations, ".RData")
36 | results <- wsmc(compute_d, target, param_algo, savefile = filename, maxsimulation = 1e6)
37 | load(filename)
38 | #results <- wsmc_continue(results, savefile = filename, maxsim = (1e6 - 166035))
39 | 


--------------------------------------------------------------------------------
/inst/reproduceabc/toggleswitch/toggle_switch_wsmc.R:
--------------------------------------------------------------------------------
 1 | library(winference)
 2 | registerDoParallel(cores = detectCores())
 3 | rm(list = ls())
 4 | setmytheme()
 5 | set.seed(11)
 6 | 
 7 | prefix = ""
 8 | 
 9 | target <- get_toggleswitch()
10 | # number of observations
11 | nobservations <- 2000
12 | load(file = paste0(prefix,"toggleswitchdata.RData"))
13 | obs <- obs[1:nobservations]
14 | obs_sorted <- sort(obs)
15 | 
16 | # function to compute distance between observed data and data generated given theta
17 | compute_d = function(y){
18 |   sort_y = sort(y)
19 |   mean(abs(sort_y-obs_sorted))
20 | }
21 | 
22 | target$simulate <- function(theta) matrix(target$robservation(nobservations, theta, target$parameters, target$generate_randomness(nobservations)), nrow = 1)
23 | 
24 | y_sim <- target$simulate(target$rprior(1, target$parameters))
25 | compute_d(y_sim)
26 | 
27 | param_algo <- list(nthetas = 2048, nmoves = 1, proposal = mixture_rmixmod(),
28 |                    minimum_diversity = 0.5, R = 2, maxtrials = 1000)
29 | 
30 | filename <- paste0(prefix, "toggleswitchwsmc.n", nobservations, ".RData")
31 | results <- wsmc(compute_d, target, param_algo, savefile = filename, maxsimulation = 1e6)
32 | load(filename)
33 | #results <- wsmc_continue(results, savefile = filename, maxsim = 1e6)
34 | # plot_bivariate(results, 1, 2)
35 | 


--------------------------------------------------------------------------------
/inst/reproducepointestimation/README.R:
--------------------------------------------------------------------------------
 1 | These scripts produce the figure for the manuscript
 2 | "On parameter estimation with the Wasserstein distance"
 3 | by
 4 | Espen Bernton, Pierre E. Jacob, Mathieu Gerber, Christian P. Robert
 5 | 
 6 | The instructions below describe the different files.
 7 | 
 8 | Some of the scripts require having loaded packages such as ggplot2 for plotting,
 9 | doParallel, foreach, doRNG for parallel computation and some 'dplyr' for data.frame manipulations.
10 | 
11 | #######
12 | 
13 | Section 4.1 Quantile “g-and-k” distribution
14 | 
15 | gandk_functions.R: contains function definitions, in particular the inverse CDF of the g-and-k distribution in C++
16 | 
17 | gandk.cluster.script.R: computes the MEWE on B=1000 bootstrap versions of the data, and saves the output; we ran this 400 times in parallel on a cluster.
18 | (This function will have to be adapted to different machines, and file paths changed appropriately.)
19 | 
20 | gandk_coverage.R: postprocesses the outputs of the above script to compute the coverage.
21 | 
22 | gandk_plots.R: computes MEWE for different values of data size n, and a number of independent runs,
23 | in order to produce the scatter plots and rescaled plots of the section, as well as a histogram of a data set.
24 | These plots are in Figure 1 of the manuscript.
25 | 
26 | gandk_correlated.R: same but for a data-generating process that generates dependent
27 | observations. The resulting plots are in Figure 2 of the manuscript.
28 | 
29 | 
30 | #######
31 | 
32 | Section 4.2 Sum of log-Normal random variables
33 | 
34 | same as for g-and-k, with files starting with "lognormal_"
35 | 
36 | The resulting plots are in Figure 3 of the manuscript.
37 | 
38 | #######
39 | 
40 | Section 4.3 Gamma data fitted with a Normal model
41 | 
42 | gamma_normal_functions.R: contains function definitions, in particular those that generate data from the model
43 | 
44 | gamma_normal_fixedmk_diffn.R: produces plots to show the behavior of the MEWE as the number of observations increase
45 | (plots of Figure 4 in the manuscript)
46 | 
47 | gamma_normal_fixedn_diffmk.R: produces plots to show the effect of different m and k on the quality of the MEWE approximation of the MWE, for a fixed data set.
48 | (plots of Figure 5 in the manuscript)
49 | 
50 | gamma_normal_bootstrap.R: runs 400 independent experiments in which the MEWE is computed on B=1000 bootstrap versions of the data, and saves the the corresponding bootstrap intervals. It also calculates the coverage rate of the intervals.
51 | 
52 | #######
53 | 
54 | Section 4.4 Cauchy data fitted with a Normal model
55 | 
56 | cauchydata_normalfit_fixedmk_diffn.R: computes MEWE repeatedly, on independent data sets of various sizes,
57 | and produces scatter plots and rescaled plots.
58 | 
59 | cauchydata_normalfit_fixedmk_diffn_KL.R: same for a minimum KL estimator, using the
60 | FNN package.
61 | 
62 | These two scripts produce the plots presented in Figure 6 of the manuscript.
63 | 


--------------------------------------------------------------------------------
/inst/reproducepointestimation/gamma_normal_functions.R:
--------------------------------------------------------------------------------
 1 | target <- list()
 2 | 
 3 | target$generate_randomness <- function(nobservations){
 4 |   return(rnorm(nobservations))
 5 | }
 6 | 
 7 | target$thetadim = 2
 8 | 
 9 | metricL1 = function(xvec,yvec)  mean(abs(xvec - yvec))
10 | 


--------------------------------------------------------------------------------
/inst/reproducepointestimation/gandk.cluster.script.R:
--------------------------------------------------------------------------------
 1 | rm(list = ls())
 2 | library(parallel)
 3 | # parallel RNG using L'Ecuyer et al (2002)
 4 | RNGkind("L'Ecuyer-CMRG") # L'Ecuyer CMRG required for multiple streams
 5 | igrid <- as.integer(Sys.getenv('SLURM_ARRAY_TASK_ID'))
 6 | set.seed(1) # initial seed
 7 | for (i in 1:igrid){
 8 |   .Random.seed <- nextRNGStream(.Random.seed) # compute appropriate stream
 9 | }
10 | 
11 | source("gandk_functions.R")
12 | true_theta = c(3,1,2,0.5)
13 | 
14 | 
15 | #Pick m to be larger than or equal to max(n) and a multiple of each entry in n 
16 | #M = 50    #Number of data sets from dgp
17 | n = 1000  #Number of samples in each data set from dgp
18 | N = 20    #Number of synthetic data sets for finding MEWE
19 | m = 10^4  #Number of samples in the synthetic data sets
20 | B = 10  #Number of bootstrap samples per data set from dgp
21 | alpha = 0.05 #Desired significance level, compute the alpha/2 and 1-alpha/2 quantiles
22 | thetadim = 4 #Inference on all parameters
23 | 
24 | filename = paste0("output/gandk_bootstrap_run",igrid,"_N",N,"_m",m,"_n",n,"_B",B,".Rdata")
25 | 
26 | # Bootstrap confidence intervals for the A and B parameters
27 | time_elapsed = proc.time()
28 | #Temporary storage for the bootstrapped estimators and other information
29 | bs_store = matrix(0,B, thetadim)#target$thetadim)
30 | bs_count_store = rep(0,B)
31 | 
32 | 
33 | #Generate the observations, sort, and replicate observations to match length of the synthetic data
34 | obs_rand = target$generate_randomness(n)
35 | obs = target$robservation(true_theta,obs_rand)
36 | sort_obs = sort(obs)
37 | sort_obs_mult = rep(sort_obs, each = m/n)
38 | 
39 | #Generate the synthetic randomness, sort. 
40 | randomness = t(sapply(1:N, function(x) target$generate_randomness(m)))
41 | sort_randomness = t(apply(randomness, 1, sort))
42 |   
43 | #Define the objective to be minimized to find the MEWE
44 | obj1 = function(theta){
45 |   if(prod(theta) < 0){
46 |     out = 100
47 |   } else{
48 |     wass_dists = apply(sort_randomness, 1, function(x) metricL1(sort_obs_mult,target$robservation(theta,x)))
49 |     out = mean(wass_dists)
50 |   }
51 |   return(out)
52 | }
53 |   
54 | #Find the MEWE
55 | mewe = optim(true_theta,obj1)
56 | 
57 | #### Bootstrap
58 | #Generate multinomials
59 | multinom = matrix(sample(1:n, size = n*B, replace = T), nrow = B)
60 | sort_multinom = t(apply(multinom, 1, sort))
61 | 
62 | #Resample B times from the empirical distribution, sorted
63 | emp_sort_obs = t(apply(sort_multinom, 1, function(x) sort_obs[x]))
64 |   
65 | #For each of the resampled data sets, find the MEWE.
66 | ### WARNING: CURRENTLY RECYCLING THE RANDOMNESS FROM ABOVE, CHECK TO SEE IF IT MAKES A DIFFERENCE.
67 | for(i in 1:B){
68 |   
69 |   #Replicate observations to match length of the synthetic data
70 |   emp_obs_mult = rep(emp_sort_obs[i,], each = m/n)
71 |   
72 |   #Define the objective function
73 |   obj_bs = function(theta){
74 |     wass_dists = apply(sort_randomness, 1, function(x) metricL1(emp_obs_mult,target$robservation(theta,x)))
75 |     out = mean(wass_dists)
76 |     return(out)
77 |   }
78 |   
79 |   #Find minimum, initializing at the MEWE
80 |   mewe_bs = try(optim(mewe$par,obj_bs))
81 |   if (inherits(mewe_bs, "try-error")){
82 |     mewe_bs <- list(par = rep(NA, 4), count = Inf)
83 |   }
84 |   
85 |   #Store the stuff
86 |   bs_store[i,] = mewe_bs$par
87 |   bs_count_store[i] = mewe_bs$count[1]
88 | }
89 | 
90 | time_elapsed = proc.time() - time_elapsed
91 | 
92 | save(bs_store, bs_count_store, time_elapsed, file = filename)
93 | 


--------------------------------------------------------------------------------
/inst/reproducepointestimation/gandk_coverage.R:
--------------------------------------------------------------------------------
 1 | rm(list = ls())
 2 | 
 3 | source("gandk_functions.R")
 4 | prefix = "output/"
 5 | 
 6 | true_theta = c(3,1,2,0.5)
 7 | 
 8 | M = 400
 9 | n = 1000  #Number of samples in each data set from dgp
10 | N = 20    #Number of synthetic data sets for finding MEWE
11 | m = 10^4  #Number of samples in the synthetic data sets
12 | B = 1000  #Number of bootstrap samples per data set from dgp
13 | alpha = 0.05 #Desired significance level, compute the alpha/2 and 1-alpha/2 quantiles
14 | thetadim = 4 #Inference on all parameters
15 | 
16 | #Check if value is inside the interval
17 | is.inside = function(value,interval){
18 |   if(interval[1] <= value & value <= interval[2]){
19 |     return(TRUE)
20 |   } else{
21 |     return(FALSE)
22 |   }
23 | }
24 | 
25 | #Store the number of intervals that cover
26 | store.cover = matrix(NA,nrow = M, ncol = thetadim + 1)
27 | 
28 | for(i in 1:M){
29 |   
30 |   # Load file from run i
31 |   filename = paste0(prefix,"gandk_bootstrap_run",i,"_N",N,"_m",m,"_n",n,"_B",B,".Rdata")
32 |   load(filename)
33 |   
34 |   # Compute the empirical quantiles of the bootstrap samples,
35 |   # both separately for each parameter and with Bonferroni correction.
36 |   q_A = quantile(bs_store[,1],probs = c(alpha/8,alpha/2,1-alpha/2,1-alpha/8))
37 |   q_B = quantile(bs_store[,2],probs = c(alpha/8,alpha/2,1-alpha/2,1-alpha/8))
38 |   q_g = quantile(bs_store[,3],probs = c(alpha/8,alpha/2,1-alpha/2,1-alpha/8))
39 |   q_k = quantile(bs_store[,4],probs = c(alpha/8,alpha/2,1-alpha/2,1-alpha/8))
40 |   
41 |   ci_A = q_A[2:3]
42 |   ci_B = q_B[2:3]
43 |   ci_g = q_g[2:3]
44 |   ci_k = q_k[2:3]
45 |   ci_all = rbind(q_A[c(1,4)],q_B[c(1,4)],q_g[c(1,4)],q_k[c(1,4)])
46 |   
47 |   #Check if inside
48 |   store.cover[i,1] = is.inside(true_theta[1],ci_A)
49 |   store.cover[i,2] = is.inside(true_theta[2],ci_B)
50 |   store.cover[i,3] = is.inside(true_theta[3],ci_g)
51 |   store.cover[i,4] = is.inside(true_theta[4],ci_k)
52 |   
53 |   if(is.inside(true_theta[1],ci_all[1,]) & is.inside(true_theta[2],ci_all[2,]) &
54 |      is.inside(true_theta[3],ci_all[3,]) & is.inside(true_theta[4],ci_all[4,])){
55 |     store.cover[i,5] = TRUE
56 |   } else{
57 |     store.cover[i,5] = FALSE
58 |   }
59 | }
60 | 
61 | coverages = apply(store.cover,2,mean)
62 | 


--------------------------------------------------------------------------------
/inst/reproducepointestimation/gandk_functions.R:
--------------------------------------------------------------------------------
 1 | library(Rcpp)
 2 | 
 3 | cppFunction("NumericVector gandkinversecdf_givennormals(NumericVector normals, NumericVector theta){
 4 |   NumericVector z = normals;
 5 |   double A = theta(0);
 6 |   double B = theta(1);
 7 |   double c = 0.8;
 8 |   double g = theta(2);
 9 |   double k = theta(3);
10 |   return A + B * (1 + c * (1 - exp(- g * z)) / (1 + exp(- g * z))) * pow((1 + pow(z, 2.0)), k) * z;
11 | }")
12 | 
13 | cppFunction("NumericVector gandkinversecdf(NumericVector x, NumericVector theta){
14 |   NumericVector z = qnorm(x);
15 |   double A = theta(0);
16 |   double B = theta(1);
17 |   double c = 0.8;
18 |   double g = theta(2);
19 |   double k = theta(3);
20 |   return A + B * (1 + c * (1 - exp(- g * z)) / (1 + exp(- g * z))) * pow((1 + pow(z, 2.0)), k) * z;
21 | }")
22 | 
23 | target <- list()
24 | 
25 | target$generate_randomness <- function(nobservations){
26 |   return(rnorm(nobservations))
27 | }
28 | 
29 | target$robservation <- function(theta, randomness){
30 |   return(gandkinversecdf_givennormals(randomness, theta))
31 | }
32 | 
33 | metricL1 = function(xvec,yvec)  mean(abs(xvec - yvec))
34 | 
35 | 
36 | 
37 | 
38 | 
39 | 
40 | 
41 | 


--------------------------------------------------------------------------------
/inst/reproducepointestimation/lognormal.cluster.script.R:
--------------------------------------------------------------------------------
 1 | ## computes MEWE on B=1000 bootstrap version of a data set
 2 | ## File to be run on a cluster preferably!
 3 | 
 4 | rm(list = ls())
 5 | library(parallel)
 6 | # parallel RNG using L'Ecuyer et al (2002)
 7 | RNGkind("L'Ecuyer-CMRG") # L'Ecuyer CMRG required for multiple streams
 8 | igrid <- as.integer(Sys.getenv('SLURM_ARRAY_TASK_ID'))
 9 | set.seed(1) # initial seed
10 | for (i in 1:igrid){
11 |   .Random.seed <- nextRNGStream(.Random.seed) # compute appropriate stream
12 | }
13 | 
14 | source("lognormal_functions.R")
15 | 
16 | #Pick m to be larger than or equal to max(n) and a multiple of each entry in n 
17 | #M = 50    #Number of data sets from dgp
18 | n = 10^3  #Number of samples in each data set from dgp
19 | N = 20    #Number of synthetic data sets for finding MEWE
20 | m = 10^4  #Number of samples in the synthetic data sets
21 | B = 1000  #Number of bootstrap samples per data set from dgp
22 | alpha = 0.05 #Desired significance level, compute the alpha/2 and 1-alpha/2 quantiles
23 | 
24 | filename = paste0("output/lognormal_bootstrap_run",igrid,"_N",N,"_m",m,"_n",n,"_B",B,".Rdata")
25 | 
26 | # Bootstrap confidence intervals for the A and B parameters
27 | time_elapsed = proc.time()
28 | #Temporary storage for the bootstrapped estimators and other information
29 | bs_store = matrix(0, B, thetadim)#target$thetadim)
30 | bs_count_store = rep(0, B)
31 | 
32 | #Generate the observations, sort, and replicate observations to match length of the synthetic data
33 | obs_rand = target$generate_randomness(n)
34 | obs = target$robservation(true_theta,obs_rand)
35 | sort_obs = sort(obs)
36 | sort_obs_mult = rep(sort_obs, each = m/n)
37 | 
38 | #Generate the synthetic randomness, sort. 
39 | randomness = t(sapply(1:N, function(x) target$generate_randomness(m)))
40 | # sort_randomness = t(apply(randomness, 1, sort))
41 | 
42 | #Define the objective to be minimized to find the MEWE
43 | obj1 = function(theta){
44 |   if(theta[2] < 0){
45 |     out = 100
46 |   } else{
47 |     wass_dists = apply(randomness, 1, function(x) metricL1(sort_obs_mult, sort(target$robservation(theta, x))))
48 |     out = mean(wass_dists)
49 |   }
50 |   return(out)
51 | }
52 | 
53 | #Find the MEWE
54 | mewe = optim(true_theta,obj1)
55 | mewe$par
56 | 
57 | # #### Bootstrap
58 | #Generate multinomials
59 | multinom = matrix(sample(1:n, size = n*B, replace = T), nrow = B)
60 | sort_multinom = t(apply(multinom, 1, sort))
61 | 
62 | #Resample B times from the empirical distribution, sorted
63 | emp_sort_obs = t(apply(sort_multinom, 1, function(x) sort_obs[x]))
64 | 
65 | #For each of the resampled data sets, find the MEWE.
66 | for(i in 1:B){
67 |   #Replicate observations to match length of the synthetic data
68 |   emp_obs_mult = rep(emp_sort_obs[i,], each = m/n)
69 |   
70 |   #Define the objective function
71 |   obj_bs = function(theta){
72 |     if(theta[2] < 0){
73 |       out = 100
74 |     } else{
75 |       wass_dists = apply(randomness, 1, function(x) metricL1(emp_obs_mult, sort(target$robservation(theta, x))))
76 |       out = mean(wass_dists)
77 |     }
78 |     return(out)
79 |   }
80 |   
81 |   #Find minimum, initializing at the MEWE
82 |   mewe_bs = try(optim(mewe$par,obj_bs))
83 |   if (inherits(mewe_bs, "try-error")){
84 |     mewe_bs <- list(par = rep(NA, thetadim), count = Inf)
85 |   }
86 |   
87 |   #Store the stuff
88 |   bs_store[i,] = mewe_bs$par
89 |   bs_count_store[i] = mewe_bs$count[1]
90 | }
91 | 
92 | time_elapsed = proc.time() - time_elapsed
93 | 
94 | save(bs_store, bs_count_store, time_elapsed, file = filename)
95 | 


--------------------------------------------------------------------------------
/inst/reproducepointestimation/lognormal_coverage.R:
--------------------------------------------------------------------------------
 1 | ## post process output of lognormal.cluster.script.R
 2 | ## to compute coverage of confidence intervals
 3 | rm(list = ls())
 4 | 
 5 | prefix = "output/"
 6 | 
 7 | true_theta = c(0,1)
 8 | 
 9 | M = 400
10 | n = 1000  #Number of samples in each data set from dgp
11 | N = 20    #Number of synthetic data sets for finding MEWE
12 | m = 10^4  #Number of samples in the synthetic data sets
13 | B = 1000  #Number of bootstrap samples per data set from dgp
14 | alpha = 0.05 #Desired significance level, compute the alpha/2 and 1-alpha/2 quantiles
15 | thetadim = 2 #Inference on all parameters
16 | 
17 | #Check if value is inside the interval
18 | is.inside = function(value,interval){
19 |   if(interval[1] <= value & value <= interval[2]){
20 |     return(TRUE)
21 |   } else{
22 |     return(FALSE)
23 |   }
24 | }
25 | 
26 | #Store the number of intervals that cover
27 | store.cover = matrix(NA,nrow = M, ncol = thetadim + 1)
28 | 
29 | for(i in 1:M){
30 |   
31 |   # Load file from run i
32 |   filename = paste0(prefix,"lognormal_bootstrap_run",i,"_N",N,"_m",m,"_n",n,"_B",B,".Rdata")
33 |   load(filename)
34 |   
35 |   # Compute the empirical quantiles of the bootstrap samples,
36 |   # both separately for each parameter and with Bonferroni correction.
37 |   q_mu = quantile(bs_store[,1],probs = c(alpha/4,alpha/2,1-alpha/2,1-alpha/4))
38 |   q_sigma = quantile(bs_store[,2],probs = c(alpha/4,alpha/2,1-alpha/2,1-alpha/4))
39 |   
40 |   ci_mu = q_mu[2:3]
41 |   ci_sigma = q_sigma[2:3]
42 |   ci_all = rbind(q_mu[c(1,4)],q_sigma[c(1,4)])
43 |   
44 |   #Check if inside
45 |   store.cover[i,1] = is.inside(true_theta[1],ci_mu)
46 |   store.cover[i,2] = is.inside(true_theta[2],ci_sigma)
47 |   
48 |   if(is.inside(true_theta[1],ci_all[1,]) & is.inside(true_theta[2],ci_all[2,])){
49 |     store.cover[i,3] = TRUE
50 |   } else{
51 |     store.cover[i,3] = FALSE
52 |   }
53 | }
54 | 
55 | coverages = apply(store.cover,2,mean)
56 | 


--------------------------------------------------------------------------------
/inst/reproducepointestimation/lognormal_functions.R:
--------------------------------------------------------------------------------
 1 | 
 2 | L <- 10
 3 | 
 4 | target <- list()
 5 | 
 6 | target$generate_randomness <- function(nobservations){
 7 |   return(rnorm(nobservations*L))
 8 | }
 9 | 
10 | target$robservation <- function(theta, randomness){
11 |   normals_ <- theta[1] + theta[2] * randomness
12 |   lognormals_ <- exp(normals_)
13 |   return(rowSums(matrix(lognormals_, ncol = L)))
14 | }
15 | 
16 | metricL1 = function(xvec, yvec)  mean(abs(xvec - yvec))
17 | 
18 | true_theta <- c(0,1)
19 | 
20 | thetadim = 2 #Inference on all parameters
21 | 
22 | target$thetadim = 2
23 | 


--------------------------------------------------------------------------------
/inst/tutorials/tutorial_cosine.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pierrejacob/winference/bba0e89a019ccfeb403159f480cf2ac57e577f9d/inst/tutorials/tutorial_cosine.pdf


--------------------------------------------------------------------------------
/inst/tutorials/tutorial_distances.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pierrejacob/winference/bba0e89a019ccfeb403159f480cf2ac57e577f9d/inst/tutorials/tutorial_distances.pdf


--------------------------------------------------------------------------------
/inst/tutorials/tutorial_gandk.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pierrejacob/winference/bba0e89a019ccfeb403159f480cf2ac57e577f9d/inst/tutorials/tutorial_gandk.pdf


--------------------------------------------------------------------------------
/inst/tutorials/tutorial_normal.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pierrejacob/winference/bba0e89a019ccfeb403159f480cf2ac57e577f9d/inst/tutorials/tutorial_normal.pdf


--------------------------------------------------------------------------------
/man/.Rapp.history:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pierrejacob/winference/bba0e89a019ccfeb403159f480cf2ac57e577f9d/man/.Rapp.history


--------------------------------------------------------------------------------
/man/cost_matrix_L1.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/metrics.R
 3 | \name{cost_matrix_L1}
 4 | \alias{cost_matrix_L1}
 5 | \title{cost_matrix_L1}
 6 | \usage{
 7 | cost_matrix_L1(x, y)
 8 | }
 9 | \description{
10 | Compute cost matrix L1 between two matrices of dimension d x N
11 | }
12 | 


--------------------------------------------------------------------------------
/man/cost_matrix_L2.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/metrics.R
 3 | \name{cost_matrix_L2}
 4 | \alias{cost_matrix_L2}
 5 | \title{cost_matrix_L2}
 6 | \usage{
 7 | cost_matrix_L2(x, y)
 8 | }
 9 | \description{
10 | Compute cost matrix L2 between two matrices of dimension d x N
11 | }
12 | 


--------------------------------------------------------------------------------
/man/cost_matrix_Lp.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/metrics.R
 3 | \name{cost_matrix_Lp}
 4 | \alias{cost_matrix_Lp}
 5 | \title{cost_matrix_Lp}
 6 | \usage{
 7 | cost_matrix_Lp(x, y, p)
 8 | }
 9 | \description{
10 | Compute cost matrix Lp between two matrices of dimension d x N
11 | }
12 | 


--------------------------------------------------------------------------------
/man/create_lagmatrix.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/create_lagmatrix.R
 3 | \name{create_lagmatrix}
 4 | \alias{create_lagmatrix}
 5 | \title{create_lagmatrix}
 6 | \usage{
 7 | create_lagmatrix(timeseries, k)
 8 | }
 9 | \value{
10 | a matrix with k rows and n-k columns, where n is the length of the provided time series
11 | }
12 | \description{
13 | This function creates the delay reconstruction, i.e. a matrix
14 | where the first row contains y_1, ..., y_T (the given univariate time series)
15 | the second row contains NA, y_2, ..., y_T .... etc
16 | the k-th row contains NA, NA, ..., y_{k+1}, ..., y_T
17 | }
18 | 


--------------------------------------------------------------------------------
/man/fast_dmvnorm.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/fast_dmvnorm.R
 3 | \name{fast_dmvnorm}
 4 | \alias{fast_dmvnorm}
 5 | \title{fast_dmvnorm}
 6 | \usage{
 7 | fast_dmvnorm(x, mean, covariance)
 8 | }
 9 | \description{
10 | evaluate multivariate Normal log-densities
11 | }
12 | 


--------------------------------------------------------------------------------
/man/fast_rmvnorm.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/fast_rmvnorm.R
 3 | \name{fast_rmvnorm}
 4 | \alias{fast_rmvnorm}
 5 | \title{fast_rmvnorm}
 6 | \usage{
 7 | fast_rmvnorm(nparticles, mean, covariance)
 8 | }
 9 | \description{
10 | generate multivariate Normal draws
11 | }
12 | 


--------------------------------------------------------------------------------
/man/get_autoregressive.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/model_get_autoregressive.R
 3 | \name{get_autoregressive}
 4 | \alias{get_autoregressive}
 5 | \title{Autoregressive model}
 6 | \usage{
 7 | get_autoregressive()
 8 | }
 9 | \value{
10 | The list contains rprior, dprior (generate and evaluate the density of prior distribution),
11 | generate_randomness (generate data-generating variables), robservation (create synthetic
12 | data sets), parameter_names (useful for plotting), thetadim (dimension of parameter),
13 | ydim (dimension of observations), parameters (list of hyperparameters,
14 | to be passed to rprior,dprior,robservation)
15 | }
16 | \description{
17 | This function returns a list representing an auto-regressive
18 | model of order 1.
19 | }
20 | 


--------------------------------------------------------------------------------
/man/get_cosine.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/model_get_cosine.R
 3 | \name{get_cosine}
 4 | \alias{get_cosine}
 5 | \title{Cosine model}
 6 | \usage{
 7 | get_cosine()
 8 | }
 9 | \value{
10 | The list contains rprior, dprior (generate and evaluate the density of prior distribution),
11 | generate_randomness (generate data-generating variables), robservation (create synthetic
12 | data sets), parameter_names (useful for plotting), thetadim (dimension of parameter),
13 | ydim (dimension of observations), parameters (list of hyperparameters,
14 | to be passed to rprior,dprior,robservation)
15 | }
16 | \description{
17 | This function returns a list representing a cosine trend model.
18 | }
19 | 


--------------------------------------------------------------------------------
/man/get_gandk.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/model_get_gandk.R
 3 | \name{get_gandk}
 4 | \alias{get_gandk}
 5 | \title{G and k model}
 6 | \usage{
 7 | get_gandk()
 8 | }
 9 | \value{
10 | The list contains rprior, dprior (generate and evaluate the density of prior distribution),
11 | generate_randomness (generate data-generating variables), robservation (create synthetic
12 | data sets), parameter_names (useful for plotting), thetadim (dimension of parameter),
13 | ydim (dimension of observations), parameters (list of hyperparameters,
14 | to be passed to rprior,dprior,robservation)
15 | }
16 | \description{
17 | This function returns a list representing the g-and-k
18 | quantile distribution.
19 | }
20 | 


--------------------------------------------------------------------------------
/man/get_levydriven.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/model_get_levydriven.R
 3 | \name{get_levydriven}
 4 | \alias{get_levydriven}
 5 | \title{Levy driven stochastic volatility model}
 6 | \usage{
 7 | get_levydriven()
 8 | }
 9 | \value{
10 | The list contains rprior, dprior (generate and evaluate the density of prior distribution),
11 | generate_randomness (generate data-generating variables), robservation (create synthetic
12 | data sets), parameter_names (useful for plotting), thetadim (dimension of parameter),
13 | ydim (dimension of observations), parameters (list of hyperparameters,
14 | to be passed to rprior,dprior,robservation)
15 | }
16 | \description{
17 | This function returns a list representing a Levy driven stochastic volatility model.
18 | }
19 | 


--------------------------------------------------------------------------------
/man/get_mgandk.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/model_get_mgandk.R
 3 | \name{get_mgandk}
 4 | \alias{get_mgandk}
 5 | \title{multivariate G and k model}
 6 | \usage{
 7 | get_mgandk()
 8 | }
 9 | \value{
10 | The list contains rprior, dprior (generate and evaluate the density of prior distribution),
11 | generate_randomness (generate data-generating variables), robservation (create synthetic
12 | data sets), parameter_names (useful for plotting), thetadim (dimension of parameter),
13 | ydim (dimension of observations), parameters (list of hyperparameters,
14 | to be passed to rprior,dprior,robservation)
15 | }
16 | \description{
17 | This function returns
18 | }
19 | 


--------------------------------------------------------------------------------
/man/get_multivariate_normal.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/model_get_multivariatenormal.R
 3 | \name{get_multivariate_normal}
 4 | \alias{get_multivariate_normal}
 5 | \title{Multivariate Normal model}
 6 | \usage{
 7 | get_multivariate_normal(dimension)
 8 | }
 9 | \value{
10 | The list contains rprior, dprior (generate and evaluate the density of prior distribution),
11 | generate_randomness (generate data-generating variables), robservation (create synthetic
12 | data sets), parameter_names (useful for plotting), thetadim (dimension of parameter),
13 | ydim (dimension of observations), parameters (list of hyperparameters,
14 | to be passed to rprior,dprior,robservation)
15 | }
16 | \description{
17 | This function returns a list representing
18 | a Normal location model, in a dimension specified by the user as an argument.
19 | }
20 | 


--------------------------------------------------------------------------------
/man/get_normal.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/model_get_normal.R
 3 | \name{get_normal}
 4 | \alias{get_normal}
 5 | \title{Normal model}
 6 | \usage{
 7 | get_normal()
 8 | }
 9 | \value{
10 | The list contains rprior, dprior (generate and evaluate the density of prior distribution),
11 | generate_randomness (generate data-generating variables), robservation (create synthetic
12 | data sets), parameter_names (useful for plotting), thetadim (dimension of parameter),
13 | ydim (dimension of observations), parameters (list of hyperparameters,
14 | to be passed to rprior,dprior,robservation)
15 | }
16 | \description{
17 | This function returns a list representing
18 | a Normal location model.
19 | The prior is mu ~ Normal(mu_0, nu^{-1}), where nu is precision
20 | and  tau ~ Gamma(alpha, beta), where beta is rate (1/scale).
21 |  The likelihood is Y ~ Normal(mu, tau^2) where tau^2 is the variance.
22 | }
23 | 


--------------------------------------------------------------------------------
/man/get_pz_4param.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/model_get_pz_4param.R
 3 | \name{get_pz_4param}
 4 | \alias{get_pz_4param}
 5 | \title{Phytoplankton-zooplankton model}
 6 | \usage{
 7 | get_pz_4param()
 8 | }
 9 | \value{
10 | The list contains rprior, dprior (generate and evaluate the density of prior distribution),
11 | generate_randomness (generate data-generating variables), robservation (create synthetic
12 | data sets), parameter_names (useful for plotting), thetadim (dimension of parameter),
13 | ydim (dimension of observations), parameters (list of hyperparameters,
14 | to be passed to rprior,dprior,robservation)
15 | }
16 | \description{
17 | This function returns a list representing
18 | a Lotka-Volterra type model for plankton. See
19 | Jones, E., Parslow, J., and Murray, L. (2010). A Bayesian approach to state and parameter estimation in a phytoplankton-zooplankton model. Australian Meteorological and Oceanographic Journal, 59:7-16.
20 | }
21 | 


--------------------------------------------------------------------------------
/man/get_ricker.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/model_get_ricker.R
 3 | \name{get_ricker}
 4 | \alias{get_ricker}
 5 | \title{Ricker model}
 6 | \usage{
 7 | get_ricker()
 8 | }
 9 | \value{
10 | The list contains rprior, dprior (generate and evaluate the density of prior distribution),
11 | generate_randomness (generate data-generating variables), robservation (create synthetic
12 | data sets), parameter_names (useful for plotting), thetadim (dimension of parameter),
13 | ydim (dimension of observations), parameters (list of hyperparameters,
14 | to be passed to rprior,dprior,robservation)
15 | }
16 | \description{
17 | This function returns a list representing
18 | the Ricker model in
19 | Wood (2010) Statistical inference for noisy nonlinear ecological dynamic systems
20 | }
21 | 


--------------------------------------------------------------------------------
/man/get_toggleswitch.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/model_get_toggleswitch.R
 3 | \name{get_toggleswitch}
 4 | \alias{get_toggleswitch}
 5 | \title{Toggle switch model}
 6 | \usage{
 7 | get_toggleswitch()
 8 | }
 9 | \value{
10 | The list contains rprior, dprior (generate and evaluate the density of prior distribution),
11 | generate_randomness (generate data-generating variables), robservation (create synthetic
12 | data sets), parameter_names (useful for plotting), thetadim (dimension of parameter),
13 | ydim (dimension of observations), parameters (list of hyperparameters,
14 | to be passed to rprior,dprior,robservation)
15 | }
16 | \description{
17 | This function returns a list representing the toggle switch model
18 | of Bonassi, F. V., West, M., et al. (2015).
19 | Sequential Monte Carlo with adaptive weights for approximate Bayesian computation. Bayesian Analysis, 10(1):171-187.
20 | }
21 | 


--------------------------------------------------------------------------------
/man/hilbert_order.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hilbert_order.R
 3 | \name{hilbert_order}
 4 | \alias{hilbert_order}
 5 | \title{hilbert_order}
 6 | \usage{
 7 | hilbert_order(x)
 8 | }
 9 | \value{
10 | a vector of index corresponding to the ordered samples
11 | }
12 | \description{
13 | This function returns the "Hilbert order" of a sample of n
14 | points of dimension d, stored in a matrix with d rows and n columns
15 | where d is the dimension of each sample and n the number of samples.
16 | The function essentially calls Hilbert_Sort_CGAL, from the CGAL library.
17 | }
18 | 


--------------------------------------------------------------------------------
/man/pz_transition.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/pz_transition.R
 3 | \name{pz_transition}
 4 | \alias{pz_transition}
 5 | \title{pz_transition}
 6 | \usage{
 7 | pz_transition(xparticles, alphas, time, parameters)
 8 | }
 9 | \description{
10 | Solve PZ ODE for each particle, given each alpha, from time to time + 1,
11 | and given the parameters (c, e, ml, mq).
12 | }
13 | 


--------------------------------------------------------------------------------
/man/setmytheme.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/setmytheme.R
 3 | \name{setmytheme}
 4 | \alias{setmytheme}
 5 | \title{Set My Theme}
 6 | \usage{
 7 | setmytheme()
 8 | }
 9 | \description{
10 | set theme for ggplot, to create consistent figures
11 | }
12 | 


--------------------------------------------------------------------------------
/man/systematic_resampling.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/systematic_resampling.R
 3 | \name{systematic_resampling}
 4 | \alias{systematic_resampling}
 5 | \title{systematic_resampling}
 6 | \usage{
 7 | systematic_resampling(normalized_weights)
 8 | }
 9 | \description{
10 | systematic_resampling
11 | }
12 | 


--------------------------------------------------------------------------------
/man/wasserstein.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/wasserstein.R
 3 | \name{wasserstein}
 4 | \alias{wasserstein}
 5 | \title{wasserstein}
 6 | \usage{
 7 | wasserstein(p, q, cost_matrix, epsilon, niterations)
 8 | }
 9 | \value{
10 | a list with "distances", "transportmatrix", "u" and "v"
11 | }
12 | \description{
13 | Compute regularized Wasserstein distance between two empirical distributions,
14 | p and q, specified as vector of probabilities summing to one.
15 | The third argument is the cost matrix, i.e. a matrix of pair-wise distances,
16 | the fourth argument is the regularization parameter, e.g. 0.05*median(cost_matrix),
17 | and the last argument is the number of Sinkhorn iterations to perform, e.g. 100.
18 | Important references are
19 | 
20 | - Cuturi, M. (2013). Sinkhorn distances: Lightspeed computation of optimal transport. In Advances in Neural Information Processing Systems (NIPS), pages 2292-2300.
21 | 
22 | - Cuturi, M. and Doucet, A. (2014). Fast computation of Wasserstein barycenters. In Proceedings of the 31st International Conference on Machine Learning (ICML), pages 685-693.
23 | }
24 | 


--------------------------------------------------------------------------------
/man/wcovariance.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/wcovariance.R
 3 | \name{wcovariance}
 4 | \alias{wcovariance}
 5 | \title{wcovariance}
 6 | \usage{
 7 | wcovariance(xparticles, normweights, mean)
 8 | }
 9 | \description{
10 | wcovariance
11 | }
12 | 


--------------------------------------------------------------------------------
/man/winference-package.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/winference-package.R
 3 | \docType{package}
 4 | \name{winference-package}
 5 | \alias{winference-package}
 6 | \alias{winference}
 7 | \title{winference}
 8 | \description{
 9 | ...
10 | }
11 | \details{
12 | ...
13 | }
14 | \author{
15 | anonymous <anonymous@anonymous.com>
16 | }
17 | \keyword{package}
18 | 


--------------------------------------------------------------------------------
/man/wmean.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/wmean.R
 3 | \name{wmean}
 4 | \alias{wmean}
 5 | \title{wmean}
 6 | \usage{
 7 | wmean(xparticles, normweights)
 8 | }
 9 | \description{
10 | wmean
11 | }
12 | 


--------------------------------------------------------------------------------
/src/HilbertCode.cpp:
--------------------------------------------------------------------------------
 1 | #include <math.h>
 2 | #include <CGAL/Exact_predicates_inexact_constructions_kernel.h>
 3 | #include <CGAL/hilbert_sort.h>
 4 | #include <CGAL/Simple_cartesian.h>
 5 | #include <CGAL/spatial_sort.h>
 6 | #include <CGAL/Spatial_sort_traits_adapter_d.h>
 7 | #include <boost/iterator/counting_iterator.hpp>
 8 | #include <CGAL/Cartesian_d.h>
 9 | #include <CGAL/Homogeneous_d.h>
10 | #include <iostream>
11 | #include <vector>
12 | #include "HilbertCode.h"
13 | 
14 | #include <stdio.h>
15 | #include <stdlib.h>  //malloc
16 | 
17 | typedef CGAL::Cartesian_d<double> Kernel;
18 | typedef Kernel::Point_d                                Point_d;
19 | 
20 | typedef 
21 |   CGAL::Spatial_sort_traits_adapter_d<Kernel,Point_d*>  Search_traits_d;
22 | 
23 | 
24 | void Hilbert_Sort_CGAL(double *x, int dx, int N, double *J)
25 | {
26 | 	int i,k;
27 | 	double *work = new double[dx];
28 | 
29 | 	std::vector<Point_d> points; 
30 | 	for(i=0;i<N;i++)
31 | 	{
32 | 		for(k=0;k<dx;k++)
33 | 		{
34 | 			work[k]=x[dx*i+k];
35 | 		}
36 | 		
37 | 		points.push_back(Point_d(dx,work+0, work+dx));
38 | 	}
39 | 
40 | 	std::vector<std::ptrdiff_t> indices;
41 |   	indices.reserve(points.size());
42 |   
43 |   	std::copy(boost::counting_iterator<std::ptrdiff_t>(0),
44 |             boost::counting_iterator<std::ptrdiff_t>(points.size()),
45 |             std::back_inserter(indices));
46 |   
47 |   	CGAL::hilbert_sort( indices.begin(),indices.end(),Search_traits_d(&(points[0])) );
48 |   
49 |   	for (i=0;i<N;i++)
50 |   	{
51 | 		 J[i]=indices[i];
52 |   	}
53 | 
54 | 	delete [] work;
55 | 	work=NULL;
56 | }
57 | 
58 | 
59 | 
60 | 
61 | 
62 | 
63 | 
64 | 
65 | 
66 | 
67 | 
68 | 
69 | 
70 | 
71 | 
72 | 
73 | 
74 | 
75 | 
76 | 


--------------------------------------------------------------------------------
/src/HilbertCode.h:
--------------------------------------------------------------------------------
1 | #ifndef HILB_H_
2 | #define HILB_H_
3 | 
4 | void Hilbert_Sort_CGAL(double *x,   int dx, int N, double *J);
5 | 
6 | #endif
7 | 
8 | 


--------------------------------------------------------------------------------
/src/Makevars:
--------------------------------------------------------------------------------
1 | CXX_STD = CXX14
2 | PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS)
3 | 


--------------------------------------------------------------------------------
/src/compute_cost.cpp:
--------------------------------------------------------------------------------
  1 | #include <math.h>
  2 | #include <iostream>
  3 | #include <vector>
  4 | #include <algorithm>
  5 | #include <RcppEigen.h>
  6 | using namespace Rcpp;
  7 | using namespace std;
  8 | using namespace Eigen;
  9 | 
 10 | 
 11 | // Takes two matrices of size d times N1 and d times N2, returns an N1 times N2 matrix
 12 | // [[Rcpp::export]]
 13 | NumericMatrix cost_matrix_L1_(const NumericMatrix & x, const NumericMatrix & y){
 14 |   NumericMatrix cost(x.cols(), y.cols());
 15 |   for (int ix = 0; ix < x.cols(); ix ++){
 16 |     for (int iy = 0; iy < y.cols(); iy ++){
 17 |       cost(ix,iy) = 0;
 18 |       // loop over components, stored in columns
 19 |       for (int id = 0; id < x.rows(); id ++){
 20 |         // cost(ix,iy) += std::abs(x(ix,id)-y(iy,id));
 21 |         cost(ix,iy) += std::abs(x(id,ix)-y(id,iy));
 22 |       }
 23 |     }
 24 |   }
 25 |   return cost;
 26 | }
 27 | // Takes two matrices of size d times N1 and d times N2, returns an N1 times N2 matrix
 28 | // [[Rcpp::export]]
 29 | NumericMatrix cost_matrix_L2_(const NumericMatrix & x, const NumericMatrix & y){
 30 |   NumericMatrix cost(x.cols(), y.cols());
 31 |   for (int ix = 0; ix < x.cols(); ix ++){
 32 |     for (int iy = 0; iy < y.cols(); iy ++){
 33 |       cost(ix,iy) = 0;
 34 |       // loop over components, stored in columns
 35 |       for (int id = 0; id < x.rows(); id ++){
 36 |         // cost(ix,iy) += std::abs(x(ix,id)-y(iy,id));
 37 |         cost(ix,iy) += std::pow(x(id,ix)-y(id,iy),2);
 38 |       }
 39 |       cost(ix,iy) = std::sqrt(cost(ix,iy));
 40 |     }
 41 |   }
 42 |   return cost;
 43 | }
 44 | 
 45 | // Takes two matrices of size d times N1 and d times N2, returns an N1 times N2 matrix
 46 | // [[Rcpp::export]]
 47 | NumericMatrix cost_matrix_Lp_(const NumericMatrix & x, const NumericMatrix & y, double p){
 48 |   NumericMatrix cost(x.cols(), y.cols());
 49 |   double z = 0;
 50 |   for (int ix = 0; ix < x.cols(); ix ++){
 51 |     for (int iy = 0; iy < y.cols(); iy ++){
 52 |       cost(ix,iy) = 0;
 53 |       // loop over components
 54 |       for (int id = 0; id < x.rows(); id ++){
 55 |         z = x(id,ix)-y(id,iy);
 56 |         cost(ix,iy) += std::pow(std::fabs(z), p);
 57 |       }
 58 |       cost(ix,iy) = std::pow(cost(ix,iy), 1/p);
 59 |     }
 60 |   }
 61 |   return cost;
 62 | }
 63 | 
 64 | // // [[Rcpp::export]]
 65 | // NumericMatrix compute_cost1_(const NumericVector & x, const NumericVector & y){
 66 | //   NumericMatrix cost(x.size(), y.size());
 67 | //   for (int ix = 0; ix < x.size(); ix ++){
 68 | //     for (int iy = 0; iy < y.size(); iy ++){
 69 | //       cost(ix,iy) = std::abs(x(ix)-y(iy));
 70 | //     }
 71 | //   }
 72 | //   return cost;
 73 | // }
 74 | //
 75 | //
 76 | // // [[Rcpp::export]]
 77 | // NumericMatrix compute_cost2_(const NumericVector & x, const NumericVector & y){
 78 | //   NumericMatrix cost(x.size(), y.size());
 79 | //   for (int ix = 0; ix < x.size(); ix ++){
 80 | //     for (int iy = 0; iy < y.size(); iy ++){
 81 | //       cost(ix,iy) = std::pow(x(ix)-y(iy), 2);
 82 | //     }
 83 | //   }
 84 | //   return cost;
 85 | // }
 86 | //
 87 | // // [[Rcpp::export]]
 88 | // NumericMatrix cost_matrix_(const NumericMatrix & x, const NumericMatrix & y){
 89 | //   NumericMatrix cost(x.rows(), y.rows());
 90 | //   for (int ix = 0; ix < x.rows(); ix ++){
 91 | //     for (int iy = 0; iy < y.rows(); iy ++){
 92 | //       cost(ix,iy) = 0;
 93 | //       // loop over components, stored in columns
 94 | //       for (int id = 0; id < x.cols(); id ++){
 95 | //         // cost(ix,iy) += std::abs(x(ix,id)-y(iy,id));
 96 | //         cost(ix,iy) += std::pow(x(ix,id)-y(iy,id),2);
 97 | //       }
 98 | //       cost(ix,iy) = std::sqrt(cost(ix,iy));
 99 | //     }
100 | //   }
101 | //   return cost;
102 | // }
103 | 
104 | 


--------------------------------------------------------------------------------
/src/gandkinversecdf.cpp:
--------------------------------------------------------------------------------
 1 | #include <math.h>
 2 | #include <iostream>
 3 | #include <RcppEigen.h>
 4 | using namespace Rcpp;
 5 | using namespace std;
 6 | using namespace Eigen;
 7 | 
 8 | // [[Rcpp::export]]
 9 | NumericVector gandkinversecdf_(NumericVector x, NumericVector theta){
10 |   NumericVector z = qnorm(x);
11 |   double A = theta(0);
12 |   double B = theta(1);
13 |   double c = 0.8;
14 |   double g = theta(2);
15 |   double k = theta(3);
16 |   return A + B * (1 + c * (1 - exp(- g * z)) / (1 + exp(- g * z))) * pow((1 + pow(z, 2.0)), k) * z;
17 | }
18 | 
19 | // [[Rcpp::export]]
20 | NumericVector gandkinversecdf_givennormals_(NumericVector normals, NumericVector theta){
21 |   NumericVector z = normals;
22 |   double A = theta(0);
23 |   double B = theta(1);
24 |   double c = 0.8;
25 |   double g = theta(2);
26 |   double k = theta(3);
27 |   return A + B * (1 + c * (1 - exp(- g * z)) / (1 + exp(- g * z))) * pow((1 + pow(z, 2.0)), k) * z;
28 | }
29 | 
30 | // [[Rcpp::export]]
31 | double gandkcdf_(double y, NumericVector theta, int maxsteps = 1000, double tolerance = 1e-10,
32 |                  double lower = 1e-20, double upper = 1-1e-20){
33 |   double A = theta(0);
34 |   double B = theta(1);
35 |   double c = 0.8;
36 |   double g = theta(2);
37 |   double k = theta(3);
38 | 
39 |   int istep = 0;
40 |   double current_try = (upper + lower) / 2;
41 |   double current_size = (upper - lower) / 4;
42 |   NumericVector dd(1);
43 |   dd(0) = current_try;
44 |   NumericVector z = qnorm(dd);
45 |   double fattempt = A + B * (1 + c * (1 - exp(- g * z(0))) / (1 + exp(- g * z(0)))) * pow((1 + pow(z(0), 2.0)), k) * z(0);
46 |   while (!(fattempt > y-tolerance && fattempt < y+tolerance) && (istep < maxsteps)){
47 |     istep++;
48 |     if (fattempt > y-tolerance){
49 |       current_try = current_try - current_size;
50 |       dd(0) = current_try;
51 |       NumericVector z = qnorm(dd);
52 |       fattempt = A + B * (1 + c * (1 - exp(- g * z(0))) / (1 + exp(- g * z(0)))) * pow((1 + pow(z(0), 2.0)), k) * z(0);
53 |       current_size = current_size / 2;
54 |     } else {
55 |       current_try = current_try + current_size;
56 |       dd(0) = current_try;
57 |       NumericVector z = qnorm(dd);
58 |       fattempt = A + B * (1 + c * (1 - exp(- g * z(0))) / (1 + exp(- g * z(0)))) * pow((1 + pow(z(0), 2.0)), k) * z(0);
59 |       current_size = current_size / 2;
60 |     }
61 |   }
62 |   return current_try;
63 | }
64 | 


--------------------------------------------------------------------------------
/src/hilbert_order.cpp:
--------------------------------------------------------------------------------
 1 | #include <RcppEigen.h>
 2 | #include "HilbertCode.h"
 3 | using namespace Rcpp;
 4 | using namespace std;
 5 | using namespace Eigen;
 6 | 
 7 | // [[Rcpp::export]]
 8 | NumericVector hilbert_order_(NumericMatrix x){
 9 |   int dx = x.nrow();
10 |   int N = x.ncol();
11 |   NumericVector order_indices(N);
12 |   Hilbert_Sort_CGAL(REAL(x), dx, N, REAL(order_indices));
13 |   return order_indices;
14 | }
15 | 


--------------------------------------------------------------------------------
/src/levydriven_.cpp:
--------------------------------------------------------------------------------
 1 | #include <RcppEigen.h>
 2 | using namespace Rcpp;
 3 | using namespace std;
 4 | using namespace Eigen;
 5 | 
 6 | 
 7 | // [[Rcpp::export]]
 8 | List levydriven_rtransition_rand_cpp(int nparticles, NumericVector & theta){
 9 |   RNGScope scope;
10 |   NumericVector sum_weighted_e(nparticles);
11 |   NumericVector sum_e(nparticles);
12 |   NumericVector k = rpois(nparticles, theta[4] * theta[2] * theta[2] / theta[3]);
13 |   for (int i = 0; i < nparticles; i++){
14 |     NumericVector c = runif(k(i));
15 |     NumericVector e = rexp(k(i), theta[2]/theta[3]); // rcpp rexp is parametrized with rate
16 |     sum_e(i) = sum(e);
17 |     sum_weighted_e(i) = sum(exp(-theta[4] * c) * e);
18 |   }
19 |   return(List::create(Named("sum_weighted_e") = sum_weighted_e, Named("sum_e") = sum_e));
20 | }
21 | 


--------------------------------------------------------------------------------
/src/median.cpp:
--------------------------------------------------------------------------------
 1 | #include <Rcpp.h>
 2 | using namespace Rcpp;
 3 | 
 4 | // [[Rcpp::export]]
 5 | double median_rcpp(NumericVector x) {
 6 |   NumericVector y = clone(x);
 7 |   int n, half;
 8 |   double y1, y2;
 9 |   n = y.size();
10 |   half = n / 2;
11 |   if(n % 2 == 1) {
12 |     // median for odd length vector
13 |     std::nth_element(y.begin(), y.begin()+half, y.end());
14 |     return y[half];
15 |   } else {
16 |     // median for even length vector
17 |     std::nth_element(y.begin(), y.begin()+half, y.end());
18 |     y1 = y[half];
19 |     std::nth_element(y.begin(), y.begin()+half-1, y.begin()+half);
20 |     y2 = y[half-1];
21 |     return (y1 + y2) / 2.0;
22 |   }
23 | }
24 | 


--------------------------------------------------------------------------------
/src/median.h:
--------------------------------------------------------------------------------
1 | #ifndef MEDIAN_H_
2 | #define MEDIAN_H_
3 | #include <RcppEigen.h>
4 | using namespace Rcpp;
5 | 
6 | double median_rcpp(NumericVector x);
7 | 
8 | #endif
9 | 


--------------------------------------------------------------------------------
/src/mmd.cpp:
--------------------------------------------------------------------------------
 1 | #include <Rcpp.h>
 2 | using namespace Rcpp;
 3 | 
 4 | // [[Rcpp::export]]
 5 | double mmd_c(double first_term, double eps, const NumericMatrix & x, const NumericMatrix & y){
 6 |   int nobs = y.cols();
 7 |   int dimension = y.rows();
 8 |   double result = first_term;
 9 |   double cost_xx;
10 |   double cost_xy;
11 |   double second_term = 0;
12 |   double third_term = 0;
13 |   for (int i1 = 0; i1 < nobs; i1 ++){
14 |     for (int i2 = 0; i2 < nobs; i2 ++){
15 |       cost_xx = 0;
16 |       cost_xy = 0;
17 |       for (int id = 0; id < dimension; id ++){
18 |         cost_xx += std::pow(x(id,i1)-x(id,i2),2);
19 |         cost_xy += std::pow(x(id,i1)-y(id,i2),2);
20 |       }
21 |       second_term += exp(-cost_xx / (2*eps*eps));
22 |       third_term += exp(-cost_xy / (2*eps*eps));
23 |     }
24 |   }
25 |   result += second_term / (nobs * nobs) - 2 * third_term / (nobs*nobs);
26 |   return result;
27 | }
28 | 


--------------------------------------------------------------------------------
/src/mvnorm.cpp:
--------------------------------------------------------------------------------
 1 | #include <RcppEigen.h>
 2 | #include "mvnorm.h"
 3 | using namespace Rcpp;
 4 | 
 5 | // [[Rcpp::export]]
 6 | NumericMatrix rmvnorm(int nsamples, const NumericVector & mean, const NumericMatrix & covariance){
 7 |   RNGScope scope;
 8 |   int ncols = covariance.cols();
 9 |   const Eigen::Map<Eigen::MatrixXd> covariance_(as<Eigen::Map<Eigen::MatrixXd> >(covariance));
10 |   Eigen::MatrixXd cholesky_covariance(covariance_.llt().matrixU());
11 |   Eigen::MatrixXd Y(nsamples, ncols);
12 |   for(int i = 0; i < ncols; i++){
13 |     Y.col(i) = as<Eigen::ArrayXd>(rnorm(nsamples));
14 |   }
15 |   Y = Y * cholesky_covariance;
16 |   for(int j = 0; j < ncols; j++){
17 |     for(int i = 0; i < nsamples; i++){
18 |       Y(i,j) = Y(i,j) + mean(j);
19 |     }
20 |   }
21 |   return wrap(Y);
22 | }
23 | 
24 | 
25 | NumericMatrix centered_rmvnorm(int nsamples, const Eigen::MatrixXd & cholesky_covariance){
26 |   // sample centered gaussian variates given the upper triangular factor in the cholesky decomposition of the covariance
27 |   RNGScope scope;
28 |   int ncols = cholesky_covariance.cols();
29 |   Eigen::MatrixXd Y(nsamples, ncols);
30 |   for(int i = 0; i < ncols; i++){
31 |     Y.col(i) = as<Eigen::ArrayXd>(rnorm(nsamples));
32 |   }
33 |   Y = Y * cholesky_covariance;
34 |   return wrap(Y);
35 | }
36 | 
37 | // [[Rcpp::export]]
38 | NumericVector dmvnorm(const NumericMatrix & x, const NumericVector & mean, const NumericMatrix & covariance){
39 |   const Eigen::Map<Eigen::MatrixXd> covariance_(as<Eigen::Map<Eigen::MatrixXd> >(covariance));
40 |   const Eigen::Map<Eigen::MatrixXd> x_(as<Eigen::Map<Eigen::MatrixXd> >(x));
41 |   Eigen::LLT<Eigen::MatrixXd> lltofcov(covariance_);
42 |   Eigen::MatrixXd lower = lltofcov.matrixL();
43 |   Eigen::MatrixXd xcentered(x_);
44 |   double halflogdeterminant = lower.diagonal().array().log().sum();;
45 |   double cst = - (halflogdeterminant) - (x.cols() * 0.9189385);
46 |   for(int j = 0; j < x.cols(); j++){
47 |     for(int i = 0; i < x.rows(); i++){
48 |       xcentered(i,j) = xcentered(i,j) - mean(j);
49 |     }
50 |   }
51 |   Eigen::VectorXd results = -0.5 * lower.triangularView<Eigen::Lower>().solve(xcentered.transpose()).colwise().squaredNorm();
52 |   for (int i = 0; i < results.size(); i++){
53 |     results(i) = results(i) + cst;
54 |   }
55 |   return wrap(results);
56 | }
57 | 
58 | RCPP_MODULE(module_mvnorm) {
59 |   function( "rmvnorm", &rmvnorm );
60 |   function( "dmvnorm", &dmvnorm );
61 | }
62 | 
63 | 
64 | 


--------------------------------------------------------------------------------
/src/mvnorm.h:
--------------------------------------------------------------------------------
 1 | #ifndef _INCL_MVNORM_
 2 | #define _INCL_MVNORM_
 3 | #include <RcppEigen.h>
 4 | using namespace Rcpp;
 5 | 
 6 | // generate samples from a multivariate normal distribution
 7 | NumericMatrix rmvnorm(int nsamples, const NumericVector & mean, const NumericMatrix & covariance);
 8 | 
 9 | NumericMatrix centered_rmvnorm(int nsamples, const Eigen::MatrixXd & cholesky_covariance);
10 | 
11 | // evaluate probability density function of a multivariate normal distribution
12 | NumericVector dmvnorm(const NumericMatrix & x, const NumericVector & mean, const NumericMatrix & covariance);
13 | 
14 | #endif
15 | 
16 | 


--------------------------------------------------------------------------------
/src/pz_functions.cpp:
--------------------------------------------------------------------------------
 1 | #include <boost/array.hpp>
 2 | #include <boost/numeric/odeint.hpp>
 3 | #include <Rcpp.h>
 4 | 
 5 | // [[Rcpp::depends(BH)]]
 6 | // [[Rcpp::plugins(cpp11)]]
 7 | 
 8 | using namespace std;
 9 | using namespace boost::numeric::odeint;
10 | 
11 | // const double sigma = 10.0, r = 28.0, b = 8.0 / 3.0;
12 | 
13 | typedef boost::array< double , 2> state_type;
14 | 
15 | class pz_ {
16 | 
17 |   double alpha, c, e, ml, mq;
18 | 
19 |   public:
20 |     pz_( double alpha, double c, double e, double ml, double mq ) : alpha(alpha), c(c), e(e), ml(ml), mq(mq) { }
21 | 
22 |   void operator() ( const state_type &x , state_type &dxdt , const double /* t */ )
23 |   {
24 |     dxdt[0] = alpha * x[0] - c * x[0] * x[1];
25 |     dxdt[1] = e * c * x[0] * x[1] - ml * x[1] - mq * x[1] * x[1];
26 |   }
27 | };
28 | 
29 | struct push_back_state_and_time
30 | {
31 |   std::vector< state_type >& m_states;
32 |   std::vector< double >& m_times;
33 | 
34 |   push_back_state_and_time( std::vector< state_type > &states , std::vector< double > &times )
35 |   : m_states( states ) , m_times( times ) { }
36 | 
37 |   void operator()( const state_type &x , double t )
38 |   {
39 |     m_states.push_back( x );
40 |     m_times.push_back( t );
41 |   }
42 | };
43 | 
44 | using namespace Rcpp;
45 | 
46 | // // [[Rcpp::export]]
47 | // NumericVector one_step_pz_(double P, double Z,  double t, double alpha, double c, double e, double ml, double mq ) {
48 | //   state_type x = { P , Z }; // initial conditions
49 | //   vector<state_type> x_vec;
50 | //   vector<double> times;
51 | //   pz_ pz_instance(alpha, c, e, ml, mq);
52 | //   size_t steps = integrate( pz_instance , x , t , t + 1, 1.0 , push_back_state_and_time(x_vec, times));
53 | //   NumericVector result = NumericVector::create(0, 0);
54 | //   result(0) = x_vec[steps][0];
55 | //   result(1) = x_vec[steps][1];
56 | //   return result;
57 | // }
58 | 
59 | // [[Rcpp::export]]
60 | NumericMatrix one_step_pz_vector(NumericMatrix xparticles, NumericVector alphas, double t, NumericVector parameters){
61 |   double c = parameters[0];
62 |   double e = parameters[1];
63 |   double  ml = parameters[2];
64 |   double  mq = parameters[3];
65 |   NumericMatrix result(2, xparticles.cols());
66 |   for (int i = 0; i < xparticles.cols(); i++){
67 |     double P = xparticles(0, i);
68 |     double Z = xparticles(1, i);
69 |     state_type x = { P , Z }; // initial conditions
70 |     vector<state_type> x_vec;
71 |     vector<double> times;
72 |     pz_ pz_instance(alphas(i), c, e, ml, mq);
73 |     size_t steps = integrate( pz_instance , x , t , t + 1, 1.0 , push_back_state_and_time(x_vec, times));
74 |     result(0, i) = x_vec[steps][0];
75 |     result(1, i) = x_vec[steps][1];
76 |   }
77 |   return result;
78 | }
79 | 
80 | // [[Rcpp::export]]
81 | NumericVector pz_generate_randomness_cpp(int nparticles, int datalength){
82 |   RNGScope scope;
83 |   NumericVector normal_draws = rnorm((2 + datalength) * nparticles, 0, 1);
84 |   return normal_draws;
85 | }
86 | 
87 | // [[Rcpp::export]]
88 | NumericVector pz_perturb_randomness_cpp(const NumericVector & randomness, double rho){
89 |   RNGScope scope;
90 |   int l = randomness.size();
91 |   NumericVector newrand(l);
92 |   double v = sqrt(1.0 - rho*rho);
93 |   NumericVector normal_draws = rnorm(l, 0, 1);
94 |   for (int i = 0; i < l; i ++){
95 |     newrand(i) =  rho * randomness(i) + v * normal_draws(i);
96 |   }
97 |   return newrand;
98 | }
99 | 


--------------------------------------------------------------------------------
/src/resampling.cpp:
--------------------------------------------------------------------------------
 1 | #include <RcppEigen.h>
 2 | #include "resampling.h"
 3 | using namespace Rcpp;
 4 | using namespace std;
 5 | 
 6 | 
 7 | void permute(IntegerVector & a, const int & nparticles){
 8 |   int swap;
 9 |   for (int i = 0; i < nparticles; i++){
10 |     if (a(i) != i && a(a(i)) != a(i)){
11 |       swap = a(a(i));
12 |       a(a(i)) = a(i);
13 |       a(i) = swap;
14 |       i--;
15 |     }
16 |   }
17 | }
18 | 
19 | // weights have to sum to 1
20 | void systematic(IntegerVector & ancestors, const NumericVector & weights, const int & nparticles){
21 |   RNGScope scope;
22 |   NumericVector u_vec = runif(1);
23 |   double u = u_vec(0) / nparticles;
24 |   int j = 0;
25 |   double csw = weights(0);
26 |   for(int k = 0; k < nparticles; k++){
27 |     while(csw < u){
28 |       j++;
29 |       csw += weights(j);
30 |     }
31 |     ancestors(k) = j;
32 |     u = u + 1./nparticles;
33 |   }
34 | }
35 | 
36 | // made for export to R
37 | // [[Rcpp::export]]
38 | IntegerVector systematic_resampling_(const NumericVector & weights){
39 |   RNGScope scope;
40 |   int nparticles = weights.size();
41 |   IntegerVector ancestors(nparticles);
42 |   NumericVector u_vec = runif(1);
43 |   double u = u_vec(0) / nparticles;
44 |   int j = 0;
45 |   double csw = weights(0);
46 |   for(int k = 0; k < nparticles; k++){
47 |     while(csw < u){
48 |       j++;
49 |       csw += weights(j);
50 |     }
51 |     ancestors(k) = j;
52 |     u = u + 1./nparticles;
53 |   }
54 |   return ancestors + 1;
55 | }
56 | 
57 | inline int randWrapper(const int n) { return floor(unif_rand()*n); }
58 | 
59 | // the weights in argument don't need to be normalised, and 'ancestors' don't need to be of the same size as 'weights'
60 | void multinomial(IntegerVector & ancestors, const NumericVector & weights){
61 |     RNGScope scope;
62 |     int nparents = weights.size();
63 |     NumericVector cumsumw = cumsum(weights);
64 |     int nchildren = ancestors.size();
65 |     NumericVector uniforms = runif(nchildren);
66 |     double sumw = cumsumw(nparents - 1);
67 |     double lnMax = 0;
68 |     int j = nparents;
69 |     for (int i = nchildren; i > 0; i--){
70 |       lnMax += log(uniforms(i-1)) / i;
71 |       uniforms(i-1) = sumw * exp(lnMax);
72 |       while (uniforms(i-1) < cumsumw(j-1)){
73 |         j --;
74 |       }
75 |       ancestors(i-1) = j;
76 |     }
77 |     std::random_shuffle(ancestors.begin(), ancestors.end(), randWrapper);
78 | }
79 | 


--------------------------------------------------------------------------------
/src/resampling.h:
--------------------------------------------------------------------------------
 1 | #ifndef _INCL_RESAMPLING_
 2 | #define _INCL_RESAMPLING_
 3 | #include <RcppEigen.h>
 4 | using namespace Rcpp;
 5 | using namespace std;
 6 | 
 7 | // permute the ancestors so that whenever o(i) > 0, a(i) = i; allows in-place resampling
 8 | void permute(IntegerVector & a, const int & nparticles);
 9 | // systematic resampling, weights have to sum to 1
10 | void systematic(IntegerVector & ancestors, const NumericVector & weights, const int & nparticles);
11 | // made for export to R; takes normalized weights
12 | IntegerVector systematic_resampling_(const NumericVector & weights);
13 | // multinomial resampling, weights don't have to sum to 1
14 | void multinomial(IntegerVector & ancestors, const NumericVector & weights);
15 | 
16 | #endif
17 | 


--------------------------------------------------------------------------------
/src/swapsweep.cpp:
--------------------------------------------------------------------------------
 1 | #include <Rcpp.h>
 2 | using namespace Rcpp;
 3 | // [[Rcpp::export]]
 4 | List swapsweep(IntegerVector permutation, const NumericMatrix Cp, double totalcost){
 5 | int n = Cp.cols();
 6 | double currentcost, proposedcost;
 7 | int perm_i, perm_j;
 8 | for (int i = 0; i < n - 1; i ++){
 9 |   int perm_i = permutation[i];
10 |   for (int j = i+1; j < n; j++){
11 |     perm_j = permutation[j];
12 |     currentcost = Cp(i, perm_i) + Cp(j, perm_j);
13 |     proposedcost = Cp(i, perm_j) + Cp(j, perm_i);
14 |     if (proposedcost < currentcost){
15 |       permutation[i] = perm_j;
16 |       permutation[j] = perm_i;
17 |       perm_i = perm_j;
18 |       totalcost = totalcost - currentcost + proposedcost;
19 |     }
20 |   }
21 | }
22 | return List::create(Named("totalcost")=totalcost,
23 |                     Named("permutation") = permutation);
24 | }
25 | 


--------------------------------------------------------------------------------
/src/systematic.cpp:
--------------------------------------------------------------------------------
 1 | #include <RcppEigen.h>
 2 | using namespace Rcpp;
 3 | using namespace std;
 4 | 
 5 | // [[Rcpp::export]]
 6 | IntegerVector systematic_resampling_n_(const NumericVector & weights, int ndraws, double u){
 7 |   RNGScope scope;
 8 |   int nparticles = weights.size();
 9 |   IntegerVector ancestors(ndraws);
10 |   u = u / ndraws;
11 |   int j = 0;
12 |   double csw = weights(0);
13 |   for(int k = 0; k < ndraws; k++){
14 |     while(csw < u){
15 |       j++;
16 |       csw += weights(j);
17 |     }
18 |     u = u + 1. / ndraws;
19 |     ancestors(k) = j;
20 |   }
21 |   // int swap;
22 |   // for (int i = 0; i < nparticles; i++){
23 |   //   if (ancestors(i) != i && ancestors(ancestors(i)) != ancestors(i)){
24 |   //     swap = ancestors(ancestors(i));
25 |   //     ancestors(ancestors(i)) = ancestors(i);
26 |   //     ancestors(i) = swap;
27 |   //     i--;
28 |   //   }
29 |   // }
30 |   return ancestors + 1;
31 | }
32 | 


--------------------------------------------------------------------------------
/src/toggle_switch.cpp:
--------------------------------------------------------------------------------
 1 | #include <Rcpp.h>
 2 | using namespace Rcpp;
 3 | 
 4 | 
 5 | double normal_pdf(double x, double m, double s)
 6 | {
 7 |   static const double inv_sqrt_2pi = 0.3989422804014327;
 8 |   double a = (x - m) / s;
 9 |   
10 |   return inv_sqrt_2pi / s * std::exp(-0.5f * a * a);
11 | }
12 | 
13 | 
14 | // [[Rcpp::export]]
15 | NumericVector cont_hist(NumericVector x, double h, NumericVector prob, NumericVector mids) {
16 |   
17 |   int n = x.size();
18 |   int m = prob.size();
19 |   
20 |   double sum;
21 |   NumericVector out(n);
22 |   
23 |   for(int i = 0; i<n; i++){
24 |     sum = 0.0;
25 |     for(int j = 0; j<m; j++){
26 |       sum += prob[j]*normal_pdf(x[i],mids[j],h);
27 |     }
28 |     out[i] = sum;
29 |   }
30 |   
31 |   return out;
32 | }
33 | 
34 | // [[Rcpp::export]]
35 | NumericVector summary_full(NumericVector y, double h, NumericMatrix prob_mat, NumericVector mids) {
36 |   
37 |   int n = prob_mat.nrow();
38 |   int m = y.size();
39 |   NumericVector out(n);
40 |   NumericVector store(m);
41 |   double sum;
42 |   
43 |   for(int i = 0; i<n; i++){
44 |     //NumericMatrix::Row prob = prob_mat(i,_);
45 |     store = cont_hist(y,h,prob_mat(i,_),mids);
46 |     sum = 0.0;
47 |     for(int j = 0; j<m; j++){
48 |       sum += std::log(store[j])/m;
49 |     }
50 |     out[i] = sum;
51 |   }
52 |   return out;
53 | }
54 | 
55 | 
56 | 


--------------------------------------------------------------------------------
/src/wasserstein.cpp:
--------------------------------------------------------------------------------
 1 | #include <RcppEigen.h>
 2 | using namespace Rcpp;
 3 | using namespace std;
 4 | using namespace Eigen;
 5 | 
 6 | // [[Rcpp::export]]
 7 | List wasserstein_(NumericVector p_, NumericVector q_, NumericMatrix cost_matrix_,
 8 |                   double epsilon, int niterations){
 9 |   // compute distance between p and q
10 |   // p corresponds to the weights of a N-sample
11 |   // each q corresponds to the weights of a M-sample
12 |   // Thus cost_matrix must be a N x M cost matrix
13 |   // epsilon is a regularization parameter, equal to 1/lambda in some references
14 |   int N = p_.size();
15 |   int M = q_.size();
16 |   
17 |   Map<MatrixXd> q(as<Map<MatrixXd> >(q_));
18 |   Map<MatrixXd> cost_matrix(as<Map<MatrixXd> >(cost_matrix_));
19 |   Map<VectorXd> p(as<Map<VectorXd> >(p_));
20 |   // avoid to take exp(k) when k is less than -500,
21 |   // as K then contains zeros, and then the upcoming computations divide by zero
22 |   MatrixXd K = (cost_matrix.array() * (-1./epsilon)).exp(); // K =  exp(- M / epsilon)
23 |   
24 |   // MatrixXd K = (cost_matrix.array() * (-1./epsilon)); // K =  exp(- M / epsilon)
25 |   // for (int i = 0; i < N; i++){
26 |     // for (int j = 0; j < M; j++){
27 |   //     if (K(i,j) < -500){
28 |   //       K(i,j) = exp(-500);
29 |   //     } else {
30 |         // K(i,j) = exp(K(i,j));
31 |   //     }
32 |     // }
33 |   // }
34 |   
35 |   MatrixXd K_transpose = K.transpose();
36 |   MatrixXd K_tilde = p.array().inverse().matrix().asDiagonal() * K; // diag(1/p) %*% K
37 |   MatrixXd u = VectorXd::Constant(N, 1./N);
38 |   for (int iteration = 0; iteration < niterations; iteration ++){
39 |     // cerr << iteration << endl;
40 |     //  u is set to 1 / (K_tilde %*% (qs / (K_transpose %*% u)))
41 |     u = 1. / (K_tilde * (q.array() / (K_transpose * u).array()).matrix()).array();
42 |     // for (int i = 0; i < N; i ++) cerr << u(i,0) << endl;
43 |   }
44 |   MatrixXd v = q.array() / (K_transpose * u).array();
45 |   // compute the optimal transport matrix between p and the first q
46 |   MatrixXd transportmatrix = u.col(0).asDiagonal() * K * v.col(0).asDiagonal();
47 |   MatrixXd uXIv = u.array() * ((K.array() * cost_matrix.array()).matrix() * v).array();
48 |   NumericVector d = wrap(uXIv.colwise().sum());
49 |   return List::create(Named("distances")=d,
50 |                       Named("transportmatrix") = wrap(transportmatrix),
51 |                       Named("u") = wrap(u),
52 |                       Named("v") = wrap(v));
53 | }
54 | 
55 | 


--------------------------------------------------------------------------------
/src/wasserstein_auto.cpp:
--------------------------------------------------------------------------------
 1 | #include <RcppEigen.h>
 2 | #include <algorithm>
 3 | using namespace Rcpp;
 4 | using namespace std;
 5 | using namespace Eigen;
 6 | 
 7 | // [[Rcpp::export]]
 8 | List wasserstein_auto_(NumericVector p_, NumericVector q_, NumericMatrix cost_matrix_,
 9 |                   double epsilon, double desired_alpha){
10 |   // compute distance between p and q
11 |   // p corresponds to the weights of a N-sample
12 |   // each q corresponds to the weights of a M-sample
13 |   // Thus cost_matrix must be a N x M cost matrix
14 |   // epsilon is a regularization parameter, equal to 1/lambda in some references
15 |   int N = p_.size();
16 |   int M = q_.size();
17 |   
18 |   Map<VectorXd> p(as<Map<VectorXd> >(p_));
19 |   Map<VectorXd> q(as<Map<VectorXd> >(q_));
20 |   Map<MatrixXd> cost_matrix(as<Map<MatrixXd> >(cost_matrix_));
21 |   // avoid to take exp(k) when k is less than -500,
22 |   // as K then contains zeros, and then the upcoming computations divide by zero
23 |   MatrixXd K = (cost_matrix.array() * (-1./epsilon)); // K =  exp(- M / epsilon)
24 |   for (int i = 0; i < N; i++){
25 |     for (int j = 0; j < M; j++){
26 |       if (K(i,j) < -500){
27 |         K(i,j) = exp(-500);
28 |       } else {
29 |         K(i,j) = exp(K(i,j));
30 |       }
31 |     }
32 |   }
33 |   MatrixXd K_transpose = K.transpose();
34 |   MatrixXd K_tilde = p.array().inverse().matrix().asDiagonal() * K; // diag(1/p) %*% K
35 |   VectorXd u = VectorXd::Constant(N, 1./N);
36 |   //
37 |   VectorXd marginal1, marginal2;
38 |   MatrixXd transportmatrix;
39 |   VectorXd v;
40 |   double alpha = 0;
41 |   double beta = 0;
42 |   int niterations_max = 1000;
43 |   int iteration = 0;
44 |   // for (int iteration = 0; iteration < niterations; iteration ++){
45 |   while ((iteration < niterations_max) and (alpha < desired_alpha)){
46 |     iteration ++;
47 |     u = 1. / (K_tilde * (q.array() / (K_transpose * u).array()).matrix()).array();
48 |     if (iteration % 10 == 1){
49 |       // check if criterion is satisfied
50 |       v = q.array() / (K_transpose * u).array();
51 |       transportmatrix = u.col(0).asDiagonal() * K * v.col(0).asDiagonal();
52 |       marginal1 = transportmatrix.rowwise().sum();
53 |       marginal2 = transportmatrix.colwise().sum();
54 |       alpha = 10;
55 |       for (int i = 0; i < N; i++){
56 |         beta = std::min(p(i) / marginal1(i), q(i) / marginal2(i));
57 |         alpha = std::min(alpha, beta);
58 |       }
59 |       // cerr << "alpha = " << alpha << endl;
60 |     }
61 |   }
62 |   v = q.array() / (K_transpose * u).array();
63 |   // compute the optimal transport matrix between p and the first q
64 |   transportmatrix = u.col(0).asDiagonal() * K * v.col(0).asDiagonal();
65 |   // MatrixXd uXIv = u.array() * ((K.array() * cost_matrix.array()).matrix() * v).array();
66 |   // NumericVector d = wrap(uXIv.colwise().sum());
67 |   return List::create(Named("transportmatrix") = wrap(transportmatrix),
68 |                       Named("u") = wrap(u),
69 |                       Named("v") = wrap(v),
70 |                       Named("iteration") = iteration);
71 | }
72 | 
73 | 


--------------------------------------------------------------------------------
/src/weighted_averages.cpp:
--------------------------------------------------------------------------------
 1 | #include <RcppEigen.h>
 2 | #include "weighted_averages.h"
 3 | using namespace Rcpp;
 4 | 
 5 | // [[Rcpp::export]]
 6 | NumericVector wmean_(const NumericMatrix & x, const NumericVector & unnormalized_w){
 7 |   int nrows = x.rows();
 8 |   int ncols = x.cols();
 9 |   double sumw = sum(unnormalized_w);
10 |   NumericVector result(ncols);
11 |   double cumsumxw;
12 |   for (int icol = 0; icol < ncols; icol++){
13 |     cumsumxw = 0.;
14 |     for (int irow = 0; irow < nrows ; irow++){
15 |       cumsumxw += unnormalized_w(irow) * x(irow, icol);
16 |     }
17 |     result(icol) = cumsumxw / sumw;
18 |   }
19 |   return result;
20 | }
21 | 
22 | // [[Rcpp::export]]
23 | NumericMatrix wcovariance_(const NumericMatrix & x, const NumericVector & unnormalized_w, const NumericVector & xbar){
24 |   int nrows = x.rows();
25 |   int ncols = x.cols();
26 |   double sumw = sum(unnormalized_w);
27 |   double sumsqw = sum(unnormalized_w*unnormalized_w);
28 |   NumericMatrix result(ncols, ncols);
29 |   std::fill(result.begin(), result.end(), 0);
30 |   for (int i = 0; i < ncols; i++){
31 |     for (int j = 0; j < ncols; j++){
32 |       for (int irow = 0; irow < nrows ; irow++){
33 |         result(i, j) += unnormalized_w(irow) * (x(irow, i) - xbar(i)) * (x(irow, j) - xbar(j));
34 |       }
35 |       result(i,j) /=  (sumw - sumsqw / sumw);
36 |     }
37 |   }
38 |   return result ;
39 | }
40 | 
41 | RCPP_MODULE(module_waverage) {
42 |   function( "wmean", &wmean_ );
43 |   function( "wcovariance", &wcovariance_ );
44 | }
45 | 
46 | 
47 | 
48 | 


--------------------------------------------------------------------------------
/src/weighted_averages.h:
--------------------------------------------------------------------------------
 1 | #ifndef _INCL_WEIGHTED_AVRG_
 2 | #define _INCL_WEIGHTED_AVRG_
 3 | #include <RcppEigen.h>
 4 | using namespace Rcpp;
 5 | 
 6 | 
 7 | NumericVector wmean_(const NumericMatrix & x, const NumericVector & unnormalized_w);
 8 | 
 9 | NumericMatrix wcovariance_(const NumericMatrix & x, const NumericVector & unnormalized_w, const NumericVector & xbar);
10 | 
11 | #endif
12 | 


--------------------------------------------------------------------------------
/winference.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | PackageRoxygenize: rd,collate,namespace,vignette
22 | 


--------------------------------------------------------------------------------