├── .Rbuildignore ├── .Rprofile ├── .gitignore ├── DESCRIPTION ├── NAMESPACE ├── R ├── RcppExports.R ├── create_lagmatrix.R ├── fast_dmvnorm.R ├── fast_rmvnorm.R ├── gandkinversecdf.R ├── get_my_colors.R ├── hilbert_order.R ├── levydriventransition.R ├── metrics.R ├── metropolishastings.R ├── mmd.R ├── model_get_autoregressive.R ├── model_get_cosine.R ├── model_get_gandk.R ├── model_get_levydriven.R ├── model_get_mgandk.R ├── model_get_multivariatenormal.R ├── model_get_normal.R ├── model_get_pz_4param.R ├── model_get_queue.R ├── model_get_ricker.R ├── model_get_toggleswitch.R ├── particle_filter.R ├── particlemetropolishastings.R ├── plot_functions.R ├── proposals.R ├── pz_transition.R ├── setmytheme.R ├── systematic_resampling.R ├── wasserstein.R ├── wcovariance.R ├── winference-package.R ├── wmean.R ├── wsmc.R └── wsmc_to_dataframe.R ├── README.md ├── WInference.Rproj ├── inst ├── reproduceabc │ ├── README.R │ ├── ar1 │ │ ├── .Rapp.history │ │ ├── README.R │ │ ├── ar1_generate_data.R │ │ ├── ar1_plots.R │ │ ├── ar1_wsmc_delay1.R │ │ └── ar1_wsmc_marginal.R │ ├── cosine │ │ ├── README.R │ │ ├── cosine_generate_data.R │ │ ├── cosine_mcmc.R │ │ ├── cosine_plots.R │ │ ├── cosine_wsmc_curvematching_wasserstein.R │ │ └── cosine_wsmc_euclidean.R │ ├── gandk │ │ ├── README.R │ │ ├── gandk_abctools.R │ │ ├── gandk_generate_data.R │ │ ├── gandk_mcmc.R │ │ ├── gandk_plots_compare.R │ │ ├── gandk_plots_convergence.R │ │ ├── gandk_plots_ncomputed.R │ │ ├── gandk_plots_threshold.R │ │ ├── gandk_plots_w_to_posterior.R │ │ └── gandk_wsmc.R │ ├── levydriven │ │ ├── README.R │ │ ├── levydriven_generate_data.R │ │ ├── levydriven_is_correction.R │ │ ├── levydriven_mh.R │ │ ├── levydriven_plots.R │ │ ├── levydriven_timings.R │ │ ├── levydriven_wsmc_hilbert.R │ │ └── levydriven_wsmc_with_summary.R │ ├── mgandk │ │ ├── README.R │ │ ├── mgandk_generate_data.R │ │ ├── mgandk_mcmc.R │ │ ├── mgandk_plots.R │ │ ├── mgandk_plots_ncomputed.R │ │ ├── mgandk_plots_threshold.R │ │ ├── mgandk_plots_wdist_to_posterior.R │ │ ├── mgandk_timings.R │ │ ├── mgandk_wsmc_hilbert.R │ │ ├── mgandk_wsmc_mmd.R │ │ ├── mgandk_wsmc_swap.R │ │ └── mgandk_wsmc_wasserstein.R │ ├── mvnormal │ │ ├── README.R │ │ ├── mvnormal_generate_data.R │ │ ├── mvnormal_plots.R │ │ ├── mvnormal_rejection_summary.R │ │ ├── mvnormal_rejection_wasserstein.R │ │ ├── mvnormal_timings.R │ │ ├── mvnormal_wsmc_euclidean.R │ │ ├── mvnormal_wsmc_summary.R │ │ └── mvnormal_wsmc_wasserstein.R │ ├── queue │ │ ├── 50.intermediateobs.neal.Rdata │ │ ├── README.R │ │ ├── queue_abctools.R │ │ ├── queue_plots_compare.R │ │ ├── queue_pmmh_intermediate.R │ │ └── queue_wsmc_marginal_intermediate.R │ ├── supplementary │ │ ├── README.R │ │ ├── check_assumption_plot.R │ │ ├── gandk.checkassumption.R │ │ ├── multivariate.transportdistancecomparison.R │ │ ├── multivariate.transportdistancecomparison.plots.R │ │ ├── wasserstein_clt_mvtnorm_mean.R │ │ └── wasserstein_clt_mvtnorm_mean_plots.R │ └── toggleswitch │ │ ├── README.R │ │ ├── toggle_switch_generate.R │ │ ├── toggle_switch_load_summary.R │ │ ├── toggle_switch_plots.R │ │ ├── toggle_switch_summary.R │ │ ├── toggle_switch_summary_smc.R │ │ └── toggle_switch_wsmc.R ├── reproducepointestimation │ ├── README.R │ ├── cauchydata_normalfit_fixedmk_diffn.R │ ├── cauchydata_normalfit_fixedmk_diffn_KL.R │ ├── gamma_normal_bootstrap.R │ ├── gamma_normal_fixedmk_diffn.R │ ├── gamma_normal_fixedn_diffmk.R │ ├── gamma_normal_functions.R │ ├── gandk.cluster.script.R │ ├── gandk_correlated.R │ ├── gandk_coverage.R │ ├── gandk_functions.R │ ├── gandk_plots.R │ ├── lognormal.cluster.script.R │ ├── lognormal_correlated.R │ ├── lognormal_coverage.R │ ├── lognormal_functions.R │ └── lognormal_plots.R └── tutorials │ ├── tutorial_cosine.Rmd │ ├── tutorial_cosine.pdf │ ├── tutorial_distances.Rmd │ ├── tutorial_distances.pdf │ ├── tutorial_gandk.Rmd │ ├── tutorial_gandk.pdf │ ├── tutorial_normal.Rmd │ └── tutorial_normal.pdf ├── man ├── .Rapp.history ├── cost_matrix_L1.Rd ├── cost_matrix_L2.Rd ├── cost_matrix_Lp.Rd ├── create_lagmatrix.Rd ├── fast_dmvnorm.Rd ├── fast_rmvnorm.Rd ├── get_autoregressive.Rd ├── get_cosine.Rd ├── get_gandk.Rd ├── get_levydriven.Rd ├── get_mgandk.Rd ├── get_multivariate_normal.Rd ├── get_normal.Rd ├── get_pz_4param.Rd ├── get_ricker.Rd ├── get_toggleswitch.Rd ├── hilbert_order.Rd ├── pz_transition.Rd ├── setmytheme.Rd ├── systematic_resampling.Rd ├── wasserstein.Rd ├── wcovariance.Rd ├── winference-package.Rd └── wmean.Rd ├── src ├── HilbertCode.cpp ├── HilbertCode.h ├── Makevars ├── RcppExports.cpp ├── compute_cost.cpp ├── gandkinversecdf.cpp ├── hilbert_order.cpp ├── levydriven_.cpp ├── median.cpp ├── median.h ├── mmd.cpp ├── mvnorm.cpp ├── mvnorm.h ├── pz_functions.cpp ├── resampling.cpp ├── resampling.h ├── swapsweep.cpp ├── systematic.cpp ├── toggle_switch.cpp ├── variouswasserstein.cpp ├── wasserstein.cpp ├── wasserstein_auto.cpp ├── wasserstein_semi_discrete.cpp ├── weighted_averages.cpp └── weighted_averages.h └── winference.Rproj /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | -------------------------------------------------------------------------------- /.Rprofile: -------------------------------------------------------------------------------- 1 | .First <- function(){ 2 | print("hello") 3 | Sys.setenv(MAKEFLAGS = " -j4") 4 | } 5 | 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.txt 2 | *.tex 3 | .*.swp 4 | .Rproj.user 5 | .Rhistory 6 | .RData 7 | *.RData 8 | inst/reproducepointestimation/*.RData 9 | inst/reproducepointestimation/*.Rdata 10 | inst/reproducepointestimation/*.pdf 11 | inst/reproducepointestimation/*.png 12 | .Ruserdata 13 | .DS_Store 14 | src/*.o 15 | src/*.so 16 | src/*.dll 17 | TODO 18 | inst/tutorials/*cache*/ 19 | inst/tutorials/*files*/ 20 | inst/reproduce-old/ 21 | 22 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: winference 2 | Type: Package 3 | Title: Approximate Bayesian Computation with the Wasserstein distance 4 | Version: 0.1.2 5 | Author: Espen Bernton, Mathieu Gerber, Pierre E. Jacob 6 | Maintainer: Pierre E. Jacob 7 | Description: Parameter inference for generative models, by using an approximate Bayesian 8 | computation approach where summary statistics are replaced by the Wasserstein distance 9 | between synthetic and observed data. 10 | License: GPL (>= 2) 11 | LazyData: TRUE 12 | Depends: 13 | Rcpp,RcppEigen,doParallel,doRNG,foreach,ggplot2,ggthemes,dplyr,reshape2,BH,transport 14 | Imports: 15 | Rcpp (>= 0.11.6),RcppEigen,BH 16 | LinkingTo: Rcpp,RcppEigen,BH 17 | RoxygenNote: 6.1.1 18 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(cost_matrix_L1) 4 | export(cost_matrix_L2) 5 | export(cost_matrix_Lp) 6 | export(create_lagmatrix) 7 | export(exact_lsap_distance) 8 | export(exact_lsap_given_C) 9 | export(exact_transport_distance) 10 | export(exact_transport_given_C) 11 | export(fast_dmvnorm) 12 | export(fast_rmvnorm) 13 | export(gandkcdf) 14 | export(gandkinversecdf) 15 | export(gandkinversecdf_givennormals) 16 | export(get_autoregressive) 17 | export(get_cosine) 18 | export(get_gandk) 19 | export(get_hilbert_to_y) 20 | export(get_levydriven) 21 | export(get_lsap_to_y) 22 | export(get_mgandk) 23 | export(get_mmd_to_y) 24 | export(get_multivariate_normal) 25 | export(get_my_colors) 26 | export(get_normal) 27 | export(get_pz_4param) 28 | export(get_queue) 29 | export(get_ricker) 30 | export(get_sinkhorn_to_y) 31 | export(get_toggleswitch) 32 | export(get_transport_to_y) 33 | export(hilbert_distance) 34 | export(hilbert_order) 35 | export(independent_proposal) 36 | export(levydriven_rtransition_rand) 37 | export(metropolishastings) 38 | export(mhchainlist_to_dataframe) 39 | export(mixture_mclust) 40 | export(mixture_rmixmod) 41 | export(mmd) 42 | export(move_step) 43 | export(move_step_onetheta) 44 | export(particle_filter) 45 | export(plot_bivariate) 46 | export(plot_bivariate_polygon) 47 | export(plot_marginal) 48 | export(plot_marginal_time) 49 | export(plot_ncomputed) 50 | export(plot_threshold) 51 | export(plot_threshold_time) 52 | export(pmmh) 53 | export(pz_transition) 54 | export(randomwalk_proposal) 55 | export(setmytheme) 56 | export(sinkhorn_distance) 57 | export(sinkhorn_given_C) 58 | export(std_move_step_onetheta) 59 | export(swap_distance) 60 | export(swap_kernel_distance) 61 | export(systematic_resampling) 62 | export(systematic_resampling_given_u) 63 | export(systematic_resampling_n) 64 | export(update_proposal) 65 | export(wasserstein) 66 | export(wcovariance) 67 | export(wmean) 68 | export(wsmc) 69 | export(wsmc_continue) 70 | export(wsmc_one_step) 71 | export(wsmc_to_dataframe) 72 | importFrom(Rcpp,sourceCpp) 73 | useDynLib(winference) 74 | -------------------------------------------------------------------------------- /R/create_lagmatrix.R: -------------------------------------------------------------------------------- 1 | #'@rdname create_lagmatrix 2 | #'@title create_lagmatrix 3 | #'@description This function creates the delay reconstruction, i.e. a matrix 4 | #' where the first row contains y_1, ..., y_T (the given univariate time series) 5 | #' the second row contains NA, y_2, ..., y_T .... etc 6 | #' the k-th row contains NA, NA, ..., y_{k+1}, ..., y_T 7 | #'@return a matrix with k rows and n-k columns, where n is the length of the provided time series 8 | #'@export 9 | create_lagmatrix <- function(timeseries, k){ 10 | if (k == 0){ 11 | return(matrix(timeseries, nrow = 1)) 12 | } 13 | res <- matrix(NA, nrow = k+1, ncol = ncol(timeseries)) 14 | res[1,] <- timeseries 15 | for (lagvalue in 1:k){ 16 | res[lagvalue+1,] <- lag(timeseries[1,], lagvalue) 17 | } 18 | return(res[,(k+1):ncol(res),drop=F]) 19 | } 20 | -------------------------------------------------------------------------------- /R/fast_dmvnorm.R: -------------------------------------------------------------------------------- 1 | #'@rdname fast_dmvnorm 2 | #'@title fast_dmvnorm 3 | #'@description evaluate multivariate Normal log-densities 4 | #'@export 5 | 6 | fast_dmvnorm <- function(x, mean, covariance){ 7 | return(dmvnorm(x, mean, covariance)) 8 | } 9 | -------------------------------------------------------------------------------- /R/fast_rmvnorm.R: -------------------------------------------------------------------------------- 1 | #'@rdname fast_rmvnorm 2 | #'@title fast_rmvnorm 3 | #'@description generate multivariate Normal draws 4 | #'@export 5 | 6 | fast_rmvnorm <- function(nparticles, mean, covariance){ 7 | return(rmvnorm(nparticles, mean, covariance)) 8 | } 9 | -------------------------------------------------------------------------------- /R/gandkinversecdf.R: -------------------------------------------------------------------------------- 1 | #'@export 2 | gandkinversecdf <- function(uniforms, theta){ 3 | return(gandkinversecdf_(uniforms, theta)) 4 | } 5 | 6 | #'@export 7 | gandkinversecdf_givennormals <- function(normals, theta){ 8 | return(gandkinversecdf_givennormals_(normals, theta)) 9 | } 10 | 11 | #'@export 12 | gandkcdf <- function(y, theta, maxsteps = 1000, tolerance = 1e-10, lower = 1e-20, upper = 1-1e-20){ 13 | return(gandkcdf_(y, theta, maxsteps, tolerance, lower, upper)) 14 | } 15 | -------------------------------------------------------------------------------- /R/get_my_colors.R: -------------------------------------------------------------------------------- 1 | #'@export 2 | get_my_colors <- function(){ 3 | return(c("Hilbert" = "orange", "marginal" = "red", "Curve matching" = "darkblue", 4 | "MMD"="009E73", "Posterior"="black", 5 | "Summary"="orange", 6 | "Semi-auto"="orange", 7 | "SA + constraint"="red", 8 | "Wasserstein"="darkblue", 9 | "Euclidean"="#009E73", 10 | "Swap" = "purple", 11 | "Rej. Summary" = "purple", 12 | "W + constraint" = "darkgreen" 13 | )) 14 | } 15 | -------------------------------------------------------------------------------- /R/hilbert_order.R: -------------------------------------------------------------------------------- 1 | #'@rdname hilbert_order 2 | #'@title hilbert_order 3 | #'@description This function returns the "Hilbert order" of a sample of n 4 | #'points of dimension d, stored in a matrix with d rows and n columns 5 | #' where d is the dimension of each sample and n the number of samples. 6 | #' The function essentially calls Hilbert_Sort_CGAL, from the CGAL library. 7 | #'@return a vector of index corresponding to the ordered samples 8 | #'@export 9 | hilbert_order <- function(x){ 10 | return(hilbert_order_(x) + 1) 11 | } 12 | -------------------------------------------------------------------------------- /R/levydriventransition.R: -------------------------------------------------------------------------------- 1 | #'@export 2 | levydriven_rtransition_rand <- function(nparticles, theta){ 3 | levydriven_rtransition_rand_cpp(nparticles, theta) 4 | } 5 | -------------------------------------------------------------------------------- /R/metropolishastings.R: -------------------------------------------------------------------------------- 1 | # Function to perform Metropolis-Hastings 2 | #'@export 3 | metropolishastings <- function(observations, target, tuning_parameters, savefile = NULL, verbose = FALSE){ 4 | # Posterior density function (log) 5 | posterior <- function(thetas){ 6 | logdens <- target$dprior(thetas, target$parameters) 7 | which.ok <- which(is.finite(logdens)) 8 | if (length(which.ok) > 0){ 9 | theta.ok <- thetas[which.ok,,drop=FALSE] 10 | logdens[which.ok] <- logdens[which.ok] + target$loglikelihood(theta.ok, observations, target$parameters) 11 | } 12 | return(logdens) 13 | } 14 | 15 | niterations <- tuning_parameters$niterations 16 | nchains <- tuning_parameters$nchains 17 | cov_proposal <- tuning_parameters$cov_proposal 18 | p <- ncol(tuning_parameters$init_chains) 19 | 20 | # store whole chains 21 | chains <- rep(list(matrix(nrow = niterations, ncol = p)), nchains) 22 | # current states of the chains 23 | current_chains <- matrix(nrow = nchains, ncol = p) 24 | # initialization of the chains 25 | current_chains <- matrix(tuning_parameters$init_chains, nrow = nchains, ncol = p) 26 | for (ichain in 1:nchains){ 27 | chains[[ichain]][1,] <- current_chains[ichain,] 28 | } 29 | # log target density values associated with the current states of the chains 30 | current_dtarget <- posterior(current_chains) 31 | # 32 | naccepts <- 0 33 | # run the chains 34 | for (iteration in 2:niterations){ 35 | if ((iteration %% max(1, floor(niterations/100)) == 1) && (verbose)){ 36 | cat("iteration ", iteration, "/", niterations, "\n") 37 | cat("average acceptance:", naccepts / (iteration*nchains) * 100, "%\n") 38 | } 39 | if (iteration > 250 && tuning_parameters$adaptation > 0 && (iteration %% tuning_parameters$adaptation) == 0){ 40 | # adapt the proposal covariance matrix based on the last < 50,000 samples of all chains 41 | mcmc_samples <- foreach(ichain = 1:nchains, .combine = rbind) %do% { 42 | matrix(chains[[ichain]][max(1, iteration - tuning_parameters$adaptation):(iteration-1),], ncol = p) 43 | } 44 | cov_proposal <- cov(mcmc_samples) / p 45 | } 46 | # proposals 47 | proposals <- current_chains + fast_rmvnorm(nchains, rep(0, p), cov_proposal) 48 | # proposals' target density 49 | proposal_dtarget <- posterior(proposals) 50 | # log Metropolis Hastings ratio 51 | acceptance_ratios <- (proposal_dtarget - current_dtarget) 52 | # uniforms for the acceptance decisions 53 | uniforms <- runif(n = nchains) 54 | # acceptance decisions 55 | accepts <- (log(uniforms) < acceptance_ratios) 56 | naccepts <- naccepts + sum(accepts) 57 | # make the appropriate replacements 58 | current_chains[accepts,] <- proposals[accepts,] 59 | if (is.null(dim(current_chains))) current_chains <- matrix(current_chains, ncol = p) 60 | current_dtarget[accepts] <- proposal_dtarget[accepts] 61 | # book keeping 62 | for (ichain in 1:nchains){ 63 | chains[[ichain]][iteration,] <- current_chains[ichain,] 64 | } 65 | if (!is.null(savefile) && iteration %% 1000 == 1){ 66 | mh_results <- list(chains = chains, naccepts = naccepts, cov_proposal = cov_proposal, iteration = iteration) 67 | save(mh_results, file = savefile) 68 | } 69 | } 70 | cat("average acceptance:", naccepts / (niterations*nchains) * 100, "%\n") 71 | return(list(chains = chains, naccepts = naccepts, cov_proposal = cov_proposal)) 72 | } 73 | 74 | #'@export 75 | mhchainlist_to_dataframe <- function(chains_list){ 76 | nchains <- length(chains_list) 77 | niterations <- nrow(chains_list[[1]]) 78 | chaindf <- foreach (i = 1:nchains, .combine = rbind) %do% { 79 | data.frame(ichain = rep(i, niterations), iteration = 1:niterations, X = chains_list[[i]]) 80 | } 81 | return(chaindf) 82 | } 83 | -------------------------------------------------------------------------------- /R/mmd.R: -------------------------------------------------------------------------------- 1 | # returns a function that computes MMD between y and z, for fixed y 2 | #'@export 3 | get_mmd_to_y <- function(y){ 4 | nobs <- ncol(y) 5 | Cy1 <- cost_matrix_L1(y, y) 6 | Cy2 <- cost_matrix_L2(y, y)^2 7 | eps <- median(as.numeric(Cy1)) 8 | k_y <- exp(-Cy2/(2*(eps^2))) 9 | first_term <- sum(k_y) / (nobs*nobs) 10 | f <- function(z){ 11 | return(mmd_c(first_term, eps, z, y)) 12 | } 13 | return(f) 14 | } 15 | # compute MMD between y and z 16 | #'@export 17 | mmd <- function(y, z){ 18 | nobs <- ncol(y) 19 | Cy1 <- cost_matrix_L1(y, y) 20 | Cy2 <- cost_matrix_L2(y, y)^2 21 | eps <- median(as.numeric(Cy1)) 22 | k_y <- exp(-Cy2/(2*(eps^2))) 23 | first_term <- sum(k_y) / (nobs*nobs) 24 | return(mmd_c(first_term, eps, z, y)) 25 | } 26 | -------------------------------------------------------------------------------- /R/model_get_autoregressive.R: -------------------------------------------------------------------------------- 1 | # Autoregressive model 2 | # theta = (phi, logsigma) 3 | #'@rdname get_autoregressive 4 | #'@title Autoregressive model 5 | #'@description This function returns a list representing an auto-regressive 6 | #'model of order 1. 7 | #'@return The list contains rprior, dprior (generate and evaluate the density of prior distribution), 8 | #' generate_randomness (generate data-generating variables), robservation (create synthetic 9 | #' data sets), parameter_names (useful for plotting), thetadim (dimension of parameter), 10 | #' ydim (dimension of observations), parameters (list of hyperparameters, 11 | #' to be passed to rprior,dprior,robservation) 12 | #'@export 13 | get_autoregressive <- function(){ 14 | # generate phi ~ Unif(-1,1) and log-sigma is normally distributed 15 | rprior <- function(nparticles, parameters){ 16 | phis <- 2*runif(nparticles) - 1 17 | logsigmas <- rnorm(nparticles, 0, 1) 18 | return(cbind(phis, logsigmas)) 19 | } 20 | 21 | # evaluate the log-density of the prior, for each particle 22 | dprior <- function(thetas, parameters){ 23 | logdensities <- dnorm(thetas[,2], 0, 1, log = TRUE) 24 | logdensities[thetas[,1] > 1] <- -Inf 25 | logdensities[thetas[,1] < -1] <- -Inf 26 | return(logdensities) 27 | } 28 | # 29 | generate_randomness <- function(nobservations){ 30 | return(rnorm(nobservations)) 31 | } 32 | # function to generate a dataset for each theta value 33 | # X_0 is Normal(0,sigma^2/(1-phi^2)) 34 | # X_t is Normal(phi X_t-1,sigma^2) 35 | robservation <- function(nobservations, theta, parameters, randomness){ 36 | observations <- rep(0, nobservations) 37 | observations[1] <- randomness[1] * exp(theta[2]) / sqrt(1 - theta[1]^2) 38 | for (idata in 2:nobservations){ 39 | observations[idata] <- theta[1] * observations[idata-1] + randomness[idata] * exp(theta[2]) 40 | } 41 | return(observations) 42 | } 43 | # 44 | loglikelihood <- function(thetaparticles, observations, parameters){ 45 | init_sd <- exp(thetaparticles[,2]) / sqrt(1 - thetaparticles[,1]^2) 46 | ll <- dnorm(observations[1], mean = 0, sd = init_sd, log = TRUE) 47 | for (i in 2:length(observations)){ 48 | ll <- ll + dnorm(observations[i], mean = observations[i-1] * thetaparticles[,1], sd = exp(thetaparticles[,2]), log = TRUE) 49 | } 50 | return(ll) 51 | } 52 | 53 | # 54 | model <- list(rprior = rprior, 55 | dprior = dprior, loglikelihood = loglikelihood, 56 | generate_randomness = generate_randomness, 57 | robservation = robservation, 58 | parameter_names = c("rho", "logsigma"), 59 | thetadim = 2, ydim = 1, 60 | parameters = list()) 61 | return(model) 62 | } 63 | -------------------------------------------------------------------------------- /R/model_get_cosine.R: -------------------------------------------------------------------------------- 1 | # cosine model 2 | # theta = (phi, logsigma) 3 | #'@rdname get_cosine 4 | #'@title Cosine model 5 | #'@description This function returns a list representing a cosine trend model. 6 | #'@return The list contains rprior, dprior (generate and evaluate the density of prior distribution), 7 | #' generate_randomness (generate data-generating variables), robservation (create synthetic 8 | #' data sets), parameter_names (useful for plotting), thetadim (dimension of parameter), 9 | #' ydim (dimension of observations), parameters (list of hyperparameters, 10 | #' to be passed to rprior,dprior,robservation) 11 | #'@export 12 | get_cosine <- function(){ 13 | # generate phi ~ Unif(0,2pi) and log-sigma is normally distributed 14 | rprior <- function(nparticles, parameters){ 15 | omegas <- runif(nparticles, min = 0, max = 1/10) 16 | phis <- runif(nparticles, min = 0, max = 2*pi) 17 | logsigma <- rnorm(nparticles) 18 | logA <- rnorm(nparticles) 19 | return(cbind(omegas, phis, logsigma, logA)) 20 | } 21 | 22 | # evaluate the log-density of the prior, for each particle 23 | dprior <- function(thetas, parameters){ 24 | logdensities <- dnorm(thetas[,3], 0, 1, log = TRUE) 25 | logdensities <- logdensities + dnorm(thetas[,4], 0, 1, log = TRUE) 26 | logdensities[thetas[,1] > 1/10] <- -Inf 27 | logdensities[thetas[,1] < 0] <- -Inf 28 | logdensities[thetas[,2] > 2*pi] <- -Inf 29 | logdensities[thetas[,2] < 0] <- -Inf 30 | return(logdensities) 31 | } 32 | # 33 | generate_randomness <- function(nobservations){ 34 | return(rnorm(nobservations)) 35 | } 36 | # function to generate a dataset for each theta value 37 | robservation <- function(nobservations, theta, parameters, randomness){ 38 | observations <- exp(theta[4]) * cos(2*pi*theta[1]*(1:nobservations) + theta[2]) + exp(theta[3]) * randomness 39 | return(observations) 40 | } 41 | # 42 | model <- list(rprior = rprior, 43 | dprior = dprior, 44 | generate_randomness = generate_randomness, 45 | robservation = robservation, 46 | parameter_names = c("omega", "phi", "logsigma", "logA"), 47 | thetadim = 4, ydim = 1, 48 | parameters = list()) 49 | return(model) 50 | } 51 | -------------------------------------------------------------------------------- /R/model_get_gandk.R: -------------------------------------------------------------------------------- 1 | #'@rdname get_gandk 2 | #'@title G and k model 3 | #'@description This function returns a list representing the g-and-k 4 | #' quantile distribution. 5 | #'@return The list contains rprior, dprior (generate and evaluate the density of prior distribution), 6 | #' generate_randomness (generate data-generating variables), robservation (create synthetic 7 | #' data sets), parameter_names (useful for plotting), thetadim (dimension of parameter), 8 | #' ydim (dimension of observations), parameters (list of hyperparameters, 9 | #' to be passed to rprior,dprior,robservation) 10 | #'@export 11 | get_gandk <- function(){ 12 | rprior <- function(nparticles, parameters){ 13 | return(matrix(runif(nparticles*4, min = 0, max = 10), ncol = 4)) 14 | } 15 | # evaluate the log-density of the prior, for each particle 16 | dprior <- function(thetaparticles, parameters){ 17 | densities <- rep(0, nrow(thetaparticles)) 18 | for (i in 1:nrow(thetaparticles)){ 19 | if (any(thetaparticles[i,] > 10) || any(thetaparticles[i,] < 0)){ 20 | densities[i] <- -Inf 21 | } 22 | } 23 | return(densities) 24 | } 25 | # generate random variables used to compute a synthetic dataset 26 | generate_randomness <- function(nobservations){ 27 | return(rnorm(nobservations)) 28 | } 29 | # function to compute a dataset for each theta value 30 | robservation <- function(nobservations, theta, parameters, randomness){ 31 | observations <- gandkinversecdf_givennormals(randomness, theta) 32 | return(observations) 33 | } 34 | loglikelihood <- function(thetas, ys, ...){ 35 | n <- length(ys) 36 | evals <- rep(0, nrow(thetas)) 37 | for (itheta in 1:nrow(thetas)){ 38 | ll <- function(ys, h = 1e-5, tolerance = 1e-10){ 39 | all_ys <- c(ys-h, ys+h) 40 | o <- order(all_ys) 41 | x <- rep(0, length(all_ys)) 42 | x[o[1]] <- gandkcdf(y = all_ys[o[1]], theta = thetas[itheta,], tolerance = tolerance) 43 | for (i in 2:length(all_ys)){ 44 | x[o[i]] <- gandkcdf(y = all_ys[o[i]], theta = thetas[itheta,], tolerance = tolerance, lower = x[o[i-1]]) 45 | } 46 | return(sum(log((x[(n+1):(2*n)] - x[1:n])/(2*h)))) 47 | } 48 | evals[itheta] <- ll(ys) 49 | } 50 | return(evals) 51 | } 52 | parameters <- list() 53 | # 54 | model <- list(rprior = rprior, 55 | dprior = dprior, 56 | generate_randomness = generate_randomness, 57 | robservation = robservation, 58 | loglikelihood = loglikelihood, 59 | parameter_names = c("A", "B", "g", "k"), 60 | parameters = parameters, 61 | thetadim = 4, ydim = 1) 62 | return(model) 63 | } 64 | -------------------------------------------------------------------------------- /R/model_get_levydriven.R: -------------------------------------------------------------------------------- 1 | #'@rdname get_levydriven 2 | #'@title Levy driven stochastic volatility model 3 | #'@description This function returns a list representing a Levy driven stochastic volatility model. 4 | #'@return The list contains rprior, dprior (generate and evaluate the density of prior distribution), 5 | #' generate_randomness (generate data-generating variables), robservation (create synthetic 6 | #' data sets), parameter_names (useful for plotting), thetadim (dimension of parameter), 7 | #' ydim (dimension of observations), parameters (list of hyperparameters, 8 | #' to be passed to rprior,dprior,robservation) 9 | #'@export 10 | get_levydriven <- function(){ 11 | ## Stochastic volatility : one-factor model 12 | # 13 | # Y_t = mu + beta * v_t + v_t^(0.5) * epsilon_t 14 | # X_t = (v_t, z_t) 15 | # v_t+1 = lambda^(-1) ( z_t - z_{t+1} + sum_j=1^k e_j ) 16 | # z_t+1 = e^(-lambda) * z_t + sum_j=1^k exp(-lambda(t + 1 - c_j)) e_j 17 | # k ~ Poisson(lambda * xi^2 / omega^2) 18 | # c_{1:k} ~ Uniform(t, t + 1) 19 | # e_{1:k} ~ Exp(xi / omega^2) (rate parameter) 20 | # 21 | # v_0 does not matter 22 | # z_0 ~ Gamma(xi^2 / omega^2, xi / omega^2) (rate parameter) 23 | # 24 | # theta <- c(0, 0, 0.5, 0.0625, 0.01) 25 | rprior <- function(nparticles, ...){ 26 | theta1 <- rnorm(nparticles, 0, sd = sqrt(2)) 27 | theta2 <- rnorm(nparticles, 0, sd = sqrt(2)) 28 | theta3 <- rexp(nparticles, rate = 0.2) 29 | theta4 <- rexp(nparticles, rate = 0.2) 30 | theta5 <- rexp(nparticles, rate = 1) 31 | return(cbind(theta1, theta2, theta3, theta4, theta5)) 32 | } 33 | dprior <- function(thetas, ...){ 34 | if (is.null(dim(thetas))) thetas <- matrix(thetas, nrow = 1) 35 | density_evals <- dnorm(thetas[,1], mean = 0, sd = sqrt(2), log = TRUE) 36 | density_evals <- density_evals + dnorm(thetas[,2], mean = 0, sd = sqrt(2), log = TRUE) 37 | density_evals <- density_evals + dexp(thetas[,3], rate = 0.2, log = TRUE) 38 | density_evals <- density_evals + dexp(thetas[,4], rate = 0.2, log = TRUE) 39 | density_evals <- density_evals + dexp(thetas[,5], rate = 1, log = TRUE) 40 | return(density_evals) 41 | } 42 | 43 | generate_randomness <- function(nobservations){ 44 | return(list()) 45 | } 46 | robservation <- function(nobservations, theta, parameters, randomness){ 47 | obs <- rep(0, nobservations) 48 | state <- rgamma(2, shape = theta[3] * theta[3] / theta[4], scale = theta[4]/theta[3]) 49 | for (t in 1:nobservations){ 50 | rtransition_r <- levydriven_rtransition_rand(1, theta) 51 | new_z <- exp(-theta[5]) * state[2] + rtransition_r$sum_weighted_e 52 | new_v <- (1/theta[5]) * (state[2] - new_z + rtransition_r$sum_e) 53 | state[1] <- new_v 54 | state[2] <- new_z 55 | obs[t] <- rnorm(1, mean = theta[1] + theta[2] * state[1], sd = sqrt(state[1])) 56 | } 57 | return(obs) 58 | } 59 | model <- list(rprior = rprior, 60 | dprior = dprior, 61 | generate_randomness = generate_randomness, 62 | robservation = robservation, 63 | parameter_names = c("mu", "beta", "xi", "omega2", "lambda"), 64 | thetadim = 5, ydim = 1) 65 | return(model) 66 | } 67 | -------------------------------------------------------------------------------- /R/model_get_mgandk.R: -------------------------------------------------------------------------------- 1 | #'@rdname get_mgandk 2 | #'@title multivariate G and k model 3 | #'@description This function returns 4 | #'@return The list contains rprior, dprior (generate and evaluate the density of prior distribution), 5 | #' generate_randomness (generate data-generating variables), robservation (create synthetic 6 | #' data sets), parameter_names (useful for plotting), thetadim (dimension of parameter), 7 | #' ydim (dimension of observations), parameters (list of hyperparameters, 8 | #' to be passed to rprior,dprior,robservation) 9 | #'@export 10 | get_mgandk <- function(){ 11 | rprior <- function(nparticles, parameters){ 12 | thetas <- matrix(runif(nparticles*8, min = 0, max = 10), ncol = 8) 13 | thetas <- cbind(thetas, runif(nparticles, min = -1, max = 1)) 14 | return(thetas) 15 | } 16 | # evaluate the log-density of the prior, for each particle 17 | dprior <- function(thetas, parameters){ 18 | densities <- rep(0, nrow(thetas)) 19 | for (i in 1:nrow(thetas)){ 20 | if (any(thetas[i,1:8] > 10) || any(thetas[i,1:8] < 0)){ 21 | densities[i] <- -Inf 22 | } 23 | densities[i] <- densities[i] + dunif(thetas[i,9], min = -1, max = 1, log = TRUE) 24 | } 25 | return(densities) 26 | } 27 | # function to compute a dataset for each theta value 28 | robservation <- function(nobservations, theta, ...){ 29 | theta_1 <- theta[1:4] 30 | theta_2 <- theta[5:8] 31 | rho <- theta[9] 32 | covariance <- matrix(c(1, rho, rho, 1), ncol = 2, nrow = 2) 33 | normals <- fast_rmvnorm(nobservations, rep(0, 2), covariance) 34 | y_1 <- gandkinversecdf_givennormals(normals[,1], theta_1) 35 | y_2 <- gandkinversecdf_givennormals(normals[,2], theta_2) 36 | return(rbind(y_1, y_2)) 37 | } 38 | gandk_loglikelihood <- function(thetas, ys, ...){ 39 | n <- length(ys) 40 | evals <- rep(0, nrow(thetas)) 41 | for (itheta in 1:nrow(thetas)){ 42 | ll <- function(ys, h = 1e-5, tolerance = 1e-10){ 43 | all_ys <- c(ys-h, ys+h) 44 | o <- order(all_ys) 45 | x <- rep(0, length(all_ys)) 46 | x[o[1]] <- gandkcdf(y = all_ys[o[1]], theta = thetas[itheta,], tolerance = tolerance) 47 | for (i in 2:length(all_ys)){ 48 | x[o[i]] <- gandkcdf(y = all_ys[o[i]], theta = thetas[itheta,], tolerance = tolerance, lower = x[o[i-1]]) 49 | } 50 | return(sum(log((x[(n+1):(2*n)] - x[1:n])/(2*h)))) 51 | } 52 | evals[itheta] <- ll(ys) 53 | } 54 | return(evals) 55 | } 56 | # 57 | loglikelihood <- function(thetas, ys, ...){ 58 | lls <- rep(0, nrow(thetas)) 59 | for (itheta in 1:nrow(thetas)){ 60 | Fy1 <- sapply(ys[1,], function(v) gandkcdf(v, thetas[itheta,1:4])) 61 | Fy2 <- sapply(ys[2,], function(v) gandkcdf(v, thetas[itheta,5:8])) 62 | x1 <- qnorm(Fy1) 63 | x2 <- qnorm(Fy2) 64 | covariance <- matrix(c(1, thetas[itheta,9], thetas[itheta,9], 1), ncol = 2, nrow = 2) 65 | lls[itheta] <- sum(fast_dmvnorm(cbind(x1, x2), rep(0, 2), covariance)) 66 | lls[itheta] <- lls[itheta] - sum(dnorm(qnorm(Fy1), log = T)) - sum(dnorm(qnorm(Fy2), log = T)) 67 | } 68 | lls <- lls + gandk_loglikelihood(thetas[,1:4,drop=F], ys[1,]) 69 | lls <- lls + gandk_loglikelihood(thetas[,5:8,drop=F], ys[2,]) 70 | return(lls) 71 | } 72 | parameters <- list() 73 | # 74 | model <- list(rprior = rprior, 75 | dprior = dprior, 76 | robservation = robservation, 77 | loglikelihood = loglikelihood, 78 | parameter_names = c("a1", "b1", "g1", "k1", "a2", "b2", "g2", "k2", "rho"), 79 | parameters = parameters, 80 | thetadim = 9, ydim = 2) 81 | return(model) 82 | } 83 | -------------------------------------------------------------------------------- /R/model_get_multivariatenormal.R: -------------------------------------------------------------------------------- 1 | # The observations are i.i.d. Normal with mean mu (parameters) 2 | # and covariance matrix Sigma, defined 3 | # as the identity matrix, and +0.5 on the upper and lower diagonals. 4 | #'@rdname get_multivariate_normal 5 | #'@title Multivariate Normal model 6 | #'@description This function returns a list representing 7 | #' a Normal location model, in a dimension specified by the user as an argument. 8 | #'@return The list contains rprior, dprior (generate and evaluate the density of prior distribution), 9 | #' generate_randomness (generate data-generating variables), robservation (create synthetic 10 | #' data sets), parameter_names (useful for plotting), thetadim (dimension of parameter), 11 | #' ydim (dimension of observations), parameters (list of hyperparameters, 12 | #' to be passed to rprior,dprior,robservation) 13 | #'@export 14 | get_multivariate_normal <- function(dimension){ 15 | target <- list() 16 | target$rprior <- function(nparticles, parameters){ 17 | return(fast_rmvnorm(nparticles, rep(parameters$mu_0, dimension), diag(parameters$tau^2, dimension, dimension))) 18 | # particles <- matrix(nrow = nparticles, ncol = dimension) 19 | # for (id in 1:dimension){ 20 | # particles[,id] <- rnorm(nparticles, mean = parameters$mu_0, sd = parameters$tau) 21 | # } 22 | # return(particles) 23 | } 24 | # evaluate the log-density of the prior, for each particle 25 | # parameters is a list containing mu_0, nu, alpha, beta 26 | target$dprior <- function(thetas, parameters){ 27 | # logdensities <- rep(0, nrow(thetas)) 28 | return(fast_dmvnorm(thetas, rep(parameters$mu_0, dimension), diag(parameters$tau^2, dimension, dimension))) 29 | # for (id in 1:dimension){ 30 | # logdensities <- logdensities + dnorm(thetas[,id], mean = parameters$mu_0, sd = parameters$tau, log = TRUE) 31 | # } 32 | # return(logdensities) 33 | } 34 | 35 | # generate random variables used to compute a synthetic dataset 36 | target$generate_randomness <- function(nobservations){ 37 | return(fast_rmvnorm(nobservations, rep(0, dimension), diag(1, dimension, dimension))) 38 | } 39 | 40 | S <- diag(1, dimension, dimension) 41 | if (dimension > 1){ 42 | for (i in 1:(dimension-1)){ 43 | S[i,i+1] <- S[i+1,i] <- 0.5 44 | } 45 | } 46 | target$parameters <- list(S = S, mu_0 = 0, tau = 10) 47 | # function to compute a dataset for each theta value 48 | target$robservation <- function(nobservations, theta, parameters, ...){ 49 | return(t(fast_rmvnorm(nobservations, theta, parameters$S))) 50 | } 51 | target$loglikelihood <- function(thetaparticles, observations, parameters){ 52 | logdensities <- rep(0, nrow(thetaparticles)) 53 | for (i in 1:nrow(thetaparticles)){ 54 | logdensities[i] <- sum(fast_dmvnorm(t(observations), thetaparticles[i,], parameters$S)) 55 | } 56 | return(logdensities) 57 | } 58 | target$thetadim <- dimension 59 | target$ydim <- dimension 60 | target$parameter_names <- paste0("X", 1:dimension) 61 | return(target) 62 | } 63 | -------------------------------------------------------------------------------- /R/model_get_normal.R: -------------------------------------------------------------------------------- 1 | # Normal Gamma model 2 | # theta = (mu, tau) 3 | #'@rdname get_normal 4 | #'@title Normal model 5 | #'@description This function returns a list representing 6 | #' a Normal location model. 7 | #' The prior is mu ~ Normal(mu_0, nu^{-1}), where nu is precision 8 | #' and tau ~ Gamma(alpha, beta), where beta is rate (1/scale). 9 | #' The likelihood is Y ~ Normal(mu, tau^2) where tau^2 is the variance. 10 | #'@return The list contains rprior, dprior (generate and evaluate the density of prior distribution), 11 | #' generate_randomness (generate data-generating variables), robservation (create synthetic 12 | #' data sets), parameter_names (useful for plotting), thetadim (dimension of parameter), 13 | #' ydim (dimension of observations), parameters (list of hyperparameters, 14 | #' to be passed to rprior,dprior,robservation) 15 | #'@export 16 | get_normal <- function(){ 17 | rprior <- function(nparticles, parameters){ 18 | particles <- matrix(nrow = nparticles, ncol = 2) 19 | particles[,1] <- rnorm(nparticles, mean = parameters$mu_0, sd = 1/sqrt(parameters$nu)) 20 | particles[,2] <- rgamma(nparticles, shape = parameters$alpha, rate = parameters$beta) 21 | return(particles) 22 | } 23 | # evaluate the log-density of the prior, for each particle 24 | # parameters is a list containing mu_0, nu, alpha, beta 25 | dprior <- function(thetaparticles, parameters){ 26 | logdensities <- dnorm(thetaparticles[,1], mean = parameters$mu_0, sd = 1/sqrt(parameters$nu), log = TRUE) 27 | logdensities <- logdensities + dgamma(thetaparticles[,2], shape = parameters$alpha, rate = parameters$beta, log = TRUE) 28 | return(logdensities) 29 | } 30 | # log-likelihood, available here and used to run MCMC 31 | loglikelihood <- function(thetaparticles, observation, ...){ 32 | logdensities <- dnorm(observation, mean = thetaparticles[,1], 33 | sd = thetaparticles[,2], log = TRUE) 34 | return(logdensities) 35 | } 36 | # generate random variables used to compute a synthetic dataset 37 | generate_randomness <- function(nobservations){ 38 | return(rnorm(nobservations)) 39 | } 40 | # function to compute a dataset for each theta value, given fixed randomness 41 | robservation_given_randomness <- function(nobservations, theta, parameters, randomness){ 42 | observations <- theta[1] + randomness * theta[2] 43 | return(observations) 44 | } 45 | # function to generate a dataset 46 | robservation <- function(nobservations, theta, parameters){ 47 | observations <- theta[1] + rnorm(nobservations) * theta[2] 48 | return(observations) 49 | } 50 | 51 | parameters <- list(mu_0 = 0, nu = 1, alpha = 2, beta = 1) 52 | # 53 | model <- list(rprior = rprior, 54 | dprior = dprior, 55 | loglikelihood = loglikelihood, 56 | generate_randomness = generate_randomness, 57 | robservation_given_randomness = robservation_given_randomness, 58 | robservation = robservation, 59 | parameter_names = c("mu", "sigma"), 60 | parameters = parameters, 61 | thetadim = 2, ydim = 1) 62 | return(model) 63 | } 64 | -------------------------------------------------------------------------------- /R/model_get_pz_4param.R: -------------------------------------------------------------------------------- 1 | #'@name get_pz_4param 2 | #'@title Phytoplankton-zooplankton model 3 | #'@description This function returns a list representing 4 | #' a Lotka-Volterra type model for plankton. See 5 | #' Jones, E., Parslow, J., and Murray, L. (2010). A Bayesian approach to state and parameter estimation in a phytoplankton-zooplankton model. Australian Meteorological and Oceanographic Journal, 59:7-16. 6 | #'@return The list contains rprior, dprior (generate and evaluate the density of prior distribution), 7 | #' generate_randomness (generate data-generating variables), robservation (create synthetic 8 | #' data sets), parameter_names (useful for plotting), thetadim (dimension of parameter), 9 | #' ydim (dimension of observations), parameters (list of hyperparameters, 10 | #' to be passed to rprior,dprior,robservation) 11 | #'@export 12 | get_pz_4param <- function(){ 13 | rprior <- function(nparticles, ...){ 14 | ## evaluate prior density on the transformed parameter 15 | theta1 <- runif(nparticles) 16 | theta2 <- runif(nparticles) 17 | # set the other parameters deterministically 18 | theta3 <- runif(nparticles) 19 | theta4 <- runif(nparticles) 20 | return(cbind(theta1, theta2, theta3, theta4)) 21 | } 22 | # prior, on the transformed parameters 23 | dprior <- function(thetas, ...){ 24 | ## evaluate prior density on the transformed parameter 25 | if (is.null(dim(thetas))) thetas <- matrix(thetas, nrow = 1) 26 | density_evals <- dunif(thetas[,1], log = TRUE) + dunif(thetas[,2], log = TRUE) 27 | density_evals <- density_evals + dunif(thetas[,3], log = TRUE) + dunif(thetas[,4], log = TRUE) 28 | return(density_evals) 29 | } 30 | 31 | # 32 | generate_randomness <- function(nobservations){ 33 | return(list(x_0 = rnorm(2), x = rnorm(nobservations), y = rnorm(nobservations))) 34 | } 35 | robservation <- function(nobservations, theta, parameters, randomness){ 36 | # untr_theta <- untransform_theta(theta) 37 | state <- matrix(exp(log(2) + randomness$x_0), nrow = 2) 38 | states <- matrix(nrow = 2, ncol = nobservations+1) 39 | states[,1] <- state 40 | log_obs <- rep(0, nobservations) 41 | for (t in 1:nobservations){ 42 | alpha <- theta[2] * randomness$x[t] + theta[1] 43 | state <- pz_transition(state, alpha, t-1, c(theta[3:4], 0.1, 0.1)) 44 | states[,t+1] <- state 45 | log_obs[t] <- 0.2*randomness$y[t] + log(state[1,1]) 46 | } 47 | return(log_obs) 48 | } 49 | ### additional functions to run PMMH 50 | rinit <- function(nparticles, theta, rand, ...){ 51 | return(exp(matrix(log(2) + rand[1:(2*nparticles)], nrow = 2))) 52 | } 53 | rtransition <- function(xparticles, theta, time, rand, ...){ 54 | nparticles <- ncol(xparticles) 55 | ra <- rand[((2+time-1)*nparticles + 1):((2+time)*nparticles)] 56 | alphas <- theta[1] + theta[2] * ra 57 | xparticles <- pz_transition(xparticles, alphas, time-1, c(theta[3:4], 0.1, 0.1)) 58 | return(xparticles) 59 | } 60 | dmeasurement <- function(xparticles, theta, observation, ...) { 61 | return(dnorm(x = observation, mean = log(xparticles[1,]), sd = 0.2, log = TRUE)) 62 | } 63 | generate_randomness_pmmh <- function(nparticles, datalength){ 64 | return(pz_generate_randomness_cpp(nparticles, datalength)) 65 | } 66 | precompute <- function(...){ 67 | return(list()) 68 | } 69 | 70 | # 71 | model <- list(rprior = rprior, 72 | dprior = dprior, 73 | generate_randomness = generate_randomness, 74 | robservation = robservation, 75 | parameter_names = c("mu_alpha", "sigma_alpha", "c", "e"), 76 | thetadim = 4, ydim = 1, 77 | rinit = rinit, rtransition = rtransition, dmeasurement = dmeasurement, generate_randomness_pmmh = generate_randomness_pmmh, precompute = precompute) 78 | return(model) 79 | } 80 | -------------------------------------------------------------------------------- /R/model_get_queue.R: -------------------------------------------------------------------------------- 1 | #'@export 2 | get_queue <- function(){ 3 | rprior <- function(ntheta, parameters){ 4 | theta1 <- runif(n = ntheta, min = 0, max = 10) 5 | theta2minus1 <- runif(n = ntheta, min = 0, max = 10) 6 | theta3 <- runif(n = ntheta, min = 0, max = 1/3) 7 | return(cbind(theta1, theta2minus1, theta3)) 8 | } 9 | 10 | dprior <- function(thetas, parameters){ 11 | evals <- dunif(thetas[,1], min = 0, max = 10, log = TRUE) 12 | evals <- evals + dunif(thetas[,2], min = 0, max = 10, log = TRUE) 13 | evals <- evals + dunif(thetas[,3], min = 0, max = 1/3, log = TRUE) 14 | return(evals) 15 | } 16 | 17 | robservation <- function(nobservations, theta, parameters, ...){ 18 | theta1 <- theta[1] 19 | theta2 <- theta[2] + theta[1] 20 | theta3 <- theta[3] 21 | u <- runif(nobservations, theta1, theta2) 22 | v <- rep(0, nobservations) 23 | y <- rep(0, nobservations) 24 | x <- rep(0, nobservations) 25 | v[1] <- rexp(n = 1, rate = theta3) 26 | y[1] <- u[1] + v[1] 27 | x[1] <- y[1] 28 | for (t in 2:nobservations){ 29 | v[t] <- v[t-1] + rexp(n = 1, rate = theta3) 30 | y[t] <- u[t] + max(0, v[t] - x[t-1]) 31 | x[t] <- x[t-1] + y[t] 32 | } 33 | return(y) 34 | } 35 | model <- list(rprior = rprior, 36 | dprior = dprior, 37 | robservation = robservation, 38 | parameter_names = c("theta1", "theta2minus1", "theta3"), 39 | parameters = list(), 40 | thetadim = 3, ydim = 1) 41 | return(model) 42 | } 43 | -------------------------------------------------------------------------------- /R/model_get_ricker.R: -------------------------------------------------------------------------------- 1 | #'@rdname get_ricker 2 | #'@title Ricker model 3 | #'@description This function returns a list representing 4 | #' the Ricker model in 5 | #' Wood (2010) Statistical inference for noisy nonlinear ecological dynamic systems 6 | #'@return The list contains rprior, dprior (generate and evaluate the density of prior distribution), 7 | #' generate_randomness (generate data-generating variables), robservation (create synthetic 8 | #' data sets), parameter_names (useful for plotting), thetadim (dimension of parameter), 9 | #' ydim (dimension of observations), parameters (list of hyperparameters, 10 | #' to be passed to rprior,dprior,robservation) 11 | #'@export 12 | get_ricker <- function(){ 13 | rprior <- function(nparticles, ...){ 14 | theta1 <- runif(nparticles, min = 0, max = 10) 15 | theta2 <- runif(nparticles, min = 0, max = 20) 16 | theta3 <- runif(nparticles, min = 0, max = 2) 17 | return(cbind(theta1, theta2, theta3)) 18 | } 19 | dprior <- function(thetas, ...){ 20 | if (is.null(dim(thetas))) thetas <- matrix(thetas, nrow = 1) 21 | density_evals <- dunif(thetas[,1], min = 0, max = 10, log = TRUE) 22 | density_evals <- density_evals + dunif(thetas[,2], min = 0, max = 20, log = TRUE) 23 | density_evals <- density_evals + dunif(thetas[,3], min = 0, max = 2, log = TRUE) 24 | return(density_evals) 25 | } 26 | # 27 | generate_randomness <- function(nobservations){ 28 | return(list()) 29 | } 30 | robservation <- function(nobservations, theta, parameters, randomness){ 31 | obs <- rep(0, nobservations) 32 | r <- exp(theta[1]) 33 | phi <- theta[2] 34 | sigma_e <- theta[3] 35 | state <- 1 36 | for (t in 1:nobservations){ 37 | state = r * state * exp(-state + sigma_e * rnorm(1)) 38 | obs[t] <- rpois(1, phi*state) 39 | } 40 | return(obs) 41 | } 42 | # 43 | model <- list(rprior = rprior, 44 | dprior = dprior, 45 | generate_randomness = generate_randomness, 46 | robservation = robservation, 47 | parameters = NULL, 48 | parameter_names = c("logr", "phi", "sigma_e"), 49 | thetadim = 3, ydim = 1) 50 | return(model) 51 | } 52 | -------------------------------------------------------------------------------- /R/model_get_toggleswitch.R: -------------------------------------------------------------------------------- 1 | #' @name get_toggleswitch 2 | #' @title Toggle switch model 3 | #' @description This function returns a list representing the toggle switch model 4 | #' of Bonassi, F. V., West, M., et al. (2015). 5 | #' Sequential Monte Carlo with adaptive weights for approximate Bayesian computation. Bayesian Analysis, 10(1):171-187. 6 | #' @return The list contains rprior, dprior (generate and evaluate the density of prior distribution), 7 | #' generate_randomness (generate data-generating variables), robservation (create synthetic 8 | #' data sets), parameter_names (useful for plotting), thetadim (dimension of parameter), 9 | #' ydim (dimension of observations), parameters (list of hyperparameters, 10 | #' to be passed to rprior,dprior,robservation) 11 | #' @export 12 | get_toggleswitch <- function(){ 13 | library(truncnorm) 14 | rprior <- function(nparticles, parameters){ 15 | particles <- matrix(nrow = nparticles, ncol = 7) 16 | particles[,1] <- runif(nparticles, 0, 50) 17 | particles[,2] <- runif(nparticles, 0, 50) 18 | particles[,3] <- runif(nparticles, 0, 5) 19 | particles[,4] <- runif(nparticles, 0, 5) 20 | particles[,5] <- runif(nparticles, 250, 450) 21 | particles[,6] <- runif(nparticles, 0, 0.5) 22 | particles[,7] <- runif(nparticles, 0, 0.4) 23 | return(particles) 24 | } 25 | # evaluate the log-density of the prior, for each particle 26 | dprior <- function(thetaparticles, parameters){ 27 | logdensities <- rep(0, nrow(thetaparticles)) 28 | logdensities <- dunif(thetaparticles[,1], min = 0, max = 50, log = TRUE) 29 | logdensities <- logdensities + dunif(thetaparticles[,2], min = 0, max = 50, log = TRUE) 30 | logdensities <- logdensities + dunif(thetaparticles[,3], min = 0, max = 5, log = TRUE) 31 | logdensities <- logdensities + dunif(thetaparticles[,4], min = 0, max = 5, log = TRUE) 32 | logdensities <- logdensities + dunif(thetaparticles[,5], min = 250, max = 450, log = TRUE) 33 | logdensities <- logdensities + dunif(thetaparticles[,6], min = 0, max = 0.5, log = TRUE) 34 | logdensities <- logdensities + dunif(thetaparticles[,7], min = 0, max = 0.4, log = TRUE) 35 | return(logdensities) 36 | } 37 | # generate random variables used to compute a synthetic dataset 38 | generate_randomness <- function(nobservations){ 39 | return(list()) 40 | } 41 | # function to compute a dataset for each theta value 42 | robservation <- function(nobservations, theta, parameters, randomness){ 43 | # constants used in the data generating process 44 | h <- 1 45 | tau <- 300 46 | u0 <- 10 47 | v0 <- 10 48 | # 49 | u <- matrix(0, nrow = nobservations, ncol = tau+1) 50 | v <- matrix(0, nrow = nobservations, ncol = tau+1) 51 | u[,1] <- u0 52 | v[,1] <- v0 53 | # noise <- array(randomness[(nobservations+1):length(randomness)], dim = c(nobservations, tau, 2)) 54 | for (t in 1:tau){ 55 | u[,t+1] <- u[,t] + h * theta[1] / (1 + v[,t]^theta[3]) - h * (1 + 0.03 * u[,t]) 56 | u[,t+1] <- u[,t+1] + h * 0.5 * rtruncnorm(nobservations, a = - u[,t+1]/(h*0.5)) 57 | v[,t+1] <- v[,t] + h * theta[2] / (1 + u[,t]^theta[4]) - h * (1 + 0.03 * v[,t]) 58 | v[,t+1] <- v[,t+1] + h * 0.5 * rtruncnorm(nobservations, a = - v[,t+1]/(h*0.5)) 59 | } 60 | lb = -(u[,tau+1] + theta[5]) / (theta[5] * theta[6]) * (u[,tau+1]^theta[7]) 61 | y <- u[,tau+1] + theta[5] + theta[5] * theta[6] * rtruncnorm(nobservations, a = lb) / (u[,tau+1]^theta[7]) 62 | return(y) 63 | } 64 | parameters <- list() 65 | # 66 | model <- list(rprior = rprior, 67 | dprior = dprior, 68 | generate_randomness = generate_randomness, 69 | robservation = robservation, 70 | parameter_names = c("alpha_1", "alpha_2", "beta_1", "beta_2", "mu", "sigma", "gamma"), 71 | parameters = parameters, 72 | thetadim = 7, ydim = 1) 73 | return(model) 74 | } 75 | -------------------------------------------------------------------------------- /R/particle_filter.R: -------------------------------------------------------------------------------- 1 | # requires model$rinit, model$rtransition, and model$dobs 2 | # rinit specifies x_1 and the first observation is y_1 3 | #'@export 4 | particle_filter <- function(nparticles, model, theta, observations, storex = FALSE){ 5 | datalength <- ncol(observations) 6 | # initialization 7 | xparticles <- model$rinit(nparticles, theta) 8 | logw <- model$dobs(observations, 1, xparticles, theta) 9 | if (all(is.infinite(logw))){ 10 | return(NA) 11 | } 12 | maxlw <- max(logw) 13 | w <- exp(logw - maxlw) 14 | # update log likelihood estimate 15 | ll <- maxlw + log(mean(w)) 16 | normweights <- w / sum(w) 17 | # 18 | if (storex){ 19 | xhistory <- rep(list(matrix(ncol = nparticles, nrow = nrow(xparticles))), datalength) 20 | whistory <- rep(list(rep(0, nparticles)), datalength + 1) 21 | ahistory <- rep(list(rep(0, nparticles)), datalength) 22 | xhistory[[1]] <- xparticles 23 | whistory[[1]] <- normweights 24 | } 25 | # step t > 1 26 | for (time in 2:datalength){ 27 | ancestors <- systematic_resampling_given_u(normweights, runif(1)) 28 | xparticles <- xparticles[ancestors,,drop=F] 29 | xparticles <- model$rtransition(xparticles, time, theta) 30 | logw <- model$dobs(observations, time, xparticles, theta) 31 | if (all(is.infinite(logw))){ 32 | return(NA) 33 | } 34 | maxlw <- max(logw) 35 | w <- exp(logw - maxlw) 36 | # update log likelihood estimate 37 | ll <- ll + maxlw + log(mean(w)) 38 | normweights <- w / sum(w) 39 | 40 | # 41 | if (storex){ 42 | ahistory[[time]] <- ancestors 43 | xhistory[[time+1]] <- xparticles 44 | whistory[[time+1]] <- normweights 45 | } 46 | } 47 | if (storex){ 48 | return(list(ll = ll, xhistory = xhistory, whistory = whistory, ahistory = ahistory)) 49 | } else { 50 | return(ll) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /R/plot_functions.R: -------------------------------------------------------------------------------- 1 | #'@export 2 | plot_threshold <- function(wsmcresults){ 3 | gthreshold <- qplot(x = 1:(length(wsmcresults$threshold_history)), y = wsmcresults$threshold_history, geom = "line") 4 | gthreshold <- gthreshold + xlab("step") + ylab("threshold") 5 | return(gthreshold) 6 | } 7 | 8 | #'@export 9 | plot_threshold_time <- function(wsmcresults){ 10 | gthreshold <- qplot(x = wsmcresults$compute_times, y = wsmcresults$threshold_history, geom = "line") 11 | gthreshold <- gthreshold + xlab("time (s)") + ylab("threshold") 12 | return(gthreshold) 13 | } 14 | 15 | #'@export 16 | plot_ncomputed <- function(wsmcresults){ 17 | ncomputed <- qplot(x = 1:(length(wsmcresults$ncomputed)), y = wsmcresults$ncomputed, geom = "line") 18 | ncomputed <- ncomputed + xlab("step") + ylab("# distances computed") 19 | return(ncomputed) 20 | } 21 | 22 | #'@export 23 | plot_bivariate <- function(wsmcresults, i1, i2, from_step = 0){ 24 | wsmc.df <- wsmc_to_dataframe(wsmcresults) 25 | nsteps <- max(wsmc.df$step) 26 | parameter_names <- names(wsmc.df)[c(i1, i2)] 27 | names(wsmc.df)[c(i1,i2)] <- c("temp_name_1", "temp_name_2") 28 | g <- ggplot(wsmc.df %>% filter(step > from_step), aes(x = temp_name_1, y = temp_name_2, colour = step, group = step)) 29 | g <- g + geom_point(alpha = 0.5) 30 | g <- g + scale_colour_gradient2(midpoint = from_step+floor((nsteps-from_step)/2)) + theme(legend.position = "none") 31 | g <- g + xlab(parameter_names[i1]) + ylab(parameter_names[i2]) 32 | return(g) 33 | } 34 | 35 | #'@export 36 | plot_bivariate_polygon <- function(wsmcresults, i1, i2, from_step = 0){ 37 | wsmc.df <- wsmc_to_dataframe(wsmcresults) 38 | nsteps <- max(wsmc.df$step) 39 | parameter_names <- names(wsmc.df)[c(i1, i2)] 40 | names(wsmc.df)[c(i1,i2)] <- c("temp_name_1", "temp_name_2") 41 | g <- ggplot(wsmc.df %>% filter(step > from_step), aes(x = temp_name_1, y = temp_name_2, colour = step, group = step)) 42 | g <- g + stat_density_2d(aes(fill = step), geom = "polygon") 43 | g <- g + scale_colour_gradient2(midpoint = from_step+floor((nsteps-from_step)/2)) + theme(legend.position = "none") 44 | g <- g + scale_fill_gradient2(midpoint = from_step+floor((nsteps-from_step)/2)) 45 | g <- g + xlab(parameter_names[i1]) + ylab(parameter_names[i2]) 46 | return(g) 47 | } 48 | 49 | #'@export 50 | plot_marginal <- function(wsmcresults, i, from_step = 0){ 51 | wsmc.df <- wsmc_to_dataframe(wsmcresults) 52 | parameter_name <- names(wsmc.df)[i] 53 | names(wsmc.df)[i] <- c("temp_name_i") 54 | g <- ggplot(wsmc.df %>% filter(step > from_step), aes(x = temp_name_i, colour = step, group = step)) + geom_density(aes(y = ..density..)) 55 | g <- g + theme(legend.position = "none") + xlab(parameter_name) 56 | g <- g + scale_color_gradient(low = rgb(1,0.5,0.5), high = "darkblue") 57 | return(g) 58 | } 59 | 60 | #'@export 61 | plot_marginal_time <- function(wsmcresults, i, from_step = 0){ 62 | wsmc.df <- wsmc_to_dataframe(wsmcresults) 63 | parameter_name <- names(wsmc.df)[i] 64 | names(wsmc.df)[i] <- c("temp_name_i") 65 | g <- ggplot(wsmc.df %>% filter(step > from_step), aes(x = temp_name_i, colour = time, group = factor(time))) + geom_density(aes(y = ..density..)) 66 | g <- g + xlab(parameter_name) # + theme(legend.position = "none") 67 | g <- g + scale_color_gradient(name = "time (s)", low = rgb(1,0.5,0.5), high = "darkblue") 68 | return(g) 69 | } 70 | 71 | -------------------------------------------------------------------------------- /R/pz_transition.R: -------------------------------------------------------------------------------- 1 | #'@rdname pz_transition 2 | #'@title pz_transition 3 | #'@description Solve PZ ODE for each particle, given each alpha, from time to time + 1, 4 | #' and given the parameters (c, e, ml, mq). 5 | #'@export 6 | #' 7 | pz_transition <- function(xparticles, alphas, time, parameters){ 8 | return(one_step_pz_vector(xparticles, alphas, time, parameters)) 9 | } 10 | -------------------------------------------------------------------------------- /R/setmytheme.R: -------------------------------------------------------------------------------- 1 | #'@rdname setmytheme 2 | #'@title Set My Theme 3 | #'@description set theme for ggplot, to create consistent figures 4 | #'@export 5 | setmytheme <- function(){ 6 | theme_set(theme_bw()) 7 | theme_update(axis.text.x = element_text(size = 20), 8 | axis.text.y = element_text(size = 20), 9 | axis.title.x = element_text(size = 25, margin=margin(20,0,0,0)), 10 | axis.title.y = element_text(size = 25, angle = 90, margin = margin(0,20,0,0)), 11 | legend.text = element_text(size = 20), 12 | legend.title = element_text(size = 20), 13 | title = element_text(size = 30), 14 | strip.text = element_text(size = 25), 15 | strip.background = element_rect(fill="white"), 16 | panel.spacing = unit(2, "lines"), 17 | legend.position = "bottom") 18 | } 19 | -------------------------------------------------------------------------------- /R/systematic_resampling.R: -------------------------------------------------------------------------------- 1 | #'@rdname systematic_resampling 2 | #'@title systematic_resampling 3 | #'@description systematic_resampling 4 | #'@export 5 | systematic_resampling <- function(normalized_weights){ 6 | return(systematic_resampling_(normalized_weights)) 7 | } 8 | 9 | #'@export 10 | systematic_resampling_given_u <- function(normalized_weights, u){ 11 | return(systematic_resampling_n_(normalized_weights, length(normalized_weights), u)) 12 | } 13 | 14 | #'@export 15 | systematic_resampling_n <- function(normalized_weights, ndraws){ 16 | return(systematic_resampling_n_(normalized_weights, ndraws, runif(1))) 17 | } 18 | 19 | # systematic_resampling <- function(normalized_weights){ 20 | # N <- length(normalized_weights) 21 | # indices <- rep(0, N) 22 | # normalized_weights <- N * normalized_weights 23 | # j <- 1 24 | # csw <- normalized_weights[1] 25 | # u <- runif(1, min = 0, max = 1) 26 | # for (k in 1:N){ 27 | # while (csw < u){ 28 | # j <- j + 1 29 | # csw <- csw + normalized_weights[j] 30 | # } 31 | # indices[k] <- j 32 | # u <- u + 1 33 | # } 34 | # return(indices) 35 | # } 36 | -------------------------------------------------------------------------------- /R/wasserstein.R: -------------------------------------------------------------------------------- 1 | #'@name wasserstein 2 | #'@title wasserstein 3 | #'@description Compute regularized Wasserstein distance between two empirical distributions, 4 | #' p and q, specified as vector of probabilities summing to one. 5 | #' The third argument is the cost matrix, i.e. a matrix of pair-wise distances, 6 | #' the fourth argument is the regularization parameter, e.g. 0.05*median(cost_matrix), 7 | #' and the last argument is the number of Sinkhorn iterations to perform, e.g. 100. 8 | #' Important references are 9 | #' 10 | #' - Cuturi, M. (2013). Sinkhorn distances: Lightspeed computation of optimal transport. In Advances in Neural Information Processing Systems (NIPS), pages 2292-2300. 11 | #' 12 | #' - Cuturi, M. and Doucet, A. (2014). Fast computation of Wasserstein barycenters. In Proceedings of the 31st International Conference on Machine Learning (ICML), pages 685-693. 13 | #' 14 | #'@return a list with "distances", "transportmatrix", "u" and "v" 15 | #'@export 16 | wasserstein <- function(p, q, cost_matrix, epsilon, niterations){ 17 | return(wasserstein_(p, q, cost_matrix, epsilon, niterations)) 18 | } 19 | -------------------------------------------------------------------------------- /R/wcovariance.R: -------------------------------------------------------------------------------- 1 | #'@rdname wcovariance 2 | #'@title wcovariance 3 | #'@description wcovariance 4 | #'@export 5 | wcovariance <- function(xparticles, normweights, mean){ 6 | return(wcovariance_(xparticles, normweights, mean)) 7 | } 8 | -------------------------------------------------------------------------------- /R/winference-package.R: -------------------------------------------------------------------------------- 1 | #'@name winference-package 2 | #'@aliases winference 3 | #'@docType package 4 | #'@title winference 5 | #'@author anonymous 6 | #'@description ... 7 | #'@details ... 8 | #'@keywords package 9 | #'@useDynLib winference 10 | #'@importFrom Rcpp sourceCpp 11 | NULL 12 | -------------------------------------------------------------------------------- /R/wmean.R: -------------------------------------------------------------------------------- 1 | #'@rdname wmean 2 | #'@title wmean 3 | #'@description wmean 4 | #'@export 5 | wmean <- function(xparticles, normweights){ 6 | return(wmean_(xparticles, normweights)) 7 | } 8 | -------------------------------------------------------------------------------- /R/wsmc_to_dataframe.R: -------------------------------------------------------------------------------- 1 | #'@export 2 | wsmc_to_dataframe <- function(results, ...){ 3 | th <- results$thetas_history 4 | if (!is.null(results$target$parameter_names)){ 5 | parameter_names <- results$target$parameter_names 6 | } else { 7 | parameter_names <- paste0("X", 1:ncol(results$thetas_history[[1]])) 8 | } 9 | nsteps <- length(th) 10 | df <- data.frame() 11 | for (i in 1:(nsteps)){ 12 | df_ <- data.frame(cbind(th[[i]], rep(i, nrow(th[[i]])), rep(as.numeric(results$compute_times)[i], nrow(th[[i]])))) 13 | names(df_) <- c(parameter_names, "step", "time") 14 | df <- rbind(df, df_) 15 | } 16 | names(df) <- c(parameter_names, "step", "time") 17 | return(df) 18 | } 19 | 20 | 21 | # wsmc_to_dataframe <- function(results, parameter_names){ 22 | # th <- results$thetas_history 23 | # nsteps <- length(th) 24 | # df <- data.frame() 25 | # for (i in 1:(nsteps)){ 26 | # df_ <- data.frame(cbind(th[[i]], rep(i, nrow(th[[i]])))) 27 | # names(df_) <- c(parameter_names, "step") 28 | # df <- rbind(df, df_) 29 | # } 30 | # names(df) <- c(parameter_names, "step") 31 | # return(df) 32 | # } 33 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # winference 2 | R package to perform approximate Bayesian computation with the Wasserstein distance 3 | 4 | joint work with Espen Bernton, Mathieu Gerber, Christian P. Robert 5 | 6 | The inst folder contains tutorial files, and files to reproduce the figures of the article. 7 | 8 | The package requires R packages: Rcpp,RcppEigen,doParallel,doRNG,foreach,ggplot2,ggthemes,dplyr,reshape2,BH,transport 9 | as well as the CGAL library which can be installed following the instructions 10 | on https://doc.cgal.org/latest/Manual/installation.html. 11 | -------------------------------------------------------------------------------- /WInference.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace,vignette 22 | -------------------------------------------------------------------------------- /inst/reproduceabc/README.R: -------------------------------------------------------------------------------- 1 | Reproduce results of the article 2 | === 3 | 4 | This folder contains files to reproduce the figures in the article: 5 | "Inference in generative models using the Wasserstein distance" 6 | by Espen Bernton, Pierre E. Jacob, Mathieu Gerber, Christian P. Robert 7 | 8 | They are part of the 'winference' package: 9 | https://github.com/pierrejacob/winference 10 | 11 | The files are organized by name: e.g. all files relating to the queueing model 12 | start with 'queue_'. They have to be executed in a certain order, starting with 13 | '_generate_data', then '_wsmc', and finally '_plots'. 14 | 15 | Each subfolder contains a README file detailing the role of each file. 16 | 17 | The files will save the results in the current working directory, which you might want to change 18 | with the 'setwd' function. The scripts might take hours to run, depending on the machine; if this is a problem, 19 | make sure you take a look at what you run before you run it! 20 | -------------------------------------------------------------------------------- /inst/reproduceabc/ar1/.Rapp.history: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pierrejacob/winference/bba0e89a019ccfeb403159f480cf2ac57e577f9d/inst/reproduceabc/ar1/.Rapp.history -------------------------------------------------------------------------------- /inst/reproduceabc/ar1/README.R: -------------------------------------------------------------------------------- 1 | Reproduce results for the AR(1) model in Section 4.2 of the article. 2 | === 3 | 4 | ar1_generate_data.R: generate a time series from an AR(1) model 5 | 6 | ar1_wsmc_marginal.R: loads data, take first 1,000 observations, 7 | and compute WABC posterior based on Wasserstein distance between 8 | the marginal distributions of synthetic and observed data sets. 9 | 10 | ar1_wsmc_delay1.R: loads data, take first 1,000 observations, 11 | and compute WABC posterior based on Wasserstein distance between 12 | the delay reconstructions of synthetic and observed data sets, with a lag of one. 13 | 14 | ar1_plots.R: loads results from above scripts and creates the two plots 15 | of Figure 3 (a) and (b). 16 | -------------------------------------------------------------------------------- /inst/reproduceabc/ar1/ar1_generate_data.R: -------------------------------------------------------------------------------- 1 | library(winference) 2 | rm(list = ls()) 3 | setmytheme() 4 | set.seed(11) 5 | prefix = "" 6 | target <- get_autoregressive() 7 | # number of observations 8 | nobservations <- 10000 9 | # parameter of data-generating process 10 | true_theta <- c(0.7, 0.9) 11 | obs <- target$robservation(nobservations, true_theta, 12 | target$parameters, target$generate_randomness(nobservations)) 13 | 14 | save(true_theta, obs, file = paste0(prefix, "ar1data.RData")) 15 | # plot the observations 16 | plot(obs, type = "l") 17 | acf(obs) 18 | -------------------------------------------------------------------------------- /inst/reproduceabc/ar1/ar1_wsmc_delay1.R: -------------------------------------------------------------------------------- 1 | library(winference) 2 | registerDoParallel(cores = detectCores()) 3 | rm(list = ls()) 4 | setmytheme() 5 | set.seed(11) 6 | # model 7 | target <- get_autoregressive() 8 | # 9 | # number of observations 10 | nobservations <- 1000 11 | nparticles = 2048 12 | prefix <- "" 13 | load(file = paste0(prefix, "ar1data.RData")) 14 | obs <- obs[1:nobservations] 15 | 16 | # 17 | lagvalue <- 1 18 | lag_obs <- create_lagmatrix(matrix(obs, nrow = 1), lagvalue) 19 | lag_obs <- lag_obs[,seq(from=1, to=ncol(lag_obs), by=2)] 20 | 21 | 22 | compute_d <- get_transport_to_y(lag_obs) 23 | compute_distance <- function(y_sim){ 24 | lag_y_sim <- create_lagmatrix(matrix(y_sim, nrow = 1), lagvalue) 25 | lag_y_sim <- lag_y_sim[,seq(from=1, to=ncol(lag_y_sim), by=2)] 26 | compute_d(lag_y_sim) 27 | } 28 | 29 | target$simulate <- function(theta){ 30 | return(matrix(target$robservation(nobservations, theta, target$parameters, target$generate_randomness(nobservations)), nrow = 1)) 31 | } 32 | 33 | thetas <- target$rprior(1, target$parameters) 34 | y_sim <- target$simulate(thetas[1,]) 35 | compute_distance(y_sim) 36 | 37 | param_algo <- list(nthetas = nparticles, nmoves = 1, proposal = mixture_rmixmod(), 38 | minimum_diversity = 0.5, R = 2, maxtrials = 1000) 39 | 40 | filename <- paste0(prefix, "ar1.n", nobservations, ".wsmc_delay1.RData") 41 | results <- wsmc(compute_distance, target, param_algo, savefile = filename, maxsim = 10^6) 42 | load(filename) 43 | # results <- wsmc_continue(results, savefile = filename, maxtime = 10*60*60) 44 | # 45 | # load(filename) 46 | # wsmc.df <- wsmc_to_dataframe(results) 47 | # nsteps <- max(wsmc.df$step) 48 | # plot_bivariate_polygon(results, 1, 2) 49 | # plot_bivariate(results, 1, 2) 50 | # plot_threshold_time(results) 51 | 52 | 53 | -------------------------------------------------------------------------------- /inst/reproduceabc/ar1/ar1_wsmc_marginal.R: -------------------------------------------------------------------------------- 1 | library(winference) 2 | registerDoParallel(cores = detectCores()) 3 | rm(list = ls()) 4 | setmytheme() 5 | set.seed(11) 6 | # model 7 | target <- get_autoregressive() 8 | # 9 | # number of observations 10 | nobservations = 1000 11 | nparticles = 2048 12 | prefix <- "" 13 | load(file = paste0(prefix, "ar1data.RData")) 14 | obs <- obs[1:nobservations] 15 | obs_sorted = sort(obs) 16 | compute_d = function(y){ 17 | sort_y = sort(y) 18 | mean(abs(sort_y-obs_sorted)) 19 | } 20 | 21 | target$simulate <- function(theta){ 22 | return(matrix(target$robservation(nobservations, theta, target$parameters, target$generate_randomness(nobservations)), nrow = 1)) 23 | } 24 | 25 | thetas <- target$rprior(1, target$parameters) 26 | y_sim <- target$simulate(thetas[1,]) 27 | compute_d(y_sim) 28 | 29 | param_algo <- list(nthetas = nparticles, nmoves = 1, proposal = mixture_rmixmod(), 30 | minimum_diversity = 0.5, R = 2, maxtrials = 1000) 31 | 32 | filename <- paste0(prefix, "ar1.n", nobservations, ".wsmc_marginal.RData") 33 | results <- wsmc(compute_d, target, param_algo, savefile = filename, maxtime = 60*60) 34 | # load(filename) 35 | # results <- wsmc_continue(results, savefile = filename, maxtime = 2*60*60) 36 | # 37 | # load(filename) 38 | # wsmc.df <- wsmc_to_dataframe(results) 39 | # nsteps <- max(wsmc.df$step) 40 | # plot_bivariate_polygon(results, 1, 2) 41 | # plot_bivariate(results, 1, 2) 42 | 43 | 44 | -------------------------------------------------------------------------------- /inst/reproduceabc/cosine/README.R: -------------------------------------------------------------------------------- 1 | Reproduce results for the cosine model in Section 4.1 of the article. 2 | === 3 | 4 | cosine_generate_data.R: generate a time series from a cosine trend + noise model 5 | 6 | cosine_mcmc.R: loads data, take first 100 observations, 7 | and runs a Metropolis--Hastings algorithm to approximate the posterior distribution. 8 | 9 | cosine_wsmc_euclidean.R: loads data, take first 100 observations, 10 | and compute ABC posterior based on Euclidean distances between 11 | synthetic and observed data sets. 12 | 13 | cosine_wsmc_curvematching_wasserstein.R: loads data, take first 100 observations, 14 | and compute WABC posterior based on curve matching, with lambda = 1, 15 | and Wasserstein distance computed exactly using the "transport" package 16 | (which needs to be installed, i.e. install.package("transport")) 17 | 18 | cosine_plots.R: loads results from above scripts and creates the four plots 19 | of Figure 2 (a,b,c,d). 20 | -------------------------------------------------------------------------------- /inst/reproduceabc/cosine/cosine_generate_data.R: -------------------------------------------------------------------------------- 1 | library(winference) 2 | registerDoParallel(cores = detectCores()) 3 | rm(list = ls()) 4 | setmytheme() 5 | set.seed(11) 6 | 7 | target <- get_cosine() 8 | 9 | prefix = "" 10 | 11 | # number of observations 12 | nobservations <- 10000 13 | # parameter of data-generating process 14 | true_theta <- c(1/80, pi/4, 0, log(2)) 15 | obs <- target$robservation(nobservations, true_theta, 16 | target$parameters, target$generate_randomness(nobservations)) 17 | 18 | save(true_theta, obs, file = paste0(prefix,"cosinedata.RData")) 19 | 20 | plot(obs[1:100], type = "l") 21 | # hist(obs) 22 | # acf(obs, 100) 23 | # mean(obs) 24 | # sd(obs) 25 | -------------------------------------------------------------------------------- /inst/reproduceabc/cosine/cosine_mcmc.R: -------------------------------------------------------------------------------- 1 | library(winference) 2 | registerDoParallel(cores = detectCores()) 3 | rm(list = ls()) 4 | setmytheme() 5 | set.seed(11) 6 | prefix <- "" 7 | target <- get_cosine() 8 | 9 | nobservations <- 100 10 | 11 | load(paste0(prefix, "cosinedata.RData")) 12 | obs <- matrix(obs[1:nobservations], nrow = 1) 13 | loglikelihood <- function(thetas, ys, ...){ 14 | evals <- rep(0, nrow(thetas)) 15 | for (itheta in 1:nrow(thetas)){ 16 | backbone <- exp(thetas[itheta,4]) * cos(2 * pi * thetas[itheta,1] * (1:nobservations) + thetas[itheta,2]) 17 | evals[itheta] <- sum(dnorm(ys, mean = backbone, sd = exp(thetas[itheta,3]), log = TRUE)) 18 | } 19 | return(evals) 20 | } 21 | 22 | 23 | target$loglikelihood <- loglikelihood 24 | theta_init <- target$rprior(8, target$parameters) 25 | 26 | tuning_parameters <- list(niterations = 100000, nchains = nrow(theta_init), 27 | cov_proposal = diag(0.1, nrow = target$thetadim, ncol = target$thetadim), 28 | adaptation = 10000, init_chains = theta_init) 29 | mhfile <- paste0(prefix, "cosine.mcmc.n", nobservations, ".RData") 30 | mh <- metropolishastings(obs, target, tuning_parameters) 31 | save(mh, file = mhfile) 32 | load(mhfile) 33 | 34 | burnin <- 50000 35 | chain.df <- mhchainlist_to_dataframe(mh$chains) 36 | chain.df %>% head 37 | g <- ggplot(chain.df %>% filter(iteration > burnin, iteration %% 100 == 1), aes(x = iteration, y = X.1, group = ichain, colour = factor(ichain))) + geom_line() 38 | g + geom_hline(yintercept = true_theta[1], col = "red") 39 | 40 | g <- ggplot(chain.df %>% filter(iteration > burnin, iteration %% 100 == 1), aes(x = iteration, y = X.2, group = ichain, colour = factor(ichain))) + geom_line() 41 | g + geom_hline(yintercept = true_theta[2], col = "red") 42 | 43 | g <- ggplot(chain.df %>% filter(iteration > burnin, iteration %% 100 == 1), aes(x = iteration, y = X.3, group = ichain, colour = factor(ichain))) + geom_line() 44 | g + geom_hline(yintercept = true_theta[3], col = "red") 45 | 46 | g <- ggplot(chain.df %>% filter(iteration > burnin, iteration %% 100 == 1), aes(x = iteration, y = X.4, group = ichain, colour = factor(ichain))) + geom_line() 47 | g + geom_hline(yintercept = true_theta[4], col = "red") 48 | 49 | -------------------------------------------------------------------------------- /inst/reproduceabc/cosine/cosine_wsmc_curvematching_wasserstein.R: -------------------------------------------------------------------------------- 1 | library(winference) 2 | registerDoParallel(cores = detectCores()) 3 | rm(list = ls()) 4 | setmytheme() 5 | set.seed(11) 6 | 7 | prefix = "" 8 | 9 | target <- get_cosine() 10 | 11 | nobservations <- 100 12 | 13 | load(paste0(prefix, "cosinedata.RData")) 14 | obs <- matrix(obs[1:nobservations], nrow = 1) 15 | target$simulate <- function(theta){ 16 | return(matrix(target$robservation(nobservations, theta, target$parameters, target$generate_randomness(nobservations)), nrow = 1)) 17 | } 18 | 19 | param_algo <- list(nthetas = 2048, nmoves = 1, proposal = mixture_rmixmod(), 20 | minimum_diversity = 0.5, R = 2, maxtrials = 1e5) 21 | 22 | lambda <- 1 23 | multiplier <- lambda*(max(obs[1,]) - min(obs[1,])) 24 | augment <- function(series) rbind(multiplier * (1:length(series))/length(series), series) 25 | augmented_obs <- augment(obs) 26 | plot(augmented_obs[1,],augmented_obs[2,]) 27 | sorted_augmented_obs <- augmented_obs[,hilbert_order(augmented_obs)] 28 | 29 | compute_d <- function(y_fake){ 30 | augmented_y_fake <- augment(y_fake) 31 | sink("/dev/null") 32 | z <- exact_transport_distance(augmented_obs, augmented_y_fake, p = 1, ground_p = 2) 33 | sink(NULL) 34 | return(z) 35 | } 36 | 37 | # library(microbenchmark) 38 | # microbenchmark(y_sim <- target$simulate(true_theta),compute_d(y_sim)) 39 | # y_sim <- target$simulate(true_theta) 40 | # compute_d(y_sim) 41 | 42 | filename <- paste0(prefix, "cosine_wsmc_curvematching.wasserstein.lambda", lambda, ".n", nobservations, ".RData") 43 | # results <- wsmc(compute_d, target, param_algo, savefile = filename, maxsimulation = 1e6) 44 | load(filename) 45 | # results$compute_d = compute_d 46 | # ncomp = sum(results$ncomputed) 47 | # results <- wsmc_continue(results, savefile = filename, maxsimulation = 1e6) 48 | -------------------------------------------------------------------------------- /inst/reproduceabc/cosine/cosine_wsmc_euclidean.R: -------------------------------------------------------------------------------- 1 | library(winference) 2 | registerDoParallel(cores = detectCores()) 3 | rm(list = ls()) 4 | setmytheme() 5 | set.seed(11) 6 | 7 | prefix = "" 8 | 9 | target <- get_cosine() 10 | 11 | nobservations <- 100 12 | 13 | load(paste0(prefix, "cosinedata.RData")) 14 | obs <- matrix(obs[1:nobservations], nrow = 1) 15 | target$simulate <- function(theta){ 16 | return(matrix(target$robservation(nobservations, theta, target$parameters, target$generate_randomness(nobservations)), nrow = 1)) 17 | } 18 | 19 | param_algo <- list(nthetas = 2048, nmoves = 1, proposal = mixture_rmixmod(), 20 | minimum_diversity = 0.5, R = 2, maxtrials = 1e5) 21 | 22 | compute_d <- function(z){ 23 | return(sqrt(sum((z[1,] - obs[1,])^2))) 24 | } 25 | 26 | 27 | filename <- paste0(prefix, "cosine_wsmc_euclidean.n", nobservations, ".RData") 28 | results <- wsmc(compute_d, target, param_algo, savefile = filename, maxsimulation = 1e7) 29 | load(filename) 30 | # results <- wsmc_continue(results, savefile = filename, maxsimulation = 1e6) 31 | 32 | # plot_marginal(results, 1) 33 | # plot_marginal(results, 2) 34 | # plot_marginal(results, 3) 35 | # plot_marginal(results, 4) 36 | -------------------------------------------------------------------------------- /inst/reproduceabc/gandk/README.R: -------------------------------------------------------------------------------- 1 | Reproduce results for the univariate g-and-k model in Section 5.1.1 of the WABC article. 2 | === 3 | 4 | gandk_generate_data.R: generate and save a data set from the model 5 | 6 | gandk_wsmc.R: loads a data set of size 250 and runs SMC to approximate the WABC 7 | posterior using the exact Wasserstein distance with a budget of 2.4*10^6 model simulations. 8 | Can be continued for a long run (e.g. 10^8 simulations in as Fig 5 of the paper), 9 | using wsmc_continue. 10 | 11 | gandk_abctools.R: loads the abctools package, loads the data. Runs the semi-automatic 12 | ABC procedure of Fearnhead and Prangle (2012) using 2.4*10^6 model simulations. Might 13 | require a lot of memory. 14 | 15 | gandk_mcmc.R: load the data and run MCMC to approximate the posterior distribution, 16 | using numerical approximations of the likelihood. Proposal adapted to target using a 17 | previous run of the algorithm. Performs 75,000 iterations. 18 | 19 | gandk_plots_compare.R: loads the data and the output of the scripts above. Plots 20 | the marginal distributions corresponding to Fig 4 of the paper. 21 | 22 | gandk_plots_convergence.R: loads the data and output from gandk_wsmc.R and gandk_mcmc.R. 23 | Plots correspond WABC marginal posteriors in Fig 5 a)-d). 24 | 25 | gandk_plots_ncomputed.R: loads the data and output from gandk_wsmc.R and gandk_mcmc.R. 26 | Plot corresponds to Fig 5 e). number of sims vs number of smc steps 27 | 28 | gandk_plots_threshold.R: loads the data and output from gandk_wsmc.R and gandk_mcmc.R. 29 | Plot corresponds to Fig 5 f). threshold vs number of model sims. 30 | 31 | gandk_plots_w_to_posterior.R: loads the data and output from gandk_wsmc.R and gandk_mcmc.R. 32 | Approximates the W1 distance between the posterior and the WABC approximation, plots W1 33 | distance as function of model simulations. Plot corresponds to Fig 5 g). -------------------------------------------------------------------------------- /inst/reproduceabc/gandk/gandk_abctools.R: -------------------------------------------------------------------------------- 1 | library(abctools) 2 | library(winference) 3 | rm(list = ls()) 4 | registerDoParallel(cores = detectCores()) 5 | setmytheme() 6 | set.seed(11) 7 | target <- get_gandk() 8 | 9 | prefix = "" 10 | 11 | nobservations <- 250 12 | load(paste0(prefix, "gandkdata.RData")) 13 | obs <- obs[1:nobservations] 14 | 15 | target$simulate <- function(theta){ 16 | return(matrix(target$robservation(nobservations, theta, target$parameters, target$generate_randomness(nobservations)), nrow = 1)) 17 | } 18 | 19 | 20 | # load(paste0(prefix, "gandkwsmc.n", nobservations, ".RData")) 21 | # results_was = results 22 | # ndists = cumsum(results$ncomputed) 23 | # nsim = ndists[34] 24 | 25 | # nsim_approx = 10^6 26 | # nsim = ndists[which.min(abs(nsim_approx - ndists))] 27 | 28 | nsim = 2.4*10^6 29 | 30 | m = 10 #determines number of order stats in initial summary stat 31 | l = 4 #determines how many powers of the order stats are included in the initial summary stat 32 | 33 | t = proc.time() 34 | thetas = target$rprior(nsim, target$parameters) 35 | 36 | obs_subset = sort(obs)[c(1,(1:nobservations)[(nobservations/m)*(1:(m-1))],nobservations)] 37 | 38 | sumstats = apply(thetas, MARGIN = 1, function(theta) { 39 | y_fake = target$simulate(theta) 40 | sort_y_fake = sort(y_fake) 41 | subset_y_fake = sort_y_fake[c(1,(1:nobservations)[(nobservations/m)*(1:(m-1))],nobservations)] 42 | }) 43 | sumstats = t(sumstats) 44 | 45 | # sumstats = foreach(i = 1:nsim, .combine = rbind) %dorng%{ 46 | # y_fake = target$simulate(theta) 47 | # sort_y_fake = sort(y_fake) 48 | # subset_y_fake = sort_y_fake[c(1,(1:nobservations)[(nobservations/m)*(1:(m-1))],nobservations)] 49 | # return(subset_y_fake) 50 | # } 51 | 52 | filename_sumstats = paste0(prefix, "sumstats.gandk.abctools.n", nobservations,".m",m,".l",l,".nsim",nsim,".RData") 53 | save(list(sumstats,thetas), file = filename_sumstats) 54 | 55 | load(filename_sumstats) 56 | N = results_was$param_algo$nthetas 57 | tol = N/nsim 58 | #tol = 1000/nsim 59 | #tol = 500/nsim 60 | #tol = 100/nsim 61 | 62 | 63 | tfs <- list(function(x){cbind(x, x^2, x^3, x^4)}) 64 | saabc <- semiauto.abc(obs = obs_subset, param = thetas, sumstats = sumstats, 65 | satr = tfs, overlap = TRUE, saprop = 1, 66 | abcprop = 1, tol = tol, method = "rejection", 67 | final.dens = TRUE) 68 | 69 | t = proc.time() - t 70 | t 71 | results = list(thetas = saabc$post.sample, compute_time = t, threshold = tol, norder = m+1, npower = l, nsim = nsim) 72 | filename = paste0(prefix, "gandk.abctools.n", nobservations,".m",m,".l",l,".nsim",nsim,".RData") 73 | save(results, file = filename) 74 | load(filename) 75 | 76 | # hist(saabc$post.sample[,1],prob=T) 77 | # hist(saabc$post.sample[,2],prob=T) 78 | # hist(saabc$post.sample[,3],prob=T) 79 | # hist(saabc$post.sample[,4],prob=T) 80 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /inst/reproduceabc/gandk/gandk_generate_data.R: -------------------------------------------------------------------------------- 1 | #+ presets, echo = FALSE, warning = FALSE, message = FALSE 2 | library(winference) 3 | registerDoParallel(cores = detectCores()) 4 | rm(list = ls()) 5 | setmytheme() 6 | 7 | set.seed(11) 8 | 9 | prefix = "" 10 | filename = paste0(prefix,"gandkdata.Rdata") 11 | 12 | target <- get_gandk() 13 | 14 | # number of observations 15 | nobservations <- 10000 16 | # parameter of data-generating process 17 | true_theta <- c(3, 1, 2, 0.5) 18 | obs <- target$robservation(nobservations, true_theta, 19 | target$parameters, target$generate_randomness(nobservations)) 20 | 21 | save(true_theta, obs, file = filename) 22 | 23 | hist(obs) 24 | plot(obs[1:1000], type = "l") 25 | mean(obs) 26 | sd(obs) 27 | 28 | -------------------------------------------------------------------------------- /inst/reproduceabc/gandk/gandk_plots_convergence.R: -------------------------------------------------------------------------------- 1 | library(winference) 2 | registerDoParallel(cores = detectCores()) 3 | rm(list = ls()) 4 | setmytheme() 5 | set.seed(11) 6 | target <- get_gandk() 7 | 8 | fig.height <- 5 9 | fig.width <- 5 10 | 11 | my_colors <- get_my_colors() 12 | 13 | prefix <- "" 14 | 15 | nobservations = 250 16 | load(paste0(prefix, "gandkdata.RData")) 17 | obs <- obs[1:nobservations] 18 | 19 | # Wasserstein SMC 20 | filename = paste0(prefix, "gandkwsmc.n", nobservations, ".RData") 21 | load(filename) 22 | results_was = results 23 | wsmc.df = wsmc_to_dataframe(results_was) 24 | nsteps = tail(wsmc.df$step,n=1) 25 | #step = nsteps 26 | step = 20 27 | wsmc.df = wsmc.df[wsmc.df$step>=step,] 28 | 29 | 30 | # MCMC 31 | mhfile <- paste0(prefix, "gandkmcmc.n", nobservations, "mh.RData") 32 | load(mhfile) 33 | mcmc.df <- mhchainlist_to_dataframe(mh$chains) 34 | names(mcmc.df) <- c("ichain", "iteration", target$parameter_names) 35 | burnin = 50000 36 | mcmc.df = mcmc.df[mcmc.df$iteration>burnin,3:6] 37 | 38 | 39 | g <- ggplot(mcmc.df, aes(x = A)) + geom_density(aes(y = ..density.., fill = "Posterior"), alpha = 0.5) 40 | g <- g + geom_density(data = wsmc.df, aes(x = A, colour = step, group = step), alpha = 0.5) 41 | g <- g + scale_color_gradient(low = rgb(1,0.5,0.5), high = "darkblue", name = "") 42 | g <- g + scale_fill_manual(name = "", values = my_colors) + xlab("a") + theme(legend.position = "none") 43 | g <- g + geom_vline(xintercept = true_theta[1]) 44 | g 45 | ggsave(filename = paste0(prefix, "conv.gandk_marginal1.pdf"), plot = g, width = fig.width, height = fig.height) 46 | 47 | g <- ggplot(mcmc.df, aes(x = B)) + geom_density(aes(y = ..density.., fill = "Posterior"), alpha = 0.5) 48 | g <- g + geom_density(data = wsmc.df, aes(x = B, colour = step, group = step), alpha = 0.5) 49 | g <- g + scale_color_gradient(low = rgb(1,0.5,0.5), high = "darkblue", name = "") 50 | g <- g + scale_fill_manual(name = "", values = my_colors) + xlab("b") + theme(legend.position = "none") 51 | g <- g + geom_vline(xintercept = true_theta[2]) 52 | g 53 | ggsave(filename = paste0(prefix, "conv.gandk_marginal2.pdf"), plot = g, width = fig.width, height = fig.height) 54 | 55 | g <- ggplot(mcmc.df, aes(x = g)) + geom_density(aes(y = ..density.., fill = "Posterior"), alpha = 0.5) 56 | g <- g + geom_density(data = wsmc.df, aes(x = g, colour = step, group = step), alpha = 0.5) 57 | g <- g + scale_color_gradient(low = rgb(1,0.5,0.5), high = "darkblue", name = "") 58 | g <- g + scale_fill_manual(name = "", values = my_colors) + xlab("g") + theme(legend.position = "none") 59 | g <- g + geom_vline(xintercept = true_theta[3]) 60 | g <- g + coord_cartesian(xlim = c(1, 6)) 61 | g 62 | ggsave(filename = paste0(prefix, "conv.gandk_marginal3.pdf"), plot = g, width = fig.width, height = fig.height) 63 | 64 | g <- ggplot(mcmc.df, aes(x = k)) + geom_density(aes(y = ..density.., fill = "Posterior"), alpha = 0.5) 65 | g <- g + geom_density(data = wsmc.df, aes(x = k, colour = step, group = step), alpha = 0.5) 66 | g <- g + scale_color_gradient(low = rgb(1,0.5,0.5), high = "darkblue", name = "") 67 | g <- g + scale_fill_manual(name = "", values = my_colors) + xlab("k") + theme(legend.position = "none") 68 | g <- g + geom_vline(xintercept = true_theta[4]) 69 | g 70 | ggsave(filename = paste0(prefix, "conv.gandk_marginal4.pdf"), plot = g, width = fig.width, height = fig.height) 71 | -------------------------------------------------------------------------------- /inst/reproduceabc/gandk/gandk_plots_ncomputed.R: -------------------------------------------------------------------------------- 1 | library(winference) 2 | registerDoParallel(cores = detectCores()) 3 | rm(list = ls()) 4 | setmytheme() 5 | set.seed(11) 6 | target <- get_gandk() 7 | 8 | fig.height <- 5 9 | fig.width <- 5 10 | 11 | my_colors <- get_my_colors() 12 | 13 | prefix = "" 14 | 15 | nobservations = 250 16 | load(paste0(prefix, "gandkdata.RData")) 17 | obs <- obs[1:nobservations] 18 | 19 | # Wasserstein SMC 20 | filename = paste0(prefix, "gandkwsmc.n", nobservations, ".RData") 21 | load(filename) 22 | results_was = results 23 | wsmc.df = wsmc_to_dataframe(results_was) 24 | nsteps = tail(wsmc.df$step,n=1) 25 | ncomp = results_was$ncomputed 26 | 27 | df = data.frame(steps = 1:nsteps, ncomp = ncomp) 28 | g = ggplot(df, aes(x = steps, y = ncomp)) + geom_line(aes(colour = "Wasserstein")) + geom_point(aes(colour = "Wasserstein")) 29 | g = g + scale_color_manual(name = "", values = my_colors) + xlab("steps") + ylab("# model simulations") 30 | g = g + scale_y_log10(breaks = c(1e4,1e5,1e6,1e7,1e8)) + scale_x_continuous(breaks = c(10,20,40,30,50)) 31 | g <- g + geom_label(data = data.frame(x = c(34), y = c(6e7), method = c("Wasserstein")), 32 | aes(x = x, y = y, colour = method, label = method), size = 7) + theme(legend.position = "none") 33 | g 34 | ggsave(filename = paste0(prefix, "gandk.n", nobservations, ".ncomp_vs_step.pdf"), plot = g, width = fig.width, height = fig.height) 35 | 36 | -------------------------------------------------------------------------------- /inst/reproduceabc/gandk/gandk_plots_threshold.R: -------------------------------------------------------------------------------- 1 | library(winference) 2 | registerDoParallel(cores = detectCores()) 3 | rm(list = ls()) 4 | setmytheme() 5 | set.seed(11) 6 | target <- get_gandk() 7 | 8 | fig.height <- 5 9 | fig.width <- 5 10 | 11 | my_colors <- get_my_colors() 12 | 13 | prefix = "" 14 | 15 | nobservations = 250 16 | load(paste0(prefix, "gandkdata.RData")) 17 | obs <- obs[1:nobservations] 18 | 19 | # Wasserstein SMC 20 | filename = paste0(prefix, "gandkwsmc.n", nobservations, ".RData") 21 | load(filename) 22 | results_was = results 23 | #step = 20 24 | #thresholds = results_was$threshold_history[-(1:(step-1))] 25 | step = 1 26 | thresholds = results_was$threshold_history 27 | ncomp = cumsum(results_was$ncomputed) 28 | 29 | # g = qplot(x = step:(step+length(thresholds)-1), y = thresholds, geom = "line") 30 | # g = g + xlab("step") + ylab("threshold") 31 | # g = g + scale_y_log10(breaks = c(1e4,100,10,1,0.1,0.01)) 32 | # g 33 | # ggsave(filename = paste0(prefix, "gandk.n", nobservations, ".threshold.pdf"), plot = g, width = fig.width, height = fig.height) 34 | 35 | df = data.frame(thresholds = thresholds, ncomp = ncomp) 36 | g = ggplot(df, aes(x = ncomp, y = thresholds)) + geom_line(aes(colour = "Wasserstein")) + geom_point(aes(colour = "Wasserstein")) 37 | g = g + scale_color_manual(name = "", values = my_colors) + xlab("# model simulations") + ylab("threshold") 38 | g = g + scale_y_log10(breaks = c(1e4,1e3,100,10,1,0.1,0.01)) + scale_x_log10(breaks = c(1e4,1e6,1e8)) 39 | g <- g + geom_label(data = data.frame(x = c(1e7), y = c(0.4), method = c("Wasserstein")), 40 | aes(x = x, y = y, colour = method, label = method), size = 7) + theme(legend.position = "none") 41 | g 42 | ggsave(filename = paste0(prefix, "gandk.n", nobservations, ".threshold_vs_ncomp.pdf"), plot = g, width = fig.width, height = fig.height) 43 | 44 | -------------------------------------------------------------------------------- /inst/reproduceabc/gandk/gandk_plots_w_to_posterior.R: -------------------------------------------------------------------------------- 1 | library(winference) 2 | registerDoParallel(cores = detectCores()) 3 | rm(list = ls()) 4 | setmytheme() 5 | set.seed(11) 6 | target <- get_gandk() 7 | 8 | fig.height <- 5 9 | fig.width <- 5 10 | 11 | my_colors <- get_my_colors() 12 | 13 | prefix = "" 14 | 15 | nobservations = 250 16 | load(paste0(prefix, "gandkdata.RData")) 17 | obs <- obs[1:nobservations] 18 | 19 | # Wasserstein SMC 20 | filename = paste0(prefix, "gandkwsmc.n", nobservations, ".RData") 21 | load(filename) 22 | results_was = results 23 | 24 | 25 | # MCMC 26 | mhfile <- paste0(prefix, "gandkmcmc.n", nobservations, "mh.RData") 27 | load(mhfile) 28 | mcmc.df <- mhchainlist_to_dataframe(mh$chains) 29 | names(mcmc.df) <- c("ichain", "iteration", target$parameter_names) 30 | burnin = 50000 31 | mcmc.df = mcmc.df[mcmc.df$iteration>burnin,] 32 | 33 | 34 | #Calculate Wasserstein of WABC to posterior 35 | wasserstein_to_posterior <- function(results, posterior_sample, nmax){ 36 | nsteps <- length(results$thetas_history) 37 | if (nsteps > nmax){ 38 | steps <- floor(seq(from = 1, to = nsteps, length.out = nmax)) 39 | } else { 40 | steps <- 1:nsteps 41 | } 42 | w_to_post_ <- as.numeric(foreach (istep = steps, .combine = c) %dorng% { 43 | thetas <- results$thetas_history[[istep]] 44 | x <- try(exact_transport_distance(t(thetas), t(posterior_sample), p = 1, ground_p = 2)) 45 | if (inherits(x, "try-error")){ 46 | x <- sinkhorn_distance(t(thetas), t(posterior_sample), p = 1, ground_p = 2, eps = 0.1, niterations = 1000)$uncorrected 47 | } 48 | x 49 | }) 50 | return(data.frame(steps = steps, ncomputed = cumsum(results$ncomputed)[steps], times = results$compute_times[steps], w = w_to_post_)) 51 | } 52 | 53 | 54 | npost <- 2048 55 | mcmcpostburnin <- mcmc.df %>% filter(iteration > burnin) 56 | posterior_sample <- mcmcpostburnin[sample(x = 1:nrow(mcmcpostburnin), size = npost, replace = TRUE),3:ncol(mcmcpostburnin)] 57 | 58 | filename <- paste0(prefix, "gandk.n", nobservations, ".w1posterior.RData") 59 | n_w_comp <- 50 60 | w_to_post_wasserstein <- wasserstein_to_posterior(results_was, posterior_sample, n_w_comp) 61 | save(w_to_post_wasserstein, file = filename) 62 | load(file = filename) 63 | 64 | g <- ggplot(w_to_post_wasserstein, aes(x = ncomputed, y = w)) + geom_line(aes(colour = "Wasserstein")) + geom_point(aes(colour = "Wasserstein")) 65 | g <- g + scale_color_manual(name = "", values = my_colors) + xlab("# model simulations") + ylab("W-distance to posterior") 66 | g <- g + scale_x_log10(breaks = c(1e4,1e5,1e6,1e7,1e8)) + scale_y_log10(breaks = c(10,5,2,1,0.5,0.25,0.1,0.06)) 67 | g <- g + geom_label(data = data.frame(x = c(8e6), y = c(0.1), method = c("Wasserstein")), 68 | aes(x = x, y = y, colour = method, label = method), size = 7) + theme(legend.position = "none") 69 | g 70 | ggsave(filename = paste0(prefix, "gandk.n", nobservations, ".w1post.pdf"), plot = g, width = 2*fig.width, height = fig.height) 71 | 72 | 73 | 74 | -------------------------------------------------------------------------------- /inst/reproduceabc/gandk/gandk_wsmc.R: -------------------------------------------------------------------------------- 1 | #+ presets, echo = FALSE, warning = FALSE, message = FALSE 2 | library(winference) 3 | registerDoParallel(cores = detectCores()) 4 | rm(list = ls()) 5 | setmytheme() 6 | set.seed(11) 7 | target <- get_gandk() 8 | # number of observations 9 | nobservations <- 250 10 | prefix = "" 11 | load(paste0(prefix, "gandkdata.RData")) 12 | obs <- obs[1:nobservations] 13 | sort_obs = sort(obs) 14 | 15 | 16 | #compute_d <- get_hilbert_to_y(matrix(obs, nrow = 1)) 17 | 18 | compute_d = function(y){ 19 | sort_y = sort(y) 20 | mean(abs(sort_y-sort_obs)) 21 | } 22 | 23 | target$simulate <- function(theta){ 24 | return(matrix(target$robservation(nobservations, theta, target$parameters, target$generate_randomness(nobservations)), nrow = 1)) 25 | } 26 | 27 | # M=10000 28 | # ts = rep(0,M) 29 | # for(i in 1:M){ 30 | # y_sim = target$simulate(target$rprior(1, target$parameters)) 31 | # t=proc.time() 32 | # compute_d_alt(y_sim) 33 | # t=proc.time() -t 34 | # ts[i] = t[3] 35 | # } 36 | 37 | param_algo <- list(nthetas = 2048, nmoves = 1, proposal = mixture_rmixmod(), 38 | minimum_diversity = 0.5, R = 2, maxtrials = 100000) 39 | #t = proc.time() 40 | filename <- paste0(prefix, "gandkwsmc.n", nobservations, ".RData") 41 | results <- wsmc(compute_d, target, param_algo, savefile = filename, maxsim = 2.4e6) 42 | #t = proc.time() - t 43 | load(filename) 44 | #results <- wsmc_continue(results, savefile = filename, maxtime = 14*60*60) 45 | 46 | 47 | # load(filename) 48 | # wsmc.df <- wsmc_to_dataframe(results) 49 | # nsteps <- max(wsmc.df$step) 50 | # 51 | # # plot_bivariate_polygon(results, 1, 2) 52 | # # plot_bivariate_polygon(results, 3, 4) 53 | # 54 | # library(gridExtra) 55 | # grid.arrange(plot_marginal_time(results, 1), 56 | # plot_marginal_time(results, 2), 57 | # plot_marginal_time(results, 3), 58 | # plot_marginal_time(results, 4), nrow = 2) 59 | 60 | 61 | -------------------------------------------------------------------------------- /inst/reproduceabc/levydriven/README.R: -------------------------------------------------------------------------------- 1 | Reproduce results for the Levy-driven stochastic volatility model in Section 5.4 of the article. 2 | === 3 | 4 | levydriven_generate_data.R: generate a time series from the model. 5 | 6 | levydriven_wsmc_hilbert.R: loads data, takes first 10,000 observations, 7 | approximates WABC with delay reconstruction (lag of one), using the Hilbert 8 | distance. 9 | 10 | levydriven_wsmc_with_summary.R: loads results from above scripts, 11 | and defines new distance that uses both the Hilbert-delay reconstruction one, 12 | and a distance on summary statistics, and then approximates the corresponding 13 | WABC posterior. 14 | 15 | levydriven_plots.R: loads results from above scripts and creates the three plots 16 | of Figure 9 (a,b,c) and the three plots of Figure 10 (a,b,c). 17 | 18 | === 19 | Additionally, the folder contains files that could be useful but not necessary 20 | to reproduce the figures: 21 | 22 | levydriven_timings.R: runs particle filters and compute variance of likelihood 23 | estimator as well as record the timings; this is to convince oneself that a 24 | classic PMMH approach to this problem would be time consuming, due to the length 25 | of the time series. 26 | 27 | levydriven_mh.R: implements PMMH on a subset of the data. 28 | 29 | levydriven_is_correction.R: implements an IS correction step to 30 | go from the WABC posterior to the actual posterior. 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /inst/reproduceabc/levydriven/levydriven_generate_data.R: -------------------------------------------------------------------------------- 1 | library(winference) 2 | registerDoParallel(cores = detectCores()) 3 | rm(list = ls()) 4 | setmytheme() 5 | 6 | set.seed(11) 7 | 8 | target <- get_levydriven() 9 | 10 | prefix <- "" 11 | 12 | # number of observations 13 | nobservations <- 50000 14 | # parameter of data-generating process 15 | true_theta <- c(0, 0, 0.5, 0.0625, 0.01) 16 | obs <- target$robservation(nobservations, true_theta, 17 | target$parameters, target$generate_randomness(nobservations)) 18 | 19 | save(true_theta, obs, file = paste0(prefix,"levydrivendata.RData")) 20 | 21 | # hist(obs) 22 | plot(obs, type = "l") 23 | # plot(obs[1:250], type = "l") 24 | # acf(obs) 25 | # mean(obs) 26 | # sd(obs) 27 | # approx equal to exp(0.9)/sqrt(1-0.7^2) 28 | 29 | library(microbenchmark) 30 | microbenchmark( 31 | obs <- target$robservation(1e4, true_theta, 32 | target$parameters, target$generate_randomness(1e4)), 33 | times = 100) 34 | 35 | 36 | -------------------------------------------------------------------------------- /inst/reproduceabc/levydriven/levydriven_timings.R: -------------------------------------------------------------------------------- 1 | library(winference) 2 | registerDoParallel(cores = detectCores()) 3 | rm(list = ls()) 4 | setmytheme() 5 | set.seed(13) 6 | target <- get_levydriven() 7 | prefix <- "" 8 | load(file = paste0(prefix, "levydrivendata.RData")) 9 | nobservations <- 10000 10 | obs <- obs[1:nobservations] 11 | filename2 <- paste0(prefix, "levydriven.n", nobservations, ".lag1.wsmc.hilbert.summary.RUN2.RData") 12 | load(filename2) 13 | 14 | thetas <- results$thetas_history[[results$thetas_history %>% length]] 15 | 16 | post_cov <- cov(thetas) 17 | post_mean <- colMeans(thetas) 18 | 19 | variancescaling <- 1 20 | 21 | rproposal <- function(n){ 22 | return(fast_rmvnorm(n, post_mean, post_cov)) 23 | } 24 | 25 | dproposal <- function(thetas){ 26 | fast_dmvnorm(thetas, post_mean, post_cov) 27 | } 28 | 29 | model <- list() 30 | model$rinit <- function(nparticles, theta){ 31 | x <- matrix(nrow = nparticles, ncol = 2) 32 | for (i in 1:nparticles){ 33 | x[i,] <- rgamma(2, shape = theta[3] * theta[3]/theta[4], scale = theta[4]/theta[3]) 34 | } 35 | return(x) 36 | } 37 | 38 | model$rtransition <- function(xparticles, theta){ 39 | rtransition_r <- levydriven_rtransition_rand(dim(xparticles)[1], theta) 40 | new_z <- exp(-theta[5]) * xparticles[,2] + rtransition_r$sum_weighted_e 41 | new_v <- (1/theta[5]) * (xparticles[,2] - new_z + rtransition_r$sum_e) 42 | xparticles[,1] <- new_v 43 | xparticles[,2] <- new_z 44 | return(xparticles) 45 | } 46 | 47 | model$dobs <- function(observations, time, xparticles, theta){ 48 | return(dnorm(observations[time], mean = theta[1] + theta[2] * xparticles[,1], sd = sqrt(xparticles[,1]), log = TRUE)) 49 | } 50 | 51 | ### particle filter estimates of the log-likelihood 52 | particle_filter <- function(nparticles, model, theta, observations){ 53 | datalength <- nobservations 54 | # initialization 55 | xparticles <- model$rinit(nparticles, theta) 56 | logw <- rep(0, nparticles) 57 | # logw <- model$dobs(observations, 1, xparticles, theta) 58 | if (all(is.infinite(logw))){ 59 | return(NA) 60 | } 61 | maxlw <- max(logw) 62 | w <- exp(logw - maxlw) 63 | # update log likelihood estimate 64 | ll <- maxlw + log(mean(w)) 65 | normweights <- w / sum(w) 66 | # 67 | # step t > 1 68 | for (time in 1:datalength){ 69 | if (time > 1){ 70 | ancestors <- systematic_resampling_given_u(normweights, runif(1)) 71 | xparticles <- xparticles[ancestors,,drop=F] 72 | } 73 | xparticles <- model$rtransition(xparticles, theta) 74 | logw <- model$dobs(observations, time, xparticles, theta) 75 | if (all(is.infinite(logw))){ 76 | return(NA) 77 | } 78 | maxlw <- max(logw) 79 | w <- exp(logw - maxlw) 80 | # update log likelihood estimate 81 | ll <- ll + maxlw + log(mean(w)) 82 | normweights <- w / sum(w) 83 | # 84 | } 85 | return(ll) 86 | } 87 | 88 | # nparticles <- 2^10 89 | # res <- particle_filter(nparticles, model, post_mean, obs) 90 | 91 | 92 | library(foreach) 93 | library(doParallel) 94 | registerDoParallel(cores = detectCores()) 95 | nparticles <- 4096 96 | proposal <- rproposal(10) 97 | dproposals <- dproposal(proposal) 98 | prior_eval <- target$dprior(proposal) 99 | nrep <- dim(proposal)[1] 100 | nrep_per_theta <- 50 101 | results.df <- foreach(irep = 1:nrep, .combine = rbind) %dorng% { 102 | lls <- rep(0, nrep_per_theta) 103 | times <- rep(0, nrep_per_theta) 104 | if (is.finite(prior_eval[irep])){ 105 | for (irep_per_theta in 1:nrep_per_theta){ 106 | pct <- proc.time() 107 | lls[irep_per_theta] <- particle_filter(nparticles, model, proposal[irep,], obs) 108 | times[irep_per_theta] <- as.numeric((proc.time() - pct)[3]) 109 | } 110 | } 111 | data.frame(irep = irep, irep_per_theta = 1:nrep_per_theta, lls = lls, times = times) 112 | } 113 | 114 | results.df %>% group_by(irep) %>% summarise(sdlls = sd(lls), meantime = mean(times)) 115 | -------------------------------------------------------------------------------- /inst/reproduceabc/levydriven/levydriven_wsmc_hilbert.R: -------------------------------------------------------------------------------- 1 | library(winference) 2 | registerDoParallel(cores = detectCores()) 3 | rm(list = ls()) 4 | setmytheme() 5 | 6 | set.seed(13) 7 | 8 | target <- get_levydriven() 9 | 10 | # number of observations 11 | prefix <- "" 12 | load(file = paste0(prefix, "levydrivendata.RData")) 13 | 14 | nparticles = 2048 15 | nobservations <- 10000 16 | obs <- obs[1:nobservations] 17 | plot(obs, type = "l") 18 | 19 | lagvalue <- 1 20 | # # 21 | lag_obs <- create_lagmatrix(matrix(obs, nrow = 1), lagvalue) 22 | lag_obs <- lag_obs[,seq(from=1, to=ncol(lag_obs), by=2)] 23 | 24 | compute_hilbert <- get_hilbert_to_y(lag_obs) 25 | 26 | compute_d <- function(z){ 27 | fake_obs <- create_lagmatrix(matrix(z, nrow = 1), lagvalue) 28 | fake_obs <- fake_obs[,seq(from=1, to=ncol(fake_obs), by=2)] 29 | return(compute_hilbert(fake_obs)) 30 | } 31 | 32 | target$simulate <- function(theta){ 33 | r <- target$generate_randomness(nobservations) 34 | return(target$robservation(nobservations, theta, list(), r)) 35 | } 36 | 37 | param_algo <- list(nthetas = nparticles, nmoves = 1, proposal = mixture_rmixmod(), 38 | minimum_diversity = 0.5, R = 2, maxtrials = 1000) 39 | 40 | # filename <- paste0(prefix, "levydriven.n", nobservations, ".lag", lagvalue, ".wsmc.hilbert.RUN2.RData") 41 | # # results <- wsmc(compute_d, target, param_algo, savefile = filename, maxsimulation = 4e5) 42 | # load(file = filename) 43 | # results <- wsmc_continue(results, savefile = filename, maxsimulation = 2e5) 44 | 45 | # library(gridExtra) 46 | # grid.arrange(plot_threshold_time(results) + scale_y_log10(), plot_ncomputed(results)) 47 | # plot_bivariate(results, 1, 2) 48 | # plot_bivariate(results, 3, 4) 49 | # plot_bivariate(results, 4, 5) 50 | # # 51 | # ## timings 52 | library(microbenchmark) 53 | microbenchmark(target$simulate(true_theta), times = 1000) 54 | 55 | dummy = target$simulate(true_theta) 56 | f <- function(dataset){ 57 | return(sum(acf(dataset^2, plot = F, lag.max = 50)$acf[,,1][1:50])) 58 | } 59 | 60 | microbenchmark(compute_d(dummy), times = 1000) 61 | microbenchmark(f(dummy), times = 1000) 62 | 63 | -------------------------------------------------------------------------------- /inst/reproduceabc/levydriven/levydriven_wsmc_with_summary.R: -------------------------------------------------------------------------------- 1 | library(winference) 2 | registerDoParallel(cores = detectCores()) 3 | rm(list = ls()) 4 | setmytheme() 5 | 6 | set.seed(13) 7 | 8 | target <- get_levydriven() 9 | 10 | # number of observations 11 | prefix <- "" 12 | 13 | load(file = paste0(prefix, "levydrivendata.RData")) 14 | 15 | nobservations <- 10000 16 | obs <- obs[1:nobservations] 17 | 18 | lagvalue <- 1 19 | # # 20 | lag_obs <- create_lagmatrix(matrix(obs, nrow = 1), lagvalue) 21 | lag_obs <- lag_obs[,seq(from=1, to=ncol(lag_obs), by=2)] 22 | 23 | compute_hilbert <- get_hilbert_to_y(lag_obs) 24 | 25 | filename <- paste0(prefix, "levydriven.n", nobservations, ".lag", lagvalue, ".wsmc.hilbert.RUN2.RData") 26 | # filename <- paste0(prefix, "levydriven.n", nobservations, ".lag", lagvalue, ".wsmc.swap.RData") 27 | load(file = filename) 28 | 29 | 30 | f <- function(dataset){ 31 | return(sum(acf(dataset^2, plot = F, lag.max = 50)$acf[,,1][1:50])) 32 | } 33 | f_obs <- f(obs) 34 | fs <- as.numeric(foreach(i = 1:results$param_algo$nthetas, .combine = c) %dorng% { 35 | f(results$latest_y[[i]]) 36 | }) 37 | hist(fs, nclass = 30) 38 | abline(v = f_obs) 39 | 40 | thetas <- tail(results$thetas_history, 1)[[1]] 41 | plot(thetas[,5], fs) 42 | summary(tail(results$distances_history, 1)[[1]]) 43 | threshold1 <- tail(results$threshold_history, 2)[[1]] 44 | 45 | compute_d1 <- results$compute_d 46 | compute_d_summary <- function(z){ 47 | first_part <- compute_d1(z) 48 | if (first_part > threshold1){ 49 | return(Inf) 50 | } else { 51 | return(abs(f(z) - f_obs)) 52 | } 53 | } 54 | 55 | filename2 <- paste0(prefix, "test-levydriven.n", nobservations, ".lag", lagvalue, ".wsmc.hilbert.summary.RUN2.RData") 56 | # filename2 <- paste0(prefix, "levydriven.n", nobservations, ".lag", lagvalue, ".wsmc.hilbert.summary.RUN2.RData") 57 | 58 | ds <- as.numeric(foreach(i = 1:results$param_algo$nthetas, .combine = c) %dorng% { 59 | compute_d_summary(results$latest_y[[i]]) 60 | }) 61 | results2 <- results 62 | results2$compute_d <- compute_d_summary 63 | results2$param_algo$threshold <- max(ds) 64 | results2$distances_history[[length(results2$distances_history)]] <- ds 65 | results2$param_algo$proposal <- mixture_rmixmod() 66 | # 67 | results2 <- wsmc_continue(results2, savefile = filename2, maxsim = 8e5) 68 | # load(filename2) 69 | # results2 <- wsmc_continue(results, savefile = filename2, maxsim = 8e5) 70 | # 71 | # library(gridExtra) 72 | # grid.arrange(plot_threshold_time(results2) + scale_y_log10(), plot_ncomputed(results2)) 73 | # # 74 | # plot_bivariate(results2, 4, 5) 75 | # plot_marginal(results2, 1) 76 | # plot_marginal(results2, 2) 77 | # plot_marginal(results2, 4) + scale_x_log10() 78 | # plot_marginal(results2, 5) + scale_x_log10() 79 | # 80 | # # names(results2) 81 | # # results2$param_algo$proposal$param_update() 82 | # # results2$thetas_history %>% length 83 | # thetas <- results2$thetas_history[[results2$thetas_history %>% length]] 84 | # fit <- mixmodCluster(data = data.frame(thetas), nbCluster = 5, dataType = "quantitative") 85 | # results2$param_algo$proposal$r 86 | # results2$param_algo$proposal$d 87 | # 88 | # 89 | # ## 90 | # 91 | # ## timings 92 | # library(microbenchmark) 93 | # microbenchmark(target$simulate(true_theta), times = 100) 94 | # microbenchmark(compute_d_summary(target$simulate(true_theta)), times = 100) 95 | 96 | -------------------------------------------------------------------------------- /inst/reproduceabc/mgandk/README.R: -------------------------------------------------------------------------------- 1 | Reproduce results for the multivariate g-and-k model in Section 5.1.2 of the WABC article. 2 | === 3 | 4 | mgandk_generate_data.R: generate and save a data set from the model 5 | 6 | mgandk_wsmc_wasserstein.R: loads a data set of size 500 and runs SMC to approximate the WABC 7 | posterior using the exact Wasserstein distance with a budget of 2*10^6 model simulations. 8 | 9 | mgandk_wsmc_swap.R: loads the data set and runs SMC to approximate the WABC 10 | posterior using the swapping distance with a budget of 2*10^6 model simulations. 11 | 12 | mgandk_wsmc_hilbert.R: loads the data set and runs SMC to approximate the WABC 13 | posterior using the Hilbert distance with a budget of 2*10^6 model simulations. 14 | 15 | mgandk_wsmc_mmd.R: loads the data set and runs SMC to approximate the WABC 16 | posterior using an approximation of the MMD distance with a budget of 2*10^6 17 | model simulations. 18 | 19 | mgandk_mcmc.R: loads the data the WABC using the Hilbert approximation to tune 20 | the MCMC proposal used to approximate the posterior distribution. Uses numerical 21 | approximations of the likelihood. Performs 150,000 iterations. 22 | 23 | mgandk_plots_ncomputed.R: loads the data and output from the scripts above. 24 | Plot corresponds to Fig 6 a). number of sims vs number of smc steps 25 | 26 | mgandk_plots_threshold.R: loads the data and output from the scripts above. 27 | Plot corresponds to Fig 6 b). threshold vs number of sims. 28 | 29 | mgandk_plots_w_to_posterior.R: loads the data and output from the scripts above. 30 | Approximates the W1 distance between the posterior and the different WABC approximations, 31 | plots W1 distance to posterior as function of model simulations. Corresponds to Fig 6 c). 32 | 33 | mgandk_timings.R: estimate the average time it takes to compute the different distances 34 | 35 | mgandk_plots.R: produces plots of WABC marginals. -------------------------------------------------------------------------------- /inst/reproduceabc/mgandk/mgandk_generate_data.R: -------------------------------------------------------------------------------- 1 | library(winference) 2 | registerDoParallel(cores = detectCores()) 3 | rm(list = ls()) 4 | setmytheme() 5 | set.seed(11) 6 | target <- get_mgandk() 7 | prefix = "" 8 | 9 | nobservations <- 10000 10 | true_theta <- c(3, 1, 1, 0.5, 4, 0.5, 2, 0.4, 0.6) 11 | 12 | obs <- target$robservation(nobservations, true_theta) 13 | 14 | save(true_theta, obs, file = paste0(prefix, "mgandkdata.RData")) 15 | -------------------------------------------------------------------------------- /inst/reproduceabc/mgandk/mgandk_mcmc.R: -------------------------------------------------------------------------------- 1 | library(winference) 2 | registerDoParallel(cores = detectCores()) 3 | rm(list = ls()) 4 | setmytheme() 5 | set.seed(11) 6 | target <- get_mgandk() 7 | 8 | prefix = "" 9 | 10 | nobservations <- 500 11 | load(paste0(prefix, "mgandkdata.RData")) 12 | obs <- obs[,1:nobservations] 13 | # load ABC posterior 14 | load(paste0(prefix, "mgandk.wsmc.n",nobservations,".hilbert.RData")) 15 | thetas <- tail(results$thetas_history, 1)[[1]] 16 | theta_init <- thetas[sample(x = 1:nrow(thetas), 8, replace = TRUE),] 17 | colMeans(thetas) 18 | cov <- cov(thetas) 19 | # test log-likelihood 20 | target$loglikelihood(theta_init, obs) 21 | tuning_parameters <- list(niterations = 150000, nchains = nrow(theta_init), 22 | cov_proposal = cov, 23 | adaptation = 1000, init_chains = theta_init) 24 | mhfile <- paste0(prefix, "mgandk.mcmc.n", nobservations, ".mh.initfromABC.RData") 25 | mh <- metropolishastings(obs, target, tuning_parameters, savefile = mhfile) 26 | save(mh, file = mhfile) 27 | load(mhfile) 28 | # if (exists("mh_results")){ 29 | # mcmc.df <- mhchainlist_to_dataframe(mh_results$chains) 30 | # mcmc.df <- mcmc.df %>% filter(iteration < mh_results$iteration) 31 | # } else { 32 | # mcmc.df <- mhchainlist_to_dataframe(mh$chains) 33 | # } 34 | 35 | # mcmc.df %>% head 36 | # burnin <- 5000 37 | # # burnin <- 0 38 | # ggplot(mcmc.df %>% filter(iteration > burnin, iteration %% 100 == 1), aes(x = iteration, y = X.7, group = ichain, colour = factor(ichain))) + geom_line(alpha = 0.5) 39 | # ggplot(mcmc.df %>% filter(iteration > burnin, iteration %% 100 == 1), aes(x = iteration, y = X.9, group = ichain, colour = factor(ichain))) + geom_line(alpha = 0.5) 40 | # ggplot(mcmc.df %>% filter(iteration > burnin), aes(x = X.1, group = ichain, colour = factor(ichain))) + geom_density() + geom_vline(xintercept = true_theta[1]) 41 | # ggplot(mcmc.df %>% filter(iteration > burnin), aes(x = X.2, group = ichain, colour = factor(ichain))) + geom_density() + geom_vline(xintercept = true_theta[2]) 42 | # ggplot(mcmc.df %>% filter(iteration > burnin), aes(x = X.3, group = ichain, colour = factor(ichain))) + geom_density() + geom_vline(xintercept = true_theta[3]) 43 | # ggplot(mcmc.df %>% filter(iteration > burnin), aes(x = X.4, group = ichain, colour = factor(ichain))) + geom_density() + geom_vline(xintercept = true_theta[4]) 44 | # ggplot(mcmc.df %>% filter(iteration > burnin), aes(x = X.5, group = ichain, colour = factor(ichain))) + geom_density() + geom_vline(xintercept = true_theta[5]) 45 | # ggplot(mcmc.df %>% filter(iteration > burnin), aes(x = X.6, group = ichain, colour = factor(ichain))) + geom_density() + geom_vline(xintercept = true_theta[6]) 46 | # ggplot(mcmc.df %>% filter(iteration > burnin), aes(x = X.7, group = ichain, colour = factor(ichain))) + geom_density() + geom_vline(xintercept = true_theta[7]) 47 | # ggplot(mcmc.df %>% filter(iteration > burnin), aes(x = X.8, group = ichain, colour = factor(ichain))) + geom_density() + geom_vline(xintercept = true_theta[8]) 48 | # ggplot(mcmc.df %>% filter(iteration > burnin), aes(x = X.9, group = ichain, colour = factor(ichain))) + geom_density() + geom_vline(xintercept = true_theta[9]) 49 | # 50 | 51 | -------------------------------------------------------------------------------- /inst/reproduceabc/mgandk/mgandk_plots_ncomputed.R: -------------------------------------------------------------------------------- 1 | library(winference) 2 | registerDoParallel(cores = detectCores()) 3 | rm(list = ls()) 4 | setmytheme() 5 | set.seed(11) 6 | target <- get_gandk() 7 | 8 | fig.height <- 5 9 | fig.width <- 5 10 | 11 | my_colors <- get_my_colors() 12 | 13 | prefix <- "" 14 | 15 | nobservations = 500 16 | load(paste0(prefix, "mgandkdata.RData")) 17 | obs <- obs[1:nobservations] 18 | 19 | # Wasserstein SMC 20 | filename = paste0(prefix, "mgandk.wsmc.n", nobservations, ".wasserstein.RData") 21 | load(filename) 22 | results_was = results 23 | wsmc.df = wsmc_to_dataframe(results_was) 24 | nsteps = tail(wsmc.df$step,n=1) 25 | ncomp = results_was$ncomputed 26 | 27 | # Swapping SMC 28 | filename = paste0(prefix, "mgandk.wsmc.n", nobservations, ".swap.RData") 29 | load(filename) 30 | results_swap = results 31 | swap.df = wsmc_to_dataframe(results_swap) 32 | swap.nsteps = tail(swap.df$step,n=1) 33 | swap.ncomp = results_swap$ncomputed 34 | 35 | # Hilbert SMC 36 | filename = paste0(prefix, "mgandk.wsmc.n", nobservations, ".hilbert.RData") 37 | load(filename) 38 | results_hilbert = results 39 | hilbert.df = wsmc_to_dataframe(results_hilbert) 40 | hilbert.nsteps = tail(hilbert.df$step,n=1) 41 | hilbert.ncomp = results_hilbert$ncomputed 42 | 43 | # MMD SMC 44 | filename = paste0(prefix, "mgandk.wsmc.n", nobservations, ".mmd.RData") 45 | load(filename) 46 | results_mmd = results 47 | mmd.df = wsmc_to_dataframe(results_mmd) 48 | mmd.nsteps = tail(mmd.df$step,n=1) 49 | mmd.ncomp = results_mmd$ncomputed 50 | 51 | 52 | w.df = data.frame(steps = 1:nsteps, ncomp = ncomp) 53 | s.df = data.frame(steps = 1:swap.nsteps, ncomp = swap.ncomp) 54 | h.df = data.frame(steps = 1:hilbert.nsteps, ncomp = hilbert.ncomp) 55 | m.df = data.frame(steps = 1:mmd.nsteps, ncomp = mmd.ncomp) 56 | 57 | g = ggplot(w.df, aes(x = steps, y = ncomp)) + geom_line(aes(colour = "Wasserstein")) + geom_point(aes(colour = "Wasserstein")) 58 | g = g + geom_line(data = s.df, aes(colour = "Swap")) + geom_point(data = s.df, aes(colour = "Swap")) 59 | g = g + geom_line(data = h.df, aes(colour = "Hilbert")) + geom_point(data = h.df, aes(colour = "Hilbert")) 60 | g = g + geom_line(data = m.df, aes(colour = "MMD")) + geom_point(data = m.df, aes(colour = "MMD")) 61 | g = g + scale_color_manual(name = "", values = my_colors) + xlab("steps") + ylab("# model simulations") 62 | g = g + scale_y_log10(breaks = c(0.5e4,1e4,2e4,4e4,1e5,2e5)) + scale_x_continuous(breaks = c(0,20,40,60,80)) 63 | g = g + geom_label(data = data.frame(x = c(60,70,60,17), y = c(7e3,1.5e4,1.7e5,1.9e4), method = c("Wasserstein","Swap","Hilbert","MMD")), 64 | aes(x = x, y = y, colour = method, label = method), size = 7) + theme(legend.position = "none") 65 | g 66 | ggsave(filename = paste0(prefix, "mgandk.n", nobservations, ".ncomp_vs_step.pdf"), plot = g, width = fig.width, height = fig.height) 67 | -------------------------------------------------------------------------------- /inst/reproduceabc/mgandk/mgandk_plots_threshold.R: -------------------------------------------------------------------------------- 1 | library(winference) 2 | registerDoParallel(cores = detectCores()) 3 | rm(list = ls()) 4 | setmytheme() 5 | set.seed(11) 6 | target <- get_gandk() 7 | 8 | fig.height <- 5 9 | fig.width <- 5 10 | 11 | my_colors <- get_my_colors() 12 | 13 | prefix <- "" 14 | 15 | nobservations = 500 16 | load(paste0(prefix, "mgandkdata.RData")) 17 | obs <- obs[1:nobservations] 18 | 19 | # Wasserstein SMC 20 | filename = paste0(prefix, "mgandk.wsmc.n", nobservations, ".wasserstein.RData") 21 | load(filename) 22 | results_was = results 23 | was.thresholds = results_was$threshold_history 24 | was.ncomp = cumsum(results_was$ncomputed) 25 | 26 | # Swapping SMC 27 | filename = paste0(prefix, "mgandk.wsmc.n", nobservations, ".swap.RData") 28 | load(filename) 29 | results_swap = results 30 | swap.thresholds = results_swap$threshold_history 31 | swap.ncomp = cumsum(results_swap$ncomputed) 32 | 33 | # Hilbert SMC 34 | filename = paste0(prefix, "mgandk.wsmc.n", nobservations, ".hilbert.RData") 35 | load(filename) 36 | results_hilbert = results 37 | hilbert.thresholds = results_hilbert$threshold_history 38 | hilbert.ncomp = cumsum(results_hilbert$ncomputed) 39 | 40 | # MMD SMC 41 | filename = paste0(prefix, "mgandk.wsmc.n", nobservations, ".mmd.RData") 42 | load(filename) 43 | results_mmd = results 44 | mmd.thresholds = results_mmd$threshold_history 45 | mmd.ncomp = cumsum(results_mmd$ncomputed) 46 | 47 | 48 | w.df = data.frame(threshold = was.thresholds, ncomp = was.ncomp) 49 | s.df = data.frame(threshold = swap.thresholds, ncomp = swap.ncomp) 50 | h.df = data.frame(threshold = hilbert.thresholds, ncomp = hilbert.ncomp) 51 | m.df = data.frame(threshold = mmd.thresholds, ncomp = mmd.ncomp) 52 | 53 | g = ggplot(w.df, aes(x = ncomp, y = threshold)) + geom_line(aes(colour = "Wasserstein")) + geom_point(aes(colour = "Wasserstein")) 54 | g = g + geom_line(data = s.df, aes(colour = "Swap")) + geom_point(data = s.df, aes(colour = "Swap")) 55 | g = g + geom_line(data = h.df, aes(colour = "Hilbert")) + geom_point(data = h.df, aes(colour = "Hilbert")) 56 | g = g + geom_line(data = m.df, aes(colour = "MMD")) + geom_point(data = m.df, aes(colour = "MMD")) 57 | g = g + scale_color_manual(name = "", values = my_colors) + xlab("# model simulations") + ylab("threshold") 58 | g = g + scale_y_log10(breaks = c(1e-2,1,1e2,1e4,1e6)) + scale_x_log10() 59 | g = g + geom_label(data = data.frame(x = c(3e5,5e4,7e5,1e4), y = c(5e2,4e4,7,0.07), method = c("Wasserstein","Swap","Hilbert","MMD")), 60 | aes(x = x, y = y, colour = method, label = method), size = 7) + theme(legend.position = "none") 61 | g 62 | ggsave(filename = paste0(prefix, "mgandk.n", nobservations, ".threshold_vs_ncomp.pdf"), plot = g, width = fig.width, height = fig.height) 63 | 64 | -------------------------------------------------------------------------------- /inst/reproduceabc/mgandk/mgandk_timings.R: -------------------------------------------------------------------------------- 1 | library(winference) 2 | registerDoParallel(cores = detectCores()) 3 | rm(list = ls()) 4 | setmytheme() 5 | set.seed(11) 6 | target <- get_mgandk() 7 | prefix <- "" 8 | 9 | nobservations <- 500 10 | load(paste0(prefix, "mgandkdata.RData")) 11 | obs <- obs[,1:nobservations] 12 | target$simulate <- function(theta) target$robservation(nobservations, theta) 13 | 14 | data_sets <- list() 15 | 16 | x <- proc.time() 17 | target$loglikelihood(target$rprior(1000, target$parameters), obs) 18 | newx <- proc.time() 19 | elapsed <- (newx - x)[3] 20 | cat("time to compute likelihood:", elapsed/nrep, "\n") 21 | 22 | nrep <- 1000 23 | x <- proc.time() 24 | for (irep in 1:nrep){ 25 | data_sets[[irep]] <- target$simulate(true_theta) 26 | } 27 | newx <- proc.time() 28 | elapsed <- (newx - x)[3] 29 | cat("time to simulate data sets:", elapsed/nrep, "\n") 30 | x <- proc.time() 31 | for (irep in 1:nrep){ 32 | sinkhorn_distance(obs, data_sets[[irep]]) 33 | } 34 | newx <- proc.time() 35 | elapsed <- (newx - x)[3] 36 | cat("time to compute Sinkhorn distance:", elapsed/nrep, "\n") 37 | 38 | x <- proc.time() 39 | for (irep in 1:nrep){ 40 | exact_transport_distance(obs, data_sets[[irep]]) 41 | } 42 | newx <- proc.time() 43 | elapsed <- (newx - x)[3] 44 | cat("time to compute Wasserstein distance:", elapsed/nrep, "\n") 45 | 46 | x <- proc.time() 47 | for (irep in 1:nrep){ 48 | hilbert_distance(obs, data_sets[[irep]]) 49 | } 50 | newx <- proc.time() 51 | elapsed <- (newx - x)[3] 52 | cat("time to compute Hilbert distance:", elapsed/nrep, "\n") 53 | 54 | 55 | cd <- get_mmd_to_y(obs) 56 | x <- proc.time() 57 | for (irep in 1:nrep){ 58 | cd(data_sets[[irep]]) 59 | } 60 | newx <- proc.time() 61 | elapsed <- (newx - x)[3] 62 | cat("time to compute MMD distance:", elapsed/nrep, "\n") 63 | 64 | x <- proc.time() 65 | for (irep in 1:nrep){ 66 | swap_distance(obs, data_sets[[irep]], tolerance = 1e-5) 67 | } 68 | newx <- proc.time() 69 | elapsed <- (newx - x)[3] 70 | cat("time to compute Swap distance:", elapsed/nrep, "\n") 71 | 72 | 73 | -------------------------------------------------------------------------------- /inst/reproduceabc/mgandk/mgandk_wsmc_hilbert.R: -------------------------------------------------------------------------------- 1 | library(winference) 2 | registerDoParallel(cores = detectCores()) 3 | rm(list = ls()) 4 | setmytheme() 5 | set.seed(11) 6 | target <- get_mgandk() 7 | 8 | prefix = "" 9 | 10 | nobservations <- 500 11 | load(paste0(prefix, "mgandkdata.RData")) 12 | obs <- obs[,1:nobservations] 13 | target$simulate <- function(theta) target$robservation(nobservations, theta) 14 | 15 | compute_d <- get_hilbert_to_y(obs) 16 | y_sim <- target$simulate(target$rprior(1, target$parameters)[1,]) 17 | compute_d(y_sim) 18 | 19 | param_algo <- list(nthetas = 2048, nmoves = 1, proposal = mixture_rmixmod(), 20 | minimum_diversity = 0.5, R = 2, maxtrials = 100000) 21 | 22 | filename <- paste0(prefix, "mgandk.wsmc.n", nobservations, ".hilbert.RData") 23 | results <- wsmc(compute_d, target, param_algo, savefile = filename, maxsimulation = 2e6) 24 | load(filename) 25 | # results <- wsmc_continue(results, savefile = filename, maxsimulation = 5e6) 26 | # plot_marginal_time(results, 7, from_step = 50) + geom_vline(xintercept = true_theta[7]) 27 | # plot_marginal_time(results, 5, from_step = 50) + geom_vline(xintercept = true_theta[5]) 28 | # qplot(x = cumsum(results$ncomputed), y = results$threshold_history, geom = "line") + scale_x_log10() + scale_y_log10() 29 | # qplot(x = tail(results$distances_history, 1)[[1]], geom = "histogram") 30 | # 31 | -------------------------------------------------------------------------------- /inst/reproduceabc/mgandk/mgandk_wsmc_mmd.R: -------------------------------------------------------------------------------- 1 | library(winference) 2 | registerDoParallel(cores = detectCores()) 3 | rm(list = ls()) 4 | setmytheme() 5 | set.seed(11) 6 | target <- get_mgandk() 7 | 8 | prefix = "" 9 | 10 | nobservations <- 500 11 | load(paste0(prefix, "mgandkdata.RData")) 12 | obs <- obs[,1:nobservations] 13 | target$simulate <- function(theta) target$robservation(nobservations, theta) 14 | 15 | compute_d <- get_mmd_to_y(obs) 16 | y_sim <- target$simulate(target$rprior(1, target$parameters)[1,]) 17 | compute_d(y_sim) 18 | 19 | param_algo <- list(nthetas = 2048, nmoves = 1, proposal = mixture_rmixmod(), 20 | minimum_diversity = 0.5, R = 2, maxtrials = 100000) 21 | 22 | filename <- paste0(prefix, "mgandk.wsmc.n", nobservations, ".mmd.RData") 23 | results <- wsmc(compute_d, target, param_algo, savefile = filename, maxsimulation = 2e6) 24 | load(filename) 25 | # results <- wsmc_continue(results, savefile = filename, maxsimulation = 1e6) 26 | # plot_marginal_time(results, 9, from_step = 50) 27 | # qplot(x = cumsum(results$ncomputed), y = results$threshold_history, geom = "line") + scale_x_log10() + scale_y_log10() 28 | # qplot(x = tail(results$distances_history, 1)[[1]], geom = "histogram") 29 | 30 | -------------------------------------------------------------------------------- /inst/reproduceabc/mgandk/mgandk_wsmc_swap.R: -------------------------------------------------------------------------------- 1 | library(winference) 2 | registerDoParallel(cores = detectCores()) 3 | rm(list = ls()) 4 | setmytheme() 5 | set.seed(11) 6 | target <- get_mgandk() 7 | 8 | prefix = "" 9 | 10 | nobservations <- 500 11 | load(paste0(prefix, "mgandkdata.RData")) 12 | obs <- obs[,1:nobservations] 13 | target$simulate <- function(theta) target$robservation(nobservations, theta) 14 | 15 | compute_d <- function(y_sim) swap_distance(obs, y_sim, tolerance = 1e-5)$distance 16 | y_sim <- target$simulate(target$rprior(1, target$parameters)[1,]) 17 | compute_d(y_sim) 18 | 19 | param_algo <- list(nthetas = 2048, nmoves = 1, proposal = mixture_rmixmod(), 20 | minimum_diversity = 0.5, R = 2, maxtrials = 100000) 21 | 22 | filename <- paste0(prefix, "mgandk.wsmc.n", nobservations, ".swap.RData") 23 | results <- wsmc(compute_d, target, param_algo, savefile = filename, maxsimulation = 2e6) 24 | load(filename) 25 | # results <- wsmc_continue(results, savefile = filename, maxsimulation = 3e6) 26 | # plot_marginal_time(results, 7, from_step = 50) 27 | # qplot(x = cumsum(results$ncomputed), y = results$threshold_history, geom = "line") + scale_x_log10() + scale_y_log10() 28 | # qplot(x = tail(results$distances_history, 1)[[1]], geom = "histogram") 29 | # 30 | -------------------------------------------------------------------------------- /inst/reproduceabc/mgandk/mgandk_wsmc_wasserstein.R: -------------------------------------------------------------------------------- 1 | library(winference) 2 | registerDoParallel(cores = detectCores()-2) 3 | rm(list = ls()) 4 | setmytheme() 5 | set.seed(11) 6 | target <- get_mgandk() 7 | 8 | prefix = "" 9 | 10 | nobservations <- 500 11 | load(paste0(prefix, "mgandkdata.RData")) 12 | obs <- obs[,1:nobservations] 13 | target$simulate <- function(theta) target$robservation(nobservations, theta) 14 | 15 | library(transport) 16 | compute_d <- function(z){ 17 | sink(file = paste0(prefix,"tmp")) 18 | wdistance <- exact_transport_distance(obs, z, 1, 2) 19 | sink(NULL) 20 | if (wdistance > 1e10){ 21 | wdistance <- 1e10 22 | } 23 | return(wdistance) 24 | } 25 | 26 | # set.seed(11) 27 | # y_sim = target$simulat(target$rprior(1)) 28 | # compute_d(y_sim) 29 | 30 | # thetas_ <- target$rprior(100, target$parameters) 31 | # y_sim <- list() 32 | # for (itheta in 1:nrow(thetas_)){ 33 | # y_sim[[itheta]] <- target$simulate(thetas_[itheta,]) 34 | # } 35 | # 36 | # ds <- sapply(y_sim, compute_d) 37 | # ds 38 | # 39 | # yy <- target$simulate(thetas_[1,]) 40 | # plot(yy[1,], yy[2,]) 41 | # 42 | # plot(obs[1,], obs[2,]) 43 | # thetas_[1,] 44 | # true_theta 45 | 46 | param_algo <- list(nthetas = 2048, nmoves = 1, proposal = mixture_rmixmod(), 47 | minimum_diversity = 0.5, R = 2, maxtrials = 100000) 48 | 49 | savefile <- paste0(prefix, "mgandk.wsmc.n", nobservations, ".wasserstein.RData") 50 | maxsimulation <- 2e6 51 | results <- wsmc(compute_d, target, param_algo, savefile = savefile, maxsimulation = maxsimulation) 52 | load(savefile) 53 | # results$param_algo$maxtrials <- 10000 54 | # results <- wsmc_continue(results, savefile = savefile, maxsimulation = 1e6) 55 | # results$ncomputed %>% sum 56 | # from_step <- 0 57 | # wsmc.df <- wsmc_to_dataframe(results) 58 | # index <- 7 59 | # g <- ggplot(wsmc.df %>% filter(step > from_step), aes_string(x = target$parameter_names[index], colour = "time", group = "factor(time)")) + geom_density(aes(y = ..density..)) 60 | # g <- g + xlab(target$parameter_names[index]) # + theme(legend.position = "none") 61 | # g <- g + scale_color_gradient(name = "time (s)", low = rgb(1,0.5,0.5), high = "darkblue") 62 | # g + geom_vline(xintercept = true_theta[index]) 63 | # 64 | # qplot(x = cumsum(results$ncomputed), y = results$threshold_history, geom = "line") + scale_x_log10() + scale_y_log10() 65 | # qplot(x = tail(results$distances_history, 1)[[1]], geom = "histogram") 66 | 67 | -------------------------------------------------------------------------------- /inst/reproduceabc/mvnormal/README.R: -------------------------------------------------------------------------------- 1 | Reproduce results for the multivariate model in Section 2.3 of the WABC article. 2 | === 3 | 4 | mvnormal_generate_data.R: generate and save a data set from the model 5 | 6 | mvnormal_wsmc_wasserstein.R: loads a data set of size 100 and runs SMC to approximate the WABC 7 | posterior using the exact Wasserstein distance with a budget of 10^6 model simulations. 8 | 9 | mvnormal_wsmc_summary.R: loads the data set and runs SMC to approximate the ABC 10 | posterior based on the sample mean with a budget of 10^6 model simulations. 11 | 12 | mvnormal_wsmc_euclidean.R: loads the data set and runs SMC to approximate the ABC 13 | posterior based on the Euclidean distance with a budget of 10^6 model simulations. 14 | 15 | mvnormal_rejection_summary.R: loads the data set and runs a rejection sampler to approximate the ABC 16 | posterior based on the sample mean with a budget of 10^6 model simulations. 17 | 18 | mvnormal_rejection_wasserstein.R: loads the data set and runs a rejection sampler to approximate the WABC 19 | posterior based on the exact Wasserstein distance with a budget of 10^6 model simulations. 20 | 21 | mvnormal_plots.R: loads the data and the output from the scripts above to plot the marginal ABC 22 | posteriors as well as their W1 distances to the posterior. Corresponds to Fig 1 of the paper. 23 | 24 | mvnormal_timings.R: estimates the average time it takes to compute the different distances 25 | and simulate data 26 | -------------------------------------------------------------------------------- /inst/reproduceabc/mvnormal/mvnormal_generate_data.R: -------------------------------------------------------------------------------- 1 | library(winference) 2 | registerDoParallel(cores = detectCores()) 3 | rm(list = ls()) 4 | setmytheme() 5 | 6 | set.seed(11) 7 | 8 | doRun <- FALSE 9 | max_time <- 30*60 10 | d <- 2 11 | target <- get_multivariate_normal(d) 12 | target$parameters$tau <- 5 13 | nobservations <- 100 14 | p <- 1 15 | true_theta <- rnorm(d) 16 | prefix <- "" 17 | 18 | obsfile <- paste0(prefix, "mvnormaldata.d", d, ".n", nobservations, ".RData") 19 | obs <- target$robservation(nobservations, true_theta, target$parameters) 20 | save(true_theta, obs, file = obsfile) 21 | 22 | -------------------------------------------------------------------------------- /inst/reproduceabc/mvnormal/mvnormal_rejection_summary.R: -------------------------------------------------------------------------------- 1 | library(winference) 2 | rm(list = ls()) 3 | registerDoParallel(cores = detectCores()) 4 | setmytheme() 5 | 6 | set.seed(11) 7 | 8 | doRun <- FALSE 9 | max_time <- 30*60 10 | d <- 2 11 | target <- get_multivariate_normal(d) 12 | target$parameters$tau <- 5 13 | nobservations <- 100 14 | nparticles <- 2048 15 | maxsim = 10^6 16 | p <- 1 17 | prefix <- "" 18 | 19 | obsfile <- paste0(prefix, "mvnormaldata.d", d, ".n", nobservations, ".RData") 20 | load(obsfile) 21 | 22 | # function to simulate data 23 | target$simulate <- function(theta){ 24 | return(target$robservation(nobservations, theta, target$parameters)) 25 | } 26 | # distance between summary 27 | summary_obs <- rowMeans(obs) 28 | dsummary <- function(z){ 29 | summary_z <- rowMeans(z) 30 | return(mean(abs(summary_z - summary_obs))) 31 | } 32 | 33 | 34 | 35 | filename <- paste0(prefix, "mvnormalrejection.d", d, ".n", nobservations, ".summary.RData") 36 | 37 | naccept = 2048 38 | t = proc.time() 39 | prior_theta = matrix(rnorm(d*maxsim,0,5), nrow=maxsim, ncol=d) 40 | distances = rep(0,maxsim) 41 | for(i in 1:maxsim){ 42 | ysim = target$simulate(prior_theta[i,]) 43 | distances[i] = dsummary(ysim) 44 | } 45 | # distances = foreach(i = 1:maxsim, .combine = c) %dorng% { 46 | # ysim = target$simulate(prior_theta[i,]) 47 | # return(dsummary(ysim)) 48 | # } 49 | sort_distances = sort(distances) 50 | results = prior_theta[distances <= sort_distances[naccept],] 51 | t = proc.time() - t 52 | t 53 | 54 | save(results, t, file = filename) 55 | 56 | 57 | -------------------------------------------------------------------------------- /inst/reproduceabc/mvnormal/mvnormal_rejection_wasserstein.R: -------------------------------------------------------------------------------- 1 | library(winference) 2 | rm(list = ls()) 3 | registerDoParallel(cores = detectCores()) 4 | setmytheme() 5 | 6 | set.seed(11) 7 | 8 | doRun <- FALSE 9 | max_time <- 30*60 10 | d <- 2 11 | target <- get_multivariate_normal(d) 12 | target$parameters$tau <- 5 13 | nobservations <- 100 14 | nparticles <- 2048 15 | maxsim = 10^6 16 | p <- 1 17 | prefix <- "" 18 | 19 | obsfile <- paste0(prefix, "mvnormaldata.d", d, ".n", nobservations, ".RData") 20 | load(obsfile) 21 | 22 | # function to simulate data 23 | target$simulate <- function(theta){ 24 | return(target$robservation(nobservations, theta, target$parameters)) 25 | } 26 | # distance function 27 | wdistance <- get_transport_to_y(obs, p = p) 28 | 29 | filename <- paste0(prefix, "mvnormalrejection.d", d, ".n", nobservations, ".wasserstein.RData") 30 | 31 | naccept = 2048 32 | t = proc.time() 33 | prior_theta = matrix(rnorm(d*maxsim,0,5), nrow=maxsim, ncol=d) 34 | distances = rep(0,maxsim) 35 | for(i in 1:maxsim){ 36 | ysim = target$simulate(prior_theta[i,]) 37 | {sink("/dev/null"); distances[i] = wdistance(ysim); sink();} 38 | } 39 | # distances = foreach(i = 1:maxsim, .combine = c) %dorng% { 40 | # ysim = target$simulate(prior_theta[i,]) 41 | # return(wdistance(ysim)) 42 | # } 43 | sort_distances = sort(distances) 44 | results = prior_theta[distances <= sort_distances[naccept],] 45 | t = proc.time() - t 46 | t 47 | 48 | save(results, t, file = filename) 49 | 50 | -------------------------------------------------------------------------------- /inst/reproduceabc/mvnormal/mvnormal_timings.R: -------------------------------------------------------------------------------- 1 | library(microbenchmark) 2 | library(winference) 3 | rm(list = ls()) 4 | setmytheme() 5 | 6 | set.seed(11) 7 | 8 | doRun <- FALSE 9 | max_time <- 30*60 10 | d <- 2 11 | target <- get_multivariate_normal(d) 12 | target$parameters$tau <- 5 13 | nobservations <- 100 14 | p <- 1 15 | prefix <- "" 16 | 17 | obsfile <- paste0(prefix, "mvnormaldata.d", d, ".n", nobservations, ".RData") 18 | load(obsfile) 19 | 20 | # function to simulate data 21 | target$simulate <- function(theta){ 22 | return(target$robservation(nobservations, theta, target$parameters)) 23 | } 24 | 25 | # wasserstein distance 26 | wdistance <- get_transport_to_y(obs, p = p) 27 | 28 | # euclidean distance 29 | ground_p <- 2 30 | deuclidean <- function(z){ 31 | return(mean(apply(abs(z - obs), 2, function(v) (sum(v^ground_p))^(1/ground_p))^p)^(1/p)) 32 | } 33 | 34 | # distance between summary 35 | summary_obs <- rowMeans(obs) 36 | dsummary <- function(z){ 37 | summary_z <- rowMeans(z) 38 | return(mean(abs(summary_z - summary_obs))) 39 | } 40 | 41 | {sink("/dev/null"); wtime = invisible(microbenchmark(wdistance(target$simulate(true_theta)), times = 10000)); sink();} 42 | wtime 43 | 44 | etime = microbenchmark(deuclidean(target$simulate(true_theta)), times = 10000) 45 | etime 46 | 47 | stime = microbenchmark(dsummary(target$simulate(true_theta)), times = 10000) 48 | stime 49 | 50 | simtime = microbenchmark(target$simulate(true_theta), times = 10000) 51 | simtime 52 | 53 | # # #result from timing (average values in seconds) 54 | # wtime = 0.0123 55 | # etime = 0.000380 56 | # stime = 0.000064 57 | # simtime = 0.000040 58 | # 59 | # wtime/etime 60 | # wtime/stime 61 | # 62 | # #load simulation results 63 | # filename <- paste0(prefix, "mvnormalwsmc.d", d, ".n", nobservations, ".wasserstein.RData") 64 | # load(filename) 65 | # results_wasserstein <- results 66 | # tail(results_wasserstein$compute_times,n=1) 67 | # 68 | # 69 | # filename <- paste0(prefix, "mvnormalwsmc.d", d, ".n", nobservations, ".euclidean.RData") 70 | # load(filename) 71 | # results_euclidean <- results 72 | # tail(results_euclidean$compute_times,n=1) 73 | # 74 | # 75 | # filename <- paste0(prefix, "mvnormalwsmc.d", d, ".n", nobservations, ".summary.RData") 76 | # load(filename) 77 | # results_summary <- results 78 | # tail(results_summary$compute_times,n=1) 79 | # 80 | # 81 | # filename <- paste0(prefix, "mvnormalrejection.d", d, ".n", nobservations, ".summary.RData") 82 | # load(filename) 83 | # results_rejection_summary <- results 84 | # t 85 | 86 | -------------------------------------------------------------------------------- /inst/reproduceabc/mvnormal/mvnormal_wsmc_euclidean.R: -------------------------------------------------------------------------------- 1 | library(winference) 2 | registerDoParallel(cores = detectCores()) 3 | rm(list = ls()) 4 | setmytheme() 5 | 6 | set.seed(11) 7 | 8 | doRun <- FALSE 9 | max_time <- 30*60 10 | d <- 2 11 | target <- get_multivariate_normal(d) 12 | target$parameters$tau <- 5 13 | nobservations <- 100 14 | nparticles <- 2048 15 | p <- 1 16 | prefix <- "" 17 | 18 | obsfile <- paste0(prefix, "mvnormaldata.d", d, ".n", nobservations, ".RData") 19 | load(obsfile) 20 | 21 | # function to simulate data 22 | target$simulate <- function(theta){ 23 | return(target$robservation(nobservations, theta, target$parameters)) 24 | } 25 | # euclidean distance 26 | # and also Euclidean distance 27 | ground_p <- 2 28 | deuclidean <- function(z){ 29 | return(mean(apply(abs(z - obs), 2, function(v) (sum(v^ground_p))^(1/ground_p))^p)^(1/p)) 30 | } 31 | 32 | # common algorithmic parameters 33 | param_algo <- list(nthetas = nparticles, nmoves = 1, proposal = mixture_rmixmod(), 34 | minimum_diversity = 0.5, R = 2, maxtrials = 1e5) 35 | 36 | filename <- paste0(prefix, "mvnormalwsmc.d", d, ".n", nobservations, ".euclidean.RData") 37 | results <- wsmc(deuclidean, target, param_algo, maxsimulation = 10^6, savefile = filename) 38 | load(filename) 39 | # results <- wsmc_continue(results, savefile = filename, maxtime = 10*60) 40 | 41 | # 42 | # load(filename) 43 | # plot_threshold_time(results) + geom_point() 44 | # mle <- rowMeans(obs) 45 | # plot_bivariate(results, 1, 2, from = 10) + geom_vline(xintercept = mle[1]) + geom_hline(yintercept = mle[2]) 46 | # plot_marginal(results, 1, from = 10) 47 | 48 | # library(microbenchmark) 49 | # microbenchmark(deuclidean(target$simulate(true_theta)), times = 1000) 50 | -------------------------------------------------------------------------------- /inst/reproduceabc/mvnormal/mvnormal_wsmc_summary.R: -------------------------------------------------------------------------------- 1 | library(winference) 2 | registerDoParallel(cores = detectCores()) 3 | rm(list = ls()) 4 | setmytheme() 5 | 6 | set.seed(11) 7 | 8 | doRun <- FALSE 9 | max_time <- 30*60 10 | d <- 2 11 | target <- get_multivariate_normal(d) 12 | target$parameters$tau <- 5 13 | nobservations <- 100 14 | nparticles <- 2048 15 | p <- 1 16 | prefix <- "" 17 | 18 | obsfile <- paste0(prefix, "mvnormaldata.d", d, ".n", nobservations, ".RData") 19 | load(obsfile) 20 | 21 | # function to simulate data 22 | target$simulate <- function(theta){ 23 | return(target$robservation(nobservations, theta, target$parameters)) 24 | } 25 | # distance between summary 26 | summary_obs <- rowMeans(obs) 27 | dsummary <- function(z){ 28 | summary_z <- rowMeans(z) 29 | return(mean(abs(summary_z - summary_obs))) 30 | } 31 | 32 | # common algorithmic parameters 33 | param_algo <- list(nthetas = nparticles, nmoves = 1, proposal = mixture_rmixmod(), 34 | minimum_diversity = 0.5, R = 2, maxtrials = 1e5) 35 | 36 | filename <- paste0(prefix, "mvnormalwsmc.d", d, ".n", nobservations, ".summary.RData") 37 | results <- wsmc(dsummary, target, param_algo, maxsimulation = 10^6, savefile = filename) 38 | # load(filename) 39 | 40 | # results <- wsmc(dsummary, target, param_algo, maxsimulation = 10^6, parallel = FALSE) 41 | # results <- wsmc_continue(results, savefile = filename, maxtime = 10*60*60) 42 | # 43 | # plot_threshold_time(results) + geom_point() 44 | # mle <- rowMeans(obs) 45 | # plot_bivariate(results, 1, 2, from = 10) + geom_vline(xintercept = mle[1]) + geom_hline(yintercept = mle[2]) 46 | # plot_marginal(results, 1, from = 10) 47 | # 48 | # library(microbenchmark) 49 | # microbenchmark(dsummary(target$simulate(true_theta)), times = 1000) 50 | # 51 | # library(microbenchmark) 52 | # microbenchmark(target$simulate(true_theta), times = 1000) 53 | # 54 | 55 | -------------------------------------------------------------------------------- /inst/reproduceabc/mvnormal/mvnormal_wsmc_wasserstein.R: -------------------------------------------------------------------------------- 1 | library(winference) 2 | registerDoParallel(cores = detectCores()) 3 | rm(list = ls()) 4 | setmytheme() 5 | 6 | set.seed(11) 7 | 8 | doRun <- FALSE 9 | max_time <- 30*60 10 | d <- 2 11 | target <- get_multivariate_normal(d) 12 | target$parameters$tau <- 5 13 | nobservations <- 100 14 | nparticles <- 2048 15 | p <- 1 16 | prefix <- "" 17 | 18 | obsfile <- paste0(prefix, "mvnormaldata.d", d, ".n", nobservations, ".RData") 19 | load(obsfile) 20 | 21 | # function to simulate data 22 | target$simulate <- function(theta){ 23 | return(target$robservation(nobservations, theta, target$parameters)) 24 | } 25 | # wasserstein distance 26 | wdistance <- get_transport_to_y(obs, p = p) 27 | # 28 | param_algo <- list(nthetas = nparticles, nmoves = 1, proposal = mixture_rmixmod(), 29 | minimum_diversity = 0.5, R = 2, maxtrials = 1e5) 30 | 31 | filename <- paste0(prefix, "mvnormalwsmc.d", d, ".n", nobservations, ".wasserstein.RData") 32 | results <- wsmc(wdistance, target, param_algo, maxsimulation = 10^6, savefile = filename) 33 | load(filename) 34 | # results <- wsmc_continue(results, savefile = filename, maxsimulation = 800000) 35 | # 36 | # load(filename) 37 | # plot_threshold_time(results) + geom_point() 38 | # mle <- rowMeans(obs) 39 | # plot_bivariate(results, 1, 2, from = 10) + geom_vline(xintercept = mle[1]) + geom_hline(yintercept = mle[2]) 40 | # plot_marginal(results, 1, from = 10) 41 | 42 | # library(microbenchmark) 43 | # microbenchmark(wdistance(target$simulate(true_theta)), times = 1000) 44 | -------------------------------------------------------------------------------- /inst/reproduceabc/queue/50.intermediateobs.neal.Rdata: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pierrejacob/winference/bba0e89a019ccfeb403159f480cf2ac57e577f9d/inst/reproduceabc/queue/50.intermediateobs.neal.Rdata -------------------------------------------------------------------------------- /inst/reproduceabc/queue/README.R: -------------------------------------------------------------------------------- 1 | Reproduce results for the queueing model in Section 5.3 of the article. 2 | === 3 | 4 | queue_wsmc_marginal_intermediate.R: load data set (RData file in the folder), 5 | and approximates WABC posterior based on Wasserstein distance between marginal 6 | distributions of synthetic and observed data sets. 7 | 8 | queue_pmmh_intermediate.R: loads results from above script, uses them to tune the parameters 9 | of a PMMH algorithm to target the exact posterior distribution, and runs PMMH. 10 | 11 | queue_abctools.R: runs semi-automatic ABC using abctools package 12 | (install.packages("abctools")) 13 | 14 | queue_plots_compare.R: loads results from above scripts, 15 | and creates the three plots in Figure 8 (a,b,c). 16 | -------------------------------------------------------------------------------- /inst/reproduceabc/queue/queue_wsmc_marginal_intermediate.R: -------------------------------------------------------------------------------- 1 | library(winference) 2 | registerDoParallel(cores = detectCores()) 3 | rm(list = ls()) 4 | setmytheme() 5 | 6 | set.seed(11) 7 | 8 | prefix = "" 9 | 10 | load(paste0(prefix, "50.intermediateobs.neal.Rdata")) 11 | obs <- matrix(obs, nrow = 1) 12 | obs_sorted = sort(obs) 13 | nobs <- ncol(obs) 14 | # 15 | target <- get_queue() 16 | target$simulate <- function(theta){ 17 | return(matrix(target$robservation(nobs, theta, target$parameters), nrow = 1)) 18 | } 19 | 20 | #x_obs <- cumsum(obs[1,]) 21 | # 22 | param_algo <- list(nthetas = 2048, nmoves = 1, proposal = mixture_rmixmod(), 23 | minimum_diversity = 0.5, R = 2, maxtrials = 100000) 24 | #compute_d <- get_hilbert_to_y(obs) 25 | compute_d = function(y){ 26 | sort_y = sort(y) 27 | mean(abs(sort_y-obs_sorted)) 28 | } 29 | 30 | 31 | filename <- paste0(prefix, "queue_intermediate_wsmc_marginal.RData") 32 | #results <- wsmc(compute_d, target, param_algo, savefile = filename, maxsim = 10^7) 33 | load(filename) 34 | results <- wsmc_continue(results, savefile = filename, maxsim = 2*10^7) 35 | # load(filename) 36 | 37 | 38 | target$rprior <- function(ntheta, parameters){ 39 | theta1 <- runif(n = ntheta, min = 0, max = min(obs[1,])) 40 | theta2minus1 <- runif(n = ntheta, min = 0, max = 10) 41 | theta3 <- runif(n = ntheta, min = 0, max = 1/3) 42 | return(cbind(theta1, theta2minus1, theta3)) 43 | } 44 | # 45 | target$dprior <- function(thetas, parameters){ 46 | evals <- dunif(thetas[,1], min = 0, max = min(obs[1,]), log = TRUE) 47 | evals <- evals + dunif(thetas[,2], min = 0, max = 10, log = TRUE) 48 | evals <- evals + dunif(thetas[,3], min = 0, max = 1/3, log = TRUE) 49 | return(evals) 50 | } 51 | 52 | filename <- paste0(prefix, "queue_intermediate_wsmc_marginal_constraints.RData") 53 | #results <- wsmc(compute_d, target, param_algo, savefile = filename, maxsim = 10^7) 54 | load(filename) 55 | results <- wsmc_continue(results, savefile = filename, maxsim = 2*10^7) 56 | # load(filename) 57 | 58 | 59 | # 60 | # plot_marginal(results, 1) 61 | # plot_marginal(results, 2) 62 | # plot_marginal(results, 3) 63 | # # 64 | # # get posterior mean and variance 65 | # thetas <- tail(results$thetas_history, 1)[[1]] 66 | # thetas[,3] <- log(thetas[,3]) 67 | # theta_mean <- colMeans(thetas) 68 | # theta_cov <- cov(thetas) 69 | # 70 | # theta_mean 71 | # theta_cov 72 | -------------------------------------------------------------------------------- /inst/reproduceabc/supplementary/README.R: -------------------------------------------------------------------------------- 1 | Reproduce results for the supplementary materials 2 | === 3 | 4 | check_assumption_plot.R: produces two plots for Figure 1 (a,b) of the supplementary, 5 | in the Section "2.1.2. Misspecified location model" 6 | 7 | gandk.checkassumption.R: produces 5 plots for Figure 2 (a,b,c,d) and Figure 3 8 | of the supplementary, in the Section "2.3 Checking Assumption 5 for the g-and-k example" 9 | 10 | multivariate.transportdistancecomparison.R: computes different transport distances 11 | between samples generated from multivariate Normal distributions, with different 12 | parameters and different dimensions. 13 | 14 | multivariate.transportdistancecomparison.plots.R: loads results of above script 15 | to create Figure 4 of the supplementary, in the Section "4. Approximation of the Wasserstein distance 16 | ". 17 | 18 | -------------------------------------------------------------------------------- /inst/reproduceabc/supplementary/check_assumption_plot.R: -------------------------------------------------------------------------------- 1 | library(winference) 2 | registerDoParallel(cores = detectCores()) 3 | rm(list = ls()) 4 | setmytheme() 5 | set.seed(13) 6 | 7 | 8 | dgpname <- "gamma" 9 | modelname <- "normal" 10 | 11 | # dgp 12 | if (dgpname == "gamma"){ 13 | robs <- function(n) rgamma(n, 10, 5) 14 | } else { 15 | robs <- function(n) rnorm(n, mean = 2, sd = 1) 16 | } 17 | nobs <- 500000 18 | 19 | # model 20 | if (modelname == "gamma"){ 21 | rmodel <- function(n, theta) rgamma(n, theta, 5) 22 | } else { 23 | rmodel <- function(n, theta) rnorm(n, mean = theta, sd = 1) 24 | } 25 | prefix <- "" 26 | savefile <- paste0(prefix, "check.as6.dgp", dgpname, ".model", modelname, ".RData") 27 | 28 | nseq <- 100 29 | if (modelname == "gamma"){ 30 | theta_seq <- seq(from = 9.5, to = 10.5, length.out = nseq) 31 | } else { 32 | theta_seq <- seq(from = 1.5, to = 2.5, length.out = nseq) 33 | } 34 | 35 | # Wasserstein distance with p = 2 36 | y_obs <- robs(nobs) 37 | compute_d2 <- get_hilbert_to_y(matrix(y_obs, nrow = 1), p = 2, ground_p = 2) 38 | results <- foreach(i = 1:nseq) %dorng% { 39 | 40 | theta <- theta_seq[i] 41 | y_fake <- rmodel(nobs, theta) 42 | # hilbert_distance(matrix(y_obs, nrow = 1), matrix(y_fake, nrow = 1)) 43 | compute_d2(matrix(y_fake, nrow = 1)) 44 | } 45 | save(results, file = savefile) 46 | load(file = savefile) 47 | 48 | wp2 <- sapply(results, function(v) v) 49 | 50 | theta_star <- theta_seq[which.min(wp2)] 51 | x <- abs(theta_seq - 2) 52 | ysqrt <- sqrt(abs(wp2 - wp2[which.min(wp2)])) 53 | y <- abs(wp2 - wp2[which.min(wp2)]) 54 | 55 | figurefile <- paste0(prefix, "check.as6.dgp", dgpname, ".model", modelname, ".theta-vs-wasserstein.pdf") 56 | g <- qplot(x = theta_seq, y = wp2, geom = "point") + xlab(expression(theta)) + ylab("Wasserstein distance") 57 | print(g) 58 | ggsave(filename = figurefile, plot = g, width = 7, height = 5) 59 | figurefile <- paste0(prefix, "check.as6.dgp", dgpname, ".model", modelname, ".theta-vs-wasserstein.png") 60 | ggsave(filename = figurefile, plot = g, width = 7, height = 5, dpi = 150) 61 | 62 | if (modelname == dgpname){ 63 | figurefile <- paste0(prefix, "check.as6.dgp", dgpname, ".model", modelname, ".distancetheta.pdf") 64 | g <- qplot(x = x, y = y, geom = "point") + xlab("distance to theta star") + ylab("Wasserstein distance") + geom_smooth(method = "lm", se = FALSE) 65 | print(g) 66 | ggsave(filename = figurefile, plot = g, width = 7, height = 5) 67 | } else { 68 | figurefile <- paste0(prefix, "check.as6.dgp", dgpname, ".model", modelname, ".distancetheta.pdf") 69 | g <- qplot(x = x, y = ysqrt, geom = "point") + geom_smooth(method = "lm", se = FALSE) + xlab(expression(abs(theta-2))) + ylab("sqrt(Wasserstein distance)") 70 | print(g) 71 | ggsave(filename = figurefile, plot = g, width = 7, height = 5) 72 | figurefile <- paste0(prefix, "check.as6.dgp", dgpname, ".model", modelname, ".distancetheta.png") 73 | ggsave(filename = figurefile, plot = g, width = 7, height = 5, dpi = 150) 74 | } 75 | 76 | 77 | -------------------------------------------------------------------------------- /inst/reproduceabc/supplementary/multivariate.transportdistancecomparison.plots.R: -------------------------------------------------------------------------------- 1 | library(winference) 2 | library(doParallel) 3 | library(doRNG) 4 | library(dplyr) 5 | library(ggthemes) 6 | registerDoParallel(cores = 10) 7 | rm(list = ls()) 8 | set.seed(1) 9 | setmytheme() 10 | my_colors <- get_my_colors() 11 | # my_colors <- c(my_colors, c("Sinkhorn" = "cornflowerblue")) 12 | # my_colors['Swap'] <- "009E73" 13 | timings_all <- data.frame() 14 | distances.df_all <- data.frame() 15 | 16 | for (d in 2:5){ 17 | load(paste0("transportdistances.n500.d", d, ".i2.RData")) 18 | timings_all <- rbind(timings_all, timings %>% group_by(expr) %>% summarise(m = median(time)/1e9) %>% mutate(dimension = d)) 19 | distances.df$dimension <- d 20 | distances.df_all <- rbind(distances.df_all, distances.df) 21 | } 22 | 23 | timings_all 24 | # timings %>% group_by(expr, dimension) %>% summarise(m = median(time)/1e9) 25 | distances.df_all$dimension <- factor(distances.df_all$dimension, levels = 2:5, labels = paste0("d = ", 2:5)) 26 | head(distances.df_all) 27 | g <- ggplot(distances.df_all, aes(x = thetas, y = e)) + geom_point(aes(colour = "Wasserstein")) 28 | g <- g + geom_point(aes(y = sw, colour = "Swap")) 29 | g <- g + geom_point(aes(y = h, colour = "Hilbert")) 30 | g <- g + scale_color_manual(name = "", values = my_colors) + xlab(expression(sigma^2)) + ylab("distance") + facet_wrap(~ dimension, nrow = 1) 31 | g <- g + guides(colour = guide_legend(override.aes = list(size=5))) + theme(legend.text=element_text(size=20)) 32 | g <- g + scale_x_continuous(breaks = c(1,2,3,4,5,6,7,8)) 33 | # if (icomponent==2){ 34 | # g <- g + geom_vline(xintercept = var(y[1,]), linetype = 3) + geom_vline(xintercept = theta_dgp[icomponent], linetype = 2) 35 | # } 36 | g 37 | ggsave(filename = "transportdistances.n500.i2.png", plot = g, width = 15, height = 5, dpi = 500) 38 | -------------------------------------------------------------------------------- /inst/reproduceabc/supplementary/wasserstein_clt_mvtnorm_mean.R: -------------------------------------------------------------------------------- 1 | library(winference) 2 | registerDoParallel(cores = detectCores()) 3 | rm(list = ls()) 4 | setmytheme() 5 | set.seed(11) 6 | 7 | fig.height <- 5 8 | fig.width <- 5 9 | 10 | my_colors <- get_my_colors() 11 | 12 | d = 10 13 | 14 | #obs 15 | n = 10^2 16 | obs = fast_rmvnorm(n,rep(2,d),diag(1,d)) 17 | obs_mean = apply(obs,2,mean) 18 | 19 | #fake 20 | m = 10^2 21 | 22 | compute_d = function(fake_obs){ 23 | swap_distance(t(obs), t(fake_obs), tolerance = 1e-5)$distance 24 | } 25 | #test 26 | fake_obs = fast_rmvnorm(n,rep(1,d),diag(1,d)) 27 | compute_d(fake_obs) 28 | 29 | 30 | 31 | #Grid 32 | grid = seq(obs_mean[1]-1.5,obs_mean[1]+1.5,length.out = 20) 33 | 34 | #Number of data sets per grid point 35 | N = 100 36 | 37 | #Variance of fake data sets 38 | sigma2 = 1 39 | 40 | dists = foreach(k = 1:length(grid), .combine = rbind) %dorng% { 41 | store_distances = rep(0,N) 42 | store_meandiffs = rep(0,N) 43 | for(i in 1:N){ 44 | #fake_obs = fast_rmvnorm(m,c(grid[k],obs_mean[2:d]),diag(sigma2,d)) 45 | fake_obs = fast_rmvnorm(m,c(grid[k],rep(1,d-1)),diag(sigma2,d)) 46 | fake_mean = apply(fake_obs,2,mean) 47 | store_distances[i] = compute_d(fake_obs) 48 | store_meandiffs[i] = sum((obs_mean-fake_mean)^2)^0.5 49 | } 50 | out = cbind(rep(grid[k],N),store_distances,store_meandiffs) 51 | return(out) 52 | } 53 | 54 | dists.df = data.frame(theta = dists[,1], Wasserstein = dists[,2], xbardiff = dists[,3]) 55 | # save(dists.df,file = paste0(prefix,"was_vs_xbar_mvtnorm_d",d,"_n",n,"_var",sigma2,".Rdata")) 56 | # load(paste0(prefix,"was_vs_xbar_mvtnorm_d",d,"_n",n,"_var",sigma2,".Rdata")) 57 | 58 | wmax = max(dists.df$Wasserstein) 59 | wmin = min(dists.df$Wasserstein) 60 | wmax - wmin 61 | 62 | xmax = max(dists.df$xbardiff) 63 | xmin = min(dists.df$xbardiff) 64 | xmax - xmin 65 | 66 | ll = max(wmax-wmin,xmax-xmin) 67 | 68 | # g = ggplot(dists.df, aes(theta, Wasserstein, group = theta)) + geom_boxplot() 69 | # g = g + coord_cartesian(ylim = c(wmin-ll/20,wmin+ll)) 70 | # g 71 | # 72 | # g = ggplot(dists.df, aes(theta, xbardiff, group = theta)) + geom_boxplot() 73 | # g = g + coord_cartesian(ylim = c(xmin-ll/20,xmin+ll)) 74 | # g 75 | 76 | g = ggplot(data = dists.df, aes(theta, Wasserstein, group = theta)) + geom_boxplot(aes(fill = "Wasserstein"),alpha=0.7) 77 | g = g + geom_boxplot(data = dists.df, aes(theta, xbardiff, group = theta, fill = "Summary"),alpha=0.7) 78 | g = g + scale_fill_manual(name = "", values = my_colors) + xlab(expression(theta)) + ylab("distance") 79 | g 80 | #ggsave(filename = paste0(prefix,"was_vs_xbar_mvtnorm_d",d,"_n",n,"_var",sigma2,".pdf"), plot = g, width = fig.width, height = fig.height) 81 | 82 | 83 | 84 | 85 | -------------------------------------------------------------------------------- /inst/reproduceabc/supplementary/wasserstein_clt_mvtnorm_mean_plots.R: -------------------------------------------------------------------------------- 1 | library(winference) 2 | registerDoParallel(cores = detectCores()) 3 | rm(list = ls()) 4 | setmytheme() 5 | set.seed(11) 6 | 7 | fig.height <- 5 8 | fig.width <- 5 9 | 10 | my_colors <- get_my_colors() 11 | 12 | dset = c(2,5,10) 13 | nset = c(10,100,1000) 14 | 15 | for(dd in 1:3){ 16 | for(nn in 1:3){ 17 | 18 | d = dset[dd] 19 | 20 | #obs 21 | n = nset[nn] 22 | obs = fast_rmvnorm(n,rep(2,d),diag(1,d)) 23 | obs_mean = apply(obs,2,mean) 24 | 25 | #fake 26 | m = nset[nn] 27 | 28 | compute_d = function(fake_obs){ 29 | swap_distance(t(obs), t(fake_obs), tolerance = 1e-5)$distance 30 | } 31 | #test 32 | fake_obs = fast_rmvnorm(n,rep(1,d),diag(1,d)) 33 | compute_d(fake_obs) 34 | 35 | 36 | 37 | #Grid 38 | grid = seq(obs_mean[1]-1.5,obs_mean[1]+1.5,length.out = 20) 39 | 40 | #Number of data sets per grid point 41 | N = 100 42 | 43 | #Variance of fake data sets 44 | sigma2 = 1 45 | 46 | # dists = foreach(k = 1:length(grid), .combine = rbind) %dorng% { 47 | # store_distances = rep(0,N) 48 | # store_meandiffs = rep(0,N) 49 | # for(i in 1:N){ 50 | # #fake_obs = fast_rmvnorm(m,c(grid[k],obs_mean[2:d]),diag(sigma2,d)) 51 | # fake_obs = fast_rmvnorm(m,c(grid[k],rep(1,d-1)),diag(sigma2,d)) 52 | # fake_mean = apply(fake_obs,2,mean) 53 | # store_distances[i] = compute_d(fake_obs) 54 | # store_meandiffs[i] = sum((obs_mean-fake_mean)^2)^0.5 55 | # } 56 | # out = cbind(rep(grid[k],N),store_distances,store_meandiffs) 57 | # return(out) 58 | # } 59 | # 60 | # dists.df = data.frame(theta = dists[,1], Wasserstein = dists[,2], xbardiff = dists[,3]) 61 | # save(dists.df,file = paste0(prefix,"was_vs_xbar_mvtnorm_d",d,"_n",n,"_var",sigma2,".Rdata")) 62 | load(paste0(prefix,"was_vs_xbar_mvtnorm_d",d,"_n",n,"_var",sigma2,".Rdata")) 63 | 64 | wmax = max(dists.df$Wasserstein) 65 | wmin = min(dists.df$Wasserstein) 66 | wmax - wmin 67 | 68 | xmax = max(dists.df$xbardiff) 69 | xmin = min(dists.df$xbardiff) 70 | xmax - xmin 71 | 72 | ll = max(wmax-wmin,xmax-xmin) 73 | 74 | # g = ggplot(dists.df, aes(theta, Wasserstein, group = theta)) + geom_boxplot() 75 | # g = g + coord_cartesian(ylim = c(wmin-ll/20,wmin+ll)) 76 | # g 77 | # 78 | # g = ggplot(dists.df, aes(theta, xbardiff, group = theta)) + geom_boxplot() 79 | # g = g + coord_cartesian(ylim = c(xmin-ll/20,xmin+ll)) 80 | # g 81 | 82 | g = ggplot(data = dists.df, aes(theta, Wasserstein, group = theta)) + geom_boxplot(aes(fill = "Wasserstein"),alpha=0.7) 83 | g = g + geom_boxplot(data = dists.df, aes(theta, xbardiff, group = theta, fill = "Summary"),alpha=0.7) 84 | g = g + scale_fill_manual(name = "", values = my_colors) + xlab(expression(theta[1])) + ylab("distance") 85 | #g 86 | ggsave(filename = paste0(prefix,"was_vs_xbar_mvtnorm_d",d,"_n",n,"_var",sigma2,".pdf"), plot = g, width = fig.width, height = fig.height) 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /inst/reproduceabc/toggleswitch/README.R: -------------------------------------------------------------------------------- 1 | Reproduce results for the toggle switch model in Section 5.2 of the WABC article. 2 | === 3 | 4 | toggle_switch_generate.R: generate and save a data set from the model 5 | 6 | toggle_switch_wsmc.R: loads a data set of size 2000 and runs SMC to approximate the WABC 7 | posterior using the exact Wasserstein distance with a budget of 10^6 model simulations. 8 | 9 | toggle_switch_summary.R: construct the summary statistic from Bonassi et al. 10 | 11 | toggle_switch_load_summary.R: after toggle_switch_summary.R has been run, use this 12 | script to load the summary statistic. 13 | 14 | toggle_switch_summary_smc.R: loads the data set and the summary statistic and runs SMC to 15 | approximate the ABC posterior based on the summary statistic with a budget of 10^6 model 16 | simulations. 17 | 18 | toggle_switch_plots.R: loads the data and the output from the SMC runs and plots the marginal 19 | distributions of the resulting ABC posteriors. Also plots a histogram of the data. Corresponds 20 | to Fig 7 of the article. -------------------------------------------------------------------------------- /inst/reproduceabc/toggleswitch/toggle_switch_generate.R: -------------------------------------------------------------------------------- 1 | library(winference) 2 | registerDoParallel(cores = detectCores()) 3 | rm(list = ls()) 4 | setmytheme() 5 | 6 | prefix = "" 7 | 8 | set.seed(11) 9 | target <- get_toggleswitch() 10 | 11 | # number of observations 12 | nobservations <- 2000 13 | # parameter of data-generating process 14 | # true_theta <- c(alpha_1, alpha_2, beta_1, beta_2, mu, sigma, gamma) 15 | true_theta <- c(22, 12, 4, 4.5, 325, 0.25, 0.15) 16 | 17 | obs <- target$robservation(nobservations, true_theta, 18 | target$parameters, target$generate_randomness(nobservations)) 19 | #save(true_theta, obs, file = "toggleswitchdata.RData") 20 | 21 | hist(obs) 22 | # plot(obs[1:1000], type = "l") 23 | # mean(obs) 24 | # sd(obs) 25 | 26 | -------------------------------------------------------------------------------- /inst/reproduceabc/toggleswitch/toggle_switch_load_summary.R: -------------------------------------------------------------------------------- 1 | #Loads the summary statistic provided the 2 | prefix = "" 3 | 4 | #Parameters 5 | p = 25000 #Number of data sets drawn from prior predictive 6 | nobservations = 2000 7 | R = 100 8 | 9 | upper.bound = 2000 10 | nbins = 50 11 | breaks = seq(0, upper.bound, length.out = nbins) 12 | 13 | #Set bandwith so as to mimic the histogram 14 | bandwidth = upper.bound/nbins 15 | #mids from histogram 16 | mids = breaks[-1] - (breaks[-1]-breaks[-nbins])/2 17 | 18 | filename = paste0(prefix,"toggle.binned.p",p,".nobs", nobservations, ".RData") 19 | 20 | load(filename) 21 | 22 | binned.data.matrix = t(sapply(binned.data, function(x) x[[1]])) 23 | 24 | binned.data.svd = svd(binned.data.matrix) 25 | 26 | first.col.A = binned.data.svd$u[,1] 27 | 28 | #Choose references as the R percentiles of first column of A 29 | reference.order = order(first.col.A) 30 | reference.index = reference.order[c(1,(1:(R-1))*(p/R),p)] 31 | 32 | prob.mat = binned.data.matrix[reference.index,]/nobservations 33 | 34 | # #Check call 35 | # summary_full(y,bandwidth,prob.mat,mids) 36 | 37 | data.matrix = t(sapply(binned.data, function(x) x[[2]])) 38 | 39 | #Load full summary stat for all R references 40 | filename2 = paste0(prefix,"summary.full.p",p,".nobs", nobservations, ".RData") 41 | load(filename2) 42 | 43 | summary.full.matrix = t(sapply(summary.stat.full, function(x) x)) 44 | #replace -Inf with -1000 45 | summary.full.matrix = pmax(summary.full.matrix,-1000) 46 | 47 | summary.full.svd = svd(summary.full.matrix) 48 | 49 | #Number of principal components 50 | r = 11 51 | M = rbind(diag(1,r),matrix(0,R+1-r,r)) 52 | H = summary.full.svd$v%*%M 53 | 54 | summary.stat = function(y){ 55 | S = summary_full(y,bandwidth,prob.mat,mids) 56 | S = pmax(S,-1000) 57 | S.red = S%*%H 58 | return(S.red) 59 | } 60 | -------------------------------------------------------------------------------- /inst/reproduceabc/toggleswitch/toggle_switch_summary.R: -------------------------------------------------------------------------------- 1 | #Constructs the summary statistic in Bonassi 2 | library(winference) 3 | registerDoParallel(cores = detectCores()) 4 | rm(list = ls()) 5 | setmytheme() 6 | set.seed(11) 7 | prefix = "" 8 | 9 | target <- get_toggleswitch() 10 | 11 | #Parameters 12 | p = 25000 #Number of data sets drawn from prior predictive 13 | nobservations = 2000 14 | R = 100 15 | 16 | upper.bound = 2000 17 | nbins = 50 18 | breaks = seq(0, upper.bound, length.out = nbins) 19 | 20 | target$simulate <- function(theta) matrix(target$robservation(nobservations, theta, target$parameters, target$generate_randomness(nobservations)), nrow = 1) 21 | 22 | prior_draws = target$rprior(p, target$parameters) 23 | 24 | filename = paste0(prefix,"toggle.binned.p",p,".nobs", nobservations, ".RData") 25 | 26 | t = proc.time() 27 | binned.data = foreach(i = 1:p) %dorng% { 28 | 29 | #simulate data set given draw from prior 30 | ysim = target$simulate(prior_draws[i,]) 31 | 32 | #sets all values greater than R to R-0.00001 33 | ysim = sapply(ysim, function(x) min(x,upper.bound-0.00001)) 34 | 35 | #bin the data 36 | binned = tabulate(findInterval(ysim, vec=breaks),nbins = nbins-1) 37 | 38 | return(list(binned,ysim)) 39 | } 40 | t = proc.time() - t 41 | save(binned.data,file = filename) 42 | 43 | load(filename) 44 | 45 | binned.data.matrix = t(sapply(binned.data, function(x) x[[1]])) 46 | 47 | binned.data.svd = svd(binned.data.matrix) 48 | 49 | first.col.A = binned.data.svd$u[,1] 50 | 51 | #Choose references as the R percentiles of first column of A 52 | reference.order = order(first.col.A) 53 | reference.index = reference.order[c(1,(1:(R-1))*(p/R),p)] 54 | 55 | #Set bandwith so as to mimic the histogram 56 | bandwidth = upper.bound/nbins 57 | 58 | #Continuous histograms 59 | mids = breaks[-1] - (breaks[-1]-breaks[-nbins])/2 60 | 61 | prob.mat = binned.data.matrix[reference.index,]/nobservations 62 | 63 | #Check call 64 | y = data.matrix[2,] 65 | summary_full(y,bandwidth,prob.mat,mids) 66 | 67 | data.matrix = t(sapply(binned.data, function(x) x[[2]])) 68 | 69 | filename2 = paste0(prefix,"summary.full.p",p,".nobs", nobservations, ".RData") 70 | t = proc.time() 71 | summary.stat.full = foreach(i = 1:p) %dorng% { 72 | y = data.matrix[i,] 73 | out = summary_full(y,bandwidth,prob.mat,mids) 74 | return(out) 75 | } 76 | t = proc.time() - t 77 | save(summary.stat.full,file = filename2) 78 | load(filename2) 79 | 80 | summary.full.matrix = t(sapply(summary.stat.full, function(x) x)) 81 | #replaces -Inf with -1000 (number on log scale) 82 | summary.full.matrix = pmax(summary.full.matrix,-1000) 83 | 84 | summary.full.svd = svd(summary.full.matrix) 85 | 86 | #Number of principal components 87 | r = 11 88 | M = rbind(diag(1,r),matrix(0,R+1-r,r)) 89 | H = summary.full.svd$v%*%M 90 | summary.reduced.matrix = summary.full.matrix%*%H 91 | # filename3 = paste0("summary.H.p",p,".nobs", nobservations, ".RData") 92 | # save(H,file = filename3) 93 | 94 | summary.stat = function(y){ 95 | S = summary_full(y,bandwidth,prob.mat,mids) 96 | S = pmax(S,-1000) 97 | S.red = S%*%H 98 | return(S.red) 99 | } 100 | 101 | # #Try 102 | # t = proc.time() 103 | # y = target$simulate(target$rprior(target$truetheta, target$parameters)) 104 | # t = proc.time()-t 105 | # t 106 | # 107 | # t = proc.time() 108 | # ss = summary.stat(y) 109 | # t = proc.time()-t 110 | # t 111 | # 112 | # t = proc.time() 113 | # ss = sort(y) 114 | # t = proc.time()-t 115 | # t 116 | 117 | -------------------------------------------------------------------------------- /inst/reproduceabc/toggleswitch/toggle_switch_summary_smc.R: -------------------------------------------------------------------------------- 1 | library(winference) 2 | registerDoParallel(cores = detectCores()-2) 3 | rm(list = ls()) 4 | setmytheme() 5 | set.seed(11) 6 | 7 | prefix = "" 8 | 9 | target <- get_toggleswitch() 10 | # number of observations 11 | nobservations <- 2000 12 | load(file = paste0(prefix,"toggleswitchdata.RData")) 13 | obs <- obs[1:nobservations] 14 | 15 | #Load summary stat. Requires toggle_switch_summary.R to have been run first. 16 | source(paste0(prefix,"toggle_switch_load_summary.R")) 17 | 18 | s_obs = summary.stat(obs) 19 | 20 | # function to compute distance between observed data and data generated given theta 21 | compute_d <- function(y){ 22 | s_fake = summary.stat(y) 23 | dist = sqrt(sum((s_obs-s_fake)^2)) 24 | return(dist) 25 | } 26 | 27 | target$simulate <- function(theta) matrix(target$robservation(nobservations, theta, target$parameters, target$generate_randomness(nobservations)), nrow = 1) 28 | 29 | y_sim <- target$simulate(target$rprior(1, target$parameters)) 30 | compute_d(y_sim) 31 | 32 | param_algo <- list(nthetas = 2048, nmoves = 1, proposal = mixture_rmixmod(), 33 | minimum_diversity = 0.5, R = 2, maxtrials = 1000) 34 | 35 | filename <- paste0(prefix,"toggleswitchw.summary.smc.n", nobservations, ".RData") 36 | results <- wsmc(compute_d, target, param_algo, savefile = filename, maxsimulation = 1e6) 37 | load(filename) 38 | #results <- wsmc_continue(results, savefile = filename, maxsim = (1e6 - 166035)) 39 | -------------------------------------------------------------------------------- /inst/reproduceabc/toggleswitch/toggle_switch_wsmc.R: -------------------------------------------------------------------------------- 1 | library(winference) 2 | registerDoParallel(cores = detectCores()) 3 | rm(list = ls()) 4 | setmytheme() 5 | set.seed(11) 6 | 7 | prefix = "" 8 | 9 | target <- get_toggleswitch() 10 | # number of observations 11 | nobservations <- 2000 12 | load(file = paste0(prefix,"toggleswitchdata.RData")) 13 | obs <- obs[1:nobservations] 14 | obs_sorted <- sort(obs) 15 | 16 | # function to compute distance between observed data and data generated given theta 17 | compute_d = function(y){ 18 | sort_y = sort(y) 19 | mean(abs(sort_y-obs_sorted)) 20 | } 21 | 22 | target$simulate <- function(theta) matrix(target$robservation(nobservations, theta, target$parameters, target$generate_randomness(nobservations)), nrow = 1) 23 | 24 | y_sim <- target$simulate(target$rprior(1, target$parameters)) 25 | compute_d(y_sim) 26 | 27 | param_algo <- list(nthetas = 2048, nmoves = 1, proposal = mixture_rmixmod(), 28 | minimum_diversity = 0.5, R = 2, maxtrials = 1000) 29 | 30 | filename <- paste0(prefix, "toggleswitchwsmc.n", nobservations, ".RData") 31 | results <- wsmc(compute_d, target, param_algo, savefile = filename, maxsimulation = 1e6) 32 | load(filename) 33 | #results <- wsmc_continue(results, savefile = filename, maxsim = 1e6) 34 | # plot_bivariate(results, 1, 2) 35 | -------------------------------------------------------------------------------- /inst/reproducepointestimation/README.R: -------------------------------------------------------------------------------- 1 | These scripts produce the figure for the manuscript 2 | "On parameter estimation with the Wasserstein distance" 3 | by 4 | Espen Bernton, Pierre E. Jacob, Mathieu Gerber, Christian P. Robert 5 | 6 | The instructions below describe the different files. 7 | 8 | Some of the scripts require having loaded packages such as ggplot2 for plotting, 9 | doParallel, foreach, doRNG for parallel computation and some 'dplyr' for data.frame manipulations. 10 | 11 | ####### 12 | 13 | Section 4.1 Quantile “g-and-k” distribution 14 | 15 | gandk_functions.R: contains function definitions, in particular the inverse CDF of the g-and-k distribution in C++ 16 | 17 | gandk.cluster.script.R: computes the MEWE on B=1000 bootstrap versions of the data, and saves the output; we ran this 400 times in parallel on a cluster. 18 | (This function will have to be adapted to different machines, and file paths changed appropriately.) 19 | 20 | gandk_coverage.R: postprocesses the outputs of the above script to compute the coverage. 21 | 22 | gandk_plots.R: computes MEWE for different values of data size n, and a number of independent runs, 23 | in order to produce the scatter plots and rescaled plots of the section, as well as a histogram of a data set. 24 | These plots are in Figure 1 of the manuscript. 25 | 26 | gandk_correlated.R: same but for a data-generating process that generates dependent 27 | observations. The resulting plots are in Figure 2 of the manuscript. 28 | 29 | 30 | ####### 31 | 32 | Section 4.2 Sum of log-Normal random variables 33 | 34 | same as for g-and-k, with files starting with "lognormal_" 35 | 36 | The resulting plots are in Figure 3 of the manuscript. 37 | 38 | ####### 39 | 40 | Section 4.3 Gamma data fitted with a Normal model 41 | 42 | gamma_normal_functions.R: contains function definitions, in particular those that generate data from the model 43 | 44 | gamma_normal_fixedmk_diffn.R: produces plots to show the behavior of the MEWE as the number of observations increase 45 | (plots of Figure 4 in the manuscript) 46 | 47 | gamma_normal_fixedn_diffmk.R: produces plots to show the effect of different m and k on the quality of the MEWE approximation of the MWE, for a fixed data set. 48 | (plots of Figure 5 in the manuscript) 49 | 50 | gamma_normal_bootstrap.R: runs 400 independent experiments in which the MEWE is computed on B=1000 bootstrap versions of the data, and saves the the corresponding bootstrap intervals. It also calculates the coverage rate of the intervals. 51 | 52 | ####### 53 | 54 | Section 4.4 Cauchy data fitted with a Normal model 55 | 56 | cauchydata_normalfit_fixedmk_diffn.R: computes MEWE repeatedly, on independent data sets of various sizes, 57 | and produces scatter plots and rescaled plots. 58 | 59 | cauchydata_normalfit_fixedmk_diffn_KL.R: same for a minimum KL estimator, using the 60 | FNN package. 61 | 62 | These two scripts produce the plots presented in Figure 6 of the manuscript. 63 | -------------------------------------------------------------------------------- /inst/reproducepointestimation/gamma_normal_functions.R: -------------------------------------------------------------------------------- 1 | target <- list() 2 | 3 | target$generate_randomness <- function(nobservations){ 4 | return(rnorm(nobservations)) 5 | } 6 | 7 | target$thetadim = 2 8 | 9 | metricL1 = function(xvec,yvec) mean(abs(xvec - yvec)) 10 | -------------------------------------------------------------------------------- /inst/reproducepointestimation/gandk.cluster.script.R: -------------------------------------------------------------------------------- 1 | rm(list = ls()) 2 | library(parallel) 3 | # parallel RNG using L'Ecuyer et al (2002) 4 | RNGkind("L'Ecuyer-CMRG") # L'Ecuyer CMRG required for multiple streams 5 | igrid <- as.integer(Sys.getenv('SLURM_ARRAY_TASK_ID')) 6 | set.seed(1) # initial seed 7 | for (i in 1:igrid){ 8 | .Random.seed <- nextRNGStream(.Random.seed) # compute appropriate stream 9 | } 10 | 11 | source("gandk_functions.R") 12 | true_theta = c(3,1,2,0.5) 13 | 14 | 15 | #Pick m to be larger than or equal to max(n) and a multiple of each entry in n 16 | #M = 50 #Number of data sets from dgp 17 | n = 1000 #Number of samples in each data set from dgp 18 | N = 20 #Number of synthetic data sets for finding MEWE 19 | m = 10^4 #Number of samples in the synthetic data sets 20 | B = 10 #Number of bootstrap samples per data set from dgp 21 | alpha = 0.05 #Desired significance level, compute the alpha/2 and 1-alpha/2 quantiles 22 | thetadim = 4 #Inference on all parameters 23 | 24 | filename = paste0("output/gandk_bootstrap_run",igrid,"_N",N,"_m",m,"_n",n,"_B",B,".Rdata") 25 | 26 | # Bootstrap confidence intervals for the A and B parameters 27 | time_elapsed = proc.time() 28 | #Temporary storage for the bootstrapped estimators and other information 29 | bs_store = matrix(0,B, thetadim)#target$thetadim) 30 | bs_count_store = rep(0,B) 31 | 32 | 33 | #Generate the observations, sort, and replicate observations to match length of the synthetic data 34 | obs_rand = target$generate_randomness(n) 35 | obs = target$robservation(true_theta,obs_rand) 36 | sort_obs = sort(obs) 37 | sort_obs_mult = rep(sort_obs, each = m/n) 38 | 39 | #Generate the synthetic randomness, sort. 40 | randomness = t(sapply(1:N, function(x) target$generate_randomness(m))) 41 | sort_randomness = t(apply(randomness, 1, sort)) 42 | 43 | #Define the objective to be minimized to find the MEWE 44 | obj1 = function(theta){ 45 | if(prod(theta) < 0){ 46 | out = 100 47 | } else{ 48 | wass_dists = apply(sort_randomness, 1, function(x) metricL1(sort_obs_mult,target$robservation(theta,x))) 49 | out = mean(wass_dists) 50 | } 51 | return(out) 52 | } 53 | 54 | #Find the MEWE 55 | mewe = optim(true_theta,obj1) 56 | 57 | #### Bootstrap 58 | #Generate multinomials 59 | multinom = matrix(sample(1:n, size = n*B, replace = T), nrow = B) 60 | sort_multinom = t(apply(multinom, 1, sort)) 61 | 62 | #Resample B times from the empirical distribution, sorted 63 | emp_sort_obs = t(apply(sort_multinom, 1, function(x) sort_obs[x])) 64 | 65 | #For each of the resampled data sets, find the MEWE. 66 | ### WARNING: CURRENTLY RECYCLING THE RANDOMNESS FROM ABOVE, CHECK TO SEE IF IT MAKES A DIFFERENCE. 67 | for(i in 1:B){ 68 | 69 | #Replicate observations to match length of the synthetic data 70 | emp_obs_mult = rep(emp_sort_obs[i,], each = m/n) 71 | 72 | #Define the objective function 73 | obj_bs = function(theta){ 74 | wass_dists = apply(sort_randomness, 1, function(x) metricL1(emp_obs_mult,target$robservation(theta,x))) 75 | out = mean(wass_dists) 76 | return(out) 77 | } 78 | 79 | #Find minimum, initializing at the MEWE 80 | mewe_bs = try(optim(mewe$par,obj_bs)) 81 | if (inherits(mewe_bs, "try-error")){ 82 | mewe_bs <- list(par = rep(NA, 4), count = Inf) 83 | } 84 | 85 | #Store the stuff 86 | bs_store[i,] = mewe_bs$par 87 | bs_count_store[i] = mewe_bs$count[1] 88 | } 89 | 90 | time_elapsed = proc.time() - time_elapsed 91 | 92 | save(bs_store, bs_count_store, time_elapsed, file = filename) 93 | -------------------------------------------------------------------------------- /inst/reproducepointestimation/gandk_coverage.R: -------------------------------------------------------------------------------- 1 | rm(list = ls()) 2 | 3 | source("gandk_functions.R") 4 | prefix = "output/" 5 | 6 | true_theta = c(3,1,2,0.5) 7 | 8 | M = 400 9 | n = 1000 #Number of samples in each data set from dgp 10 | N = 20 #Number of synthetic data sets for finding MEWE 11 | m = 10^4 #Number of samples in the synthetic data sets 12 | B = 1000 #Number of bootstrap samples per data set from dgp 13 | alpha = 0.05 #Desired significance level, compute the alpha/2 and 1-alpha/2 quantiles 14 | thetadim = 4 #Inference on all parameters 15 | 16 | #Check if value is inside the interval 17 | is.inside = function(value,interval){ 18 | if(interval[1] <= value & value <= interval[2]){ 19 | return(TRUE) 20 | } else{ 21 | return(FALSE) 22 | } 23 | } 24 | 25 | #Store the number of intervals that cover 26 | store.cover = matrix(NA,nrow = M, ncol = thetadim + 1) 27 | 28 | for(i in 1:M){ 29 | 30 | # Load file from run i 31 | filename = paste0(prefix,"gandk_bootstrap_run",i,"_N",N,"_m",m,"_n",n,"_B",B,".Rdata") 32 | load(filename) 33 | 34 | # Compute the empirical quantiles of the bootstrap samples, 35 | # both separately for each parameter and with Bonferroni correction. 36 | q_A = quantile(bs_store[,1],probs = c(alpha/8,alpha/2,1-alpha/2,1-alpha/8)) 37 | q_B = quantile(bs_store[,2],probs = c(alpha/8,alpha/2,1-alpha/2,1-alpha/8)) 38 | q_g = quantile(bs_store[,3],probs = c(alpha/8,alpha/2,1-alpha/2,1-alpha/8)) 39 | q_k = quantile(bs_store[,4],probs = c(alpha/8,alpha/2,1-alpha/2,1-alpha/8)) 40 | 41 | ci_A = q_A[2:3] 42 | ci_B = q_B[2:3] 43 | ci_g = q_g[2:3] 44 | ci_k = q_k[2:3] 45 | ci_all = rbind(q_A[c(1,4)],q_B[c(1,4)],q_g[c(1,4)],q_k[c(1,4)]) 46 | 47 | #Check if inside 48 | store.cover[i,1] = is.inside(true_theta[1],ci_A) 49 | store.cover[i,2] = is.inside(true_theta[2],ci_B) 50 | store.cover[i,3] = is.inside(true_theta[3],ci_g) 51 | store.cover[i,4] = is.inside(true_theta[4],ci_k) 52 | 53 | if(is.inside(true_theta[1],ci_all[1,]) & is.inside(true_theta[2],ci_all[2,]) & 54 | is.inside(true_theta[3],ci_all[3,]) & is.inside(true_theta[4],ci_all[4,])){ 55 | store.cover[i,5] = TRUE 56 | } else{ 57 | store.cover[i,5] = FALSE 58 | } 59 | } 60 | 61 | coverages = apply(store.cover,2,mean) 62 | -------------------------------------------------------------------------------- /inst/reproducepointestimation/gandk_functions.R: -------------------------------------------------------------------------------- 1 | library(Rcpp) 2 | 3 | cppFunction("NumericVector gandkinversecdf_givennormals(NumericVector normals, NumericVector theta){ 4 | NumericVector z = normals; 5 | double A = theta(0); 6 | double B = theta(1); 7 | double c = 0.8; 8 | double g = theta(2); 9 | double k = theta(3); 10 | return A + B * (1 + c * (1 - exp(- g * z)) / (1 + exp(- g * z))) * pow((1 + pow(z, 2.0)), k) * z; 11 | }") 12 | 13 | cppFunction("NumericVector gandkinversecdf(NumericVector x, NumericVector theta){ 14 | NumericVector z = qnorm(x); 15 | double A = theta(0); 16 | double B = theta(1); 17 | double c = 0.8; 18 | double g = theta(2); 19 | double k = theta(3); 20 | return A + B * (1 + c * (1 - exp(- g * z)) / (1 + exp(- g * z))) * pow((1 + pow(z, 2.0)), k) * z; 21 | }") 22 | 23 | target <- list() 24 | 25 | target$generate_randomness <- function(nobservations){ 26 | return(rnorm(nobservations)) 27 | } 28 | 29 | target$robservation <- function(theta, randomness){ 30 | return(gandkinversecdf_givennormals(randomness, theta)) 31 | } 32 | 33 | metricL1 = function(xvec,yvec) mean(abs(xvec - yvec)) 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /inst/reproducepointestimation/lognormal.cluster.script.R: -------------------------------------------------------------------------------- 1 | ## computes MEWE on B=1000 bootstrap version of a data set 2 | ## File to be run on a cluster preferably! 3 | 4 | rm(list = ls()) 5 | library(parallel) 6 | # parallel RNG using L'Ecuyer et al (2002) 7 | RNGkind("L'Ecuyer-CMRG") # L'Ecuyer CMRG required for multiple streams 8 | igrid <- as.integer(Sys.getenv('SLURM_ARRAY_TASK_ID')) 9 | set.seed(1) # initial seed 10 | for (i in 1:igrid){ 11 | .Random.seed <- nextRNGStream(.Random.seed) # compute appropriate stream 12 | } 13 | 14 | source("lognormal_functions.R") 15 | 16 | #Pick m to be larger than or equal to max(n) and a multiple of each entry in n 17 | #M = 50 #Number of data sets from dgp 18 | n = 10^3 #Number of samples in each data set from dgp 19 | N = 20 #Number of synthetic data sets for finding MEWE 20 | m = 10^4 #Number of samples in the synthetic data sets 21 | B = 1000 #Number of bootstrap samples per data set from dgp 22 | alpha = 0.05 #Desired significance level, compute the alpha/2 and 1-alpha/2 quantiles 23 | 24 | filename = paste0("output/lognormal_bootstrap_run",igrid,"_N",N,"_m",m,"_n",n,"_B",B,".Rdata") 25 | 26 | # Bootstrap confidence intervals for the A and B parameters 27 | time_elapsed = proc.time() 28 | #Temporary storage for the bootstrapped estimators and other information 29 | bs_store = matrix(0, B, thetadim)#target$thetadim) 30 | bs_count_store = rep(0, B) 31 | 32 | #Generate the observations, sort, and replicate observations to match length of the synthetic data 33 | obs_rand = target$generate_randomness(n) 34 | obs = target$robservation(true_theta,obs_rand) 35 | sort_obs = sort(obs) 36 | sort_obs_mult = rep(sort_obs, each = m/n) 37 | 38 | #Generate the synthetic randomness, sort. 39 | randomness = t(sapply(1:N, function(x) target$generate_randomness(m))) 40 | # sort_randomness = t(apply(randomness, 1, sort)) 41 | 42 | #Define the objective to be minimized to find the MEWE 43 | obj1 = function(theta){ 44 | if(theta[2] < 0){ 45 | out = 100 46 | } else{ 47 | wass_dists = apply(randomness, 1, function(x) metricL1(sort_obs_mult, sort(target$robservation(theta, x)))) 48 | out = mean(wass_dists) 49 | } 50 | return(out) 51 | } 52 | 53 | #Find the MEWE 54 | mewe = optim(true_theta,obj1) 55 | mewe$par 56 | 57 | # #### Bootstrap 58 | #Generate multinomials 59 | multinom = matrix(sample(1:n, size = n*B, replace = T), nrow = B) 60 | sort_multinom = t(apply(multinom, 1, sort)) 61 | 62 | #Resample B times from the empirical distribution, sorted 63 | emp_sort_obs = t(apply(sort_multinom, 1, function(x) sort_obs[x])) 64 | 65 | #For each of the resampled data sets, find the MEWE. 66 | for(i in 1:B){ 67 | #Replicate observations to match length of the synthetic data 68 | emp_obs_mult = rep(emp_sort_obs[i,], each = m/n) 69 | 70 | #Define the objective function 71 | obj_bs = function(theta){ 72 | if(theta[2] < 0){ 73 | out = 100 74 | } else{ 75 | wass_dists = apply(randomness, 1, function(x) metricL1(emp_obs_mult, sort(target$robservation(theta, x)))) 76 | out = mean(wass_dists) 77 | } 78 | return(out) 79 | } 80 | 81 | #Find minimum, initializing at the MEWE 82 | mewe_bs = try(optim(mewe$par,obj_bs)) 83 | if (inherits(mewe_bs, "try-error")){ 84 | mewe_bs <- list(par = rep(NA, thetadim), count = Inf) 85 | } 86 | 87 | #Store the stuff 88 | bs_store[i,] = mewe_bs$par 89 | bs_count_store[i] = mewe_bs$count[1] 90 | } 91 | 92 | time_elapsed = proc.time() - time_elapsed 93 | 94 | save(bs_store, bs_count_store, time_elapsed, file = filename) 95 | -------------------------------------------------------------------------------- /inst/reproducepointestimation/lognormal_coverage.R: -------------------------------------------------------------------------------- 1 | ## post process output of lognormal.cluster.script.R 2 | ## to compute coverage of confidence intervals 3 | rm(list = ls()) 4 | 5 | prefix = "output/" 6 | 7 | true_theta = c(0,1) 8 | 9 | M = 400 10 | n = 1000 #Number of samples in each data set from dgp 11 | N = 20 #Number of synthetic data sets for finding MEWE 12 | m = 10^4 #Number of samples in the synthetic data sets 13 | B = 1000 #Number of bootstrap samples per data set from dgp 14 | alpha = 0.05 #Desired significance level, compute the alpha/2 and 1-alpha/2 quantiles 15 | thetadim = 2 #Inference on all parameters 16 | 17 | #Check if value is inside the interval 18 | is.inside = function(value,interval){ 19 | if(interval[1] <= value & value <= interval[2]){ 20 | return(TRUE) 21 | } else{ 22 | return(FALSE) 23 | } 24 | } 25 | 26 | #Store the number of intervals that cover 27 | store.cover = matrix(NA,nrow = M, ncol = thetadim + 1) 28 | 29 | for(i in 1:M){ 30 | 31 | # Load file from run i 32 | filename = paste0(prefix,"lognormal_bootstrap_run",i,"_N",N,"_m",m,"_n",n,"_B",B,".Rdata") 33 | load(filename) 34 | 35 | # Compute the empirical quantiles of the bootstrap samples, 36 | # both separately for each parameter and with Bonferroni correction. 37 | q_mu = quantile(bs_store[,1],probs = c(alpha/4,alpha/2,1-alpha/2,1-alpha/4)) 38 | q_sigma = quantile(bs_store[,2],probs = c(alpha/4,alpha/2,1-alpha/2,1-alpha/4)) 39 | 40 | ci_mu = q_mu[2:3] 41 | ci_sigma = q_sigma[2:3] 42 | ci_all = rbind(q_mu[c(1,4)],q_sigma[c(1,4)]) 43 | 44 | #Check if inside 45 | store.cover[i,1] = is.inside(true_theta[1],ci_mu) 46 | store.cover[i,2] = is.inside(true_theta[2],ci_sigma) 47 | 48 | if(is.inside(true_theta[1],ci_all[1,]) & is.inside(true_theta[2],ci_all[2,])){ 49 | store.cover[i,3] = TRUE 50 | } else{ 51 | store.cover[i,3] = FALSE 52 | } 53 | } 54 | 55 | coverages = apply(store.cover,2,mean) 56 | -------------------------------------------------------------------------------- /inst/reproducepointestimation/lognormal_functions.R: -------------------------------------------------------------------------------- 1 | 2 | L <- 10 3 | 4 | target <- list() 5 | 6 | target$generate_randomness <- function(nobservations){ 7 | return(rnorm(nobservations*L)) 8 | } 9 | 10 | target$robservation <- function(theta, randomness){ 11 | normals_ <- theta[1] + theta[2] * randomness 12 | lognormals_ <- exp(normals_) 13 | return(rowSums(matrix(lognormals_, ncol = L))) 14 | } 15 | 16 | metricL1 = function(xvec, yvec) mean(abs(xvec - yvec)) 17 | 18 | true_theta <- c(0,1) 19 | 20 | thetadim = 2 #Inference on all parameters 21 | 22 | target$thetadim = 2 23 | -------------------------------------------------------------------------------- /inst/tutorials/tutorial_cosine.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pierrejacob/winference/bba0e89a019ccfeb403159f480cf2ac57e577f9d/inst/tutorials/tutorial_cosine.pdf -------------------------------------------------------------------------------- /inst/tutorials/tutorial_distances.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pierrejacob/winference/bba0e89a019ccfeb403159f480cf2ac57e577f9d/inst/tutorials/tutorial_distances.pdf -------------------------------------------------------------------------------- /inst/tutorials/tutorial_gandk.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pierrejacob/winference/bba0e89a019ccfeb403159f480cf2ac57e577f9d/inst/tutorials/tutorial_gandk.pdf -------------------------------------------------------------------------------- /inst/tutorials/tutorial_normal.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pierrejacob/winference/bba0e89a019ccfeb403159f480cf2ac57e577f9d/inst/tutorials/tutorial_normal.pdf -------------------------------------------------------------------------------- /man/.Rapp.history: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pierrejacob/winference/bba0e89a019ccfeb403159f480cf2ac57e577f9d/man/.Rapp.history -------------------------------------------------------------------------------- /man/cost_matrix_L1.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/metrics.R 3 | \name{cost_matrix_L1} 4 | \alias{cost_matrix_L1} 5 | \title{cost_matrix_L1} 6 | \usage{ 7 | cost_matrix_L1(x, y) 8 | } 9 | \description{ 10 | Compute cost matrix L1 between two matrices of dimension d x N 11 | } 12 | -------------------------------------------------------------------------------- /man/cost_matrix_L2.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/metrics.R 3 | \name{cost_matrix_L2} 4 | \alias{cost_matrix_L2} 5 | \title{cost_matrix_L2} 6 | \usage{ 7 | cost_matrix_L2(x, y) 8 | } 9 | \description{ 10 | Compute cost matrix L2 between two matrices of dimension d x N 11 | } 12 | -------------------------------------------------------------------------------- /man/cost_matrix_Lp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/metrics.R 3 | \name{cost_matrix_Lp} 4 | \alias{cost_matrix_Lp} 5 | \title{cost_matrix_Lp} 6 | \usage{ 7 | cost_matrix_Lp(x, y, p) 8 | } 9 | \description{ 10 | Compute cost matrix Lp between two matrices of dimension d x N 11 | } 12 | -------------------------------------------------------------------------------- /man/create_lagmatrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/create_lagmatrix.R 3 | \name{create_lagmatrix} 4 | \alias{create_lagmatrix} 5 | \title{create_lagmatrix} 6 | \usage{ 7 | create_lagmatrix(timeseries, k) 8 | } 9 | \value{ 10 | a matrix with k rows and n-k columns, where n is the length of the provided time series 11 | } 12 | \description{ 13 | This function creates the delay reconstruction, i.e. a matrix 14 | where the first row contains y_1, ..., y_T (the given univariate time series) 15 | the second row contains NA, y_2, ..., y_T .... etc 16 | the k-th row contains NA, NA, ..., y_{k+1}, ..., y_T 17 | } 18 | -------------------------------------------------------------------------------- /man/fast_dmvnorm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/fast_dmvnorm.R 3 | \name{fast_dmvnorm} 4 | \alias{fast_dmvnorm} 5 | \title{fast_dmvnorm} 6 | \usage{ 7 | fast_dmvnorm(x, mean, covariance) 8 | } 9 | \description{ 10 | evaluate multivariate Normal log-densities 11 | } 12 | -------------------------------------------------------------------------------- /man/fast_rmvnorm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/fast_rmvnorm.R 3 | \name{fast_rmvnorm} 4 | \alias{fast_rmvnorm} 5 | \title{fast_rmvnorm} 6 | \usage{ 7 | fast_rmvnorm(nparticles, mean, covariance) 8 | } 9 | \description{ 10 | generate multivariate Normal draws 11 | } 12 | -------------------------------------------------------------------------------- /man/get_autoregressive.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/model_get_autoregressive.R 3 | \name{get_autoregressive} 4 | \alias{get_autoregressive} 5 | \title{Autoregressive model} 6 | \usage{ 7 | get_autoregressive() 8 | } 9 | \value{ 10 | The list contains rprior, dprior (generate and evaluate the density of prior distribution), 11 | generate_randomness (generate data-generating variables), robservation (create synthetic 12 | data sets), parameter_names (useful for plotting), thetadim (dimension of parameter), 13 | ydim (dimension of observations), parameters (list of hyperparameters, 14 | to be passed to rprior,dprior,robservation) 15 | } 16 | \description{ 17 | This function returns a list representing an auto-regressive 18 | model of order 1. 19 | } 20 | -------------------------------------------------------------------------------- /man/get_cosine.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/model_get_cosine.R 3 | \name{get_cosine} 4 | \alias{get_cosine} 5 | \title{Cosine model} 6 | \usage{ 7 | get_cosine() 8 | } 9 | \value{ 10 | The list contains rprior, dprior (generate and evaluate the density of prior distribution), 11 | generate_randomness (generate data-generating variables), robservation (create synthetic 12 | data sets), parameter_names (useful for plotting), thetadim (dimension of parameter), 13 | ydim (dimension of observations), parameters (list of hyperparameters, 14 | to be passed to rprior,dprior,robservation) 15 | } 16 | \description{ 17 | This function returns a list representing a cosine trend model. 18 | } 19 | -------------------------------------------------------------------------------- /man/get_gandk.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/model_get_gandk.R 3 | \name{get_gandk} 4 | \alias{get_gandk} 5 | \title{G and k model} 6 | \usage{ 7 | get_gandk() 8 | } 9 | \value{ 10 | The list contains rprior, dprior (generate and evaluate the density of prior distribution), 11 | generate_randomness (generate data-generating variables), robservation (create synthetic 12 | data sets), parameter_names (useful for plotting), thetadim (dimension of parameter), 13 | ydim (dimension of observations), parameters (list of hyperparameters, 14 | to be passed to rprior,dprior,robservation) 15 | } 16 | \description{ 17 | This function returns a list representing the g-and-k 18 | quantile distribution. 19 | } 20 | -------------------------------------------------------------------------------- /man/get_levydriven.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/model_get_levydriven.R 3 | \name{get_levydriven} 4 | \alias{get_levydriven} 5 | \title{Levy driven stochastic volatility model} 6 | \usage{ 7 | get_levydriven() 8 | } 9 | \value{ 10 | The list contains rprior, dprior (generate and evaluate the density of prior distribution), 11 | generate_randomness (generate data-generating variables), robservation (create synthetic 12 | data sets), parameter_names (useful for plotting), thetadim (dimension of parameter), 13 | ydim (dimension of observations), parameters (list of hyperparameters, 14 | to be passed to rprior,dprior,robservation) 15 | } 16 | \description{ 17 | This function returns a list representing a Levy driven stochastic volatility model. 18 | } 19 | -------------------------------------------------------------------------------- /man/get_mgandk.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/model_get_mgandk.R 3 | \name{get_mgandk} 4 | \alias{get_mgandk} 5 | \title{multivariate G and k model} 6 | \usage{ 7 | get_mgandk() 8 | } 9 | \value{ 10 | The list contains rprior, dprior (generate and evaluate the density of prior distribution), 11 | generate_randomness (generate data-generating variables), robservation (create synthetic 12 | data sets), parameter_names (useful for plotting), thetadim (dimension of parameter), 13 | ydim (dimension of observations), parameters (list of hyperparameters, 14 | to be passed to rprior,dprior,robservation) 15 | } 16 | \description{ 17 | This function returns 18 | } 19 | -------------------------------------------------------------------------------- /man/get_multivariate_normal.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/model_get_multivariatenormal.R 3 | \name{get_multivariate_normal} 4 | \alias{get_multivariate_normal} 5 | \title{Multivariate Normal model} 6 | \usage{ 7 | get_multivariate_normal(dimension) 8 | } 9 | \value{ 10 | The list contains rprior, dprior (generate and evaluate the density of prior distribution), 11 | generate_randomness (generate data-generating variables), robservation (create synthetic 12 | data sets), parameter_names (useful for plotting), thetadim (dimension of parameter), 13 | ydim (dimension of observations), parameters (list of hyperparameters, 14 | to be passed to rprior,dprior,robservation) 15 | } 16 | \description{ 17 | This function returns a list representing 18 | a Normal location model, in a dimension specified by the user as an argument. 19 | } 20 | -------------------------------------------------------------------------------- /man/get_normal.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/model_get_normal.R 3 | \name{get_normal} 4 | \alias{get_normal} 5 | \title{Normal model} 6 | \usage{ 7 | get_normal() 8 | } 9 | \value{ 10 | The list contains rprior, dprior (generate and evaluate the density of prior distribution), 11 | generate_randomness (generate data-generating variables), robservation (create synthetic 12 | data sets), parameter_names (useful for plotting), thetadim (dimension of parameter), 13 | ydim (dimension of observations), parameters (list of hyperparameters, 14 | to be passed to rprior,dprior,robservation) 15 | } 16 | \description{ 17 | This function returns a list representing 18 | a Normal location model. 19 | The prior is mu ~ Normal(mu_0, nu^{-1}), where nu is precision 20 | and tau ~ Gamma(alpha, beta), where beta is rate (1/scale). 21 | The likelihood is Y ~ Normal(mu, tau^2) where tau^2 is the variance. 22 | } 23 | -------------------------------------------------------------------------------- /man/get_pz_4param.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/model_get_pz_4param.R 3 | \name{get_pz_4param} 4 | \alias{get_pz_4param} 5 | \title{Phytoplankton-zooplankton model} 6 | \usage{ 7 | get_pz_4param() 8 | } 9 | \value{ 10 | The list contains rprior, dprior (generate and evaluate the density of prior distribution), 11 | generate_randomness (generate data-generating variables), robservation (create synthetic 12 | data sets), parameter_names (useful for plotting), thetadim (dimension of parameter), 13 | ydim (dimension of observations), parameters (list of hyperparameters, 14 | to be passed to rprior,dprior,robservation) 15 | } 16 | \description{ 17 | This function returns a list representing 18 | a Lotka-Volterra type model for plankton. See 19 | Jones, E., Parslow, J., and Murray, L. (2010). A Bayesian approach to state and parameter estimation in a phytoplankton-zooplankton model. Australian Meteorological and Oceanographic Journal, 59:7-16. 20 | } 21 | -------------------------------------------------------------------------------- /man/get_ricker.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/model_get_ricker.R 3 | \name{get_ricker} 4 | \alias{get_ricker} 5 | \title{Ricker model} 6 | \usage{ 7 | get_ricker() 8 | } 9 | \value{ 10 | The list contains rprior, dprior (generate and evaluate the density of prior distribution), 11 | generate_randomness (generate data-generating variables), robservation (create synthetic 12 | data sets), parameter_names (useful for plotting), thetadim (dimension of parameter), 13 | ydim (dimension of observations), parameters (list of hyperparameters, 14 | to be passed to rprior,dprior,robservation) 15 | } 16 | \description{ 17 | This function returns a list representing 18 | the Ricker model in 19 | Wood (2010) Statistical inference for noisy nonlinear ecological dynamic systems 20 | } 21 | -------------------------------------------------------------------------------- /man/get_toggleswitch.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/model_get_toggleswitch.R 3 | \name{get_toggleswitch} 4 | \alias{get_toggleswitch} 5 | \title{Toggle switch model} 6 | \usage{ 7 | get_toggleswitch() 8 | } 9 | \value{ 10 | The list contains rprior, dprior (generate and evaluate the density of prior distribution), 11 | generate_randomness (generate data-generating variables), robservation (create synthetic 12 | data sets), parameter_names (useful for plotting), thetadim (dimension of parameter), 13 | ydim (dimension of observations), parameters (list of hyperparameters, 14 | to be passed to rprior,dprior,robservation) 15 | } 16 | \description{ 17 | This function returns a list representing the toggle switch model 18 | of Bonassi, F. V., West, M., et al. (2015). 19 | Sequential Monte Carlo with adaptive weights for approximate Bayesian computation. Bayesian Analysis, 10(1):171-187. 20 | } 21 | -------------------------------------------------------------------------------- /man/hilbert_order.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hilbert_order.R 3 | \name{hilbert_order} 4 | \alias{hilbert_order} 5 | \title{hilbert_order} 6 | \usage{ 7 | hilbert_order(x) 8 | } 9 | \value{ 10 | a vector of index corresponding to the ordered samples 11 | } 12 | \description{ 13 | This function returns the "Hilbert order" of a sample of n 14 | points of dimension d, stored in a matrix with d rows and n columns 15 | where d is the dimension of each sample and n the number of samples. 16 | The function essentially calls Hilbert_Sort_CGAL, from the CGAL library. 17 | } 18 | -------------------------------------------------------------------------------- /man/pz_transition.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/pz_transition.R 3 | \name{pz_transition} 4 | \alias{pz_transition} 5 | \title{pz_transition} 6 | \usage{ 7 | pz_transition(xparticles, alphas, time, parameters) 8 | } 9 | \description{ 10 | Solve PZ ODE for each particle, given each alpha, from time to time + 1, 11 | and given the parameters (c, e, ml, mq). 12 | } 13 | -------------------------------------------------------------------------------- /man/setmytheme.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/setmytheme.R 3 | \name{setmytheme} 4 | \alias{setmytheme} 5 | \title{Set My Theme} 6 | \usage{ 7 | setmytheme() 8 | } 9 | \description{ 10 | set theme for ggplot, to create consistent figures 11 | } 12 | -------------------------------------------------------------------------------- /man/systematic_resampling.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/systematic_resampling.R 3 | \name{systematic_resampling} 4 | \alias{systematic_resampling} 5 | \title{systematic_resampling} 6 | \usage{ 7 | systematic_resampling(normalized_weights) 8 | } 9 | \description{ 10 | systematic_resampling 11 | } 12 | -------------------------------------------------------------------------------- /man/wasserstein.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/wasserstein.R 3 | \name{wasserstein} 4 | \alias{wasserstein} 5 | \title{wasserstein} 6 | \usage{ 7 | wasserstein(p, q, cost_matrix, epsilon, niterations) 8 | } 9 | \value{ 10 | a list with "distances", "transportmatrix", "u" and "v" 11 | } 12 | \description{ 13 | Compute regularized Wasserstein distance between two empirical distributions, 14 | p and q, specified as vector of probabilities summing to one. 15 | The third argument is the cost matrix, i.e. a matrix of pair-wise distances, 16 | the fourth argument is the regularization parameter, e.g. 0.05*median(cost_matrix), 17 | and the last argument is the number of Sinkhorn iterations to perform, e.g. 100. 18 | Important references are 19 | 20 | - Cuturi, M. (2013). Sinkhorn distances: Lightspeed computation of optimal transport. In Advances in Neural Information Processing Systems (NIPS), pages 2292-2300. 21 | 22 | - Cuturi, M. and Doucet, A. (2014). Fast computation of Wasserstein barycenters. In Proceedings of the 31st International Conference on Machine Learning (ICML), pages 685-693. 23 | } 24 | -------------------------------------------------------------------------------- /man/wcovariance.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/wcovariance.R 3 | \name{wcovariance} 4 | \alias{wcovariance} 5 | \title{wcovariance} 6 | \usage{ 7 | wcovariance(xparticles, normweights, mean) 8 | } 9 | \description{ 10 | wcovariance 11 | } 12 | -------------------------------------------------------------------------------- /man/winference-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/winference-package.R 3 | \docType{package} 4 | \name{winference-package} 5 | \alias{winference-package} 6 | \alias{winference} 7 | \title{winference} 8 | \description{ 9 | ... 10 | } 11 | \details{ 12 | ... 13 | } 14 | \author{ 15 | anonymous 16 | } 17 | \keyword{package} 18 | -------------------------------------------------------------------------------- /man/wmean.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/wmean.R 3 | \name{wmean} 4 | \alias{wmean} 5 | \title{wmean} 6 | \usage{ 7 | wmean(xparticles, normweights) 8 | } 9 | \description{ 10 | wmean 11 | } 12 | -------------------------------------------------------------------------------- /src/HilbertCode.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include "HilbertCode.h" 13 | 14 | #include 15 | #include //malloc 16 | 17 | typedef CGAL::Cartesian_d Kernel; 18 | typedef Kernel::Point_d Point_d; 19 | 20 | typedef 21 | CGAL::Spatial_sort_traits_adapter_d Search_traits_d; 22 | 23 | 24 | void Hilbert_Sort_CGAL(double *x, int dx, int N, double *J) 25 | { 26 | int i,k; 27 | double *work = new double[dx]; 28 | 29 | std::vector points; 30 | for(i=0;i indices; 41 | indices.reserve(points.size()); 42 | 43 | std::copy(boost::counting_iterator(0), 44 | boost::counting_iterator(points.size()), 45 | std::back_inserter(indices)); 46 | 47 | CGAL::hilbert_sort( indices.begin(),indices.end(),Search_traits_d(&(points[0])) ); 48 | 49 | for (i=0;i 2 | #include 3 | #include 4 | #include 5 | #include 6 | using namespace Rcpp; 7 | using namespace std; 8 | using namespace Eigen; 9 | 10 | 11 | // Takes two matrices of size d times N1 and d times N2, returns an N1 times N2 matrix 12 | // [[Rcpp::export]] 13 | NumericMatrix cost_matrix_L1_(const NumericMatrix & x, const NumericMatrix & y){ 14 | NumericMatrix cost(x.cols(), y.cols()); 15 | for (int ix = 0; ix < x.cols(); ix ++){ 16 | for (int iy = 0; iy < y.cols(); iy ++){ 17 | cost(ix,iy) = 0; 18 | // loop over components, stored in columns 19 | for (int id = 0; id < x.rows(); id ++){ 20 | // cost(ix,iy) += std::abs(x(ix,id)-y(iy,id)); 21 | cost(ix,iy) += std::abs(x(id,ix)-y(id,iy)); 22 | } 23 | } 24 | } 25 | return cost; 26 | } 27 | // Takes two matrices of size d times N1 and d times N2, returns an N1 times N2 matrix 28 | // [[Rcpp::export]] 29 | NumericMatrix cost_matrix_L2_(const NumericMatrix & x, const NumericMatrix & y){ 30 | NumericMatrix cost(x.cols(), y.cols()); 31 | for (int ix = 0; ix < x.cols(); ix ++){ 32 | for (int iy = 0; iy < y.cols(); iy ++){ 33 | cost(ix,iy) = 0; 34 | // loop over components, stored in columns 35 | for (int id = 0; id < x.rows(); id ++){ 36 | // cost(ix,iy) += std::abs(x(ix,id)-y(iy,id)); 37 | cost(ix,iy) += std::pow(x(id,ix)-y(id,iy),2); 38 | } 39 | cost(ix,iy) = std::sqrt(cost(ix,iy)); 40 | } 41 | } 42 | return cost; 43 | } 44 | 45 | // Takes two matrices of size d times N1 and d times N2, returns an N1 times N2 matrix 46 | // [[Rcpp::export]] 47 | NumericMatrix cost_matrix_Lp_(const NumericMatrix & x, const NumericMatrix & y, double p){ 48 | NumericMatrix cost(x.cols(), y.cols()); 49 | double z = 0; 50 | for (int ix = 0; ix < x.cols(); ix ++){ 51 | for (int iy = 0; iy < y.cols(); iy ++){ 52 | cost(ix,iy) = 0; 53 | // loop over components 54 | for (int id = 0; id < x.rows(); id ++){ 55 | z = x(id,ix)-y(id,iy); 56 | cost(ix,iy) += std::pow(std::fabs(z), p); 57 | } 58 | cost(ix,iy) = std::pow(cost(ix,iy), 1/p); 59 | } 60 | } 61 | return cost; 62 | } 63 | 64 | // // [[Rcpp::export]] 65 | // NumericMatrix compute_cost1_(const NumericVector & x, const NumericVector & y){ 66 | // NumericMatrix cost(x.size(), y.size()); 67 | // for (int ix = 0; ix < x.size(); ix ++){ 68 | // for (int iy = 0; iy < y.size(); iy ++){ 69 | // cost(ix,iy) = std::abs(x(ix)-y(iy)); 70 | // } 71 | // } 72 | // return cost; 73 | // } 74 | // 75 | // 76 | // // [[Rcpp::export]] 77 | // NumericMatrix compute_cost2_(const NumericVector & x, const NumericVector & y){ 78 | // NumericMatrix cost(x.size(), y.size()); 79 | // for (int ix = 0; ix < x.size(); ix ++){ 80 | // for (int iy = 0; iy < y.size(); iy ++){ 81 | // cost(ix,iy) = std::pow(x(ix)-y(iy), 2); 82 | // } 83 | // } 84 | // return cost; 85 | // } 86 | // 87 | // // [[Rcpp::export]] 88 | // NumericMatrix cost_matrix_(const NumericMatrix & x, const NumericMatrix & y){ 89 | // NumericMatrix cost(x.rows(), y.rows()); 90 | // for (int ix = 0; ix < x.rows(); ix ++){ 91 | // for (int iy = 0; iy < y.rows(); iy ++){ 92 | // cost(ix,iy) = 0; 93 | // // loop over components, stored in columns 94 | // for (int id = 0; id < x.cols(); id ++){ 95 | // // cost(ix,iy) += std::abs(x(ix,id)-y(iy,id)); 96 | // cost(ix,iy) += std::pow(x(ix,id)-y(iy,id),2); 97 | // } 98 | // cost(ix,iy) = std::sqrt(cost(ix,iy)); 99 | // } 100 | // } 101 | // return cost; 102 | // } 103 | 104 | -------------------------------------------------------------------------------- /src/gandkinversecdf.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | using namespace Rcpp; 5 | using namespace std; 6 | using namespace Eigen; 7 | 8 | // [[Rcpp::export]] 9 | NumericVector gandkinversecdf_(NumericVector x, NumericVector theta){ 10 | NumericVector z = qnorm(x); 11 | double A = theta(0); 12 | double B = theta(1); 13 | double c = 0.8; 14 | double g = theta(2); 15 | double k = theta(3); 16 | return A + B * (1 + c * (1 - exp(- g * z)) / (1 + exp(- g * z))) * pow((1 + pow(z, 2.0)), k) * z; 17 | } 18 | 19 | // [[Rcpp::export]] 20 | NumericVector gandkinversecdf_givennormals_(NumericVector normals, NumericVector theta){ 21 | NumericVector z = normals; 22 | double A = theta(0); 23 | double B = theta(1); 24 | double c = 0.8; 25 | double g = theta(2); 26 | double k = theta(3); 27 | return A + B * (1 + c * (1 - exp(- g * z)) / (1 + exp(- g * z))) * pow((1 + pow(z, 2.0)), k) * z; 28 | } 29 | 30 | // [[Rcpp::export]] 31 | double gandkcdf_(double y, NumericVector theta, int maxsteps = 1000, double tolerance = 1e-10, 32 | double lower = 1e-20, double upper = 1-1e-20){ 33 | double A = theta(0); 34 | double B = theta(1); 35 | double c = 0.8; 36 | double g = theta(2); 37 | double k = theta(3); 38 | 39 | int istep = 0; 40 | double current_try = (upper + lower) / 2; 41 | double current_size = (upper - lower) / 4; 42 | NumericVector dd(1); 43 | dd(0) = current_try; 44 | NumericVector z = qnorm(dd); 45 | double fattempt = A + B * (1 + c * (1 - exp(- g * z(0))) / (1 + exp(- g * z(0)))) * pow((1 + pow(z(0), 2.0)), k) * z(0); 46 | while (!(fattempt > y-tolerance && fattempt < y+tolerance) && (istep < maxsteps)){ 47 | istep++; 48 | if (fattempt > y-tolerance){ 49 | current_try = current_try - current_size; 50 | dd(0) = current_try; 51 | NumericVector z = qnorm(dd); 52 | fattempt = A + B * (1 + c * (1 - exp(- g * z(0))) / (1 + exp(- g * z(0)))) * pow((1 + pow(z(0), 2.0)), k) * z(0); 53 | current_size = current_size / 2; 54 | } else { 55 | current_try = current_try + current_size; 56 | dd(0) = current_try; 57 | NumericVector z = qnorm(dd); 58 | fattempt = A + B * (1 + c * (1 - exp(- g * z(0))) / (1 + exp(- g * z(0)))) * pow((1 + pow(z(0), 2.0)), k) * z(0); 59 | current_size = current_size / 2; 60 | } 61 | } 62 | return current_try; 63 | } 64 | -------------------------------------------------------------------------------- /src/hilbert_order.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "HilbertCode.h" 3 | using namespace Rcpp; 4 | using namespace std; 5 | using namespace Eigen; 6 | 7 | // [[Rcpp::export]] 8 | NumericVector hilbert_order_(NumericMatrix x){ 9 | int dx = x.nrow(); 10 | int N = x.ncol(); 11 | NumericVector order_indices(N); 12 | Hilbert_Sort_CGAL(REAL(x), dx, N, REAL(order_indices)); 13 | return order_indices; 14 | } 15 | -------------------------------------------------------------------------------- /src/levydriven_.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace Rcpp; 3 | using namespace std; 4 | using namespace Eigen; 5 | 6 | 7 | // [[Rcpp::export]] 8 | List levydriven_rtransition_rand_cpp(int nparticles, NumericVector & theta){ 9 | RNGScope scope; 10 | NumericVector sum_weighted_e(nparticles); 11 | NumericVector sum_e(nparticles); 12 | NumericVector k = rpois(nparticles, theta[4] * theta[2] * theta[2] / theta[3]); 13 | for (int i = 0; i < nparticles; i++){ 14 | NumericVector c = runif(k(i)); 15 | NumericVector e = rexp(k(i), theta[2]/theta[3]); // rcpp rexp is parametrized with rate 16 | sum_e(i) = sum(e); 17 | sum_weighted_e(i) = sum(exp(-theta[4] * c) * e); 18 | } 19 | return(List::create(Named("sum_weighted_e") = sum_weighted_e, Named("sum_e") = sum_e)); 20 | } 21 | -------------------------------------------------------------------------------- /src/median.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace Rcpp; 3 | 4 | // [[Rcpp::export]] 5 | double median_rcpp(NumericVector x) { 6 | NumericVector y = clone(x); 7 | int n, half; 8 | double y1, y2; 9 | n = y.size(); 10 | half = n / 2; 11 | if(n % 2 == 1) { 12 | // median for odd length vector 13 | std::nth_element(y.begin(), y.begin()+half, y.end()); 14 | return y[half]; 15 | } else { 16 | // median for even length vector 17 | std::nth_element(y.begin(), y.begin()+half, y.end()); 18 | y1 = y[half]; 19 | std::nth_element(y.begin(), y.begin()+half-1, y.begin()+half); 20 | y2 = y[half-1]; 21 | return (y1 + y2) / 2.0; 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/median.h: -------------------------------------------------------------------------------- 1 | #ifndef MEDIAN_H_ 2 | #define MEDIAN_H_ 3 | #include 4 | using namespace Rcpp; 5 | 6 | double median_rcpp(NumericVector x); 7 | 8 | #endif 9 | -------------------------------------------------------------------------------- /src/mmd.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace Rcpp; 3 | 4 | // [[Rcpp::export]] 5 | double mmd_c(double first_term, double eps, const NumericMatrix & x, const NumericMatrix & y){ 6 | int nobs = y.cols(); 7 | int dimension = y.rows(); 8 | double result = first_term; 9 | double cost_xx; 10 | double cost_xy; 11 | double second_term = 0; 12 | double third_term = 0; 13 | for (int i1 = 0; i1 < nobs; i1 ++){ 14 | for (int i2 = 0; i2 < nobs; i2 ++){ 15 | cost_xx = 0; 16 | cost_xy = 0; 17 | for (int id = 0; id < dimension; id ++){ 18 | cost_xx += std::pow(x(id,i1)-x(id,i2),2); 19 | cost_xy += std::pow(x(id,i1)-y(id,i2),2); 20 | } 21 | second_term += exp(-cost_xx / (2*eps*eps)); 22 | third_term += exp(-cost_xy / (2*eps*eps)); 23 | } 24 | } 25 | result += second_term / (nobs * nobs) - 2 * third_term / (nobs*nobs); 26 | return result; 27 | } 28 | -------------------------------------------------------------------------------- /src/mvnorm.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "mvnorm.h" 3 | using namespace Rcpp; 4 | 5 | // [[Rcpp::export]] 6 | NumericMatrix rmvnorm(int nsamples, const NumericVector & mean, const NumericMatrix & covariance){ 7 | RNGScope scope; 8 | int ncols = covariance.cols(); 9 | const Eigen::Map covariance_(as >(covariance)); 10 | Eigen::MatrixXd cholesky_covariance(covariance_.llt().matrixU()); 11 | Eigen::MatrixXd Y(nsamples, ncols); 12 | for(int i = 0; i < ncols; i++){ 13 | Y.col(i) = as(rnorm(nsamples)); 14 | } 15 | Y = Y * cholesky_covariance; 16 | for(int j = 0; j < ncols; j++){ 17 | for(int i = 0; i < nsamples; i++){ 18 | Y(i,j) = Y(i,j) + mean(j); 19 | } 20 | } 21 | return wrap(Y); 22 | } 23 | 24 | 25 | NumericMatrix centered_rmvnorm(int nsamples, const Eigen::MatrixXd & cholesky_covariance){ 26 | // sample centered gaussian variates given the upper triangular factor in the cholesky decomposition of the covariance 27 | RNGScope scope; 28 | int ncols = cholesky_covariance.cols(); 29 | Eigen::MatrixXd Y(nsamples, ncols); 30 | for(int i = 0; i < ncols; i++){ 31 | Y.col(i) = as(rnorm(nsamples)); 32 | } 33 | Y = Y * cholesky_covariance; 34 | return wrap(Y); 35 | } 36 | 37 | // [[Rcpp::export]] 38 | NumericVector dmvnorm(const NumericMatrix & x, const NumericVector & mean, const NumericMatrix & covariance){ 39 | const Eigen::Map covariance_(as >(covariance)); 40 | const Eigen::Map x_(as >(x)); 41 | Eigen::LLT lltofcov(covariance_); 42 | Eigen::MatrixXd lower = lltofcov.matrixL(); 43 | Eigen::MatrixXd xcentered(x_); 44 | double halflogdeterminant = lower.diagonal().array().log().sum();; 45 | double cst = - (halflogdeterminant) - (x.cols() * 0.9189385); 46 | for(int j = 0; j < x.cols(); j++){ 47 | for(int i = 0; i < x.rows(); i++){ 48 | xcentered(i,j) = xcentered(i,j) - mean(j); 49 | } 50 | } 51 | Eigen::VectorXd results = -0.5 * lower.triangularView().solve(xcentered.transpose()).colwise().squaredNorm(); 52 | for (int i = 0; i < results.size(); i++){ 53 | results(i) = results(i) + cst; 54 | } 55 | return wrap(results); 56 | } 57 | 58 | RCPP_MODULE(module_mvnorm) { 59 | function( "rmvnorm", &rmvnorm ); 60 | function( "dmvnorm", &dmvnorm ); 61 | } 62 | 63 | 64 | -------------------------------------------------------------------------------- /src/mvnorm.h: -------------------------------------------------------------------------------- 1 | #ifndef _INCL_MVNORM_ 2 | #define _INCL_MVNORM_ 3 | #include 4 | using namespace Rcpp; 5 | 6 | // generate samples from a multivariate normal distribution 7 | NumericMatrix rmvnorm(int nsamples, const NumericVector & mean, const NumericMatrix & covariance); 8 | 9 | NumericMatrix centered_rmvnorm(int nsamples, const Eigen::MatrixXd & cholesky_covariance); 10 | 11 | // evaluate probability density function of a multivariate normal distribution 12 | NumericVector dmvnorm(const NumericMatrix & x, const NumericVector & mean, const NumericMatrix & covariance); 13 | 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /src/pz_functions.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | // [[Rcpp::depends(BH)]] 6 | // [[Rcpp::plugins(cpp11)]] 7 | 8 | using namespace std; 9 | using namespace boost::numeric::odeint; 10 | 11 | // const double sigma = 10.0, r = 28.0, b = 8.0 / 3.0; 12 | 13 | typedef boost::array< double , 2> state_type; 14 | 15 | class pz_ { 16 | 17 | double alpha, c, e, ml, mq; 18 | 19 | public: 20 | pz_( double alpha, double c, double e, double ml, double mq ) : alpha(alpha), c(c), e(e), ml(ml), mq(mq) { } 21 | 22 | void operator() ( const state_type &x , state_type &dxdt , const double /* t */ ) 23 | { 24 | dxdt[0] = alpha * x[0] - c * x[0] * x[1]; 25 | dxdt[1] = e * c * x[0] * x[1] - ml * x[1] - mq * x[1] * x[1]; 26 | } 27 | }; 28 | 29 | struct push_back_state_and_time 30 | { 31 | std::vector< state_type >& m_states; 32 | std::vector< double >& m_times; 33 | 34 | push_back_state_and_time( std::vector< state_type > &states , std::vector< double > × ) 35 | : m_states( states ) , m_times( times ) { } 36 | 37 | void operator()( const state_type &x , double t ) 38 | { 39 | m_states.push_back( x ); 40 | m_times.push_back( t ); 41 | } 42 | }; 43 | 44 | using namespace Rcpp; 45 | 46 | // // [[Rcpp::export]] 47 | // NumericVector one_step_pz_(double P, double Z, double t, double alpha, double c, double e, double ml, double mq ) { 48 | // state_type x = { P , Z }; // initial conditions 49 | // vector x_vec; 50 | // vector times; 51 | // pz_ pz_instance(alpha, c, e, ml, mq); 52 | // size_t steps = integrate( pz_instance , x , t , t + 1, 1.0 , push_back_state_and_time(x_vec, times)); 53 | // NumericVector result = NumericVector::create(0, 0); 54 | // result(0) = x_vec[steps][0]; 55 | // result(1) = x_vec[steps][1]; 56 | // return result; 57 | // } 58 | 59 | // [[Rcpp::export]] 60 | NumericMatrix one_step_pz_vector(NumericMatrix xparticles, NumericVector alphas, double t, NumericVector parameters){ 61 | double c = parameters[0]; 62 | double e = parameters[1]; 63 | double ml = parameters[2]; 64 | double mq = parameters[3]; 65 | NumericMatrix result(2, xparticles.cols()); 66 | for (int i = 0; i < xparticles.cols(); i++){ 67 | double P = xparticles(0, i); 68 | double Z = xparticles(1, i); 69 | state_type x = { P , Z }; // initial conditions 70 | vector x_vec; 71 | vector times; 72 | pz_ pz_instance(alphas(i), c, e, ml, mq); 73 | size_t steps = integrate( pz_instance , x , t , t + 1, 1.0 , push_back_state_and_time(x_vec, times)); 74 | result(0, i) = x_vec[steps][0]; 75 | result(1, i) = x_vec[steps][1]; 76 | } 77 | return result; 78 | } 79 | 80 | // [[Rcpp::export]] 81 | NumericVector pz_generate_randomness_cpp(int nparticles, int datalength){ 82 | RNGScope scope; 83 | NumericVector normal_draws = rnorm((2 + datalength) * nparticles, 0, 1); 84 | return normal_draws; 85 | } 86 | 87 | // [[Rcpp::export]] 88 | NumericVector pz_perturb_randomness_cpp(const NumericVector & randomness, double rho){ 89 | RNGScope scope; 90 | int l = randomness.size(); 91 | NumericVector newrand(l); 92 | double v = sqrt(1.0 - rho*rho); 93 | NumericVector normal_draws = rnorm(l, 0, 1); 94 | for (int i = 0; i < l; i ++){ 95 | newrand(i) = rho * randomness(i) + v * normal_draws(i); 96 | } 97 | return newrand; 98 | } 99 | -------------------------------------------------------------------------------- /src/resampling.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "resampling.h" 3 | using namespace Rcpp; 4 | using namespace std; 5 | 6 | 7 | void permute(IntegerVector & a, const int & nparticles){ 8 | int swap; 9 | for (int i = 0; i < nparticles; i++){ 10 | if (a(i) != i && a(a(i)) != a(i)){ 11 | swap = a(a(i)); 12 | a(a(i)) = a(i); 13 | a(i) = swap; 14 | i--; 15 | } 16 | } 17 | } 18 | 19 | // weights have to sum to 1 20 | void systematic(IntegerVector & ancestors, const NumericVector & weights, const int & nparticles){ 21 | RNGScope scope; 22 | NumericVector u_vec = runif(1); 23 | double u = u_vec(0) / nparticles; 24 | int j = 0; 25 | double csw = weights(0); 26 | for(int k = 0; k < nparticles; k++){ 27 | while(csw < u){ 28 | j++; 29 | csw += weights(j); 30 | } 31 | ancestors(k) = j; 32 | u = u + 1./nparticles; 33 | } 34 | } 35 | 36 | // made for export to R 37 | // [[Rcpp::export]] 38 | IntegerVector systematic_resampling_(const NumericVector & weights){ 39 | RNGScope scope; 40 | int nparticles = weights.size(); 41 | IntegerVector ancestors(nparticles); 42 | NumericVector u_vec = runif(1); 43 | double u = u_vec(0) / nparticles; 44 | int j = 0; 45 | double csw = weights(0); 46 | for(int k = 0; k < nparticles; k++){ 47 | while(csw < u){ 48 | j++; 49 | csw += weights(j); 50 | } 51 | ancestors(k) = j; 52 | u = u + 1./nparticles; 53 | } 54 | return ancestors + 1; 55 | } 56 | 57 | inline int randWrapper(const int n) { return floor(unif_rand()*n); } 58 | 59 | // the weights in argument don't need to be normalised, and 'ancestors' don't need to be of the same size as 'weights' 60 | void multinomial(IntegerVector & ancestors, const NumericVector & weights){ 61 | RNGScope scope; 62 | int nparents = weights.size(); 63 | NumericVector cumsumw = cumsum(weights); 64 | int nchildren = ancestors.size(); 65 | NumericVector uniforms = runif(nchildren); 66 | double sumw = cumsumw(nparents - 1); 67 | double lnMax = 0; 68 | int j = nparents; 69 | for (int i = nchildren; i > 0; i--){ 70 | lnMax += log(uniforms(i-1)) / i; 71 | uniforms(i-1) = sumw * exp(lnMax); 72 | while (uniforms(i-1) < cumsumw(j-1)){ 73 | j --; 74 | } 75 | ancestors(i-1) = j; 76 | } 77 | std::random_shuffle(ancestors.begin(), ancestors.end(), randWrapper); 78 | } 79 | -------------------------------------------------------------------------------- /src/resampling.h: -------------------------------------------------------------------------------- 1 | #ifndef _INCL_RESAMPLING_ 2 | #define _INCL_RESAMPLING_ 3 | #include 4 | using namespace Rcpp; 5 | using namespace std; 6 | 7 | // permute the ancestors so that whenever o(i) > 0, a(i) = i; allows in-place resampling 8 | void permute(IntegerVector & a, const int & nparticles); 9 | // systematic resampling, weights have to sum to 1 10 | void systematic(IntegerVector & ancestors, const NumericVector & weights, const int & nparticles); 11 | // made for export to R; takes normalized weights 12 | IntegerVector systematic_resampling_(const NumericVector & weights); 13 | // multinomial resampling, weights don't have to sum to 1 14 | void multinomial(IntegerVector & ancestors, const NumericVector & weights); 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /src/swapsweep.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace Rcpp; 3 | // [[Rcpp::export]] 4 | List swapsweep(IntegerVector permutation, const NumericMatrix Cp, double totalcost){ 5 | int n = Cp.cols(); 6 | double currentcost, proposedcost; 7 | int perm_i, perm_j; 8 | for (int i = 0; i < n - 1; i ++){ 9 | int perm_i = permutation[i]; 10 | for (int j = i+1; j < n; j++){ 11 | perm_j = permutation[j]; 12 | currentcost = Cp(i, perm_i) + Cp(j, perm_j); 13 | proposedcost = Cp(i, perm_j) + Cp(j, perm_i); 14 | if (proposedcost < currentcost){ 15 | permutation[i] = perm_j; 16 | permutation[j] = perm_i; 17 | perm_i = perm_j; 18 | totalcost = totalcost - currentcost + proposedcost; 19 | } 20 | } 21 | } 22 | return List::create(Named("totalcost")=totalcost, 23 | Named("permutation") = permutation); 24 | } 25 | -------------------------------------------------------------------------------- /src/systematic.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace Rcpp; 3 | using namespace std; 4 | 5 | // [[Rcpp::export]] 6 | IntegerVector systematic_resampling_n_(const NumericVector & weights, int ndraws, double u){ 7 | RNGScope scope; 8 | int nparticles = weights.size(); 9 | IntegerVector ancestors(ndraws); 10 | u = u / ndraws; 11 | int j = 0; 12 | double csw = weights(0); 13 | for(int k = 0; k < ndraws; k++){ 14 | while(csw < u){ 15 | j++; 16 | csw += weights(j); 17 | } 18 | u = u + 1. / ndraws; 19 | ancestors(k) = j; 20 | } 21 | // int swap; 22 | // for (int i = 0; i < nparticles; i++){ 23 | // if (ancestors(i) != i && ancestors(ancestors(i)) != ancestors(i)){ 24 | // swap = ancestors(ancestors(i)); 25 | // ancestors(ancestors(i)) = ancestors(i); 26 | // ancestors(i) = swap; 27 | // i--; 28 | // } 29 | // } 30 | return ancestors + 1; 31 | } 32 | -------------------------------------------------------------------------------- /src/toggle_switch.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace Rcpp; 3 | 4 | 5 | double normal_pdf(double x, double m, double s) 6 | { 7 | static const double inv_sqrt_2pi = 0.3989422804014327; 8 | double a = (x - m) / s; 9 | 10 | return inv_sqrt_2pi / s * std::exp(-0.5f * a * a); 11 | } 12 | 13 | 14 | // [[Rcpp::export]] 15 | NumericVector cont_hist(NumericVector x, double h, NumericVector prob, NumericVector mids) { 16 | 17 | int n = x.size(); 18 | int m = prob.size(); 19 | 20 | double sum; 21 | NumericVector out(n); 22 | 23 | for(int i = 0; i 2 | using namespace Rcpp; 3 | using namespace std; 4 | using namespace Eigen; 5 | 6 | // [[Rcpp::export]] 7 | List wasserstein_(NumericVector p_, NumericVector q_, NumericMatrix cost_matrix_, 8 | double epsilon, int niterations){ 9 | // compute distance between p and q 10 | // p corresponds to the weights of a N-sample 11 | // each q corresponds to the weights of a M-sample 12 | // Thus cost_matrix must be a N x M cost matrix 13 | // epsilon is a regularization parameter, equal to 1/lambda in some references 14 | int N = p_.size(); 15 | int M = q_.size(); 16 | 17 | Map q(as >(q_)); 18 | Map cost_matrix(as >(cost_matrix_)); 19 | Map p(as >(p_)); 20 | // avoid to take exp(k) when k is less than -500, 21 | // as K then contains zeros, and then the upcoming computations divide by zero 22 | MatrixXd K = (cost_matrix.array() * (-1./epsilon)).exp(); // K = exp(- M / epsilon) 23 | 24 | // MatrixXd K = (cost_matrix.array() * (-1./epsilon)); // K = exp(- M / epsilon) 25 | // for (int i = 0; i < N; i++){ 26 | // for (int j = 0; j < M; j++){ 27 | // if (K(i,j) < -500){ 28 | // K(i,j) = exp(-500); 29 | // } else { 30 | // K(i,j) = exp(K(i,j)); 31 | // } 32 | // } 33 | // } 34 | 35 | MatrixXd K_transpose = K.transpose(); 36 | MatrixXd K_tilde = p.array().inverse().matrix().asDiagonal() * K; // diag(1/p) %*% K 37 | MatrixXd u = VectorXd::Constant(N, 1./N); 38 | for (int iteration = 0; iteration < niterations; iteration ++){ 39 | // cerr << iteration << endl; 40 | // u is set to 1 / (K_tilde %*% (qs / (K_transpose %*% u))) 41 | u = 1. / (K_tilde * (q.array() / (K_transpose * u).array()).matrix()).array(); 42 | // for (int i = 0; i < N; i ++) cerr << u(i,0) << endl; 43 | } 44 | MatrixXd v = q.array() / (K_transpose * u).array(); 45 | // compute the optimal transport matrix between p and the first q 46 | MatrixXd transportmatrix = u.col(0).asDiagonal() * K * v.col(0).asDiagonal(); 47 | MatrixXd uXIv = u.array() * ((K.array() * cost_matrix.array()).matrix() * v).array(); 48 | NumericVector d = wrap(uXIv.colwise().sum()); 49 | return List::create(Named("distances")=d, 50 | Named("transportmatrix") = wrap(transportmatrix), 51 | Named("u") = wrap(u), 52 | Named("v") = wrap(v)); 53 | } 54 | 55 | -------------------------------------------------------------------------------- /src/wasserstein_auto.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | using namespace Rcpp; 4 | using namespace std; 5 | using namespace Eigen; 6 | 7 | // [[Rcpp::export]] 8 | List wasserstein_auto_(NumericVector p_, NumericVector q_, NumericMatrix cost_matrix_, 9 | double epsilon, double desired_alpha){ 10 | // compute distance between p and q 11 | // p corresponds to the weights of a N-sample 12 | // each q corresponds to the weights of a M-sample 13 | // Thus cost_matrix must be a N x M cost matrix 14 | // epsilon is a regularization parameter, equal to 1/lambda in some references 15 | int N = p_.size(); 16 | int M = q_.size(); 17 | 18 | Map p(as >(p_)); 19 | Map q(as >(q_)); 20 | Map cost_matrix(as >(cost_matrix_)); 21 | // avoid to take exp(k) when k is less than -500, 22 | // as K then contains zeros, and then the upcoming computations divide by zero 23 | MatrixXd K = (cost_matrix.array() * (-1./epsilon)); // K = exp(- M / epsilon) 24 | for (int i = 0; i < N; i++){ 25 | for (int j = 0; j < M; j++){ 26 | if (K(i,j) < -500){ 27 | K(i,j) = exp(-500); 28 | } else { 29 | K(i,j) = exp(K(i,j)); 30 | } 31 | } 32 | } 33 | MatrixXd K_transpose = K.transpose(); 34 | MatrixXd K_tilde = p.array().inverse().matrix().asDiagonal() * K; // diag(1/p) %*% K 35 | VectorXd u = VectorXd::Constant(N, 1./N); 36 | // 37 | VectorXd marginal1, marginal2; 38 | MatrixXd transportmatrix; 39 | VectorXd v; 40 | double alpha = 0; 41 | double beta = 0; 42 | int niterations_max = 1000; 43 | int iteration = 0; 44 | // for (int iteration = 0; iteration < niterations; iteration ++){ 45 | while ((iteration < niterations_max) and (alpha < desired_alpha)){ 46 | iteration ++; 47 | u = 1. / (K_tilde * (q.array() / (K_transpose * u).array()).matrix()).array(); 48 | if (iteration % 10 == 1){ 49 | // check if criterion is satisfied 50 | v = q.array() / (K_transpose * u).array(); 51 | transportmatrix = u.col(0).asDiagonal() * K * v.col(0).asDiagonal(); 52 | marginal1 = transportmatrix.rowwise().sum(); 53 | marginal2 = transportmatrix.colwise().sum(); 54 | alpha = 10; 55 | for (int i = 0; i < N; i++){ 56 | beta = std::min(p(i) / marginal1(i), q(i) / marginal2(i)); 57 | alpha = std::min(alpha, beta); 58 | } 59 | // cerr << "alpha = " << alpha << endl; 60 | } 61 | } 62 | v = q.array() / (K_transpose * u).array(); 63 | // compute the optimal transport matrix between p and the first q 64 | transportmatrix = u.col(0).asDiagonal() * K * v.col(0).asDiagonal(); 65 | // MatrixXd uXIv = u.array() * ((K.array() * cost_matrix.array()).matrix() * v).array(); 66 | // NumericVector d = wrap(uXIv.colwise().sum()); 67 | return List::create(Named("transportmatrix") = wrap(transportmatrix), 68 | Named("u") = wrap(u), 69 | Named("v") = wrap(v), 70 | Named("iteration") = iteration); 71 | } 72 | 73 | -------------------------------------------------------------------------------- /src/weighted_averages.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "weighted_averages.h" 3 | using namespace Rcpp; 4 | 5 | // [[Rcpp::export]] 6 | NumericVector wmean_(const NumericMatrix & x, const NumericVector & unnormalized_w){ 7 | int nrows = x.rows(); 8 | int ncols = x.cols(); 9 | double sumw = sum(unnormalized_w); 10 | NumericVector result(ncols); 11 | double cumsumxw; 12 | for (int icol = 0; icol < ncols; icol++){ 13 | cumsumxw = 0.; 14 | for (int irow = 0; irow < nrows ; irow++){ 15 | cumsumxw += unnormalized_w(irow) * x(irow, icol); 16 | } 17 | result(icol) = cumsumxw / sumw; 18 | } 19 | return result; 20 | } 21 | 22 | // [[Rcpp::export]] 23 | NumericMatrix wcovariance_(const NumericMatrix & x, const NumericVector & unnormalized_w, const NumericVector & xbar){ 24 | int nrows = x.rows(); 25 | int ncols = x.cols(); 26 | double sumw = sum(unnormalized_w); 27 | double sumsqw = sum(unnormalized_w*unnormalized_w); 28 | NumericMatrix result(ncols, ncols); 29 | std::fill(result.begin(), result.end(), 0); 30 | for (int i = 0; i < ncols; i++){ 31 | for (int j = 0; j < ncols; j++){ 32 | for (int irow = 0; irow < nrows ; irow++){ 33 | result(i, j) += unnormalized_w(irow) * (x(irow, i) - xbar(i)) * (x(irow, j) - xbar(j)); 34 | } 35 | result(i,j) /= (sumw - sumsqw / sumw); 36 | } 37 | } 38 | return result ; 39 | } 40 | 41 | RCPP_MODULE(module_waverage) { 42 | function( "wmean", &wmean_ ); 43 | function( "wcovariance", &wcovariance_ ); 44 | } 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /src/weighted_averages.h: -------------------------------------------------------------------------------- 1 | #ifndef _INCL_WEIGHTED_AVRG_ 2 | #define _INCL_WEIGHTED_AVRG_ 3 | #include 4 | using namespace Rcpp; 5 | 6 | 7 | NumericVector wmean_(const NumericMatrix & x, const NumericVector & unnormalized_w); 8 | 9 | NumericMatrix wcovariance_(const NumericMatrix & x, const NumericVector & unnormalized_w, const NumericVector & xbar); 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /winference.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace,vignette 22 | --------------------------------------------------------------------------------