├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── R ├── bootstrap_ci.R ├── bootstrap_lrt.R ├── calculate_U.R ├── calculate_criterion_lr.R ├── calculate_log_penalty.R ├── construct_Z_tilde.R ├── crossfit_U.R ├── d_mu_d_P_original.R ├── d_mu_d_gamma_original.R ├── data.R ├── dataframes_to_parameters.R ├── deriv_criterion_lr.R ├── dfs_to_derivs.R ├── dfs_to_mean.R ├── do_one_boot.R ├── do_one_lrt.R ├── do_one_simulation.R ├── estimate_parameters.R ├── evaluate_criterion_lr.R ├── fit_simple_model.R ├── fit_simulation_model.R ├── get_A_tilde_k_list.R ├── get_Ak_list.R ├── get_gmm_inv_weights.R ├── gmm_criterion.R ├── log_penalty.R ├── log_penalty_grad.R ├── log_penalty_gradient.R ├── log_penalty_hessian.R ├── logpois.R ├── lr_to_ra.R ├── make_model_bundle.R ├── mean_jac.R ├── mean_jac_lr_faster.R ├── meaninate.R ├── mu_d_alpha_tilde.R ├── mu_d_beta.R ├── mu_d_beta_faster.R ├── mu_d_gamma_faster.R ├── mu_d_gamma_tilde.R ├── mu_d_rho_faster.R ├── mu_d_rho_tilde_faster.R ├── nullify.R ├── numerical_jacobian.R ├── par_to_jacobian_row.R ├── parameters_to_dataframes.R ├── pb_update_one_constrained.R ├── pb_update_unconstrained.R ├── poisson_criterion.R ├── proximal_bootstrap.R ├── push_row.R ├── ra_to_lr.R ├── refit_model.R ├── safe_divide.R ├── safe_multiply.R ├── simpl_auglag_fnnls.R ├── simpl_opt_linesearch_fnnls.R ├── simple_poisson.R ├── simulate_paper_data.R ├── simulate_simple_data.R └── universal_test.R ├── README.Rmd ├── README.md ├── data ├── costea2017_metaphlan2_profiles.rda ├── costea2017_mock_composition.rda ├── costea2017_sample_data.rda ├── karstens.rdata └── karstens_phyloseq.rda ├── man ├── bootstrap_ci.Rd ├── calculate_log_penalty.Rd ├── costea2017_mock_composition.Rd ├── dataframes_to_parameters.Rd ├── estimate_parameters.Rd ├── mu_d_P.Rd ├── mu_d_beta.Rd ├── mu_d_gamma.Rd ├── mu_d_gamma_tilde.Rd ├── mu_d_rho_faster.Rd └── mu_d_rho_tilde_faster.Rd ├── tests ├── testthat.R └── testthat │ ├── test-bootstrap_ci.R │ ├── test-bootstrap_lrt.R │ ├── test-calculate_log_penalty.R │ ├── test-d_mu_d_beta_original.R │ ├── test-d_mu_d_gamma_original.R │ ├── test-deriv_criterion_lr.R │ ├── test-dfs_to_derivs.R │ ├── test-do_one_simulation.R │ ├── test-estimate_parameters.R │ ├── test-fit_simulation_model.R │ ├── test-get_A_tilde_k_list.R │ ├── test-gmm_criterion.R │ ├── test-lr_to_ra.R │ ├── test-mean_jac_lr_faster.R │ ├── test-mu_d_P.R │ ├── test-mu_d_beta.R │ ├── test-mu_d_gamma_faster.R │ ├── test-mu_d_gamma_tilde.R │ ├── test-mu_d_rho.R │ ├── test-mu_d_rho_tilde.R │ ├── test-optimization.R │ ├── test-par_to_jacobian_row.R │ ├── test-safe_divide.R │ ├── test-simulate_paper_data.R │ ├── test-test_meanination.R │ └── test-universal_test.R └── vignettes ├── compare-experiments.Rmd └── dilution-series.Rmd /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: tinyvamp 2 | Title: Modeling complex measurement error in microbiome experiments 3 | Version: 0.0.6.0 4 | Authors@R: as.person(c( 5 | "David Clausen [aut, cre]", 6 | "Amy Willis [aut]" 7 | )) 8 | Description: tinyvamp estimates the model described in Clausen & Willis, 2022. Notably, our model allows relative abundances to lie on the boundary of the simplex. We present a stable algorithm for computing parameter estimates, asymptotically valid procedures for inference in this nonstandard problem, and examples of the utility of the method. Our approach can be used to select or compare experimental protocols, design experiments with appropriate control data, analyze mixed-specimen samples, and remove across-sample contamination. 9 | License: BSD_3_clause + file LICENSE 10 | Encoding: UTF-8 11 | LazyData: true 12 | Roxygen: list(markdown = TRUE) 13 | RoxygenNote: 7.2.3 14 | Depends: 15 | R (>= 3.5.0), 16 | cir, 17 | fastnnls, 18 | logsum, 19 | numDeriv, 20 | parallel, 21 | stats 22 | Imports: 23 | data.table, 24 | Matrix, 25 | magrittr 26 | Suggests: 27 | knitr, 28 | phyloseq, 29 | rmarkdown, 30 | testthat (>= 3.0.0) 31 | Config/testthat/edition: 3 32 | VignetteBuilder: knitr 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | ======= 2 | License 3 | ======= 4 | 5 | Files: tinyvamp/*/*.R 6 | Copyright: 2022, The Regents of the University of Washington. 7 | License: BSD-3-Clause 8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions are 11 | met: 12 | 13 | Redistributions of source code must retain the above copyright 14 | notice, this list of conditions and the following disclaimer. 15 | 16 | Redistributions in binary form must reproduce the above copyright 17 | notice, this list of conditions and the following disclaimer in 18 | the documentation and/or other materials provided with the 19 | distribution. 20 | 21 | Neither the name of the University of Washington nor the names of its 22 | contributors may be used to endorse or promote products derived 23 | from this software without specific prior written permission. 24 | 25 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 26 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 27 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 28 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 29 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 30 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 31 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 32 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 33 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 34 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 35 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 36 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(bootstrap_ci) 4 | export(bootstrap_lrt) 5 | export(calculate_criterion_lr) 6 | export(estimate_parameters) 7 | export(evaluate_criterion_lr) 8 | import(cir) 9 | import(fastnnls) 10 | import(logsum) 11 | import(numDeriv) 12 | import(parallel) 13 | import(stats) 14 | -------------------------------------------------------------------------------- /R/bootstrap_ci.R: -------------------------------------------------------------------------------- 1 | #' Apply the Bayesian subsampled bootstrap to a fitted tinyvamp model 2 | #' 3 | #' @import stats 4 | #' @import parallel 5 | #' 6 | #' 7 | #' @export 8 | bootstrap_ci <- function(W, 9 | fitted_model, 10 | n_boot, 11 | m = NULL, 12 | alpha = 0.05, 13 | parallelize = FALSE, 14 | ncores = 5, 15 | seed = NULL, 16 | return_models = FALSE, 17 | verbose = FALSE, 18 | adjust = FALSE 19 | 20 | ){ 21 | n <- nrow(W) 22 | J <- ncol(W) 23 | 24 | if(is.null(m)){ 25 | m <- sqrt(n) 26 | } 27 | if(is.null(seed)){ 28 | seed <- 0 29 | } 30 | 31 | if(is.null(fitted_model$wts)){ 32 | fitted_model$wts <- rep(1,n*J) 33 | } 34 | wts <- fitted_model$wts 35 | set.seed(seed) 36 | boot_seeds <- sample(1:1e8,n_boot) 37 | 38 | boot_results <- vector(n_boot, 39 | mode = "list") 40 | 41 | if(!parallelize){ 42 | for(boot_iter in 1:n_boot){ 43 | print(boot_iter) 44 | set.seed(boot_seeds[boot_iter]) 45 | boot_weights <- rgamma(n,m/n) 46 | boot_weights <- boot_weights/sum(boot_weights) 47 | boot_weights <- rep(boot_weights, each = J) 48 | boot_weights <- boot_weights*wts 49 | boot_weights <- J*boot_weights/sum(boot_weights) 50 | 51 | boot_model <- estimate_parameters(W = W, 52 | X = fitted_model$X, 53 | Z = fitted_model$Z, 54 | Z_tilde = fitted_model$Z_tilde, 55 | Z_tilde_gamma_cols = 56 | fitted_model$Z_tilde_gamma_cols, 57 | gammas = 58 | fitted_model$gammas, 59 | gammas_fixed_indices = 60 | fitted_model$gammas_fixed_indices, 61 | P = fitted_model$P, 62 | P_fixed_indices = 63 | fitted_model$P_fixed_indices, 64 | B = fitted_model$B, 65 | B_fixed_indices = 66 | fitted_model$B_fixed_indices, 67 | X_tilde = fitted_model$X_tilde, 68 | P_tilde = fitted_model$P_tilde, 69 | P_tilde_fixed_indices = 70 | fitted_model$P_tilde_fixed_indices, 71 | gamma_tilde = fitted_model$gamma_tilde, 72 | gamma_tilde_fixed_indices = 73 | fitted_model$gamma_tilde_fixed_indices, 74 | alpha_tilde = 75 | fitted_model$alpha_tilde, 76 | Z_tilde_list = 77 | fitted_model$Z_tilde_list, 78 | barrier_t = 1, #starting value of reciprocal barrier penalty coef. 79 | barrier_scale = 10, #increments for value of barrier penalty 80 | max_barrier = 1e12, #maximum value of barrier_t 81 | initial_conv_tol = 1000, 82 | final_conv_tol = 0.1, 83 | constraint_tolerance = 1e-10, 84 | hessian_regularization = 0.01, 85 | criterion = "Poisson", 86 | profile_P = TRUE, 87 | verbose = verbose, 88 | wts = boot_weights, 89 | profiling_maxit = 25) 90 | 91 | boot_results[[boot_iter]] <- boot_model 92 | 93 | } 94 | } 95 | if(parallelize){ 96 | boot_weights <- lapply(1:n_boot, 97 | function(x){ 98 | # set.seed(x) 99 | bwts <- rgamma(n,m/n) 100 | bwts <- rep(bwts, each = J) 101 | bwts <- bwts*wts 102 | bwts <- J*bwts/sum(bwts) 103 | return(bwts) 104 | }) 105 | 106 | boot_results <- 107 | parallel::mclapply(1:n_boot, 108 | function(k) 109 | do_one_boot(W = W, 110 | fitted_model = 111 | fitted_model, 112 | m = m, 113 | seed = boot_seeds[k], 114 | boot_weights = boot_weights[[k]]), 115 | mc.cores = ncores, 116 | mc.set.seed = TRUE) 117 | } 118 | 119 | boot_matrix <- 120 | do.call(cbind, 121 | lapply(1:n_boot,function(k) matrix(sqrt(m)*( 122 | boot_results[[k]]$varying$value - fitted_model$varying$value), 123 | ncol = 1))) 124 | 125 | if(adjust){ 126 | num_nonsillyparams <- sum(fitted_model$varying$param != "gamma") 127 | adjust_factor <- (n*J)/(n*J - num_nonsillyparams) 128 | boot_matrix <- boot_matrix*sqrt(adjust_factor) 129 | } 130 | 131 | lower_boot_quantiles <- apply(boot_matrix,1, function(x) 132 | quantile(x, alpha/2)) 133 | 134 | upper_boot_quantiles <- apply(boot_matrix,1,function(x) 135 | quantile(x,1-alpha/2)) 136 | 137 | summary_df <- fitted_model$varying 138 | 139 | summary_df$lower_ci <- summary_df$value - (1/sqrt(n))*upper_boot_quantiles 140 | summary_df$upper_ci <- summary_df$value - (1/sqrt(n))*lower_boot_quantiles 141 | 142 | summary_df$lower_ci[summary_df$param %in% c("P","P_tilde")] <- 143 | pmax(summary_df$lower_ci[summary_df$param %in% c("P","P_tilde")], 144 | 0) 145 | 146 | summary_df$upper_ci[summary_df$param %in% c("P","P_tilde")] <- 147 | pmin(summary_df$upper_ci[summary_df$param %in% c("P","P_tilde")], 148 | 1) 149 | 150 | if(return_models){ 151 | return(list("ci" = summary_df, 152 | "bootstrapped_models" = boot_results)) 153 | } else{ 154 | return(list("ci" = summary_df, 155 | "bootstrapped_models" = NULL)) 156 | } 157 | 158 | } 159 | -------------------------------------------------------------------------------- /R/calculate_U.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | calculate_U <- function(W, 4 | full_model, 5 | null_model, 6 | training_indicator, 7 | log_scale = TRUE){ 8 | W_train <- W[training_indicator,,drop = FALSE] 9 | W_test <- W[!training_indicator,,drop = FALSE] 10 | 11 | ### may need to set some entries of P to be fixed if no observations in 12 | ### training set correspond to them 13 | 14 | if(is.null(full_model$Z_tilde)){ 15 | full_model_Z_tilde <- NULL 16 | full_model_Z_tilde_test <- NULL 17 | } else{ 18 | full_model_Z_tilde <- full_model$Z_tilde[training_indicator,,drop = FALSE] 19 | full_model_Z_tilde_test <- 20 | full_model$Z_tilde[!training_indicator,,drop = FALSE] 21 | } 22 | if(!is.null(full_model$Z_tilde_list)){ 23 | full_model_Z_tilde_list <- 24 | lapply(1:length(full_model$Z_tilde_list), 25 | function(k) 26 | full_model$Z_tilde_list[[k]][training_indicator,,drop = FALSE]) 27 | full_model_Z_tilde_list_test <- 28 | lapply(1:length(full_model$Z_tilde_list), 29 | function(k) 30 | full_model$Z_tilde_list[[k]][!training_indicator,,drop = FALSE]) 31 | } else{ 32 | full_model_Z_tilde_list <- full_model$Z_tilde_list 33 | full_model_Z_tilde_list_test <- full_model$Z_tilde_list 34 | } 35 | 36 | 37 | if(is.null(null_model$Z_tilde)){ 38 | null_model_Z_tilde <- NULL 39 | null_model_Z_tilde_test <- NULL 40 | } else{ 41 | null_model_Z_tilde <- null_model$Z_tilde[training_indicator,,drop = FALSE] 42 | null_model_Z_tilde_test <- 43 | null_model$Z_tilde[!training_indicator,,drop = FALSE] 44 | } 45 | if(!is.null(null_model$Z_tilde_list)){ 46 | null_model_Z_tilde_list <- 47 | lapply(1:length(null_model$Z_tilde_list), 48 | function(k) 49 | null_model$Z_tilde_list[[k]][training_indicator,,drop = FALSE]) 50 | null_model_Z_tilde_list_test <- 51 | lapply(1:length(null_model$Z_tilde_list), 52 | function(k) 53 | null_model$Z_tilde_list[[k]][!training_indicator,,drop = FALSE]) 54 | } else{ 55 | null_model_Z_tilde_list <- null_model$Z_tilde_list 56 | null_model_Z_tilde_list_test <- null_model$Z_tilde_list 57 | } 58 | 59 | full_training <- 60 | estimate_parameters(W = W_train, 61 | X = full_model$X[training_indicator,,drop = FALSE], 62 | Z = full_model$Z[training_indicator,,drop = FALSE], 63 | Z_tilde = full_model_Z_tilde, 64 | Z_tilde_gamma_cols = full_model$Z_tilde_gamma_cols, 65 | gammas = full_model$gammas[training_indicator], 66 | gammas_fixed_indices = 67 | full_model$gammas_fixed_indices[training_indicator], 68 | P = full_model$P, 69 | P_fixed_indices = full_model$P_fixed_indices, 70 | B = full_model$B, 71 | B_fixed_indices = full_model$B_fixed_indices, 72 | X_tilde = full_model$X_tilde, 73 | P_tilde = full_model$P_tilde, 74 | P_tilde_fixed_indices = full_model$P_tilde_fixed_indices, 75 | gamma_tilde = full_model$gamma_tilde, 76 | gamma_tilde_fixed_indices = 77 | full_model$gamma_tilde_fixed_indices, 78 | alpha_tilde = full_model$alpha_tilde, 79 | Z_tilde_list = full_model_Z_tilde_list) 80 | 81 | null_training <- 82 | estimate_parameters(W = W_train, 83 | X = null_model$X[training_indicator,,drop = FALSE], 84 | Z = null_model$Z[training_indicator,,drop = FALSE], 85 | Z_tilde = null_model_Z_tilde, 86 | Z_tilde_gamma_cols = null_model$Z_tilde_gamma_cols, 87 | gammas = null_model$gammas[training_indicator], 88 | gammas_fixed_indices = 89 | null_model$gammas_fixed_indices[training_indicator], 90 | P = null_model$P, 91 | P_fixed_indices = null_model$P_fixed_indices, 92 | B = null_model$B, 93 | B_fixed_indices = null_model$B_fixed_indices, 94 | X_tilde = null_model$X_tilde, 95 | P_tilde = null_model$P_tilde, 96 | P_tilde_fixed_indices = null_model$P_tilde_fixed_indices, 97 | gamma_tilde = null_model$gamma_tilde, 98 | gamma_tilde_fixed_indices = 99 | null_model$gamma_tilde_fixed_indices, 100 | alpha_tilde = null_model$alpha_tilde, 101 | Z_tilde_list = null_model_Z_tilde_list) 102 | 103 | full_starter_means <- meaninate(gammas = rep(0, sum(!training_indicator)), 104 | B = full_training$B, 105 | X = 106 | full_model$X[!training_indicator,,drop = FALSE], 107 | Z = 108 | full_model$Z[!training_indicator,,drop = FALSE], 109 | P = full_training$P, 110 | X_tilde = full_model$X_tilde, 111 | Z_tilde = full_model_Z_tilde_test, 112 | Z_tilde_gamma_cols = 113 | full_model$Z_tilde_gamma_cols, 114 | P_tilde = full_training$P_tilde, 115 | gamma_tilde = full_training$gamma_tilde, 116 | alpha_tilde = full_training$alpha_tilde, 117 | Z_tilde_list = full_model_Z_tilde_list_test) 118 | 119 | full_profile_gammas <- log(apply(W_test,1,sum)) - 120 | log(apply(full_starter_means,1,sum)) 121 | 122 | null_starter_means <- meaninate(gamma = rep(0, sum(!training_indicator)), 123 | B = null_training$B, 124 | X = 125 | null_model$X[!training_indicator,,drop = FALSE], 126 | Z = 127 | null_model$Z[!training_indicator,,drop = FALSE], 128 | P = null_training$P, 129 | X_tilde = null_model$X_tilde, 130 | Z_tilde = null_model_Z_tilde_test, 131 | Z_tilde_gamma_cols = 132 | null_model$Z_tilde_gamma_cols, 133 | P_tilde = null_training$P_tilde, 134 | gamma_tilde = null_training$gamma_tilde, 135 | alpha_tilde = null_training$alpha_tilde, 136 | Z_tilde_list = null_model_Z_tilde_list_test) 137 | 138 | null_profile_gammas <- log(apply(W_test,1,sum)) - 139 | log(apply(null_starter_means,1,sum)) 140 | 141 | 142 | full_means_test <- meaninate(gammas = full_profile_gammas, 143 | B = full_training$B, 144 | X = 145 | full_model$X[!training_indicator,,drop = FALSE], 146 | Z = 147 | full_model$Z[!training_indicator,,drop = FALSE], 148 | P = full_training$P, 149 | X_tilde = full_model$X_tilde, 150 | Z_tilde = full_model_Z_tilde_test, 151 | Z_tilde_gamma_cols = 152 | full_model$Z_tilde_gamma_cols, 153 | P_tilde = full_training$P_tilde, 154 | gamma_tilde = full_training$gamma_tilde, 155 | alpha_tilde = full_training$alpha_tilde, 156 | Z_tilde_list = full_model_Z_tilde_list_test) 157 | 158 | null_means_test <- meaninate(gammas = null_profile_gammas, 159 | B = null_training$B, 160 | X = 161 | null_model$X[!training_indicator,,drop = FALSE], 162 | Z = 163 | null_model$Z[!training_indicator,,drop = FALSE], 164 | P = null_training$P, 165 | X_tilde = null_model$X_tilde, 166 | Z_tilde = null_model_Z_tilde_test, 167 | Z_tilde_gamma_cols = 168 | null_model$Z_tilde_gamma_cols, 169 | P_tilde = null_training$P_tilde, 170 | gamma_tilde = null_training$gamma_tilde, 171 | alpha_tilde = null_training$alpha_tilde, 172 | Z_tilde_list = null_model_Z_tilde_list_test) 173 | 174 | l_HA <- poisson_criterion(W_test,full_means_test) 175 | l_H0 <- poisson_criterion(W_test,null_means_test) 176 | 177 | if(log_scale){ 178 | return(l_HA - l_H0) 179 | } else{ 180 | return(exp(l_HA - l_H0)) 181 | } 182 | 183 | } 184 | -------------------------------------------------------------------------------- /R/calculate_criterion_lr.R: -------------------------------------------------------------------------------- 1 | #' @export 2 | #' 3 | calculate_criterion_lr <- function(W, 4 | X, 5 | Z, 6 | Z_tilde = NULL, 7 | Z_tilde_gamma_cols, 8 | Z_tilde_list = NULL, 9 | alpha_tilde = NULL, 10 | gammas, 11 | gammas_fixed_indices, 12 | P, 13 | P_fixed_indices, 14 | B, 15 | B_fixed_indices, 16 | X_tilde, 17 | P_tilde, 18 | P_tilde_fixed_indices, 19 | gamma_tilde, 20 | gamma_tilde_fixed_indices, 21 | criterion = "Poisson", 22 | lr_scale = TRUE, 23 | wts = NULL) { 24 | 25 | stopifnot(criterion == "Poisson") 26 | stopifnot(lr_scale == TRUE) 27 | 28 | means <- meaninate(gammas = gammas, 29 | B = B, 30 | X = X, 31 | Z = Z, 32 | P = P, 33 | X_tilde = X_tilde, 34 | Z_tilde = Z_tilde, 35 | Z_tilde_gamma_cols = Z_tilde_gamma_cols, 36 | Z_tilde_list = Z_tilde_list, 37 | P_tilde = P_tilde, 38 | gamma_tilde = gamma_tilde, 39 | alpha_tilde = alpha_tilde, 40 | return_separate = FALSE) 41 | 42 | poisson_criterion(W = W, 43 | means = means, 44 | wts = wts) 45 | 46 | } -------------------------------------------------------------------------------- /R/calculate_log_penalty.R: -------------------------------------------------------------------------------- 1 | #################### Barrier Penalty Function ####################### 2 | #' Calculate barrier penalty to add to objective function inside 3 | #' barrier algorithm 4 | #' 5 | #' @param varying_lr_df A data frame containing values of parameters that 6 | #' are treated as unknown, with relative abundance parameters represented 7 | #' on the log ratio scale (i.e., as phi and phi_tilde) 8 | #' @param fixed_df A data frame containing values of parameters that are 9 | #' treated as known 10 | #' @param barrier_t The current value of t, the barrier penalty parameter 11 | #' 12 | #' @import logsum 13 | #' 14 | #' @return The calculated value of the barrier penalty 15 | calculate_log_penalty <- function(varying_lr_df, 16 | fixed_df, 17 | barrier_t){ 18 | 19 | which_rho_k <- unique(varying_lr_df$k[varying_lr_df$param=="rho"]) 20 | which_rho_tilde_k <- unique(varying_lr_df$k[varying_lr_df$param=="rho_tilde"]) 21 | 22 | log_P <- lapply(which_rho_k, 23 | function(k) (varying_lr_df$value[varying_lr_df$param == "rho" & 24 | varying_lr_df$k == k]) %>% 25 | (function(x) c(x,0) - logsum::sum_of_logs(c(x,0)))) 26 | log_P_tilde <- lapply(which_rho_tilde_k, 27 | function(k) 28 | (varying_lr_df$value[ 29 | varying_lr_df$param == "rho_tilde" & 30 | varying_lr_df$k == k]) %>% 31 | (function(x) c(x,0) - logsum::sum_of_logs(c(x,0)))) 32 | log_ra_sum <- do.call(sum,log_P) + do.call(sum,log_P_tilde) 33 | 34 | return((-1/barrier_t)*log_ra_sum) 35 | 36 | } 37 | -------------------------------------------------------------------------------- /R/construct_Z_tilde.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | construct_Z_tilde <- function(Z_tilde_list, 4 | alpha_tilde){ 5 | 6 | n_components <- length(Z_tilde_list) 7 | if(length(alpha_tilde) != n_components - 1){ 8 | stop("length(alpha_tilde) must equal length(Z_tilde_list) - 1") 9 | } 10 | 11 | Z_tilde <- Z_tilde_list[[1]] 12 | 13 | for(a in 2:n_components){ 14 | Z_tilde <- Z_tilde + Z_tilde_list[[a]]*exp(alpha_tilde[a - 1]) 15 | } 16 | return(Z_tilde) 17 | } 18 | -------------------------------------------------------------------------------- /R/crossfit_U.R: -------------------------------------------------------------------------------- 1 | 2 | crossfit_U <- function(W, 3 | full_model, 4 | null_model, 5 | parallelize = FALSE){ 6 | 7 | n <- nrow(W) 8 | 9 | if(!parallelize){ 10 | log_us <- numeric(n) 11 | 12 | for(i in 1:n){ 13 | # print(i) 14 | train_ind <- rep(TRUE,n) 15 | train_ind[i] <- FALSE 16 | log_us[i] <- calculate_U(W, full_model, null_model, 17 | training_indicator = train_ind) 18 | } 19 | } else{ 20 | log_us <- parallel::mclapply(1:n, 21 | function(i){ 22 | train_ind <- rep(TRUE,n) 23 | train_ind[i] <- FALSE 24 | return(calculate_U(W, 25 | full_model, 26 | null_model, 27 | training_indicator = train_ind)) 28 | 29 | }) 30 | log_us <- unlist(log_us) 31 | } 32 | 33 | return(exp(logsum::sum_of_logs(log_us) - log(n))) 34 | } 35 | -------------------------------------------------------------------------------- /R/d_mu_d_P_original.R: -------------------------------------------------------------------------------- 1 | #' Calculate derivative of mu_ij with respect to an entry of P 2 | #' 3 | #' @param i The sample index (must be in 1, ..., n) 4 | #' @param j The taxon index (must be in 1, ..., J) 5 | #' @param m The row of P with respect to which to take derivative 6 | #' @param gammas Numeric vector of read intensities 7 | #' @param B Detection efficiency matrix 8 | #' @param X The efficiency design matrix (n x p) 9 | #' @param Z The sample design matrix (n x K) 10 | #' @param P The sample relative abundance matrix (K x J) 11 | #' @param X_tilde The spurious read efficiency design (K_tilde x p) 12 | #' @param Z_tilde The spurious read design (n x K_tilde) 13 | #' @param Z_tilde_gamma_cols Numeric vector containing indexes of columns of 14 | #' Z_tilde to scale by exp(gamma); NULL if no columns to be scaled 15 | #' @param P_tilde The spurious source relative abundance matrix (K_tilde x J) 16 | #' @param gamma_tilde Spurious read intensity parameter 17 | #' 18 | #' @return A derivative d mu_ij / d P_kj 19 | mu_d_P <- function(i, 20 | j, 21 | m, 22 | gammas, 23 | B, 24 | X, 25 | Z, 26 | P){ 27 | 28 | mu_deriv <- 29 | Z[i,m,drop = F]*exp(gammas[i] + 30 | X[i,,drop = F]%*%B[,j,drop = F]) 31 | 32 | return(mu_deriv) 33 | } 34 | -------------------------------------------------------------------------------- /R/d_mu_d_gamma_original.R: -------------------------------------------------------------------------------- 1 | #' Calculate derivative of mu_ij with respect to ith entry of gamma 2 | #' 3 | #' @param i The sample index (must be in 1, ..., n) 4 | #' @param j The taxon index (must be in 1, ..., J) 5 | #' @param gammas Numeric vector of read intensities 6 | #' @param B Detection efficiency matrix 7 | #' @param X The efficiency design matrix (n x p) 8 | #' @param Z The sample design matrix (n x K) 9 | #' @param P The sample relative abundance matrix (K x J) 10 | #' @param X_tilde The spurious read efficiency design (K_tilde x p) 11 | #' @param Z_tilde The spurious read design (n x K_tilde) 12 | #' @param Z_tilde_gamma_cols Numeric vector containing indexes of columns of 13 | #' Z_tilde to scale by exp(gamma); NULL if no columns to be scaled 14 | #' @param P_tilde The spurious source relative abundance matrix (K_tilde x J) 15 | #' @param gamma_tilde Spurious read intensity parameter 16 | #' 17 | #' @return A derivative d mu_ij / d gamma_i 18 | 19 | mu_d_gamma <- function(i, 20 | j, 21 | gammas, 22 | B, 23 | X, 24 | Z, 25 | P, 26 | X_tilde, 27 | Z_tilde, 28 | Z_tilde_gamma_cols, 29 | P_tilde, 30 | gamma_tilde){ 31 | 32 | mu_deriv <- Z[i,,drop = F]%*%P[,j,drop = F]*exp(gammas[i] + 33 | X[i,,drop = F]%*%B[,j,drop = F]) 34 | 35 | K_tilde <- dim(P_tilde)[1] 36 | for(k_tilde in 1:K_tilde){ 37 | if(k_tilde %in% Z_tilde_gamma_cols){ 38 | mu_deriv <- mu_deriv + exp(gammas[i])* 39 | (Z_tilde[i,k_tilde,drop = F])%*% 40 | (P_tilde[k_tilde,j,drop = F])* 41 | exp(gamma_tilde[k_tilde] + 42 | X_tilde[k_tilde,,drop = F]%*%B[,j,drop = F]) 43 | } 44 | } 45 | return(mu_deriv) 46 | } 47 | -------------------------------------------------------------------------------- /R/data.R: -------------------------------------------------------------------------------- 1 | #' The composition of 10 spike-in taxa in 19 samples observed from flow cytometry. 2 | #' 3 | #' For details, see Costea et al (2017). 4 | #' 5 | #' 6 | #' @source \url{https://doi.org/10.1038/nbt.3960} 7 | "costea2017_mock_composition" -------------------------------------------------------------------------------- /R/dataframes_to_parameters.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | #' Convert parameter values stored in data frame format to matrix format 4 | #' 5 | #' @param fixed_df A dataframe containing values of model parameters 6 | #' treated as fixed and known (i.e. held constant at known values) 7 | #' @param varying_df A dataframe containing current values of model parameters treated 8 | #' as fixed and unknown (i.e., parameters to be estimated) 9 | #' @return A list containing 10 | #' \item{P}{Specimen relative abundance matrix (of dimension K x J)} 11 | #' \item{P_tilde}{Spurious read source 12 | #' relative abundance matrix (of dimension K-tilde x J)} 13 | #' \item{B}{A matrix of detection efficiencies (of dimension p x J)} 14 | #' \item{gammas}{An n-vector of sample-specific read intensities} 15 | #' \item{gamma_tilde}{A-vector of spurious read source intensities 16 | #' (of length K-tilde)} 17 | #' @author David Clausen 18 | dataframes_to_parameters <- function(fixed_df, 19 | varying_df){ 20 | 21 | together_df <- rbind(fixed_df,varying_df) 22 | 23 | K <- max(together_df$k[together_df$param == "P"]) 24 | 25 | K_tilde <- max(together_df$k[together_df$param == "P_tilde"]) 26 | 27 | J <- max(together_df$j[together_df$param == "P"]) 28 | 29 | p <- max(together_df$k[together_df$param == "B"]) 30 | 31 | P <- matrix(ncol = J, nrow = K) 32 | 33 | P_df <- together_df[together_df$param == "P",] 34 | for(k in 1:K){ 35 | P_row <- P_df[P_df$k ==k,] 36 | P[k,] <- P_row$value[order(P_row$j)] 37 | } 38 | 39 | P_tilde <- matrix(ncol = J, nrow = K_tilde) 40 | P_tilde_df <- together_df[together_df$param == "P_tilde",] 41 | for(k in 1:K_tilde){ 42 | P_tilde_row <- P_tilde_df[P_tilde_df$k ==k,] 43 | P_tilde[k,] <- P_tilde_row$value[order(P_tilde_row$j)] 44 | } 45 | 46 | B_df <- together_df[together_df$param == "B",] 47 | B <- matrix(0,ncol = J, nrow = p) 48 | 49 | for(k in 1:p){ 50 | B_row <- B_df[B_df$k ==k,] 51 | B[k,] <- B_row$value[order(B_row$j)] 52 | } 53 | 54 | B[,J] <- 0 55 | 56 | gammas_df <- together_df[together_df$param == "gamma",] 57 | 58 | gammas <- matrix(nrow = nrow(gammas_df), ncol = 1) 59 | 60 | gammas[] <- gammas_df$value[order(gammas_df$k)] 61 | 62 | gamma_tilde_df <- together_df[together_df$param == "gamma_tilde",] 63 | 64 | gamma_tilde <- matrix(nrow = nrow(gamma_tilde_df), ncol = 1) 65 | 66 | gamma_tilde[] <- gamma_tilde_df$value[order(gamma_tilde_df$k)] 67 | 68 | if(sum(varying_df$param == "alpha_tilde") > 0){ 69 | alpha_tilde <- varying_df$value[varying_df$param == "alpha_tilde"] 70 | } else{ 71 | alpha_tilde <- NULL 72 | } 73 | 74 | return(list("P" = P, 75 | "P_tilde" = P_tilde, 76 | "B" = B, 77 | "gammas" = gammas, 78 | "gamma_tilde" = gamma_tilde, 79 | "alpha_tilde" = alpha_tilde)) 80 | } 81 | -------------------------------------------------------------------------------- /R/dfs_to_derivs.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | dfs_to_derivs <- function(varying_df, 4 | varying_lr_df = NULL, 5 | fixed_df, 6 | X, 7 | Z, 8 | X_tilde, 9 | Z_tilde, 10 | Z_tilde_gamma_cols, 11 | criterion = "Poisson", 12 | gmm_inv_wts = NULL){ 13 | 14 | if(!is.null(varying_lr_df)){ 15 | varying_df <- lr_to_ra(fixed_df, 16 | varying_lr_df, 17 | varying_df) 18 | } 19 | 20 | 21 | K <- max(c(varying_df$k[varying_df$param == "P"], 22 | fixed_df$k[fixed_df$param == "P"])) 23 | 24 | fixed_P_multipliers <- sapply(1:K, function(k) 25 | 1 - sum(fixed_df$value[fixed_df$param == "P"& 26 | fixed_df$k ==k])) 27 | 28 | K_tilde <- max(c(varying_df$k[varying_df$param == "P_tilde"], 29 | fixed_df$k[fixed_df$param == "P_tilde"])) 30 | 31 | fixed_P_tilde_multipliers <- sapply(1:K_tilde, function(k) 32 | 1 - sum(fixed_df$value[fixed_df$param == "P_tilde"& 33 | fixed_df$k ==k])) 34 | 35 | Ak_list <- get_Ak_list(fixed_df, 36 | varying_df, 37 | varying_lr_df) 38 | 39 | A_tilde_k_list <- get_A_tilde_k_list(fixed_df, 40 | varying_df, 41 | varying_lr_df) 42 | 43 | # message("created Ak_list and A_tilde_k_list") 44 | 45 | #calculate at outset of optimization pass as argument to mean_jac_lr, etc. 46 | which_k_p <- sapply(1:K, function(k) ifelse(is.null(Ak_list[[k]]), 47 | NA, k)) 48 | 49 | which_k_p <- which_k_p[!is.na(which_k_p)] 50 | 51 | #calculate at outset of optimization pass as argument to mean_jac_lr, etc. 52 | which_k_p_tilde <- sapply(1:K_tilde, 53 | function(k) ifelse( 54 | is.null(A_tilde_k_list[[k]]), 55 | NA,k 56 | )) 57 | 58 | which_k_p_tilde <- which_k_p_tilde[!is.na(which_k_p_tilde)] 59 | 60 | # message("saved which_k_p and which_k_p_tilde") 61 | 62 | #calculate at outset of optimization 63 | which_B_rows <- unique(varying_df$k[varying_df$param == "B"]) 64 | which_B_rows <- which_B_rows[order(which_B_rows)] 65 | 66 | #calculate at outset of optimization 67 | which_B_keep <- lapply(which_B_rows, 68 | function(k) sapply(1:(J - 1), 69 | function(j) 70 | j %in% varying_lr_df$j[ 71 | varying_lr_df$param == "B" & 72 | varying_lr_df$k == k] 73 | )) 74 | which_B_keep <- do.call(rbind,which_B_keep) 75 | 76 | # message("saved which_B_keep") 77 | 78 | which_gammas <- unique(varying_df$k[varying_df$param == "gamma"]) 79 | 80 | which_gamma_tilde <- unique(varying_df$k[varying_df$param == "gamma_tilde"]) 81 | 82 | which_unconstrained <- varying_lr_df$param %in% c("B","gamma","gamma_tilde") 83 | which_rho <- varying_lr_df$param %in% c("rho") 84 | which_rho_tilde <- varying_lr_df$param %in% c("rho_tilde") 85 | npar <- nrow(varying_lr_df) 86 | 87 | return(deriv_criterion_lr(W = W, 88 | X = X, 89 | Z = Z, 90 | which_k_p = which_k_p, 91 | which_k_p_tilde = which_k_p_tilde, 92 | fixed_P_multipliers = fixed_P_multipliers, 93 | fixed_P_tilde_multipliers = fixed_P_tilde_multipliers, 94 | which_B_keep = which_B_keep, 95 | which_B_rows = which_B_rows, 96 | which_gammas = which_gammas, 97 | which_gamma_tilde = which_gamma_tilde, 98 | Z_tilde = Z_tilde, 99 | Z_tilde_gamma_cols = Z_tilde_gamma_cols, 100 | X_tilde = X_tilde, 101 | Ak_list = Ak_list, 102 | A_tilde_k_list = A_tilde_k_list, 103 | fixed_df = fixed_df, 104 | varying_df = varying_df, 105 | varying_lr_df = varying_lr_df, 106 | K = K, 107 | K_tilde = K_tilde, 108 | barrier_t = 1, 109 | criterion = "Poisson", 110 | lr_scale = TRUE, 111 | include_log_penalty_derivatives = TRUE, 112 | return_info = FALSE, 113 | wts = NULL, 114 | gmm_inv_wts = gmm_inv_wts)) 115 | } 116 | -------------------------------------------------------------------------------- /R/dfs_to_mean.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | dfs_to_mean <- function(varying_df, 4 | varying_lr_df = NULL, 5 | fixed_df, 6 | X, 7 | Z, 8 | X_tilde, 9 | Z_tilde, 10 | Z_tilde_gamma_cols){ 11 | 12 | 13 | if(!is.null(varying_lr_df)){ 14 | varying_df <- lr_to_ra(fixed_df,varying_lr_df, 15 | varying_df) 16 | } 17 | 18 | params <- dataframes_to_parameters(fixed_df, 19 | varying_df) 20 | 21 | return(meaninate(gammas = params$gammas, 22 | B = B, 23 | X = X, 24 | Z = Z, 25 | P = params$P, 26 | X_tilde = X_tilde, 27 | Z_tilde = Z_tilde, 28 | Z_tilde_gamma_cols = Z_tilde_gamma_cols, 29 | P_tilde = params$P_tilde, 30 | gamma_tilde = params$gamma_tilde)) 31 | } 32 | 33 | 34 | -------------------------------------------------------------------------------- /R/do_one_boot.R: -------------------------------------------------------------------------------- 1 | 2 | do_one_boot <- function(W, 3 | fitted_model, 4 | m = NULL, 5 | seed = NULL, 6 | boot_weights = NULL){ 7 | 8 | 9 | n <- nrow(W) 10 | if(is.null(m)){ 11 | m <- sqrt(n) 12 | } 13 | 14 | 15 | if(is.null(boot_weights)){ 16 | boot_weights <- rgamma(n,m/n) 17 | boot_weights <- boot_weights/sum(boot_weights) 18 | boot_weights <- rep(boot_weights, each = J) 19 | } 20 | 21 | if(!is.null(fitted_model$Z_tilde_list)){ 22 | fitted_model$Z_tilde <- NULL 23 | } 24 | 25 | boot_model <- estimate_parameters(W = W, 26 | X = fitted_model$X, 27 | Z = fitted_model$Z, 28 | Z_tilde = fitted_model$Z_tilde, 29 | Z_tilde_gamma_cols = 30 | fitted_model$Z_tilde_gamma_cols, 31 | gammas = 32 | fitted_model$gammas, 33 | gammas_fixed_indices = 34 | fitted_model$gammas_fixed_indices, 35 | P = fitted_model$P, 36 | P_fixed_indices = 37 | fitted_model$P_fixed_indices, 38 | B = fitted_model$B, 39 | B_fixed_indices = 40 | fitted_model$B_fixed_indices, 41 | X_tilde = fitted_model$X_tilde, 42 | P_tilde = fitted_model$P_tilde, 43 | P_tilde_fixed_indices = 44 | fitted_model$P_tilde_fixed_indices, 45 | gamma_tilde = fitted_model$gamma_tilde, 46 | gamma_tilde_fixed_indices = 47 | fitted_model$gamma_tilde_fixed_indices, 48 | alpha_tilde = 49 | fitted_model$alpha_tilde, 50 | Z_tilde_list = 51 | fitted_model$Z_tilde_list, 52 | barrier_t = 1, #starting value of reciprocal barrier penalty coef. 53 | barrier_scale = 10, #increments for value of barrier penalty 54 | max_barrier = 1e12, #maximum value of barrier_t 55 | initial_conv_tol = 1000, 56 | final_conv_tol = 0.1, 57 | constraint_tolerance = 1e-10, 58 | hessian_regularization = 0.01, 59 | criterion = "Poisson", 60 | profile_P = TRUE, 61 | wts = boot_weights, 62 | profiling_maxit = 25) 63 | 64 | return(boot_model) 65 | 66 | } 67 | -------------------------------------------------------------------------------- /R/do_one_lrt.R: -------------------------------------------------------------------------------- 1 | 2 | do_one_lrt <- function(W0, 3 | full_model, 4 | null_model,#null model specification 5 | boot_method = "bayesian_subsample", 6 | m = NULL, 7 | seed = NULL, 8 | boot_weights = NULL, 9 | return_models = FALSE){ 10 | 11 | 12 | n <- nrow(W0) 13 | if(is.null(m)){ 14 | m <- sqrt(n) 15 | } 16 | 17 | 18 | if(is.null(boot_weights)){ 19 | stop("Bootstrapping weights boot_weights must be provided.") 20 | } 21 | 22 | boot_full <- estimate_parameters(W = W0, 23 | X = full_model$X, 24 | Z = full_model$Z, 25 | Z_tilde = full_model$Z_tilde, 26 | Z_tilde_gamma_cols = 27 | full_model$Z_tilde_gamma_cols, 28 | gammas = 29 | full_model$gammas, 30 | gammas_fixed_indices = 31 | full_model$gammas_fixed_indices, 32 | P = full_model$P, 33 | P_fixed_indices = 34 | full_model$P_fixed_indices, 35 | B = full_model$B, 36 | B_fixed_indices = 37 | full_model$B_fixed_indices, 38 | X_tilde = full_model$X_tilde, 39 | P_tilde = full_model$P_tilde, 40 | P_tilde_fixed_indices = 41 | full_model$P_tilde_fixed_indices, 42 | gamma_tilde = full_model$gamma_tilde, 43 | gamma_tilde_fixed_indices = 44 | full_model$gamma_tilde_fixed_indices, 45 | alpha_tilde = 46 | full_model$alpha_tilde, 47 | Z_tilde_list = 48 | full_model$Z_tilde_list, 49 | barrier_t = 1, #starting value of reciprocal barrier penalty coef. 50 | barrier_scale = 10, #increments for value of barrier penalty 51 | max_barrier = 1e12, #maximum value of barrier_t 52 | initial_conv_tol = 1000, 53 | final_conv_tol = 0.1, 54 | 55 | constraint_tolerance = 1e-10, 56 | hessian_regularization = 0.01, 57 | criterion = "Poisson", 58 | 59 | profile_P = TRUE, 60 | wts = boot_weights, 61 | verbose = TRUE, 62 | profiling_maxit = 25) 63 | 64 | print(paste("full model weights sum to "), sum(boot_weights), 65 | sep = "", collapse = "") 66 | 67 | # if(boot_full$criterion == "reweighted_Poisson"){ 68 | # null_model$criterion <- "Poisson" 69 | # boot_weights <- boot_full$weights 70 | # } 71 | 72 | boot_null <- estimate_parameters(W = W0, 73 | X = null_model$X, 74 | Z = null_model$Z, 75 | Z_tilde = null_model$Z_tilde, 76 | Z_tilde_gamma_cols = 77 | null_model$Z_tilde_gamma_cols, 78 | gammas = 79 | null_model$gammas, 80 | gammas_fixed_indices = 81 | null_model$gammas_fixed_indices, 82 | P = null_model$P, 83 | P_fixed_indices = 84 | null_model$P_fixed_indices, 85 | B = null_model$B, 86 | B_fixed_indices = 87 | null_model$B_fixed_indices, 88 | X_tilde = null_model$X_tilde, 89 | P_tilde = null_model$P_tilde, 90 | P_tilde_fixed_indices = 91 | null_model$P_tilde_fixed_indices, 92 | gamma_tilde = null_model$gamma_tilde, 93 | gamma_tilde_fixed_indices = 94 | null_model$gamma_tilde_fixed_indices, 95 | alpha_tilde = 96 | null_model$alpha_tilde, 97 | Z_tilde_list = 98 | null_model$Z_tilde_list, 99 | barrier_t = 1, #starting value of reciprocal barrier penalty coef. 100 | barrier_scale = 10, #increments for value of barrier penalty 101 | max_barrier = 1e12, #maximum value of barrier_t 102 | initial_conv_tol = 1000, 103 | final_conv_tol = 0.1, 104 | 105 | constraint_tolerance = 1e-10, 106 | hessian_regularization = 0.01, 107 | criterion = "Poisson", 108 | 109 | profile_P = TRUE, 110 | wts = boot_weights, 111 | verbose = TRUE, 112 | profiling_maxit = 25) 113 | 114 | print(paste("null model weights sum to "), sum(boot_weights), 115 | sep = "", collapse = "") 116 | 117 | 118 | lr_stat <- 2*(boot_null$objective - 119 | boot_full$objective) 120 | 121 | 122 | if(!return_models){ 123 | return(lr_stat) 124 | } else{ 125 | return(list("lr_stat" = lr_stat, 126 | "full_model" = boot_full_model, 127 | "null_model" = boot_null_model)) 128 | } 129 | 130 | } 131 | -------------------------------------------------------------------------------- /R/do_one_simulation.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | do_one_simulation <- function(n, 4 | J, 5 | distrib, 6 | B_multiplier, 7 | seed, 8 | label, 9 | n_boot, 10 | # load_tinyvamp = FALSE, 11 | folder_name, 12 | return_dont_save = FALSE, 13 | parallelize = TRUE, 14 | verbose = FALSE, 15 | return_variance = FALSE){ 16 | 17 | # if(load_tinyvamp){ 18 | # devtools::load_all() 19 | # } 20 | 21 | print(paste("n = ",n,sep = "",collapse = "")) 22 | print(paste("J = ",J,sep = "",collapse = "")) 23 | print(paste("distrib = ",distrib,sep = "",collapse = "")) 24 | print(paste("B_multiplier = ", B_multiplier,sep = "",collapse = "")) 25 | print(paste("seed = ",seed,sep = "",collapse = "")) 26 | print(paste("label = ",label,sep = "",collapse = "")) 27 | 28 | W <- simulate_paper_data(n = n, 29 | J = J, 30 | B_multiplier = B_multiplier, 31 | distrib = distrib, 32 | seed = seed) 33 | 34 | poisson_fit <- try(fit_simulation_model(W,"Poisson")) 35 | reweighted_fit <- try(fit_simulation_model(W, "reweighted_Poisson", 36 | return_variance = return_variance)) 37 | 38 | ### Do Bootstrapped LRT for both 39 | 40 | print("Bootstrapping...") 41 | if(is.list(poisson_fit)){ 42 | poisson_null <- poisson_fit 43 | poisson_null$B[] <- 0 44 | poisson_null$B_fixed_indices[] <- TRUE 45 | poisson_lrt <- 46 | try(bootstrap_lrt(W = W, 47 | fitted_model = poisson_fit, 48 | null_param = poisson_null, 49 | n_boot = n_boot, 50 | parallelize = parallelize, 51 | ncores = 5, 52 | verbose = verbose)) 53 | 54 | poisson_ci <- try(bootstrap_ci(W = W, 55 | fitted_model = poisson_fit, 56 | n_boot = n_boot, 57 | alpha = 0.05, 58 | parallelize = parallelize, 59 | ncores = 5, 60 | seed = seed, 61 | verbose = verbose)) 62 | 63 | 64 | } 65 | 66 | if(is.list(reweighted_fit)){ 67 | reweighted_null <- reweighted_fit 68 | reweighted_null$B[] <- 0 69 | reweighted_null$B_fixed_indices[] <- TRUE 70 | reweighted_lrt <- 71 | try(bootstrap_lrt(W, 72 | fitted_model = reweighted_fit, 73 | null_param = reweighted_null, 74 | n_boot = n_boot, 75 | parallelize = parallelize, 76 | ncores = 5, 77 | verbose = verbose)) 78 | 79 | reweighted_ci <- try(bootstrap_ci(W = W, 80 | fitted_model = reweighted_fit, 81 | n_boot = n_boot, 82 | alpha = 0.05, 83 | parallelize = parallelize, 84 | ncores = 5, 85 | seed = seed, 86 | verbose = verbose)) 87 | 88 | } 89 | 90 | if(!return_dont_save){ 91 | saveRDS(list("poisson_lrt" = poisson_lrt, 92 | "poisson_ci" = poisson_ci, 93 | "reweighted_lrt" = reweighted_lrt, 94 | "reweighted_ci" = reweighted_ci), 95 | paste(folder_name,"/", 96 | paste(label,"n",n,"J", J, distrib, 97 | "Bmult", B_multiplier,"nboot", n_boot,"sim", 98 | seed, 99 | sep = "_",collapse = "_"), 100 | sep = "",collapse = "")) 101 | } else{ 102 | return(list("poisson_lrt" = poisson_lrt, 103 | "poisson_ci" = poisson_ci, 104 | "reweighted_lrt" = reweighted_lrt, 105 | "reweighted_ci" = reweighted_ci)) 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /R/evaluate_criterion_lr.R: -------------------------------------------------------------------------------- 1 | #' Criterion Evaluation Function 2 | #' 3 | #' @export 4 | evaluate_criterion_lr <- function(W, 5 | X, 6 | Z, 7 | Z_tilde, 8 | Z_tilde_gamma_cols, 9 | Z_tilde_list = NULL, 10 | X_tilde, 11 | fixed_df, 12 | varying_df, 13 | varying_lr_df = NULL, 14 | barrier_t = NULL, 15 | criterion = "Poisson", 16 | lr_scale = TRUE, 17 | include_log_penalty = TRUE, 18 | wts = NULL, 19 | gmm_inv_wts = NULL, 20 | return_gmm_inv_weights = FALSE){ 21 | 22 | if(lr_scale){ 23 | varying_df <- lr_to_ra(fixed_df, 24 | varying_lr_df, 25 | varying_df) 26 | } 27 | 28 | params <- dataframes_to_parameters(fixed_df, varying_df) 29 | 30 | if(lr_scale){ 31 | if(include_log_penalty){ 32 | log_penalty <- calculate_log_penalty(varying_lr_df, 33 | fixed_df, 34 | barrier_t) 35 | } else{ 36 | log_penalty <- 0 37 | } 38 | 39 | } else{ 40 | log_penalty <- 0 41 | } 42 | 43 | means <- meaninate(gammas = params$gammas, 44 | B = params$B, 45 | X = X, 46 | Z = Z, 47 | P = params$P, 48 | X_tilde = X_tilde, 49 | Z_tilde = Z_tilde, 50 | Z_tilde_gamma_cols = Z_tilde_gamma_cols, 51 | Z_tilde_list = Z_tilde_list, 52 | P_tilde = params$P_tilde, 53 | gamma_tilde = params$gamma_tilde, 54 | alpha_tilde = params$alpha_tilde, 55 | return_separate = FALSE) 56 | 57 | 58 | 59 | if(criterion == "Poisson"){ 60 | return(poisson_criterion(W = W, 61 | means = means, 62 | wts = wts) + log_penalty) 63 | 64 | } 65 | if(criterion == "GMM"){ 66 | n <- nrow(W) 67 | W_long <- lapply(1:n,function(i) as.numeric(W[i,])) 68 | W_long <- do.call(c,W_long) 69 | means_long <- lapply(1:n, function(i) as.numeric(means[i,])) 70 | means_long <- do.call(c,means_long) 71 | 72 | if(is.null(gmm_inv_wts)){ 73 | 74 | inv_wts <- get_gmm_inv_weights(W_long = W_long, 75 | means_long = means_long) 76 | } else{ 77 | inv_wts <- gmm_inv_wts 78 | } 79 | 80 | if(!return_gmm_inv_weights){ 81 | return(gmm_criterion(W_long = W_long, 82 | means_long = means_long, 83 | inv_wts= inv_wts) + log_penalty) 84 | } else{ 85 | return(list("gmm_crit" = gmm_criterion(W_long = W_long, 86 | means_long = means_long, 87 | inv_wts = inv_wts) + log_penalty, 88 | "inv_wts" = inv_wts)) 89 | } 90 | 91 | } 92 | 93 | 94 | } 95 | -------------------------------------------------------------------------------- /R/fit_simple_model.R: -------------------------------------------------------------------------------- 1 | 2 | fit_simple_model <- function(W, 3 | B_fixed_at_zero = FALSE, 4 | reweight = FALSE){ 5 | 6 | n <- nrow(W) 7 | J <- ncol(W) 8 | 9 | if(B_fixed_at_zero){ 10 | B_fixed_indices <- matrix(TRUE,nrow = 1, ncol = J) 11 | }else{ 12 | B_fixed_indices <- matrix( 13 | c(rep(FALSE, J - 1),TRUE),nrow = 1, ncol = J) 14 | } 15 | 16 | if(reweight){ 17 | criterion <- "reweighted_Poisson" 18 | } else{ 19 | criterion <- "Poisson" 20 | } 21 | 22 | 23 | 24 | fitted_model <- estimate_parameters(W = W, 25 | X = matrix(1, nrow = n,ncol = 1), 26 | Z = matrix(1, nrow = n, ncol = 1), 27 | P = matrix(1/J,nrow = 1, ncol = J), 28 | P_fixed_indices = matrix(TRUE, nrow = 1, ncol = J), 29 | X_tilde = matrix(0,nrow = 1, ncol = 1), 30 | Z_tilde = matrix(0,nrow = n, ncol = 1), 31 | Z_tilde_gamma_cols = 1, 32 | P_tilde = matrix(1/J,nrow = 1, ncol = J), 33 | P_tilde_fixed_indices = matrix(TRUE, nrow = 1, ncol = J), 34 | gammas = apply(W,1,function(x) log(sum(x))), 35 | gammas_fixed_indices = rep(FALSE, n), 36 | B = matrix(0, ncol = J, nrow = 1), 37 | B_fixed_indices = B_fixed_indices, 38 | gamma_tilde = matrix(0,ncol = 1, nrow = 1), 39 | gamma_tilde_fixed_indices = matrix(TRUE, 40 | ncol = 1, nrow = 1), 41 | alpha_tilde = NULL, 42 | Z_tilde_list = NULL, 43 | barrier_t = 1, #starting value of reciprocal barrier penalty coef. 44 | barrier_scale = 10, #increments for value of barrier penalty 45 | max_barrier = 1e12, #maximum value of barrier_t 46 | initial_conv_tol = 1000, 47 | final_conv_tol = 0.1, 48 | constraint_tolerance = 1e-10, 49 | hessian_regularization = 0.01, 50 | criterion = criterion, 51 | profile_P = TRUE, 52 | profiling_maxit = 25, 53 | wts = NULL, 54 | verbose = FALSE, 55 | bootstrap_failure_cutoff = NULL) 56 | 57 | return(fitted_model) 58 | 59 | } 60 | -------------------------------------------------------------------------------- /R/fit_simulation_model.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | fit_simulation_model <- function(W, 4 | estimator, 5 | return_variance = FALSE){ 6 | 7 | n <- nrow(W)/16 8 | J <- ncol(W) 9 | 10 | 11 | Z_tilde <- do.call(rbind,lapply(1:n, 12 | function(x) matrix(rep(c(1,9,81,729), 13 | 4), 14 | ncol = 1))) 15 | 16 | Z_tilde_list <- list(Z_tilde*rbind(matrix(1,nrow = 4*n,ncol = 1), 17 | matrix(0, nrow = 3*4*n,ncol = 1)), 18 | Z_tilde*rbind(matrix(0,nrow = 4*n,ncol = 1), 19 | matrix(1, nrow = 4*n,ncol = 1), 20 | matrix(0,nrow = 2*4*n,ncol = 1)), 21 | Z_tilde*rbind(matrix(0,nrow = 2*4*n,ncol = 1), 22 | matrix(1, nrow = 4*n,ncol = 1), 23 | matrix(0,nrow = 4*n,ncol = 1)), 24 | Z_tilde*rbind(matrix(0,nrow = 3*4*n,ncol = 1), 25 | matrix(1, nrow = 4*n,ncol = 1))) 26 | 27 | Z_tilde <- NULL 28 | 29 | Z_tilde_gamma_cols <- 1 30 | 31 | alpha_tilde <- c(0,0,0) 32 | gamma_tilde <- matrix(-5,ncol = 1, nrow = 1) 33 | 34 | ### generate Z 35 | Z <- do.call(rbind,lapply(1:4, 36 | function(x) do.call(rbind, 37 | lapply(1:(4*n),function(k) matrix( 38 | as.numeric(x == 1:4),nrow = 1 39 | ))))) 40 | 41 | X <- matrix(1,nrow = n*16,ncol = 1) 42 | 43 | B <- matrix(0,ncol = J, nrow = 1) 44 | B_fixed_indices <- matrix(c(rep(FALSE,J - 1), TRUE), nrow = 1) 45 | 46 | P <- matrix(1/J, nrow = 4, ncol = J) 47 | P[1,] <- 2^(seq(0,4,length.out = J)) 48 | P[1,] <- P[1,]/sum(P[1,]) 49 | P[2,] <- P[1,J:1] 50 | 51 | P_fixed_indices <- rbind(matrix(TRUE, nrow = 1, ncol = J), 52 | matrix(TRUE,nrow = 1, ncol = J), 53 | matrix(FALSE, nrow = 1, ncol = J), 54 | matrix(FALSE, nrow = 1, ncol = J)) 55 | 56 | P_tilde <- matrix(1/J,ncol = J, nrow = 1) 57 | P_tilde_fixed_indices <- matrix(FALSE, nrow = 1, ncol = J) 58 | X_tilde <- matrix(1,ncol = 1, nrow = 1) 59 | gamma_tilde <- matrix(-3.7,ncol = 1,nrow = 1) 60 | gamma_tilde_fixed_indices <- matrix(FALSE, ncol = 1, nrow =1) 61 | 62 | gammas <- apply(W, 1, function(x) log(sum(x))) 63 | gammas_fixed_indices <- rep(FALSE, 16*n) 64 | 65 | fitted_model <- estimate_parameters(W = W, 66 | X = X, 67 | Z = Z, 68 | Z_tilde_gamma_cols = Z_tilde_gamma_cols, 69 | gammas = gammas, 70 | gammas_fixed_indices = gammas_fixed_indices, 71 | P = P, 72 | P_fixed_indices = P_fixed_indices, 73 | B = B, 74 | B_fixed_indices = B_fixed_indices, 75 | X_tilde = X_tilde, 76 | P_tilde = P_tilde, 77 | P_tilde_fixed_indices = P_tilde_fixed_indices, 78 | gamma_tilde = gamma_tilde, 79 | gamma_tilde_fixed_indices = gamma_tilde_fixed_indices, 80 | alpha_tilde = alpha_tilde, 81 | Z_tilde_list = Z_tilde_list, 82 | barrier_t = 1, #starting value of reciprocal barrier penalty coef. 83 | barrier_scale = 10, #increments for value of barrier penalty 84 | max_barrier = 1e12, #maximum value of barrier_t 85 | initial_conv_tol = 1000, 86 | final_conv_tol = 0.1, 87 | constraint_tolerance = 1e-10, 88 | hessian_regularization = 0.01, 89 | criterion = estimator, 90 | profile_P = TRUE, 91 | profiling_maxit = 25, 92 | wts = NULL, 93 | verbose = TRUE, 94 | bootstrap_failure_cutoff = NULL, 95 | return_variance = return_variance 96 | ) 97 | 98 | 99 | 100 | return(fitted_model) 101 | 102 | } 103 | -------------------------------------------------------------------------------- /R/get_A_tilde_k_list.R: -------------------------------------------------------------------------------- 1 | get_A_tilde_k_list <- function(fixed_df, 2 | varying_df, 3 | varying_lr_df){ 4 | 5 | J <- max(c(varying_df$j,fixed_df$j)) 6 | K_tilde <- max(c(varying_df$k[varying_df$param == "P_tilde"], 7 | fixed_df$k[fixed_df$param == "P_tilde"])) 8 | 9 | which_k_p_tilde <- unique(varying_lr_df$k[varying_lr_df$param == "rho_tilde"]) 10 | which_k_p_tilde <- which_k_p_tilde[order(which_k_p_tilde)] 11 | 12 | A_tilde_k_list <- vector(mode = "list", K_tilde) 13 | for(k in which_k_p_tilde){ 14 | fixed_p_tilde_k <- fixed_df[(fixed_df$param == "P_tilde")&(fixed_df$k == k),] 15 | varying_p_tilde_k <- varying_df[(varying_df$param == "P_tilde")& 16 | (varying_df$k == k),] 17 | varying_j <- varying_p_tilde_k$j 18 | varying_j <- varying_j[order(varying_j)] 19 | C_k <- J - nrow(fixed_p_tilde_k) 20 | A_tilde_k <- matrix(0, nrow = J, 21 | ncol = C_k) 22 | for(jstar in 1:length(varying_j)){ 23 | A_tilde_k[varying_j[jstar],jstar] <- 1 24 | } 25 | A_tilde_k_list[[k]] <- A_tilde_k 26 | } 27 | return(A_tilde_k_list) 28 | } 29 | -------------------------------------------------------------------------------- /R/get_Ak_list.R: -------------------------------------------------------------------------------- 1 | get_Ak_list <- function(fixed_df, 2 | varying_df, 3 | varying_lr_df){ 4 | 5 | J <- max(c(varying_df$j,fixed_df$j)) 6 | K <- max(c(varying_df$k[varying_df$param %in% c("P")], 7 | fixed_df$k[fixed_df$param %in% c("P")])) 8 | 9 | which_k_p <- unique(varying_lr_df$k[varying_lr_df$param == "rho"]) 10 | which_k_p <- which_k_p[order(which_k_p)] 11 | 12 | Ak_list <- vector(mode = "list", K) 13 | for(k in which_k_p){ 14 | fixed_p_k <- fixed_df[(fixed_df$param == "P")&(fixed_df$k == k),] 15 | varying_p_k <- varying_df[(varying_df$param == "P")& 16 | (varying_df$k == k),] 17 | varying_j <- varying_p_k$j 18 | varying_j <- varying_j[order(varying_j)] 19 | C_k <- J - nrow(fixed_p_k) 20 | A_k <- matrix(0, nrow = J, 21 | ncol = C_k) 22 | for(jstar in 1:length(varying_j)){ 23 | A_k[varying_j[jstar],jstar] <- 1 24 | } 25 | Ak_list[[k]] <- A_k 26 | } 27 | return(Ak_list) 28 | } 29 | -------------------------------------------------------------------------------- /R/get_gmm_inv_weights.R: -------------------------------------------------------------------------------- 1 | get_gmm_inv_weights <- function(W_long, 2 | means_long){ 3 | 4 | squerror_long <- (W_long - means_long)^2 5 | 6 | pre_wts <- isoreg(means_long,squerror_long) 7 | 8 | means_long_ordered <- means_long[order(means_long)] 9 | squerror_long_ordered <- squerror_long[order(means_long)] 10 | 11 | # pre_wts$yf <- pre_wts$yf*(mean(1/pre_wts$yf)) 12 | # sum(1/pre_wts$yf) 13 | 14 | # if any variances estimated to be zero at positive means 15 | # if(min(pre_wts$yf[means_long_ordered>0]) ==0){ 16 | # # if there are zero means, do linear interpolation 17 | # if(sum(means_long_ordered == 0)<0){ 18 | # pre_wts$yf[means_long_ordered == 0] <- 0 19 | # 20 | # max_index <- max(which(pre_wts$yf[means_long_ordered>0] ==0)) 21 | # (pre_wts$yf[means_long_ordered>0])[1:max_index] <- 22 | # ((means_long_ordered[means_long_ordered>0])[1:max_index])* 23 | # (pre_wts$yf[means_long_ordered>0])[max_index + 1] 24 | # } 25 | # #if no zero means, set min var equal to min nonzero var 26 | # if(sum(means_long_ordered == 0) == 0){ 27 | # pre_wts$yf[pre_wts$yf ==0] <- min(pre_wts$yf[pre_wts$yf > 0]) 28 | # } 29 | # 30 | # } 31 | inv_wts <- numeric(length(W_long)) 32 | inv_wts[order(means_long)] <- pre_wts$yf 33 | 34 | return(inv_wts) 35 | } 36 | -------------------------------------------------------------------------------- /R/gmm_criterion.R: -------------------------------------------------------------------------------- 1 | 2 | gmm_criterion <- function(W_long, 3 | means_long, 4 | inv_wts){ 5 | 6 | 7 | squerror_long <- (W_long - means_long)^2 8 | 9 | gmm_crit <- 0.5*sum(sapply(1:length(means_long), 10 | function(i){ 11 | ifelse(inv_wts[i]>0, 12 | squerror_long[i]/inv_wts[i], 13 | ifelse(squerror_long[i] ==0,0,Inf)) 14 | } 15 | )) 16 | 17 | return(gmm_crit) 18 | } 19 | 20 | -------------------------------------------------------------------------------- /R/log_penalty.R: -------------------------------------------------------------------------------- 1 | #################### Barrier Penalty Function ####################### 2 | 3 | calculate_log_penalty <- function(varying_lr_df, 4 | fixed_df, 5 | barrier_t){ 6 | 7 | which_rho_k <- unique(varying_lr_df$k[varying_lr_df$param=="rho"]) 8 | which_rho_tilde_k <- unique(varying_lr_df$k[varying_lr_df$param=="rho_tilde"]) 9 | 10 | log_P <- lapply(which_rho_k, 11 | function(k) (varying_lr_df$value[varying_lr_df$param == "rho" & 12 | varying_lr_df$k == k]) %>% 13 | (function(x) -c(x,0) + sum_of_logs(c(x,0)))) 14 | log_P_tilde <- lapply(which_rho_tilde_k, 15 | function(k) 16 | (varying_lr_df$value[ 17 | varying_lr_df$param == "rho_tilde" & 18 | varying_lr_df$k == k]) %>% 19 | (function(x) -c(x,0) + sum_of_logs(c(x,0)))) 20 | log_ra_sum <- do.call(sum,log_P) + do.call(sum,log_P_tilde) 21 | 22 | return((1/barrier_t)*log_ra_sum) 23 | 24 | } 25 | -------------------------------------------------------------------------------- /R/log_penalty_grad.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statdivlab/tinyvamp/cf31c9ddcd1fee00e1232b3091eed94f18aeefab/R/log_penalty_grad.R -------------------------------------------------------------------------------- /R/log_penalty_gradient.R: -------------------------------------------------------------------------------- 1 | 2 | log_penalty_grad <- function(rho,barrier_t){ 3 | # d/d_rho sum(-log(exp(c(rho, 0))/sum(exp(rho,0))) 4 | # = d/d_rho sum( - c(rho,0) + log(sum(exp(rho,0)))) 5 | rho <- as.numeric(rho) 6 | log_bt <- log(barrier_t) 7 | 8 | n_rho <- length(rho) 9 | 10 | return(rep(-1/barrier_t,n_rho) + (n_rho +1)*exp(rho - log_bt - sum_of_logs(c(rho,0)))) 11 | } 12 | -------------------------------------------------------------------------------- /R/log_penalty_hessian.R: -------------------------------------------------------------------------------- 1 | log_penalty_hess <- function(rho,barrier_t){ 2 | rho <- as.numeric(rho) 3 | n_rho <- length(rho) 4 | log_bt <- log(barrier_t) 5 | 6 | ras <- exp(rho - sum_of_logs(c(rho,0))) 7 | ras_bt <- exp(rho - log_bt - sum_of_logs(c(rho,0))) 8 | 9 | return((n_rho + 1)*(diag(ras_bt) - 10 | matrix(ras_bt,ncol = 1)%*% 11 | matrix(ras,nrow = 1))) 12 | 13 | 14 | } 15 | -------------------------------------------------------------------------------- /R/logpois.R: -------------------------------------------------------------------------------- 1 | ##################### Poisson-Type (Log) Density ##################### 2 | logpois <- function(x,intensity){ 3 | 4 | if(!is.finite(intensity)){ 5 | intensity <- 0 6 | } 7 | if((x==0)&(intensity==0)){ 8 | return(0) 9 | } else{ 10 | if(intensity <= 0){ 11 | return(-Inf) 12 | } else{ 13 | return(x*log(intensity) - lgamma(x + 1) - intensity) 14 | } 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /R/lr_to_ra.R: -------------------------------------------------------------------------------- 1 | 2 | lr_to_ra <- function(fixed_df, 3 | varying_lr_df, 4 | varying_df){ 5 | 6 | rho_df <- varying_lr_df[varying_lr_df$param == "rho",] 7 | rho_tilde_df <- varying_lr_df[varying_lr_df$param == "rho_tilde",] 8 | 9 | P_df <- varying_df[varying_df$param == "P",] 10 | P_tilde_df <- varying_df[varying_df$param == "P_tilde",] 11 | 12 | 13 | 14 | #update values in P_df with values from varying_lr_df 15 | rho_rows <- unique(rho_df$k) 16 | for(rho_row in rho_rows){ 17 | 18 | ref_j <- max(varying_df$j[varying_df$param == "P" & varying_df$k == rho_row]) 19 | 20 | rho_mini <- rho_df[rho_df$k == rho_row,] 21 | 22 | unscaled_ra <- exp(c(rho_mini$value, 0) - sum_of_logs(c(rho_mini$value, 0))) 23 | 24 | scaling <- 1 - sum(fixed_df$value[(fixed_df$k == rho_row) &(fixed_df$param == "rho")]) 25 | scaled_ra <- unscaled_ra*scaling 26 | 27 | for(j_index in 1:length(c(rho_mini$j, ref_j))){ 28 | P_df$value[ 29 | (P_df$k == rho_row) & 30 | (P_df$j == c(rho_mini$j, ref_j)[j_index]) 31 | ] <- 32 | scaled_ra[j_index] 33 | 34 | } 35 | 36 | } 37 | 38 | #update values in P_tilde_df with values from varying_lr_df 39 | rho_tilde_rows <- unique(rho_tilde_df$k) 40 | for(rho_tilde_row in rho_tilde_rows){ 41 | 42 | ref_j <- max(varying_df$j[varying_df$param == "P_tilde" & 43 | varying_df$k == rho_tilde_row]) 44 | 45 | rho_tilde_mini <- rho_tilde_df[rho_tilde_df$k == rho_tilde_row,] 46 | 47 | unscaled_ra <- exp(c(rho_tilde_mini$value, 0) - sum_of_logs(c(rho_tilde_mini$value, 0))) 48 | 49 | scaling <- 1 - sum(fixed_df$value[(fixed_df$k == rho_tilde_row) & 50 | (fixed_df$param == "rho_tilde")]) 51 | scaled_ra <- unscaled_ra*scaling 52 | 53 | for(j_index in 1:length(c(rho_tilde_mini$j, ref_j))){ 54 | P_tilde_df$value[ 55 | (P_tilde_df$k == rho_tilde_row) & 56 | (P_tilde_df$j == c(rho_tilde_mini$j, ref_j)[j_index]) 57 | ] <- 58 | scaled_ra[j_index] 59 | 60 | } 61 | 62 | } 63 | 64 | P_df <- P_df[order(P_df$k,P_df$j),] 65 | P_tilde_df <- P_tilde_df[order(P_tilde_df$k, 66 | P_tilde_df$j),] 67 | 68 | to_return_df <- rbind(varying_lr_df[ 69 | !(varying_lr_df$param %in% c("rho", "rho_tilde","alpha_tilde")), 70 | ], 71 | P_df, 72 | P_tilde_df) 73 | 74 | to_return_df <- rbind(to_return_df, 75 | varying_lr_df[varying_lr_df$para == "alpha_tilde",]) 76 | 77 | to_return_df <- rbind(to_return_df[to_return_df$param == "P",], 78 | to_return_df[to_return_df$param == "P_tilde",], 79 | to_return_df[to_return_df$param == "B",], 80 | to_return_df[to_return_df$param == "gamma",], 81 | to_return_df[to_return_df$param == "gamma_tilde",], 82 | to_return_df[to_return_df$param == "alpha_tilde",]) 83 | 84 | return(to_return_df) 85 | } 86 | -------------------------------------------------------------------------------- /R/make_model_bundle.R: -------------------------------------------------------------------------------- 1 | # 2 | # make_model_bundle <- function(W, 3 | # X, 4 | # Z, 5 | # Z_tilde, 6 | # Z_tilde_gamma_cols, 7 | # gammas, 8 | # gammas_fixed_indices, 9 | # P, 10 | # P_fixed_indices, 11 | # B, 12 | # B_fixed_indices, 13 | # X_tilde, 14 | # P_tilde, 15 | # P_tilde_fixed_indices, 16 | # gamma_tilde, 17 | # gamma_tilde_fixed_indices, 18 | # criterion = "Poisson"){ 19 | # 20 | # 21 | # model_bundle <- list() 22 | # model_bundle$W <- W 23 | # model_bundle$designs <- list(Z = Z, 24 | # Z_tilde = Z_tilde, 25 | # Z_tilde_gamma_cols = Z_tilde_gamma_cols, 26 | # X_tilde = X_tilde) 27 | # model_bundle$parameters <- 28 | # list(gammas = gammas, 29 | # gammas_fixed_indices = gammas_fixed_indices, 30 | # P = P, 31 | # P_fixed_indices = P_fixed_indices, 32 | # B = B, 33 | # B_fixed_indices = B_fixed_indices, 34 | # P_tilde = P_tilde, 35 | # P_tilde_fixed_indices = P_tilde_fixed_indices, 36 | # gamma_tilde = gamma_tilde, 37 | # gamma_tilde_fixed_indices = gamma_tilde_fixed_indices) 38 | # model_bundle$criterion <- criterion 39 | # 40 | # 41 | # model_bundle$n <- nrow(W) 42 | # model_bundle$J <- ncol(W) 43 | # model_bundle$K <- NA 44 | # model_bundle$K_tilde <- NA 45 | # 46 | # model_bundle$optimization_info <- list() 47 | # model_bundle$optimization_info$nsteps <- 48 | # ceiling(log(max_barrier/barrier_t)/log(barrier_scale)) + 1 49 | # 50 | # model_bundle$optimization_info$tolerances <- 51 | # exp(seq(log(initial_conv_tol),log(final_conv_tol), length.out = nsteps)) 52 | # 53 | # model_bundle$parameter_dfs <- parameters_to_dataframes(P, 54 | # P_fixed_indices, 55 | # P_tilde, 56 | # P_tilde_fixed_indices, 57 | # B, 58 | # B_fixed_indices, 59 | # gammas, 60 | # gammas_fixed_indices, 61 | # gamma_tilde, 62 | # gamma_tilde_fixed_indices) 63 | # 64 | # 65 | # 66 | # 67 | # # message("created parameter dfs") 68 | # 69 | # model_bundle$K <- 70 | # with(model_bundle$parameter_dfs, 71 | # max(c(varying_df$k[varying_df$param == "P"], 72 | # fixed_df$k[fixed_df$param == "P"]))) 73 | # 74 | # model_bundle$K_tilde <- 75 | # with(model_bundle$parameter_dfs, 76 | # max(c(varying_df$k[varying_df$param == "P_tilde"], 77 | # fixed_df$k[fixed_df$param == "P_tilde"])) 78 | # ) 79 | # 80 | # model_bundle$rho_P_conversion <-list() 81 | # model_bundle$rho_P_conversion$fixed_P_multipliers <- 82 | # sapply(1:model_bundle$K, function(k) 83 | # with(model_bundle$parameter_dfs,1 - sum(fixed_df$value[fixed_df$param == "P"& 84 | # fixed_df$k ==k]))) 85 | # 86 | # model_bundle$rho_P_conversion$fixed_P_tilde_multipliers <- 87 | # sapply(1:model_bundle$K_tilde, function(k) 88 | # with(model_bundle$parameter_dfs, 1 - sum(fixed_df$value[fixed_df$param == "P_tilde"& 89 | # fixed_df$k ==k]))) 90 | # 91 | # 92 | # # create matrices to track rho-P and rho_tilde-P_tilde relationships 93 | # model_bundle$rho_P_conversion$Ak_list <- 94 | # with(model_bundle$parameter_dfs, 95 | # get_Ak_list(fixed_df, 96 | # varying_df, 97 | # varying_lr_df)) 98 | # 99 | # A_tilde_k_list <- get_A_tilde_k_list(fixed_df, 100 | # varying_df, 101 | # varying_lr_df) 102 | # 103 | # # message("created Ak_list and A_tilde_k_list") 104 | # 105 | # #calculate at outset of optimization pass as argument to mean_jac_lr, etc. 106 | # which_k_p <- sapply(1:K, function(k) ifelse(is.null(Ak_list[[k]]), 107 | # NA, k)) 108 | # 109 | # which_k_p <- which_k_p[!is.na(which_k_p)] 110 | # 111 | # #calculate at outset of optimization pass as argument to mean_jac_lr, etc. 112 | # which_k_p_tilde <- sapply(1:K_tilde, 113 | # function(k) ifelse( 114 | # is.null(A_tilde_k_list[[k]]), 115 | # NA,k 116 | # )) 117 | # 118 | # which_k_p_tilde <- which_k_p_tilde[!is.na(which_k_p_tilde)] 119 | # 120 | # # message("saved which_k_p and which_k_p_tilde") 121 | # 122 | # #calculate at outset of optimization 123 | # which_B_rows <- unique(varying_df$k[varying_df$param == "B"]) 124 | # which_B_rows <- which_B_rows[order(which_B_rows)] 125 | # 126 | # #calculate at outset of optimization 127 | # which_B_keep <- lapply(which_B_rows, 128 | # function(k) sapply(1:(J - 1), 129 | # function(j) 130 | # j %in% varying_lr_df$j[ 131 | # varying_lr_df$param == "B" & 132 | # varying_lr_df$k == k] 133 | # )) 134 | # which_B_keep <- do.call(rbind,which_B_keep) 135 | # 136 | # # message("saved which_B_keep") 137 | # 138 | # which_gammas <- unique(varying_df$k[varying_df$param == "gamma"]) 139 | # 140 | # which_gamma_tilde <- unique(varying_df$k[varying_df$param == "gamma_tilde"]) 141 | # 142 | # which_unconstrained <- varying_lr_df$param %in% c("B","gamma","gamma_tilde") 143 | # which_rho <- varying_lr_df$param %in% c("rho") 144 | # which_rho_tilde <- varying_lr_df$param %in% c("rho_tilde") 145 | # npar <- nrow(varying_lr_df) 146 | # } 147 | -------------------------------------------------------------------------------- /R/mean_jac.R: -------------------------------------------------------------------------------- 1 | mean_jac <- function(varying_df, 2 | fixed_df, 3 | X, 4 | Z, 5 | X_tilde, 6 | Z_tilde, 7 | Z_tilde_gamma_cols, 8 | Z_tilde_list = NULL){ 9 | 10 | 11 | 12 | params <- dataframes_to_parameters(fixed_df, 13 | varying_df) 14 | 15 | if(!is.null(params$alpha_tilde)){ 16 | Z_tilde <- construct_Z_tilde(Z_tilde_list, 17 | params$alpha_tilde) 18 | } 19 | 20 | fixed_status <- fixed_df 21 | if(nrow(fixed_status)>0){ 22 | fixed_status$value <- 0 23 | } 24 | varying_status <- varying_df 25 | varying_status$value <- 1 26 | 27 | param_status <- dataframes_to_parameters(fixed_status, 28 | varying_status) 29 | 30 | n <- nrow(X) 31 | J <- ncol(params$P) 32 | n_varying <- nrow(varying_df) 33 | 34 | jacobian <- matrix(nrow = n*J, 35 | ncol = n_varying) 36 | counter <- 0 37 | for(i in 1:n){ 38 | for(j in 1:J){ 39 | counter <- counter + 1 40 | jacobian[counter,] <- 41 | par_to_jacobian_row(params, 42 | param_status, 43 | i, 44 | j, 45 | X, 46 | Z, 47 | X_tilde, 48 | Z_tilde, 49 | Z_tilde_gamma_cols) 50 | 51 | } 52 | } 53 | return(jacobian) 54 | } 55 | -------------------------------------------------------------------------------- /R/meaninate.R: -------------------------------------------------------------------------------- 1 | ##################### Mean Function ##################### 2 | meaninate <- function(gammas, 3 | B, 4 | X, 5 | Z, 6 | P, 7 | X_tilde, 8 | Z_tilde = NULL, 9 | Z_tilde_gamma_cols, 10 | P_tilde, 11 | gamma_tilde, 12 | alpha_tilde = NULL, 13 | Z_tilde_list = NULL, 14 | return_separate = FALSE, 15 | exclude_gammas = FALSE){ 16 | 17 | if(!is.null(alpha_tilde)){ 18 | Z_tilde <- construct_Z_tilde(Z_tilde_list, 19 | alpha_tilde) 20 | } 21 | 22 | J <- ncol(B) 23 | n <- nrow(X) 24 | 25 | if(!exclude_gammas){ 26 | #multiply appropriate columns of Z_tilde by exp(gamma) 27 | if(length(Z_tilde_gamma_cols >0)){ 28 | for(colnum in Z_tilde_gamma_cols){ 29 | Z_tilde[,colnum] <- exp(gammas)* Z_tilde[,colnum] 30 | } 31 | } 32 | 33 | sample_part <- 34 | (Z%*%P)*( 35 | exp(gammas%*%matrix(1,nrow = 1, ncol = J) 36 | + X%*%B)) 37 | spurious_part <- Z_tilde%*%(P_tilde * 38 | exp(gamma_tilde %*% matrix(1, 39 | nrow = 1, 40 | ncol = J) + 41 | X_tilde%*%B)) 42 | } else{ 43 | 44 | 45 | sample_part <- 46 | (Z%*%P)*( 47 | exp(X%*%B)) 48 | spurious_part <- Z_tilde%*%(P_tilde * 49 | exp(gamma_tilde %*% matrix(1, 50 | nrow = 1, 51 | ncol = J) + 52 | X_tilde%*%B)) 53 | 54 | } 55 | 56 | if(return_separate){ 57 | return(list("sample" = sample_part, 58 | "spurious" = spurious_part)) 59 | } else{ 60 | return(sample_part + spurious_part) 61 | } 62 | 63 | 64 | } 65 | -------------------------------------------------------------------------------- /R/mu_d_alpha_tilde.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | mu_d_alpha_tilde <- function(i, 4 | J, 5 | a_tilde, 6 | gammas, 7 | B, 8 | X_tilde, 9 | Z_tilde_gamma_cols, 10 | alpha_tilde, 11 | Z_tilde_list, 12 | P_tilde, 13 | gamma_tilde){ 14 | 15 | Z_tilde_piece <- Z_tilde_list[[a_tilde + 1]] 16 | 17 | if(length(Z_tilde_gamma_cols >0)){ 18 | for(colnum in Z_tilde_gamma_cols){ 19 | Z_tilde_piece[,colnum] <- exp(gammas)* Z_tilde_piece[,colnum] 20 | } 21 | } 22 | 23 | return(as.numeric( 24 | exp(alpha_tilde[a_tilde])*Z_tilde_piece[i,]%*%( 25 | P_tilde*exp(matrix(gamma_tilde,ncol = 1)%*%matrix(1, nrow = 1, ncol = J) + 26 | X_tilde%*%B)) 27 | ) 28 | ) 29 | 30 | 31 | 32 | 33 | } 34 | -------------------------------------------------------------------------------- /R/mu_d_beta.R: -------------------------------------------------------------------------------- 1 | #' Calculate derivative of mu_ij with respect to B 2 | #' 3 | #' @param i The sample index (must be in 1, ..., n) 4 | #' @param j The taxon index (must be in 1, ..., J) 5 | #' @param q Which row of B to take derivative with respect to 6 | #' (must be in 1, ..., p) 7 | #' @param gammas Numeric vector of read intensities 8 | #' @param B Detection efficiency matrix 9 | #' @param X The efficiency design matrix (n x p) 10 | #' @param Z The sample design matrix (n x K) 11 | #' @param P The sample relative abundance matrix (K x J) 12 | #' @param X_tilde The spurious read efficiency design (K_tilde x p) 13 | #' @param Z_tilde The spurious read design (n x K_tilde) 14 | #' @param Z_tilde_gamma_cols Numeric vector containing indexes of columns of 15 | #' Z_tilde to scale by exp(gamma); NULL if no columns to be scaled 16 | #' @param P_tilde The spurious source relative abundance matrix (K_tilde x J) 17 | #' @param gamma_tilde Spurious read intensity parameter 18 | #' 19 | #' @return A derivative d mu_ij / d B_qj 20 | 21 | mu_d_beta <- function(i, 22 | j, 23 | q, 24 | gammas, 25 | B, 26 | X, 27 | Z, 28 | P, 29 | X_tilde, 30 | Z_tilde, 31 | Z_tilde_gamma_cols, 32 | alpha_tilde = NULL, 33 | Z_tilde_list = NULL, 34 | P_tilde, 35 | gamma_tilde){ 36 | 37 | if(!is.null(alpha_tilde)){ 38 | Z_tilde <- construct_Z_tilde(Z_tilde_list, 39 | alpha_tilde) 40 | } 41 | 42 | 43 | 44 | 45 | mu_deriv <- 46 | t(X[i,q,drop = F])%*%(Z[i,,drop = F]%*%P[,j,drop = F]*exp(gammas[i] + 47 | X[i,,drop = F]%*% 48 | B[,j,drop = F])) 49 | 50 | K_tilde <- dim(P_tilde)[1] 51 | for(k_tilde in 1:K_tilde){ 52 | if(k_tilde %in% Z_tilde_gamma_cols){ 53 | 54 | mu_deriv <- mu_deriv + exp(gammas[i])* 55 | Z_tilde[i,k_tilde]%*%(P_tilde[k_tilde,j] * 56 | exp(gamma_tilde[k_tilde] + 57 | X_tilde[k_tilde,]%*%B[,j]))*X_tilde[k_tilde,q] 58 | 59 | 60 | } else{ 61 | mu_deriv <- mu_deriv + 62 | Z_tilde[i,k_tilde]%*%(P_tilde[k_tilde,j] * 63 | exp(gamma_tilde[k_tilde] + 64 | X_tilde[k_tilde,]%*%B[,j]))*X_tilde[k_tilde,q] 65 | } 66 | } 67 | 68 | 69 | 70 | return(mu_deriv) 71 | } 72 | -------------------------------------------------------------------------------- /R/mu_d_beta_faster.R: -------------------------------------------------------------------------------- 1 | mu_d_beta_faster <- function(i, 2 | J, 3 | k, 4 | gammas, 5 | B, 6 | X, 7 | Z, 8 | P, 9 | X_tilde, 10 | Z_tilde, 11 | Z_tilde_gamma_cols, 12 | P_tilde, 13 | gamma_tilde){ 14 | 15 | mu_deriv <- sapply(1:J, function(j) 16 | t(X[i,k,drop = F])%*%(Z[i,,drop = F]%*%P[,j,drop = F]*exp(gammas[i] + 17 | X[i,,drop = F]%*%B[,j,drop = F]))) 18 | 19 | K_tilde <- dim(P_tilde)[1] 20 | for(k_tilde in 1:K_tilde){ 21 | if(k_tilde %in% Z_tilde_gamma_cols){ 22 | mu_deriv <- mu_deriv + sapply(1:J, function(j) 23 | exp(gammas[i])* 24 | (Z_tilde[i,k_tilde,drop = F])%*% 25 | P_tilde[k_tilde,j,drop = F]* 26 | exp(gamma_tilde[k_tilde] + 27 | X_tilde[k_tilde,,drop = F]%*%B[,j,drop = F])* 28 | X_tilde[k_tilde,k,drop = F]) 29 | } else{ 30 | mu_deriv <- mu_deriv + 31 | sapply(1:J, function(j) (Z_tilde[i,k_tilde,drop = F])%*% 32 | P_tilde[k_tilde,j,drop = F]* 33 | exp(gamma_tilde[k_tilde] + 34 | X_tilde[k_tilde,,drop = F]%*%B[,j,drop = F])* 35 | X_tilde[k_tilde,k,drop = F]) 36 | } 37 | } 38 | 39 | 40 | 41 | return(mu_deriv) 42 | } 43 | -------------------------------------------------------------------------------- /R/mu_d_gamma_faster.R: -------------------------------------------------------------------------------- 1 | 2 | mu_d_gamma_faster <- function(i, 3 | J, 4 | gammas, 5 | B, 6 | X, 7 | Z, 8 | P, 9 | X_tilde, 10 | Z_tilde, 11 | Z_tilde_gamma_cols, 12 | alpha_tilde = NULL, 13 | Z_tilde_list = NULL, 14 | P_tilde, 15 | gamma_tilde){ 16 | 17 | if(!is.null(alpha_tilde)){ 18 | Z_tilde <- construct_Z_tilde(Z_tilde_list, 19 | alpha_tilde) 20 | } 21 | 22 | mu_deriv <- Z[i,,drop = F]%*%P*exp(gammas[i] + 23 | X[i,,drop = F]%*%B) 24 | 25 | K_tilde <- dim(P_tilde)[1] 26 | for(k_tilde in 1:K_tilde){ 27 | if(k_tilde %in% Z_tilde_gamma_cols){ 28 | mu_deriv <- mu_deriv + exp(gammas[i])* 29 | (Z_tilde[i,k_tilde,drop = F])%*% 30 | (P_tilde[k_tilde,,drop = F])* 31 | exp(gamma_tilde[k_tilde] + 32 | X_tilde[k_tilde,,drop = F]%*%B) 33 | } 34 | } 35 | return(mu_deriv) 36 | } 37 | -------------------------------------------------------------------------------- /R/mu_d_gamma_tilde.R: -------------------------------------------------------------------------------- 1 | #' Calculate derivative of mu_ij with respect to an entry of gamma_tilde 2 | #' 3 | #' @param i The sample index (must be in 1, ..., n) 4 | #' @param j The taxon index (must be in 1, ..., J) 5 | #' @param k_tilde The element of gamma_tilde with respect to which to take derivative 6 | #' @param gammas Numeric vector of read intensities 7 | #' @param B Detection efficiency matrix 8 | #' @param X The efficiency design matrix (n x p) 9 | #' @param Z The sample design matrix (n x K) 10 | #' @param P The sample relative abundance matrix (K x J) 11 | #' @param X_tilde The spurious read efficiency design (K_tilde x p) 12 | #' @param Z_tilde The spurious read design (n x K_tilde) 13 | #' @param Z_tilde_gamma_cols Numeric vector containing indexes of columns of 14 | #' Z_tilde to scale by exp(gamma); NULL if no columns to be scaled 15 | #' @param P_tilde The spurious source relative abundance matrix (K_tilde x J) 16 | #' @param gamma_tilde Spurious read intensity parameter 17 | #' 18 | #' @return A derivative d mu_ij / d gamma_tilde_k_tilde 19 | mu_d_gamma_tilde <- function(i, 20 | j, 21 | k_tilde, 22 | gammas, 23 | B, 24 | X, 25 | Z, 26 | P, 27 | X_tilde, 28 | Z_tilde, 29 | Z_tilde_gamma_cols, 30 | alpha_tilde = NULL, 31 | Z_tilde_list = NULL, 32 | P_tilde, 33 | gamma_tilde){ 34 | 35 | if(!is.null(alpha_tilde)){ 36 | Z_tilde <- construct_Z_tilde(Z_tilde_list, 37 | alpha_tilde) 38 | } 39 | 40 | mu_deriv <- 0 41 | 42 | if(k_tilde %in% Z_tilde_gamma_cols){ 43 | mu_deriv <- mu_deriv + exp(gammas[i])* 44 | (Z_tilde[i,k_tilde,drop = F]%*% 45 | P_tilde[k_tilde,j,drop = F])* 46 | exp(gamma_tilde[k_tilde] + 47 | X_tilde[k_tilde,,drop = F]%*%B[,j,drop = F]) 48 | } else{ 49 | mu_deriv <- mu_deriv + 50 | (Z_tilde[i,k_tilde,drop = F]%*% 51 | P_tilde[k_tilde,j,drop = F])* 52 | exp(gamma_tilde[k_tilde] + 53 | X_tilde[k_tilde,,drop = F]%*%B[,j,drop = F]) 54 | 55 | } 56 | 57 | return(mu_deriv) 58 | } 59 | -------------------------------------------------------------------------------- /R/mu_d_rho_faster.R: -------------------------------------------------------------------------------- 1 | #' Calculate derivative of mu_ij with respect to a row of matrix-valued 2 | #' parameter rho 3 | #' 4 | #' @param i The sample index (must be in 1, ..., n) 5 | #' @param J The total number of taxa modeled 6 | #' @param k Row index (which row of rho with respect to which to take derivative) 7 | #' @param gammas Numeric vector of read intensities 8 | #' @param B Detection efficiency matrix 9 | #' @param X The efficiency design matrix (n x p) 10 | #' @param Z The sample design matrix (n x K) 11 | #' @param Ak_list List containing matrices that map back-transformed 12 | #' rho to entries of P 13 | #' @param rho_k Value of kth row of rho 14 | #' @param fixed_P_multipliers Numeric vector of length K containing values in (0,1] 15 | #' equal to 1 - sum(fixed relative abundances in row k of P) 16 | #' Z_tilde to scale by exp(gamma); NULL if no columns to be scaled 17 | #' 18 | #' @return A derivative d mu_i / d rho_k 19 | mu_d_rho_faster <- function(i, 20 | J, 21 | k, 22 | gammas, 23 | B, 24 | X, 25 | Z, 26 | Ak_list, 27 | rho_k, 28 | fixed_P_multipliers, 29 | proportion_scale = FALSE){ 30 | dmu_i_dpk <- Matrix::Diagonal(x = as.numeric(Z[i,k]*exp(matrix(gammas[i],nrow = 1, ncol = J) + X[i,,drop=F]%*%B))) 31 | if(!proportion_scale){ 32 | nu_k <- exp(rho_k) 33 | front_term <- 1/(1 + sum(nu_k)) 34 | dpk_drhok <- Matrix::Matrix(fixed_P_multipliers[k]*( 35 | front_term*rbind(diag(nu_k), 0) - 36 | front_term^2*outer(c(nu_k,1),nu_k)) 37 | ) 38 | 39 | rho_jacob_i <- dmu_i_dpk%*%dpk_drhok 40 | 41 | return(rho_jacob_i) 42 | } else{ 43 | return(dmu_i_dpk) 44 | } 45 | 46 | } 47 | -------------------------------------------------------------------------------- /R/mu_d_rho_tilde_faster.R: -------------------------------------------------------------------------------- 1 | #' Calculate derivative of mu_i with respect to a row of matrix-valued 2 | #' parameter rho_tilde 3 | #' 4 | #' @param i The sample index (must be in 1, ..., n) 5 | #' @param J The total number of taxa modeled 6 | #' @param k_tilde Row index (which row of rho_tilde with respect to which to take derivative) 7 | #' @param gammas Numeric vector of read intensities 8 | #' @param B Detection efficiency matrix 9 | #' @param rho_tilde_k Value of kth row of rho_tilde 10 | #' @param A_tilde_k_list List containing matrices that map back-transformed 11 | #' rho_tilde to entries of P_tilde 12 | #' @param fixed_P_multipliers Numeric vector of length K containing values in (0,1] 13 | #' equal to 1 - sum(fixed relative abundances in row k of P_tilde) 14 | #' 15 | #' @return A derivative d mu_i / d rho_tilde_k 16 | mu_d_rho_tilde_faster <- function(i, 17 | J, 18 | k_tilde, 19 | gammas, 20 | B, 21 | rho_tilde_k, 22 | A_tilde_k_list, 23 | fixed_P_tilde_multipliers, 24 | X_tilde, 25 | Z_tilde, 26 | Z_tilde_gamma_cols, 27 | alpha_tilde = NULL, 28 | Z_tilde_list = NULL, 29 | gamma_tilde, 30 | proportion_scale = FALSE) 31 | { 32 | 33 | if(!is.null(alpha_tilde)){ 34 | Z_tilde <- construct_Z_tilde(Z_tilde_list, 35 | alpha_tilde) 36 | } 37 | 38 | for(zcol in Z_tilde_gamma_cols){ 39 | Z_tilde[,zcol] <- exp(gammas)*Z_tilde[,zcol] 40 | } 41 | 42 | dmu_i_dptildektilde <- Matrix::Diagonal(x = as.numeric(Z_tilde[i,k_tilde]* 43 | exp(matrix(gamma_tilde[k_tilde],nrow = 1, ncol = J) + 44 | X_tilde[k_tilde,,drop = F]%*%B))) 45 | 46 | if(!proportion_scale){ 47 | nu_tilde_k <- exp(rho_tilde_k) 48 | 49 | front_term <- 1/(1 + sum(nu_tilde_k)) 50 | dptildek_drhotildek <- Matrix::Matrix(fixed_P_tilde_multipliers[k_tilde]*( 51 | front_term*rbind(diag(nu_tilde_k), 0) - 52 | front_term^2*outer(c(nu_tilde_k,1),nu_tilde_k) 53 | )) 54 | 55 | rho_tilde_jacob_i <- dmu_i_dptildektilde%*%A_tilde_k_list[[k_tilde]]%*%dptildek_drhotildek 56 | 57 | return(rho_tilde_jacob_i) 58 | } else{ 59 | return(dmu_i_dptildektilde) 60 | } 61 | 62 | } 63 | -------------------------------------------------------------------------------- /R/nullify.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | nullify <- function(W, 4 | full_model, 5 | null_model){ 6 | 7 | J <- ncol(full_model$W) 8 | n <- nrow(full_model$W) 9 | 10 | full_means <- 11 | meaninate(gammas = full_model$gammas, 12 | B = full_model$B, 13 | X = full_model$X, 14 | Z = full_model$Z, 15 | P = full_model$P, 16 | X_tilde = full_model$X_tilde, 17 | Z_tilde = full_model$Z_tilde, 18 | Z_tilde_gamma_cols = full_model$Z_tilde_gamma_cols, 19 | P_tilde = full_model$P_tilde, 20 | gamma_tilde = full_model$gamma_tilde, 21 | alpha_tilde = full_model$alpha_tilde, 22 | Z_tilde_list = full_model$Z_tilde_list, 23 | exclude_gammas= TRUE) 24 | 25 | null_means <- 26 | meaninate(gammas = null_model$gammas, 27 | B = null_model$B, 28 | X = null_model$X, 29 | Z = null_model$Z, 30 | P = null_model$P, 31 | X_tilde = null_model$X_tilde, 32 | Z_tilde = null_model$Z_tilde, 33 | Z_tilde_gamma_cols = null_model$Z_tilde_gamma_cols, 34 | P_tilde = null_model$P_tilde, 35 | gamma_tilde = null_model$gamma_tilde, 36 | alpha_tilde = null_model$alpha_tilde, 37 | Z_tilde_list = null_model$Z_tilde_list, 38 | exclude_gammas = TRUE) 39 | 40 | W0 <- W*(null_means/full_means) 41 | W0[full_means == 0] <- 0 42 | 43 | for(i in 1:nrow(W0)){ 44 | W0[i,] <- (W0[i,]/sum(W0[i,]))*sum(W[i,]) 45 | } 46 | 47 | return(W0) 48 | 49 | } 50 | -------------------------------------------------------------------------------- /R/numerical_jacobian.R: -------------------------------------------------------------------------------- 1 | #' @import numDeriv 2 | #' 3 | numerical_jacobian <- function(varying_lr_df, 4 | fixed_df, 5 | varying_df, 6 | gammas, 7 | B, 8 | X, 9 | Z, 10 | P, 11 | X_tilde, 12 | Z_tilde = NULL, 13 | Z_tilde_gamma_cols, 14 | P_tilde, 15 | gamma_tilde, 16 | alpha_tilde = NULL, 17 | Z_tilde_list = NULL 18 | ){ 19 | n <- nrow(Z) 20 | J <- ncol(P) 21 | npar <- nrow(varying_lr_df) 22 | mean_func <- function(x,index,i,j){ 23 | temp_lr <- varying_lr_df 24 | temp_lr$value[index] <- x 25 | temp_params <- dataframes_to_parameters(fixed_df, 26 | lr_to_ra(fixed_df, 27 | temp_lr, 28 | varying_df)) 29 | return(with(temp_params,meaninate(gammas, 30 | B, 31 | X, 32 | Z, 33 | P, 34 | X_tilde, 35 | Z_tilde = NULL, 36 | Z_tilde_gamma_cols, 37 | P_tilde, 38 | gamma_tilde, 39 | alpha_tilde = alpha_tilde, 40 | Z_tilde_list = Z_tilde_list, 41 | return_separate = FALSE, 42 | exclude_gammas = FALSE)[i,j])) 43 | } 44 | 45 | 46 | num_jacob <- matrix(0,nrow = n*J, ncol = npar) 47 | 48 | for(i in 1:n){ 49 | print(paste("i = ", i, sep = "", collapse = "")) 50 | for(j in 1:J){ 51 | print(paste("j = ", j, sep = "", collapse = "")) 52 | for(parindex in 1:npar){ 53 | num_jacob[(i - 1)*J + j, parindex] <- numDeriv::grad( 54 | function(x) mean_func(x,index = parindex,i = i,j = j), 55 | varying_lr_df$value[parindex]) 56 | 57 | } 58 | } 59 | } 60 | 61 | return(num_jacob) 62 | 63 | 64 | 65 | } 66 | -------------------------------------------------------------------------------- /R/par_to_jacobian_row.R: -------------------------------------------------------------------------------- 1 | par_to_jacobian_row <- function(params, 2 | param_status, 3 | i, 4 | j, 5 | X, 6 | Z, 7 | X_tilde, 8 | Z_tilde, 9 | Z_tilde_gamma_cols){ 10 | J <- ncol(params$P) 11 | 12 | ### jacobian row in P 13 | which_P_rows <- which(apply(param_status$P,1, max) ==1) 14 | P_jac_row <- numeric(J*length(which_P_rows)) 15 | 16 | if(length(which_P_rows) >0){ 17 | for(row_index in 1:length(which_P_rows)){ 18 | P_jac_row[(row_index - 1)*J + j] <- mu_d_P(i, 19 | j, 20 | m = which_P_rows[row_index], 21 | gammas = params$gammas, 22 | B = params$B, 23 | X = X, 24 | Z = Z, 25 | P = params$P) 26 | } 27 | } 28 | 29 | ### jacobian row in P_tilde 30 | which_P_tilde_rows <- which(apply(param_status$P_tilde,1, max) ==1) 31 | P_tilde_jac_row <- numeric(J*length(which_P_tilde_rows)) 32 | 33 | if(length(which_P_tilde_rows)>0){ 34 | for(row_index in 1:length(which_P_tilde_rows)){ 35 | P_tilde_jac_row[(row_index - 1)*J + j] <- mu_d_P_tilde(i, 36 | j, 37 | k_tilde = which_P_tilde_rows[row_index], 38 | gammas = params$gammas, 39 | B = params$B, 40 | X = X, 41 | Z = Z, 42 | P = params$P, 43 | X_tilde = X_tilde, 44 | Z_tilde = Z_tilde, 45 | Z_tilde_gamma_cols = Z_tilde_gamma_cols, 46 | P_tilde = params$P_tilde, 47 | gamma_tilde = params$gamma_tilde) 48 | } 49 | } 50 | 51 | ### jacobian row in B 52 | which_B_rows <- which(apply(param_status$B,1, max) ==1) 53 | B_jac_row <- numeric((J - 1)*length(which_B_rows)) 54 | 55 | if(length(which_B_rows)>0){ 56 | if(j < J){ #row of jacobian is zero if j = J 57 | for(row_index in 1:length(which_B_rows)){ 58 | B_jac_row[(row_index - 1)*(J - 1) + j] <- mu_d_beta(i, 59 | j, 60 | q = which_B_rows[row_index], 61 | gammas = params$gammas, 62 | B = params$B, 63 | X = X, 64 | Z = Z, 65 | P = params$P, 66 | X_tilde = X_tilde, 67 | Z_tilde = Z_tilde, 68 | Z_tilde_gamma_cols = Z_tilde_gamma_cols, 69 | P_tilde = params$P_tilde, 70 | gamma_tilde = params$gamma_tilde) 71 | } 72 | } 73 | } 74 | 75 | ### gamma jacobian row 76 | which_gammas <- which(apply(param_status$gammas,1, max) ==1) 77 | gammas_jac_row <- numeric(sum(param_status$gammas)) 78 | 79 | if(length(which_gammas)>0){ 80 | gammas_jac_row[i] <- mu_d_gamma(i, 81 | j, 82 | gammas = params$gammas, 83 | B = params$B, 84 | X = X, 85 | Z = Z, 86 | P = params$P, 87 | X_tilde = X_tilde, 88 | Z_tilde = Z_tilde, 89 | Z_tilde_gamma_cols = Z_tilde_gamma_cols, 90 | P_tilde = params$P_tilde, 91 | gamma_tilde = params$gamma_tilde) 92 | } 93 | gammas_jac_row <- gammas_jac_row[which_gammas] #??? 94 | 95 | ### gamma tilde jacobian row 96 | which_gamma_tilde <- which(apply(param_status$gamma_tilde,1, max) ==1) 97 | gamma_tilde_jac_row <- numeric(sum(param_status$gamma_tilde)) 98 | 99 | if(length(which_gamma_tilde)>0){ 100 | for(row_index in 1:length(which_gamma_tilde)){ 101 | gamma_tilde_jac_row[row_index] <- mu_d_gamma_tilde(i, 102 | j, 103 | k_tilde = which_gamma_tilde[row_index], 104 | gammas = params$gammas, 105 | B = params$B, 106 | X = X, 107 | Z = Z, 108 | P = params$P, 109 | X_tilde = X_tilde, 110 | Z_tilde = Z_tilde, 111 | Z_tilde_gamma_cols = Z_tilde_gamma_cols, 112 | P_tilde = params$P_tilde, 113 | gamma_tilde = params$gamma_tilde) 114 | } 115 | } 116 | 117 | return(c(P_jac_row, 118 | P_tilde_jac_row, 119 | B_jac_row, 120 | gammas_jac_row, 121 | gamma_tilde_jac_row)) 122 | 123 | } 124 | -------------------------------------------------------------------------------- /R/parameters_to_dataframes.R: -------------------------------------------------------------------------------- 1 | parameters_to_dataframes <- function(P, 2 | P_fixed_indices, 3 | P_tilde, 4 | P_tilde_fixed_indices, 5 | B, 6 | B_fixed_indices, 7 | gammas, 8 | gammas_fixed_indices, 9 | gamma_tilde, 10 | gamma_tilde_fixed_indices, 11 | alpha_tilde = NULL){ 12 | 13 | ### extract important dimensions 14 | J <- ncol(P) 15 | K <- nrow(P) 16 | K_tilde <- nrow(P_tilde) 17 | p_B <- nrow(B) 18 | n <- nrow(as.matrix(gammas,ncol = 1)) 19 | 20 | if(length(c(J, K, K_tilde, p_B, n)) < 5) { 21 | badbad <- which(sapply(c(J, K, K_tilde, p_B, n), is.null)) 22 | bad_param <- c("J", "K", "K_tilde", "p_B", "n")[badbad] 23 | stop(c("Unfortunately, one of J, K, K_tilde, p_B or n is NULL.\n", 24 | "One of P, P_tilde, B or gammas may be a vector, not a matrix.\n", 25 | "Please try again.")) 26 | } 27 | 28 | P_fixed_indices <- apply(P_fixed_indices, c(1,2), as.logical) 29 | P_tilde_fixed_indices <- apply(P_tilde_fixed_indices, c(1,2), as.logical) 30 | 31 | ### check that P_fixed_indices and P satisfy RA requirements 32 | for(k in 1:K){ 33 | if(sum(!P_fixed_indices[k,]) >0){ 34 | C_k <- sum(P_fixed_indices[k,]) 35 | fixed_sum_k <- sum(P[k,P_fixed_indices[k,]]) 36 | if(C_k >= J - 1){ 37 | P_fixed_indices[k,] <- TRUE 38 | warning(paste("Row ", k, " of P set to fixed; ", 39 | "\nprovided number of known elements via P_fixed_indices is greater ", 40 | "\nthan number of taxa - 2, which implies entire row is known.")) 41 | } 42 | if(fixed_sum_k >=1){ 43 | P_fixed_indices[k,] <- TRUE 44 | warning(paste("Row ", k, " of P set to fixed; ", 45 | "\ntotal relative abundances across taxa indicated as known", 46 | "\nvia P_fixed_indices is 1, which implies entire row is known.")) 47 | } 48 | } 49 | } 50 | 51 | ### check that P_tilde_fixed_indices and P_tilde satisfy RA requirements 52 | for(k in 1:K_tilde){ 53 | if(sum(!P_tilde_fixed_indices[k,]) >0){ 54 | C_k <- sum(P_tilde_fixed_indices[k,]) 55 | fixed_sum_k <- sum(P_tilde[k,P_tilde_fixed_indices[k,]]) 56 | if(C_k >= J - 1){ 57 | P_tilde_fixed_indices[k,] <- TRUE 58 | warning(paste("Row ", k, " of P_tilde set to fixed; ", 59 | "\nprovided number of known elements via P_tilde_fixed_indices is greater ", 60 | "\nthan number of taxa - 2, which implies entire row is known.")) 61 | } 62 | if(fixed_sum_k >=1){ 63 | P_tilde_fixed_indices[k,] <- TRUE 64 | warning(paste("Row ", k, " of P_tilde set to fixed; ", 65 | "\ntotal relative abundances across taxa indicated as known", 66 | "\nvia P_tilde_fixed_indices is 1, which implies entire row is known.")) 67 | } 68 | } 69 | } 70 | 71 | ### set up fixed parameter and varying parameter data.frames 72 | fixed_df <- data.frame("value" = numeric(0), 73 | "param" = character(0), 74 | "j" = numeric(0)) 75 | 76 | varying_df <- data.frame("value" = numeric(0), 77 | "param" = character(0), 78 | "j" = numeric(0)) 79 | 80 | 81 | ### Set up matrix to track specimen provenance 82 | p_k_matrix <- P 83 | for(k in 1:K){ 84 | p_k_matrix[k,] <- rep(k,J) 85 | } 86 | 87 | ### Set up matrix to track taxon provenance 88 | 89 | p_j_matrix <- P 90 | for(j in 1:J){ 91 | p_j_matrix[,j] <- rep(j,K) 92 | } 93 | 94 | known_temp_df <- data.frame("value" = P[P_fixed_indices], 95 | "param" = rep("P",sum(P_fixed_indices)), 96 | "k" = p_k_matrix[P_fixed_indices], 97 | "j" = p_j_matrix[P_fixed_indices]) 98 | 99 | varying_temp_df <- data.frame("value" = P[!P_fixed_indices], 100 | "param" = rep("P", 101 | sum(!P_fixed_indices)), 102 | "k" = p_k_matrix[!P_fixed_indices], 103 | "j" = p_j_matrix[!P_fixed_indices]) 104 | 105 | fixed_df <- rbind(fixed_df, 106 | known_temp_df) 107 | 108 | varying_df <- rbind(varying_df, 109 | varying_temp_df) 110 | 111 | ### Set up matrix to track spurious read specimen provenance 112 | p_tilde_k_matrix <- P_tilde 113 | for(k in 1:K_tilde){ 114 | p_tilde_k_matrix[k,] <- rep(k,J) 115 | } 116 | 117 | ### Set up matrix to track spurious read taxon provenance 118 | 119 | p_tilde_j_matrix <- P_tilde 120 | for(j in 1:J){ 121 | p_tilde_j_matrix[,j] <- rep(j,K_tilde) 122 | } 123 | 124 | known_temp_df <- data.frame("value" = P_tilde[P_tilde_fixed_indices], 125 | "param" = rep("P_tilde",sum(P_tilde_fixed_indices)), 126 | "k" = p_tilde_k_matrix[P_tilde_fixed_indices], 127 | "j" = p_tilde_j_matrix[P_tilde_fixed_indices]) 128 | 129 | varying_temp_df <- data.frame("value" = P_tilde[!P_tilde_fixed_indices], 130 | "param" = rep("P_tilde",sum(!P_tilde_fixed_indices)), 131 | "k" = p_tilde_k_matrix[!P_tilde_fixed_indices], 132 | "j" = p_tilde_j_matrix[!P_tilde_fixed_indices]) 133 | 134 | fixed_df <- rbind(fixed_df, 135 | known_temp_df) 136 | 137 | varying_df <- rbind(varying_df, 138 | varying_temp_df) 139 | 140 | 141 | 142 | ### Set up matrix to track effects 143 | B_k_matrix <- B 144 | for(k in 1:p_B){ 145 | B_k_matrix[k,] <- rep(k,J) 146 | } 147 | 148 | ### Set up matrix to track effects by taxon 149 | 150 | B_j_matrix <- B 151 | for(j in 1:J){ 152 | B_j_matrix[,j] <- rep(j,p_B) 153 | } 154 | 155 | 156 | known_temp_df <- data.frame("value" = B[B_fixed_indices], 157 | "param" = rep("B", sum(B_fixed_indices)), 158 | "k" = B_k_matrix[B_fixed_indices], 159 | "j" = B_j_matrix[B_fixed_indices]) 160 | 161 | varying_temp_df <- data.frame("value" = B[!B_fixed_indices], 162 | "param" = rep("B",sum(!B_fixed_indices)), 163 | "k" = B_k_matrix[!B_fixed_indices], 164 | "j" = B_j_matrix[!B_fixed_indices]) 165 | 166 | ### make sure B is ordered by k and *then* j 167 | 168 | varying_temp_df <- varying_temp_df[order(varying_temp_df$k, 169 | varying_temp_df$j),] 170 | 171 | fixed_df <- rbind(fixed_df, 172 | known_temp_df) 173 | 174 | varying_df <- rbind(varying_df, 175 | varying_temp_df) 176 | 177 | 178 | 179 | temp_df <- data.frame("value" = as.numeric(gammas), 180 | "param" = "gamma", 181 | "k" = 1:n, 182 | "j" = 0) 183 | fixed_df <- rbind(fixed_df,temp_df[((1:n) %in% which(gammas_fixed_indices)),]) 184 | 185 | varying_df <- rbind(varying_df,temp_df[!((1:n) %in% which(gammas_fixed_indices)),]) 186 | 187 | temp_df <- data.frame("value" = as.numeric(gamma_tilde), 188 | "param" = "gamma_tilde", 189 | "k" = 1:nrow(gamma_tilde), 190 | "j" = 0) 191 | 192 | K_tilde <- length(as.numeric(gamma_tilde)) 193 | fixed_df <- rbind(fixed_df,temp_df[((1:K_tilde) %in% which(gamma_tilde_fixed_indices)),]) 194 | 195 | varying_df <- rbind(varying_df,temp_df[!((1:K_tilde) %in% which(gamma_tilde_fixed_indices)),]) 196 | 197 | if(!is.null(alpha_tilde)){ 198 | varying_df <- rbind(varying_df, 199 | data.frame("value" = alpha_tilde, 200 | "param" = "alpha_tilde", 201 | "k" = 1:length(alpha_tilde), 202 | "j" = 0)) 203 | } 204 | 205 | return(list("fixed" = fixed_df, 206 | "varying" = varying_df)) 207 | 208 | } 209 | -------------------------------------------------------------------------------- /R/pb_update_one_constrained.R: -------------------------------------------------------------------------------- 1 | # 2 | # pb_update_one_constrained <- 3 | # function(constr_gradient_term, # alpha root-n grad l-star - grad l 4 | # H2dot_transp_theta_hat, 5 | # #H2dot_n^Ttheta^hat %*% 6 | # #rbind(curr_theta_const - theta_const_mle, 7 | # #-theta_var_mle) 8 | # # where theta_var is (simplex-constrained) parameter to be optimized 9 | # # over, and theta_const is all other parameters (held constant in 10 | # # this step) 11 | # H22n, #submatrix of criterion hessian corresp. to theta_var 12 | # curr_theta_var, 13 | # curr_theta_const, 14 | # theta_hat_var, 15 | # theta_hat_const 16 | # ){ 17 | # 18 | # #directly use auglag - no simpl_auglag_fnnls 19 | # # prox_criterion <- function(theta_var){ 20 | # # theta_diff <- ( rbind(curr_theta_const, 21 | # # x) - 22 | # # rbind(theta_hat_const, 23 | # # theta_hat_var)) 24 | # # gr_term <- constr_gradient_term %*%theta_diff 25 | # # hess_term <- t(theta_diff)%*%H22n%*%theta_diff 26 | # # return(as.numeric(gr_term + 0.5*hess_term)) 27 | # # } 28 | # # 29 | # # 30 | # # simpl_auglag_fnnls(x = curr_theta_var, 31 | # # fn = prox_criterion, #function of x to optimize 32 | # # xhess = H22n, #hessian at x 33 | # # xgrad = , #gradient at x 34 | # # lambda, #penalty parameters 35 | # # nu = 1, #starting lagrangian penalty 36 | # # mu = 1, #starting augmented lagrangian penalty 37 | # # constraint_tolerance = 1e-10, #sum-to-one constraint tolerance 38 | # # maxit = 100 # maximum number of iterations (outer loop) 39 | # # 40 | # # ) 41 | # } 42 | -------------------------------------------------------------------------------- /R/pb_update_unconstrained.R: -------------------------------------------------------------------------------- 1 | # 2 | # 3 | # 4 | # pb_update_unconstrained <- 5 | # function(hessian_premultiplier, #H22n_inv%**H2dotn 6 | # theta_hat_unconstr, #mle 7 | # premult_unconstr_gradient_term, #H22n_inv %*% alpha root-n diff unconstr. gradients 8 | # theta_hat_constr, #mle for unconstrained variables 9 | # theta_hat_unconstr, #mle for constrained variables 10 | # curr_theta_constr #current value of constrained pars (in pb update) 11 | # ){ 12 | # return( 13 | # -hessian_premultiplier%*% rbind(curr_theta_constr - theta_hat_constr, 14 | # -theta_hat_unconstr) - 15 | # premult_unconstr_gradient_term 16 | # ) 17 | # } 18 | -------------------------------------------------------------------------------- /R/poisson_criterion.R: -------------------------------------------------------------------------------- 1 | 2 | poisson_criterion <- function(W, 3 | means, 4 | wts = NULL){ 5 | n <- nrow(W) 6 | J <- ncol(W) 7 | wt_list <- lapply(1:n, 8 | function(i) wts[1:J + (i - 1)*J]) 9 | wts <- do.call(rbind,wt_list) 10 | 11 | rm(wt_list) 12 | 13 | if( (ncol(W) != ncol(means)) | (nrow(W) != nrow(means))){ 14 | stop("W and means must have the same dimensions.") 15 | } 16 | 17 | if(is.null(wts)){ 18 | return(sum(sapply(1:nrow(means), function(i) sapply(1:ncol(means), 19 | function(j) 20 | -logpois(W[i,j],means[i,j]))))) 21 | } 22 | else{ 23 | if((ncol(W) != ncol(wts)) | (nrow(W) != nrow(wts))){ 24 | stop("Weight matrix wts must have same dimesions as W.") 25 | } 26 | return(sum(sapply(1:nrow(means), function(i) sapply(1:ncol(means), 27 | function(j) 28 | -wts[i,j]*logpois(W[i,j],means[i,j]))))) 29 | 30 | } 31 | 32 | } 33 | -------------------------------------------------------------------------------- /R/proximal_bootstrap.R: -------------------------------------------------------------------------------- 1 | # 2 | # 3 | # proximal_bootstrap <- function( 4 | # n, #number of observations 5 | # alpha_n, #bootstrap scaling term 6 | # varying_df, #dataframe containing fitted parameter values 7 | # fixed_df, #dataframe containing fixed parameter values 8 | # W, #observations 9 | # X, #sample efficiency design 10 | # Z, #sample specimen design 11 | # Z_tilde = NULL, 12 | # Z_tilde_gamma_cols, 13 | # gammas_fixed_indices, 14 | # P_fixed_indices, 15 | # B_fixed_indices, 16 | # X_tilde, 17 | # P_tilde_fixed_indices, 18 | # gamma_tilde_fixed_indices, 19 | # Z_tilde_list = NULL, 20 | # constraint_tolerance = 1e-10, 21 | # hessian_regularization = 0.01, 22 | # criterion = "Poisson", 23 | # gmm_inv_wts = NULL, 24 | # nu = 1, #starting lagrangian penalty 25 | # mu = 1, #starting augmented lagrangian penalty 26 | # ){ 27 | # 28 | # params <- dataframes_to_parameters(fixed_df, 29 | # varying_df) 30 | # 31 | # jacobian <- mean_jac_lr_faster(fixed_df = fixed_df, 32 | # varying_lr_df = varying_lr_df, 33 | # varying_df = varying_df, 34 | # which_k_p = which_k_p, 35 | # which_k_p_tilde = which_k_p_tilde, 36 | # which_B_rows = which_B_rows, 37 | # which_B_keep = which_B_keep, 38 | # which_gammas = which_gammas, 39 | # which_gamma_tilde = which_gamma_tilde, 40 | # Ak_list = Ak_list, 41 | # A_tilde_k_list = A_tilde_k_list, 42 | # fixed_P_multipliers = fixed_P_multipliers, 43 | # fixed_P_tilde_multipliers = fixed_P_tilde_multipliers, 44 | # params = params, 45 | # X = X, 46 | # Z = Z, 47 | # K = K, 48 | # K_tilde = K_tilde, 49 | # X_tilde = X_tilde, 50 | # Z_tilde = Z_tilde, 51 | # Z_tilde_gamma_cols =Z_tilde_gamma_cols, 52 | # Z_tilde_list = Z_tilde_list, 53 | # sparse = TRUE, 54 | # proportion_scale = TRUE, 55 | # P_fixed_indices = P_fixed_indices, 56 | # P_tilde_fixed_indices = P_tilde_fixed_indices) 57 | # 58 | # means <- meaninate(gammas = params$gammas, 59 | # B = params$B, 60 | # X = X, 61 | # Z = Z, 62 | # P = params$P, 63 | # X_tilde = X_tilde, 64 | # Z_tilde = Z_tilde, 65 | # Z_tilde_gamma_cols = Z_tilde_gamma_cols, 66 | # P_tilde = params$P_tilde, 67 | # gamma_tilde = params$gamma_tilde, 68 | # alpha_tilde = params$alpha_tilde, 69 | # Z_tilde_list = Z_tilde_list) 70 | # 71 | # W_long <- lapply(1:n,function(i) as.numeric(W[i,])) 72 | # W_long <- do.call(c,W_long) 73 | # means_long <- lapply(1:n, function(i) as.numeric(means[i,])) 74 | # means_long <- do.call(c,means_long) 75 | # 76 | # ### only for Poisson: 77 | # if(criterion == "Poisson"){ 78 | # V <- diag(1/means_long) 79 | # } else{ 80 | # V <- diag(rep(1, length(means_long))) 81 | # } 82 | # 83 | # if(!is.null(gmm_inv_wts)){ 84 | # diag(V) <- diag(V)/gmm_inv_wts 85 | # } 86 | # 87 | # dli_dtheta <- 88 | # lapply(1:n, function(i) 89 | # -Matrix::crossprod(jacobian[1:J + (i - 1)*J,], 90 | # V[1:J + (i - 1)*J,1:J + (i - 1)*J])%*%( 91 | # Matrix::Matrix(W_long[1:J + (i - 1)*J] - means_long[1:J + (i - 1)*J], 92 | # ncol = 1))) 93 | # 94 | # dli_dtheta <- do.call(cbind,dli_dtheta) 95 | # dli_dtheta <- dli_dtheta/n 96 | # 97 | # lgrad <- apply(dli_dtheta,1,sum) 98 | # 99 | # V2 <- as(sqrt(abs(V)),"sparseMatrix") 100 | # pre_info <- V2%*%jacobian 101 | # H_n <- Matrix::crossprod(pre_info) 102 | # 103 | # 104 | # boot_weights <- rexp(n) 105 | # boot_weights <- boot_weights/sum(boot_weights) 106 | # 107 | # 108 | # lgrad_star <- apply(dli_dtheta%*%diag(boot_weights),1,sum) 109 | # 110 | # diff_dls <- alpha_n*sqrt(n)*as.numeric( 111 | # (apply(dli_star_dtheta,1,sum) - apply(dli_dtheta,1,sum))) 112 | # diff_dls <- matrix(diff_dls,nrow = 1) 113 | # 114 | # prox_crit <- function(x){ 115 | # x_deviation <- matrix(x - varying_df$value,ncol = 1) 116 | # 117 | # return(as.numeric(diff_dls%*%x_deviation + 118 | # 0.5*t(x_deviation)%*%H_n%*%x_deviation)) 119 | # 120 | # } 121 | # 122 | # K <- length(unique(varying_df$k[varying_df$param == "P"])) 123 | # K_tilde <- length(unique(varying_df$k[varying_df$param == "P_tilde"])) 124 | # aug_lag_params <- matrix(1,ncol = 2, nrow = n_simplex_constraints) 125 | # 126 | # varying_p_k <- unique(varying_df$k[varying_df$param == "P"]) 127 | # simplex_matrix_P <- 128 | # lapply(varying_p_k, 129 | # function(d) as.numeric( 130 | # (varying_df$k == d) & (varying_df$param == "P")) 131 | # ) 132 | # 133 | # simplex_matrix_P <- do.call(rbind,simplex_matrix_P) 134 | # 135 | # varying_p_tilde_k <- unique(varying_df$k[varying_df$param == "P_tilde"]) 136 | # simplex_matrix_P_tilde <- 137 | # lapply(varying_p_tilde_k, 138 | # function(d) as.numeric( 139 | # (varying_df$k == d) & (varying_df$param == "P_tilde")) 140 | # ) 141 | # 142 | # simplex_matrix_P_tilde <- do.call(rbind,simplex_matrix_P_tilde) 143 | # 144 | # for(k in 1:maxit){ 145 | # counter <- counter + 1 146 | # # print(counter) 147 | # 148 | # new_x <- optim(varying_df$value, 149 | # ) 150 | # 151 | # # print("checking constraints") 152 | # 153 | # V <- abs(sum(x) - 1) 154 | # 155 | # satisfied <- V< constraint_tolerance 156 | # 157 | # 158 | # 159 | # if(V < constraint_tolerance){ 160 | # return(x) 161 | # } 162 | # 163 | # if(V < 0.25*previous_V){ 164 | # nu <- nu + 2*mu*V 165 | # } else{ 166 | # mu <- 2*mu 167 | # } 168 | # # print(mu) 169 | # # print(nu) 170 | # previous_V <- V 171 | # 172 | # } 173 | # 174 | # 175 | # 176 | # 177 | # } 178 | -------------------------------------------------------------------------------- /R/push_row.R: -------------------------------------------------------------------------------- 1 | push_row <- function(row_vec, 2 | row_num, 3 | target_matrix){ 4 | target_matrix[row_num,] <- row_vec 5 | return(target_matrix) 6 | } 7 | -------------------------------------------------------------------------------- /R/ra_to_lr.R: -------------------------------------------------------------------------------- 1 | 2 | ra_to_lr <- function(varying_df){ 3 | 4 | P_df <- varying_df[varying_df$param == "P",] 5 | P_tilde_df <- varying_df[varying_df$param == "P_tilde",] 6 | 7 | ### Generate rho data.frame 8 | P_rows <- unique(P_df$k) 9 | 10 | rho_df <- P_df[rep(F, nrow(P_df)),] 11 | for(p_row in P_rows){ 12 | mini_P <- P_df[P_df$k == p_row,] 13 | ref_j <- max(mini_P$j) 14 | mini_rho <- mini_P[mini_P$j != ref_j,] 15 | mini_rho$param <- "rho" 16 | mini_rho$value <- log(mini_rho$value) - log(mini_P$value[mini_P$j == ref_j]) 17 | rho_df <- rbind(rho_df, mini_rho) 18 | } 19 | 20 | ### Generate rho_tilde data.frame 21 | P_tilde_rows <- unique(P_tilde_df$k) 22 | 23 | rho_tilde_df <- P_tilde_df[rep(F, nrow(P_df)),] 24 | for(p_tilde_row in P_tilde_rows){ 25 | mini_P_tilde <- P_tilde_df[P_tilde_df$k == p_tilde_row,] 26 | ref_j <- max(mini_P_tilde$j) 27 | mini_rho_tilde <- mini_P_tilde[mini_P_tilde$j != ref_j,] 28 | mini_rho_tilde$param <- "rho_tilde" 29 | mini_rho_tilde$value <- log(mini_rho_tilde$value) - 30 | log(mini_P_tilde$value[mini_P_tilde$j == ref_j]) 31 | rho_tilde_df <- rbind(rho_tilde_df, mini_rho_tilde) 32 | } 33 | 34 | varying_lr_df <- rbind(varying_df[!(varying_df$param %in% c("P","P_tilde")),], 35 | rho_df, 36 | rho_tilde_df) 37 | 38 | # varying_lr_df <- rbind(varying_lr_df[varying_lr_df$param == "rho",], 39 | # varying_lr_df[varying_lr_df$param == "rho_tilde",], 40 | # varying_lr_df[varying_lr_df$param == "B",], 41 | # varying_lr_df[varying_lr_df$param == "gamma",], 42 | # varying_lr_df[varying_lr_df$param == "gamma_tilde",]) 43 | 44 | rownames(varying_lr_df) <- 1:nrow(varying_lr_df) 45 | 46 | varying_lr_df <- rbind(varying_lr_df[varying_lr_df$param != "alpha_tilde",], 47 | varying_lr_df[varying_lr_df$param == "alpha_tilde",]) 48 | 49 | return(varying_lr_df) 50 | 51 | } 52 | -------------------------------------------------------------------------------- /R/refit_model.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | refit_model <- function(fitted_model, 4 | m, 5 | seed = NULL, 6 | bootstrap_method = "bayesian_subsample", 7 | bootstrap_failure_cutoff = -1e4){ 8 | 9 | n <- nrow(fitted_model$W) 10 | J <- ncol(fitted_model$W) 11 | 12 | if(!is.null(seed)){ 13 | set.seed(seed) 14 | } 15 | 16 | if(!(boot_method %in% c("bayesian_subsample", 17 | "subsample"))){ 18 | stop("Argument boot_method must be equal to `bayesian_subsample` 19 | or `subsample`.") 20 | } 21 | 22 | if(boot_method == "bayesian_subsample"){ 23 | bootstrap_weights <- rgamma(n_effective,shape = m/n_effective) 24 | bootstrap_weights <- bootstrap_weights/sum(bootstrap_weights) 25 | } 26 | if(boot_method == "subsample"){ 27 | bootstrap_weights <- rmultinom(1,m,rep(1/n,n)) 28 | bootstrap_weights <- bootstrap_weights/sum(bootstrap_weights) 29 | } 30 | 31 | bootstrap_weights <- rep(bootstrap_weights, each = J) 32 | 33 | refit <- 34 | estimate_parameters(W = fitted_model$W, 35 | X = fitted_model$X, 36 | Z = fitted_model$Z, 37 | Z_tilde = fitted_model$Z_tilde, 38 | Z_tilde_gamma_cols = fitted_model$Z_tilde_gamma_cols, 39 | gammas = fitted_model$gammas, 40 | gammas_fixed_indices = 41 | fitted_model$gammas_fixed_indices, 42 | P = fitted_model$P + .01, 43 | P_fixed_indices = fitted_model$P_fixed_indices, 44 | B = fitted_model$B, 45 | B_fixed_indices = fitted_model$B_fixed_indices, 46 | X_tilde = fitted_model$X_tilde, 47 | P_tilde = fitted_model$P_tilde + .01, 48 | P_tilde_fixed_indices = 49 | fitted_model$P_tilde_fixed_indices, 50 | gamma_tilde = fitted_model$gamma_tilde, 51 | gamma_tilde_fixed_indices = 52 | fitted_model$gamma_tilde_fixed_indices, 53 | alpha_tilde = fitted_model$alpha_tilde, 54 | Z_tilde_list = fitted_model$Z_tilde_list, 55 | barrier_t = 1, #starting value of reciprocal barrier penalty coef. 56 | barrier_scale = 10, #increments for value of barrier penalty 57 | max_barrier = 1e10, #maximum value of barrier_t 58 | initial_conv_tol = 1000, 59 | final_conv_tol = 0.1, 60 | constraint_tolerance = 1e-10, 61 | hessian_regularization = .01, 62 | criterion = fitted_model$criterion, 63 | profile_P = TRUE, 64 | profiling_maxit = 25, 65 | wts = bootstrap_weights, 66 | verbose = FALSE, 67 | bootstrap_failure_cutoff = bootstrap_failure_cutoff) 68 | 69 | return(list("varying" = refit$varying, 70 | "objective" = refit$objective)) 71 | } 72 | -------------------------------------------------------------------------------- /R/safe_divide.R: -------------------------------------------------------------------------------- 1 | ##################### Division for Log Likelihood Derivatives ##################### 2 | safe_divide <- function(numer,denom, zero_divisor_penalty = 0){ 3 | if((numer ==0 )&(denom == 0)){ 4 | return(1) 5 | } else{ 6 | if(denom == 0){ 7 | return(zero_divisor_penalty) 8 | } 9 | } 10 | return(numer/denom) 11 | } 12 | -------------------------------------------------------------------------------- /R/safe_multiply.R: -------------------------------------------------------------------------------- 1 | 2 | safe_multiply <- function(x,y){ 3 | if(length(x)!= length(y)){ 4 | stop("Arguments x and y must have equal length") 5 | } 6 | 7 | product <- x*y 8 | 9 | #define inf*0 = 1; -inf*0 = -1 10 | product[is.infinite(x) & y == 0] <- sign(x)[is.infinite(x) & y == 0] 11 | product[is.infinite(y) & x == 0] <- sign(y)[is.infinite(y) & x == 0] 12 | 13 | return(product) 14 | 15 | } 16 | -------------------------------------------------------------------------------- /R/simpl_auglag_fnnls.R: -------------------------------------------------------------------------------- 1 | #' @import fastnnls 2 | #' 3 | #' 4 | simpl_auglag_fnnls <- function(x, 5 | fn, #function of x to optimize 6 | xhess, #hessian at x 7 | xgrad, #gradient at x 8 | lambda, #penalty parameters 9 | nu = 1, #starting lagrangian penalty 10 | mu = 1, #starting augmented lagrangian penalty 11 | constraint_tolerance = 1e-10, #sum-to-one constraint tolerance 12 | maxit = 100 # maximum number of iterations (outer loop) 13 | 14 | ){ 15 | 16 | npar <- nrow(xhess) 17 | 18 | x0 <- matrix(x, ncol = 1) 19 | xgrad <- matrix(xgrad,ncol = 1) 20 | 21 | previous_V <- Inf 22 | # grad_norm <- sqrt(sum(xgrad^2)) 23 | counter <- 0 24 | for(k in 1:maxit){ 25 | counter <- counter + 1 26 | # print(counter) 27 | 28 | ATA <- 0.5*xhess + lambda*diag(rep(1,length(xgrad))) + matrix(mu,nrow = npar, ncol = npar) 29 | 30 | 31 | # print(npar) 32 | Ab <- t((-0.5)*(matrix(nu - 2*mu,nrow = 1, ncol = npar) - 33 | 2*lambda*t(x0) - 34 | t(x0)%*%xhess + t(xgrad))) 35 | # 36 | # eigen_ATA <- eigen(ATA) 37 | # 38 | # sqrt_eigen_values <- sapply(eigen_ATA$values, 39 | # function(t) ifelse(t>=0,sqrt(t),0)) 40 | # 41 | # inv_sqrt_eigen_values <- sapply(eigen_ATA$values, 42 | # function(t) ifelse(t>0,1/sqrt(t),0)) 43 | # 44 | # A <- eigen_ATA$vectors%*%diag(sqrt_eigen_values)%*%t(eigen_ATA$vectors) 45 | # A_inv <- eigen_ATA$vectors%*%diag(inv_sqrt_eigen_values)%*%t(eigen_ATA$vectors) 46 | # b <- A_inv%*%Ab 47 | # 48 | # 49 | # x <- nnls::nnls(A,b)$x 50 | 51 | ## BEGIN Amy July 8 2023 52 | # stopifnot(counter<60) 53 | if (counter >= 60) { 54 | warning(paste("In fastNNLS, the counter is", counter, "which is >=60. Potential convergence issue?")) 55 | } 56 | ## END Amy July 8 2023 57 | 58 | # print("doing fastnnls") 59 | x <- fastnnls::fast_nnls(ZTx = Ab, ZTZ = ATA, 60 | tolerance = constraint_tolerance) 61 | 62 | # print("checking constraints") 63 | 64 | V <- abs(sum(x) - 1) 65 | 66 | satisfied <- V< constraint_tolerance 67 | 68 | if(V < constraint_tolerance){ 69 | return(x) 70 | } 71 | 72 | if(V < 0.25*previous_V){ 73 | nu <- nu + 2*mu*V 74 | } else{ 75 | mu <- 2*mu 76 | } 77 | # print(mu) 78 | # print(nu) 79 | previous_V <- V 80 | 81 | } 82 | 83 | 84 | 85 | warning("Maximum iterations reached; returning initial value") 86 | return(x0) 87 | } 88 | -------------------------------------------------------------------------------- /R/simpl_opt_linesearch_fnnls.R: -------------------------------------------------------------------------------- 1 | simpl_opt_linesearch_fnnls <- function(fn, #objective function to be minimized 2 | x, #starting values of simplex-constrained parameter 3 | xhess, # hessian matrix of objective function at x 4 | xgrad, # gradient of objective function at x 5 | lambda = 0, #trust penalty 6 | maxit = 100, 7 | constraint_tolerance = 1e-10 8 | ){ 9 | 10 | x0 <- x 11 | curr_fn_value <- fn(x0) 12 | 13 | 14 | new_x <- simpl_auglag_fnnls(x = x, 15 | fn = fn, 16 | xhess = xhess, 17 | xgrad = xgrad, 18 | lambda = lambda, 19 | constraint_tolerance = constraint_tolerance) 20 | 21 | new_fn_value <- fn(new_x) 22 | 23 | step_direction <- new_x - x0 24 | 25 | stepsize <- 1 26 | 27 | fn_decrease <- 10 28 | 29 | while((fn_decrease > 0)&(stepsize>1e-2)){ 30 | 31 | 32 | prop_x <- x0 + stepsize*step_direction 33 | 34 | new_fn_value <- fn(prop_x) 35 | if(is.nan(new_fn_value)){ 36 | new_fn_value <- 1e100 37 | } 38 | 39 | if(min(prop_x)<0){ 40 | new_fn_value <- 1e100 41 | } 42 | # 43 | fn_decrease <- new_fn_value - curr_fn_value 44 | stepsize <- stepsize/2 45 | 46 | } 47 | 48 | if(fn_decrease <= 0){ 49 | return(prop_x) 50 | } else{ 51 | return(x0) 52 | } 53 | 54 | 55 | } 56 | -------------------------------------------------------------------------------- /R/simulate_paper_data.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | simulate_paper_data <- function(n, 4 | J, 5 | B_multiplier, 6 | distrib, 7 | seed){ 8 | 9 | if(!(J %in% c(5,20))){ 10 | stop("Number of taxa J must be equal to 5 or 20 in this simulation") 11 | } 12 | 13 | if(!(n %in% c(1,3))){ 14 | stop("Number of technical replicates n must be 1 or 3 in this simulation") 15 | } 16 | 17 | ### generate Z_tilde (alpha_tilde_k = 0 for k = 1, 2) 18 | Z_tilde <- do.call(rbind,lapply(1:n, 19 | function(x) matrix(rep(c(1,9,81,729), 20 | 4), 21 | ncol = 1))) 22 | 23 | ### generate Z 24 | Z <- do.call(rbind,lapply(1:4, 25 | function(x) do.call(rbind, 26 | lapply(1:(4*n),function(k) matrix( 27 | as.numeric(x == 1:4),nrow = 1 28 | ))))) 29 | 30 | X <- matrix(1,nrow = n*16,ncol = 1) 31 | 32 | B_star <- matrix(c( 33 | rep(c(3,-1,1,-3),ceiling(J/4))[1:(J - 1)], 34 | 0),nrow = 1,ncol = J) 35 | 36 | B <- B_star*B_multiplier 37 | 38 | P1 <- 2^(seq(0,4,length.out = J)) 39 | P1 <- P1/sum(P1) 40 | P2 <- P1[J:1] 41 | P3 <- c(rep(0,0.4*J), 42 | 10^seq(0,2,length.out = 0.6*J)) 43 | P3 <- P3/sum(P3) 44 | P4 <- P3[J:1] 45 | 46 | P <- rbind(P1,P2,P3,P4) 47 | 48 | P_tilde <- matrix(1/J,ncol = J, nrow = 1) 49 | X_tilde <- matrix(1,ncol = 1, nrow = 1) 50 | gamma_tilde <- -3.7 51 | 52 | dilutions <- log(as.numeric(Z_tilde))/log(9) 53 | 54 | gamma_means <- sapply(dilutions, function(d) min(13.5 - 1.5*d,12) ) 55 | set.seed(seed) 56 | gammas <- sapply(gamma_means, function(x) rnorm(1,mean = x, sd = sqrt(0.05))) 57 | 58 | 59 | means <- meaninate(gammas = gammas, 60 | B = B, 61 | X= X, 62 | Z = Z, 63 | P = P, 64 | X_tilde = X_tilde, 65 | Z_tilde = Z_tilde, 66 | Z_tilde_gamma_cols = ncol(Z_tilde), 67 | P_tilde = P_tilde, 68 | gamma_tilde = gamma_tilde) 69 | 70 | 71 | if(distrib== "Poisson"){ 72 | W <- apply(means, c(1,2), function(x) rpois(1,lambda = x)) 73 | } 74 | 75 | if(distrib == "NB"){ 76 | W <- apply(means, c(1,2), function(x) rnbinom(1,mu = x, size = 13)) 77 | } 78 | 79 | return(W) 80 | 81 | } 82 | -------------------------------------------------------------------------------- /R/simulate_simple_data.R: -------------------------------------------------------------------------------- 1 | #' @import stats 2 | #' 3 | #' 4 | simulate_simple_data <- function(B, 5 | distrib = "Poisson", 6 | n = 5, 7 | gamma_mean = 11){ 8 | if(nrow(B) != 1){ 9 | stop("B must have a single row") 10 | } 11 | J <- ncol(B) 12 | gammas <- rnorm(n,gamma_mean) 13 | 14 | means <- meaninate(gammas, 15 | B = B, 16 | X = matrix(1, nrow = n,ncol = 1), 17 | Z = matrix(1, nrow = n, ncol = 1), 18 | P = matrix(1/J,nrow = 1, ncol = J), 19 | X_tilde = matrix(0,nrow = 1, ncol = 1), 20 | Z_tilde = matrix(0,nrow = n, ncol = 1), 21 | Z_tilde_gamma_cols = 1, 22 | P_tilde = matrix(1/J,nrow = 1, ncol = J), 23 | gamma_tilde = matrix(0, ncol = 1, nrow = 1), 24 | alpha_tilde = NULL, 25 | Z_tilde_list = NULL, 26 | return_separate = FALSE, 27 | exclude_gammas = FALSE) 28 | 29 | if(distrib == "Poisson"){ 30 | W <- apply(means,c(1,2),function(x) rpois(1,x)) 31 | } 32 | if(distrib == "nb10"){ 33 | W <- apply(means,c(1,2), function(x) rnbinom(1, mu = x, size = 10)) 34 | } 35 | 36 | if(distrib == "nb.5"){ 37 | W <- apply(means,c(1,2), function(x) rnbinom(1, mu = x, size = .5)) 38 | } 39 | 40 | return(W) 41 | } 42 | -------------------------------------------------------------------------------- /R/universal_test.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | universal_test <- function(W, 4 | full_model, 5 | null_model, 6 | parallelize = FALSE){ 7 | U <- crossfit_U(W = W, 8 | full_model = full_model, 9 | null_model = null_model, 10 | parallelize = parallelize) 11 | 12 | return(1/U) 13 | } 14 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | ```{r, include = FALSE} 8 | knitr::opts_chunk$set( 9 | collapse = TRUE, 10 | comment = "#>", 11 | fig.path = "man/figures/README-", 12 | out.width = "100%" 13 | ) 14 | ``` 15 | 16 | # tinyvamp 17 | 18 | 19 | 20 | [![Coverage status](https://codecov.io/gh/statdivlab/tinyvamp/branch/main/graph/badge.svg)](https://codecov.io/github/statdivlab/tinyvamp?branch=main) 21 | 22 | 23 | `tinyvamp` is a package for estimation and removal of measurement error in high-throughput sequencing data. 24 | 25 | Full details on the model and estimation method are available in the [preprint](https://arxiv.org/abs/2204.12733). 26 | 27 | **Documentation and vignettes are under construction - please check back soon!** 28 | 29 | **Needs something more urgently? File an [issue](https://github.com/statdivlab/tinyvamp/issues) or [send Amy an email](http://statisticaldiversitylab.com/team/amy-willis)!** 30 | 31 | ## Installation 32 | 33 | You can install the development version of tinyvamp from [GitHub](https://github.com/) with: 34 | 35 | ``` r 36 | # install.packages("remotes") 37 | remotes::install_github("statdivlab/tinyvamp") 38 | ``` 39 | 40 | If you haven't already, you may need to install `fastnnls` and `logsum`, too: 41 | 42 | ``` r 43 | # install.packages("remotes") 44 | remotes::install_github("ailurophilia/fastnnls") 45 | remotes::install_github("ailurophilia/logsum") 46 | remotes::install_github("statdivlab/tinyvamp") 47 | ``` 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | ## Humans 60 | 61 | Authors: [David Clausen](https://www.biostat.washington.edu/people/david-clausen) and [Amy Willis](http://statisticaldiversitylab.com) 62 | 63 | Do you have a request for us? Let us know! We want folks to use `tinyvamp` and will try to make it as easy as possible. 64 | 65 | Do you have a question? Check out the above documentation list, then shoot us an email or open a [Discussion](https://github.com/adw96/breakaway/discussions). We receive a lot of emails from users, so we try to answer questions on the Wiki rather than responding to everyone individually. 66 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # tinyvamp 5 | 6 | 7 | 8 | 9 | [![Coverage 10 | status](https://codecov.io/gh/statdivlab/tinyvamp/branch/main/graph/badge.svg)](https://codecov.io/github/statdivlab/tinyvamp?branch=main) 11 | 12 | 13 | `tinyvamp` is a package for estimation and removal of measurement error 14 | in high-throughput sequencing data. 15 | 16 | Full details on the model and estimation method are available in the 17 | [preprint](https://arxiv.org/abs/2204.12733). 18 | 19 | **Documentation and vignettes are under construction - please check back 20 | soon!** 21 | 22 | **Needs something more urgently? File an 23 | [issue](https://github.com/statdivlab/tinyvamp/issues) or [send Amy an 24 | email](http://statisticaldiversitylab.com/team/amy-willis)!** 25 | 26 | ## Installation 27 | 28 | You can install the development version of tinyvamp from 29 | [GitHub](https://github.com/) with: 30 | 31 | ``` r 32 | # install.packages("remotes") 33 | remotes::install_github("statdivlab/tinyvamp") 34 | ``` 35 | 36 | If you haven’t already, you may need to install `fastnnls` and `logsum`, 37 | too: 38 | 39 | ``` r 40 | # install.packages("remotes") 41 | remotes::install_github("ailurophilia/fastnnls") 42 | remotes::install_github("ailurophilia/logsum") 43 | remotes::install_github("statdivlab/tinyvamp") 44 | ``` 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | ## Humans 54 | 55 | Authors: [David 56 | Clausen](https://www.biostat.washington.edu/people/david-clausen) and 57 | [Amy Willis](http://statisticaldiversitylab.com) 58 | 59 | Do you have a request for us? Let us know! We want folks to use 60 | `tinyvamp` and will try to make it as easy as possible. 61 | 62 | Do you have a question? Check out the above documentation list, then 63 | shoot us an email or open a 64 | [Discussion](https://github.com/adw96/breakaway/discussions). We receive 65 | a lot of emails from users, so we try to answer questions on the Wiki 66 | rather than responding to everyone individually. 67 | -------------------------------------------------------------------------------- /data/costea2017_metaphlan2_profiles.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statdivlab/tinyvamp/cf31c9ddcd1fee00e1232b3091eed94f18aeefab/data/costea2017_metaphlan2_profiles.rda -------------------------------------------------------------------------------- /data/costea2017_mock_composition.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statdivlab/tinyvamp/cf31c9ddcd1fee00e1232b3091eed94f18aeefab/data/costea2017_mock_composition.rda -------------------------------------------------------------------------------- /data/costea2017_sample_data.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statdivlab/tinyvamp/cf31c9ddcd1fee00e1232b3091eed94f18aeefab/data/costea2017_sample_data.rda -------------------------------------------------------------------------------- /data/karstens.rdata: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statdivlab/tinyvamp/cf31c9ddcd1fee00e1232b3091eed94f18aeefab/data/karstens.rdata -------------------------------------------------------------------------------- /data/karstens_phyloseq.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statdivlab/tinyvamp/cf31c9ddcd1fee00e1232b3091eed94f18aeefab/data/karstens_phyloseq.rda -------------------------------------------------------------------------------- /man/bootstrap_ci.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bootstrap_ci.R 3 | \name{bootstrap_ci} 4 | \alias{bootstrap_ci} 5 | \title{Apply the Bayesian subsampled bootstrap to a fitted tinyvamp model} 6 | \usage{ 7 | bootstrap_ci( 8 | W, 9 | fitted_model, 10 | n_boot, 11 | m = NULL, 12 | alpha = 0.05, 13 | parallelize = FALSE, 14 | ncores = 5, 15 | seed = NULL, 16 | return_models = FALSE, 17 | verbose = FALSE, 18 | adjust = FALSE 19 | ) 20 | } 21 | \description{ 22 | Apply the Bayesian subsampled bootstrap to a fitted tinyvamp model 23 | } 24 | -------------------------------------------------------------------------------- /man/calculate_log_penalty.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/calculate_log_penalty.R 3 | \name{calculate_log_penalty} 4 | \alias{calculate_log_penalty} 5 | \title{Calculate barrier penalty to add to objective function inside 6 | barrier algorithm} 7 | \usage{ 8 | calculate_log_penalty(varying_lr_df, fixed_df, barrier_t) 9 | } 10 | \arguments{ 11 | \item{varying_lr_df}{A data frame containing values of parameters that 12 | are treated as unknown, with relative abundance parameters represented 13 | on the log ratio scale (i.e., as phi and phi_tilde)} 14 | 15 | \item{fixed_df}{A data frame containing values of parameters that are 16 | treated as known} 17 | 18 | \item{barrier_t}{The current value of t, the barrier penalty parameter} 19 | } 20 | \value{ 21 | The calculated value of the barrier penalty 22 | } 23 | \description{ 24 | Calculate barrier penalty to add to objective function inside 25 | barrier algorithm 26 | } 27 | -------------------------------------------------------------------------------- /man/costea2017_mock_composition.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{costea2017_mock_composition} 5 | \alias{costea2017_mock_composition} 6 | \title{The composition of 10 spike-in taxa in 19 samples observed from flow cytometry.} 7 | \format{ 8 | An object of class \code{tbl_df} (inherits from \code{tbl}, \code{data.frame}) with 19 rows and 6 columns. 9 | } 10 | \source{ 11 | \url{https://doi.org/10.1038/nbt.3960} 12 | } 13 | \usage{ 14 | costea2017_mock_composition 15 | } 16 | \description{ 17 | For details, see Costea et al (2017). 18 | } 19 | \keyword{datasets} 20 | -------------------------------------------------------------------------------- /man/dataframes_to_parameters.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dataframes_to_parameters.R 3 | \name{dataframes_to_parameters} 4 | \alias{dataframes_to_parameters} 5 | \title{Convert parameter values stored in data frame format to matrix format} 6 | \usage{ 7 | dataframes_to_parameters(fixed_df, varying_df) 8 | } 9 | \arguments{ 10 | \item{fixed_df}{A dataframe containing values of model parameters 11 | treated as fixed and known (i.e. held constant at known values)} 12 | 13 | \item{varying_df}{A dataframe containing current values of model parameters treated 14 | as fixed and unknown (i.e., parameters to be estimated)} 15 | } 16 | \value{ 17 | A list containing 18 | \item{P}{Specimen relative abundance matrix (of dimension K x J)} 19 | \item{P_tilde}{Spurious read source 20 | relative abundance matrix (of dimension K-tilde x J)} 21 | \item{B}{A matrix of detection efficiencies (of dimension p x J)} 22 | \item{gammas}{An n-vector of sample-specific read intensities} 23 | \item{gamma_tilde}{A-vector of spurious read source intensities 24 | (of length K-tilde)} 25 | } 26 | \description{ 27 | Convert parameter values stored in data frame format to matrix format 28 | } 29 | \author{ 30 | David Clausen 31 | } 32 | -------------------------------------------------------------------------------- /man/estimate_parameters.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/estimate_parameters.R 3 | \name{estimate_parameters} 4 | \alias{estimate_parameters} 5 | \title{Fit tinyvamp model to HTS microbiome data} 6 | \usage{ 7 | estimate_parameters( 8 | W, 9 | X, 10 | Z, 11 | Z_tilde = NULL, 12 | Z_tilde_gamma_cols, 13 | gammas, 14 | gammas_fixed_indices, 15 | P, 16 | P_fixed_indices, 17 | B, 18 | B_fixed_indices, 19 | X_tilde, 20 | P_tilde, 21 | P_tilde_fixed_indices, 22 | gamma_tilde, 23 | gamma_tilde_fixed_indices, 24 | alpha_tilde = NULL, 25 | Z_tilde_list = NULL, 26 | barrier_t = 1, 27 | barrier_scale = 10, 28 | max_barrier = 1e+12, 29 | initial_conv_tol = 1000, 30 | final_conv_tol = 0.1, 31 | constraint_tolerance = 1e-10, 32 | hessian_regularization = 0.01, 33 | criterion = "Poisson", 34 | profile_P = TRUE, 35 | barrier_maxit = 500, 36 | profiling_maxit = 25, 37 | wts = NULL, 38 | verbose = FALSE, 39 | bootstrap_failure_cutoff = NULL, 40 | tinker_zeroes = 0.1, 41 | return_variance = FALSE 42 | ) 43 | } 44 | \arguments{ 45 | \item{W}{An \eqn{n \times J} matrix of numeric HTS output (e.g., read counts, coverages, etc.)} 46 | 47 | \item{X}{The sample efficiency design -- an \eqn{n \times p} matrix} 48 | 49 | \item{Z}{The sample-specimen design -- an \eqn{n \times K} matrix whose \eqn{ij}-th entry 50 | indicates the proportional contribution of specimen \eqn{j} to sample \eqn{i}. Rows must 51 | sum to 1 or be identically 0.} 52 | 53 | \item{Z_tilde}{The spurious read design -- an \eqn{n \times \tilde{K}} matrix where 54 | \eqn{\tilde{K}} is the number of spurious read sources modeled.} 55 | 56 | \item{Z_tilde_gamma_cols}{A numeric vector containing the columns of Z_tilde which should be 57 | multiplied by exp(gamma).} 58 | 59 | \item{gammas}{A numeric vector of length n of starting values for read intensity parameter gamma} 60 | 61 | \item{gammas_fixed_indices}{A logical vector of length n whose \eqn{i}-th entry is TRUE if the 62 | \eqn{i}-th entry of gamma should be treated as fixed and known, and FALSE otherwise} 63 | 64 | \item{P}{A \eqn{K \times J} numeric matrix giving initial values for the relative abundance matrix.} 65 | 66 | \item{P_fixed_indices}{P_fixed_indices A \eqn{K \times J} logical matrix specifying any entries of P that are known. If known, the corresponding values from \code{P} will be treated as the fixed, known values.} 67 | 68 | \item{B}{A \eqn{p \times J} numeric matrix giving initial values for the sample efficiencies.} 69 | 70 | \item{B_fixed_indices}{A \eqn{p \times J} logical matrix specifying any entries of B that are known. If known, the corresponding values from \code{B} will be treated as the fixed, known values.} 71 | 72 | \item{X_tilde}{A \eqn{\tilde{K} \times p} matrix giving the spurious read source efficiency design matrix} 73 | 74 | \item{P_tilde}{A \eqn{\tilde{K} \times J} numeric matrix giving initial values for the spurious read source relative abundances.} 75 | 76 | \item{P_tilde_fixed_indices}{A \eqn{\tilde{K} \times J} logical matrix indicating if the \eqn{(i,j)}th entry of \code{P_tilde} should be treated as fixed and known.} 77 | 78 | \item{gamma_tilde}{A numeric vector of length \eqn{\tilde{K}} of starting values for spurious read intensity parameter gamma_tilde} 79 | 80 | \item{gamma_tilde_fixed_indices}{A logical vector of length \eqn{\tilde{K}} whose \eqn{i}-th entry is TRUE if the 81 | \eqn{i}-th entry of gamma_tilde should be treated as fixed and known, and FALSE otherwise.} 82 | 83 | \item{alpha_tilde}{A numeric vector containing starting values of length \eqn{M}. If used, \code{Z_tilde_list} must be provided.} 84 | 85 | \item{Z_tilde_list}{A list of length \eqn{M + 1} containing matrices \eqn{\tilde{Z}_1,\dots,\tilde{Z}_{M + 1}} to be linearly combined to 86 | generate \code{Z_tilde}: \eqn{\tilde{Z} = \tilde{Z}_{(1)} + \sum_{m = 1}^M \exp(\tilde{\alpha}_m)\tilde{Z}_{(m + 1)}}. If used, 87 | \code{alpha_tilde} must be provided.} 88 | 89 | \item{barrier_t}{Starting value of reciprocal barrier penalty coef. Defaults to 1.} 90 | 91 | \item{barrier_scale}{Increments for value of barrier penalty. Defaults to 10.} 92 | 93 | \item{max_barrier}{Maximum value of barrier_t. Defaults to 1e12.} 94 | 95 | \item{constraint_tolerance}{The tolerance for the augmented Lagrangian algorithm. Final estimates of P are relative abundances to within \code{constraint_tolerance} of 1, i.e., abs(sum p_{kj} - 1) < \code{constraint_tolerance}. Defaults to 1e-10.} 96 | 97 | \item{hessian_regularization}{The second step of optimization involves a quadratic approximation to the likelihood, for which we use a modified Taylor series for stability. This is the constant that dampens the second term. Defaults to 0.01.} 98 | 99 | \item{criterion}{Should the algorithm return the Poisson maximum likelihood estimates or the reweighted Poisson maximum likelihood estimates? Options are "Poisson" or "reweighted_Poisson".} 100 | 101 | \item{profile_P}{Defaults to TRUE Run profiling step after barrier algorithm has run? If TRUE, this step is performed, possibly setting some estimated relative abundances in P equal to zero. If FALSE, profiling step is skipped and back-transformed log-ratio parameter estimated via barrier algorithm is returned for P.} 102 | 103 | \item{barrier_maxit}{The maximum number of iterations for the barrier method} 104 | 105 | \item{profiling_maxit}{Maximum number of iterations to run profiling step in P for (default is 25).} 106 | 107 | \item{wts}{Weights for reweighting the likelihood contributions. This is usually done to improve efficiency. Defaults to NULL. We compute the weights for you even if you choose \code{criterion = "reweighted_Poisson"}.} 108 | 109 | \item{verbose}{Do you want to know what I'm doing? Defaults to FALSE.} 110 | 111 | \item{bootstrap_failure_cutoff}{Defaults to NULL.} 112 | 113 | \item{tinker_zeroes}{Because the barrier method can only be applied to relative abundances in the interior of the simplex, tinker_zeroes divided by the number of taxa is added to all relative abundances to be estimated before the barrier method is applied. Default 0.1.} 114 | 115 | \item{return_variance}{Defaults to FALSE.} 116 | } 117 | \value{ 118 | A list containing estimated parameter values, along with the given inputs 119 | } 120 | \description{ 121 | This function fits a model to HTS microbiome data that allows for estimation of 122 | detection efficiency effects as well as modeling of spurious read sources 123 | (e.g., contamination). 124 | } 125 | \author{ 126 | David Clausen 127 | 128 | Amy Willis 129 | } 130 | -------------------------------------------------------------------------------- /man/mu_d_P.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/d_mu_d_P_original.R 3 | \name{mu_d_P} 4 | \alias{mu_d_P} 5 | \title{Calculate derivative of mu_ij with respect to an entry of P} 6 | \usage{ 7 | mu_d_P(i, j, m, gammas, B, X, Z, P) 8 | } 9 | \arguments{ 10 | \item{i}{The sample index (must be in 1, ..., n)} 11 | 12 | \item{j}{The taxon index (must be in 1, ..., J)} 13 | 14 | \item{m}{The row of P with respect to which to take derivative} 15 | 16 | \item{gammas}{Numeric vector of read intensities} 17 | 18 | \item{B}{Detection efficiency matrix} 19 | 20 | \item{X}{The efficiency design matrix (n x p)} 21 | 22 | \item{Z}{The sample design matrix (n x K)} 23 | 24 | \item{P}{The sample relative abundance matrix (K x J)} 25 | 26 | \item{X_tilde}{The spurious read efficiency design (K_tilde x p)} 27 | 28 | \item{Z_tilde}{The spurious read design (n x K_tilde)} 29 | 30 | \item{Z_tilde_gamma_cols}{Numeric vector containing indexes of columns of 31 | Z_tilde to scale by exp(gamma); NULL if no columns to be scaled} 32 | 33 | \item{P_tilde}{The spurious source relative abundance matrix (K_tilde x J)} 34 | 35 | \item{gamma_tilde}{Spurious read intensity parameter} 36 | } 37 | \value{ 38 | A derivative d mu_ij / d P_kj 39 | } 40 | \description{ 41 | Calculate derivative of mu_ij with respect to an entry of P 42 | } 43 | -------------------------------------------------------------------------------- /man/mu_d_beta.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mu_d_beta.R 3 | \name{mu_d_beta} 4 | \alias{mu_d_beta} 5 | \title{Calculate derivative of mu_ij with respect to B} 6 | \usage{ 7 | mu_d_beta( 8 | i, 9 | j, 10 | q, 11 | gammas, 12 | B, 13 | X, 14 | Z, 15 | P, 16 | X_tilde, 17 | Z_tilde, 18 | Z_tilde_gamma_cols, 19 | alpha_tilde = NULL, 20 | Z_tilde_list = NULL, 21 | P_tilde, 22 | gamma_tilde 23 | ) 24 | } 25 | \arguments{ 26 | \item{i}{The sample index (must be in 1, ..., n)} 27 | 28 | \item{j}{The taxon index (must be in 1, ..., J)} 29 | 30 | \item{q}{Which row of B to take derivative with respect to 31 | (must be in 1, ..., p)} 32 | 33 | \item{gammas}{Numeric vector of read intensities} 34 | 35 | \item{B}{Detection efficiency matrix} 36 | 37 | \item{X}{The efficiency design matrix (n x p)} 38 | 39 | \item{Z}{The sample design matrix (n x K)} 40 | 41 | \item{P}{The sample relative abundance matrix (K x J)} 42 | 43 | \item{X_tilde}{The spurious read efficiency design (K_tilde x p)} 44 | 45 | \item{Z_tilde}{The spurious read design (n x K_tilde)} 46 | 47 | \item{Z_tilde_gamma_cols}{Numeric vector containing indexes of columns of 48 | Z_tilde to scale by exp(gamma); NULL if no columns to be scaled} 49 | 50 | \item{P_tilde}{The spurious source relative abundance matrix (K_tilde x J)} 51 | 52 | \item{gamma_tilde}{Spurious read intensity parameter} 53 | } 54 | \value{ 55 | A derivative d mu_ij / d B_qj 56 | } 57 | \description{ 58 | Calculate derivative of mu_ij with respect to B 59 | } 60 | -------------------------------------------------------------------------------- /man/mu_d_gamma.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/d_mu_d_gamma_original.R 3 | \name{mu_d_gamma} 4 | \alias{mu_d_gamma} 5 | \title{Calculate derivative of mu_ij with respect to ith entry of gamma} 6 | \usage{ 7 | mu_d_gamma( 8 | i, 9 | j, 10 | gammas, 11 | B, 12 | X, 13 | Z, 14 | P, 15 | X_tilde, 16 | Z_tilde, 17 | Z_tilde_gamma_cols, 18 | P_tilde, 19 | gamma_tilde 20 | ) 21 | } 22 | \arguments{ 23 | \item{i}{The sample index (must be in 1, ..., n)} 24 | 25 | \item{j}{The taxon index (must be in 1, ..., J)} 26 | 27 | \item{gammas}{Numeric vector of read intensities} 28 | 29 | \item{B}{Detection efficiency matrix} 30 | 31 | \item{X}{The efficiency design matrix (n x p)} 32 | 33 | \item{Z}{The sample design matrix (n x K)} 34 | 35 | \item{P}{The sample relative abundance matrix (K x J)} 36 | 37 | \item{X_tilde}{The spurious read efficiency design (K_tilde x p)} 38 | 39 | \item{Z_tilde}{The spurious read design (n x K_tilde)} 40 | 41 | \item{Z_tilde_gamma_cols}{Numeric vector containing indexes of columns of 42 | Z_tilde to scale by exp(gamma); NULL if no columns to be scaled} 43 | 44 | \item{P_tilde}{The spurious source relative abundance matrix (K_tilde x J)} 45 | 46 | \item{gamma_tilde}{Spurious read intensity parameter} 47 | } 48 | \value{ 49 | A derivative d mu_ij / d gamma_i 50 | } 51 | \description{ 52 | Calculate derivative of mu_ij with respect to ith entry of gamma 53 | } 54 | -------------------------------------------------------------------------------- /man/mu_d_gamma_tilde.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mu_d_gamma_tilde.R 3 | \name{mu_d_gamma_tilde} 4 | \alias{mu_d_gamma_tilde} 5 | \title{Calculate derivative of mu_ij with respect to an entry of gamma_tilde} 6 | \usage{ 7 | mu_d_gamma_tilde( 8 | i, 9 | j, 10 | k_tilde, 11 | gammas, 12 | B, 13 | X, 14 | Z, 15 | P, 16 | X_tilde, 17 | Z_tilde, 18 | Z_tilde_gamma_cols, 19 | alpha_tilde = NULL, 20 | Z_tilde_list = NULL, 21 | P_tilde, 22 | gamma_tilde 23 | ) 24 | } 25 | \arguments{ 26 | \item{i}{The sample index (must be in 1, ..., n)} 27 | 28 | \item{j}{The taxon index (must be in 1, ..., J)} 29 | 30 | \item{k_tilde}{The element of gamma_tilde with respect to which to take derivative} 31 | 32 | \item{gammas}{Numeric vector of read intensities} 33 | 34 | \item{B}{Detection efficiency matrix} 35 | 36 | \item{X}{The efficiency design matrix (n x p)} 37 | 38 | \item{Z}{The sample design matrix (n x K)} 39 | 40 | \item{P}{The sample relative abundance matrix (K x J)} 41 | 42 | \item{X_tilde}{The spurious read efficiency design (K_tilde x p)} 43 | 44 | \item{Z_tilde}{The spurious read design (n x K_tilde)} 45 | 46 | \item{Z_tilde_gamma_cols}{Numeric vector containing indexes of columns of 47 | Z_tilde to scale by exp(gamma); NULL if no columns to be scaled} 48 | 49 | \item{P_tilde}{The spurious source relative abundance matrix (K_tilde x J)} 50 | 51 | \item{gamma_tilde}{Spurious read intensity parameter} 52 | } 53 | \value{ 54 | A derivative d mu_ij / d gamma_tilde_k_tilde 55 | } 56 | \description{ 57 | Calculate derivative of mu_ij with respect to an entry of gamma_tilde 58 | } 59 | -------------------------------------------------------------------------------- /man/mu_d_rho_faster.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mu_d_rho_faster.R 3 | \name{mu_d_rho_faster} 4 | \alias{mu_d_rho_faster} 5 | \title{Calculate derivative of mu_ij with respect to a row of matrix-valued 6 | parameter rho} 7 | \usage{ 8 | mu_d_rho_faster( 9 | i, 10 | J, 11 | k, 12 | gammas, 13 | B, 14 | X, 15 | Z, 16 | Ak_list, 17 | rho_k, 18 | fixed_P_multipliers, 19 | proportion_scale = FALSE 20 | ) 21 | } 22 | \arguments{ 23 | \item{i}{The sample index (must be in 1, ..., n)} 24 | 25 | \item{J}{The total number of taxa modeled} 26 | 27 | \item{k}{Row index (which row of rho with respect to which to take derivative)} 28 | 29 | \item{gammas}{Numeric vector of read intensities} 30 | 31 | \item{B}{Detection efficiency matrix} 32 | 33 | \item{X}{The efficiency design matrix (n x p)} 34 | 35 | \item{Z}{The sample design matrix (n x K)} 36 | 37 | \item{Ak_list}{List containing matrices that map back-transformed 38 | rho to entries of P} 39 | 40 | \item{rho_k}{Value of kth row of rho} 41 | 42 | \item{fixed_P_multipliers}{Numeric vector of length K containing values in (0,1] 43 | equal to 1 - sum(fixed relative abundances in row k of P) 44 | Z_tilde to scale by exp(gamma); NULL if no columns to be scaled} 45 | } 46 | \value{ 47 | A derivative d mu_i / d rho_k 48 | } 49 | \description{ 50 | Calculate derivative of mu_ij with respect to a row of matrix-valued 51 | parameter rho 52 | } 53 | -------------------------------------------------------------------------------- /man/mu_d_rho_tilde_faster.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mu_d_rho_tilde_faster.R 3 | \name{mu_d_rho_tilde_faster} 4 | \alias{mu_d_rho_tilde_faster} 5 | \title{Calculate derivative of mu_i with respect to a row of matrix-valued 6 | parameter rho_tilde} 7 | \usage{ 8 | mu_d_rho_tilde_faster( 9 | i, 10 | J, 11 | k_tilde, 12 | gammas, 13 | B, 14 | rho_tilde_k, 15 | A_tilde_k_list, 16 | fixed_P_tilde_multipliers, 17 | X_tilde, 18 | Z_tilde, 19 | Z_tilde_gamma_cols, 20 | alpha_tilde = NULL, 21 | Z_tilde_list = NULL, 22 | gamma_tilde, 23 | proportion_scale = FALSE 24 | ) 25 | } 26 | \arguments{ 27 | \item{i}{The sample index (must be in 1, ..., n)} 28 | 29 | \item{J}{The total number of taxa modeled} 30 | 31 | \item{k_tilde}{Row index (which row of rho_tilde with respect to which to take derivative)} 32 | 33 | \item{gammas}{Numeric vector of read intensities} 34 | 35 | \item{B}{Detection efficiency matrix} 36 | 37 | \item{rho_tilde_k}{Value of kth row of rho_tilde} 38 | 39 | \item{A_tilde_k_list}{List containing matrices that map back-transformed 40 | rho_tilde to entries of P_tilde} 41 | 42 | \item{fixed_P_multipliers}{Numeric vector of length K containing values in (0,1] 43 | equal to 1 - sum(fixed relative abundances in row k of P_tilde)} 44 | } 45 | \value{ 46 | A derivative d mu_i / d rho_tilde_k 47 | } 48 | \description{ 49 | Calculate derivative of mu_i with respect to a row of matrix-valued 50 | parameter rho_tilde 51 | } 52 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(tinyvamp) 3 | 4 | test_check("tinyvamp") 5 | -------------------------------------------------------------------------------- /tests/testthat/test-bootstrap_ci.R: -------------------------------------------------------------------------------- 1 | test_that("bootstrap_ci works", { 2 | W <- tinyvamp:::simulate_paper_data(n =3, 3 | J = 5, 4 | B_multiplier = 1, 5 | distrib = "NB", 6 | seed = 0) 7 | 8 | 9 | fitted_model <- tinyvamp:::fit_simulation_model(W,"reweighted_Poisson") 10 | 11 | library(parallel) 12 | cis <- bootstrap_ci(W = W, 13 | fitted_model = fitted_model, 14 | n_boot = 10, 15 | m = NULL, 16 | alpha = 0.05, 17 | parallelize = TRUE, 18 | ncores = 5, 19 | seed = 3, 20 | return_models = FALSE 21 | 22 | ) 23 | 24 | expect_type(cis,"list") 25 | 26 | 27 | }) 28 | -------------------------------------------------------------------------------- /tests/testthat/test-bootstrap_lrt.R: -------------------------------------------------------------------------------- 1 | # # # # # test_that("multiplication works", { 2 | # # # # # 3 | # # # # n <- 10 4 | # # # # sim_p_10 <- numeric(100) 5 | # # # # set.seed(4939323) 6 | # # # # for(sim in 1:100){ 7 | # # # # print(paste("Simulation ", sim, sep = "", collapse = "")) 8 | # # # # W <- simulate_simple_data(matrix(0, nrow = 1, ncol = 2), 9 | # # # # distrib = "nb10", 10 | # # # # n = n, 11 | # # # # gamma_mean = 11) 12 | # # # # 13 | # # # # fitted_model <- fit_simple_model(W, 14 | # # # # B_fixed_at_zero = FALSE, 15 | # # # # reweight = TRUE) 16 | # # # # 17 | # # # # null_param <- fitted_model 18 | # # # # null_param$B[] <- 0 19 | # # # # null_param$B_fixed_indices[] <- TRUE 20 | # # # # 21 | # # # # boot_fit <- bootstrap_lrt(W = W, 22 | # # # # fitted_model = fitted_model, 23 | # # # # null_param = null_param, 24 | # # # # n_boot = 100, 25 | # # # # m = n^(3/4), 26 | # # # # recalculate_W0 = FALSE, 27 | # # # # parallelize = TRUE, 28 | # # # # ncores = 7, 29 | # # # # save_models = FALSE) 30 | # # # # 31 | # # # # sim_p_10[sim] <- boot_fit$boot_pval 32 | # # # # 33 | # # # # print(sim_p_10[sim]) 34 | # # # # hist(boot_fit$boot_lr_stats,breaks = 7) 35 | # # # # abline(v = boot_fit$observed_lr_stat, col = "red") 36 | # # # # 37 | # # # # } 38 | # # # # # 39 | # # # # qs <- seq(0.01,.99,by = .01) 40 | # # # # # 41 | # # # # plot(qs, sapply(qs,function(k) quantile(sim_p_10,k)), type = "s", 42 | # # # # ylim = c(0,1), 43 | # # # # xlim = c(0,1)) 44 | # # # # points(qs, sapply(qs,function(k) quantileCI(sim_p_10,k, 45 | # # # # method = "asymptotic")$conf.int[1]), 46 | # # # # type = "s",lty = 2) 47 | # # # # points(qs, sapply(qs,function(k) quantileCI(sim_p_10,k, 48 | # # # # method = "asymptotic")$conf.int[2]), 49 | # # # # type = "s",lty = 2) 50 | # # # # abline(a = 0, b = 1, lty = 2) 51 | # # # ################################ error in how weights are assigned? 52 | # n <- 25 53 | # sim_p_25 <- numeric(1000) 54 | # sim_p_25_weighted <- numeric(1000) 55 | # qs <- seq(0,1,by = .01) 56 | # set.seed(4939323) 57 | # boot_fits <- vector(1000,mode = "list") 58 | # boot_fits_weighted <- boot_fits 59 | # for(sim in 1:1000){ 60 | # print(paste("Simulation ", sim, sep = "", collapse = "")) 61 | # W <- simulate_simple_data(matrix(c(0,0), nrow = 1, ncol = 2), 62 | # distrib = "nb10", 63 | # n = n, 64 | # gamma_mean = 11) 65 | # 66 | # fitted_model <- fit_simple_model(W = W, 67 | # B_fixed_at_zero = FALSE, 68 | # reweight = FALSE) 69 | # 70 | # null_param <- fitted_model 71 | # null_param$B[] <- 0 72 | # null_param$B_fixed_indices[] <- TRUE 73 | # 74 | # boot_fit <- bootstrap_lrt(W = W, 75 | # fitted_model = fitted_model, 76 | # null_param = null_param, 77 | # n_boot = 1000, 78 | # m = sqrt(m), 79 | # recalculate_W0 = FALSE, 80 | # parallelize = TRUE, 81 | # ncores = 5, 82 | # save_models = FALSE) 83 | # 84 | # boot_fits[[sim]] <- boot_fit 85 | # 86 | # sim_p_25[sim] <- boot_fit$boot_pval 87 | # 88 | # fitted_model_weighted <- fit_simple_model(W = W, 89 | # B_fixed_at_zero = FALSE, 90 | # reweight = TRUE) 91 | # 92 | # null_param <- fitted_model_weighted 93 | # null_param$B[] <- 0 94 | # null_param$B_fixed_indices[] <- TRUE 95 | # 96 | # boot_fit_weighted <- bootstrap_lrt(W = W, 97 | # fitted_model = fitted_model_weighted, 98 | # null_param = null_param, 99 | # n_boot = 1000, 100 | # m = sqrt(m), 101 | # recalculate_W0 = FALSE, 102 | # parallelize = TRUE, 103 | # ncores = 5, 104 | # save_models = FALSE) 105 | # 106 | # boot_fits_weighted[[sim]] <- boot_fit_weighted 107 | # 108 | # sim_p_25_weighted[sim] <- boot_fit_weighted$boot_pval 109 | # 110 | # print(sim_p_25[sim]) 111 | # print(sim_p_25_weighted[sim]) 112 | # 113 | # 114 | # plot(qs,sapply(qs, function(k) quantile(sim_p_25[1:sim],k)), 115 | # type = "s", 116 | # xlim = c(0,1), 117 | # ylim = c(0,1)) 118 | # 119 | # lines(qs,sapply(qs, function(k) quantile(sim_p_25_weighted[1:sim],k)), 120 | # type = "s", 121 | # xlim = c(0,1), 122 | # ylim = c(0,1), 123 | # col = "red") 124 | # 125 | # abline(a = 0, b = 1, lty = 2) 126 | # 127 | # 128 | # 129 | # } 130 | # 131 | # qs <- seq(0,1,by = .01) 132 | # # 133 | # plot(qs, sapply(qs,function(k) quantile(sim_p_25,k)), type = "s") 134 | # abline(a = 0, b = 1, lty = 2) 135 | # # 136 | # # boot_lrs <- lapply(1:100, function(k) boot_fits[[k]]$boot_lr_stats) 137 | # # obs_lrs <- sapply(1:100, function(k) boot_fits[[k]]$observed_lr_stat) 138 | # # plot(1:100, log(sapply(1:100, function(k) median(boot_lrs[[k]]))/obs_lrs)) 139 | # # abline(a = 0, b= 1, lty = 2) 140 | # # }) 141 | -------------------------------------------------------------------------------- /tests/testthat/test-calculate_log_penalty.R: -------------------------------------------------------------------------------- 1 | test_that("log penalty is correct", { 2 | require(logsum) 3 | 4 | P <- matrix(1/7,ncol = 7, nrow = 1) 5 | P_tilde <- matrix(1/7,ncol = 7, nrow = 1) 6 | 7 | param_dfs <- parameters_to_dataframes(P = P, 8 | P_fixed_indices = matrix(FALSE,ncol = 7, nrow = 1), 9 | P_tilde = P_tilde, 10 | P_tilde_fixed_indices = matrix(FALSE,ncol = 7, nrow = 1), 11 | B = matrix(0, ncol = 7, nrow = 1), 12 | B_fixed_indices = matrix(FALSE,ncol = 7, nrow = 1), 13 | gammas = 0, 14 | gammas_fixed_indices = FALSE, 15 | gamma_tilde = matrix(0,ncol = 1, nrow = 1), 16 | gamma_tilde_fixed_indices = FALSE 17 | ) 18 | 19 | varying_lr_df <- ra_to_lr(param_dfs$varying) 20 | 21 | penalty_from_function <- calculate_log_penalty(varying_lr_df = varying_lr_df, 22 | fixed_df = param_dfs$fixed, 23 | barrier_t = 1) 24 | 25 | direct_penalty <- -log(1/7)*14 26 | 27 | expect_equal(penalty_from_function, direct_penalty) 28 | }) 29 | -------------------------------------------------------------------------------- /tests/testthat/test-d_mu_d_beta_original.R: -------------------------------------------------------------------------------- 1 | # test_that("beta ", { 2 | # load("data/karstens_beta_problems.rdata") 3 | # 4 | # # mean_function <- function(x,i,j){ 5 | # temp <- varying_lr_df 6 | # temp <- lr_to_ra(fixed_df,varying_lr_df, 7 | # varying_df) 8 | # temp$value[temp$param == "B"] <- x 9 | # temp_params <- dataframes_to_parameters(fixed_df, 10 | # temp) 11 | # 12 | # means <- meaninate(gammas = temp_params$gammas, 13 | # B = temp_params$B, 14 | # X = X, 15 | # Z = Z, 16 | # P = temp_params$P, 17 | # X_tilde = X_tilde, 18 | # Z_tilde = Z_tilde, 19 | # Z_tilde_gamma_cols = Z_tilde_gamma_cols, 20 | # P_tilde = temp_params$P_tilde, 21 | # gamma_tilde = temp_params$gamma_tilde) 22 | # return(means[i,j]) 23 | # } 24 | # 25 | # numeric_deriv <- numDeriv::grad(func = function(x) mean_function(x, 1,247), 26 | # varying_lr_df$value[varying_lr_df$param == "B"])[7] 27 | # 28 | # varying_df <- lr_to_ra(fixed_df,varying_lr_df, 29 | # varying_df) 30 | # params <- dataframes_to_parameters(fixed_df,varying_df) 31 | # analytic_deriv <- mu_d_beta(i = 1, 32 | # j = 247, 33 | # q = 1, 34 | # gammas = params$gammas, 35 | # B = params$B, 36 | # X = X, 37 | # Z = Z, 38 | # P = params$P, 39 | # X_tilde = X_tilde, 40 | # Z_tilde = Z_tilde, 41 | # Z_tilde_gamma_cols = Z_tilde_gamma_cols, 42 | # P_tilde = params$P_tilde, 43 | # gamma_tilde = params$gamma_tilde) 44 | # 45 | # expect_equal(numeric_deriv,as.numeric(analytic_deriv)) 46 | # }) 47 | -------------------------------------------------------------------------------- /tests/testthat/test-d_mu_d_gamma_original.R: -------------------------------------------------------------------------------- 1 | test_that("gamma derivative is correct when all Z_tilde rows are multiplied by exp(gamma)", { 2 | gammas <- 4.53 3 | B <- matrix(c(rnorm(4),0),nrow = 1) 4 | X <- matrix(1, nrow = 1, ncol = 1) 5 | Z <- matrix(1, nrow = 1, ncol = 1) 6 | P <- matrix((1:5)/15, nrow = 1, ncol = 5) 7 | X_tilde <- matrix(1,nrow = 1, ncol = 1) 8 | Z_tilde <- matrix(1, nrow = 1, ncol = 1) 9 | P_tilde <- matrix((5:1)/15, nrow = 1, ncol = 5) 10 | gamma_tilde <- 1 11 | function_value <- mu_d_gamma(i = 1, 12 | j = 5, 13 | gammas = gammas, 14 | B = B, 15 | X = X, 16 | Z = Z, 17 | P = P, 18 | X_tilde = X_tilde, 19 | Z_tilde = Z_tilde, 20 | Z_tilde_gamma_cols = 1, 21 | P_tilde = P_tilde, 22 | gamma_tilde = gamma_tilde) 23 | 24 | direct_calculation <- ((Z%*%P)*exp(X%*%B + gammas) + 25 | Z_tilde%*%(P_tilde*exp(X_tilde%*%B + gamma_tilde + gammas)))[5] 26 | 27 | expect_equal(as.numeric(function_value), direct_calculation) 28 | }) 29 | -------------------------------------------------------------------------------- /tests/testthat/test-dfs_to_derivs.R: -------------------------------------------------------------------------------- 1 | # 2 | # 3 | # test_that("We get output at all", 4 | # { 5 | # 6 | # set.seed(0) 7 | # W <- matrix(sapply(1:10,function(x) rpois(1,1000)), 8 | # ncol = 5) 9 | # X <- matrix(1,ncol = 1, nrow = 2) 10 | # Z <- matrix(1,nrow = 2, ncol = 1) 11 | # Z_tilde <- matrix(0,nrow = 2, ncol = 1) 12 | # Z_tilde_gamma_cols <- 1 13 | # gammas <- apply(W,1,function(x) log(sum(x))) 14 | # gammas_fixed_indices <- rep(F,2) 15 | # P <- matrix(1/5, nrow = 1, ncol = 5) 16 | # P_fixed_indices <- matrix(FALSE, nrow = 1, ncol = 5) 17 | # B <- matrix(0,ncol = 5, nrow = 1) 18 | # B_fixed_indices <- matrix(TRUE, ncol = 5, nrow = 1) 19 | # X_tilde <- matrix(0,ncol = 1, nrow = 1) 20 | # P_tilde <- matrix(1/5,ncol = 5, nrow = 1) 21 | # P_tilde_fixed_indices <- matrix(TRUE, ncol = 5, nrow = 1) 22 | # gamma_tilde <- matrix(0,nrow = 1, ncol = 1) 23 | # gamma_tilde_fixed_indices <- matrix(TRUE, nrow = 1, ncol = 1) 24 | # 25 | # 26 | # parameter_dfs <- parameters_to_dataframes(P, 27 | # P_fixed_indices, 28 | # P_tilde, 29 | # P_tilde_fixed_indices, 30 | # B, 31 | # B_fixed_indices, 32 | # gammas, 33 | # gammas_fixed_indices, 34 | # gamma_tilde, 35 | # gamma_tilde_fixed_indices) 36 | # 37 | # 38 | # dfs_to_derivs(varying_df = parameter_dfs$varying, 39 | # varying_lr_df = NULL, 40 | # fixed_df = parameter_dfs$fixed_df, 41 | # X = X, 42 | # Z = Z, 43 | # X_tilde = X_tilde, 44 | # Z_tilde = Z_tilde, 45 | # Z_tilde_gamma_cols = Z_tilde_gamma_cols, 46 | # criterion = "Poisson", 47 | # gmm_inv_wts = NULL) 48 | # 49 | # }) 50 | -------------------------------------------------------------------------------- /tests/testthat/test-do_one_simulation.R: -------------------------------------------------------------------------------- 1 | test_that("do_one_simulation returns list of lists", { 2 | 3 | tinysim <- do_one_simulation(n = 1, 4 | J = 5, 5 | distrib = "NB", 6 | B_multiplier = 0, 7 | seed = 1, 8 | label = "test", 9 | n_boot = 5, 10 | verbose =FALSE, 11 | # load_tinyvamp = FALSE, 12 | folder_name = "test", 13 | return_dont_save = TRUE) 14 | 15 | expect_type(tinysim$poisson_lrt,"list") 16 | expect_type(tinysim$poisson_ci,"list") 17 | expect_type(tinysim$reweighted_lrt,"list") 18 | expect_type(tinysim$reweighted_ci,"list") 19 | 20 | }) 21 | 22 | 23 | 24 | # test_that("Simulation that was stalling on Bayes 25 | # is not stalling",{ 26 | # 27 | # please_dont_stall <- 28 | # do_one_simulation(n = 1, 29 | # J = 20, 30 | # distrib = "NB", 31 | # B_multiplier = 0, 32 | # seed = 1, 33 | # label = "testrun", 34 | # n_boot = 5, 35 | # load_tinyvamp = FALSE, 36 | # folder_name, 37 | # return_dont_save = TRUE, 38 | # parallelize = TRUE, 39 | # verbose= TRUE) 40 | # 41 | # expect_type(please_dont_stall$poisson_lrt,"list") 42 | # expect_type(please_dont_stall$poisson_ci,"list") 43 | # expect_type(please_dont_stall$reweighted_lrt,"list") 44 | # expect_type(please_dont_stall$reweighted_ci,"list") 45 | # 46 | # } 47 | # ) 48 | 49 | test_that("Simulation that failed bc of log penalty 50 | gradient off-by-one-type error now succeeds.",{ 51 | 52 | failing_on_bayes <- do_one_simulation(n = 1, 53 | J = 5, 54 | distrib = "Poisson", 55 | B_multiplier = 1, 56 | seed = 1, 57 | label = "test", 58 | n_boot = 5, 59 | parallelize= FALSE, 60 | # load_tinyvamp = FALSE, 61 | folder_name = "test", 62 | verbose = FALSE, 63 | return_dont_save = TRUE) 64 | 65 | expect_type(failing_on_bayes$poisson_lrt,"list") 66 | expect_type(failing_on_bayes$poisson_ci,"list") 67 | expect_type(failing_on_bayes$reweighted_lrt,"list") 68 | expect_type(failing_on_bayes$reweighted_ci,"list") 69 | 70 | }) 71 | 72 | 73 | # test_that("Simulation that might be stalled on Bayes 74 | # actually runs.",{ 75 | # 76 | # possibly_failing_on_bayes <- do_one_simulation(n = 3, 77 | # J = 5, 78 | # distrib = "NB", 79 | # B_multiplier = 1, 80 | # seed = 1, 81 | # label = "test", 82 | # n_boot = 5, 83 | # parallelize= FALSE, 84 | # load_tinyvamp = FALSE, 85 | # folder_name = "test", 86 | # verbose = TRUE, 87 | # return_dont_save = TRUE) 88 | # 89 | # expect_type(possibly_failing_on_bayes$poisson_lrt,"list") 90 | # expect_type(possibly_failing_on_bayes$poisson_ci,"list") 91 | # expect_type(possibly_failing_on_bayes$reweighted_lrt,"list") 92 | # expect_type(possibly_failing_on_bayes$reweighted_ci,"list") 93 | # 94 | # }) 95 | 96 | # test_that("Yet another thing that stalled on Bayes doesn't stall anymore.", { 97 | # 98 | # stalling_on_bayes <- do_one_simulation(n = 1, 99 | # J = 5, 100 | # distrib = "Poisson", 101 | # B_multiplier = 1, 102 | # seed = 1, 103 | # label = "test", 104 | # n_boot = 5, 105 | # parallelize= FALSE, 106 | # load_tinyvamp = FALSE, 107 | # folder_name = "test", 108 | # return_dont_save = TRUE, 109 | # verbose = TRUE) 110 | # 111 | # 112 | # }) 113 | 114 | test_that("Another simulation stalled on Bayes can run and 115 | gives different estimates depending on estimator used.",{ 116 | 117 | stalling_out_on_bayes <- 118 | do_one_simulation(n = 3, 119 | J = 20, 120 | distrib = "Poisson", 121 | B_multiplier = 0, 122 | seed = 67791629, 123 | label = "trying_for_hundo", 124 | n_boot = 10, 125 | parallelize= FALSE, 126 | # load_tinyvamp = FALSE, 127 | folder_name = "test", 128 | verbose = TRUE, 129 | return_dont_save = TRUE) 130 | 131 | expect_true(mean(stalling_out_on_bayes$poisson_lrt$boot_lr_stats) != 132 | mean(stalling_out_on_bayes$reweighted_lrt$boot_lr_stats)) 133 | 134 | expect_true(sd(stalling_out_on_bayes$poisson_ci$ci$lower_ci - 135 | stalling_out_on_bayes$reweighted_ci$ci$lower_ci) >1e-4) 136 | 137 | expect_true(sd(stalling_out_on_bayes$poisson_ci$ci$upper_ci - 138 | stalling_out_on_bayes$reweighted_ci$ci$upper_ci) >1e-4) 139 | 140 | expect_true(sd(stalling_out_on_bayes$poisson_ci$ci$value - 141 | stalling_out_on_bayes$reweighted_ci$ci$value)>1e-4) 142 | 143 | 144 | 145 | 146 | }) 147 | -------------------------------------------------------------------------------- /tests/testthat/test-fit_simulation_model.R: -------------------------------------------------------------------------------- 1 | test_that("We are able to fit a simulation model (Poisson likelihood) to 2 | Poisson simulation data", { 3 | W <- simulate_paper_data(n = 1, 4 | J = 5, 5 | B_multiplier = 1, 6 | distrib = "Poisson", 7 | seed = 0) 8 | 9 | 10 | fitted.model <- try(fit_simulation_model(W,"Poisson")) 11 | expect_type(fitted.model,"list") 12 | }) 13 | 14 | test_that("We are able to fit a simulation model (Poisson likelihood) to 15 | negative binomial simulation data", { 16 | W <- simulate_paper_data(n = 1, 17 | J = 5, 18 | B_multiplier = 1, 19 | distrib = "NB", 20 | seed = 0) 21 | 22 | 23 | fitted.model <- try(fit_simulation_model(W,"Poisson")) 24 | expect_type(fitted.model,"list") 25 | }) 26 | 27 | test_that("We are able to fit a simulation model (Poisson likelihood) 28 | to Poisson simulation data", { 29 | W <- simulate_paper_data(n = 1, 30 | J = 5, 31 | B_multiplier = 1, 32 | distrib = "Poisson", 33 | seed = 0) 34 | 35 | 36 | fitted.model <- try(fit_simulation_model(W,"reweighted_Poisson")) 37 | expect_type(fitted.model,"list") 38 | }) 39 | 40 | test_that("We are able to fit a simulation model (reweighted Poisson likelihood) to 41 | negative binomial simulation data", { 42 | W <- simulate_paper_data(n = 1, 43 | J = 5, 44 | B_multiplier = 1, 45 | distrib = "NB", 46 | seed = 0) 47 | 48 | 49 | fitted.model <- try(fit_simulation_model(W,"reweighted_Poisson", 50 | return_variance= TRUE)) 51 | expect_type(fitted.model,"list") 52 | 53 | # fitted.model$variance_function %>% 54 | # ggplot() + 55 | # geom_point(aes(x= mean, y = squerror), 56 | # size = 0.5) + 57 | # geom_line(aes(x = mean, y = estd_var), 58 | # color = "red") + 59 | # theme_bw()+ 60 | # scale_y_sqrt() + 61 | # scale_x_sqrt() 62 | }) 63 | 64 | test_that("We get different estimates from Poisson and 65 | reweighted Poisson estimators fit to negative binomial data", { 66 | W <- simulate_paper_data(n = 3, 67 | J = 20, 68 | B_multiplier = 1, 69 | distrib = "NB", 70 | seed = 0) 71 | 72 | 73 | poisson_fit <- try(fit_simulation_model(W,"Poisson")) 74 | reweighted_fit <- try(fit_simulation_model(W,"reweighted_Poisson", 75 | return_variance = TRUE)) 76 | 77 | expect_true( 78 | mean(abs(poisson_fit$varying$value - reweighted_fit$varying$value)) 79 | >0.01) 80 | # 81 | # 82 | # poisson_cis <- bootstrap_ci(W, 83 | # fitted_model = poisson_fit, 84 | # n_boot = 100, 85 | # verbose= TRUE, 86 | # parallelize = TRUE) 87 | # 88 | # reweighted_cis <- bootstrap_ci(W, 89 | # fitted_model = reweighted_fit, 90 | # n_boot = 100, 91 | # verbose = TRUE, 92 | # parallelize = TRUE 93 | # ) 94 | # 95 | # poisson_cis$ci$method <- "Poisson" 96 | # reweighted_cis$ci$method <- "Reweighted" 97 | # 98 | # rbind(poisson_cis$ci, 99 | # reweighted_cis$ci) %>% 100 | # dplyr::filter(param == "B") %>% 101 | # ggplot() + 102 | # geom_errorbar(aes(x = j, ymin = lower_ci, ymax= upper_ci, color = method), 103 | # position = position_dodge(0.5)) + 104 | # facet_wrap(~k)+ 105 | # theme_bw() 106 | # 107 | # rbind(poisson_cis$ci, 108 | # reweighted_cis$ci) %>% 109 | # dplyr::group_by(param, method) %>% 110 | # dplyr::summarize(mean_width = mean(abs(upper_ci - lower_ci))) %>% 111 | # ggplot() + 112 | # geom_point(aes(x= param, y = mean_width, color = method), 113 | # position = position_dodge(0.5)) + 114 | # scale_y_log10()+ 115 | # theme_bw() 116 | # 117 | # rbind(poisson_cis$ci, 118 | # reweighted_cis$ci) %>% 119 | # mutate(width = upper_ci - lower_ci) %>% 120 | # filter(param == "P_tilde") %>% 121 | # select(param, j, method, width) %>% 122 | # pivot_wider(values_from = width, names_from = method) %>% 123 | # ggplot(aes(x = j, y = Poisson/Reweighted)) + 124 | # geom_point() + 125 | # theme_bw() + 126 | # scale_y_log10() 127 | # 128 | # 129 | # 130 | # reweighted_fit$variance_function %>% 131 | # ggplot() + 132 | # geom_point(aes(x= mean,y = squerror)) + 133 | # geom_line(aes(x = mean,y = estd_var),color="red") + 134 | # scale_y_log10() + 135 | # scale_x_log10() + 136 | # theme_bw() 137 | 138 | 139 | }) 140 | -------------------------------------------------------------------------------- /tests/testthat/test-get_A_tilde_k_list.R: -------------------------------------------------------------------------------- 1 | test_that("get_A_tilde_k_list works when P_tilde entries fixed", { 2 | 3 | X <- matrix(1) 4 | X_tilde <- matrix(1) 5 | Z <- matrix(1) 6 | Z_tilde <- matrix(1) 7 | Z_tilde_gamma_cols <- 1 8 | 9 | P <- matrix(rep(1/8, 8), nrow = 1) 10 | P_fixed_indices <- matrix(rep(FALSE,8), nrow = 1) 11 | P_tilde <- matrix((1:8)/sum(1:8), nrow = 1) 12 | P_tilde_fixed_indices <- matrix(rep(FALSE,8), nrow = 1) 13 | B <- matrix(c(-3:3,0),nrow = 1) 14 | B_fixed_indices <- matrix(rep(TRUE,8), nrow = 1) 15 | gammas <- 8 16 | gammas_fixed_indices <- TRUE 17 | gamma_tilde <- matrix(log(100)) 18 | gamma_tilde_fixed_indices <- matrix(TRUE) 19 | 20 | dfs <- parameters_to_dataframes(P, 21 | P_fixed_indices, 22 | P_tilde, 23 | P_tilde_fixed_indices, 24 | B, 25 | B_fixed_indices, 26 | gammas, 27 | gammas_fixed_indices, 28 | gamma_tilde, 29 | gamma_tilde_fixed_indices) 30 | 31 | varying_lr_df <- ra_to_lr(dfs$varying) 32 | 33 | expect_equal(get_A_tilde_k_list(fixed_df = dfs$fixed, 34 | varying_df = dfs$varying, 35 | varying_lr_df = varying_lr_df), 36 | list(diag(8))) 37 | 38 | }) 39 | 40 | test_that("get_A_tilde_k_list works when some P_tilde entries fixed", { 41 | 42 | X <- matrix(1) 43 | X_tilde <- matrix(1) 44 | Z <- matrix(1) 45 | Z_tilde <- matrix(1) 46 | Z_tilde_gamma_cols <- 1 47 | 48 | P <- matrix(rep(1/8, 8), nrow = 1) 49 | P_fixed_indices <- matrix(rep(FALSE,8), nrow = 1) 50 | P_tilde <- matrix((1:8)/sum(1:8), nrow = 1) 51 | P_tilde_fixed_indices <- matrix(c(FALSE, TRUE, TRUE, rep(FALSE,5)), nrow = 1) 52 | B <- matrix(c(-3:3,0),nrow = 1) 53 | B_fixed_indices <- matrix(rep(TRUE,8), nrow = 1) 54 | gammas <- 8 55 | gammas_fixed_indices <- TRUE 56 | gamma_tilde <- matrix(log(100)) 57 | gamma_tilde_fixed_indices <- matrix(TRUE) 58 | 59 | dfs <- parameters_to_dataframes(P, 60 | P_fixed_indices, 61 | P_tilde, 62 | P_tilde_fixed_indices, 63 | B, 64 | B_fixed_indices, 65 | gammas, 66 | gammas_fixed_indices, 67 | gamma_tilde, 68 | gamma_tilde_fixed_indices) 69 | 70 | varying_lr_df <- ra_to_lr(dfs$varying) 71 | 72 | expect_equal(get_A_tilde_k_list(fixed_df = dfs$fixed, 73 | varying_df = dfs$varying, 74 | varying_lr_df = varying_lr_df), 75 | list(rbind(diag(6)[1,], 76 | 0*diag(6)[1:2,], 77 | diag(6)[2:6,]))) 78 | 79 | }) 80 | -------------------------------------------------------------------------------- /tests/testthat/test-gmm_criterion.R: -------------------------------------------------------------------------------- 1 | test_that("gmm criterion behaves predictably in the inv_wts argument", { 2 | set.seed(0) 3 | W <- matrix(sapply(1:10,function(x) rpois(1,1000)), 4 | ncol = 5) 5 | X <- matrix(1,ncol = 1, nrow = 2) 6 | Z <- matrix(1,nrow = 2, ncol = 1) 7 | Z_tilde <- matrix(0,nrow = 2, ncol = 1) 8 | Z_tilde_gamma_cols <- 1 9 | gammas <- apply(W,1,function(x) log(sum(x))) 10 | gammas_fixed_indices <- rep(F,2) 11 | P <- matrix(1/5, nrow = 1, ncol = 5) 12 | P_fixed_indices <- matrix(FALSE, nrow = 1, ncol = 5) 13 | B <- matrix(0,ncol = 5, nrow = 1) 14 | B_fixed_indices <- matrix(TRUE, ncol = 5, nrow = 1) 15 | X_tilde <- matrix(0,ncol = 1, nrow = 1) 16 | P_tilde <- matrix(1/5,ncol = 5, nrow = 1) 17 | P_tilde_fixed_indices <- matrix(TRUE, ncol = 5, nrow = 1) 18 | gamma_tilde <- matrix(0,nrow = 1, ncol = 1) 19 | gamma_tilde_fixed_indices <- matrix(TRUE, nrow = 1, ncol = 1) 20 | 21 | param_dfs <- parameters_to_dataframes(P, 22 | P_fixed_indices, 23 | P_tilde, 24 | P_tilde_fixed_indices, 25 | B, 26 | B_fixed_indices, 27 | gammas, 28 | gammas_fixed_indices, 29 | gamma_tilde, 30 | gamma_tilde_fixed_indices) 31 | 32 | means <- meaninate(gammas = gammas, 33 | B = B, 34 | X = X, 35 | Z = Z, 36 | P = P, 37 | X_tilde = X_tilde, 38 | Z_tilde = Z_tilde, 39 | Z_tilde_gamma_cols = 1, 40 | P_tilde = P_tilde, 41 | gamma_tilde = gamma_tilde) 42 | n <- nrow(W) 43 | W_long <- lapply(1:n,function(i) as.numeric(W[i,])) 44 | W_long <- do.call(c,W_long) 45 | means_long <- lapply(1:n, function(i) as.numeric(means[i,])) 46 | means_long <- do.call(c,means_long) 47 | 48 | sse <- sum((W_long - means_long)^2) 49 | 50 | expect_equal(0.5*sse,gmm_criterion(W_long,means_long, 51 | rep(1,length(W_long)))) 52 | 53 | expect_true(0.5*sse > gmm_criterion(W_long,means_long, 54 | rep(2,length(W_long)))) 55 | 56 | 57 | W_long[1] <- means_long[1] 58 | 59 | expect_equal(gmm_criterion(W_long,means_long,c(0,rep(1,length(W_long) - 1))), 60 | gmm_criterion(W_long,means_long,c(1,rep(1,length(W_long) - 1)))) 61 | 62 | }) 63 | -------------------------------------------------------------------------------- /tests/testthat/test-lr_to_ra.R: -------------------------------------------------------------------------------- 1 | test_that("Get same dfs out applying lr_to_ra on output of ra_to_lr", { 2 | 3 | fixed_df <- data.frame("value" = 1/5, 4 | "param" = "P_tilde", 5 | k = 1, 6 | j = 1:5) 7 | 8 | varying_df <- data.frame("value" = (1:5)/sum(1:5), 9 | "param" = "P", 10 | k = 1, 11 | j = 1:5) 12 | 13 | expect_equal(lr_to_ra(fixed_df = fixed_df, 14 | varying_lr_df =ra_to_lr(varying_df), 15 | varying_df = varying_df), 16 | varying_df) 17 | 18 | }) 19 | 20 | # test_that("Get same dfs out applying ra_to_lr on output of lr_to_ra",{ 21 | # temp_varying <- 22 | # with(list_of_dfs, 23 | # lr_to_ra(fixed_df,varying_lr_df,varying_df)) 24 | # temp_lr <- ra_to_lr(temp_varying) 25 | # 26 | # expect_equal(temp_lr,list_of_dfs$varying_lr_df) 27 | # }) 28 | -------------------------------------------------------------------------------- /tests/testthat/test-mu_d_P.R: -------------------------------------------------------------------------------- 1 | test_that("P derivative is correct in simple setting", { 2 | gammas <- 4.53 3 | B <- matrix(c(rnorm(4),0),nrow = 1) 4 | X <- matrix(1, nrow = 1, ncol = 1) 5 | Z <- matrix(1, nrow = 1, ncol = 1) 6 | P <- matrix((1:5)/15, nrow = 1, ncol = 5) 7 | X_tilde <- matrix(1,nrow = 1, ncol = 1) 8 | Z_tilde <- matrix(1, nrow = 1, ncol = 1) 9 | P_tilde <- matrix((5:1)/15, nrow = 1, ncol = 5) 10 | gamma_tilde <- 1 11 | function_value <- mu_d_P(i = 1, 12 | j = 5, 13 | m = 1, 14 | gammas = gammas, 15 | B = B, 16 | X = X, 17 | Z = Z, 18 | P = P) 19 | 20 | direct_calculation <- (Z%*%exp(X%*%B + gammas))[5] 21 | 22 | expect_equal(as.numeric(function_value), direct_calculation) 23 | }) 24 | -------------------------------------------------------------------------------- /tests/testthat/test-mu_d_beta.R: -------------------------------------------------------------------------------- 1 | test_that("Beta derivative is correct in no-contamination case", { 2 | 3 | set.seed(9382) 4 | B <- matrix(c(rnorm(6),0), nrow = 1) 5 | X <- matrix(1,ncol = 1, nrow = 1) 6 | Z <- matrix(1, ncol = 1, nrow = 1) 7 | P <- matrix(1/7, nrow =1 , ncol = 7) 8 | X_tilde <- matrix(1, ncol = 1, nrow = 1) 9 | Z_tilde <- matrix(1, ncol = 1, nrow = 1) 10 | P_tilde <- matrix(1/7, ncol = 7, nrow = 1) 11 | deriv_from_fn <- mu_d_beta(i = 1, 12 | j = 2, 13 | q = 1, 14 | gammas = 8, 15 | B = B, 16 | X = X, 17 | Z = Z, 18 | P = P, 19 | X_tilde = X_tilde, 20 | Z_tilde = Z_tilde, 21 | Z_tilde_gamma_cols = 1, 22 | P_tilde = P_tilde, 23 | gamma_tilde = matrix(-100, ncol = 1, nrow = 1)) 24 | 25 | direct_deriv <- ((Z%*%P)*(exp(X%*%B + 8)))[1,2] 26 | expect_equal(as.numeric(deriv_from_fn), direct_deriv) 27 | }) 28 | -------------------------------------------------------------------------------- /tests/testthat/test-mu_d_gamma_faster.R: -------------------------------------------------------------------------------- 1 | 2 | test_that("mu_d_gamma_faster output equal to mean when gamma_tilde negligible",{ 3 | set.seed(75943) 4 | B <- matrix(c(rnorm(6),0),nrow = 1) 5 | dgamma <- mu_d_gamma_faster(i = 1, 6 | J = 7, 7 | gammas = 1, 8 | B = B, 9 | X = matrix(1, ncol = 1), 10 | Z = matrix(1, ncol = 1), 11 | P = matrix((1:7)/(28),nrow = 1), 12 | X_tilde = matrix(1, ncol = 1), 13 | Z_tilde = matrix(1, ncol = 1), 14 | Z_tilde_gamma_cols = NULL, 15 | P_tilde = matrix((7:1)/28, nrow = 1), 16 | gamma_tilde = -100) 17 | 18 | 19 | mean_term <- meaninate(gammas = 1, 20 | B = B, 21 | X = matrix(1, ncol = 1), 22 | Z = matrix(1, ncol = 1), 23 | P = matrix((1:7)/(28),nrow = 1), 24 | X_tilde = matrix(1, ncol = 1), 25 | Z_tilde = matrix(1, ncol = 1), 26 | Z_tilde_gamma_cols = NULL, 27 | P_tilde = matrix((7:1)/28, nrow = 1), 28 | gamma_tilde = -100) 29 | 30 | expect_equal(mean_term, dgamma) 31 | }) 32 | 33 | 34 | test_that("mu_d_gamma_faster output equal to mean when gamma_tilde non-negligible 35 | and all columns of Z_tilde in Z_tilde_gamma_tilde_cols",{ 36 | set.seed(742343) 37 | B <- matrix(c(rnorm(6),0),nrow = 1) 38 | dgamma <- mu_d_gamma_faster(i = 1, 39 | J = 7, 40 | gammas = 1, 41 | B = B, 42 | X = matrix(1, ncol = 1), 43 | Z = matrix(1, ncol = 1), 44 | P = matrix((1:7)/(28),nrow = 1), 45 | X_tilde = matrix(1, ncol = 1), 46 | Z_tilde = matrix(1, ncol = 1), 47 | Z_tilde_gamma_cols = 1, 48 | P_tilde = matrix((7:1)/28, nrow = 1), 49 | gamma_tilde = 2) 50 | 51 | 52 | mean_term <- meaninate(gammas = 1, 53 | B = B, 54 | X = matrix(1, ncol = 1), 55 | Z = matrix(1, ncol = 1), 56 | P = matrix((1:7)/(28),nrow = 1), 57 | X_tilde = matrix(1, ncol = 1), 58 | Z_tilde = matrix(1, ncol = 1), 59 | Z_tilde_gamma_cols = 1, 60 | P_tilde = matrix((7:1)/28, nrow = 1), 61 | gamma_tilde = 2) 62 | 63 | expect_equal(mean_term, dgamma) 64 | }) 65 | -------------------------------------------------------------------------------- /tests/testthat/test-mu_d_gamma_tilde.R: -------------------------------------------------------------------------------- 1 | test_that("gamma_tilde derivative is correct when all Z_tilde rows are multiplied by exp(gamma)", { 2 | gammas <- 4.53 3 | B <- matrix(c(rnorm(4),0),nrow = 1) 4 | X <- matrix(1, nrow = 1, ncol = 1) 5 | Z <- matrix(1, nrow = 1, ncol = 1) 6 | P <- matrix((1:5)/15, nrow = 1, ncol = 5) 7 | X_tilde <- matrix(1,nrow = 1, ncol = 1) 8 | Z_tilde <- matrix(1, nrow = 1, ncol = 1) 9 | P_tilde <- matrix((5:1)/15, nrow = 1, ncol = 5) 10 | gamma_tilde <- 1 11 | function_value <- mu_d_gamma_tilde(i = 1, 12 | j = 5, 13 | k_tilde = 1, 14 | gammas = gammas, 15 | B = B, 16 | X = X, 17 | Z = Z, 18 | P = P, 19 | X_tilde = X_tilde, 20 | Z_tilde = Z_tilde, 21 | Z_tilde_gamma_cols = 1, 22 | P_tilde = P_tilde, 23 | gamma_tilde = gamma_tilde) 24 | 25 | direct_calculation <- ( 26 | Z_tilde%*%(P_tilde*exp(X_tilde%*%B + gamma_tilde + gammas)))[5] 27 | 28 | expect_equal(as.numeric(function_value), direct_calculation) 29 | }) 30 | -------------------------------------------------------------------------------- /tests/testthat/test-mu_d_rho.R: -------------------------------------------------------------------------------- 1 | test_that("rho derivative is correct when all Z_tilde rows are multiplied by exp(gamma)", { 2 | gammas <- 4.53 3 | B <- matrix(c(rnorm(4),0),nrow = 1) 4 | X <- matrix(1, nrow = 1, ncol = 1) 5 | Z <- matrix(1, nrow = 1, ncol = 1) 6 | P <- matrix((1:5)/15, nrow = 1, ncol = 5) 7 | X_tilde <- matrix(1,nrow = 1, ncol = 1) 8 | Z_tilde <- matrix(1, nrow = 1, ncol = 1) 9 | P_tilde <- matrix((5:1)/15, nrow = 1, ncol = 5) 10 | rho_k = log(P[1,1:4]/P[1,5]) 11 | gamma_tilde <- 1 12 | function_value <- mu_d_rho_faster(i = 1, 13 | J = 5, 14 | k = 1, 15 | gammas = gammas, 16 | B = B, 17 | X = X, 18 | Z = Z, 19 | rho_k = rho_k, 20 | Ak_list = list(diag(5)), 21 | fixed_P_multipliers = 1 22 | ) 23 | 24 | #convert to matrix from dgeMatrix 25 | function_value <- as.matrix(function_value) 26 | 27 | #remove empty dimnames 28 | dimnames(function_value) <- NULL 29 | 30 | 31 | direct_d_mu_dPk <- diag(as.numeric((Z%*%exp(X%*%B + gammas)))) 32 | direct_dPk_drho_k <- cbind(diag(exp(rho_k)/(sum(c(1,exp(rho_k))))),0) - 33 | outer(c(exp(rho_k))/(sum(c(1,exp(rho_k)))),c(exp(rho_k),1)/(sum(c(1,exp(rho_k))))) 34 | direct_calculation <- t(direct_dPk_drho_k%*%direct_d_mu_dPk) 35 | 36 | expect_equal(function_value, direct_calculation) 37 | }) 38 | -------------------------------------------------------------------------------- /tests/testthat/test-mu_d_rho_tilde.R: -------------------------------------------------------------------------------- 1 | test_that("rho derivative is correct when all Z_tilde rows are multiplied by exp(gamma)", { 2 | require(Matrix) 3 | gammas <- 4.53 4 | B <- matrix(c(rnorm(4),0),nrow = 1) 5 | X_tilde <- matrix(1,nrow = 1, ncol = 1) 6 | Z_tilde <- matrix(1, nrow = 1, ncol = 1) 7 | P_tilde <- matrix((5:1)/15, nrow = 1, ncol = 5) 8 | rho_tilde_k = log(P_tilde[1,1:4]/P_tilde[1,5]) 9 | gamma_tilde <- 1 10 | Z_tilde_gamma_cols <- 1 11 | 12 | function_value <- mu_d_rho_tilde_faster(i = 1, 13 | J = 5, 14 | k_tilde = 1, 15 | gammas = gammas, 16 | B = B, 17 | rho_tilde_k = rho_tilde_k, 18 | A_tilde_k_list = list(diag(5)), 19 | fixed_P_tilde_multipliers = 1, 20 | X_tilde = X_tilde, 21 | Z_tilde = Z_tilde, 22 | Z_tilde_gamma_cols = Z_tilde_gamma_cols, 23 | gamma_tilde = gamma_tilde) 24 | 25 | #convert to matrix from dgeMatrix 26 | function_value <- as.matrix(function_value) 27 | 28 | #remove empty dimnames 29 | dimnames(function_value) <- NULL 30 | 31 | 32 | direct_d_mu_dP_tilde_k <- diag(as.numeric((exp(gammas)*Z_tilde)%*%exp(X_tilde%*%B + gamma_tilde))) 33 | direct_dP_tilde_k_drho_tilde_k <- cbind(diag(exp(rho_tilde_k)/(sum(c(1,exp(rho_tilde_k))))),0) - 34 | outer(c(exp(rho_tilde_k))/(sum(c(1,exp(rho_tilde_k)))),c(exp(rho_tilde_k),1)/(sum(c(1,exp(rho_tilde_k))))) 35 | direct_calculation <- t(direct_dP_tilde_k_drho_tilde_k%*%direct_d_mu_dP_tilde_k) 36 | 37 | expect_equal(function_value, direct_calculation) 38 | }) 39 | 40 | test_that("rho_tilde derivative correct in realistic setting with alpha_tilde", { 41 | 42 | 43 | set.seed(0) 44 | p_mock1 <- c(rep(0,5),rep(1/5,5)) 45 | p_mock2 <- c(rep(1/5,5),rep(0,5)) 46 | p_contam <- c(rexp(5),rep(0,5)) 47 | p_contam <- p_contam/sum(p_contam) 48 | p_true <- rep(1/10,10) 49 | dilutions <- rep(3^(1:5),3) 50 | W <- matrix(NA,nrow = 15, ncol = 10) 51 | for(i in 1:15){ 52 | if(i<6){ 53 | W[i,] <- round(rexp(1,3^((i - 1)%%5)*1/10000)*(p_mock1 + dilutions[i]*p_contam),0) 54 | } else{ 55 | if(i<11){ 56 | W[i,] <- round(rexp(1,3^((i- 1)%%5)*1/10000)*(p_mock2 + dilutions[i]*p_contam),0) 57 | 58 | } else{ 59 | W[i,] <- round(rexp(1,3^((i-1)%%5)*1/10000)*(p_true + dilutions[i]*p_contam),0) 60 | 61 | } 62 | } 63 | } 64 | X <- matrix(0,ncol = 1, nrow = 15) 65 | Z <- cbind(c(rep(1,5),rep(0,10)), 66 | c(rep(0,5),rep(1,5), rep(0,5)), 67 | c(rep(0,10),rep(1,5))) 68 | Z_tilde <- matrix(dilutions/exp(mean(log(dilutions))), ncol = 1) 69 | Z_tilde_list <- list(Z_tilde*matrix(c(rep(1,5),rep(0,10))), 70 | Z_tilde*matrix(c(rep(0,5),rep(1,5),rep(0,5))), 71 | Z_tilde*matrix(c(rep(0,10),rep(1,5)))) 72 | 73 | Z_tilde_gamma_cols <- 1 74 | Z_tilde <- NULL 75 | gammas <- apply(W,1,function(x) log(sum(x))) 76 | gammas_fixed_indices <- rep(F,length(gammas)) 77 | P <- rbind(p_mock1, 78 | p_mock2, 79 | rep(.1,10)) 80 | P_fixed_indices <- matrix(FALSE, nrow = 3, ncol = 10) 81 | P_fixed_indices[1:2,] <- TRUE 82 | B <- matrix(0,ncol = 10, nrow = 1) 83 | B_fixed_indices <- matrix(TRUE, ncol = 10, nrow = 1) 84 | X_tilde <- matrix(0,ncol = 1, nrow = 1) 85 | P_tilde <- matrix(1/10,ncol = 10, nrow = 1) 86 | P_tilde_fixed_indices <- matrix(FALSE, ncol = 10, nrow = 1) 87 | gamma_tilde <- matrix(0,nrow = 1, ncol = 1) 88 | gamma_tilde_fixed_indices <- matrix(FALSE, nrow = 1, ncol = 1) 89 | alpha_tilde <- c(0,0) 90 | 91 | parameter_dfs <- parameters_to_dataframes(P, 92 | P_fixed_indices, 93 | P_tilde, 94 | P_tilde_fixed_indices, 95 | B, 96 | B_fixed_indices, 97 | gammas, 98 | gammas_fixed_indices, 99 | gamma_tilde, 100 | gamma_tilde_fixed_indices, 101 | alpha_tilde = alpha_tilde) 102 | 103 | varying_df <- parameter_dfs$varying 104 | fixed_df <- parameter_dfs$fixed 105 | varying_lr_df <- ra_to_lr(varying_df) 106 | 107 | 108 | 109 | K <- max(c(varying_df$k[varying_df$param == "P"], 110 | fixed_df$k[fixed_df$param == "P"])) 111 | 112 | fixed_P_multipliers <- sapply(1:K, function(k) 113 | 1 - sum(fixed_df$value[fixed_df$param == "P"& 114 | fixed_df$k ==k])) 115 | 116 | K_tilde <- max(c(varying_df$k[varying_df$param == "P_tilde"], 117 | fixed_df$k[fixed_df$param == "P_tilde"])) 118 | 119 | fixed_P_tilde_multipliers <- sapply(1:K_tilde, function(k) 120 | 1 - sum(fixed_df$value[fixed_df$param == "P_tilde"& 121 | fixed_df$k ==k])) 122 | 123 | Ak_list <- get_Ak_list(fixed_df, 124 | varying_df, 125 | varying_lr_df) 126 | 127 | A_tilde_k_list <- get_A_tilde_k_list(fixed_df, 128 | varying_df, 129 | varying_lr_df) 130 | 131 | # message("created Ak_list and A_tilde_k_list") 132 | 133 | #calculate at outset of optimization pass as argument to mean_jac_lr, etc. 134 | which_k_p <- sapply(1:K, function(k) ifelse(is.null(Ak_list[[k]]), 135 | NA, k)) 136 | 137 | which_k_p <- which_k_p[!is.na(which_k_p)] 138 | 139 | #calculate at outset of optimization pass as argument to mean_jac_lr, etc. 140 | which_k_p_tilde <- sapply(1:K_tilde, 141 | function(k) ifelse( 142 | is.null(A_tilde_k_list[[k]]), 143 | NA,k 144 | )) 145 | 146 | which_k_p_tilde <- which_k_p_tilde[!is.na(which_k_p_tilde)] 147 | 148 | # message("saved which_k_p and which_k_p_tilde") 149 | 150 | #calculate at outset of optimization 151 | which_B_rows <- unique(varying_df$k[varying_df$param == "B"]) 152 | which_B_rows <- which_B_rows[order(which_B_rows)] 153 | 154 | #calculate at outset of optimization 155 | which_B_keep <- lapply(which_B_rows, 156 | function(k) sapply(1:(J - 1), 157 | function(j) 158 | j %in% varying_lr_df$j[ 159 | varying_lr_df$param == "B" & 160 | varying_lr_df$k == k] 161 | )) 162 | which_B_keep <- do.call(rbind,which_B_keep) 163 | 164 | # message("saved which_B_keep") 165 | 166 | which_gammas <- unique(varying_df$k[varying_df$param == "gamma"]) 167 | 168 | which_gamma_tilde <- unique(varying_df$k[varying_df$param == "gamma_tilde"]) 169 | 170 | which_unconstrained <- varying_lr_df$param %in% c("B","gamma","gamma_tilde") 171 | which_rho <- varying_lr_df$param %in% c("rho") 172 | which_rho_tilde <- varying_lr_df$param %in% c("rho_tilde") 173 | npar <- nrow(varying_lr_df) 174 | 175 | rho_tilde_k = log(P_tilde[1,1:9]/P_tilde[1,10]) 176 | i <- 1 177 | 178 | function_value <- mu_d_rho_tilde_faster(i = i, 179 | J = 10, 180 | k_tilde = 1, 181 | gammas = gammas, 182 | B = B, 183 | rho_tilde_k = rho_tilde_k, 184 | A_tilde_k_list = A_tilde_k_list, 185 | fixed_P_tilde_multipliers = 1, 186 | X_tilde = X_tilde, 187 | Z_tilde = Z_tilde, 188 | Z_tilde_gamma_cols = Z_tilde_gamma_cols, 189 | gamma_tilde = gamma_tilde, 190 | alpha_tilde = alpha_tilde, 191 | Z_tilde_list = Z_tilde_list) 192 | 193 | #convert to matrix from dgeMatrix 194 | function_value <- as.matrix(function_value) 195 | 196 | #remove empty dimnames 197 | dimnames(function_value) <- NULL 198 | 199 | Z_tilde <- construct_Z_tilde(Z_tilde_list, 200 | alpha_tilde) 201 | 202 | 203 | direct_d_mu_dP_tilde_k <- diag(as.numeric((exp(gammas[i])*Z_tilde[i,,drop = FALSE])%*%exp(X_tilde%*%B + gamma_tilde%*%matrix(1,ncol = 10, 204 | nrow = 1)))) 205 | direct_dP_tilde_k_drho_tilde_k <- cbind(diag(exp(rho_tilde_k)/(sum(c(1,exp(rho_tilde_k))))),0) - 206 | outer(c(exp(rho_tilde_k))/(sum(c(1,exp(rho_tilde_k)))),c(exp(rho_tilde_k),1)/(sum(c(1,exp(rho_tilde_k))))) 207 | direct_calculation <- t(direct_dP_tilde_k_drho_tilde_k%*%direct_d_mu_dP_tilde_k) 208 | 209 | expect_equal(function_value, direct_calculation) 210 | 211 | get_mean <- function(rho_tilde_k){ 212 | temp_lr_df <- varying_lr_df 213 | temp_lr_df$value[temp_lr_df$param == "rho_tilde"] <- rho_tilde_k 214 | temp_params <- dataframes_to_parameters(fixed_df, 215 | lr_to_ra(fixed_df, 216 | temp_lr_df, 217 | varying_df)) 218 | 219 | return( meaninate(gammas = temp_params$gammas, 220 | B = temp_params$B, 221 | Z = Z, 222 | P = P, 223 | X = X, 224 | X_tilde = X_tilde, 225 | Z_tilde = NULL, 226 | Z_tilde_gamma_cols = 1, 227 | P_tilde = temp_params$P_tilde, 228 | gamma_tilde = temp_params$gamma_tilde, 229 | alpha_tilde = temp_params$alpha_tilde, 230 | Z_tilde_list = Z_tilde_list)[1,1]) 231 | 232 | 233 | } 234 | 235 | num_grad <- 236 | numDeriv::grad(get_mean, 237 | varying_lr_df$value[varying_lr_df$param == "rho_tilde"]) 238 | 239 | expect_equal(function_value[1,],num_grad) 240 | 241 | } 242 | ) 243 | -------------------------------------------------------------------------------- /tests/testthat/test-par_to_jacobian_row.R: -------------------------------------------------------------------------------- 1 | test_that("P derivatives work in simple case", { 2 | 3 | X <- matrix(1) 4 | X_tilde <- matrix(1) 5 | Z <- matrix(1) 6 | Z_tilde <- matrix(1) 7 | Z_tilde_gamma_cols <- 1 8 | 9 | P <- matrix(rep(1/8, 8), nrow = 1) 10 | P_fixed_indices <- matrix(rep(FALSE,8), nrow = 1) 11 | P_tilde <- matrix((1:8)/sum(1:8), nrow = 1) 12 | P_tilde_fixed_indices <- matrix(rep(TRUE,8), nrow = 1) 13 | B <- matrix(c(-3:3,0),nrow = 1) 14 | B_fixed_indices <- matrix(rep(TRUE,8), nrow = 1) 15 | gammas <- 8 16 | gammas_fixed_indices <- TRUE 17 | gamma_tilde <- matrix(log(100)) 18 | gamma_tilde_fixed_indices <- matrix(TRUE) 19 | 20 | dfs <- parameters_to_dataframes(P, 21 | P_fixed_indices, 22 | P_tilde, 23 | P_tilde_fixed_indices, 24 | B, 25 | B_fixed_indices, 26 | gammas, 27 | gammas_fixed_indices, 28 | gamma_tilde, 29 | gamma_tilde_fixed_indices) 30 | 31 | params <- dataframes_to_parameters(dfs$fixed, 32 | dfs$varying) 33 | 34 | fixed_status <- dfs$fixed 35 | fixed_status$value <- 0 36 | varying_status <- dfs$varying 37 | varying_status$value <- 1 38 | 39 | param_status <- dataframes_to_parameters(fixed_status, 40 | varying_status) 41 | 42 | function_output <- par_to_jacobian_row(params, 43 | param_status, 44 | i = 1, 45 | j = 1, 46 | X, 47 | Z, 48 | X_tilde, 49 | Z_tilde, 50 | Z_tilde_gamma_cols) 51 | theoretical_output <- c(exp(gammas + B[1]),rep(0,7)) 52 | 53 | expect_equal(function_output, theoretical_output) 54 | }) 55 | 56 | 57 | test_that("More mean derivatives work in simple case", { 58 | 59 | X <- matrix(1) 60 | X_tilde <- matrix(1) 61 | Z <- matrix(1) 62 | Z_tilde <- matrix(1) 63 | Z_tilde_gamma_cols <- 1 64 | 65 | P <- matrix(rep(1/8, 8), nrow = 1) 66 | P_fixed_indices <- matrix(rep(FALSE,8), nrow = 1) 67 | P_tilde <- matrix((1:8)/sum(1:8), nrow = 1) 68 | P_tilde_fixed_indices <- matrix(rep(TRUE,8), nrow = 1) 69 | B <- matrix(c(-3:3,0),nrow = 1) 70 | B_fixed_indices <- matrix(rep(FALSE,8), nrow = 1) 71 | gammas <- 8 72 | gammas_fixed_indices <- FALSE 73 | gamma_tilde <- matrix(log(100)) 74 | gamma_tilde_fixed_indices <- matrix(FALSE) 75 | 76 | dfs <- parameters_to_dataframes(P, 77 | P_fixed_indices, 78 | P_tilde, 79 | P_tilde_fixed_indices, 80 | B, 81 | B_fixed_indices, 82 | gammas, 83 | gammas_fixed_indices, 84 | gamma_tilde, 85 | gamma_tilde_fixed_indices) 86 | 87 | params <- dataframes_to_parameters(dfs$fixed, 88 | dfs$varying) 89 | 90 | fixed_status <- dfs$fixed 91 | if(nrow(fixed_status)>0){ 92 | fixed_status$value <- 0} 93 | varying_status <- dfs$varying 94 | varying_status$value <- 1 95 | 96 | param_status <- dataframes_to_parameters(fixed_status, 97 | varying_status) 98 | 99 | function_output <- par_to_jacobian_row(params, 100 | param_status, 101 | i = 1, 102 | j = 1, 103 | X, 104 | Z, 105 | X_tilde, 106 | Z_tilde, 107 | Z_tilde_gamma_cols) 108 | theoretical_output <- c(exp(gammas + B[1]),rep(0,7), 109 | c(P[,1]*exp(gammas + B[,1]) + 110 | P_tilde[,1]*exp(gammas + gamma_tilde + B[,1]), 111 | rep(0,6)), 112 | P[,1]*exp(gammas + B[,1]) + 113 | P_tilde[,1]*exp(gammas + gamma_tilde + B[,1]), 114 | P_tilde[,1]*exp(gammas + gamma_tilde + B[,1])) 115 | 116 | 117 | 118 | 119 | expect_equal(function_output, theoretical_output) 120 | }) 121 | -------------------------------------------------------------------------------- /tests/testthat/test-safe_divide.R: -------------------------------------------------------------------------------- 1 | test_that("safe_divide works when numerator = denominator = 0", { 2 | 3 | expect_equal(safe_divide(0,0),1) 4 | }) 5 | 6 | test_that("safe_divide works when numerator and denominator are positive", { 7 | expect_equal(safe_divide(1,2),0.5) 8 | }) 9 | 10 | test_that("safe_divide returns correct penalty when numerator nonzero and denominator 0", { 11 | expect_equal(safe_divide(1,0,100),100) 12 | }) 13 | -------------------------------------------------------------------------------- /tests/testthat/test-simulate_paper_data.R: -------------------------------------------------------------------------------- 1 | test_that("Simulated data is reproducible", { 2 | W1 <- simulate_paper_data(n = 1, 3 | J = 5, 4 | B_multiplier = 1, 5 | distrib = "Poisson", 6 | seed = 0) 7 | 8 | W2 <- simulate_paper_data(n = 1, 9 | J = 5, 10 | B_multiplier = 1, 11 | distrib = "Poisson", 12 | seed = 0) 13 | 14 | expect_equal(W1,W2) 15 | 16 | W1 <- simulate_paper_data(n = 1, 17 | J = 5, 18 | B_multiplier = 1, 19 | distrib = "NB", 20 | seed = 0) 21 | 22 | W2 <- simulate_paper_data(n = 1, 23 | J = 5, 24 | B_multiplier = 1, 25 | distrib = "NB", 26 | seed = 0) 27 | 28 | expect_equal(W1,W2) 29 | }) 30 | 31 | test_that("Simulated data is reasonably unique", { 32 | W1 <- simulate_paper_data(n = 1, 33 | J = 5, 34 | B_multiplier = 1, 35 | distrib = "Poisson", 36 | seed = 0) 37 | 38 | W2 <- simulate_paper_data(n = 1, 39 | J = 5, 40 | B_multiplier = 1, 41 | distrib = "Poisson", 42 | seed = 1) 43 | 44 | expect_true(all(W1 != W2)) 45 | 46 | W1 <- simulate_paper_data(n = 1, 47 | J = 5, 48 | B_multiplier = 1, 49 | distrib = "NB", 50 | seed = 0) 51 | 52 | W2 <- simulate_paper_data(n = 1, 53 | J = 5, 54 | B_multiplier = 1, 55 | distrib = "NB", 56 | seed = 1) 57 | 58 | expect_true(all(W1 != W2)) 59 | }) 60 | 61 | 62 | -------------------------------------------------------------------------------- /tests/testthat/test-test_meanination.R: -------------------------------------------------------------------------------- 1 | test_that("mean makes sense in sample-read-only setting", { 2 | gammas <- log(3*(1:5)) 3 | B <- matrix(rep(0,3),nrow = 1) 4 | X <- matrix(1,nrow = 5,ncol = 1) 5 | Z <- matrix(1,nrow = 5, ncol = 1) 6 | P <- matrix((1:3)/6,nrow = 1) 7 | X_tilde <- matrix(0,ncol = 1, nrow = 1) 8 | P_tilde <- P 9 | Z_tilde <- Z*0 10 | gamma_tilde <- matrix(0,ncol = 1, nrow = 1) 11 | means <- meaninate(gammas = gammas, 12 | B = B, 13 | Z = Z, 14 | X = Z, 15 | P = P, 16 | Z_tilde = Z_tilde, 17 | X_tilde = X_tilde, 18 | Z_tilde_gamma_cols = 1, 19 | P_tilde = P_tilde, 20 | gamma_tilde = gamma_tilde) 21 | 22 | #make sure meaninate returns a matrix 23 | expect_true(is.matrix(means)) 24 | #make sure its dimensions are correct 25 | expect_equal(dim(means),c(5,3)) 26 | #test that P is being used correctly here 27 | expect_equal(means[1,3]/means[1,1],3) 28 | #test that gammas are being used correctly here 29 | # expect_equal 30 | 31 | }) 32 | -------------------------------------------------------------------------------- /tests/testthat/test-universal_test.R: -------------------------------------------------------------------------------- 1 | # test_that("multiplication works", { 2 | # n <- 10 3 | # sim_p_10 <- numeric(100) 4 | # set.seed(4939323) 5 | # for(sim in 1:100){ 6 | # print(paste("Simulation ", sim, sep = "", collapse = "")) 7 | # W <- simulate_simple_data(matrix(0, nrow = 1, ncol = 2), 8 | # distrib = "Poisson", 9 | # n = n, 10 | # gamma_mean = 11) 11 | # 12 | # full_model <- fit_simple_model(W, 13 | # B_fixed_at_zero = FALSE) 14 | # 15 | # null_model <- fit_simple_model(W, 16 | # B_fixed_at_zero = TRUE) 17 | # 18 | # 19 | # 20 | # 21 | # sim_p_10[sim] <- universal_test(W, 22 | # full_model, 23 | # null_model) 24 | # 25 | # 26 | # } 27 | # 28 | # qqplot(sim_p_10,runif(10000),type = "s") 29 | # 30 | # n <- 10 31 | # sim_p_10_nb <- numeric(100) 32 | # set.seed(4939323) 33 | # for(sim in 1:100){ 34 | # print(paste("Simulation ", sim, sep = "", collapse = "")) 35 | # W <- simulate_simple_data(matrix(0, nrow = 1, ncol = 2), 36 | # distrib = "nb10", 37 | # n = n, 38 | # gamma_mean = 11) 39 | # 40 | # full_model <- fit_simple_model(W, 41 | # B_fixed_at_zero = FALSE) 42 | # 43 | # null_model <- fit_simple_model(W, 44 | # B_fixed_at_zero = TRUE) 45 | # 46 | # 47 | # 48 | # 49 | # sim_p_10_nb[sim] <- universal_test(W, 50 | # full_model, 51 | # null_model, 52 | # parallelize = TRUE) 53 | # 54 | # print(sim_p_10_nb[sim]) 55 | # 56 | # 57 | # } 58 | # 59 | # qqplot(sim_p_10,runif(10000),type = "s") 60 | # }) 61 | --------------------------------------------------------------------------------