├── DESCRIPTION
├── LICENSE
├── NAMESPACE
├── R
    ├── bootstrap_ci.R
    ├── bootstrap_lrt.R
    ├── calculate_U.R
    ├── calculate_criterion_lr.R
    ├── calculate_log_penalty.R
    ├── construct_Z_tilde.R
    ├── crossfit_U.R
    ├── d_mu_d_P_original.R
    ├── d_mu_d_gamma_original.R
    ├── data.R
    ├── dataframes_to_parameters.R
    ├── deriv_criterion_lr.R
    ├── dfs_to_derivs.R
    ├── dfs_to_mean.R
    ├── do_one_boot.R
    ├── do_one_lrt.R
    ├── do_one_simulation.R
    ├── estimate_parameters.R
    ├── evaluate_criterion_lr.R
    ├── fit_simple_model.R
    ├── fit_simulation_model.R
    ├── get_A_tilde_k_list.R
    ├── get_Ak_list.R
    ├── get_gmm_inv_weights.R
    ├── gmm_criterion.R
    ├── log_penalty.R
    ├── log_penalty_grad.R
    ├── log_penalty_gradient.R
    ├── log_penalty_hessian.R
    ├── logpois.R
    ├── lr_to_ra.R
    ├── make_model_bundle.R
    ├── mean_jac.R
    ├── mean_jac_lr_faster.R
    ├── meaninate.R
    ├── mu_d_alpha_tilde.R
    ├── mu_d_beta.R
    ├── mu_d_beta_faster.R
    ├── mu_d_gamma_faster.R
    ├── mu_d_gamma_tilde.R
    ├── mu_d_rho_faster.R
    ├── mu_d_rho_tilde_faster.R
    ├── nullify.R
    ├── numerical_jacobian.R
    ├── par_to_jacobian_row.R
    ├── parameters_to_dataframes.R
    ├── pb_update_one_constrained.R
    ├── pb_update_unconstrained.R
    ├── poisson_criterion.R
    ├── proximal_bootstrap.R
    ├── push_row.R
    ├── ra_to_lr.R
    ├── refit_model.R
    ├── safe_divide.R
    ├── safe_multiply.R
    ├── simpl_auglag_fnnls.R
    ├── simpl_opt_linesearch_fnnls.R
    ├── simple_poisson.R
    ├── simulate_paper_data.R
    ├── simulate_simple_data.R
    └── universal_test.R
├── README.Rmd
├── README.md
├── data
    ├── costea2017_metaphlan2_profiles.rda
    ├── costea2017_mock_composition.rda
    ├── costea2017_sample_data.rda
    ├── karstens.rdata
    └── karstens_phyloseq.rda
├── man
    ├── bootstrap_ci.Rd
    ├── calculate_log_penalty.Rd
    ├── costea2017_mock_composition.Rd
    ├── dataframes_to_parameters.Rd
    ├── estimate_parameters.Rd
    ├── mu_d_P.Rd
    ├── mu_d_beta.Rd
    ├── mu_d_gamma.Rd
    ├── mu_d_gamma_tilde.Rd
    ├── mu_d_rho_faster.Rd
    └── mu_d_rho_tilde_faster.Rd
├── tests
    ├── testthat.R
    └── testthat
    │   ├── test-bootstrap_ci.R
    │   ├── test-bootstrap_lrt.R
    │   ├── test-calculate_log_penalty.R
    │   ├── test-d_mu_d_beta_original.R
    │   ├── test-d_mu_d_gamma_original.R
    │   ├── test-deriv_criterion_lr.R
    │   ├── test-dfs_to_derivs.R
    │   ├── test-do_one_simulation.R
    │   ├── test-estimate_parameters.R
    │   ├── test-fit_simulation_model.R
    │   ├── test-get_A_tilde_k_list.R
    │   ├── test-gmm_criterion.R
    │   ├── test-lr_to_ra.R
    │   ├── test-mean_jac_lr_faster.R
    │   ├── test-mu_d_P.R
    │   ├── test-mu_d_beta.R
    │   ├── test-mu_d_gamma_faster.R
    │   ├── test-mu_d_gamma_tilde.R
    │   ├── test-mu_d_rho.R
    │   ├── test-mu_d_rho_tilde.R
    │   ├── test-optimization.R
    │   ├── test-par_to_jacobian_row.R
    │   ├── test-safe_divide.R
    │   ├── test-simulate_paper_data.R
    │   ├── test-test_meanination.R
    │   └── test-universal_test.R
└── vignettes
    ├── compare-experiments.Rmd
    └── dilution-series.Rmd


/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: tinyvamp
 2 | Title: Modeling complex measurement error in microbiome experiments
 3 | Version: 0.0.6.0
 4 | Authors@R: as.person(c(
 5 |     "David Clausen <dsc24@uw.edu> [aut, cre]",
 6 |     "Amy Willis [aut]"
 7 |   ))
 8 | Description: tinyvamp estimates the model described in Clausen & Willis, 2022. Notably, our model allows relative abundances to lie on the boundary of the simplex. We present a stable algorithm for computing parameter estimates, asymptotically valid procedures for inference in this nonstandard problem, and examples of the utility of the method. Our approach can be used to select or compare experimental protocols, design experiments with appropriate control data, analyze mixed-specimen samples, and remove across-sample contamination.
 9 | License: BSD_3_clause + file LICENSE
10 | Encoding: UTF-8
11 | LazyData: true
12 | Roxygen: list(markdown = TRUE)
13 | RoxygenNote: 7.2.3
14 | Depends:
15 |     R (>= 3.5.0),
16 |     cir,
17 |     fastnnls,
18 |     logsum,
19 |     numDeriv,
20 |     parallel,
21 |     stats
22 | Imports:
23 |     data.table,
24 |     Matrix,
25 |     magrittr
26 | Suggests:
27 |     knitr,
28 |     phyloseq,
29 |     rmarkdown,
30 |     testthat (>= 3.0.0)
31 | Config/testthat/edition: 3
32 | VignetteBuilder: knitr
33 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | =======
 2 | License
 3 | =======
 4 | 
 5 | Files: tinyvamp/*/*.R
 6 | Copyright: 2022, The Regents of the University of Washington.
 7 | License: BSD-3-Clause
 8 | 
 9 | Redistribution and use in source and binary forms, with or without
10 | modification, are permitted provided that the following conditions are
11 | met:
12 | 
13 |     Redistributions of source code must retain the above copyright
14 |     notice, this list of conditions and the following disclaimer.
15 | 
16 |     Redistributions in binary form must reproduce the above copyright
17 |     notice, this list of conditions and the following disclaimer in
18 |     the documentation and/or other materials provided with the
19 |     distribution.
20 | 
21 |     Neither the name of the University of Washington nor the names of its
22 |     contributors may be used to endorse or promote products derived
23 |     from this software without specific prior written permission.
24 | 
25 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(bootstrap_ci)
 4 | export(bootstrap_lrt)
 5 | export(calculate_criterion_lr)
 6 | export(estimate_parameters)
 7 | export(evaluate_criterion_lr)
 8 | import(cir)
 9 | import(fastnnls)
10 | import(logsum)
11 | import(numDeriv)
12 | import(parallel)
13 | import(stats)
14 | 


--------------------------------------------------------------------------------
/R/bootstrap_ci.R:
--------------------------------------------------------------------------------
  1 | #' Apply the Bayesian subsampled bootstrap to a fitted tinyvamp model
  2 | #' 
  3 | #' @import stats
  4 | #' @import parallel
  5 | #' 
  6 | #' 
  7 | #' @export
  8 | bootstrap_ci <- function(W,
  9 |                          fitted_model,
 10 |                          n_boot,
 11 |                          m = NULL,
 12 |                          alpha = 0.05,
 13 |                          parallelize = FALSE,
 14 |                          ncores = 5,
 15 |                          seed = NULL,
 16 |                          return_models = FALSE,
 17 |                          verbose = FALSE,
 18 |                          adjust = FALSE
 19 |                          
 20 | ){
 21 |   n <- nrow(W)
 22 |   J <- ncol(W)
 23 |   
 24 |   if(is.null(m)){
 25 |     m <- sqrt(n)
 26 |   }
 27 |   if(is.null(seed)){
 28 |     seed <- 0
 29 |   }
 30 |   
 31 |   if(is.null(fitted_model$wts)){
 32 |     fitted_model$wts <- rep(1,n*J)
 33 |   }
 34 |   wts <- fitted_model$wts
 35 |   set.seed(seed)
 36 |   boot_seeds <- sample(1:1e8,n_boot)
 37 |   
 38 |   boot_results <- vector(n_boot,
 39 |                          mode = "list")
 40 |   
 41 |   if(!parallelize){
 42 |     for(boot_iter in 1:n_boot){
 43 |       print(boot_iter)
 44 |       set.seed(boot_seeds[boot_iter])
 45 |       boot_weights <- rgamma(n,m/n)
 46 |       boot_weights <- boot_weights/sum(boot_weights)
 47 |       boot_weights <- rep(boot_weights, each = J)
 48 |       boot_weights <- boot_weights*wts
 49 |       boot_weights <- J*boot_weights/sum(boot_weights)
 50 |       
 51 |       boot_model <- estimate_parameters(W = W,
 52 |                                         X = fitted_model$X,
 53 |                                         Z = fitted_model$Z,
 54 |                                         Z_tilde = fitted_model$Z_tilde,
 55 |                                         Z_tilde_gamma_cols =
 56 |                                           fitted_model$Z_tilde_gamma_cols,
 57 |                                         gammas =
 58 |                                           fitted_model$gammas,
 59 |                                         gammas_fixed_indices =
 60 |                                           fitted_model$gammas_fixed_indices,
 61 |                                         P = fitted_model$P,
 62 |                                         P_fixed_indices =
 63 |                                           fitted_model$P_fixed_indices,
 64 |                                         B = fitted_model$B,
 65 |                                         B_fixed_indices =
 66 |                                           fitted_model$B_fixed_indices,
 67 |                                         X_tilde = fitted_model$X_tilde,
 68 |                                         P_tilde = fitted_model$P_tilde,
 69 |                                         P_tilde_fixed_indices =
 70 |                                           fitted_model$P_tilde_fixed_indices,
 71 |                                         gamma_tilde = fitted_model$gamma_tilde,
 72 |                                         gamma_tilde_fixed_indices =
 73 |                                           fitted_model$gamma_tilde_fixed_indices,
 74 |                                         alpha_tilde =
 75 |                                           fitted_model$alpha_tilde,
 76 |                                         Z_tilde_list =
 77 |                                           fitted_model$Z_tilde_list,
 78 |                                         barrier_t = 1, #starting value of reciprocal barrier penalty coef.
 79 |                                         barrier_scale = 10, #increments for value of barrier penalty
 80 |                                         max_barrier = 1e12, #maximum value of barrier_t
 81 |                                         initial_conv_tol = 1000,
 82 |                                         final_conv_tol = 0.1,
 83 |                                         constraint_tolerance = 1e-10,
 84 |                                         hessian_regularization = 0.01,
 85 |                                         criterion = "Poisson",
 86 |                                         profile_P = TRUE,
 87 |                                         verbose = verbose,
 88 |                                         wts = boot_weights,
 89 |                                         profiling_maxit = 25)
 90 |       
 91 |       boot_results[[boot_iter]] <- boot_model
 92 |       
 93 |     }
 94 |   }
 95 |   if(parallelize){
 96 |     boot_weights <- lapply(1:n_boot,
 97 |                            function(x){
 98 |                              # set.seed(x)
 99 |                              bwts <- rgamma(n,m/n)
100 |                              bwts <- rep(bwts, each = J)
101 |                              bwts <- bwts*wts
102 |                              bwts <- J*bwts/sum(bwts)
103 |                              return(bwts)
104 |                            })
105 |     
106 |     boot_results <-
107 |       parallel::mclapply(1:n_boot,
108 |                          function(k)
109 |                            do_one_boot(W = W,
110 |                                        fitted_model =
111 |                                          fitted_model,
112 |                                        m = m,
113 |                                        seed = boot_seeds[k],
114 |                                        boot_weights = boot_weights[[k]]),
115 |                          mc.cores = ncores,
116 |                          mc.set.seed = TRUE)
117 |   }
118 |   
119 |   boot_matrix <-
120 |     do.call(cbind,
121 |             lapply(1:n_boot,function(k) matrix(sqrt(m)*(
122 |               boot_results[[k]]$varying$value - fitted_model$varying$value),
123 |               ncol = 1)))
124 |   
125 |   if(adjust){
126 |     num_nonsillyparams <- sum(fitted_model$varying$param != "gamma")
127 |     adjust_factor <- (n*J)/(n*J - num_nonsillyparams)
128 |     boot_matrix <- boot_matrix*sqrt(adjust_factor)
129 |   }
130 |   
131 |   lower_boot_quantiles <- apply(boot_matrix,1, function(x)
132 |     quantile(x, alpha/2))
133 |   
134 |   upper_boot_quantiles <- apply(boot_matrix,1,function(x)
135 |     quantile(x,1-alpha/2))
136 |   
137 |   summary_df <- fitted_model$varying
138 |   
139 |   summary_df$lower_ci <- summary_df$value - (1/sqrt(n))*upper_boot_quantiles
140 |   summary_df$upper_ci <- summary_df$value - (1/sqrt(n))*lower_boot_quantiles
141 |   
142 |   summary_df$lower_ci[summary_df$param %in% c("P","P_tilde")] <-
143 |     pmax(summary_df$lower_ci[summary_df$param %in% c("P","P_tilde")],
144 |          0)
145 |   
146 |   summary_df$upper_ci[summary_df$param %in% c("P","P_tilde")] <-
147 |     pmin(summary_df$upper_ci[summary_df$param %in% c("P","P_tilde")],
148 |          1)
149 |   
150 |   if(return_models){
151 |     return(list("ci" = summary_df,
152 |                 "bootstrapped_models" = boot_results))
153 |   } else{
154 |     return(list("ci" = summary_df,
155 |                 "bootstrapped_models" = NULL))
156 |   }
157 |   
158 | }
159 | 


--------------------------------------------------------------------------------
/R/calculate_U.R:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | calculate_U <- function(W,
  4 |                         full_model,
  5 |                         null_model,
  6 |                         training_indicator,
  7 |                         log_scale = TRUE){
  8 |   W_train <- W[training_indicator,,drop = FALSE]
  9 |   W_test <- W[!training_indicator,,drop = FALSE]
 10 |   
 11 |   ### may need to set some entries of P to be fixed if no observations in
 12 |   ### training set correspond to them
 13 |   
 14 |   if(is.null(full_model$Z_tilde)){
 15 |     full_model_Z_tilde <- NULL
 16 |     full_model_Z_tilde_test <- NULL
 17 |   } else{
 18 |     full_model_Z_tilde <- full_model$Z_tilde[training_indicator,,drop = FALSE]
 19 |     full_model_Z_tilde_test <-
 20 |       full_model$Z_tilde[!training_indicator,,drop = FALSE]
 21 |   }
 22 |   if(!is.null(full_model$Z_tilde_list)){
 23 |     full_model_Z_tilde_list <-
 24 |       lapply(1:length(full_model$Z_tilde_list),
 25 |              function(k)
 26 |                full_model$Z_tilde_list[[k]][training_indicator,,drop = FALSE])
 27 |     full_model_Z_tilde_list_test <-
 28 |       lapply(1:length(full_model$Z_tilde_list),
 29 |              function(k)
 30 |                full_model$Z_tilde_list[[k]][!training_indicator,,drop = FALSE])
 31 |   } else{
 32 |     full_model_Z_tilde_list <- full_model$Z_tilde_list
 33 |     full_model_Z_tilde_list_test <- full_model$Z_tilde_list
 34 |   }
 35 |   
 36 |   
 37 |   if(is.null(null_model$Z_tilde)){
 38 |     null_model_Z_tilde <- NULL
 39 |     null_model_Z_tilde_test <- NULL
 40 |   } else{
 41 |     null_model_Z_tilde <- null_model$Z_tilde[training_indicator,,drop = FALSE]
 42 |     null_model_Z_tilde_test <-
 43 |       null_model$Z_tilde[!training_indicator,,drop = FALSE]
 44 |   }
 45 |   if(!is.null(null_model$Z_tilde_list)){
 46 |     null_model_Z_tilde_list <-
 47 |       lapply(1:length(null_model$Z_tilde_list),
 48 |              function(k)
 49 |                null_model$Z_tilde_list[[k]][training_indicator,,drop = FALSE])
 50 |     null_model_Z_tilde_list_test <-
 51 |       lapply(1:length(null_model$Z_tilde_list),
 52 |              function(k)
 53 |                null_model$Z_tilde_list[[k]][!training_indicator,,drop = FALSE])
 54 |   } else{
 55 |     null_model_Z_tilde_list <- null_model$Z_tilde_list
 56 |     null_model_Z_tilde_list_test <- null_model$Z_tilde_list
 57 |   }
 58 |   
 59 |   full_training <-
 60 |     estimate_parameters(W = W_train,
 61 |                         X = full_model$X[training_indicator,,drop = FALSE],
 62 |                         Z = full_model$Z[training_indicator,,drop = FALSE],
 63 |                         Z_tilde = full_model_Z_tilde,
 64 |                         Z_tilde_gamma_cols = full_model$Z_tilde_gamma_cols,
 65 |                         gammas = full_model$gammas[training_indicator],
 66 |                         gammas_fixed_indices =
 67 |                           full_model$gammas_fixed_indices[training_indicator],
 68 |                         P = full_model$P,
 69 |                         P_fixed_indices = full_model$P_fixed_indices,
 70 |                         B = full_model$B,
 71 |                         B_fixed_indices = full_model$B_fixed_indices,
 72 |                         X_tilde = full_model$X_tilde,
 73 |                         P_tilde = full_model$P_tilde,
 74 |                         P_tilde_fixed_indices = full_model$P_tilde_fixed_indices,
 75 |                         gamma_tilde = full_model$gamma_tilde,
 76 |                         gamma_tilde_fixed_indices =
 77 |                           full_model$gamma_tilde_fixed_indices,
 78 |                         alpha_tilde = full_model$alpha_tilde,
 79 |                         Z_tilde_list = full_model_Z_tilde_list)
 80 |   
 81 |   null_training <-
 82 |     estimate_parameters(W = W_train,
 83 |                         X = null_model$X[training_indicator,,drop = FALSE],
 84 |                         Z = null_model$Z[training_indicator,,drop = FALSE],
 85 |                         Z_tilde = null_model_Z_tilde,
 86 |                         Z_tilde_gamma_cols = null_model$Z_tilde_gamma_cols,
 87 |                         gammas = null_model$gammas[training_indicator],
 88 |                         gammas_fixed_indices =
 89 |                           null_model$gammas_fixed_indices[training_indicator],
 90 |                         P = null_model$P,
 91 |                         P_fixed_indices = null_model$P_fixed_indices,
 92 |                         B = null_model$B,
 93 |                         B_fixed_indices = null_model$B_fixed_indices,
 94 |                         X_tilde = null_model$X_tilde,
 95 |                         P_tilde = null_model$P_tilde,
 96 |                         P_tilde_fixed_indices = null_model$P_tilde_fixed_indices,
 97 |                         gamma_tilde = null_model$gamma_tilde,
 98 |                         gamma_tilde_fixed_indices =
 99 |                           null_model$gamma_tilde_fixed_indices,
100 |                         alpha_tilde = null_model$alpha_tilde,
101 |                         Z_tilde_list = null_model_Z_tilde_list)
102 |   
103 |   full_starter_means <- meaninate(gammas = rep(0, sum(!training_indicator)),
104 |                                   B = full_training$B,
105 |                                   X =
106 |                                     full_model$X[!training_indicator,,drop = FALSE],
107 |                                   Z =
108 |                                     full_model$Z[!training_indicator,,drop = FALSE],
109 |                                   P = full_training$P,
110 |                                   X_tilde = full_model$X_tilde,
111 |                                   Z_tilde = full_model_Z_tilde_test,
112 |                                   Z_tilde_gamma_cols =
113 |                                     full_model$Z_tilde_gamma_cols,
114 |                                   P_tilde = full_training$P_tilde,
115 |                                   gamma_tilde = full_training$gamma_tilde,
116 |                                   alpha_tilde = full_training$alpha_tilde,
117 |                                   Z_tilde_list = full_model_Z_tilde_list_test)
118 |   
119 |   full_profile_gammas <- log(apply(W_test,1,sum)) -
120 |     log(apply(full_starter_means,1,sum))
121 |   
122 |   null_starter_means <- meaninate(gamma = rep(0, sum(!training_indicator)),
123 |                                   B = null_training$B,
124 |                                   X =
125 |                                     null_model$X[!training_indicator,,drop = FALSE],
126 |                                   Z =
127 |                                     null_model$Z[!training_indicator,,drop = FALSE],
128 |                                   P = null_training$P,
129 |                                   X_tilde = null_model$X_tilde,
130 |                                   Z_tilde = null_model_Z_tilde_test,
131 |                                   Z_tilde_gamma_cols =
132 |                                     null_model$Z_tilde_gamma_cols,
133 |                                   P_tilde = null_training$P_tilde,
134 |                                   gamma_tilde = null_training$gamma_tilde,
135 |                                   alpha_tilde = null_training$alpha_tilde,
136 |                                   Z_tilde_list = null_model_Z_tilde_list_test)
137 |   
138 |   null_profile_gammas <- log(apply(W_test,1,sum)) -
139 |     log(apply(null_starter_means,1,sum))
140 |   
141 |   
142 |   full_means_test <- meaninate(gammas = full_profile_gammas,
143 |                                B = full_training$B,
144 |                                X =
145 |                                  full_model$X[!training_indicator,,drop = FALSE],
146 |                                Z =
147 |                                  full_model$Z[!training_indicator,,drop = FALSE],
148 |                                P = full_training$P,
149 |                                X_tilde = full_model$X_tilde,
150 |                                Z_tilde = full_model_Z_tilde_test,
151 |                                Z_tilde_gamma_cols =
152 |                                  full_model$Z_tilde_gamma_cols,
153 |                                P_tilde = full_training$P_tilde,
154 |                                gamma_tilde = full_training$gamma_tilde,
155 |                                alpha_tilde = full_training$alpha_tilde,
156 |                                Z_tilde_list = full_model_Z_tilde_list_test)
157 |   
158 |   null_means_test <- meaninate(gammas = null_profile_gammas,
159 |                                B = null_training$B,
160 |                                X =
161 |                                  null_model$X[!training_indicator,,drop = FALSE],
162 |                                Z =
163 |                                  null_model$Z[!training_indicator,,drop = FALSE],
164 |                                P = null_training$P,
165 |                                X_tilde = null_model$X_tilde,
166 |                                Z_tilde = null_model_Z_tilde_test,
167 |                                Z_tilde_gamma_cols =
168 |                                  null_model$Z_tilde_gamma_cols,
169 |                                P_tilde = null_training$P_tilde,
170 |                                gamma_tilde = null_training$gamma_tilde,
171 |                                alpha_tilde = null_training$alpha_tilde,
172 |                                Z_tilde_list = null_model_Z_tilde_list_test)
173 |   
174 |   l_HA <- poisson_criterion(W_test,full_means_test)
175 |   l_H0 <- poisson_criterion(W_test,null_means_test)
176 |   
177 |   if(log_scale){
178 |     return(l_HA - l_H0)
179 |   } else{
180 |     return(exp(l_HA - l_H0))
181 |   }
182 |   
183 | }
184 | 


--------------------------------------------------------------------------------
/R/calculate_criterion_lr.R:
--------------------------------------------------------------------------------
 1 | #' @export
 2 | #' 
 3 | calculate_criterion_lr <- function(W,
 4 |                                    X,
 5 |                                    Z,
 6 |                                    Z_tilde = NULL,
 7 |                                    Z_tilde_gamma_cols,
 8 |                                    Z_tilde_list = NULL,
 9 |                                    alpha_tilde = NULL,
10 |                                    gammas,
11 |                                    gammas_fixed_indices,
12 |                                    P,
13 |                                    P_fixed_indices,
14 |                                    B,
15 |                                    B_fixed_indices,
16 |                                    X_tilde,
17 |                                    P_tilde,
18 |                                    P_tilde_fixed_indices,
19 |                                    gamma_tilde,
20 |                                    gamma_tilde_fixed_indices,
21 |                                    criterion = "Poisson", 
22 |                                    lr_scale = TRUE, 
23 |                                    wts = NULL) {
24 |   
25 |   stopifnot(criterion == "Poisson")
26 |   stopifnot(lr_scale == TRUE)
27 |   
28 |   means <- meaninate(gammas = gammas, 
29 |                      B = B, 
30 |                      X = X,
31 |                      Z = Z,
32 |                      P = P, 
33 |                      X_tilde = X_tilde,
34 |                      Z_tilde = Z_tilde,
35 |                      Z_tilde_gamma_cols = Z_tilde_gamma_cols,
36 |                      Z_tilde_list = Z_tilde_list,
37 |                      P_tilde = P_tilde, 
38 |                      gamma_tilde = gamma_tilde, 
39 |                      alpha_tilde = alpha_tilde, 
40 |                      return_separate = FALSE)
41 |   
42 |   poisson_criterion(W = W,
43 |                     means = means,
44 |                     wts = wts)
45 |   
46 | }


--------------------------------------------------------------------------------
/R/calculate_log_penalty.R:
--------------------------------------------------------------------------------
 1 | #################### Barrier Penalty Function #######################
 2 | #' Calculate barrier penalty to add to objective function inside
 3 | #' barrier algorithm
 4 | #'
 5 | #' @param varying_lr_df A data frame containing values of parameters that
 6 | #' are treated as unknown, with relative abundance parameters represented
 7 | #' on the log ratio scale (i.e., as phi and phi_tilde)
 8 | #' @param fixed_df A data frame containing values of parameters that are
 9 | #' treated as known
10 | #' @param barrier_t The current value of t, the barrier penalty parameter
11 | #' 
12 | #' @import logsum
13 | #'
14 | #' @return The calculated value of the barrier penalty
15 | calculate_log_penalty <- function(varying_lr_df,
16 |                                   fixed_df,
17 |                                   barrier_t){
18 | 
19 |   which_rho_k <- unique(varying_lr_df$k[varying_lr_df$param=="rho"])
20 |   which_rho_tilde_k <- unique(varying_lr_df$k[varying_lr_df$param=="rho_tilde"])
21 | 
22 |   log_P <- lapply(which_rho_k,
23 |                   function(k) (varying_lr_df$value[varying_lr_df$param == "rho" &
24 |                                                      varying_lr_df$k == k]) %>%
25 |                     (function(x) c(x,0) - logsum::sum_of_logs(c(x,0))))
26 |   log_P_tilde <- lapply(which_rho_tilde_k,
27 |                         function(k)
28 |                           (varying_lr_df$value[
29 |                             varying_lr_df$param == "rho_tilde" &
30 |                               varying_lr_df$k == k]) %>%
31 |                           (function(x) c(x,0) - logsum::sum_of_logs(c(x,0))))
32 |   log_ra_sum <- do.call(sum,log_P) + do.call(sum,log_P_tilde)
33 | 
34 |   return((-1/barrier_t)*log_ra_sum)
35 | 
36 | }
37 | 


--------------------------------------------------------------------------------
/R/construct_Z_tilde.R:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | construct_Z_tilde <- function(Z_tilde_list,
 4 |                               alpha_tilde){
 5 | 
 6 | n_components <- length(Z_tilde_list)
 7 | if(length(alpha_tilde) != n_components - 1){
 8 |   stop("length(alpha_tilde) must equal length(Z_tilde_list) - 1")
 9 | }
10 | 
11 | Z_tilde <- Z_tilde_list[[1]]
12 | 
13 | for(a in 2:n_components){
14 |   Z_tilde <- Z_tilde + Z_tilde_list[[a]]*exp(alpha_tilde[a - 1])
15 | }
16 | return(Z_tilde)
17 | }
18 | 


--------------------------------------------------------------------------------
/R/crossfit_U.R:
--------------------------------------------------------------------------------
 1 | 
 2 | crossfit_U <- function(W,
 3 |                        full_model,
 4 |                        null_model,
 5 |                        parallelize = FALSE){
 6 |   
 7 |   n <- nrow(W)
 8 |   
 9 |   if(!parallelize){
10 |     log_us <- numeric(n)
11 |     
12 |     for(i in 1:n){
13 |       # print(i)
14 |       train_ind <- rep(TRUE,n)
15 |       train_ind[i] <- FALSE
16 |       log_us[i] <- calculate_U(W, full_model, null_model,
17 |                                training_indicator = train_ind)
18 |     }
19 |   } else{
20 |     log_us <- parallel::mclapply(1:n,
21 |                                  function(i){
22 |                                    train_ind <- rep(TRUE,n)
23 |                                    train_ind[i] <- FALSE
24 |                                    return(calculate_U(W,
25 |                                                       full_model,
26 |                                                       null_model,
27 |                                                       training_indicator = train_ind))
28 |                                    
29 |                                  })
30 |     log_us <- unlist(log_us)
31 |   }
32 |   
33 |   return(exp(logsum::sum_of_logs(log_us) - log(n)))
34 | }
35 | 


--------------------------------------------------------------------------------
/R/d_mu_d_P_original.R:
--------------------------------------------------------------------------------
 1 | #' Calculate derivative of mu_ij with respect to an entry of P
 2 | #'
 3 | #' @param i The sample index (must be in 1, ..., n)
 4 | #' @param j The taxon index (must be in 1, ..., J)
 5 | #' @param m The row of P with respect to which to take derivative
 6 | #' @param gammas Numeric vector of read intensities
 7 | #' @param B Detection efficiency matrix
 8 | #' @param X The efficiency design matrix (n x p)
 9 | #' @param Z The sample design matrix (n x K)
10 | #' @param P The sample relative abundance matrix (K x J)
11 | #' @param X_tilde The spurious read efficiency design (K_tilde x p)
12 | #' @param Z_tilde The spurious read design (n x K_tilde)
13 | #' @param Z_tilde_gamma_cols Numeric vector containing indexes of columns of
14 | #' Z_tilde to scale by exp(gamma); NULL if no columns to be scaled
15 | #' @param P_tilde The spurious source relative abundance matrix (K_tilde x J)
16 | #' @param gamma_tilde Spurious read intensity parameter
17 | #'
18 | #' @return A derivative d mu_ij / d P_kj
19 | mu_d_P <- function(i,
20 |                    j,
21 |                    m,
22 |                    gammas,
23 |                    B,
24 |                    X,
25 |                    Z,
26 |                    P){
27 | 
28 |   mu_deriv <-
29 |     Z[i,m,drop = F]*exp(gammas[i] +
30 |                           X[i,,drop = F]%*%B[,j,drop = F])
31 | 
32 |   return(mu_deriv)
33 | }
34 | 


--------------------------------------------------------------------------------
/R/d_mu_d_gamma_original.R:
--------------------------------------------------------------------------------
 1 | #' Calculate derivative of mu_ij with respect to ith entry of gamma
 2 | #'
 3 | #' @param i The sample index (must be in 1, ..., n)
 4 | #' @param j The taxon index (must be in 1, ..., J)
 5 | #' @param gammas Numeric vector of read intensities
 6 | #' @param B Detection efficiency matrix
 7 | #' @param X The efficiency design matrix (n x p)
 8 | #' @param Z The sample design matrix (n x K)
 9 | #' @param P The sample relative abundance matrix (K x J)
10 | #' @param X_tilde The spurious read efficiency design (K_tilde x p)
11 | #' @param Z_tilde The spurious read design (n x K_tilde)
12 | #' @param Z_tilde_gamma_cols Numeric vector containing indexes of columns of
13 | #' Z_tilde to scale by exp(gamma); NULL if no columns to be scaled
14 | #' @param P_tilde The spurious source relative abundance matrix (K_tilde x J)
15 | #' @param gamma_tilde Spurious read intensity parameter
16 | #'
17 | #' @return A derivative d mu_ij / d gamma_i
18 | 
19 | mu_d_gamma <- function(i,
20 |                        j,
21 |                        gammas,
22 |                        B,
23 |                        X,
24 |                        Z,
25 |                        P,
26 |                        X_tilde,
27 |                        Z_tilde,
28 |                        Z_tilde_gamma_cols,
29 |                        P_tilde,
30 |                        gamma_tilde){
31 | 
32 |   mu_deriv <- Z[i,,drop = F]%*%P[,j,drop = F]*exp(gammas[i] +
33 |                                                     X[i,,drop = F]%*%B[,j,drop = F])
34 | 
35 |   K_tilde <- dim(P_tilde)[1]
36 |   for(k_tilde in 1:K_tilde){
37 |     if(k_tilde %in% Z_tilde_gamma_cols){
38 |       mu_deriv <- mu_deriv + exp(gammas[i])*
39 |         (Z_tilde[i,k_tilde,drop = F])%*%
40 |         (P_tilde[k_tilde,j,drop = F])*
41 |         exp(gamma_tilde[k_tilde] +
42 |               X_tilde[k_tilde,,drop = F]%*%B[,j,drop = F])
43 |     }
44 |   }
45 |   return(mu_deriv)
46 | }
47 | 


--------------------------------------------------------------------------------
/R/data.R:
--------------------------------------------------------------------------------
1 | #' The composition of 10 spike-in taxa in 19 samples observed from flow cytometry. 
2 | #' 
3 | #' For details, see Costea et al (2017). 
4 | #'
5 | #'
6 | #' @source \url{https://doi.org/10.1038/nbt.3960}
7 | "costea2017_mock_composition"


--------------------------------------------------------------------------------
/R/dataframes_to_parameters.R:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #' Convert parameter values stored in data frame format to matrix format
 4 | #'
 5 | #' @param fixed_df A dataframe containing values of model parameters
 6 | #' treated as fixed and known (i.e. held constant at known values)
 7 | #' @param varying_df A dataframe containing current values of model parameters treated
 8 | #' as fixed and unknown (i.e., parameters to be estimated)
 9 | #' @return A list containing
10 | #' \item{P}{Specimen relative abundance matrix (of dimension K x J)}
11 | #' \item{P_tilde}{Spurious read source
12 | #' relative abundance matrix (of dimension K-tilde x J)}
13 | #' \item{B}{A matrix of detection efficiencies (of dimension p x J)}
14 | #' \item{gammas}{An n-vector of sample-specific read intensities}
15 | #' \item{gamma_tilde}{A-vector of spurious read source intensities
16 | #' (of length K-tilde)}
17 | #' @author David Clausen
18 | dataframes_to_parameters <- function(fixed_df,
19 |                                      varying_df){
20 | 
21 |   together_df <- rbind(fixed_df,varying_df)
22 | 
23 |   K <- max(together_df$k[together_df$param == "P"])
24 | 
25 |   K_tilde <- max(together_df$k[together_df$param == "P_tilde"])
26 | 
27 |   J <- max(together_df$j[together_df$param == "P"])
28 | 
29 |   p <- max(together_df$k[together_df$param == "B"])
30 | 
31 |   P <- matrix(ncol = J, nrow = K)
32 | 
33 |   P_df <- together_df[together_df$param == "P",]
34 |   for(k in 1:K){
35 |     P_row <- P_df[P_df$k ==k,]
36 |     P[k,] <- P_row$value[order(P_row$j)]
37 |   }
38 | 
39 |   P_tilde <- matrix(ncol = J, nrow = K_tilde)
40 |   P_tilde_df <- together_df[together_df$param == "P_tilde",]
41 |   for(k in 1:K_tilde){
42 |     P_tilde_row <- P_tilde_df[P_tilde_df$k ==k,]
43 |     P_tilde[k,] <- P_tilde_row$value[order(P_tilde_row$j)]
44 |   }
45 | 
46 |   B_df <- together_df[together_df$param == "B",]
47 |   B <- matrix(0,ncol = J, nrow = p)
48 | 
49 |   for(k in 1:p){
50 |     B_row <- B_df[B_df$k ==k,]
51 |     B[k,] <- B_row$value[order(B_row$j)]
52 |   }
53 | 
54 |   B[,J] <- 0
55 | 
56 |   gammas_df <- together_df[together_df$param == "gamma",]
57 | 
58 |   gammas <- matrix(nrow = nrow(gammas_df), ncol = 1)
59 | 
60 |   gammas[] <- gammas_df$value[order(gammas_df$k)]
61 | 
62 |   gamma_tilde_df <- together_df[together_df$param == "gamma_tilde",]
63 | 
64 |   gamma_tilde <- matrix(nrow = nrow(gamma_tilde_df), ncol = 1)
65 | 
66 |   gamma_tilde[] <- gamma_tilde_df$value[order(gamma_tilde_df$k)]
67 | 
68 |   if(sum(varying_df$param == "alpha_tilde") > 0){
69 |     alpha_tilde <- varying_df$value[varying_df$param == "alpha_tilde"]
70 |   } else{
71 |     alpha_tilde <- NULL
72 |   }
73 | 
74 |   return(list("P" = P,
75 |               "P_tilde" = P_tilde,
76 |               "B" = B,
77 |               "gammas" = gammas,
78 |               "gamma_tilde" = gamma_tilde,
79 |               "alpha_tilde" = alpha_tilde))
80 | }
81 | 


--------------------------------------------------------------------------------
/R/dfs_to_derivs.R:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | dfs_to_derivs <- function(varying_df,
  4 |               varying_lr_df = NULL,
  5 |               fixed_df,
  6 |               X,
  7 |               Z,
  8 |               X_tilde,
  9 |               Z_tilde,
 10 |               Z_tilde_gamma_cols,
 11 |               criterion = "Poisson",
 12 |               gmm_inv_wts = NULL){
 13 | 
 14 |   if(!is.null(varying_lr_df)){
 15 |     varying_df <- lr_to_ra(fixed_df,
 16 |                            varying_lr_df,
 17 |                            varying_df)
 18 |   }
 19 | 
 20 | 
 21 |   K <- max(c(varying_df$k[varying_df$param == "P"],
 22 |              fixed_df$k[fixed_df$param == "P"]))
 23 | 
 24 |   fixed_P_multipliers <- sapply(1:K, function(k)
 25 |     1 - sum(fixed_df$value[fixed_df$param == "P"&
 26 |                              fixed_df$k ==k]))
 27 | 
 28 |   K_tilde <- max(c(varying_df$k[varying_df$param == "P_tilde"],
 29 |                    fixed_df$k[fixed_df$param == "P_tilde"]))
 30 | 
 31 |   fixed_P_tilde_multipliers <- sapply(1:K_tilde, function(k)
 32 |     1 - sum(fixed_df$value[fixed_df$param == "P_tilde"&
 33 |                              fixed_df$k ==k]))
 34 | 
 35 |   Ak_list <- get_Ak_list(fixed_df,
 36 |                          varying_df,
 37 |                          varying_lr_df)
 38 | 
 39 |   A_tilde_k_list <- get_A_tilde_k_list(fixed_df,
 40 |                                        varying_df,
 41 |                                        varying_lr_df)
 42 | 
 43 |   # message("created Ak_list and A_tilde_k_list")
 44 | 
 45 |   #calculate at outset of optimization  pass as argument to mean_jac_lr, etc.
 46 |   which_k_p <- sapply(1:K, function(k) ifelse(is.null(Ak_list[[k]]),
 47 |                                               NA, k))
 48 | 
 49 |   which_k_p <- which_k_p[!is.na(which_k_p)]
 50 | 
 51 |   #calculate at outset of optimization  pass as argument to mean_jac_lr, etc.
 52 |   which_k_p_tilde <- sapply(1:K_tilde,
 53 |                             function(k) ifelse(
 54 |                               is.null(A_tilde_k_list[[k]]),
 55 |                               NA,k
 56 |                             ))
 57 | 
 58 |   which_k_p_tilde <- which_k_p_tilde[!is.na(which_k_p_tilde)]
 59 | 
 60 |   # message("saved which_k_p and which_k_p_tilde")
 61 | 
 62 |   #calculate at outset of optimization
 63 |   which_B_rows <- unique(varying_df$k[varying_df$param == "B"])
 64 |   which_B_rows <- which_B_rows[order(which_B_rows)]
 65 | 
 66 |   #calculate at outset of optimization
 67 |   which_B_keep <- lapply(which_B_rows,
 68 |                          function(k) sapply(1:(J - 1),
 69 |                                             function(j)
 70 |                                               j %in% varying_lr_df$j[
 71 |                                                 varying_lr_df$param == "B" &
 72 |                                                   varying_lr_df$k == k]
 73 |                          ))
 74 |   which_B_keep <- do.call(rbind,which_B_keep)
 75 | 
 76 |   # message("saved which_B_keep")
 77 | 
 78 |   which_gammas <- unique(varying_df$k[varying_df$param == "gamma"])
 79 | 
 80 |   which_gamma_tilde <- unique(varying_df$k[varying_df$param == "gamma_tilde"])
 81 | 
 82 |   which_unconstrained <- varying_lr_df$param %in% c("B","gamma","gamma_tilde")
 83 |   which_rho <- varying_lr_df$param %in% c("rho")
 84 |   which_rho_tilde <- varying_lr_df$param %in% c("rho_tilde")
 85 |   npar <- nrow(varying_lr_df)
 86 | 
 87 |   return(deriv_criterion_lr(W = W,
 88 |                      X = X,
 89 |                      Z = Z,
 90 |                      which_k_p = which_k_p,
 91 |                      which_k_p_tilde = which_k_p_tilde,
 92 |                      fixed_P_multipliers = fixed_P_multipliers,
 93 |                      fixed_P_tilde_multipliers = fixed_P_tilde_multipliers,
 94 |                      which_B_keep = which_B_keep,
 95 |                      which_B_rows = which_B_rows,
 96 |                      which_gammas = which_gammas,
 97 |                      which_gamma_tilde = which_gamma_tilde,
 98 |                      Z_tilde = Z_tilde,
 99 |                      Z_tilde_gamma_cols = Z_tilde_gamma_cols,
100 |                      X_tilde = X_tilde,
101 |                      Ak_list = Ak_list,
102 |                      A_tilde_k_list = A_tilde_k_list,
103 |                      fixed_df = fixed_df,
104 |                      varying_df = varying_df,
105 |                      varying_lr_df = varying_lr_df,
106 |                      K = K,
107 |                      K_tilde = K_tilde,
108 |                      barrier_t = 1,
109 |                      criterion = "Poisson",
110 |                      lr_scale = TRUE,
111 |                      include_log_penalty_derivatives = TRUE,
112 |                      return_info = FALSE,
113 |                      wts = NULL,
114 |                      gmm_inv_wts = gmm_inv_wts))
115 | }
116 | 


--------------------------------------------------------------------------------
/R/dfs_to_mean.R:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | dfs_to_mean <- function(varying_df,
 4 |                         varying_lr_df = NULL,
 5 |                         fixed_df,
 6 |                         X,
 7 |                         Z,
 8 |                         X_tilde,
 9 |                         Z_tilde,
10 |                         Z_tilde_gamma_cols){
11 | 
12 | 
13 |   if(!is.null(varying_lr_df)){
14 |     varying_df <- lr_to_ra(fixed_df,varying_lr_df,
15 |                            varying_df)
16 |   }
17 | 
18 |   params <- dataframes_to_parameters(fixed_df,
19 |                                      varying_df)
20 | 
21 |   return(meaninate(gammas = params$gammas,
22 |             B = B,
23 |             X = X,
24 |             Z = Z,
25 |             P = params$P,
26 |             X_tilde = X_tilde,
27 |             Z_tilde = Z_tilde,
28 |             Z_tilde_gamma_cols = Z_tilde_gamma_cols,
29 |             P_tilde = params$P_tilde,
30 |             gamma_tilde = params$gamma_tilde))
31 | }
32 | 
33 | 
34 | 


--------------------------------------------------------------------------------
/R/do_one_boot.R:
--------------------------------------------------------------------------------
 1 | 
 2 | do_one_boot <- function(W,
 3 |                         fitted_model,
 4 |                         m = NULL,
 5 |                         seed = NULL,
 6 |                         boot_weights = NULL){
 7 |   
 8 |   
 9 |   n <- nrow(W)
10 |   if(is.null(m)){
11 |     m <- sqrt(n)
12 |   }
13 |   
14 |   
15 |   if(is.null(boot_weights)){
16 |     boot_weights <- rgamma(n,m/n)
17 |     boot_weights <- boot_weights/sum(boot_weights)
18 |     boot_weights <- rep(boot_weights, each = J)
19 |   }
20 |   
21 |   if(!is.null(fitted_model$Z_tilde_list)){
22 |     fitted_model$Z_tilde <- NULL
23 |   }
24 |   
25 |   boot_model <- estimate_parameters(W = W,
26 |                                     X = fitted_model$X,
27 |                                     Z = fitted_model$Z,
28 |                                     Z_tilde = fitted_model$Z_tilde,
29 |                                     Z_tilde_gamma_cols =
30 |                                       fitted_model$Z_tilde_gamma_cols,
31 |                                     gammas =
32 |                                       fitted_model$gammas,
33 |                                     gammas_fixed_indices =
34 |                                       fitted_model$gammas_fixed_indices,
35 |                                     P = fitted_model$P,
36 |                                     P_fixed_indices =
37 |                                       fitted_model$P_fixed_indices,
38 |                                     B = fitted_model$B,
39 |                                     B_fixed_indices =
40 |                                       fitted_model$B_fixed_indices,
41 |                                     X_tilde = fitted_model$X_tilde,
42 |                                     P_tilde = fitted_model$P_tilde,
43 |                                     P_tilde_fixed_indices =
44 |                                       fitted_model$P_tilde_fixed_indices,
45 |                                     gamma_tilde = fitted_model$gamma_tilde,
46 |                                     gamma_tilde_fixed_indices =
47 |                                       fitted_model$gamma_tilde_fixed_indices,
48 |                                     alpha_tilde =
49 |                                       fitted_model$alpha_tilde,
50 |                                     Z_tilde_list =
51 |                                       fitted_model$Z_tilde_list,
52 |                                     barrier_t = 1, #starting value of reciprocal barrier penalty coef.
53 |                                     barrier_scale = 10, #increments for value of barrier penalty
54 |                                     max_barrier = 1e12, #maximum value of barrier_t
55 |                                     initial_conv_tol = 1000,
56 |                                     final_conv_tol = 0.1,
57 |                                     constraint_tolerance = 1e-10,
58 |                                     hessian_regularization = 0.01,
59 |                                     criterion = "Poisson",
60 |                                     profile_P = TRUE,
61 |                                     wts = boot_weights,
62 |                                     profiling_maxit = 25)
63 |   
64 |   return(boot_model)
65 |   
66 | }
67 | 


--------------------------------------------------------------------------------
/R/do_one_lrt.R:
--------------------------------------------------------------------------------
  1 | 
  2 | do_one_lrt <- function(W0,
  3 |                        full_model,
  4 |                        null_model,#null model specification
  5 |                        boot_method = "bayesian_subsample",
  6 |                        m = NULL,
  7 |                        seed = NULL,
  8 |                        boot_weights = NULL,
  9 |                        return_models = FALSE){
 10 | 
 11 | 
 12 |   n <- nrow(W0)
 13 |   if(is.null(m)){
 14 |     m <- sqrt(n)
 15 |   }
 16 | 
 17 | 
 18 |   if(is.null(boot_weights)){
 19 |     stop("Bootstrapping weights boot_weights must be provided.")
 20 |   }
 21 | 
 22 |   boot_full <- estimate_parameters(W = W0,
 23 |                                    X = full_model$X,
 24 |                                    Z = full_model$Z,
 25 |                                    Z_tilde = full_model$Z_tilde,
 26 |                                    Z_tilde_gamma_cols =
 27 |                                      full_model$Z_tilde_gamma_cols,
 28 |                                    gammas =
 29 |                                      full_model$gammas,
 30 |                                    gammas_fixed_indices =
 31 |                                      full_model$gammas_fixed_indices,
 32 |                                    P = full_model$P,
 33 |                                    P_fixed_indices =
 34 |                                      full_model$P_fixed_indices,
 35 |                                    B = full_model$B,
 36 |                                    B_fixed_indices =
 37 |                                      full_model$B_fixed_indices,
 38 |                                    X_tilde = full_model$X_tilde,
 39 |                                    P_tilde = full_model$P_tilde,
 40 |                                    P_tilde_fixed_indices =
 41 |                                      full_model$P_tilde_fixed_indices,
 42 |                                    gamma_tilde = full_model$gamma_tilde,
 43 |                                    gamma_tilde_fixed_indices =
 44 |                                      full_model$gamma_tilde_fixed_indices,
 45 |                                    alpha_tilde =
 46 |                                      full_model$alpha_tilde,
 47 |                                    Z_tilde_list =
 48 |                                      full_model$Z_tilde_list,
 49 |                                    barrier_t = 1, #starting value of reciprocal barrier penalty coef.
 50 |                                    barrier_scale = 10, #increments for value of barrier penalty
 51 |                                    max_barrier = 1e12, #maximum value of barrier_t
 52 |                                    initial_conv_tol = 1000,
 53 |                                    final_conv_tol = 0.1,
 54 |                                    
 55 |                                    constraint_tolerance = 1e-10,
 56 |                                    hessian_regularization = 0.01,
 57 |                                    criterion = "Poisson",
 58 |                                    
 59 |                                    profile_P = TRUE,
 60 |                                    wts = boot_weights,
 61 |                                    verbose = TRUE,
 62 |                                    profiling_maxit = 25)
 63 | 
 64 |   print(paste("full model weights sum to "), sum(boot_weights),
 65 |         sep = "", collapse = "")
 66 | 
 67 |   # if(boot_full$criterion == "reweighted_Poisson"){
 68 |   #   null_model$criterion <- "Poisson"
 69 |   #   boot_weights <- boot_full$weights
 70 |   # }
 71 | 
 72 |   boot_null <- estimate_parameters(W = W0,
 73 |                                    X = null_model$X,
 74 |                                    Z = null_model$Z,
 75 |                                    Z_tilde = null_model$Z_tilde,
 76 |                                    Z_tilde_gamma_cols =
 77 |                                      null_model$Z_tilde_gamma_cols,
 78 |                                    gammas =
 79 |                                      null_model$gammas,
 80 |                                    gammas_fixed_indices =
 81 |                                      null_model$gammas_fixed_indices,
 82 |                                    P = null_model$P,
 83 |                                    P_fixed_indices =
 84 |                                      null_model$P_fixed_indices,
 85 |                                    B = null_model$B,
 86 |                                    B_fixed_indices =
 87 |                                      null_model$B_fixed_indices,
 88 |                                    X_tilde = null_model$X_tilde,
 89 |                                    P_tilde = null_model$P_tilde,
 90 |                                    P_tilde_fixed_indices =
 91 |                                      null_model$P_tilde_fixed_indices,
 92 |                                    gamma_tilde = null_model$gamma_tilde,
 93 |                                    gamma_tilde_fixed_indices =
 94 |                                      null_model$gamma_tilde_fixed_indices,
 95 |                                    alpha_tilde =
 96 |                                      null_model$alpha_tilde,
 97 |                                    Z_tilde_list =
 98 |                                      null_model$Z_tilde_list,
 99 |                                    barrier_t = 1, #starting value of reciprocal barrier penalty coef.
100 |                                    barrier_scale = 10, #increments for value of barrier penalty
101 |                                    max_barrier = 1e12, #maximum value of barrier_t
102 |                                    initial_conv_tol = 1000,
103 |                                    final_conv_tol = 0.1,
104 |                                    
105 |                                    constraint_tolerance = 1e-10,
106 |                                    hessian_regularization = 0.01,
107 |                                    criterion = "Poisson",
108 |                                    
109 |                                    profile_P = TRUE,
110 |                                    wts = boot_weights,
111 |                                    verbose = TRUE,
112 |                                    profiling_maxit = 25)
113 | 
114 |   print(paste("null model weights sum to "), sum(boot_weights),
115 |         sep = "", collapse = "")
116 | 
117 | 
118 |   lr_stat <- 2*(boot_null$objective -
119 |                                             boot_full$objective)
120 | 
121 | 
122 |   if(!return_models){
123 |   return(lr_stat)
124 |   } else{
125 |       return(list("lr_stat" = lr_stat,
126 |                   "full_model" = boot_full_model,
127 |                   "null_model" = boot_null_model))
128 |     }
129 | 
130 | }
131 | 


--------------------------------------------------------------------------------
/R/do_one_simulation.R:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | do_one_simulation <- function(n,
  4 |                               J,
  5 |                               distrib,
  6 |                               B_multiplier,
  7 |                               seed,
  8 |                               label,
  9 |                               n_boot,
 10 |                               # load_tinyvamp = FALSE,
 11 |                               folder_name,
 12 |                               return_dont_save = FALSE,
 13 |                               parallelize = TRUE,
 14 |                               verbose = FALSE,
 15 |                               return_variance = FALSE){
 16 | 
 17 |   # if(load_tinyvamp){
 18 |   #   devtools::load_all()
 19 |   # }
 20 | 
 21 |   print(paste("n = ",n,sep = "",collapse = ""))
 22 |   print(paste("J = ",J,sep = "",collapse = ""))
 23 |   print(paste("distrib = ",distrib,sep = "",collapse = ""))
 24 |   print(paste("B_multiplier = ", B_multiplier,sep = "",collapse = ""))
 25 |   print(paste("seed = ",seed,sep = "",collapse = ""))
 26 |   print(paste("label = ",label,sep = "",collapse = ""))
 27 | 
 28 |   W <- simulate_paper_data(n = n,
 29 |                            J = J,
 30 |                            B_multiplier = B_multiplier,
 31 |                            distrib = distrib,
 32 |                            seed = seed)
 33 | 
 34 |   poisson_fit <- try(fit_simulation_model(W,"Poisson"))
 35 |   reweighted_fit <- try(fit_simulation_model(W, "reweighted_Poisson",
 36 |                                              return_variance = return_variance))
 37 | 
 38 |   ### Do Bootstrapped LRT for both
 39 | 
 40 |   print("Bootstrapping...")
 41 |   if(is.list(poisson_fit)){
 42 |     poisson_null <- poisson_fit
 43 |     poisson_null$B[] <- 0
 44 |     poisson_null$B_fixed_indices[] <- TRUE
 45 |     poisson_lrt <-
 46 |       try(bootstrap_lrt(W = W,
 47 |                         fitted_model = poisson_fit,
 48 |                         null_param = poisson_null,
 49 |                         n_boot = n_boot,
 50 |                         parallelize = parallelize,
 51 |                         ncores = 5,
 52 |                         verbose = verbose))
 53 | 
 54 |     poisson_ci <- try(bootstrap_ci(W = W,
 55 |                                    fitted_model = poisson_fit,
 56 |                                    n_boot = n_boot,
 57 |                                    alpha = 0.05,
 58 |                                    parallelize = parallelize,
 59 |                                    ncores = 5,
 60 |                                    seed = seed,
 61 |                                    verbose = verbose))
 62 | 
 63 | 
 64 |   }
 65 | 
 66 |   if(is.list(reweighted_fit)){
 67 |     reweighted_null <- reweighted_fit
 68 |     reweighted_null$B[] <- 0
 69 |     reweighted_null$B_fixed_indices[] <- TRUE
 70 |     reweighted_lrt <-
 71 |       try(bootstrap_lrt(W,
 72 |                         fitted_model = reweighted_fit,
 73 |                         null_param = reweighted_null,
 74 |                         n_boot = n_boot,
 75 |                         parallelize = parallelize,
 76 |                         ncores = 5,
 77 |                         verbose = verbose))
 78 | 
 79 |     reweighted_ci <- try(bootstrap_ci(W = W,
 80 |                                       fitted_model = reweighted_fit,
 81 |                                       n_boot = n_boot,
 82 |                                       alpha = 0.05,
 83 |                                       parallelize = parallelize,
 84 |                                       ncores = 5,
 85 |                                       seed = seed,
 86 |                                       verbose = verbose))
 87 | 
 88 |   }
 89 | 
 90 |   if(!return_dont_save){
 91 |     saveRDS(list("poisson_lrt" = poisson_lrt,
 92 |                  "poisson_ci" = poisson_ci,
 93 |                  "reweighted_lrt" = reweighted_lrt,
 94 |                  "reweighted_ci" = reweighted_ci),
 95 |             paste(folder_name,"/",
 96 |                   paste(label,"n",n,"J", J, distrib,
 97 |                         "Bmult", B_multiplier,"nboot", n_boot,"sim",
 98 |                         seed,
 99 |                         sep = "_",collapse = "_"),
100 |                   sep = "",collapse = ""))
101 |   } else{
102 |     return(list("poisson_lrt" = poisson_lrt,
103 |                 "poisson_ci" = poisson_ci,
104 |                 "reweighted_lrt" = reweighted_lrt,
105 |                 "reweighted_ci" = reweighted_ci))
106 |   }
107 | }
108 | 


--------------------------------------------------------------------------------
/R/evaluate_criterion_lr.R:
--------------------------------------------------------------------------------
 1 | #' Criterion Evaluation Function 
 2 | #' 
 3 | #' @export
 4 | evaluate_criterion_lr <- function(W,
 5 |                                   X,
 6 |                                   Z,
 7 |                                   Z_tilde,
 8 |                                   Z_tilde_gamma_cols,
 9 |                                   Z_tilde_list = NULL,
10 |                                   X_tilde,
11 |                                   fixed_df,
12 |                                   varying_df,
13 |                                   varying_lr_df = NULL,
14 |                                   barrier_t = NULL,
15 |                                   criterion = "Poisson",
16 |                                   lr_scale = TRUE,
17 |                                   include_log_penalty = TRUE,
18 |                                   wts = NULL,
19 |                                   gmm_inv_wts = NULL,
20 |                                   return_gmm_inv_weights = FALSE){
21 | 
22 |   if(lr_scale){
23 |     varying_df <- lr_to_ra(fixed_df,
24 |                            varying_lr_df,
25 |                            varying_df)
26 |   }
27 | 
28 |   params <- dataframes_to_parameters(fixed_df, varying_df)
29 | 
30 |   if(lr_scale){
31 |     if(include_log_penalty){
32 |       log_penalty <- calculate_log_penalty(varying_lr_df,
33 |                                            fixed_df,
34 |                                            barrier_t)
35 |     } else{
36 |       log_penalty <- 0
37 |     }
38 | 
39 |   } else{
40 |     log_penalty <- 0
41 |   }
42 | 
43 |   means <- meaninate(gammas  = params$gammas,
44 |                      B = params$B,
45 |                      X = X,
46 |                      Z = Z,
47 |                      P = params$P,
48 |                      X_tilde = X_tilde,
49 |                      Z_tilde = Z_tilde,
50 |                      Z_tilde_gamma_cols = Z_tilde_gamma_cols,
51 |                      Z_tilde_list = Z_tilde_list,
52 |                      P_tilde = params$P_tilde,
53 |                      gamma_tilde = params$gamma_tilde,
54 |                      alpha_tilde = params$alpha_tilde,
55 |                      return_separate = FALSE)
56 | 
57 | 
58 | 
59 |   if(criterion == "Poisson"){
60 |     return(poisson_criterion(W = W,
61 |                              means = means,
62 |                              wts = wts) + log_penalty)
63 | 
64 |   }
65 |   if(criterion == "GMM"){
66 |     n <- nrow(W)
67 |     W_long <- lapply(1:n,function(i) as.numeric(W[i,]))
68 |     W_long <- do.call(c,W_long)
69 |     means_long <- lapply(1:n, function(i) as.numeric(means[i,]))
70 |     means_long <- do.call(c,means_long)
71 | 
72 |     if(is.null(gmm_inv_wts)){
73 | 
74 |     inv_wts <- get_gmm_inv_weights(W_long = W_long,
75 |                            means_long = means_long)
76 |     } else{
77 |       inv_wts <- gmm_inv_wts
78 |     }
79 | 
80 |     if(!return_gmm_inv_weights){
81 |     return(gmm_criterion(W_long = W_long,
82 |                              means_long = means_long,
83 |                          inv_wts= inv_wts) + log_penalty)
84 |     } else{
85 |       return(list("gmm_crit" = gmm_criterion(W_long = W_long,
86 |                            means_long = means_long,
87 |                            inv_wts = inv_wts) + log_penalty,
88 |              "inv_wts" = inv_wts))
89 |     }
90 | 
91 |   }
92 | 
93 | 
94 | }
95 | 


--------------------------------------------------------------------------------
/R/fit_simple_model.R:
--------------------------------------------------------------------------------
 1 | 
 2 | fit_simple_model <- function(W,
 3 |                              B_fixed_at_zero = FALSE,
 4 |                              reweight = FALSE){
 5 | 
 6 |   n <- nrow(W)
 7 |   J <- ncol(W)
 8 | 
 9 |   if(B_fixed_at_zero){
10 |     B_fixed_indices <-  matrix(TRUE,nrow = 1, ncol = J)
11 |   }else{
12 |     B_fixed_indices <-  matrix(
13 |       c(rep(FALSE, J - 1),TRUE),nrow = 1, ncol = J)
14 |   }
15 | 
16 |   if(reweight){
17 |     criterion <- "reweighted_Poisson"
18 |   } else{
19 |     criterion <- "Poisson"
20 |   }
21 | 
22 | 
23 | 
24 | fitted_model <- estimate_parameters(W = W,
25 |                       X = matrix(1, nrow = n,ncol = 1),
26 |                       Z = matrix(1, nrow = n, ncol = 1),
27 |                       P = matrix(1/J,nrow = 1, ncol = J),
28 |                       P_fixed_indices = matrix(TRUE, nrow = 1, ncol = J),
29 |                       X_tilde = matrix(0,nrow = 1, ncol = 1),
30 |                       Z_tilde = matrix(0,nrow = n, ncol = 1),
31 |                       Z_tilde_gamma_cols = 1,
32 |                       P_tilde = matrix(1/J,nrow = 1, ncol = J),
33 |                       P_tilde_fixed_indices = matrix(TRUE, nrow = 1, ncol = J),
34 |                       gammas = apply(W,1,function(x) log(sum(x))),
35 |                       gammas_fixed_indices = rep(FALSE, n),
36 |                       B = matrix(0, ncol = J, nrow = 1),
37 |                       B_fixed_indices = B_fixed_indices,
38 |                       gamma_tilde = matrix(0,ncol = 1, nrow = 1),
39 |                       gamma_tilde_fixed_indices = matrix(TRUE,
40 |                                                          ncol = 1, nrow = 1),
41 |                       alpha_tilde = NULL,
42 |                       Z_tilde_list = NULL,
43 |                       barrier_t = 1, #starting value of reciprocal barrier penalty coef.
44 |                       barrier_scale = 10, #increments for value of barrier penalty
45 |                       max_barrier = 1e12, #maximum value of barrier_t
46 |                       initial_conv_tol = 1000,
47 |                       final_conv_tol = 0.1,
48 |                       constraint_tolerance = 1e-10,
49 |                       hessian_regularization = 0.01,
50 |                       criterion = criterion,
51 |                       profile_P = TRUE,
52 |                       profiling_maxit = 25,
53 |                       wts = NULL,
54 |                       verbose = FALSE,
55 |                       bootstrap_failure_cutoff = NULL)
56 | 
57 | return(fitted_model)
58 | 
59 | }
60 | 


--------------------------------------------------------------------------------
/R/fit_simulation_model.R:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | fit_simulation_model <- function(W,
  4 |                                  estimator,
  5 |                                  return_variance = FALSE){
  6 | 
  7 |   n <- nrow(W)/16
  8 |   J <- ncol(W)
  9 | 
 10 | 
 11 |   Z_tilde <- do.call(rbind,lapply(1:n,
 12 |                                   function(x) matrix(rep(c(1,9,81,729),
 13 |                                                          4),
 14 |                                                      ncol = 1)))
 15 | 
 16 |   Z_tilde_list <- list(Z_tilde*rbind(matrix(1,nrow = 4*n,ncol = 1),
 17 |                                      matrix(0, nrow = 3*4*n,ncol = 1)),
 18 |                        Z_tilde*rbind(matrix(0,nrow = 4*n,ncol = 1),
 19 |                                      matrix(1, nrow = 4*n,ncol = 1),
 20 |                                      matrix(0,nrow = 2*4*n,ncol = 1)),
 21 |                        Z_tilde*rbind(matrix(0,nrow = 2*4*n,ncol = 1),
 22 |                                      matrix(1, nrow = 4*n,ncol = 1),
 23 |                                      matrix(0,nrow = 4*n,ncol = 1)),
 24 |                        Z_tilde*rbind(matrix(0,nrow = 3*4*n,ncol = 1),
 25 |                                      matrix(1, nrow = 4*n,ncol = 1)))
 26 | 
 27 |   Z_tilde <- NULL
 28 | 
 29 |   Z_tilde_gamma_cols <- 1
 30 | 
 31 |   alpha_tilde <- c(0,0,0)
 32 |   gamma_tilde <- matrix(-5,ncol = 1, nrow = 1)
 33 | 
 34 |   ### generate Z
 35 |   Z <- do.call(rbind,lapply(1:4,
 36 |                             function(x) do.call(rbind,
 37 |                                                 lapply(1:(4*n),function(k) matrix(
 38 |                                                   as.numeric(x == 1:4),nrow = 1
 39 |                                                 )))))
 40 | 
 41 |   X <- matrix(1,nrow = n*16,ncol = 1)
 42 | 
 43 |   B <- matrix(0,ncol = J, nrow = 1)
 44 |   B_fixed_indices <- matrix(c(rep(FALSE,J - 1), TRUE), nrow = 1)
 45 | 
 46 |   P <- matrix(1/J, nrow = 4, ncol = J)
 47 |   P[1,] <- 2^(seq(0,4,length.out = J))
 48 |   P[1,] <- P[1,]/sum(P[1,])
 49 |   P[2,] <- P[1,J:1]
 50 | 
 51 |   P_fixed_indices <- rbind(matrix(TRUE, nrow = 1, ncol = J),
 52 |                            matrix(TRUE,nrow = 1, ncol = J),
 53 |                            matrix(FALSE, nrow = 1, ncol = J),
 54 |                            matrix(FALSE, nrow = 1, ncol = J))
 55 | 
 56 |   P_tilde <- matrix(1/J,ncol = J, nrow = 1)
 57 |   P_tilde_fixed_indices <- matrix(FALSE, nrow = 1, ncol = J)
 58 |   X_tilde <- matrix(1,ncol = 1, nrow = 1)
 59 |   gamma_tilde <- matrix(-3.7,ncol = 1,nrow = 1)
 60 |   gamma_tilde_fixed_indices <- matrix(FALSE, ncol = 1, nrow =1)
 61 | 
 62 |   gammas <- apply(W, 1, function(x) log(sum(x)))
 63 |   gammas_fixed_indices <- rep(FALSE, 16*n)
 64 | 
 65 |   fitted_model <- estimate_parameters(W = W,
 66 |                       X = X,
 67 |                       Z = Z,
 68 |                       Z_tilde_gamma_cols = Z_tilde_gamma_cols,
 69 |                       gammas = gammas,
 70 |                       gammas_fixed_indices = gammas_fixed_indices,
 71 |                       P = P,
 72 |                       P_fixed_indices = P_fixed_indices,
 73 |                       B = B,
 74 |                       B_fixed_indices = B_fixed_indices,
 75 |                       X_tilde = X_tilde,
 76 |                       P_tilde = P_tilde,
 77 |                       P_tilde_fixed_indices = P_tilde_fixed_indices,
 78 |                       gamma_tilde = gamma_tilde,
 79 |                       gamma_tilde_fixed_indices = gamma_tilde_fixed_indices,
 80 |                       alpha_tilde = alpha_tilde,
 81 |                       Z_tilde_list = Z_tilde_list,
 82 |                                   barrier_t = 1, #starting value of reciprocal barrier penalty coef.
 83 |                                   barrier_scale = 10, #increments for value of barrier penalty
 84 |                                   max_barrier = 1e12, #maximum value of barrier_t
 85 |                                   initial_conv_tol = 1000,
 86 |                                   final_conv_tol = 0.1,
 87 |                                   constraint_tolerance = 1e-10,
 88 |                                   hessian_regularization = 0.01,
 89 |                                   criterion = estimator,
 90 |                                   profile_P = TRUE,
 91 |                                   profiling_maxit = 25,
 92 |                                   wts = NULL,
 93 |                                   verbose = TRUE,
 94 |                                   bootstrap_failure_cutoff = NULL,
 95 |                       return_variance = return_variance
 96 |   )
 97 | 
 98 | 
 99 | 
100 | return(fitted_model)
101 | 
102 | }
103 | 


--------------------------------------------------------------------------------
/R/get_A_tilde_k_list.R:
--------------------------------------------------------------------------------
 1 | get_A_tilde_k_list <- function(fixed_df,
 2 |                                varying_df,
 3 |                                varying_lr_df){
 4 | 
 5 |   J <- max(c(varying_df$j,fixed_df$j))
 6 |   K_tilde <- max(c(varying_df$k[varying_df$param == "P_tilde"],
 7 |                    fixed_df$k[fixed_df$param == "P_tilde"]))
 8 | 
 9 |   which_k_p_tilde <- unique(varying_lr_df$k[varying_lr_df$param == "rho_tilde"])
10 |   which_k_p_tilde <- which_k_p_tilde[order(which_k_p_tilde)]
11 | 
12 |   A_tilde_k_list <- vector(mode = "list", K_tilde)
13 |   for(k in which_k_p_tilde){
14 |     fixed_p_tilde_k <- fixed_df[(fixed_df$param == "P_tilde")&(fixed_df$k == k),]
15 |     varying_p_tilde_k <- varying_df[(varying_df$param == "P_tilde")&
16 |                                       (varying_df$k == k),]
17 |     varying_j <- varying_p_tilde_k$j
18 |     varying_j <- varying_j[order(varying_j)]
19 |     C_k <- J - nrow(fixed_p_tilde_k)
20 |     A_tilde_k <- matrix(0, nrow = J,
21 |                         ncol = C_k)
22 |     for(jstar in 1:length(varying_j)){
23 |       A_tilde_k[varying_j[jstar],jstar] <- 1
24 |     }
25 |     A_tilde_k_list[[k]] <- A_tilde_k
26 |   }
27 |   return(A_tilde_k_list)
28 | }
29 | 


--------------------------------------------------------------------------------
/R/get_Ak_list.R:
--------------------------------------------------------------------------------
 1 | get_Ak_list <- function(fixed_df,
 2 |                         varying_df,
 3 |                         varying_lr_df){
 4 | 
 5 |   J <- max(c(varying_df$j,fixed_df$j))
 6 |   K <- max(c(varying_df$k[varying_df$param %in% c("P")],
 7 |              fixed_df$k[fixed_df$param %in% c("P")]))
 8 | 
 9 |   which_k_p <- unique(varying_lr_df$k[varying_lr_df$param == "rho"])
10 |   which_k_p <- which_k_p[order(which_k_p)]
11 | 
12 |   Ak_list <- vector(mode = "list", K)
13 |   for(k in which_k_p){
14 |     fixed_p_k <- fixed_df[(fixed_df$param == "P")&(fixed_df$k == k),]
15 |     varying_p_k <- varying_df[(varying_df$param == "P")&
16 |                                 (varying_df$k == k),]
17 |     varying_j <- varying_p_k$j
18 |     varying_j <- varying_j[order(varying_j)]
19 |     C_k <- J - nrow(fixed_p_k)
20 |     A_k <- matrix(0, nrow = J,
21 |                   ncol = C_k)
22 |     for(jstar in 1:length(varying_j)){
23 |       A_k[varying_j[jstar],jstar] <- 1
24 |     }
25 |     Ak_list[[k]] <- A_k
26 |   }
27 |   return(Ak_list)
28 | }
29 | 


--------------------------------------------------------------------------------
/R/get_gmm_inv_weights.R:
--------------------------------------------------------------------------------
 1 | get_gmm_inv_weights <- function(W_long,
 2 |                           means_long){
 3 | 
 4 |   squerror_long <- (W_long - means_long)^2
 5 | 
 6 |   pre_wts <- isoreg(means_long,squerror_long)
 7 | 
 8 |   means_long_ordered <- means_long[order(means_long)]
 9 |   squerror_long_ordered <- squerror_long[order(means_long)]
10 | 
11 |   # pre_wts$yf <- pre_wts$yf*(mean(1/pre_wts$yf))
12 |   # sum(1/pre_wts$yf)
13 | 
14 |   # if any variances estimated to be zero at positive means
15 |   # if(min(pre_wts$yf[means_long_ordered>0]) ==0){
16 |   #   # if there are zero means, do linear interpolation
17 |   #   if(sum(means_long_ordered == 0)<0){
18 |   #     pre_wts$yf[means_long_ordered == 0] <- 0
19 |   #
20 |   #     max_index <- max(which(pre_wts$yf[means_long_ordered>0] ==0))
21 |   #     (pre_wts$yf[means_long_ordered>0])[1:max_index] <-
22 |   #       ((means_long_ordered[means_long_ordered>0])[1:max_index])*
23 |   #       (pre_wts$yf[means_long_ordered>0])[max_index + 1]
24 |   #   }
25 |   #   #if no zero means, set min var equal to min nonzero var
26 |   #   if(sum(means_long_ordered == 0) == 0){
27 |   #     pre_wts$yf[pre_wts$yf ==0] <- min(pre_wts$yf[pre_wts$yf > 0])
28 |   #   }
29 |   #
30 |   # }
31 |   inv_wts <- numeric(length(W_long))
32 |   inv_wts[order(means_long)] <- pre_wts$yf
33 | 
34 |   return(inv_wts)
35 | }
36 | 


--------------------------------------------------------------------------------
/R/gmm_criterion.R:
--------------------------------------------------------------------------------
 1 | 
 2 | gmm_criterion <- function(W_long,
 3 |                           means_long,
 4 |                           inv_wts){
 5 | 
 6 | 
 7 | squerror_long <- (W_long - means_long)^2
 8 | 
 9 | gmm_crit <- 0.5*sum(sapply(1:length(means_long),
10 |        function(i){
11 |          ifelse(inv_wts[i]>0,
12 |                 squerror_long[i]/inv_wts[i],
13 |                 ifelse(squerror_long[i] ==0,0,Inf))
14 |        }
15 |          ))
16 | 
17 | return(gmm_crit)
18 | }
19 | 
20 | 


--------------------------------------------------------------------------------
/R/log_penalty.R:
--------------------------------------------------------------------------------
 1 | #################### Barrier Penalty Function #######################
 2 | 
 3 | calculate_log_penalty <- function(varying_lr_df,
 4 |                                   fixed_df,
 5 |                                   barrier_t){
 6 | 
 7 |   which_rho_k <- unique(varying_lr_df$k[varying_lr_df$param=="rho"])
 8 |   which_rho_tilde_k <- unique(varying_lr_df$k[varying_lr_df$param=="rho_tilde"])
 9 | 
10 |   log_P <- lapply(which_rho_k,
11 |                   function(k) (varying_lr_df$value[varying_lr_df$param == "rho" &
12 |                                                      varying_lr_df$k == k]) %>%
13 |                     (function(x) -c(x,0) + sum_of_logs(c(x,0))))
14 |   log_P_tilde <- lapply(which_rho_tilde_k,
15 |                         function(k)
16 |                           (varying_lr_df$value[
17 |                             varying_lr_df$param == "rho_tilde" &
18 |                               varying_lr_df$k == k]) %>%
19 |                           (function(x) -c(x,0) + sum_of_logs(c(x,0))))
20 |   log_ra_sum <- do.call(sum,log_P) + do.call(sum,log_P_tilde)
21 | 
22 |   return((1/barrier_t)*log_ra_sum)
23 | 
24 | }
25 | 


--------------------------------------------------------------------------------
/R/log_penalty_grad.R:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/statdivlab/tinyvamp/cf31c9ddcd1fee00e1232b3091eed94f18aeefab/R/log_penalty_grad.R


--------------------------------------------------------------------------------
/R/log_penalty_gradient.R:
--------------------------------------------------------------------------------
 1 | 
 2 | log_penalty_grad <- function(rho,barrier_t){
 3 |   # d/d_rho sum(-log(exp(c(rho, 0))/sum(exp(rho,0)))
 4 |   # = d/d_rho sum( - c(rho,0) + log(sum(exp(rho,0))))
 5 |   rho <- as.numeric(rho)
 6 |   log_bt <- log(barrier_t)
 7 | 
 8 |   n_rho <- length(rho)
 9 | 
10 |   return(rep(-1/barrier_t,n_rho) + (n_rho +1)*exp(rho - log_bt - sum_of_logs(c(rho,0))))
11 | }
12 | 


--------------------------------------------------------------------------------
/R/log_penalty_hessian.R:
--------------------------------------------------------------------------------
 1 | log_penalty_hess <- function(rho,barrier_t){
 2 |   rho <- as.numeric(rho)
 3 |   n_rho <- length(rho)
 4 |   log_bt <- log(barrier_t)
 5 | 
 6 |   ras <- exp(rho - sum_of_logs(c(rho,0)))
 7 |   ras_bt <- exp(rho - log_bt - sum_of_logs(c(rho,0)))
 8 | 
 9 |   return((n_rho + 1)*(diag(ras_bt) -
10 |                         matrix(ras_bt,ncol = 1)%*%
11 |                         matrix(ras,nrow = 1)))
12 | 
13 | 
14 | }
15 | 


--------------------------------------------------------------------------------
/R/logpois.R:
--------------------------------------------------------------------------------
 1 | ##################### Poisson-Type (Log) Density #####################
 2 | logpois <- function(x,intensity){
 3 | 
 4 |   if(!is.finite(intensity)){
 5 |     intensity <- 0
 6 |   }
 7 |   if((x==0)&(intensity==0)){
 8 |     return(0)
 9 |   } else{
10 |     if(intensity <= 0){
11 |       return(-Inf)
12 |     } else{
13 |       return(x*log(intensity) - lgamma(x + 1) - intensity)
14 |     }
15 |   }
16 | }
17 | 


--------------------------------------------------------------------------------
/R/lr_to_ra.R:
--------------------------------------------------------------------------------
 1 | 
 2 | lr_to_ra <- function(fixed_df,
 3 |                      varying_lr_df,
 4 |                      varying_df){
 5 | 
 6 |   rho_df <- varying_lr_df[varying_lr_df$param == "rho",]
 7 |   rho_tilde_df <- varying_lr_df[varying_lr_df$param == "rho_tilde",]
 8 | 
 9 |   P_df <- varying_df[varying_df$param == "P",]
10 |   P_tilde_df <- varying_df[varying_df$param == "P_tilde",]
11 | 
12 | 
13 | 
14 |   #update values in P_df with values from varying_lr_df
15 |   rho_rows <- unique(rho_df$k)
16 |   for(rho_row in rho_rows){
17 | 
18 |     ref_j <- max(varying_df$j[varying_df$param == "P" & varying_df$k == rho_row])
19 | 
20 |     rho_mini <- rho_df[rho_df$k == rho_row,]
21 | 
22 |     unscaled_ra <- exp(c(rho_mini$value, 0) - sum_of_logs(c(rho_mini$value, 0)))
23 | 
24 |     scaling <- 1 - sum(fixed_df$value[(fixed_df$k == rho_row) &(fixed_df$param == "rho")])
25 |     scaled_ra <- unscaled_ra*scaling
26 | 
27 |     for(j_index in 1:length(c(rho_mini$j, ref_j))){
28 |       P_df$value[
29 |         (P_df$k == rho_row) &
30 |           (P_df$j == c(rho_mini$j, ref_j)[j_index])
31 |       ] <-
32 |         scaled_ra[j_index]
33 | 
34 |     }
35 | 
36 |   }
37 | 
38 |   #update values in P_tilde_df with values from varying_lr_df
39 |   rho_tilde_rows <- unique(rho_tilde_df$k)
40 |   for(rho_tilde_row in rho_tilde_rows){
41 | 
42 |     ref_j <- max(varying_df$j[varying_df$param == "P_tilde" &
43 |                                 varying_df$k == rho_tilde_row])
44 | 
45 |     rho_tilde_mini <- rho_tilde_df[rho_tilde_df$k == rho_tilde_row,]
46 | 
47 |     unscaled_ra <- exp(c(rho_tilde_mini$value, 0) - sum_of_logs(c(rho_tilde_mini$value, 0)))
48 | 
49 |     scaling <- 1 - sum(fixed_df$value[(fixed_df$k == rho_tilde_row) &
50 |                                         (fixed_df$param == "rho_tilde")])
51 |     scaled_ra <- unscaled_ra*scaling
52 | 
53 |     for(j_index in 1:length(c(rho_tilde_mini$j, ref_j))){
54 |       P_tilde_df$value[
55 |         (P_tilde_df$k == rho_tilde_row) &
56 |           (P_tilde_df$j == c(rho_tilde_mini$j, ref_j)[j_index])
57 |       ] <-
58 |         scaled_ra[j_index]
59 | 
60 |     }
61 | 
62 |   }
63 | 
64 |   P_df <- P_df[order(P_df$k,P_df$j),]
65 |   P_tilde_df <- P_tilde_df[order(P_tilde_df$k,
66 |                                  P_tilde_df$j),]
67 | 
68 |   to_return_df <-  rbind(varying_lr_df[
69 |     !(varying_lr_df$param %in% c("rho", "rho_tilde","alpha_tilde")),
70 |   ],
71 |   P_df,
72 |   P_tilde_df)
73 | 
74 |   to_return_df <- rbind(to_return_df,
75 |                         varying_lr_df[varying_lr_df$para == "alpha_tilde",])
76 | 
77 |   to_return_df <- rbind(to_return_df[to_return_df$param == "P",],
78 |                       to_return_df[to_return_df$param == "P_tilde",],
79 |                       to_return_df[to_return_df$param == "B",],
80 |                       to_return_df[to_return_df$param == "gamma",],
81 |                       to_return_df[to_return_df$param == "gamma_tilde",],
82 |                       to_return_df[to_return_df$param == "alpha_tilde",])
83 | 
84 |   return(to_return_df)
85 | }
86 | 


--------------------------------------------------------------------------------
/R/make_model_bundle.R:
--------------------------------------------------------------------------------
  1 | #
  2 | # make_model_bundle <- function(W,
  3 | #                               X,
  4 | #                               Z,
  5 | #                               Z_tilde,
  6 | #                               Z_tilde_gamma_cols,
  7 | #                               gammas,
  8 | #                               gammas_fixed_indices,
  9 | #                               P,
 10 | #                               P_fixed_indices,
 11 | #                               B,
 12 | #                               B_fixed_indices,
 13 | #                               X_tilde,
 14 | #                               P_tilde,
 15 | #                               P_tilde_fixed_indices,
 16 | #                               gamma_tilde,
 17 | #                               gamma_tilde_fixed_indices,
 18 | #                               criterion = "Poisson"){
 19 | #
 20 | #
 21 | #   model_bundle <- list()
 22 | #   model_bundle$W <- W
 23 | #   model_bundle$designs <- list(Z = Z,
 24 | #                                Z_tilde = Z_tilde,
 25 | #                                Z_tilde_gamma_cols = Z_tilde_gamma_cols,
 26 | #                                X_tilde = X_tilde)
 27 | #   model_bundle$parameters <-
 28 | #     list(gammas = gammas,
 29 | #          gammas_fixed_indices = gammas_fixed_indices,
 30 | #          P = P,
 31 | #          P_fixed_indices = P_fixed_indices,
 32 | #          B = B,
 33 | #          B_fixed_indices = B_fixed_indices,
 34 | #          P_tilde = P_tilde,
 35 | #          P_tilde_fixed_indices = P_tilde_fixed_indices,
 36 | #          gamma_tilde = gamma_tilde,
 37 | #          gamma_tilde_fixed_indices = gamma_tilde_fixed_indices)
 38 | #   model_bundle$criterion <- criterion
 39 | #
 40 | #
 41 | #   model_bundle$n <- nrow(W)
 42 | #   model_bundle$J <- ncol(W)
 43 | #   model_bundle$K <- NA
 44 | #   model_bundle$K_tilde <- NA
 45 | #
 46 | #   model_bundle$optimization_info <- list()
 47 | #   model_bundle$optimization_info$nsteps <-
 48 | #     ceiling(log(max_barrier/barrier_t)/log(barrier_scale)) + 1
 49 | #
 50 | #   model_bundle$optimization_info$tolerances <-
 51 | #     exp(seq(log(initial_conv_tol),log(final_conv_tol), length.out = nsteps))
 52 | #
 53 | #   model_bundle$parameter_dfs <-  parameters_to_dataframes(P,
 54 | #                               P_fixed_indices,
 55 | #                               P_tilde,
 56 | #                               P_tilde_fixed_indices,
 57 | #                               B,
 58 | #                               B_fixed_indices,
 59 | #                               gammas,
 60 | #                               gammas_fixed_indices,
 61 | #                               gamma_tilde,
 62 | #                               gamma_tilde_fixed_indices)
 63 | #
 64 | #
 65 | #
 66 | #
 67 | #   # message("created parameter dfs")
 68 | #
 69 | #   model_bundle$K <-
 70 | #     with(model_bundle$parameter_dfs,
 71 | #          max(c(varying_df$k[varying_df$param == "P"],
 72 | #              fixed_df$k[fixed_df$param == "P"])))
 73 | #
 74 | #   model_bundle$K_tilde <-
 75 | #     with(model_bundle$parameter_dfs,
 76 | #     max(c(varying_df$k[varying_df$param == "P_tilde"],
 77 | #           fixed_df$k[fixed_df$param == "P_tilde"]))
 78 | #     )
 79 | #
 80 | #   model_bundle$rho_P_conversion <-list()
 81 | #   model_bundle$rho_P_conversion$fixed_P_multipliers <-
 82 | #   sapply(1:model_bundle$K, function(k)
 83 | #     with(model_bundle$parameter_dfs,1 - sum(fixed_df$value[fixed_df$param == "P"&
 84 | #                              fixed_df$k ==k])))
 85 | #
 86 | #   model_bundle$rho_P_conversion$fixed_P_tilde_multipliers <-
 87 | #     sapply(1:model_bundle$K_tilde, function(k)
 88 | #     with(model_bundle$parameter_dfs, 1 - sum(fixed_df$value[fixed_df$param == "P_tilde"&
 89 | #                              fixed_df$k ==k])))
 90 | #
 91 | #
 92 | #   # create matrices to track rho-P and rho_tilde-P_tilde relationships
 93 | #   model_bundle$rho_P_conversion$Ak_list <-
 94 | #     with(model_bundle$parameter_dfs,
 95 | #          get_Ak_list(fixed_df,
 96 | #                          varying_df,
 97 | #                          varying_lr_df))
 98 | #
 99 | #   A_tilde_k_list <- get_A_tilde_k_list(fixed_df,
100 | #                                        varying_df,
101 | #                                        varying_lr_df)
102 | #
103 | #   # message("created Ak_list and A_tilde_k_list")
104 | #
105 | #   #calculate at outset of optimization  pass as argument to mean_jac_lr, etc.
106 | #   which_k_p <- sapply(1:K, function(k) ifelse(is.null(Ak_list[[k]]),
107 | #                                               NA, k))
108 | #
109 | #   which_k_p <- which_k_p[!is.na(which_k_p)]
110 | #
111 | #   #calculate at outset of optimization  pass as argument to mean_jac_lr, etc.
112 | #   which_k_p_tilde <- sapply(1:K_tilde,
113 | #                             function(k) ifelse(
114 | #                               is.null(A_tilde_k_list[[k]]),
115 | #                               NA,k
116 | #                             ))
117 | #
118 | #   which_k_p_tilde <- which_k_p_tilde[!is.na(which_k_p_tilde)]
119 | #
120 | #   # message("saved which_k_p and which_k_p_tilde")
121 | #
122 | #   #calculate at outset of optimization
123 | #   which_B_rows <- unique(varying_df$k[varying_df$param == "B"])
124 | #   which_B_rows <- which_B_rows[order(which_B_rows)]
125 | #
126 | #   #calculate at outset of optimization
127 | #   which_B_keep <- lapply(which_B_rows,
128 | #                          function(k) sapply(1:(J - 1),
129 | #                                             function(j)
130 | #                                               j %in% varying_lr_df$j[
131 | #                                                 varying_lr_df$param == "B" &
132 | #                                                   varying_lr_df$k == k]
133 | #                          ))
134 | #   which_B_keep <- do.call(rbind,which_B_keep)
135 | #
136 | #   # message("saved which_B_keep")
137 | #
138 | #   which_gammas <- unique(varying_df$k[varying_df$param == "gamma"])
139 | #
140 | #   which_gamma_tilde <- unique(varying_df$k[varying_df$param == "gamma_tilde"])
141 | #
142 | #   which_unconstrained <- varying_lr_df$param %in% c("B","gamma","gamma_tilde")
143 | #   which_rho <- varying_lr_df$param %in% c("rho")
144 | #   which_rho_tilde <- varying_lr_df$param %in% c("rho_tilde")
145 | #   npar <- nrow(varying_lr_df)
146 | # }
147 | 


--------------------------------------------------------------------------------
/R/mean_jac.R:
--------------------------------------------------------------------------------
 1 | mean_jac <- function(varying_df,
 2 |                      fixed_df,
 3 |                      X,
 4 |                      Z,
 5 |                      X_tilde,
 6 |                      Z_tilde,
 7 |                      Z_tilde_gamma_cols,
 8 |                      Z_tilde_list = NULL){
 9 | 
10 | 
11 | 
12 |   params <- dataframes_to_parameters(fixed_df,
13 |                                      varying_df)
14 | 
15 |   if(!is.null(params$alpha_tilde)){
16 |     Z_tilde <- construct_Z_tilde(Z_tilde_list,
17 |                                  params$alpha_tilde)
18 |   }
19 | 
20 |   fixed_status <- fixed_df
21 |   if(nrow(fixed_status)>0){
22 |     fixed_status$value <- 0
23 |   }
24 |   varying_status <- varying_df
25 |   varying_status$value <- 1
26 | 
27 |   param_status <- dataframes_to_parameters(fixed_status,
28 |                                            varying_status)
29 | 
30 |   n <- nrow(X)
31 |   J <- ncol(params$P)
32 |   n_varying <- nrow(varying_df)
33 | 
34 |   jacobian <- matrix(nrow = n*J,
35 |                      ncol = n_varying)
36 |   counter <- 0
37 |   for(i in 1:n){
38 |     for(j in 1:J){
39 |       counter <- counter + 1
40 |       jacobian[counter,] <-
41 |         par_to_jacobian_row(params,
42 |                             param_status,
43 |                             i,
44 |                             j,
45 |                             X,
46 |                             Z,
47 |                             X_tilde,
48 |                             Z_tilde,
49 |                             Z_tilde_gamma_cols)
50 | 
51 |     }
52 |   }
53 |   return(jacobian)
54 | }
55 | 


--------------------------------------------------------------------------------
/R/meaninate.R:
--------------------------------------------------------------------------------
 1 | ##################### Mean Function #####################
 2 | meaninate <- function(gammas,
 3 |                       B,
 4 |                       X,
 5 |                       Z,
 6 |                       P,
 7 |                       X_tilde,
 8 |                       Z_tilde = NULL,
 9 |                       Z_tilde_gamma_cols,
10 |                       P_tilde,
11 |                       gamma_tilde,
12 |                       alpha_tilde = NULL,
13 |                       Z_tilde_list = NULL,
14 |                       return_separate = FALSE,
15 |                       exclude_gammas = FALSE){
16 | 
17 |   if(!is.null(alpha_tilde)){
18 |     Z_tilde <- construct_Z_tilde(Z_tilde_list,
19 |                                  alpha_tilde)
20 |   }
21 | 
22 |   J <- ncol(B)
23 |   n <- nrow(X)
24 | 
25 |   if(!exclude_gammas){
26 |   #multiply appropriate columns of Z_tilde by exp(gamma)
27 |   if(length(Z_tilde_gamma_cols >0)){
28 |     for(colnum in Z_tilde_gamma_cols){
29 |       Z_tilde[,colnum] <- exp(gammas)* Z_tilde[,colnum]
30 |     }
31 |   }
32 | 
33 |   sample_part <-
34 |     (Z%*%P)*(
35 |       exp(gammas%*%matrix(1,nrow = 1, ncol = J)
36 |           + X%*%B))
37 |   spurious_part <- Z_tilde%*%(P_tilde *
38 |                                 exp(gamma_tilde %*% matrix(1,
39 |                                                            nrow = 1,
40 |                                                            ncol = J) +
41 |                                       X_tilde%*%B))
42 |   } else{
43 | 
44 | 
45 |     sample_part <-
46 |       (Z%*%P)*(
47 |         exp(X%*%B))
48 |     spurious_part <- Z_tilde%*%(P_tilde *
49 |                                   exp(gamma_tilde %*% matrix(1,
50 |                                                              nrow = 1,
51 |                                                              ncol = J) +
52 |                                         X_tilde%*%B))
53 | 
54 |   }
55 | 
56 |   if(return_separate){
57 |     return(list("sample" = sample_part,
58 |                 "spurious" = spurious_part))
59 |   } else{
60 |     return(sample_part + spurious_part)
61 |   }
62 | 
63 | 
64 | }
65 | 


--------------------------------------------------------------------------------
/R/mu_d_alpha_tilde.R:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | mu_d_alpha_tilde <- function(i,
 4 |                              J,
 5 |                              a_tilde,
 6 |                              gammas,
 7 |                              B,
 8 |                              X_tilde,
 9 |                              Z_tilde_gamma_cols,
10 |                              alpha_tilde,
11 |                              Z_tilde_list,
12 |                              P_tilde,
13 |                              gamma_tilde){
14 | 
15 |   Z_tilde_piece <- Z_tilde_list[[a_tilde + 1]]
16 | 
17 |   if(length(Z_tilde_gamma_cols >0)){
18 |     for(colnum in Z_tilde_gamma_cols){
19 |       Z_tilde_piece[,colnum] <- exp(gammas)* Z_tilde_piece[,colnum]
20 |     }
21 |   }
22 | 
23 |   return(as.numeric(
24 |     exp(alpha_tilde[a_tilde])*Z_tilde_piece[i,]%*%(
25 |     P_tilde*exp(matrix(gamma_tilde,ncol = 1)%*%matrix(1, nrow = 1, ncol = J) +
26 |                   X_tilde%*%B))
27 |   )
28 |   )
29 | 
30 | 
31 | 
32 | 
33 | }
34 | 


--------------------------------------------------------------------------------
/R/mu_d_beta.R:
--------------------------------------------------------------------------------
 1 | #' Calculate derivative of mu_ij with respect to B
 2 | #'
 3 | #' @param i The sample index (must be in 1, ..., n)
 4 | #' @param j The taxon index (must be in 1, ..., J)
 5 | #' @param q Which row of B to take derivative with respect to
 6 | #' (must be in 1, ..., p)
 7 | #' @param gammas Numeric vector of read intensities
 8 | #' @param B Detection efficiency matrix
 9 | #' @param X The efficiency design matrix (n x p)
10 | #' @param Z The sample design matrix (n x K)
11 | #' @param P The sample relative abundance matrix (K x J)
12 | #' @param X_tilde The spurious read efficiency design (K_tilde x p)
13 | #' @param Z_tilde The spurious read design (n x K_tilde)
14 | #' @param Z_tilde_gamma_cols Numeric vector containing indexes of columns of
15 | #' Z_tilde to scale by exp(gamma); NULL if no columns to be scaled
16 | #' @param P_tilde The spurious source relative abundance matrix (K_tilde x J)
17 | #' @param gamma_tilde Spurious read intensity parameter
18 | #'
19 | #' @return A derivative d mu_ij / d B_qj
20 | 
21 | mu_d_beta <- function(i,
22 |                       j,
23 |                       q,
24 |                       gammas,
25 |                       B,
26 |                       X,
27 |                       Z,
28 |                       P,
29 |                       X_tilde,
30 |                       Z_tilde,
31 |                       Z_tilde_gamma_cols,
32 |                       alpha_tilde = NULL,
33 |                       Z_tilde_list = NULL,
34 |                       P_tilde,
35 |                       gamma_tilde){
36 | 
37 |   if(!is.null(alpha_tilde)){
38 |     Z_tilde <- construct_Z_tilde(Z_tilde_list,
39 |                                  alpha_tilde)
40 |   }
41 | 
42 | 
43 | 
44 | 
45 |   mu_deriv <-
46 |     t(X[i,q,drop = F])%*%(Z[i,,drop = F]%*%P[,j,drop = F]*exp(gammas[i] +
47 |                                                                 X[i,,drop = F]%*%
48 |                                                                 B[,j,drop = F]))
49 | 
50 |   K_tilde <- dim(P_tilde)[1]
51 |   for(k_tilde in 1:K_tilde){
52 |     if(k_tilde %in% Z_tilde_gamma_cols){
53 | 
54 |       mu_deriv <- mu_deriv + exp(gammas[i])*
55 |         Z_tilde[i,k_tilde]%*%(P_tilde[k_tilde,j] *
56 |                                 exp(gamma_tilde[k_tilde]  +
57 |                                       X_tilde[k_tilde,]%*%B[,j]))*X_tilde[k_tilde,q]
58 | 
59 | 
60 |     } else{
61 |       mu_deriv <- mu_deriv +
62 |         Z_tilde[i,k_tilde]%*%(P_tilde[k_tilde,j] *
63 |                                 exp(gamma_tilde[k_tilde]  +
64 |                                       X_tilde[k_tilde,]%*%B[,j]))*X_tilde[k_tilde,q]
65 |     }
66 |   }
67 | 
68 | 
69 | 
70 |   return(mu_deriv)
71 | }
72 | 


--------------------------------------------------------------------------------
/R/mu_d_beta_faster.R:
--------------------------------------------------------------------------------
 1 | mu_d_beta_faster <- function(i,
 2 |                              J,
 3 |                              k,
 4 |                              gammas,
 5 |                              B,
 6 |                              X,
 7 |                              Z,
 8 |                              P,
 9 |                              X_tilde,
10 |                              Z_tilde,
11 |                              Z_tilde_gamma_cols,
12 |                              P_tilde,
13 |                              gamma_tilde){
14 | 
15 |   mu_deriv <- sapply(1:J, function(j)
16 |     t(X[i,k,drop = F])%*%(Z[i,,drop = F]%*%P[,j,drop = F]*exp(gammas[i] +
17 |                                                                 X[i,,drop = F]%*%B[,j,drop = F])))
18 | 
19 |   K_tilde <- dim(P_tilde)[1]
20 |   for(k_tilde in 1:K_tilde){
21 |     if(k_tilde %in% Z_tilde_gamma_cols){
22 |       mu_deriv <- mu_deriv + sapply(1:J, function(j)
23 |         exp(gammas[i])*
24 |           (Z_tilde[i,k_tilde,drop = F])%*%
25 |           P_tilde[k_tilde,j,drop = F]*
26 |           exp(gamma_tilde[k_tilde] +
27 |                 X_tilde[k_tilde,,drop = F]%*%B[,j,drop = F])*
28 |           X_tilde[k_tilde,k,drop = F])
29 |     } else{
30 |       mu_deriv <- mu_deriv +
31 |         sapply(1:J, function(j) (Z_tilde[i,k_tilde,drop = F])%*%
32 |                  P_tilde[k_tilde,j,drop = F]*
33 |                  exp(gamma_tilde[k_tilde] +
34 |                        X_tilde[k_tilde,,drop = F]%*%B[,j,drop = F])*
35 |                  X_tilde[k_tilde,k,drop = F])
36 |     }
37 |   }
38 | 
39 | 
40 | 
41 |   return(mu_deriv)
42 | }
43 | 


--------------------------------------------------------------------------------
/R/mu_d_gamma_faster.R:
--------------------------------------------------------------------------------
 1 | 
 2 | mu_d_gamma_faster <- function(i,
 3 |                               J,
 4 |                               gammas,
 5 |                               B,
 6 |                               X,
 7 |                               Z,
 8 |                               P,
 9 |                               X_tilde,
10 |                               Z_tilde,
11 |                               Z_tilde_gamma_cols,
12 |                               alpha_tilde = NULL,
13 |                               Z_tilde_list = NULL,
14 |                               P_tilde,
15 |                               gamma_tilde){
16 | 
17 |   if(!is.null(alpha_tilde)){
18 |     Z_tilde <- construct_Z_tilde(Z_tilde_list,
19 |                                  alpha_tilde)
20 |   }
21 | 
22 |   mu_deriv <- Z[i,,drop = F]%*%P*exp(gammas[i] +
23 |                                        X[i,,drop = F]%*%B)
24 | 
25 |   K_tilde <- dim(P_tilde)[1]
26 |   for(k_tilde in 1:K_tilde){
27 |     if(k_tilde %in% Z_tilde_gamma_cols){
28 |       mu_deriv <- mu_deriv + exp(gammas[i])*
29 |         (Z_tilde[i,k_tilde,drop = F])%*%
30 |         (P_tilde[k_tilde,,drop = F])*
31 |         exp(gamma_tilde[k_tilde] +
32 |               X_tilde[k_tilde,,drop = F]%*%B)
33 |     }
34 |   }
35 |   return(mu_deriv)
36 | }
37 | 


--------------------------------------------------------------------------------
/R/mu_d_gamma_tilde.R:
--------------------------------------------------------------------------------
 1 | #' Calculate derivative of mu_ij with respect to an entry of gamma_tilde
 2 | #'
 3 | #' @param i The sample index (must be in 1, ..., n)
 4 | #' @param j The taxon index (must be in 1, ..., J)
 5 | #' @param k_tilde The element of gamma_tilde with respect to which to take derivative
 6 | #' @param gammas Numeric vector of read intensities
 7 | #' @param B Detection efficiency matrix
 8 | #' @param X The efficiency design matrix (n x p)
 9 | #' @param Z The sample design matrix (n x K)
10 | #' @param P The sample relative abundance matrix (K x J)
11 | #' @param X_tilde The spurious read efficiency design (K_tilde x p)
12 | #' @param Z_tilde The spurious read design (n x K_tilde)
13 | #' @param Z_tilde_gamma_cols Numeric vector containing indexes of columns of
14 | #' Z_tilde to scale by exp(gamma); NULL if no columns to be scaled
15 | #' @param P_tilde The spurious source relative abundance matrix (K_tilde x J)
16 | #' @param gamma_tilde Spurious read intensity parameter
17 | #'
18 | #' @return A derivative d mu_ij / d gamma_tilde_k_tilde
19 | mu_d_gamma_tilde <- function(i,
20 |                              j,
21 |                              k_tilde,
22 |                              gammas,
23 |                              B,
24 |                              X,
25 |                              Z,
26 |                              P,
27 |                              X_tilde,
28 |                              Z_tilde,
29 |                              Z_tilde_gamma_cols,
30 |                              alpha_tilde = NULL,
31 |                              Z_tilde_list = NULL,
32 |                              P_tilde,
33 |                              gamma_tilde){
34 | 
35 |   if(!is.null(alpha_tilde)){
36 |     Z_tilde <- construct_Z_tilde(Z_tilde_list,
37 |                                  alpha_tilde)
38 |   }
39 | 
40 |   mu_deriv <- 0
41 | 
42 |   if(k_tilde %in% Z_tilde_gamma_cols){
43 |     mu_deriv <- mu_deriv + exp(gammas[i])*
44 |       (Z_tilde[i,k_tilde,drop = F]%*%
45 |          P_tilde[k_tilde,j,drop = F])*
46 |       exp(gamma_tilde[k_tilde] +
47 |             X_tilde[k_tilde,,drop = F]%*%B[,j,drop = F])
48 |   } else{
49 |     mu_deriv <- mu_deriv +
50 |       (Z_tilde[i,k_tilde,drop = F]%*%
51 |          P_tilde[k_tilde,j,drop = F])*
52 |       exp(gamma_tilde[k_tilde] +
53 |             X_tilde[k_tilde,,drop = F]%*%B[,j,drop = F])
54 | 
55 |   }
56 | 
57 |   return(mu_deriv)
58 | }
59 | 


--------------------------------------------------------------------------------
/R/mu_d_rho_faster.R:
--------------------------------------------------------------------------------
 1 | #' Calculate derivative of mu_ij with respect to a row of matrix-valued
 2 | #' parameter rho
 3 | #'
 4 | #' @param i The sample index (must be in 1, ..., n)
 5 | #' @param J The total number of taxa modeled
 6 | #' @param k Row index (which row of rho with respect to which to take derivative)
 7 | #' @param gammas Numeric vector of read intensities
 8 | #' @param B Detection efficiency matrix
 9 | #' @param X The efficiency design matrix (n x p)
10 | #' @param Z The sample design matrix (n x K)
11 | #' @param Ak_list List containing matrices that map back-transformed
12 | #' rho to entries of P
13 | #' @param rho_k Value of kth row of rho
14 | #' @param fixed_P_multipliers Numeric vector of length K containing values in (0,1]
15 | #' equal to 1 - sum(fixed relative abundances in row k of P)
16 | #' Z_tilde to scale by exp(gamma); NULL if no columns to be scaled
17 | #'
18 | #' @return A derivative d mu_i / d rho_k
19 | mu_d_rho_faster <- function(i,
20 |                             J,
21 |                             k,
22 |                             gammas,
23 |                             B,
24 |                             X,
25 |                             Z,
26 |                             Ak_list,
27 |                             rho_k,
28 |                             fixed_P_multipliers,
29 |                             proportion_scale = FALSE){
30 |   dmu_i_dpk <- Matrix::Diagonal(x = as.numeric(Z[i,k]*exp(matrix(gammas[i],nrow = 1, ncol = J) + X[i,,drop=F]%*%B)))
31 |   if(!proportion_scale){
32 |   nu_k <- exp(rho_k)
33 |   front_term <- 1/(1 + sum(nu_k))
34 |   dpk_drhok <- Matrix::Matrix(fixed_P_multipliers[k]*(
35 |     front_term*rbind(diag(nu_k), 0) -
36 |       front_term^2*outer(c(nu_k,1),nu_k))
37 |   )
38 | 
39 |   rho_jacob_i <- dmu_i_dpk%*%dpk_drhok
40 | 
41 |   return(rho_jacob_i)
42 |   } else{
43 |     return(dmu_i_dpk)
44 |   }
45 | 
46 | }
47 | 


--------------------------------------------------------------------------------
/R/mu_d_rho_tilde_faster.R:
--------------------------------------------------------------------------------
 1 | #' Calculate derivative of mu_i with respect to a row of matrix-valued
 2 | #' parameter rho_tilde
 3 | #'
 4 | #' @param i The sample index (must be in 1, ..., n)
 5 | #' @param J The total number of taxa modeled
 6 | #' @param k_tilde Row index (which row of rho_tilde with respect to which to take derivative)
 7 | #' @param gammas Numeric vector of read intensities
 8 | #' @param B Detection efficiency matrix
 9 | #' @param rho_tilde_k Value of kth row of rho_tilde
10 | #' @param A_tilde_k_list List containing matrices that map back-transformed
11 | #' rho_tilde to entries of P_tilde
12 | #' @param fixed_P_multipliers Numeric vector of length K containing values in (0,1]
13 | #' equal to 1 - sum(fixed relative abundances in row k of P_tilde)
14 | #'
15 | #' @return A derivative d mu_i / d rho_tilde_k
16 | mu_d_rho_tilde_faster <- function(i,
17 |                                   J,
18 |                                   k_tilde,
19 |                                   gammas,
20 |                                   B,
21 |                                   rho_tilde_k,
22 |                                   A_tilde_k_list,
23 |                                   fixed_P_tilde_multipliers,
24 |                                   X_tilde,
25 |                                   Z_tilde,
26 |                                   Z_tilde_gamma_cols,
27 |                                   alpha_tilde = NULL,
28 |                                   Z_tilde_list = NULL,
29 |                                   gamma_tilde,
30 |                                   proportion_scale = FALSE)
31 | {
32 | 
33 |   if(!is.null(alpha_tilde)){
34 |     Z_tilde <- construct_Z_tilde(Z_tilde_list,
35 |                                  alpha_tilde)
36 |   }
37 | 
38 |   for(zcol in Z_tilde_gamma_cols){
39 |     Z_tilde[,zcol] <- exp(gammas)*Z_tilde[,zcol]
40 |   }
41 | 
42 |   dmu_i_dptildektilde <- Matrix::Diagonal(x = as.numeric(Z_tilde[i,k_tilde]*
43 |                                                            exp(matrix(gamma_tilde[k_tilde],nrow = 1, ncol = J) +
44 |                                                                  X_tilde[k_tilde,,drop = F]%*%B)))
45 | 
46 |   if(!proportion_scale){
47 |   nu_tilde_k <- exp(rho_tilde_k)
48 | 
49 |   front_term <- 1/(1 + sum(nu_tilde_k))
50 |   dptildek_drhotildek <- Matrix::Matrix(fixed_P_tilde_multipliers[k_tilde]*(
51 |     front_term*rbind(diag(nu_tilde_k), 0) -
52 |       front_term^2*outer(c(nu_tilde_k,1),nu_tilde_k)
53 |   ))
54 | 
55 |   rho_tilde_jacob_i <- dmu_i_dptildektilde%*%A_tilde_k_list[[k_tilde]]%*%dptildek_drhotildek
56 | 
57 |   return(rho_tilde_jacob_i)
58 |   } else{
59 |     return(dmu_i_dptildektilde)
60 |   }
61 | 
62 | }
63 | 


--------------------------------------------------------------------------------
/R/nullify.R:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | nullify <- function(W,
 4 |                     full_model,
 5 |                     null_model){
 6 | 
 7 |   J <- ncol(full_model$W)
 8 |   n <- nrow(full_model$W)
 9 | 
10 | full_means <-
11 |   meaninate(gammas = full_model$gammas,
12 |             B = full_model$B,
13 |             X = full_model$X,
14 |             Z = full_model$Z,
15 |             P = full_model$P,
16 |             X_tilde = full_model$X_tilde,
17 |             Z_tilde = full_model$Z_tilde,
18 |             Z_tilde_gamma_cols = full_model$Z_tilde_gamma_cols,
19 |             P_tilde = full_model$P_tilde,
20 |             gamma_tilde = full_model$gamma_tilde,
21 |             alpha_tilde = full_model$alpha_tilde,
22 |             Z_tilde_list = full_model$Z_tilde_list,
23 |             exclude_gammas= TRUE)
24 | 
25 | null_means <-
26 |   meaninate(gammas = null_model$gammas,
27 |             B = null_model$B,
28 |             X = null_model$X,
29 |             Z = null_model$Z,
30 |             P = null_model$P,
31 |             X_tilde = null_model$X_tilde,
32 |             Z_tilde = null_model$Z_tilde,
33 |             Z_tilde_gamma_cols = null_model$Z_tilde_gamma_cols,
34 |             P_tilde = null_model$P_tilde,
35 |             gamma_tilde = null_model$gamma_tilde,
36 |             alpha_tilde = null_model$alpha_tilde,
37 |             Z_tilde_list = null_model$Z_tilde_list,
38 |             exclude_gammas = TRUE)
39 | 
40 | W0 <- W*(null_means/full_means)
41 | W0[full_means == 0] <- 0
42 | 
43 | for(i in 1:nrow(W0)){
44 |   W0[i,] <- (W0[i,]/sum(W0[i,]))*sum(W[i,])
45 | }
46 | 
47 | return(W0)
48 | 
49 | }
50 | 


--------------------------------------------------------------------------------
/R/numerical_jacobian.R:
--------------------------------------------------------------------------------
 1 | #' @import numDeriv
 2 | #' 
 3 | numerical_jacobian <- function(varying_lr_df,
 4 |                                fixed_df,
 5 |                                varying_df,
 6 |                                gammas,
 7 |                                B,
 8 |                                X,
 9 |                                Z,
10 |                                P,
11 |                                X_tilde,
12 |                                Z_tilde = NULL,
13 |                                Z_tilde_gamma_cols,
14 |                                P_tilde,
15 |                                gamma_tilde,
16 |                                alpha_tilde = NULL,
17 |                                Z_tilde_list = NULL
18 | ){
19 |   n <- nrow(Z)
20 |   J <- ncol(P)
21 |   npar <- nrow(varying_lr_df)
22 |   mean_func <- function(x,index,i,j){
23 |     temp_lr <- varying_lr_df
24 |     temp_lr$value[index] <- x
25 |     temp_params <- dataframes_to_parameters(fixed_df,
26 |                                             lr_to_ra(fixed_df,
27 |                                                      temp_lr,
28 |                                                      varying_df))
29 |     return(with(temp_params,meaninate(gammas,
30 |                                       B,
31 |                                       X,
32 |                                       Z,
33 |                                       P,
34 |                                       X_tilde,
35 |                                       Z_tilde = NULL,
36 |                                       Z_tilde_gamma_cols,
37 |                                       P_tilde,
38 |                                       gamma_tilde,
39 |                                       alpha_tilde = alpha_tilde,
40 |                                       Z_tilde_list = Z_tilde_list,
41 |                                       return_separate = FALSE,
42 |                                       exclude_gammas = FALSE)[i,j]))
43 |   }
44 |   
45 |   
46 |   num_jacob <- matrix(0,nrow = n*J, ncol = npar)
47 |   
48 |   for(i in 1:n){
49 |     print(paste("i = ", i, sep = "", collapse = ""))
50 |     for(j in 1:J){
51 |       print(paste("j = ", j, sep = "", collapse = ""))
52 |       for(parindex in 1:npar){
53 |         num_jacob[(i - 1)*J + j, parindex] <- numDeriv::grad(
54 |           function(x) mean_func(x,index = parindex,i = i,j = j),
55 |           varying_lr_df$value[parindex])
56 |         
57 |       }
58 |     }
59 |   }
60 |   
61 |   return(num_jacob)
62 |   
63 |   
64 |   
65 | }
66 | 


--------------------------------------------------------------------------------
/R/par_to_jacobian_row.R:
--------------------------------------------------------------------------------
  1 | par_to_jacobian_row <- function(params,
  2 |                                 param_status,
  3 |                                 i,
  4 |                                 j,
  5 |                                 X,
  6 |                                 Z,
  7 |                                 X_tilde,
  8 |                                 Z_tilde,
  9 |                                 Z_tilde_gamma_cols){
 10 |   J <- ncol(params$P)
 11 | 
 12 |   ### jacobian row in P
 13 |   which_P_rows <- which(apply(param_status$P,1, max) ==1)
 14 |   P_jac_row <- numeric(J*length(which_P_rows))
 15 | 
 16 |   if(length(which_P_rows) >0){
 17 |     for(row_index in 1:length(which_P_rows)){
 18 |       P_jac_row[(row_index - 1)*J + j] <- mu_d_P(i,
 19 |                                                  j,
 20 |                                                  m = which_P_rows[row_index],
 21 |                                                  gammas = params$gammas,
 22 |                                                  B = params$B,
 23 |                                                  X = X,
 24 |                                                  Z = Z,
 25 |                                                  P = params$P)
 26 |     }
 27 |   }
 28 | 
 29 |   ### jacobian row in P_tilde
 30 |   which_P_tilde_rows <- which(apply(param_status$P_tilde,1, max) ==1)
 31 |   P_tilde_jac_row <- numeric(J*length(which_P_tilde_rows))
 32 | 
 33 |   if(length(which_P_tilde_rows)>0){
 34 |     for(row_index in 1:length(which_P_tilde_rows)){
 35 |       P_tilde_jac_row[(row_index - 1)*J + j] <- mu_d_P_tilde(i,
 36 |                                                              j,
 37 |                                                              k_tilde = which_P_tilde_rows[row_index],
 38 |                                                              gammas = params$gammas,
 39 |                                                              B = params$B,
 40 |                                                              X = X,
 41 |                                                              Z = Z,
 42 |                                                              P = params$P,
 43 |                                                              X_tilde = X_tilde,
 44 |                                                              Z_tilde = Z_tilde,
 45 |                                                              Z_tilde_gamma_cols = Z_tilde_gamma_cols,
 46 |                                                              P_tilde = params$P_tilde,
 47 |                                                              gamma_tilde = params$gamma_tilde)
 48 |     }
 49 |   }
 50 | 
 51 |   ### jacobian row in B
 52 |   which_B_rows <- which(apply(param_status$B,1, max) ==1)
 53 |   B_jac_row <- numeric((J - 1)*length(which_B_rows))
 54 | 
 55 |   if(length(which_B_rows)>0){
 56 |     if(j < J){ #row of jacobian is zero if j = J
 57 |       for(row_index in 1:length(which_B_rows)){
 58 |         B_jac_row[(row_index - 1)*(J - 1) + j] <- mu_d_beta(i,
 59 |                                                             j,
 60 |                                                             q = which_B_rows[row_index],
 61 |                                                             gammas = params$gammas,
 62 |                                                             B = params$B,
 63 |                                                             X = X,
 64 |                                                             Z = Z,
 65 |                                                             P = params$P,
 66 |                                                             X_tilde = X_tilde,
 67 |                                                             Z_tilde = Z_tilde,
 68 |                                                             Z_tilde_gamma_cols = Z_tilde_gamma_cols,
 69 |                                                             P_tilde = params$P_tilde,
 70 |                                                             gamma_tilde = params$gamma_tilde)
 71 |       }
 72 |     }
 73 |   }
 74 | 
 75 |   ### gamma jacobian row
 76 |   which_gammas <- which(apply(param_status$gammas,1, max) ==1)
 77 |   gammas_jac_row <- numeric(sum(param_status$gammas))
 78 | 
 79 |   if(length(which_gammas)>0){
 80 |     gammas_jac_row[i] <- mu_d_gamma(i,
 81 |                                     j,
 82 |                                     gammas = params$gammas,
 83 |                                     B = params$B,
 84 |                                     X = X,
 85 |                                     Z = Z,
 86 |                                     P = params$P,
 87 |                                     X_tilde = X_tilde,
 88 |                                     Z_tilde = Z_tilde,
 89 |                                     Z_tilde_gamma_cols = Z_tilde_gamma_cols,
 90 |                                     P_tilde = params$P_tilde,
 91 |                                     gamma_tilde = params$gamma_tilde)
 92 |   }
 93 |   gammas_jac_row <- gammas_jac_row[which_gammas] #???
 94 | 
 95 |   ### gamma tilde jacobian row
 96 |   which_gamma_tilde <- which(apply(param_status$gamma_tilde,1, max) ==1)
 97 |   gamma_tilde_jac_row <- numeric(sum(param_status$gamma_tilde))
 98 | 
 99 |   if(length(which_gamma_tilde)>0){
100 |     for(row_index in 1:length(which_gamma_tilde)){
101 |       gamma_tilde_jac_row[row_index] <- mu_d_gamma_tilde(i,
102 |                                                          j,
103 |                                                          k_tilde = which_gamma_tilde[row_index],
104 |                                                          gammas = params$gammas,
105 |                                                          B = params$B,
106 |                                                          X = X,
107 |                                                          Z = Z,
108 |                                                          P = params$P,
109 |                                                          X_tilde = X_tilde,
110 |                                                          Z_tilde = Z_tilde,
111 |                                                          Z_tilde_gamma_cols = Z_tilde_gamma_cols,
112 |                                                          P_tilde = params$P_tilde,
113 |                                                          gamma_tilde = params$gamma_tilde)
114 |     }
115 |   }
116 | 
117 |   return(c(P_jac_row,
118 |            P_tilde_jac_row,
119 |            B_jac_row,
120 |            gammas_jac_row,
121 |            gamma_tilde_jac_row))
122 | 
123 | }
124 | 


--------------------------------------------------------------------------------
/R/parameters_to_dataframes.R:
--------------------------------------------------------------------------------
  1 | parameters_to_dataframes <- function(P,
  2 |                                      P_fixed_indices,
  3 |                                      P_tilde,
  4 |                                      P_tilde_fixed_indices,
  5 |                                      B,
  6 |                                      B_fixed_indices,
  7 |                                      gammas,
  8 |                                      gammas_fixed_indices,
  9 |                                      gamma_tilde,
 10 |                                      gamma_tilde_fixed_indices,
 11 |                                      alpha_tilde = NULL){
 12 | 
 13 |   ### extract important dimensions
 14 |   J <- ncol(P)
 15 |   K <- nrow(P)
 16 |   K_tilde <- nrow(P_tilde)
 17 |   p_B <- nrow(B)
 18 |   n <- nrow(as.matrix(gammas,ncol = 1))
 19 |   
 20 |   if(length(c(J, K, K_tilde, p_B, n)) < 5) {
 21 |     badbad <- which(sapply(c(J, K, K_tilde, p_B, n), is.null))
 22 |     bad_param <- c("J", "K", "K_tilde", "p_B", "n")[badbad]
 23 |     stop(c("Unfortunately, one of J, K, K_tilde, p_B or n is NULL.\n", 
 24 |            "One of P, P_tilde, B or gammas may be a vector, not a matrix.\n", 
 25 |            "Please try again."))
 26 |   }
 27 |   
 28 |   P_fixed_indices <- apply(P_fixed_indices, c(1,2), as.logical)
 29 |   P_tilde_fixed_indices <- apply(P_tilde_fixed_indices, c(1,2), as.logical)
 30 | 
 31 |   ### check that P_fixed_indices and P satisfy RA requirements
 32 |   for(k in 1:K){
 33 |     if(sum(!P_fixed_indices[k,]) >0){
 34 |       C_k <- sum(P_fixed_indices[k,])
 35 |       fixed_sum_k <- sum(P[k,P_fixed_indices[k,]])
 36 |       if(C_k >= J - 1){
 37 |         P_fixed_indices[k,] <- TRUE
 38 |         warning(paste("Row ", k, " of P set to fixed; ",
 39 |                       "\nprovided number of known elements via P_fixed_indices is greater ",
 40 |                       "\nthan number of taxa - 2, which implies entire row is known."))
 41 |       }
 42 |       if(fixed_sum_k >=1){
 43 |         P_fixed_indices[k,] <- TRUE
 44 |         warning(paste("Row ", k, " of P set to fixed; ",
 45 |                       "\ntotal relative abundances across taxa indicated as known",
 46 |                       "\nvia P_fixed_indices is 1, which implies entire row is known."))
 47 |       }
 48 |     }
 49 |   }
 50 | 
 51 |   ### check that P_tilde_fixed_indices and P_tilde satisfy RA requirements
 52 |   for(k in 1:K_tilde){
 53 |     if(sum(!P_tilde_fixed_indices[k,]) >0){
 54 |       C_k <- sum(P_tilde_fixed_indices[k,])
 55 |       fixed_sum_k <- sum(P_tilde[k,P_tilde_fixed_indices[k,]])
 56 |       if(C_k >= J - 1){
 57 |         P_tilde_fixed_indices[k,] <- TRUE
 58 |         warning(paste("Row ", k, " of P_tilde set to fixed; ",
 59 |                       "\nprovided number of known elements via P_tilde_fixed_indices is greater ",
 60 |                       "\nthan number of taxa - 2, which implies entire row is known."))
 61 |       }
 62 |       if(fixed_sum_k >=1){
 63 |         P_tilde_fixed_indices[k,] <- TRUE
 64 |         warning(paste("Row ", k, " of P_tilde set to fixed; ",
 65 |                       "\ntotal relative abundances across taxa indicated as known",
 66 |                       "\nvia P_tilde_fixed_indices is 1, which implies entire row is known."))
 67 |       }
 68 |     }
 69 |   }
 70 | 
 71 |   ### set up fixed parameter and varying parameter data.frames
 72 |   fixed_df <- data.frame("value" = numeric(0),
 73 |                          "param" = character(0),
 74 |                          "j" = numeric(0))
 75 | 
 76 |   varying_df <- data.frame("value" = numeric(0),
 77 |                            "param" = character(0),
 78 |                            "j" = numeric(0))
 79 | 
 80 | 
 81 |   ### Set up matrix to track specimen provenance
 82 |   p_k_matrix <- P
 83 |   for(k in 1:K){
 84 |     p_k_matrix[k,] <- rep(k,J)
 85 |   }
 86 | 
 87 |   ### Set up matrix to track taxon provenance
 88 | 
 89 |   p_j_matrix <- P
 90 |   for(j in 1:J){
 91 |     p_j_matrix[,j] <- rep(j,K)
 92 |   }
 93 | 
 94 |   known_temp_df <- data.frame("value" = P[P_fixed_indices],
 95 |                               "param" = rep("P",sum(P_fixed_indices)),
 96 |                               "k" = p_k_matrix[P_fixed_indices],
 97 |                               "j" = p_j_matrix[P_fixed_indices])
 98 | 
 99 |   varying_temp_df <- data.frame("value" = P[!P_fixed_indices],
100 |                                 "param" = rep("P",
101 |                                               sum(!P_fixed_indices)),
102 |                                 "k" = p_k_matrix[!P_fixed_indices],
103 |                                 "j" = p_j_matrix[!P_fixed_indices])
104 | 
105 |   fixed_df <- rbind(fixed_df,
106 |                     known_temp_df)
107 | 
108 |   varying_df <- rbind(varying_df,
109 |                       varying_temp_df)
110 | 
111 |   ### Set up matrix to track spurious read specimen provenance
112 |   p_tilde_k_matrix <- P_tilde
113 |   for(k in 1:K_tilde){
114 |     p_tilde_k_matrix[k,] <- rep(k,J)
115 |   }
116 | 
117 |   ### Set up matrix to track spurious read taxon provenance
118 | 
119 |   p_tilde_j_matrix <- P_tilde
120 |   for(j in 1:J){
121 |     p_tilde_j_matrix[,j] <- rep(j,K_tilde)
122 |   }
123 | 
124 |   known_temp_df <- data.frame("value" = P_tilde[P_tilde_fixed_indices],
125 |                               "param" = rep("P_tilde",sum(P_tilde_fixed_indices)),
126 |                               "k" = p_tilde_k_matrix[P_tilde_fixed_indices],
127 |                               "j" = p_tilde_j_matrix[P_tilde_fixed_indices])
128 | 
129 |   varying_temp_df <- data.frame("value" = P_tilde[!P_tilde_fixed_indices],
130 |                                 "param" = rep("P_tilde",sum(!P_tilde_fixed_indices)),
131 |                                 "k" = p_tilde_k_matrix[!P_tilde_fixed_indices],
132 |                                 "j" = p_tilde_j_matrix[!P_tilde_fixed_indices])
133 | 
134 |   fixed_df <- rbind(fixed_df,
135 |                     known_temp_df)
136 | 
137 |   varying_df <- rbind(varying_df,
138 |                       varying_temp_df)
139 | 
140 | 
141 | 
142 |   ### Set up matrix to track effects
143 |   B_k_matrix <- B
144 |   for(k in 1:p_B){
145 |     B_k_matrix[k,] <- rep(k,J)
146 |   }
147 | 
148 |   ### Set up matrix to track effects by taxon
149 | 
150 |   B_j_matrix <- B
151 |   for(j in 1:J){
152 |     B_j_matrix[,j] <- rep(j,p_B)
153 |   }
154 | 
155 | 
156 |   known_temp_df <- data.frame("value" = B[B_fixed_indices],
157 |                               "param" = rep("B", sum(B_fixed_indices)),
158 |                               "k" = B_k_matrix[B_fixed_indices],
159 |                               "j" = B_j_matrix[B_fixed_indices])
160 | 
161 |   varying_temp_df <- data.frame("value" = B[!B_fixed_indices],
162 |                                 "param" = rep("B",sum(!B_fixed_indices)),
163 |                                 "k" = B_k_matrix[!B_fixed_indices],
164 |                                 "j" = B_j_matrix[!B_fixed_indices])
165 | 
166 |   ### make sure B is ordered by k and *then* j
167 | 
168 |   varying_temp_df <- varying_temp_df[order(varying_temp_df$k,
169 |                                            varying_temp_df$j),]
170 | 
171 |   fixed_df <- rbind(fixed_df,
172 |                     known_temp_df)
173 | 
174 |   varying_df <- rbind(varying_df,
175 |                       varying_temp_df)
176 | 
177 | 
178 | 
179 |   temp_df <- data.frame("value" = as.numeric(gammas),
180 |                         "param" = "gamma",
181 |                         "k" = 1:n,
182 |                         "j" = 0)
183 |   fixed_df <- rbind(fixed_df,temp_df[((1:n) %in%  which(gammas_fixed_indices)),])
184 | 
185 |   varying_df <- rbind(varying_df,temp_df[!((1:n) %in%  which(gammas_fixed_indices)),])
186 | 
187 |   temp_df <- data.frame("value" = as.numeric(gamma_tilde),
188 |                         "param" = "gamma_tilde",
189 |                         "k" = 1:nrow(gamma_tilde),
190 |                         "j" = 0)
191 | 
192 |   K_tilde <- length(as.numeric(gamma_tilde))
193 |   fixed_df <- rbind(fixed_df,temp_df[((1:K_tilde) %in%  which(gamma_tilde_fixed_indices)),])
194 | 
195 |   varying_df <- rbind(varying_df,temp_df[!((1:K_tilde) %in%  which(gamma_tilde_fixed_indices)),])
196 | 
197 |   if(!is.null(alpha_tilde)){
198 |     varying_df <- rbind(varying_df,
199 |                         data.frame("value" = alpha_tilde,
200 |                                    "param" = "alpha_tilde",
201 |                                    "k" = 1:length(alpha_tilde),
202 |                                    "j" = 0))
203 |   }
204 | 
205 |   return(list("fixed" = fixed_df,
206 |               "varying" = varying_df))
207 | 
208 | }
209 | 


--------------------------------------------------------------------------------
/R/pb_update_one_constrained.R:
--------------------------------------------------------------------------------
 1 | #
 2 | # pb_update_one_constrained <-
 3 | #   function(constr_gradient_term, # alpha root-n grad l-star - grad l
 4 | #            H2dot_transp_theta_hat,
 5 | #            #H2dot_n^Ttheta^hat %*%
 6 | #            #rbind(curr_theta_const - theta_const_mle,
 7 | #            #-theta_var_mle)
 8 | #            # where theta_var is (simplex-constrained) parameter to be optimized
 9 | #            # over, and theta_const is all other parameters (held constant in
10 | #            # this step)
11 | #            H22n, #submatrix of criterion hessian corresp. to theta_var
12 | #            curr_theta_var,
13 | #            curr_theta_const,
14 | #            theta_hat_var,
15 | #            theta_hat_const
16 | #   ){
17 | #
18 | #     #directly use auglag - no simpl_auglag_fnnls
19 | #     # prox_criterion <- function(theta_var){
20 | #     #   theta_diff <- ( rbind(curr_theta_const,
21 | #     #                   x) -
22 | #     #               rbind(theta_hat_const,
23 | #     #                     theta_hat_var))
24 | #     #   gr_term <- constr_gradient_term %*%theta_diff
25 | #     #   hess_term <- t(theta_diff)%*%H22n%*%theta_diff
26 | #     #   return(as.numeric(gr_term + 0.5*hess_term))
27 | #     # }
28 | #     #
29 | #     #
30 | #     # simpl_auglag_fnnls(x = curr_theta_var,
31 | #     #                    fn = prox_criterion, #function of x to optimize
32 | #     #                    xhess = H22n, #hessian at x
33 | #     #                    xgrad = , #gradient at x
34 | #     #                                lambda, #penalty parameters
35 | #     #                                nu = 1, #starting lagrangian penalty
36 | #     #                                mu = 1, #starting augmented lagrangian penalty
37 | #     #                                constraint_tolerance = 1e-10, #sum-to-one constraint tolerance
38 | #     #                                maxit = 100 # maximum number of iterations (outer loop)
39 | #     #
40 | #     # )
41 | #   }
42 | 


--------------------------------------------------------------------------------
/R/pb_update_unconstrained.R:
--------------------------------------------------------------------------------
 1 | #
 2 | #
 3 | #
 4 | # pb_update_unconstrained <-
 5 | #   function(hessian_premultiplier, #H22n_inv%**H2dotn
 6 | #            theta_hat_unconstr, #mle
 7 | #            premult_unconstr_gradient_term, #H22n_inv %*% alpha root-n diff unconstr. gradients
 8 | #            theta_hat_constr, #mle for unconstrained variables
 9 | #            theta_hat_unconstr, #mle for constrained variables
10 | #            curr_theta_constr #current value of constrained pars (in pb update)
11 | #            ){
12 | #   return(
13 | #     -hessian_premultiplier%*% rbind(curr_theta_constr - theta_hat_constr,
14 | #                                    -theta_hat_unconstr) -
15 | #       premult_unconstr_gradient_term
16 | #   )
17 | # }
18 | 


--------------------------------------------------------------------------------
/R/poisson_criterion.R:
--------------------------------------------------------------------------------
 1 | 
 2 | poisson_criterion <- function(W,
 3 |                               means,
 4 |                               wts = NULL){
 5 |   n <- nrow(W)
 6 |   J <- ncol(W)
 7 |   wt_list <- lapply(1:n,
 8 |                     function(i) wts[1:J + (i - 1)*J])
 9 |   wts <- do.call(rbind,wt_list)
10 | 
11 |   rm(wt_list)
12 | 
13 |   if( (ncol(W) != ncol(means)) | (nrow(W) != nrow(means))){
14 |     stop("W and means must have the same dimensions.")
15 |   }
16 | 
17 |   if(is.null(wts)){
18 |     return(sum(sapply(1:nrow(means), function(i) sapply(1:ncol(means),
19 |                                                         function(j)
20 |                                                           -logpois(W[i,j],means[i,j])))))
21 |   }
22 |   else{
23 |     if((ncol(W) != ncol(wts)) | (nrow(W) != nrow(wts))){
24 |       stop("Weight matrix wts must have same dimesions as W.")
25 |     }
26 |     return(sum(sapply(1:nrow(means), function(i) sapply(1:ncol(means),
27 |                                                         function(j)
28 |                                                           -wts[i,j]*logpois(W[i,j],means[i,j])))))
29 | 
30 |   }
31 | 
32 | }
33 | 


--------------------------------------------------------------------------------
/R/proximal_bootstrap.R:
--------------------------------------------------------------------------------
  1 | #
  2 | #
  3 | # proximal_bootstrap <- function(
  4 | #   n, #number of observations
  5 | #   alpha_n, #bootstrap scaling term
  6 | #   varying_df, #dataframe containing fitted parameter values
  7 | #   fixed_df, #dataframe containing fixed parameter values
  8 | #   W, #observations
  9 | #   X, #sample efficiency design
 10 | #   Z, #sample specimen design
 11 | #   Z_tilde = NULL,
 12 | #   Z_tilde_gamma_cols,
 13 | #   gammas_fixed_indices,
 14 | #   P_fixed_indices,
 15 | #   B_fixed_indices,
 16 | #   X_tilde,
 17 | #   P_tilde_fixed_indices,
 18 | #   gamma_tilde_fixed_indices,
 19 | #   Z_tilde_list = NULL,
 20 | #   constraint_tolerance = 1e-10,
 21 | #   hessian_regularization = 0.01,
 22 | #   criterion = "Poisson",
 23 | #   gmm_inv_wts = NULL,
 24 | #   nu = 1, #starting lagrangian penalty
 25 | #   mu = 1, #starting augmented lagrangian penalty
 26 | #   ){
 27 | #
 28 | #   params <- dataframes_to_parameters(fixed_df,
 29 | #                                      varying_df)
 30 | #
 31 | #   jacobian <- mean_jac_lr_faster(fixed_df = fixed_df,
 32 | #                                  varying_lr_df = varying_lr_df,
 33 | #                                  varying_df = varying_df,
 34 | #                                  which_k_p = which_k_p,
 35 | #                                  which_k_p_tilde = which_k_p_tilde,
 36 | #                                  which_B_rows = which_B_rows,
 37 | #                                  which_B_keep = which_B_keep,
 38 | #                                  which_gammas = which_gammas,
 39 | #                                  which_gamma_tilde = which_gamma_tilde,
 40 | #                                  Ak_list = Ak_list,
 41 | #                                  A_tilde_k_list = A_tilde_k_list,
 42 | #                                  fixed_P_multipliers = fixed_P_multipliers,
 43 | #                                  fixed_P_tilde_multipliers = fixed_P_tilde_multipliers,
 44 | #                                  params = params,
 45 | #                                  X = X,
 46 | #                                  Z = Z,
 47 | #                                  K = K,
 48 | #                                  K_tilde = K_tilde,
 49 | #                                  X_tilde = X_tilde,
 50 | #                                  Z_tilde = Z_tilde,
 51 | #                                  Z_tilde_gamma_cols =Z_tilde_gamma_cols,
 52 | #                                  Z_tilde_list = Z_tilde_list,
 53 | #                                  sparse = TRUE,
 54 | #                                  proportion_scale = TRUE,
 55 | #                                  P_fixed_indices = P_fixed_indices,
 56 | #                                  P_tilde_fixed_indices = P_tilde_fixed_indices)
 57 | #
 58 | #   means <- meaninate(gammas = params$gammas,
 59 | #                      B = params$B,
 60 | #                      X = X,
 61 | #                      Z = Z,
 62 | #                      P = params$P,
 63 | #                      X_tilde = X_tilde,
 64 | #                      Z_tilde = Z_tilde,
 65 | #                      Z_tilde_gamma_cols = Z_tilde_gamma_cols,
 66 | #                      P_tilde = params$P_tilde,
 67 | #                      gamma_tilde = params$gamma_tilde,
 68 | #                      alpha_tilde = params$alpha_tilde,
 69 | #                      Z_tilde_list = Z_tilde_list)
 70 | #
 71 | #   W_long <- lapply(1:n,function(i) as.numeric(W[i,]))
 72 | #   W_long <- do.call(c,W_long)
 73 | #   means_long <- lapply(1:n, function(i) as.numeric(means[i,]))
 74 | #   means_long <- do.call(c,means_long)
 75 | #
 76 | #   ### only for Poisson:
 77 | #   if(criterion == "Poisson"){
 78 | #     V <- diag(1/means_long)
 79 | #   } else{
 80 | #     V <- diag(rep(1, length(means_long)))
 81 | #   }
 82 | #
 83 | #   if(!is.null(gmm_inv_wts)){
 84 | #     diag(V) <- diag(V)/gmm_inv_wts
 85 | #   }
 86 | #
 87 | #   dli_dtheta <-
 88 | #     lapply(1:n, function(i)
 89 | #            -Matrix::crossprod(jacobian[1:J + (i - 1)*J,],
 90 | #                               V[1:J + (i - 1)*J,1:J + (i - 1)*J])%*%(
 91 | #     Matrix::Matrix(W_long[1:J + (i - 1)*J] - means_long[1:J + (i - 1)*J],
 92 | #                    ncol = 1)))
 93 | #
 94 | #   dli_dtheta <- do.call(cbind,dli_dtheta)
 95 | #   dli_dtheta <- dli_dtheta/n
 96 | #
 97 | #   lgrad <- apply(dli_dtheta,1,sum)
 98 | #
 99 | #   V2 <- as(sqrt(abs(V)),"sparseMatrix")
100 | #   pre_info <- V2%*%jacobian
101 | #   H_n <-  Matrix::crossprod(pre_info)
102 | #
103 | #
104 | #   boot_weights <- rexp(n)
105 | #   boot_weights <- boot_weights/sum(boot_weights)
106 | #
107 | #
108 | #   lgrad_star <- apply(dli_dtheta%*%diag(boot_weights),1,sum)
109 | #
110 | #   diff_dls <- alpha_n*sqrt(n)*as.numeric(
111 | #     (apply(dli_star_dtheta,1,sum) - apply(dli_dtheta,1,sum)))
112 | #   diff_dls <- matrix(diff_dls,nrow = 1)
113 | #
114 | #   prox_crit <- function(x){
115 | #     x_deviation <- matrix(x - varying_df$value,ncol = 1)
116 | #
117 | #     return(as.numeric(diff_dls%*%x_deviation +
118 | #       0.5*t(x_deviation)%*%H_n%*%x_deviation))
119 | #
120 | #   }
121 | #
122 | #   K <- length(unique(varying_df$k[varying_df$param == "P"]))
123 | #   K_tilde <- length(unique(varying_df$k[varying_df$param == "P_tilde"]))
124 | #   aug_lag_params <- matrix(1,ncol = 2, nrow = n_simplex_constraints)
125 | #
126 | #   varying_p_k <- unique(varying_df$k[varying_df$param == "P"])
127 | #   simplex_matrix_P <-
128 | #     lapply(varying_p_k,
129 | #            function(d) as.numeric(
130 | #              (varying_df$k == d) & (varying_df$param == "P"))
131 | #     )
132 | #
133 | #   simplex_matrix_P <- do.call(rbind,simplex_matrix_P)
134 | #
135 | #   varying_p_tilde_k <- unique(varying_df$k[varying_df$param == "P_tilde"])
136 | #   simplex_matrix_P_tilde <-
137 | #     lapply(varying_p_tilde_k,
138 | #            function(d) as.numeric(
139 | #              (varying_df$k == d) & (varying_df$param == "P_tilde"))
140 | #     )
141 | #
142 | #   simplex_matrix_P_tilde <- do.call(rbind,simplex_matrix_P_tilde)
143 | #
144 | #   for(k in 1:maxit){
145 | #     counter <- counter + 1
146 | #     # print(counter)
147 | #
148 | #     new_x <- optim(varying_df$value,
149 | #                    )
150 | #
151 | #     # print("checking constraints")
152 | #
153 | #     V <- abs(sum(x) - 1)
154 | #
155 | #     satisfied <- V< constraint_tolerance
156 | #
157 | #
158 | #
159 | #     if(V < constraint_tolerance){
160 | #       return(x)
161 | #     }
162 | #
163 | #     if(V < 0.25*previous_V){
164 | #       nu <- nu + 2*mu*V
165 | #     } else{
166 | #       mu <- 2*mu
167 | #     }
168 | #     # print(mu)
169 | #     # print(nu)
170 | #     previous_V <- V
171 | #
172 | #   }
173 | #
174 | #
175 | #
176 | #
177 | # }
178 | 


--------------------------------------------------------------------------------
/R/push_row.R:
--------------------------------------------------------------------------------
1 | push_row <- function(row_vec,
2 |                      row_num,
3 |                      target_matrix){
4 |   target_matrix[row_num,] <- row_vec
5 |   return(target_matrix)
6 | }
7 | 


--------------------------------------------------------------------------------
/R/ra_to_lr.R:
--------------------------------------------------------------------------------
 1 | 
 2 | ra_to_lr <- function(varying_df){
 3 | 
 4 |   P_df <- varying_df[varying_df$param == "P",]
 5 |   P_tilde_df <- varying_df[varying_df$param == "P_tilde",]
 6 | 
 7 |   ### Generate rho data.frame
 8 |   P_rows <- unique(P_df$k)
 9 | 
10 |   rho_df <- P_df[rep(F, nrow(P_df)),]
11 |   for(p_row in P_rows){
12 |     mini_P <- P_df[P_df$k == p_row,]
13 |     ref_j <- max(mini_P$j)
14 |     mini_rho <- mini_P[mini_P$j != ref_j,]
15 |     mini_rho$param <- "rho"
16 |     mini_rho$value <- log(mini_rho$value) - log(mini_P$value[mini_P$j == ref_j])
17 |     rho_df <- rbind(rho_df, mini_rho)
18 |   }
19 | 
20 |   ### Generate rho_tilde data.frame
21 |   P_tilde_rows <- unique(P_tilde_df$k)
22 | 
23 |   rho_tilde_df <- P_tilde_df[rep(F, nrow(P_df)),]
24 |   for(p_tilde_row in P_tilde_rows){
25 |     mini_P_tilde <- P_tilde_df[P_tilde_df$k == p_tilde_row,]
26 |     ref_j <- max(mini_P_tilde$j)
27 |     mini_rho_tilde <- mini_P_tilde[mini_P_tilde$j != ref_j,]
28 |     mini_rho_tilde$param <- "rho_tilde"
29 |     mini_rho_tilde$value <- log(mini_rho_tilde$value) -
30 |       log(mini_P_tilde$value[mini_P_tilde$j == ref_j])
31 |     rho_tilde_df <- rbind(rho_tilde_df, mini_rho_tilde)
32 |   }
33 | 
34 |   varying_lr_df <- rbind(varying_df[!(varying_df$param %in% c("P","P_tilde")),],
35 |                          rho_df,
36 |                          rho_tilde_df)
37 | 
38 |   # varying_lr_df <- rbind(varying_lr_df[varying_lr_df$param == "rho",],
39 |   #                     varying_lr_df[varying_lr_df$param == "rho_tilde",],
40 |   #                     varying_lr_df[varying_lr_df$param == "B",],
41 |   #                     varying_lr_df[varying_lr_df$param == "gamma",],
42 |   #                     varying_lr_df[varying_lr_df$param == "gamma_tilde",])
43 | 
44 |   rownames(varying_lr_df) <- 1:nrow(varying_lr_df)
45 | 
46 |   varying_lr_df <- rbind(varying_lr_df[varying_lr_df$param != "alpha_tilde",],
47 |                          varying_lr_df[varying_lr_df$param == "alpha_tilde",])
48 | 
49 |   return(varying_lr_df)
50 | 
51 | }
52 | 


--------------------------------------------------------------------------------
/R/refit_model.R:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | refit_model <- function(fitted_model,
 4 |                         m,
 5 |                         seed = NULL,
 6 |                         bootstrap_method = "bayesian_subsample",
 7 |                         bootstrap_failure_cutoff = -1e4){
 8 | 
 9 |   n <- nrow(fitted_model$W)
10 |   J <- ncol(fitted_model$W)
11 | 
12 |   if(!is.null(seed)){
13 |     set.seed(seed)
14 |   }
15 | 
16 |   if(!(boot_method %in% c("bayesian_subsample",
17 |                         "subsample"))){
18 |     stop("Argument boot_method must be equal to `bayesian_subsample`
19 | or `subsample`.")
20 |   }
21 | 
22 | if(boot_method == "bayesian_subsample"){
23 |     bootstrap_weights <- rgamma(n_effective,shape = m/n_effective)
24 |     bootstrap_weights <- bootstrap_weights/sum(bootstrap_weights)
25 |     }
26 | if(boot_method == "subsample"){
27 |   bootstrap_weights <- rmultinom(1,m,rep(1/n,n))
28 |   bootstrap_weights <- bootstrap_weights/sum(bootstrap_weights)
29 | }
30 | 
31 |   bootstrap_weights <- rep(bootstrap_weights, each = J)
32 | 
33 |   refit <-
34 |     estimate_parameters(W = fitted_model$W,
35 |                         X = fitted_model$X,
36 |                         Z = fitted_model$Z,
37 |                         Z_tilde = fitted_model$Z_tilde,
38 |                         Z_tilde_gamma_cols = fitted_model$Z_tilde_gamma_cols,
39 |                         gammas = fitted_model$gammas,
40 |                         gammas_fixed_indices =
41 |                           fitted_model$gammas_fixed_indices,
42 |                         P = fitted_model$P + .01,
43 |                         P_fixed_indices = fitted_model$P_fixed_indices,
44 |                         B = fitted_model$B,
45 |                         B_fixed_indices = fitted_model$B_fixed_indices,
46 |                         X_tilde = fitted_model$X_tilde,
47 |                         P_tilde = fitted_model$P_tilde + .01,
48 |                         P_tilde_fixed_indices =
49 |                           fitted_model$P_tilde_fixed_indices,
50 |                         gamma_tilde = fitted_model$gamma_tilde,
51 |                         gamma_tilde_fixed_indices =
52 |                           fitted_model$gamma_tilde_fixed_indices,
53 |                         alpha_tilde = fitted_model$alpha_tilde,
54 |                         Z_tilde_list = fitted_model$Z_tilde_list,
55 |                         barrier_t = 1, #starting value of reciprocal barrier penalty coef.
56 |                         barrier_scale = 10, #increments for value of barrier penalty
57 |                         max_barrier = 1e10, #maximum value of barrier_t
58 |                         initial_conv_tol = 1000,
59 |                         final_conv_tol = 0.1,
60 |                         constraint_tolerance = 1e-10,
61 |                         hessian_regularization = .01,
62 |                         criterion = fitted_model$criterion,
63 |                         profile_P = TRUE,
64 |                         profiling_maxit = 25,
65 |                         wts = bootstrap_weights,
66 |                         verbose = FALSE,
67 |                         bootstrap_failure_cutoff = bootstrap_failure_cutoff)
68 | 
69 |   return(list("varying" = refit$varying,
70 |               "objective" = refit$objective))
71 | }
72 | 


--------------------------------------------------------------------------------
/R/safe_divide.R:
--------------------------------------------------------------------------------
 1 | ##################### Division for Log Likelihood Derivatives #####################
 2 | safe_divide <- function(numer,denom, zero_divisor_penalty = 0){
 3 |   if((numer ==0 )&(denom == 0)){
 4 |     return(1)
 5 |   } else{
 6 |     if(denom == 0){
 7 |       return(zero_divisor_penalty)
 8 |     }
 9 |   }
10 |   return(numer/denom)
11 | }
12 | 


--------------------------------------------------------------------------------
/R/safe_multiply.R:
--------------------------------------------------------------------------------
 1 | 
 2 | safe_multiply <- function(x,y){
 3 |   if(length(x)!= length(y)){
 4 |     stop("Arguments x and y must have equal length")
 5 |   }
 6 | 
 7 |   product <- x*y
 8 | 
 9 |   #define inf*0 = 1; -inf*0 = -1
10 |   product[is.infinite(x) & y == 0] <- sign(x)[is.infinite(x) & y == 0]
11 |   product[is.infinite(y) & x == 0] <- sign(y)[is.infinite(y) & x == 0]
12 | 
13 |   return(product)
14 | 
15 | }
16 | 


--------------------------------------------------------------------------------
/R/simpl_auglag_fnnls.R:
--------------------------------------------------------------------------------
 1 | #' @import fastnnls
 2 | #' 
 3 | #' 
 4 | simpl_auglag_fnnls <- function(x,
 5 |                                fn, #function of x to optimize
 6 |                                xhess, #hessian at x
 7 |                                xgrad, #gradient at x
 8 |                                lambda, #penalty parameters
 9 |                                nu = 1, #starting lagrangian penalty
10 |                                mu = 1, #starting augmented lagrangian penalty
11 |                                constraint_tolerance = 1e-10, #sum-to-one constraint tolerance
12 |                                maxit = 100 # maximum number of iterations (outer loop)
13 | 
14 | ){
15 | 
16 |   npar <- nrow(xhess)
17 | 
18 |   x0 <- matrix(x, ncol = 1)
19 |   xgrad <- matrix(xgrad,ncol = 1)
20 | 
21 |   previous_V <- Inf
22 |   # grad_norm <- sqrt(sum(xgrad^2))
23 |   counter <- 0
24 |   for(k in 1:maxit){
25 |     counter <- counter + 1
26 |     # print(counter)
27 | 
28 |     ATA <- 0.5*xhess + lambda*diag(rep(1,length(xgrad))) + matrix(mu,nrow = npar, ncol = npar)
29 | 
30 | 
31 |     # print(npar)
32 |     Ab <- t((-0.5)*(matrix(nu - 2*mu,nrow = 1, ncol = npar) -
33 |                       2*lambda*t(x0) -
34 |                       t(x0)%*%xhess + t(xgrad)))
35 |     #
36 |     #     eigen_ATA <- eigen(ATA)
37 |     #
38 |     #     sqrt_eigen_values <- sapply(eigen_ATA$values,
39 |     #                                 function(t) ifelse(t>=0,sqrt(t),0))
40 |     #
41 |     #     inv_sqrt_eigen_values <- sapply(eigen_ATA$values,
42 |     #                                     function(t) ifelse(t>0,1/sqrt(t),0))
43 |     #
44 |     #     A <- eigen_ATA$vectors%*%diag(sqrt_eigen_values)%*%t(eigen_ATA$vectors)
45 |     #     A_inv <- eigen_ATA$vectors%*%diag(inv_sqrt_eigen_values)%*%t(eigen_ATA$vectors)
46 |     #     b <- A_inv%*%Ab
47 |     #
48 |     #
49 |     #     x <- nnls::nnls(A,b)$x
50 |     
51 |     ## BEGIN Amy July 8 2023
52 |     # stopifnot(counter<60)
53 |     if (counter >= 60) {
54 |       warning(paste("In fastNNLS, the counter is", counter, "which is >=60. Potential convergence issue?"))
55 |     }
56 |     ## END Amy July 8 2023
57 |     
58 |     # print("doing fastnnls")
59 |     x <- fastnnls::fast_nnls(ZTx = Ab, ZTZ = ATA,
60 |                              tolerance = constraint_tolerance)
61 | 
62 |     # print("checking constraints")
63 | 
64 |     V <- abs(sum(x) - 1)
65 | 
66 |     satisfied <- V< constraint_tolerance
67 | 
68 |     if(V < constraint_tolerance){
69 |       return(x)
70 |     }
71 | 
72 |     if(V < 0.25*previous_V){
73 |       nu <- nu + 2*mu*V
74 |     } else{
75 |       mu <- 2*mu
76 |     }
77 |     # print(mu)
78 |     # print(nu)
79 |     previous_V <- V
80 | 
81 |   }
82 | 
83 | 
84 | 
85 |   warning("Maximum iterations reached; returning initial value")
86 |   return(x0)
87 | }
88 | 


--------------------------------------------------------------------------------
/R/simpl_opt_linesearch_fnnls.R:
--------------------------------------------------------------------------------
 1 | simpl_opt_linesearch_fnnls <- function(fn, #objective function to be minimized
 2 |                                        x, #starting values of simplex-constrained parameter
 3 |                                        xhess, # hessian matrix of objective function at x
 4 |                                        xgrad, # gradient of objective function at x
 5 |                                        lambda = 0, #trust penalty
 6 |                                        maxit = 100,
 7 |                                        constraint_tolerance = 1e-10
 8 | ){
 9 | 
10 |   x0 <- x
11 |   curr_fn_value <- fn(x0)
12 | 
13 | 
14 |   new_x <-  simpl_auglag_fnnls(x = x,
15 |                                fn = fn,
16 |                                xhess = xhess,
17 |                                xgrad = xgrad,
18 |                                lambda = lambda,
19 |                                constraint_tolerance = constraint_tolerance)
20 | 
21 |   new_fn_value <- fn(new_x)
22 | 
23 |   step_direction <-  new_x - x0
24 | 
25 |   stepsize <- 1
26 | 
27 |   fn_decrease <- 10
28 | 
29 |   while((fn_decrease >  0)&(stepsize>1e-2)){
30 | 
31 | 
32 |     prop_x <- x0 + stepsize*step_direction
33 | 
34 |     new_fn_value <- fn(prop_x)
35 |     if(is.nan(new_fn_value)){
36 |       new_fn_value <- 1e100
37 |     }
38 | 
39 |     if(min(prop_x)<0){
40 |       new_fn_value <- 1e100
41 |     }
42 |     #
43 |     fn_decrease <- new_fn_value - curr_fn_value
44 |     stepsize <- stepsize/2
45 | 
46 |   }
47 | 
48 |   if(fn_decrease <= 0){
49 |     return(prop_x)
50 |   } else{
51 |     return(x0)
52 |   }
53 | 
54 | 
55 | }
56 | 


--------------------------------------------------------------------------------
/R/simulate_paper_data.R:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | simulate_paper_data <- function(n,
 4 |                                 J,
 5 |                                 B_multiplier,
 6 |                                 distrib,
 7 |                                 seed){
 8 | 
 9 | if(!(J %in% c(5,20))){
10 |   stop("Number of taxa J must be equal to 5 or 20 in this simulation")
11 | }
12 | 
13 | if(!(n %in% c(1,3))){
14 |   stop("Number of technical replicates n must be 1 or 3 in this simulation")
15 | }
16 | 
17 |   ### generate Z_tilde (alpha_tilde_k = 0 for k = 1, 2)
18 | Z_tilde <- do.call(rbind,lapply(1:n,
19 |                                 function(x) matrix(rep(c(1,9,81,729),
20 |                                                        4),
21 |                                                    ncol = 1)))
22 | 
23 |  ### generate Z
24 | Z <- do.call(rbind,lapply(1:4,
25 |                     function(x) do.call(rbind,
26 |                                         lapply(1:(4*n),function(k) matrix(
27 |                                           as.numeric(x == 1:4),nrow = 1
28 |                                         )))))
29 | 
30 | X <- matrix(1,nrow = n*16,ncol = 1)
31 | 
32 | B_star <- matrix(c(
33 |   rep(c(3,-1,1,-3),ceiling(J/4))[1:(J - 1)],
34 |                    0),nrow = 1,ncol = J)
35 | 
36 | B <- B_star*B_multiplier
37 | 
38 | P1 <- 2^(seq(0,4,length.out = J))
39 | P1 <- P1/sum(P1)
40 | P2 <- P1[J:1]
41 | P3 <- c(rep(0,0.4*J),
42 |         10^seq(0,2,length.out = 0.6*J))
43 | P3 <- P3/sum(P3)
44 | P4 <- P3[J:1]
45 | 
46 | P <- rbind(P1,P2,P3,P4)
47 | 
48 | P_tilde <- matrix(1/J,ncol = J, nrow = 1)
49 | X_tilde <- matrix(1,ncol = 1, nrow = 1)
50 | gamma_tilde <- -3.7
51 | 
52 | dilutions <- log(as.numeric(Z_tilde))/log(9)
53 | 
54 | gamma_means <- sapply(dilutions, function(d) min(13.5 - 1.5*d,12) )
55 | set.seed(seed)
56 | gammas <- sapply(gamma_means, function(x) rnorm(1,mean = x, sd = sqrt(0.05)))
57 | 
58 | 
59 | means <- meaninate(gammas = gammas,
60 |                    B = B,
61 |                    X= X,
62 |                    Z = Z,
63 |                    P = P,
64 |                    X_tilde = X_tilde,
65 |                    Z_tilde = Z_tilde,
66 |                    Z_tilde_gamma_cols = ncol(Z_tilde),
67 |                    P_tilde = P_tilde,
68 |                    gamma_tilde = gamma_tilde)
69 | 
70 | 
71 | if(distrib== "Poisson"){
72 |   W <- apply(means, c(1,2), function(x) rpois(1,lambda = x))
73 | }
74 | 
75 | if(distrib == "NB"){
76 |   W <- apply(means, c(1,2), function(x) rnbinom(1,mu = x, size = 13))
77 | }
78 | 
79 | return(W)
80 | 
81 | }
82 | 


--------------------------------------------------------------------------------
/R/simulate_simple_data.R:
--------------------------------------------------------------------------------
 1 | #' @import stats
 2 | #' 
 3 | #' 
 4 | simulate_simple_data <- function(B,
 5 |                                  distrib = "Poisson",
 6 |                                  n = 5,
 7 |                                  gamma_mean = 11){
 8 |   if(nrow(B) != 1){
 9 |     stop("B must have a single row")
10 |   }
11 |   J <- ncol(B)
12 |   gammas <- rnorm(n,gamma_mean)
13 | 
14 |   means <- meaninate(gammas,
15 |             B = B,
16 |             X = matrix(1, nrow = n,ncol = 1),
17 |             Z = matrix(1, nrow = n, ncol = 1),
18 |             P = matrix(1/J,nrow = 1, ncol = J),
19 |             X_tilde = matrix(0,nrow = 1, ncol = 1),
20 |             Z_tilde = matrix(0,nrow = n, ncol = 1),
21 |             Z_tilde_gamma_cols = 1,
22 |             P_tilde = matrix(1/J,nrow = 1, ncol = J),
23 |             gamma_tilde = matrix(0, ncol = 1, nrow = 1),
24 |                         alpha_tilde = NULL,
25 |                         Z_tilde_list = NULL,
26 |                         return_separate = FALSE,
27 |                         exclude_gammas = FALSE)
28 | 
29 |   if(distrib == "Poisson"){
30 |   W <- apply(means,c(1,2),function(x) rpois(1,x))
31 |   }
32 |   if(distrib == "nb10"){
33 |     W <- apply(means,c(1,2), function(x) rnbinom(1, mu = x, size = 10))
34 |   }
35 | 
36 |   if(distrib == "nb.5"){
37 |     W <- apply(means,c(1,2), function(x) rnbinom(1, mu = x, size = .5))
38 |   }
39 | 
40 | return(W)
41 | }
42 | 


--------------------------------------------------------------------------------
/R/universal_test.R:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | universal_test <- function(W,
 4 |                            full_model,
 5 |                            null_model,
 6 |                            parallelize = FALSE){
 7 |   U <- crossfit_U(W = W,
 8 |                   full_model = full_model,
 9 |                   null_model = null_model,
10 |                   parallelize = parallelize)
11 | 
12 |   return(1/U)
13 | }
14 | 


--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | output: github_document
 3 | ---
 4 | 
 5 | <!-- README.md is generated from README.Rmd. Please edit that file -->
 6 | 
 7 | ```{r, include = FALSE}
 8 | knitr::opts_chunk$set(
 9 |   collapse = TRUE,
10 |   comment = "#>",
11 |   fig.path = "man/figures/README-",
12 |   out.width = "100%"
13 | )
14 | ```
15 | 
16 | # tinyvamp
17 | 
18 | <!-- badges: start -->
19 | <!-- [![R-CMD-check](https://github.com/statdivlab/tinyvamp/workflows/R-CMD-check/badge.svg)](https://github.com/statdivlab/tinyvamp/actions) -->
20 | [![Coverage status](https://codecov.io/gh/statdivlab/tinyvamp/branch/main/graph/badge.svg)](https://codecov.io/github/statdivlab/tinyvamp?branch=main)
21 | <!-- badges: end -->
22 | 
23 | `tinyvamp` is a package for estimation and removal of measurement error in high-throughput sequencing data. 
24 | 
25 | Full details on the model and estimation method are available in the [preprint](https://arxiv.org/abs/2204.12733). 
26 | 
27 | **Documentation and vignettes are under construction - please check back soon!**
28 | 
29 | **Needs something more urgently? File an [issue](https://github.com/statdivlab/tinyvamp/issues) or [send Amy an email](http://statisticaldiversitylab.com/team/amy-willis)!**
30 | 
31 | ## Installation
32 | 
33 | You can install the development version of tinyvamp from [GitHub](https://github.com/) with:
34 | 
35 | ``` r
36 | # install.packages("remotes")
37 | remotes::install_github("statdivlab/tinyvamp")
38 | ```
39 | 
40 | If you haven't already, you may need to install `fastnnls` and `logsum`, too: 
41 | 
42 | ``` r
43 | # install.packages("remotes")
44 | remotes::install_github("ailurophilia/fastnnls")
45 | remotes::install_github("ailurophilia/logsum")
46 | remotes::install_github("statdivlab/tinyvamp")
47 | ```
48 | 
49 | <!-- ## Example -->
50 | 
51 | <!-- This is a basic example which shows you how to solve a common problem: -->
52 | 
53 | <!-- ```{r example} -->
54 | <!-- library(tinyvamp) -->
55 | <!-- ## basic example code -->
56 | <!-- ``` -->
57 | 
58 | 
59 | ## Humans
60 | 
61 | Authors:  [David Clausen](https://www.biostat.washington.edu/people/david-clausen) and [Amy Willis](http://statisticaldiversitylab.com)
62 | 
63 | Do you have a request for us? Let us know! We want folks to use `tinyvamp` and will try to make it as easy as possible. 
64 | 
65 | Do you have a question? Check out the above documentation list, then shoot us an email or open a [Discussion](https://github.com/adw96/breakaway/discussions). We receive a lot of emails from users, so we try to answer questions on the Wiki rather than responding to everyone individually.
66 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | <!-- README.md is generated from README.Rmd. Please edit that file -->
 3 | 
 4 | # tinyvamp
 5 | 
 6 | <!-- badges: start -->
 7 | <!-- [![R-CMD-check](https://github.com/statdivlab/tinyvamp/workflows/R-CMD-check/badge.svg)](https://github.com/statdivlab/tinyvamp/actions) -->
 8 | 
 9 | [![Coverage
10 | status](https://codecov.io/gh/statdivlab/tinyvamp/branch/main/graph/badge.svg)](https://codecov.io/github/statdivlab/tinyvamp?branch=main)
11 | <!-- badges: end -->
12 | 
13 | `tinyvamp` is a package for estimation and removal of measurement error
14 | in high-throughput sequencing data.
15 | 
16 | Full details on the model and estimation method are available in the
17 | [preprint](https://arxiv.org/abs/2204.12733).
18 | 
19 | **Documentation and vignettes are under construction - please check back
20 | soon!**
21 | 
22 | **Needs something more urgently? File an
23 | [issue](https://github.com/statdivlab/tinyvamp/issues) or [send Amy an
24 | email](http://statisticaldiversitylab.com/team/amy-willis)!**
25 | 
26 | ## Installation
27 | 
28 | You can install the development version of tinyvamp from
29 | [GitHub](https://github.com/) with:
30 | 
31 | ``` r
32 | # install.packages("remotes")
33 | remotes::install_github("statdivlab/tinyvamp")
34 | ```
35 | 
36 | If you haven’t already, you may need to install `fastnnls` and `logsum`,
37 | too:
38 | 
39 | ``` r
40 | # install.packages("remotes")
41 | remotes::install_github("ailurophilia/fastnnls")
42 | remotes::install_github("ailurophilia/logsum")
43 | remotes::install_github("statdivlab/tinyvamp")
44 | ```
45 | 
46 | <!-- ## Example -->
47 | <!-- This is a basic example which shows you how to solve a common problem: -->
48 | <!-- ```{r example} -->
49 | <!-- library(tinyvamp) -->
50 | <!-- ## basic example code -->
51 | <!-- ``` -->
52 | 
53 | ## Humans
54 | 
55 | Authors: [David
56 | Clausen](https://www.biostat.washington.edu/people/david-clausen) and
57 | [Amy Willis](http://statisticaldiversitylab.com)
58 | 
59 | Do you have a request for us? Let us know! We want folks to use
60 | `tinyvamp` and will try to make it as easy as possible.
61 | 
62 | Do you have a question? Check out the above documentation list, then
63 | shoot us an email or open a
64 | [Discussion](https://github.com/adw96/breakaway/discussions). We receive
65 | a lot of emails from users, so we try to answer questions on the Wiki
66 | rather than responding to everyone individually.
67 | 


--------------------------------------------------------------------------------
/data/costea2017_metaphlan2_profiles.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/statdivlab/tinyvamp/cf31c9ddcd1fee00e1232b3091eed94f18aeefab/data/costea2017_metaphlan2_profiles.rda


--------------------------------------------------------------------------------
/data/costea2017_mock_composition.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/statdivlab/tinyvamp/cf31c9ddcd1fee00e1232b3091eed94f18aeefab/data/costea2017_mock_composition.rda


--------------------------------------------------------------------------------
/data/costea2017_sample_data.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/statdivlab/tinyvamp/cf31c9ddcd1fee00e1232b3091eed94f18aeefab/data/costea2017_sample_data.rda


--------------------------------------------------------------------------------
/data/karstens.rdata:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/statdivlab/tinyvamp/cf31c9ddcd1fee00e1232b3091eed94f18aeefab/data/karstens.rdata


--------------------------------------------------------------------------------
/data/karstens_phyloseq.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/statdivlab/tinyvamp/cf31c9ddcd1fee00e1232b3091eed94f18aeefab/data/karstens_phyloseq.rda


--------------------------------------------------------------------------------
/man/bootstrap_ci.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/bootstrap_ci.R
 3 | \name{bootstrap_ci}
 4 | \alias{bootstrap_ci}
 5 | \title{Apply the Bayesian subsampled bootstrap to a fitted tinyvamp model}
 6 | \usage{
 7 | bootstrap_ci(
 8 |   W,
 9 |   fitted_model,
10 |   n_boot,
11 |   m = NULL,
12 |   alpha = 0.05,
13 |   parallelize = FALSE,
14 |   ncores = 5,
15 |   seed = NULL,
16 |   return_models = FALSE,
17 |   verbose = FALSE,
18 |   adjust = FALSE
19 | )
20 | }
21 | \description{
22 | Apply the Bayesian subsampled bootstrap to a fitted tinyvamp model
23 | }
24 | 


--------------------------------------------------------------------------------
/man/calculate_log_penalty.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/calculate_log_penalty.R
 3 | \name{calculate_log_penalty}
 4 | \alias{calculate_log_penalty}
 5 | \title{Calculate barrier penalty to add to objective function inside
 6 | barrier algorithm}
 7 | \usage{
 8 | calculate_log_penalty(varying_lr_df, fixed_df, barrier_t)
 9 | }
10 | \arguments{
11 | \item{varying_lr_df}{A data frame containing values of parameters that
12 | are treated as unknown, with relative abundance parameters represented
13 | on the log ratio scale (i.e., as phi and phi_tilde)}
14 | 
15 | \item{fixed_df}{A data frame containing values of parameters that are
16 | treated as known}
17 | 
18 | \item{barrier_t}{The current value of t, the barrier penalty parameter}
19 | }
20 | \value{
21 | The calculated value of the barrier penalty
22 | }
23 | \description{
24 | Calculate barrier penalty to add to objective function inside
25 | barrier algorithm
26 | }
27 | 


--------------------------------------------------------------------------------
/man/costea2017_mock_composition.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{costea2017_mock_composition}
 5 | \alias{costea2017_mock_composition}
 6 | \title{The composition of 10 spike-in taxa in 19 samples observed from flow cytometry.}
 7 | \format{
 8 | An object of class \code{tbl_df} (inherits from \code{tbl}, \code{data.frame}) with 19 rows and 6 columns.
 9 | }
10 | \source{
11 | \url{https://doi.org/10.1038/nbt.3960}
12 | }
13 | \usage{
14 | costea2017_mock_composition
15 | }
16 | \description{
17 | For details, see Costea et al (2017).
18 | }
19 | \keyword{datasets}
20 | 


--------------------------------------------------------------------------------
/man/dataframes_to_parameters.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/dataframes_to_parameters.R
 3 | \name{dataframes_to_parameters}
 4 | \alias{dataframes_to_parameters}
 5 | \title{Convert parameter values stored in data frame format to matrix format}
 6 | \usage{
 7 | dataframes_to_parameters(fixed_df, varying_df)
 8 | }
 9 | \arguments{
10 | \item{fixed_df}{A dataframe containing values of model parameters
11 | treated as fixed and known (i.e. held constant at known values)}
12 | 
13 | \item{varying_df}{A dataframe containing current values of model parameters treated
14 | as fixed and unknown (i.e., parameters to be estimated)}
15 | }
16 | \value{
17 | A list containing
18 | \item{P}{Specimen relative abundance matrix (of dimension K x J)}
19 | \item{P_tilde}{Spurious read source
20 | relative abundance matrix (of dimension K-tilde x J)}
21 | \item{B}{A matrix of detection efficiencies (of dimension p x J)}
22 | \item{gammas}{An n-vector of sample-specific read intensities}
23 | \item{gamma_tilde}{A-vector of spurious read source intensities
24 | (of length K-tilde)}
25 | }
26 | \description{
27 | Convert parameter values stored in data frame format to matrix format
28 | }
29 | \author{
30 | David Clausen
31 | }
32 | 


--------------------------------------------------------------------------------
/man/estimate_parameters.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/estimate_parameters.R
  3 | \name{estimate_parameters}
  4 | \alias{estimate_parameters}
  5 | \title{Fit tinyvamp model to HTS microbiome data}
  6 | \usage{
  7 | estimate_parameters(
  8 |   W,
  9 |   X,
 10 |   Z,
 11 |   Z_tilde = NULL,
 12 |   Z_tilde_gamma_cols,
 13 |   gammas,
 14 |   gammas_fixed_indices,
 15 |   P,
 16 |   P_fixed_indices,
 17 |   B,
 18 |   B_fixed_indices,
 19 |   X_tilde,
 20 |   P_tilde,
 21 |   P_tilde_fixed_indices,
 22 |   gamma_tilde,
 23 |   gamma_tilde_fixed_indices,
 24 |   alpha_tilde = NULL,
 25 |   Z_tilde_list = NULL,
 26 |   barrier_t = 1,
 27 |   barrier_scale = 10,
 28 |   max_barrier = 1e+12,
 29 |   initial_conv_tol = 1000,
 30 |   final_conv_tol = 0.1,
 31 |   constraint_tolerance = 1e-10,
 32 |   hessian_regularization = 0.01,
 33 |   criterion = "Poisson",
 34 |   profile_P = TRUE,
 35 |   barrier_maxit = 500,
 36 |   profiling_maxit = 25,
 37 |   wts = NULL,
 38 |   verbose = FALSE,
 39 |   bootstrap_failure_cutoff = NULL,
 40 |   tinker_zeroes = 0.1,
 41 |   return_variance = FALSE
 42 | )
 43 | }
 44 | \arguments{
 45 | \item{W}{An \eqn{n \times J} matrix of numeric HTS output (e.g., read counts, coverages, etc.)}
 46 | 
 47 | \item{X}{The sample efficiency design -- an \eqn{n \times p} matrix}
 48 | 
 49 | \item{Z}{The sample-specimen design -- an \eqn{n \times K} matrix whose \eqn{ij}-th entry
 50 | indicates the proportional contribution of specimen \eqn{j} to sample \eqn{i}. Rows must
 51 | sum to 1 or be identically 0.}
 52 | 
 53 | \item{Z_tilde}{The spurious read design -- an \eqn{n \times \tilde{K}} matrix where
 54 | \eqn{\tilde{K}} is the number of spurious read sources modeled.}
 55 | 
 56 | \item{Z_tilde_gamma_cols}{A numeric vector containing the columns of Z_tilde which should be
 57 | multiplied by exp(gamma).}
 58 | 
 59 | \item{gammas}{A numeric vector of length n of starting values for read intensity parameter gamma}
 60 | 
 61 | \item{gammas_fixed_indices}{A logical vector of length n whose \eqn{i}-th entry is TRUE if the
 62 | \eqn{i}-th entry of gamma should be treated as fixed and known, and FALSE otherwise}
 63 | 
 64 | \item{P}{A \eqn{K \times J} numeric matrix giving initial values for the relative abundance matrix.}
 65 | 
 66 | \item{P_fixed_indices}{P_fixed_indices A \eqn{K \times J} logical matrix specifying any entries of P that are known. If known, the corresponding values from \code{P} will be treated as the fixed, known values.}
 67 | 
 68 | \item{B}{A \eqn{p \times J} numeric matrix giving initial values for the sample efficiencies.}
 69 | 
 70 | \item{B_fixed_indices}{A \eqn{p \times J} logical matrix specifying any entries of B that are known. If known, the corresponding values from \code{B} will be treated as the fixed, known values.}
 71 | 
 72 | \item{X_tilde}{A \eqn{\tilde{K} \times p} matrix giving the spurious read source efficiency design matrix}
 73 | 
 74 | \item{P_tilde}{A \eqn{\tilde{K} \times J} numeric matrix giving initial values for the spurious read source relative abundances.}
 75 | 
 76 | \item{P_tilde_fixed_indices}{A \eqn{\tilde{K} \times J} logical matrix indicating if the \eqn{(i,j)}th entry of \code{P_tilde} should be treated as fixed and known.}
 77 | 
 78 | \item{gamma_tilde}{A numeric vector of length \eqn{\tilde{K}} of starting values for spurious read intensity parameter gamma_tilde}
 79 | 
 80 | \item{gamma_tilde_fixed_indices}{A logical vector of length \eqn{\tilde{K}} whose \eqn{i}-th entry is TRUE if the
 81 | \eqn{i}-th entry of gamma_tilde should be treated as fixed and known, and FALSE otherwise.}
 82 | 
 83 | \item{alpha_tilde}{A numeric vector containing starting values of length \eqn{M}. If used, \code{Z_tilde_list} must be provided.}
 84 | 
 85 | \item{Z_tilde_list}{A list of length \eqn{M + 1} containing matrices \eqn{\tilde{Z}_1,\dots,\tilde{Z}_{M + 1}} to be linearly combined to
 86 | generate \code{Z_tilde}: \eqn{\tilde{Z} = \tilde{Z}_{(1)} + \sum_{m = 1}^M \exp(\tilde{\alpha}_m)\tilde{Z}_{(m + 1)}}. If used,
 87 | \code{alpha_tilde} must be provided.}
 88 | 
 89 | \item{barrier_t}{Starting value of reciprocal barrier penalty coef. Defaults to 1.}
 90 | 
 91 | \item{barrier_scale}{Increments for value of barrier penalty. Defaults to 10.}
 92 | 
 93 | \item{max_barrier}{Maximum value of barrier_t. Defaults to 1e12.}
 94 | 
 95 | \item{constraint_tolerance}{The tolerance for the augmented Lagrangian algorithm. Final estimates of P are relative abundances to within \code{constraint_tolerance} of 1, i.e., abs(sum p_{kj} - 1) <  \code{constraint_tolerance}. Defaults to 1e-10.}
 96 | 
 97 | \item{hessian_regularization}{The second step of optimization involves a quadratic approximation to the likelihood, for which we use a modified Taylor series for stability. This is the constant that dampens the second term. Defaults to 0.01.}
 98 | 
 99 | \item{criterion}{Should the algorithm return the Poisson maximum likelihood estimates or the reweighted Poisson maximum likelihood estimates? Options are "Poisson" or "reweighted_Poisson".}
100 | 
101 | \item{profile_P}{Defaults to TRUE Run profiling step after barrier algorithm has run? If TRUE, this step is performed, possibly setting some estimated relative abundances in P equal to zero. If FALSE, profiling step is skipped and back-transformed log-ratio parameter estimated via barrier algorithm is returned for P.}
102 | 
103 | \item{barrier_maxit}{The maximum number of iterations for the barrier method}
104 | 
105 | \item{profiling_maxit}{Maximum number of iterations to run profiling step in P for (default is 25).}
106 | 
107 | \item{wts}{Weights for reweighting the likelihood contributions. This is usually done to improve efficiency. Defaults to NULL. We compute the weights for you even if you choose \code{criterion = "reweighted_Poisson"}.}
108 | 
109 | \item{verbose}{Do you want to know what I'm doing? Defaults to FALSE.}
110 | 
111 | \item{bootstrap_failure_cutoff}{Defaults to NULL.}
112 | 
113 | \item{tinker_zeroes}{Because the barrier method can only be applied to relative abundances in the interior of the simplex, tinker_zeroes divided by the number of taxa is added to all relative abundances to be estimated before the barrier method is applied. Default 0.1.}
114 | 
115 | \item{return_variance}{Defaults to FALSE.}
116 | }
117 | \value{
118 | A list containing estimated parameter values, along with the given inputs
119 | }
120 | \description{
121 | This function fits a model to HTS microbiome data that allows for estimation of
122 | detection efficiency effects as well as modeling of spurious read sources
123 | (e.g., contamination).
124 | }
125 | \author{
126 | David Clausen
127 | 
128 | Amy Willis
129 | }
130 | 


--------------------------------------------------------------------------------
/man/mu_d_P.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/d_mu_d_P_original.R
 3 | \name{mu_d_P}
 4 | \alias{mu_d_P}
 5 | \title{Calculate derivative of mu_ij with respect to an entry of P}
 6 | \usage{
 7 | mu_d_P(i, j, m, gammas, B, X, Z, P)
 8 | }
 9 | \arguments{
10 | \item{i}{The sample index (must be in 1, ..., n)}
11 | 
12 | \item{j}{The taxon index (must be in 1, ..., J)}
13 | 
14 | \item{m}{The row of P with respect to which to take derivative}
15 | 
16 | \item{gammas}{Numeric vector of read intensities}
17 | 
18 | \item{B}{Detection efficiency matrix}
19 | 
20 | \item{X}{The efficiency design matrix (n x p)}
21 | 
22 | \item{Z}{The sample design matrix (n x K)}
23 | 
24 | \item{P}{The sample relative abundance matrix (K x J)}
25 | 
26 | \item{X_tilde}{The spurious read efficiency design (K_tilde x p)}
27 | 
28 | \item{Z_tilde}{The spurious read design (n x K_tilde)}
29 | 
30 | \item{Z_tilde_gamma_cols}{Numeric vector containing indexes of columns of
31 | Z_tilde to scale by exp(gamma); NULL if no columns to be scaled}
32 | 
33 | \item{P_tilde}{The spurious source relative abundance matrix (K_tilde x J)}
34 | 
35 | \item{gamma_tilde}{Spurious read intensity parameter}
36 | }
37 | \value{
38 | A derivative d mu_ij / d P_kj
39 | }
40 | \description{
41 | Calculate derivative of mu_ij with respect to an entry of P
42 | }
43 | 


--------------------------------------------------------------------------------
/man/mu_d_beta.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/mu_d_beta.R
 3 | \name{mu_d_beta}
 4 | \alias{mu_d_beta}
 5 | \title{Calculate derivative of mu_ij with respect to B}
 6 | \usage{
 7 | mu_d_beta(
 8 |   i,
 9 |   j,
10 |   q,
11 |   gammas,
12 |   B,
13 |   X,
14 |   Z,
15 |   P,
16 |   X_tilde,
17 |   Z_tilde,
18 |   Z_tilde_gamma_cols,
19 |   alpha_tilde = NULL,
20 |   Z_tilde_list = NULL,
21 |   P_tilde,
22 |   gamma_tilde
23 | )
24 | }
25 | \arguments{
26 | \item{i}{The sample index (must be in 1, ..., n)}
27 | 
28 | \item{j}{The taxon index (must be in 1, ..., J)}
29 | 
30 | \item{q}{Which row of B to take derivative with respect to
31 | (must be in 1, ..., p)}
32 | 
33 | \item{gammas}{Numeric vector of read intensities}
34 | 
35 | \item{B}{Detection efficiency matrix}
36 | 
37 | \item{X}{The efficiency design matrix (n x p)}
38 | 
39 | \item{Z}{The sample design matrix (n x K)}
40 | 
41 | \item{P}{The sample relative abundance matrix (K x J)}
42 | 
43 | \item{X_tilde}{The spurious read efficiency design (K_tilde x p)}
44 | 
45 | \item{Z_tilde}{The spurious read design (n x K_tilde)}
46 | 
47 | \item{Z_tilde_gamma_cols}{Numeric vector containing indexes of columns of
48 | Z_tilde to scale by exp(gamma); NULL if no columns to be scaled}
49 | 
50 | \item{P_tilde}{The spurious source relative abundance matrix (K_tilde x J)}
51 | 
52 | \item{gamma_tilde}{Spurious read intensity parameter}
53 | }
54 | \value{
55 | A derivative d mu_ij / d B_qj
56 | }
57 | \description{
58 | Calculate derivative of mu_ij with respect to B
59 | }
60 | 


--------------------------------------------------------------------------------
/man/mu_d_gamma.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/d_mu_d_gamma_original.R
 3 | \name{mu_d_gamma}
 4 | \alias{mu_d_gamma}
 5 | \title{Calculate derivative of mu_ij with respect to ith entry of gamma}
 6 | \usage{
 7 | mu_d_gamma(
 8 |   i,
 9 |   j,
10 |   gammas,
11 |   B,
12 |   X,
13 |   Z,
14 |   P,
15 |   X_tilde,
16 |   Z_tilde,
17 |   Z_tilde_gamma_cols,
18 |   P_tilde,
19 |   gamma_tilde
20 | )
21 | }
22 | \arguments{
23 | \item{i}{The sample index (must be in 1, ..., n)}
24 | 
25 | \item{j}{The taxon index (must be in 1, ..., J)}
26 | 
27 | \item{gammas}{Numeric vector of read intensities}
28 | 
29 | \item{B}{Detection efficiency matrix}
30 | 
31 | \item{X}{The efficiency design matrix (n x p)}
32 | 
33 | \item{Z}{The sample design matrix (n x K)}
34 | 
35 | \item{P}{The sample relative abundance matrix (K x J)}
36 | 
37 | \item{X_tilde}{The spurious read efficiency design (K_tilde x p)}
38 | 
39 | \item{Z_tilde}{The spurious read design (n x K_tilde)}
40 | 
41 | \item{Z_tilde_gamma_cols}{Numeric vector containing indexes of columns of
42 | Z_tilde to scale by exp(gamma); NULL if no columns to be scaled}
43 | 
44 | \item{P_tilde}{The spurious source relative abundance matrix (K_tilde x J)}
45 | 
46 | \item{gamma_tilde}{Spurious read intensity parameter}
47 | }
48 | \value{
49 | A derivative d mu_ij / d gamma_i
50 | }
51 | \description{
52 | Calculate derivative of mu_ij with respect to ith entry of gamma
53 | }
54 | 


--------------------------------------------------------------------------------
/man/mu_d_gamma_tilde.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/mu_d_gamma_tilde.R
 3 | \name{mu_d_gamma_tilde}
 4 | \alias{mu_d_gamma_tilde}
 5 | \title{Calculate derivative of mu_ij with respect to an entry of gamma_tilde}
 6 | \usage{
 7 | mu_d_gamma_tilde(
 8 |   i,
 9 |   j,
10 |   k_tilde,
11 |   gammas,
12 |   B,
13 |   X,
14 |   Z,
15 |   P,
16 |   X_tilde,
17 |   Z_tilde,
18 |   Z_tilde_gamma_cols,
19 |   alpha_tilde = NULL,
20 |   Z_tilde_list = NULL,
21 |   P_tilde,
22 |   gamma_tilde
23 | )
24 | }
25 | \arguments{
26 | \item{i}{The sample index (must be in 1, ..., n)}
27 | 
28 | \item{j}{The taxon index (must be in 1, ..., J)}
29 | 
30 | \item{k_tilde}{The element of gamma_tilde with respect to which to take derivative}
31 | 
32 | \item{gammas}{Numeric vector of read intensities}
33 | 
34 | \item{B}{Detection efficiency matrix}
35 | 
36 | \item{X}{The efficiency design matrix (n x p)}
37 | 
38 | \item{Z}{The sample design matrix (n x K)}
39 | 
40 | \item{P}{The sample relative abundance matrix (K x J)}
41 | 
42 | \item{X_tilde}{The spurious read efficiency design (K_tilde x p)}
43 | 
44 | \item{Z_tilde}{The spurious read design (n x K_tilde)}
45 | 
46 | \item{Z_tilde_gamma_cols}{Numeric vector containing indexes of columns of
47 | Z_tilde to scale by exp(gamma); NULL if no columns to be scaled}
48 | 
49 | \item{P_tilde}{The spurious source relative abundance matrix (K_tilde x J)}
50 | 
51 | \item{gamma_tilde}{Spurious read intensity parameter}
52 | }
53 | \value{
54 | A derivative d mu_ij / d gamma_tilde_k_tilde
55 | }
56 | \description{
57 | Calculate derivative of mu_ij with respect to an entry of gamma_tilde
58 | }
59 | 


--------------------------------------------------------------------------------
/man/mu_d_rho_faster.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/mu_d_rho_faster.R
 3 | \name{mu_d_rho_faster}
 4 | \alias{mu_d_rho_faster}
 5 | \title{Calculate derivative of mu_ij with respect to a row of matrix-valued
 6 | parameter rho}
 7 | \usage{
 8 | mu_d_rho_faster(
 9 |   i,
10 |   J,
11 |   k,
12 |   gammas,
13 |   B,
14 |   X,
15 |   Z,
16 |   Ak_list,
17 |   rho_k,
18 |   fixed_P_multipliers,
19 |   proportion_scale = FALSE
20 | )
21 | }
22 | \arguments{
23 | \item{i}{The sample index (must be in 1, ..., n)}
24 | 
25 | \item{J}{The total number of taxa modeled}
26 | 
27 | \item{k}{Row index (which row of rho with respect to which to take derivative)}
28 | 
29 | \item{gammas}{Numeric vector of read intensities}
30 | 
31 | \item{B}{Detection efficiency matrix}
32 | 
33 | \item{X}{The efficiency design matrix (n x p)}
34 | 
35 | \item{Z}{The sample design matrix (n x K)}
36 | 
37 | \item{Ak_list}{List containing matrices that map back-transformed
38 | rho to entries of P}
39 | 
40 | \item{rho_k}{Value of kth row of rho}
41 | 
42 | \item{fixed_P_multipliers}{Numeric vector of length K containing values in (0,1]
43 | equal to 1 - sum(fixed relative abundances in row k of P)
44 | Z_tilde to scale by exp(gamma); NULL if no columns to be scaled}
45 | }
46 | \value{
47 | A derivative d mu_i / d rho_k
48 | }
49 | \description{
50 | Calculate derivative of mu_ij with respect to a row of matrix-valued
51 | parameter rho
52 | }
53 | 


--------------------------------------------------------------------------------
/man/mu_d_rho_tilde_faster.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/mu_d_rho_tilde_faster.R
 3 | \name{mu_d_rho_tilde_faster}
 4 | \alias{mu_d_rho_tilde_faster}
 5 | \title{Calculate derivative of mu_i with respect to a row of matrix-valued
 6 | parameter rho_tilde}
 7 | \usage{
 8 | mu_d_rho_tilde_faster(
 9 |   i,
10 |   J,
11 |   k_tilde,
12 |   gammas,
13 |   B,
14 |   rho_tilde_k,
15 |   A_tilde_k_list,
16 |   fixed_P_tilde_multipliers,
17 |   X_tilde,
18 |   Z_tilde,
19 |   Z_tilde_gamma_cols,
20 |   alpha_tilde = NULL,
21 |   Z_tilde_list = NULL,
22 |   gamma_tilde,
23 |   proportion_scale = FALSE
24 | )
25 | }
26 | \arguments{
27 | \item{i}{The sample index (must be in 1, ..., n)}
28 | 
29 | \item{J}{The total number of taxa modeled}
30 | 
31 | \item{k_tilde}{Row index (which row of rho_tilde with respect to which to take derivative)}
32 | 
33 | \item{gammas}{Numeric vector of read intensities}
34 | 
35 | \item{B}{Detection efficiency matrix}
36 | 
37 | \item{rho_tilde_k}{Value of kth row of rho_tilde}
38 | 
39 | \item{A_tilde_k_list}{List containing matrices that map back-transformed
40 | rho_tilde to entries of P_tilde}
41 | 
42 | \item{fixed_P_multipliers}{Numeric vector of length K containing values in (0,1]
43 | equal to 1 - sum(fixed relative abundances in row k of P_tilde)}
44 | }
45 | \value{
46 | A derivative d mu_i / d rho_tilde_k
47 | }
48 | \description{
49 | Calculate derivative of mu_i with respect to a row of matrix-valued
50 | parameter rho_tilde
51 | }
52 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(tinyvamp)
3 | 
4 | test_check("tinyvamp")
5 | 


--------------------------------------------------------------------------------
/tests/testthat/test-bootstrap_ci.R:
--------------------------------------------------------------------------------
 1 | test_that("bootstrap_ci works", {
 2 |   W <- tinyvamp:::simulate_paper_data(n =3,
 3 |                            J = 5,
 4 |                            B_multiplier = 1,
 5 |                            distrib = "NB",
 6 |                            seed = 0)
 7 | 
 8 | 
 9 |   fitted_model <- tinyvamp:::fit_simulation_model(W,"reweighted_Poisson")
10 | 
11 |   library(parallel)
12 |   cis <- bootstrap_ci(W = W,
13 |                       fitted_model = fitted_model,
14 |                       n_boot = 10,
15 |                       m = NULL,
16 |                       alpha = 0.05,
17 |                       parallelize = TRUE,
18 |                       ncores = 5,
19 |                       seed = 3,
20 |                       return_models = FALSE
21 | 
22 |   )
23 | 
24 |   expect_type(cis,"list")
25 | 
26 | 
27 | })
28 | 


--------------------------------------------------------------------------------
/tests/testthat/test-bootstrap_lrt.R:
--------------------------------------------------------------------------------
  1 | # # # # # test_that("multiplication works", {
  2 | # # # # #
  3 | # # # # n <- 10
  4 | # # # # sim_p_10 <- numeric(100)
  5 | # # # # set.seed(4939323)
  6 | # # # # for(sim in 1:100){
  7 | # # # #   print(paste("Simulation ", sim, sep = "", collapse = ""))
  8 | # # # #   W <- simulate_simple_data(matrix(0, nrow = 1, ncol = 2),
  9 | # # # #                             distrib = "nb10",
 10 | # # # #                             n = n,
 11 | # # # #                             gamma_mean = 11)
 12 | # # # #
 13 | # # # #   fitted_model <- fit_simple_model(W,
 14 | # # # #                                    B_fixed_at_zero = FALSE,
 15 | # # # #                                    reweight = TRUE)
 16 | # # # #
 17 | # # # #   null_param <- fitted_model
 18 | # # # #   null_param$B[] <- 0
 19 | # # # #   null_param$B_fixed_indices[] <- TRUE
 20 | # # # #
 21 | # # # #   boot_fit <- bootstrap_lrt(W = W,
 22 | # # # #                             fitted_model = fitted_model,
 23 | # # # #                             null_param = null_param,
 24 | # # # #                             n_boot = 100,
 25 | # # # #                             m = n^(3/4),
 26 | # # # #                             recalculate_W0 = FALSE,
 27 | # # # #                             parallelize = TRUE,
 28 | # # # #                             ncores = 7,
 29 | # # # #                             save_models = FALSE)
 30 | # # # #
 31 | # # # #   sim_p_10[sim] <- boot_fit$boot_pval
 32 | # # # #
 33 | # # # #   print(sim_p_10[sim])
 34 | # # # #   hist(boot_fit$boot_lr_stats,breaks = 7)
 35 | # # # #   abline(v = boot_fit$observed_lr_stat, col = "red")
 36 | # # # #
 37 | # # # # }
 38 | # # # # #
 39 | # # # # qs <- seq(0.01,.99,by = .01)
 40 | # # # # #
 41 | # # # # plot(qs, sapply(qs,function(k) quantile(sim_p_10,k)), type = "s",
 42 | # # # #      ylim = c(0,1),
 43 | # # # #      xlim = c(0,1))
 44 | # # # # points(qs, sapply(qs,function(k) quantileCI(sim_p_10,k,
 45 | # # # #                                            method = "asymptotic")$conf.int[1]),
 46 | # # # #      type = "s",lty = 2)
 47 | # # # # points(qs, sapply(qs,function(k) quantileCI(sim_p_10,k,
 48 | # # # #                                             method = "asymptotic")$conf.int[2]),
 49 | # # # #        type = "s",lty = 2)
 50 | # # # # abline(a = 0, b = 1, lty = 2)
 51 | # # # ################################ error in how weights are assigned?
 52 | # n <- 25
 53 | # sim_p_25 <- numeric(1000)
 54 | # sim_p_25_weighted <- numeric(1000)
 55 | # qs <- seq(0,1,by = .01)
 56 | # set.seed(4939323)
 57 | # boot_fits <- vector(1000,mode = "list")
 58 | # boot_fits_weighted <- boot_fits
 59 | # for(sim in 1:1000){
 60 | #   print(paste("Simulation ", sim, sep = "", collapse = ""))
 61 | #   W <- simulate_simple_data(matrix(c(0,0), nrow = 1, ncol = 2),
 62 | #                             distrib = "nb10",
 63 | #                             n = n,
 64 | #                             gamma_mean = 11)
 65 | #
 66 | #   fitted_model <- fit_simple_model(W = W,
 67 | #                                    B_fixed_at_zero = FALSE,
 68 | #                                    reweight = FALSE)
 69 | #
 70 | #   null_param <- fitted_model
 71 | #   null_param$B[] <- 0
 72 | #   null_param$B_fixed_indices[] <- TRUE
 73 | #
 74 | #   boot_fit <- bootstrap_lrt(W = W,
 75 | #                             fitted_model = fitted_model,
 76 | #                             null_param = null_param,
 77 | #                             n_boot = 1000,
 78 | #                             m = sqrt(m),
 79 | #                             recalculate_W0 = FALSE,
 80 | #                             parallelize = TRUE,
 81 | #                             ncores = 5,
 82 | #                             save_models = FALSE)
 83 | #
 84 | #   boot_fits[[sim]] <- boot_fit
 85 | #
 86 | #   sim_p_25[sim] <- boot_fit$boot_pval
 87 | #
 88 | #   fitted_model_weighted <- fit_simple_model(W = W,
 89 | #                                    B_fixed_at_zero = FALSE,
 90 | #                                    reweight = TRUE)
 91 | #
 92 | #   null_param <- fitted_model_weighted
 93 | #   null_param$B[] <- 0
 94 | #   null_param$B_fixed_indices[] <- TRUE
 95 | #
 96 | #   boot_fit_weighted <- bootstrap_lrt(W = W,
 97 | #                             fitted_model = fitted_model_weighted,
 98 | #                             null_param = null_param,
 99 | #                             n_boot = 1000,
100 | #                             m = sqrt(m),
101 | #                             recalculate_W0 = FALSE,
102 | #                             parallelize = TRUE,
103 | #                             ncores = 5,
104 | #                             save_models = FALSE)
105 | #
106 | #   boot_fits_weighted[[sim]] <- boot_fit_weighted
107 | #
108 | #   sim_p_25_weighted[sim] <- boot_fit_weighted$boot_pval
109 | #
110 | #   print(sim_p_25[sim])
111 | #   print(sim_p_25_weighted[sim])
112 | #
113 | #
114 | #   plot(qs,sapply(qs, function(k) quantile(sim_p_25[1:sim],k)),
115 | #                  type = "s",
116 | #        xlim = c(0,1),
117 | #        ylim = c(0,1))
118 | #
119 | #   lines(qs,sapply(qs, function(k) quantile(sim_p_25_weighted[1:sim],k)),
120 | #        type = "s",
121 | #        xlim = c(0,1),
122 | #        ylim = c(0,1),
123 | #        col = "red")
124 | #
125 | #   abline(a = 0, b = 1, lty = 2)
126 | #
127 | #
128 | #
129 | # }
130 | #
131 | # qs <- seq(0,1,by = .01)
132 | # #
133 | # plot(qs, sapply(qs,function(k) quantile(sim_p_25,k)), type = "s")
134 | # abline(a = 0, b = 1, lty = 2)
135 | # #
136 | # # boot_lrs <- lapply(1:100, function(k) boot_fits[[k]]$boot_lr_stats)
137 | # # obs_lrs <- sapply(1:100, function(k) boot_fits[[k]]$observed_lr_stat)
138 | # # plot(1:100, log(sapply(1:100, function(k) median(boot_lrs[[k]]))/obs_lrs))
139 | # # abline(a = 0, b= 1, lty = 2)
140 | # # })
141 | 


--------------------------------------------------------------------------------
/tests/testthat/test-calculate_log_penalty.R:
--------------------------------------------------------------------------------
 1 | test_that("log penalty is correct", {
 2 |   require(logsum)
 3 | 
 4 |   P <- matrix(1/7,ncol = 7, nrow = 1)
 5 |   P_tilde <- matrix(1/7,ncol = 7, nrow = 1)
 6 | 
 7 |   param_dfs <- parameters_to_dataframes(P = P,
 8 |                            P_fixed_indices = matrix(FALSE,ncol = 7, nrow = 1),
 9 |                            P_tilde = P_tilde,
10 |                            P_tilde_fixed_indices = matrix(FALSE,ncol = 7, nrow = 1),
11 |                            B = matrix(0, ncol = 7, nrow = 1),
12 |                            B_fixed_indices = matrix(FALSE,ncol = 7, nrow = 1),
13 |                            gammas = 0,
14 |                            gammas_fixed_indices = FALSE,
15 |                            gamma_tilde = matrix(0,ncol = 1, nrow = 1),
16 |                            gamma_tilde_fixed_indices = FALSE
17 |                            )
18 | 
19 |   varying_lr_df <- ra_to_lr(param_dfs$varying)
20 | 
21 |   penalty_from_function <- calculate_log_penalty(varying_lr_df = varying_lr_df,
22 |                         fixed_df = param_dfs$fixed,
23 |                         barrier_t = 1)
24 | 
25 |   direct_penalty <- -log(1/7)*14
26 | 
27 |   expect_equal(penalty_from_function, direct_penalty)
28 | })
29 | 


--------------------------------------------------------------------------------
/tests/testthat/test-d_mu_d_beta_original.R:
--------------------------------------------------------------------------------
 1 | # test_that("beta ", {
 2 | #   load("data/karstens_beta_problems.rdata")
 3 | #
 4 | #   # mean_function <- function(x,i,j){
 5 | #     temp <- varying_lr_df
 6 | #     temp <- lr_to_ra(fixed_df,varying_lr_df,
 7 | #                      varying_df)
 8 | #     temp$value[temp$param == "B"] <- x
 9 | #     temp_params <- dataframes_to_parameters(fixed_df,
10 | #                                             temp)
11 | #
12 | #     means <- meaninate(gammas = temp_params$gammas,
13 | #                        B = temp_params$B,
14 | #                        X = X,
15 | #                        Z = Z,
16 | #                        P = temp_params$P,
17 | #                        X_tilde = X_tilde,
18 | #                        Z_tilde = Z_tilde,
19 | #                        Z_tilde_gamma_cols = Z_tilde_gamma_cols,
20 | #                        P_tilde = temp_params$P_tilde,
21 | #                        gamma_tilde = temp_params$gamma_tilde)
22 | #     return(means[i,j])
23 | #   }
24 | #
25 | #   numeric_deriv <- numDeriv::grad(func = function(x) mean_function(x, 1,247),
26 | #                               varying_lr_df$value[varying_lr_df$param == "B"])[7]
27 | #
28 | #   varying_df <- lr_to_ra(fixed_df,varying_lr_df,
29 | #                    varying_df)
30 | #   params <- dataframes_to_parameters(fixed_df,varying_df)
31 | #   analytic_deriv <- mu_d_beta(i = 1,
32 | #                                j = 247,
33 | #                                q = 1,
34 | #                                gammas = params$gammas,
35 | #                                B = params$B,
36 | #                                X = X,
37 | #                                Z = Z,
38 | #                                P = params$P,
39 | #                                X_tilde = X_tilde,
40 | #                                Z_tilde = Z_tilde,
41 | #                                Z_tilde_gamma_cols = Z_tilde_gamma_cols,
42 | #                                P_tilde = params$P_tilde,
43 | #                                gamma_tilde = params$gamma_tilde)
44 | #
45 | #   expect_equal(numeric_deriv,as.numeric(analytic_deriv))
46 | # })
47 | 


--------------------------------------------------------------------------------
/tests/testthat/test-d_mu_d_gamma_original.R:
--------------------------------------------------------------------------------
 1 | test_that("gamma derivative is correct when all Z_tilde rows are multiplied by exp(gamma)", {
 2 |   gammas <-  4.53
 3 |   B <-  matrix(c(rnorm(4),0),nrow = 1)
 4 |   X <-  matrix(1, nrow = 1, ncol = 1)
 5 |   Z <-  matrix(1, nrow = 1, ncol = 1)
 6 |   P <-  matrix((1:5)/15, nrow = 1, ncol = 5)
 7 |   X_tilde <-  matrix(1,nrow = 1, ncol = 1)
 8 |   Z_tilde <-  matrix(1, nrow = 1, ncol = 1)
 9 |   P_tilde <-  matrix((5:1)/15, nrow = 1, ncol = 5)
10 |   gamma_tilde <- 1
11 | function_value <- mu_d_gamma(i = 1,
12 |            j = 5,
13 |            gammas = gammas,
14 |            B = B,
15 |            X = X,
16 |            Z = Z,
17 |            P = P,
18 |            X_tilde = X_tilde,
19 |            Z_tilde = Z_tilde,
20 |            Z_tilde_gamma_cols = 1,
21 |            P_tilde = P_tilde,
22 |            gamma_tilde = gamma_tilde)
23 | 
24 | direct_calculation <- ((Z%*%P)*exp(X%*%B + gammas) +
25 |   Z_tilde%*%(P_tilde*exp(X_tilde%*%B + gamma_tilde + gammas)))[5]
26 | 
27 | expect_equal(as.numeric(function_value), direct_calculation)
28 | })
29 | 


--------------------------------------------------------------------------------
/tests/testthat/test-dfs_to_derivs.R:
--------------------------------------------------------------------------------
 1 | #
 2 | #
 3 | # test_that("We get output at all",
 4 | #           {
 5 | #
 6 | #             set.seed(0)
 7 | #             W <- matrix(sapply(1:10,function(x) rpois(1,1000)),
 8 | #                         ncol = 5)
 9 | #             X <- matrix(1,ncol = 1, nrow = 2)
10 | #             Z <- matrix(1,nrow = 2, ncol = 1)
11 | #             Z_tilde <- matrix(0,nrow = 2, ncol = 1)
12 | #             Z_tilde_gamma_cols <- 1
13 | #             gammas <- apply(W,1,function(x) log(sum(x)))
14 | #             gammas_fixed_indices <- rep(F,2)
15 | #             P <- matrix(1/5, nrow = 1, ncol = 5)
16 | #             P_fixed_indices <- matrix(FALSE, nrow = 1, ncol = 5)
17 | #             B <- matrix(0,ncol = 5, nrow = 1)
18 | #             B_fixed_indices <- matrix(TRUE, ncol = 5, nrow = 1)
19 | #             X_tilde <- matrix(0,ncol = 1, nrow = 1)
20 | #             P_tilde <- matrix(1/5,ncol = 5, nrow = 1)
21 | #             P_tilde_fixed_indices <- matrix(TRUE, ncol = 5, nrow = 1)
22 | #             gamma_tilde <- matrix(0,nrow = 1, ncol = 1)
23 | #             gamma_tilde_fixed_indices <- matrix(TRUE, nrow = 1, ncol = 1)
24 | #
25 | #
26 | #             parameter_dfs <- parameters_to_dataframes(P,
27 | #                                                       P_fixed_indices,
28 | #                                                       P_tilde,
29 | #                                                       P_tilde_fixed_indices,
30 | #                                                       B,
31 | #                                                       B_fixed_indices,
32 | #                                                       gammas,
33 | #                                                       gammas_fixed_indices,
34 | #                                                       gamma_tilde,
35 | #                                                       gamma_tilde_fixed_indices)
36 | #
37 | #
38 | #             dfs_to_derivs(varying_df = parameter_dfs$varying,
39 | #                                       varying_lr_df = NULL,
40 | #                                       fixed_df = parameter_dfs$fixed_df,
41 | #                                       X = X,
42 | #                                       Z = Z,
43 | #                                       X_tilde = X_tilde,
44 | #                                       Z_tilde = Z_tilde,
45 | #                                       Z_tilde_gamma_cols = Z_tilde_gamma_cols,
46 | #                                       criterion = "Poisson",
47 | #                                       gmm_inv_wts = NULL)
48 | #
49 | #           })
50 | 


--------------------------------------------------------------------------------
/tests/testthat/test-do_one_simulation.R:
--------------------------------------------------------------------------------
  1 | test_that("do_one_simulation returns list of lists", {
  2 | 
  3 |   tinysim <- do_one_simulation(n = 1,
  4 |                                 J = 5,
  5 |                                 distrib = "NB",
  6 |                                 B_multiplier = 0,
  7 |                                 seed = 1,
  8 |                                 label = "test",
  9 |                                 n_boot = 5,
 10 |                                 verbose =FALSE,
 11 |                                 # load_tinyvamp = FALSE,
 12 |                                 folder_name = "test",
 13 |                                 return_dont_save = TRUE)
 14 | 
 15 |   expect_type(tinysim$poisson_lrt,"list")
 16 |   expect_type(tinysim$poisson_ci,"list")
 17 |   expect_type(tinysim$reweighted_lrt,"list")
 18 |   expect_type(tinysim$reweighted_ci,"list")
 19 | 
 20 | })
 21 | 
 22 | 
 23 | 
 24 | # test_that("Simulation that was stalling on Bayes
 25 | # is not stalling",{
 26 | #
 27 | #   please_dont_stall <-
 28 | #     do_one_simulation(n = 1,
 29 | #                       J = 20,
 30 | #                       distrib = "NB",
 31 | #                       B_multiplier = 0,
 32 | #                       seed = 1,
 33 | #                       label = "testrun",
 34 | #                       n_boot = 5,
 35 | #                       load_tinyvamp = FALSE,
 36 | #                       folder_name,
 37 | #                       return_dont_save = TRUE,
 38 | #                       parallelize = TRUE,
 39 | #                       verbose= TRUE)
 40 | #
 41 | #   expect_type(please_dont_stall$poisson_lrt,"list")
 42 | #   expect_type(please_dont_stall$poisson_ci,"list")
 43 | #   expect_type(please_dont_stall$reweighted_lrt,"list")
 44 | #   expect_type(please_dont_stall$reweighted_ci,"list")
 45 | #
 46 | # }
 47 | #           )
 48 | 
 49 | test_that("Simulation that failed bc of log penalty
 50 | gradient off-by-one-type error now succeeds.",{
 51 | 
 52 |   failing_on_bayes <- do_one_simulation(n = 1,
 53 |                     J = 5,
 54 |                     distrib = "Poisson",
 55 |                     B_multiplier = 1,
 56 |                     seed = 1,
 57 |                     label = "test",
 58 |                     n_boot = 5,
 59 |                     parallelize= FALSE,
 60 |                     # load_tinyvamp = FALSE,
 61 |                     folder_name = "test",
 62 |                     verbose = FALSE,
 63 |                     return_dont_save = TRUE)
 64 | 
 65 |   expect_type(failing_on_bayes$poisson_lrt,"list")
 66 |   expect_type(failing_on_bayes$poisson_ci,"list")
 67 |   expect_type(failing_on_bayes$reweighted_lrt,"list")
 68 |   expect_type(failing_on_bayes$reweighted_ci,"list")
 69 | 
 70 | })
 71 | 
 72 | 
 73 | # test_that("Simulation that might be stalled on Bayes
 74 | # actually runs.",{
 75 | #
 76 | #   possibly_failing_on_bayes <- do_one_simulation(n = 3,
 77 | #                                         J = 5,
 78 | #                                         distrib = "NB",
 79 | #                                         B_multiplier = 1,
 80 | #                                         seed = 1,
 81 | #                                         label = "test",
 82 | #                                         n_boot = 5,
 83 | #                                         parallelize= FALSE,
 84 | #                                         load_tinyvamp = FALSE,
 85 | #                                         folder_name = "test",
 86 | #                                         verbose = TRUE,
 87 | #                                         return_dont_save = TRUE)
 88 | #
 89 | #   expect_type(possibly_failing_on_bayes$poisson_lrt,"list")
 90 | #   expect_type(possibly_failing_on_bayes$poisson_ci,"list")
 91 | #   expect_type(possibly_failing_on_bayes$reweighted_lrt,"list")
 92 | #   expect_type(possibly_failing_on_bayes$reweighted_ci,"list")
 93 | #
 94 | # })
 95 | 
 96 | # test_that("Yet another thing that stalled on Bayes doesn't stall anymore.", {
 97 | #
 98 | #   stalling_on_bayes <- do_one_simulation(n = 1,
 99 | #                                         J = 5,
100 | #                                         distrib = "Poisson",
101 | #                                         B_multiplier = 1,
102 | #                                         seed = 1,
103 | #                                         label = "test",
104 | #                                         n_boot = 5,
105 | #                                         parallelize= FALSE,
106 | #                                         load_tinyvamp = FALSE,
107 | #                                         folder_name = "test",
108 | #                                         return_dont_save = TRUE,
109 | #                                         verbose = TRUE)
110 | #
111 | #
112 | # })
113 | 
114 | test_that("Another simulation stalled on Bayes can run and
115 |           gives different estimates depending on estimator used.",{
116 | 
117 |   stalling_out_on_bayes <-
118 |     do_one_simulation(n = 3,
119 |                       J = 20,
120 |                       distrib = "Poisson",
121 |                       B_multiplier = 0,
122 |                       seed = 67791629,
123 |                       label = "trying_for_hundo",
124 |                       n_boot = 10,
125 |                       parallelize= FALSE,
126 |                       # load_tinyvamp = FALSE,
127 |                       folder_name = "test",
128 |                       verbose = TRUE,
129 |                       return_dont_save = TRUE)
130 | 
131 |   expect_true(mean(stalling_out_on_bayes$poisson_lrt$boot_lr_stats) !=
132 |                 mean(stalling_out_on_bayes$reweighted_lrt$boot_lr_stats))
133 | 
134 |   expect_true(sd(stalling_out_on_bayes$poisson_ci$ci$lower_ci -
135 |     stalling_out_on_bayes$reweighted_ci$ci$lower_ci) >1e-4)
136 | 
137 |   expect_true(sd(stalling_out_on_bayes$poisson_ci$ci$upper_ci -
138 |                    stalling_out_on_bayes$reweighted_ci$ci$upper_ci) >1e-4)
139 | 
140 |   expect_true(sd(stalling_out_on_bayes$poisson_ci$ci$value -
141 |     stalling_out_on_bayes$reweighted_ci$ci$value)>1e-4)
142 | 
143 | 
144 | 
145 | 
146 | })
147 | 


--------------------------------------------------------------------------------
/tests/testthat/test-fit_simulation_model.R:
--------------------------------------------------------------------------------
  1 | test_that("We are able to fit a simulation model (Poisson likelihood) to
  2 | Poisson simulation data", {
  3 |   W <- simulate_paper_data(n = 1,
  4 |                                   J = 5,
  5 |                                   B_multiplier = 1,
  6 |                                   distrib = "Poisson",
  7 |                                   seed = 0)
  8 | 
  9 | 
 10 |   fitted.model <- try(fit_simulation_model(W,"Poisson"))
 11 |   expect_type(fitted.model,"list")
 12 | })
 13 | 
 14 | test_that("We are able to fit a simulation model (Poisson likelihood) to
 15 | negative binomial simulation data", {
 16 |   W <- simulate_paper_data(n = 1,
 17 |                            J = 5,
 18 |                            B_multiplier = 1,
 19 |                            distrib = "NB",
 20 |                            seed = 0)
 21 | 
 22 | 
 23 |   fitted.model <- try(fit_simulation_model(W,"Poisson"))
 24 |   expect_type(fitted.model,"list")
 25 | })
 26 | 
 27 | test_that("We are able to fit a simulation model (Poisson likelihood)
 28 | to Poisson simulation data", {
 29 |   W <- simulate_paper_data(n = 1,
 30 |                            J = 5,
 31 |                            B_multiplier = 1,
 32 |                            distrib = "Poisson",
 33 |                            seed = 0)
 34 | 
 35 | 
 36 |   fitted.model <- try(fit_simulation_model(W,"reweighted_Poisson"))
 37 |   expect_type(fitted.model,"list")
 38 | })
 39 | 
 40 | test_that("We are able to fit a simulation model (reweighted Poisson likelihood) to
 41 | negative binomial simulation data", {
 42 |   W <- simulate_paper_data(n = 1,
 43 |                            J = 5,
 44 |                            B_multiplier = 1,
 45 |                            distrib = "NB",
 46 |                            seed = 0)
 47 | 
 48 | 
 49 |   fitted.model <- try(fit_simulation_model(W,"reweighted_Poisson",
 50 |                                            return_variance= TRUE))
 51 |   expect_type(fitted.model,"list")
 52 | 
 53 |   # fitted.model$variance_function %>%
 54 |   #   ggplot() +
 55 |   #   geom_point(aes(x= mean, y = squerror),
 56 |   #              size = 0.5) +
 57 |   #   geom_line(aes(x = mean, y = estd_var),
 58 |   #             color = "red") +
 59 |   #   theme_bw()+
 60 |   #   scale_y_sqrt() +
 61 |   #   scale_x_sqrt()
 62 | })
 63 | 
 64 | test_that("We get different estimates from Poisson and
 65 | reweighted Poisson estimators fit to negative binomial data", {
 66 |   W <- simulate_paper_data(n = 3,
 67 |                            J = 20,
 68 |                            B_multiplier = 1,
 69 |                            distrib = "NB",
 70 |                            seed = 0)
 71 | 
 72 | 
 73 |   poisson_fit <- try(fit_simulation_model(W,"Poisson"))
 74 |   reweighted_fit <- try(fit_simulation_model(W,"reweighted_Poisson",
 75 |                                              return_variance = TRUE))
 76 | 
 77 |   expect_true(
 78 |     mean(abs(poisson_fit$varying$value - reweighted_fit$varying$value))
 79 |     >0.01)
 80 | #
 81 | #
 82 | #   poisson_cis <- bootstrap_ci(W,
 83 | #                               fitted_model = poisson_fit,
 84 | #                               n_boot = 100,
 85 | #                               verbose= TRUE,
 86 | #                               parallelize = TRUE)
 87 | #
 88 | #   reweighted_cis <- bootstrap_ci(W,
 89 | #                                  fitted_model = reweighted_fit,
 90 | #                                  n_boot = 100,
 91 | #                                  verbose = TRUE,
 92 | #                                  parallelize = TRUE
 93 | #                                  )
 94 | #
 95 | # poisson_cis$ci$method <- "Poisson"
 96 | # reweighted_cis$ci$method <- "Reweighted"
 97 | #
 98 | # rbind(poisson_cis$ci,
 99 | #       reweighted_cis$ci) %>%
100 | #   dplyr::filter(param == "B") %>%
101 | #   ggplot() +
102 | #   geom_errorbar(aes(x = j, ymin = lower_ci, ymax= upper_ci, color = method),
103 | #                 position = position_dodge(0.5)) +
104 | #   facet_wrap(~k)+
105 | #   theme_bw()
106 | #
107 | # rbind(poisson_cis$ci,
108 | #       reweighted_cis$ci) %>%
109 | #   dplyr::group_by(param, method) %>%
110 | #   dplyr::summarize(mean_width = mean(abs(upper_ci - lower_ci))) %>%
111 | #   ggplot() +
112 | #   geom_point(aes(x= param, y = mean_width, color = method),
113 | #              position = position_dodge(0.5)) +
114 | #   scale_y_log10()+
115 | #   theme_bw()
116 | #
117 | # rbind(poisson_cis$ci,
118 | #       reweighted_cis$ci) %>%
119 | #   mutate(width = upper_ci - lower_ci) %>%
120 | #   filter(param == "P_tilde") %>%
121 | #   select(param, j, method, width) %>%
122 | #   pivot_wider(values_from = width, names_from = method) %>%
123 | #   ggplot(aes(x = j, y = Poisson/Reweighted)) +
124 | #   geom_point() +
125 | #   theme_bw() +
126 | #   scale_y_log10()
127 | #
128 | #
129 | #
130 | # reweighted_fit$variance_function %>%
131 | #   ggplot() +
132 | #   geom_point(aes(x= mean,y = squerror)) +
133 | #   geom_line(aes(x = mean,y = estd_var),color="red") +
134 | #   scale_y_log10() +
135 | #   scale_x_log10() +
136 | #   theme_bw()
137 | 
138 | 
139 | })
140 | 


--------------------------------------------------------------------------------
/tests/testthat/test-get_A_tilde_k_list.R:
--------------------------------------------------------------------------------
 1 | test_that("get_A_tilde_k_list works when  P_tilde entries fixed", {
 2 | 
 3 |   X <- matrix(1)
 4 |   X_tilde <- matrix(1)
 5 |   Z <- matrix(1)
 6 |   Z_tilde <- matrix(1)
 7 |   Z_tilde_gamma_cols <- 1
 8 | 
 9 |   P <- matrix(rep(1/8, 8), nrow = 1)
10 |   P_fixed_indices <- matrix(rep(FALSE,8), nrow = 1)
11 |   P_tilde <- matrix((1:8)/sum(1:8), nrow = 1)
12 |   P_tilde_fixed_indices <- matrix(rep(FALSE,8), nrow = 1)
13 |   B <- matrix(c(-3:3,0),nrow = 1)
14 |   B_fixed_indices <- matrix(rep(TRUE,8), nrow = 1)
15 |   gammas <- 8
16 |   gammas_fixed_indices <- TRUE
17 |   gamma_tilde <- matrix(log(100))
18 |   gamma_tilde_fixed_indices <- matrix(TRUE)
19 | 
20 |   dfs <- parameters_to_dataframes(P,
21 |                                   P_fixed_indices,
22 |                                   P_tilde,
23 |                                   P_tilde_fixed_indices,
24 |                                   B,
25 |                                   B_fixed_indices,
26 |                                   gammas,
27 |                                   gammas_fixed_indices,
28 |                                   gamma_tilde,
29 |                                   gamma_tilde_fixed_indices)
30 | 
31 |   varying_lr_df <- ra_to_lr(dfs$varying)
32 | 
33 |   expect_equal(get_A_tilde_k_list(fixed_df = dfs$fixed,
34 |                      varying_df = dfs$varying,
35 |                      varying_lr_df = varying_lr_df),
36 |                list(diag(8)))
37 | 
38 | })
39 | 
40 | test_that("get_A_tilde_k_list works when some P_tilde entries fixed", {
41 | 
42 |   X <- matrix(1)
43 |   X_tilde <- matrix(1)
44 |   Z <- matrix(1)
45 |   Z_tilde <- matrix(1)
46 |   Z_tilde_gamma_cols <- 1
47 | 
48 |   P <- matrix(rep(1/8, 8), nrow = 1)
49 |   P_fixed_indices <- matrix(rep(FALSE,8), nrow = 1)
50 |   P_tilde <- matrix((1:8)/sum(1:8), nrow = 1)
51 |   P_tilde_fixed_indices <- matrix(c(FALSE, TRUE, TRUE, rep(FALSE,5)), nrow = 1)
52 |   B <- matrix(c(-3:3,0),nrow = 1)
53 |   B_fixed_indices <- matrix(rep(TRUE,8), nrow = 1)
54 |   gammas <- 8
55 |   gammas_fixed_indices <- TRUE
56 |   gamma_tilde <- matrix(log(100))
57 |   gamma_tilde_fixed_indices <- matrix(TRUE)
58 | 
59 |   dfs <- parameters_to_dataframes(P,
60 |                                   P_fixed_indices,
61 |                                   P_tilde,
62 |                                   P_tilde_fixed_indices,
63 |                                   B,
64 |                                   B_fixed_indices,
65 |                                   gammas,
66 |                                   gammas_fixed_indices,
67 |                                   gamma_tilde,
68 |                                   gamma_tilde_fixed_indices)
69 | 
70 |   varying_lr_df <- ra_to_lr(dfs$varying)
71 | 
72 |   expect_equal(get_A_tilde_k_list(fixed_df = dfs$fixed,
73 |                                   varying_df = dfs$varying,
74 |                                   varying_lr_df = varying_lr_df),
75 |                list(rbind(diag(6)[1,],
76 |                           0*diag(6)[1:2,],
77 |                           diag(6)[2:6,])))
78 | 
79 | })
80 | 


--------------------------------------------------------------------------------
/tests/testthat/test-gmm_criterion.R:
--------------------------------------------------------------------------------
 1 | test_that("gmm criterion behaves predictably in the inv_wts argument", {
 2 |   set.seed(0)
 3 |   W <- matrix(sapply(1:10,function(x) rpois(1,1000)),
 4 |               ncol = 5)
 5 |   X <- matrix(1,ncol = 1, nrow = 2)
 6 |   Z <- matrix(1,nrow = 2, ncol = 1)
 7 |   Z_tilde <- matrix(0,nrow = 2, ncol = 1)
 8 |   Z_tilde_gamma_cols <- 1
 9 |   gammas <- apply(W,1,function(x) log(sum(x)))
10 |   gammas_fixed_indices <- rep(F,2)
11 |   P <- matrix(1/5, nrow = 1, ncol = 5)
12 |   P_fixed_indices <- matrix(FALSE, nrow = 1, ncol = 5)
13 |   B <- matrix(0,ncol = 5, nrow = 1)
14 |   B_fixed_indices <- matrix(TRUE, ncol = 5, nrow = 1)
15 |   X_tilde <- matrix(0,ncol = 1, nrow = 1)
16 |   P_tilde <- matrix(1/5,ncol = 5, nrow = 1)
17 |   P_tilde_fixed_indices <- matrix(TRUE, ncol = 5, nrow = 1)
18 |   gamma_tilde <- matrix(0,nrow = 1, ncol = 1)
19 |   gamma_tilde_fixed_indices <- matrix(TRUE, nrow = 1, ncol = 1)
20 | 
21 |   param_dfs <- parameters_to_dataframes(P,
22 |                                         P_fixed_indices,
23 |                                         P_tilde,
24 |                                         P_tilde_fixed_indices,
25 |                                         B,
26 |                                         B_fixed_indices,
27 |                                         gammas,
28 |                                         gammas_fixed_indices,
29 |                                         gamma_tilde,
30 |                                         gamma_tilde_fixed_indices)
31 | 
32 |   means <- meaninate(gammas = gammas,
33 |                      B = B,
34 |                      X = X,
35 |                      Z = Z,
36 |                      P = P,
37 |                      X_tilde = X_tilde,
38 |                      Z_tilde = Z_tilde,
39 |                      Z_tilde_gamma_cols = 1,
40 |                      P_tilde = P_tilde,
41 |                      gamma_tilde = gamma_tilde)
42 |   n <- nrow(W)
43 |   W_long <- lapply(1:n,function(i) as.numeric(W[i,]))
44 |   W_long <- do.call(c,W_long)
45 |   means_long <- lapply(1:n, function(i) as.numeric(means[i,]))
46 |   means_long <- do.call(c,means_long)
47 | 
48 |   sse <- sum((W_long - means_long)^2)
49 | 
50 |   expect_equal(0.5*sse,gmm_criterion(W_long,means_long,
51 |                                  rep(1,length(W_long))))
52 | 
53 |   expect_true(0.5*sse > gmm_criterion(W_long,means_long,
54 |                                   rep(2,length(W_long))))
55 | 
56 | 
57 |   W_long[1] <- means_long[1]
58 | 
59 |   expect_equal(gmm_criterion(W_long,means_long,c(0,rep(1,length(W_long) - 1))),
60 |                gmm_criterion(W_long,means_long,c(1,rep(1,length(W_long) - 1))))
61 | 
62 | })
63 | 


--------------------------------------------------------------------------------
/tests/testthat/test-lr_to_ra.R:
--------------------------------------------------------------------------------
 1 | test_that("Get same dfs out applying lr_to_ra on output of ra_to_lr", {
 2 |    
 3 |    fixed_df <- data.frame("value" = 1/5,
 4 |                           "param" = "P_tilde",
 5 |                           k = 1,
 6 |                           j = 1:5)
 7 |    
 8 |    varying_df <- data.frame("value" = (1:5)/sum(1:5),
 9 |                             "param" = "P",
10 |                             k = 1,
11 |                             j = 1:5)
12 |    
13 |    expect_equal(lr_to_ra(fixed_df = fixed_df,
14 |                          varying_lr_df =ra_to_lr(varying_df),
15 |                          varying_df = varying_df),
16 |                 varying_df)
17 |    
18 | })
19 | 
20 | # test_that("Get same dfs out applying ra_to_lr on output of lr_to_ra",{
21 | #    temp_varying <-
22 | #       with(list_of_dfs,
23 | #            lr_to_ra(fixed_df,varying_lr_df,varying_df))
24 | #    temp_lr <- ra_to_lr(temp_varying)
25 | #    
26 | #    expect_equal(temp_lr,list_of_dfs$varying_lr_df)
27 | # })
28 | 


--------------------------------------------------------------------------------
/tests/testthat/test-mu_d_P.R:
--------------------------------------------------------------------------------
 1 | test_that("P derivative is correct in simple setting", {
 2 |   gammas <-  4.53
 3 |   B <-  matrix(c(rnorm(4),0),nrow = 1)
 4 |   X <-  matrix(1, nrow = 1, ncol = 1)
 5 |   Z <-  matrix(1, nrow = 1, ncol = 1)
 6 |   P <-  matrix((1:5)/15, nrow = 1, ncol = 5)
 7 |   X_tilde <-  matrix(1,nrow = 1, ncol = 1)
 8 |   Z_tilde <-  matrix(1, nrow = 1, ncol = 1)
 9 |   P_tilde <-  matrix((5:1)/15, nrow = 1, ncol = 5)
10 |   gamma_tilde <- 1
11 |   function_value <- mu_d_P(i = 1,
12 |                                      j = 5,
13 |                                      m = 1,
14 |                                      gammas = gammas,
15 |                                      B = B,
16 |                                      X = X,
17 |                                      Z = Z,
18 |                                      P = P)
19 | 
20 |   direct_calculation <- (Z%*%exp(X%*%B + gammas))[5]
21 | 
22 |   expect_equal(as.numeric(function_value), direct_calculation)
23 | })
24 | 


--------------------------------------------------------------------------------
/tests/testthat/test-mu_d_beta.R:
--------------------------------------------------------------------------------
 1 | test_that("Beta derivative is correct in no-contamination case", {
 2 | 
 3 |   set.seed(9382)
 4 |   B <- matrix(c(rnorm(6),0), nrow = 1)
 5 |   X <-  matrix(1,ncol = 1, nrow = 1)
 6 |   Z <-  matrix(1, ncol = 1, nrow = 1)
 7 |   P <-  matrix(1/7, nrow =1 , ncol = 7)
 8 |   X_tilde <-  matrix(1, ncol = 1, nrow = 1)
 9 |   Z_tilde <-  matrix(1, ncol = 1, nrow = 1)
10 |   P_tilde <-  matrix(1/7, ncol = 7, nrow = 1)
11 |   deriv_from_fn <- mu_d_beta(i = 1,
12 |             j = 2,
13 |             q = 1,
14 |             gammas = 8,
15 |             B = B,
16 |             X = X,
17 |             Z = Z,
18 |             P = P,
19 |             X_tilde = X_tilde,
20 |             Z_tilde = Z_tilde,
21 |             Z_tilde_gamma_cols = 1,
22 |             P_tilde = P_tilde,
23 |             gamma_tilde = matrix(-100, ncol = 1, nrow = 1))
24 | 
25 |  direct_deriv <- ((Z%*%P)*(exp(X%*%B + 8)))[1,2]
26 |  expect_equal(as.numeric(deriv_from_fn), direct_deriv)
27 | })
28 | 


--------------------------------------------------------------------------------
/tests/testthat/test-mu_d_gamma_faster.R:
--------------------------------------------------------------------------------
 1 | 
 2 | test_that("mu_d_gamma_faster output equal to mean when gamma_tilde negligible",{
 3 | set.seed(75943)
 4 | B <- matrix(c(rnorm(6),0),nrow = 1)
 5 | dgamma <- mu_d_gamma_faster(i = 1,
 6 |                   J = 7,
 7 |                   gammas = 1,
 8 |                   B = B,
 9 |                   X = matrix(1, ncol = 1),
10 |                   Z = matrix(1, ncol = 1),
11 |                   P = matrix((1:7)/(28),nrow = 1),
12 |                   X_tilde = matrix(1, ncol = 1),
13 |                   Z_tilde = matrix(1, ncol = 1),
14 |                   Z_tilde_gamma_cols = NULL,
15 |                   P_tilde = matrix((7:1)/28, nrow = 1),
16 |                   gamma_tilde = -100)
17 | 
18 | 
19 | mean_term <- meaninate(gammas = 1,
20 |           B = B,
21 |           X = matrix(1, ncol = 1),
22 |           Z = matrix(1, ncol = 1),
23 |           P = matrix((1:7)/(28),nrow = 1),
24 |           X_tilde = matrix(1, ncol = 1),
25 |           Z_tilde = matrix(1, ncol = 1),
26 |           Z_tilde_gamma_cols = NULL,
27 |           P_tilde = matrix((7:1)/28, nrow = 1),
28 |           gamma_tilde = -100)
29 | 
30 | expect_equal(mean_term, dgamma)
31 | })
32 | 
33 | 
34 | test_that("mu_d_gamma_faster output equal to mean when gamma_tilde non-negligible
35 |           and all columns of Z_tilde in Z_tilde_gamma_tilde_cols",{
36 |   set.seed(742343)
37 |   B <- matrix(c(rnorm(6),0),nrow = 1)
38 |   dgamma <- mu_d_gamma_faster(i = 1,
39 |                               J = 7,
40 |                               gammas = 1,
41 |                               B = B,
42 |                               X = matrix(1, ncol = 1),
43 |                               Z = matrix(1, ncol = 1),
44 |                               P = matrix((1:7)/(28),nrow = 1),
45 |                               X_tilde = matrix(1, ncol = 1),
46 |                               Z_tilde = matrix(1, ncol = 1),
47 |                               Z_tilde_gamma_cols = 1,
48 |                               P_tilde = matrix((7:1)/28, nrow = 1),
49 |                               gamma_tilde = 2)
50 | 
51 | 
52 |   mean_term <- meaninate(gammas = 1,
53 |                          B = B,
54 |                          X = matrix(1, ncol = 1),
55 |                          Z = matrix(1, ncol = 1),
56 |                          P = matrix((1:7)/(28),nrow = 1),
57 |                          X_tilde = matrix(1, ncol = 1),
58 |                          Z_tilde = matrix(1, ncol = 1),
59 |                          Z_tilde_gamma_cols = 1,
60 |                          P_tilde = matrix((7:1)/28, nrow = 1),
61 |                          gamma_tilde = 2)
62 | 
63 |   expect_equal(mean_term, dgamma)
64 | })
65 | 


--------------------------------------------------------------------------------
/tests/testthat/test-mu_d_gamma_tilde.R:
--------------------------------------------------------------------------------
 1 | test_that("gamma_tilde derivative is correct when all Z_tilde rows are multiplied by exp(gamma)", {
 2 |   gammas <-  4.53
 3 |   B <-  matrix(c(rnorm(4),0),nrow = 1)
 4 |   X <-  matrix(1, nrow = 1, ncol = 1)
 5 |   Z <-  matrix(1, nrow = 1, ncol = 1)
 6 |   P <-  matrix((1:5)/15, nrow = 1, ncol = 5)
 7 |   X_tilde <-  matrix(1,nrow = 1, ncol = 1)
 8 |   Z_tilde <-  matrix(1, nrow = 1, ncol = 1)
 9 |   P_tilde <-  matrix((5:1)/15, nrow = 1, ncol = 5)
10 |   gamma_tilde <- 1
11 |   function_value <- mu_d_gamma_tilde(i = 1,
12 |                                j = 5,
13 |                                k_tilde = 1,
14 |                                gammas = gammas,
15 |                                B = B,
16 |                                X = X,
17 |                                Z = Z,
18 |                                P = P,
19 |                                X_tilde = X_tilde,
20 |                                Z_tilde = Z_tilde,
21 |                                Z_tilde_gamma_cols = 1,
22 |                                P_tilde = P_tilde,
23 |                                gamma_tilde = gamma_tilde)
24 | 
25 |   direct_calculation <- (
26 |                            Z_tilde%*%(P_tilde*exp(X_tilde%*%B + gamma_tilde + gammas)))[5]
27 | 
28 |   expect_equal(as.numeric(function_value), direct_calculation)
29 | })
30 | 


--------------------------------------------------------------------------------
/tests/testthat/test-mu_d_rho.R:
--------------------------------------------------------------------------------
 1 | test_that("rho derivative is correct when all Z_tilde rows are multiplied by exp(gamma)", {
 2 |   gammas <-  4.53
 3 |   B <-  matrix(c(rnorm(4),0),nrow = 1)
 4 |   X <-  matrix(1, nrow = 1, ncol = 1)
 5 |   Z <-  matrix(1, nrow = 1, ncol = 1)
 6 |   P <-  matrix((1:5)/15, nrow = 1, ncol = 5)
 7 |   X_tilde <-  matrix(1,nrow = 1, ncol = 1)
 8 |   Z_tilde <-  matrix(1, nrow = 1, ncol = 1)
 9 |   P_tilde <-  matrix((5:1)/15, nrow = 1, ncol = 5)
10 |   rho_k = log(P[1,1:4]/P[1,5])
11 |   gamma_tilde <- 1
12 |   function_value <- mu_d_rho_faster(i = 1,
13 |                            J = 5,
14 |                            k = 1,
15 |                            gammas = gammas,
16 |                            B = B,
17 |                            X = X,
18 |                            Z = Z,
19 |                            rho_k = rho_k,
20 |                            Ak_list = list(diag(5)),
21 |                            fixed_P_multipliers = 1
22 |                           )
23 | 
24 |   #convert to matrix from dgeMatrix
25 |   function_value <- as.matrix(function_value)
26 | 
27 |   #remove empty dimnames
28 |   dimnames(function_value) <- NULL
29 | 
30 | 
31 |   direct_d_mu_dPk <- diag(as.numeric((Z%*%exp(X%*%B + gammas))))
32 |   direct_dPk_drho_k <- cbind(diag(exp(rho_k)/(sum(c(1,exp(rho_k))))),0) -
33 |     outer(c(exp(rho_k))/(sum(c(1,exp(rho_k)))),c(exp(rho_k),1)/(sum(c(1,exp(rho_k)))))
34 |   direct_calculation <- t(direct_dPk_drho_k%*%direct_d_mu_dPk)
35 | 
36 |   expect_equal(function_value, direct_calculation)
37 | })
38 | 


--------------------------------------------------------------------------------
/tests/testthat/test-mu_d_rho_tilde.R:
--------------------------------------------------------------------------------
  1 | test_that("rho derivative is correct when all Z_tilde rows are multiplied by exp(gamma)", {
  2 |   require(Matrix)
  3 |   gammas <-  4.53
  4 |   B <-  matrix(c(rnorm(4),0),nrow = 1)
  5 |   X_tilde <-  matrix(1,nrow = 1, ncol = 1)
  6 |   Z_tilde <-  matrix(1, nrow = 1, ncol = 1)
  7 |   P_tilde <-  matrix((5:1)/15, nrow = 1, ncol = 5)
  8 |   rho_tilde_k = log(P_tilde[1,1:4]/P_tilde[1,5])
  9 |   gamma_tilde <- 1
 10 |   Z_tilde_gamma_cols <- 1
 11 | 
 12 |   function_value <- mu_d_rho_tilde_faster(i = 1,
 13 |                                     J = 5,
 14 |                                     k_tilde = 1,
 15 |                                     gammas = gammas,
 16 |                                     B = B,
 17 |                                     rho_tilde_k = rho_tilde_k,
 18 |                                     A_tilde_k_list = list(diag(5)),
 19 |                                     fixed_P_tilde_multipliers = 1,
 20 |                                     X_tilde = X_tilde,
 21 |                                     Z_tilde = Z_tilde,
 22 |                                     Z_tilde_gamma_cols = Z_tilde_gamma_cols,
 23 |                                     gamma_tilde = gamma_tilde)
 24 | 
 25 |   #convert to matrix from dgeMatrix
 26 |   function_value <- as.matrix(function_value)
 27 | 
 28 |   #remove empty dimnames
 29 |   dimnames(function_value) <- NULL
 30 | 
 31 | 
 32 |   direct_d_mu_dP_tilde_k <- diag(as.numeric((exp(gammas)*Z_tilde)%*%exp(X_tilde%*%B + gamma_tilde)))
 33 |   direct_dP_tilde_k_drho_tilde_k <- cbind(diag(exp(rho_tilde_k)/(sum(c(1,exp(rho_tilde_k))))),0) -
 34 |     outer(c(exp(rho_tilde_k))/(sum(c(1,exp(rho_tilde_k)))),c(exp(rho_tilde_k),1)/(sum(c(1,exp(rho_tilde_k)))))
 35 |   direct_calculation <- t(direct_dP_tilde_k_drho_tilde_k%*%direct_d_mu_dP_tilde_k)
 36 | 
 37 |   expect_equal(function_value, direct_calculation)
 38 | })
 39 | 
 40 | test_that("rho_tilde derivative correct in realistic setting with alpha_tilde", {
 41 | 
 42 | 
 43 |   set.seed(0)
 44 |   p_mock1 <- c(rep(0,5),rep(1/5,5))
 45 |   p_mock2 <- c(rep(1/5,5),rep(0,5))
 46 |   p_contam <- c(rexp(5),rep(0,5))
 47 |   p_contam <- p_contam/sum(p_contam)
 48 |   p_true <- rep(1/10,10)
 49 |   dilutions <- rep(3^(1:5),3)
 50 |   W <- matrix(NA,nrow = 15, ncol = 10)
 51 |   for(i in 1:15){
 52 |     if(i<6){
 53 |       W[i,] <- round(rexp(1,3^((i - 1)%%5)*1/10000)*(p_mock1 + dilutions[i]*p_contam),0)
 54 |     } else{
 55 |       if(i<11){
 56 |         W[i,] <- round(rexp(1,3^((i- 1)%%5)*1/10000)*(p_mock2 + dilutions[i]*p_contam),0)
 57 | 
 58 |       } else{
 59 |         W[i,] <- round(rexp(1,3^((i-1)%%5)*1/10000)*(p_true + dilutions[i]*p_contam),0)
 60 | 
 61 |       }
 62 |     }
 63 |   }
 64 |   X <- matrix(0,ncol = 1, nrow = 15)
 65 |   Z <- cbind(c(rep(1,5),rep(0,10)),
 66 |              c(rep(0,5),rep(1,5), rep(0,5)),
 67 |              c(rep(0,10),rep(1,5)))
 68 |   Z_tilde <- matrix(dilutions/exp(mean(log(dilutions))), ncol = 1)
 69 |   Z_tilde_list <- list(Z_tilde*matrix(c(rep(1,5),rep(0,10))),
 70 |                        Z_tilde*matrix(c(rep(0,5),rep(1,5),rep(0,5))),
 71 |                        Z_tilde*matrix(c(rep(0,10),rep(1,5))))
 72 | 
 73 |   Z_tilde_gamma_cols <- 1
 74 |   Z_tilde <- NULL
 75 |   gammas <- apply(W,1,function(x) log(sum(x)))
 76 |   gammas_fixed_indices <- rep(F,length(gammas))
 77 |   P <- rbind(p_mock1,
 78 |              p_mock2,
 79 |              rep(.1,10))
 80 |   P_fixed_indices <- matrix(FALSE, nrow = 3, ncol = 10)
 81 |   P_fixed_indices[1:2,] <- TRUE
 82 |   B <- matrix(0,ncol = 10, nrow = 1)
 83 |   B_fixed_indices <- matrix(TRUE, ncol = 10, nrow = 1)
 84 |   X_tilde <- matrix(0,ncol = 1, nrow = 1)
 85 |   P_tilde <- matrix(1/10,ncol = 10, nrow = 1)
 86 |   P_tilde_fixed_indices <- matrix(FALSE, ncol = 10, nrow = 1)
 87 |   gamma_tilde <- matrix(0,nrow = 1, ncol = 1)
 88 |   gamma_tilde_fixed_indices <- matrix(FALSE, nrow = 1, ncol = 1)
 89 |   alpha_tilde <- c(0,0)
 90 | 
 91 |   parameter_dfs <- parameters_to_dataframes(P,
 92 |                                             P_fixed_indices,
 93 |                                             P_tilde,
 94 |                                             P_tilde_fixed_indices,
 95 |                                             B,
 96 |                                             B_fixed_indices,
 97 |                                             gammas,
 98 |                                             gammas_fixed_indices,
 99 |                                             gamma_tilde,
100 |                                             gamma_tilde_fixed_indices,
101 |                                             alpha_tilde = alpha_tilde)
102 | 
103 |   varying_df <- parameter_dfs$varying
104 |   fixed_df <- parameter_dfs$fixed
105 |   varying_lr_df <- ra_to_lr(varying_df)
106 | 
107 | 
108 | 
109 |   K <- max(c(varying_df$k[varying_df$param == "P"],
110 |              fixed_df$k[fixed_df$param == "P"]))
111 | 
112 |   fixed_P_multipliers <- sapply(1:K, function(k)
113 |     1 - sum(fixed_df$value[fixed_df$param == "P"&
114 |                              fixed_df$k ==k]))
115 | 
116 |   K_tilde <- max(c(varying_df$k[varying_df$param == "P_tilde"],
117 |                    fixed_df$k[fixed_df$param == "P_tilde"]))
118 | 
119 |   fixed_P_tilde_multipliers <- sapply(1:K_tilde, function(k)
120 |     1 - sum(fixed_df$value[fixed_df$param == "P_tilde"&
121 |                              fixed_df$k ==k]))
122 | 
123 |   Ak_list <- get_Ak_list(fixed_df,
124 |                          varying_df,
125 |                          varying_lr_df)
126 | 
127 |   A_tilde_k_list <- get_A_tilde_k_list(fixed_df,
128 |                                        varying_df,
129 |                                        varying_lr_df)
130 | 
131 |   # message("created Ak_list and A_tilde_k_list")
132 | 
133 |   #calculate at outset of optimization  pass as argument to mean_jac_lr, etc.
134 |   which_k_p <- sapply(1:K, function(k) ifelse(is.null(Ak_list[[k]]),
135 |                                               NA, k))
136 | 
137 |   which_k_p <- which_k_p[!is.na(which_k_p)]
138 | 
139 |   #calculate at outset of optimization  pass as argument to mean_jac_lr, etc.
140 |   which_k_p_tilde <- sapply(1:K_tilde,
141 |                             function(k) ifelse(
142 |                               is.null(A_tilde_k_list[[k]]),
143 |                               NA,k
144 |                             ))
145 | 
146 |   which_k_p_tilde <- which_k_p_tilde[!is.na(which_k_p_tilde)]
147 | 
148 |   # message("saved which_k_p and which_k_p_tilde")
149 | 
150 |   #calculate at outset of optimization
151 |   which_B_rows <- unique(varying_df$k[varying_df$param == "B"])
152 |   which_B_rows <- which_B_rows[order(which_B_rows)]
153 | 
154 |   #calculate at outset of optimization
155 |   which_B_keep <- lapply(which_B_rows,
156 |                          function(k) sapply(1:(J - 1),
157 |                                             function(j)
158 |                                               j %in% varying_lr_df$j[
159 |                                                 varying_lr_df$param == "B" &
160 |                                                   varying_lr_df$k == k]
161 |                          ))
162 |   which_B_keep <- do.call(rbind,which_B_keep)
163 | 
164 |   # message("saved which_B_keep")
165 | 
166 |   which_gammas <- unique(varying_df$k[varying_df$param == "gamma"])
167 | 
168 |   which_gamma_tilde <- unique(varying_df$k[varying_df$param == "gamma_tilde"])
169 | 
170 |   which_unconstrained <- varying_lr_df$param %in% c("B","gamma","gamma_tilde")
171 |   which_rho <- varying_lr_df$param %in% c("rho")
172 |   which_rho_tilde <- varying_lr_df$param %in% c("rho_tilde")
173 |   npar <- nrow(varying_lr_df)
174 | 
175 |   rho_tilde_k = log(P_tilde[1,1:9]/P_tilde[1,10])
176 |   i <- 1
177 | 
178 |   function_value <- mu_d_rho_tilde_faster(i = i,
179 |                                           J = 10,
180 |                                           k_tilde = 1,
181 |                                           gammas = gammas,
182 |                                           B = B,
183 |                                           rho_tilde_k = rho_tilde_k,
184 |                                           A_tilde_k_list = A_tilde_k_list,
185 |                                           fixed_P_tilde_multipliers = 1,
186 |                                           X_tilde = X_tilde,
187 |                                           Z_tilde = Z_tilde,
188 |                                           Z_tilde_gamma_cols = Z_tilde_gamma_cols,
189 |                                           gamma_tilde = gamma_tilde,
190 |                                           alpha_tilde = alpha_tilde,
191 |                                           Z_tilde_list = Z_tilde_list)
192 | 
193 |   #convert to matrix from dgeMatrix
194 |   function_value <- as.matrix(function_value)
195 | 
196 |   #remove empty dimnames
197 |   dimnames(function_value) <- NULL
198 | 
199 |   Z_tilde <- construct_Z_tilde(Z_tilde_list,
200 |                                alpha_tilde)
201 | 
202 | 
203 |   direct_d_mu_dP_tilde_k <- diag(as.numeric((exp(gammas[i])*Z_tilde[i,,drop = FALSE])%*%exp(X_tilde%*%B + gamma_tilde%*%matrix(1,ncol = 10,
204 |                                                                                                            nrow = 1))))
205 |   direct_dP_tilde_k_drho_tilde_k <- cbind(diag(exp(rho_tilde_k)/(sum(c(1,exp(rho_tilde_k))))),0) -
206 |     outer(c(exp(rho_tilde_k))/(sum(c(1,exp(rho_tilde_k)))),c(exp(rho_tilde_k),1)/(sum(c(1,exp(rho_tilde_k)))))
207 |   direct_calculation <- t(direct_dP_tilde_k_drho_tilde_k%*%direct_d_mu_dP_tilde_k)
208 | 
209 |   expect_equal(function_value, direct_calculation)
210 | 
211 |   get_mean <- function(rho_tilde_k){
212 |     temp_lr_df <- varying_lr_df
213 |     temp_lr_df$value[temp_lr_df$param == "rho_tilde"] <- rho_tilde_k
214 |     temp_params <- dataframes_to_parameters(fixed_df,
215 |                                        lr_to_ra(fixed_df,
216 |                                                 temp_lr_df,
217 |                                                 varying_df))
218 | 
219 |    return( meaninate(gammas = temp_params$gammas,
220 |               B = temp_params$B,
221 |               Z = Z,
222 |               P = P,
223 |               X = X,
224 |               X_tilde = X_tilde,
225 |               Z_tilde = NULL,
226 |               Z_tilde_gamma_cols = 1,
227 |               P_tilde = temp_params$P_tilde,
228 |               gamma_tilde = temp_params$gamma_tilde,
229 |               alpha_tilde = temp_params$alpha_tilde,
230 |               Z_tilde_list = Z_tilde_list)[1,1])
231 | 
232 | 
233 |   }
234 | 
235 |   num_grad <-
236 |     numDeriv::grad(get_mean,
237 |                    varying_lr_df$value[varying_lr_df$param == "rho_tilde"])
238 | 
239 |   expect_equal(function_value[1,],num_grad)
240 | 
241 | }
242 | )
243 | 


--------------------------------------------------------------------------------
/tests/testthat/test-par_to_jacobian_row.R:
--------------------------------------------------------------------------------
  1 | test_that("P derivatives work in simple case", {
  2 | 
  3 |   X <- matrix(1)
  4 |   X_tilde <- matrix(1)
  5 |   Z <- matrix(1)
  6 |   Z_tilde <- matrix(1)
  7 |   Z_tilde_gamma_cols <- 1
  8 | 
  9 |   P <- matrix(rep(1/8, 8), nrow = 1)
 10 |   P_fixed_indices <- matrix(rep(FALSE,8), nrow = 1)
 11 |   P_tilde <- matrix((1:8)/sum(1:8), nrow = 1)
 12 |   P_tilde_fixed_indices <- matrix(rep(TRUE,8), nrow = 1)
 13 |   B <- matrix(c(-3:3,0),nrow = 1)
 14 |   B_fixed_indices <- matrix(rep(TRUE,8), nrow = 1)
 15 |   gammas <- 8
 16 |   gammas_fixed_indices <- TRUE
 17 |   gamma_tilde <- matrix(log(100))
 18 |   gamma_tilde_fixed_indices <- matrix(TRUE)
 19 | 
 20 |   dfs <- parameters_to_dataframes(P,
 21 |                                   P_fixed_indices,
 22 |                                   P_tilde,
 23 |                                   P_tilde_fixed_indices,
 24 |                                   B,
 25 |                                   B_fixed_indices,
 26 |                                   gammas,
 27 |                                   gammas_fixed_indices,
 28 |                                   gamma_tilde,
 29 |                                   gamma_tilde_fixed_indices)
 30 | 
 31 |   params <- dataframes_to_parameters(dfs$fixed,
 32 |                                      dfs$varying)
 33 | 
 34 |   fixed_status <- dfs$fixed
 35 |   fixed_status$value <- 0
 36 |   varying_status <- dfs$varying
 37 |   varying_status$value <- 1
 38 | 
 39 |   param_status <- dataframes_to_parameters(fixed_status,
 40 |                                            varying_status)
 41 | 
 42 |   function_output <- par_to_jacobian_row(params,
 43 |                       param_status,
 44 |                       i = 1,
 45 |                       j = 1,
 46 |                       X,
 47 |                       Z,
 48 |                       X_tilde,
 49 |                       Z_tilde,
 50 |                       Z_tilde_gamma_cols)
 51 |   theoretical_output <- c(exp(gammas + B[1]),rep(0,7))
 52 | 
 53 |   expect_equal(function_output, theoretical_output)
 54 | })
 55 | 
 56 | 
 57 | test_that("More mean derivatives work in simple case", {
 58 | 
 59 |   X <- matrix(1)
 60 |   X_tilde <- matrix(1)
 61 |   Z <- matrix(1)
 62 |   Z_tilde <- matrix(1)
 63 |   Z_tilde_gamma_cols <- 1
 64 | 
 65 |   P <- matrix(rep(1/8, 8), nrow = 1)
 66 |   P_fixed_indices <- matrix(rep(FALSE,8), nrow = 1)
 67 |   P_tilde <- matrix((1:8)/sum(1:8), nrow = 1)
 68 |   P_tilde_fixed_indices <- matrix(rep(TRUE,8), nrow = 1)
 69 |   B <- matrix(c(-3:3,0),nrow = 1)
 70 |   B_fixed_indices <- matrix(rep(FALSE,8), nrow = 1)
 71 |   gammas <- 8
 72 |   gammas_fixed_indices <- FALSE
 73 |   gamma_tilde <- matrix(log(100))
 74 |   gamma_tilde_fixed_indices <- matrix(FALSE)
 75 | 
 76 |   dfs <- parameters_to_dataframes(P,
 77 |                                   P_fixed_indices,
 78 |                                   P_tilde,
 79 |                                   P_tilde_fixed_indices,
 80 |                                   B,
 81 |                                   B_fixed_indices,
 82 |                                   gammas,
 83 |                                   gammas_fixed_indices,
 84 |                                   gamma_tilde,
 85 |                                   gamma_tilde_fixed_indices)
 86 | 
 87 |   params <- dataframes_to_parameters(dfs$fixed,
 88 |                                      dfs$varying)
 89 | 
 90 |   fixed_status <- dfs$fixed
 91 |   if(nrow(fixed_status)>0){
 92 |   fixed_status$value <- 0}
 93 |   varying_status <- dfs$varying
 94 |   varying_status$value <- 1
 95 | 
 96 |   param_status <- dataframes_to_parameters(fixed_status,
 97 |                                            varying_status)
 98 | 
 99 |   function_output <- par_to_jacobian_row(params,
100 |                                          param_status,
101 |                                          i = 1,
102 |                                          j = 1,
103 |                                          X,
104 |                                          Z,
105 |                                          X_tilde,
106 |                                          Z_tilde,
107 |                                          Z_tilde_gamma_cols)
108 |   theoretical_output <- c(exp(gammas + B[1]),rep(0,7),
109 |                           c(P[,1]*exp(gammas + B[,1]) +
110 |                           P_tilde[,1]*exp(gammas + gamma_tilde + B[,1]),
111 |                           rep(0,6)),
112 |                           P[,1]*exp(gammas + B[,1]) +
113 |                             P_tilde[,1]*exp(gammas + gamma_tilde + B[,1]),
114 |                             P_tilde[,1]*exp(gammas + gamma_tilde + B[,1]))
115 | 
116 | 
117 | 
118 | 
119 |   expect_equal(function_output, theoretical_output)
120 | })
121 | 


--------------------------------------------------------------------------------
/tests/testthat/test-safe_divide.R:
--------------------------------------------------------------------------------
 1 | test_that("safe_divide works when numerator = denominator = 0", {
 2 | 
 3 |   expect_equal(safe_divide(0,0),1)
 4 | })
 5 | 
 6 | test_that("safe_divide works when numerator and denominator are positive", {
 7 |   expect_equal(safe_divide(1,2),0.5)
 8 | })
 9 | 
10 | test_that("safe_divide returns correct penalty when numerator nonzero and denominator 0", {
11 |   expect_equal(safe_divide(1,0,100),100)
12 | })
13 | 


--------------------------------------------------------------------------------
/tests/testthat/test-simulate_paper_data.R:
--------------------------------------------------------------------------------
 1 | test_that("Simulated data is reproducible", {
 2 |   W1 <- simulate_paper_data(n = 1,
 3 |                            J = 5,
 4 |                            B_multiplier = 1,
 5 |                            distrib = "Poisson",
 6 |                            seed = 0)
 7 | 
 8 |   W2 <- simulate_paper_data(n = 1,
 9 |                            J = 5,
10 |                            B_multiplier = 1,
11 |                            distrib = "Poisson",
12 |                            seed = 0)
13 | 
14 |   expect_equal(W1,W2)
15 | 
16 |   W1 <- simulate_paper_data(n = 1,
17 |                             J = 5,
18 |                             B_multiplier = 1,
19 |                             distrib = "NB",
20 |                             seed = 0)
21 | 
22 |   W2 <- simulate_paper_data(n = 1,
23 |                             J = 5,
24 |                             B_multiplier = 1,
25 |                             distrib = "NB",
26 |                             seed = 0)
27 | 
28 |   expect_equal(W1,W2)
29 | })
30 | 
31 | test_that("Simulated data is reasonably unique", {
32 |   W1 <- simulate_paper_data(n = 1,
33 |                             J = 5,
34 |                             B_multiplier = 1,
35 |                             distrib = "Poisson",
36 |                             seed = 0)
37 | 
38 |   W2 <- simulate_paper_data(n = 1,
39 |                             J = 5,
40 |                             B_multiplier = 1,
41 |                             distrib = "Poisson",
42 |                             seed = 1)
43 | 
44 |   expect_true(all(W1 != W2))
45 | 
46 |   W1 <- simulate_paper_data(n = 1,
47 |                             J = 5,
48 |                             B_multiplier = 1,
49 |                             distrib = "NB",
50 |                             seed = 0)
51 | 
52 |   W2 <- simulate_paper_data(n = 1,
53 |                             J = 5,
54 |                             B_multiplier = 1,
55 |                             distrib = "NB",
56 |                             seed = 1)
57 | 
58 |   expect_true(all(W1 != W2))
59 | })
60 | 
61 | 
62 | 


--------------------------------------------------------------------------------
/tests/testthat/test-test_meanination.R:
--------------------------------------------------------------------------------
 1 | test_that("mean makes sense in sample-read-only setting", {
 2 |   gammas <- log(3*(1:5))
 3 |   B <- matrix(rep(0,3),nrow = 1)
 4 |   X <- matrix(1,nrow = 5,ncol = 1)
 5 |   Z <- matrix(1,nrow = 5, ncol = 1)
 6 |   P <- matrix((1:3)/6,nrow = 1)
 7 |   X_tilde <- matrix(0,ncol = 1, nrow = 1)
 8 |   P_tilde <- P
 9 |   Z_tilde <- Z*0
10 |   gamma_tilde <- matrix(0,ncol = 1, nrow = 1)
11 |   means <- meaninate(gammas = gammas,
12 |                      B = B,
13 |                      Z = Z,
14 |                      X = Z,
15 |                      P = P,
16 |                      Z_tilde = Z_tilde,
17 |                      X_tilde = X_tilde,
18 |                      Z_tilde_gamma_cols = 1,
19 |                      P_tilde = P_tilde,
20 |                      gamma_tilde = gamma_tilde)
21 |   
22 |   #make sure meaninate returns a matrix
23 |   expect_true(is.matrix(means))
24 |   #make sure its dimensions are correct
25 |   expect_equal(dim(means),c(5,3))
26 |   #test that P is being used correctly here
27 |   expect_equal(means[1,3]/means[1,1],3)
28 |   #test that gammas are being used correctly here
29 |   # expect_equal
30 |   
31 | })
32 | 


--------------------------------------------------------------------------------
/tests/testthat/test-universal_test.R:
--------------------------------------------------------------------------------
 1 | # test_that("multiplication works", {
 2 | #   n <- 10
 3 | #   sim_p_10 <- numeric(100)
 4 | #   set.seed(4939323)
 5 | #   for(sim in 1:100){
 6 | #     print(paste("Simulation ", sim, sep = "", collapse = ""))
 7 | #     W <- simulate_simple_data(matrix(0, nrow = 1, ncol = 2),
 8 | #                               distrib = "Poisson",
 9 | #                               n = n,
10 | #                               gamma_mean = 11)
11 | #
12 | #     full_model <- fit_simple_model(W,
13 | #                                      B_fixed_at_zero = FALSE)
14 | #
15 | #     null_model <- fit_simple_model(W,
16 | #                                    B_fixed_at_zero = TRUE)
17 | #
18 | #
19 | #
20 | #
21 | #     sim_p_10[sim] <- universal_test(W,
22 | #                                     full_model,
23 | #                                     null_model)
24 | #
25 | #
26 | #   }
27 | #
28 | #   qqplot(sim_p_10,runif(10000),type = "s")
29 | #
30 | #   n <- 10
31 | #   sim_p_10_nb <- numeric(100)
32 | #   set.seed(4939323)
33 | #   for(sim in 1:100){
34 | #     print(paste("Simulation ", sim, sep = "", collapse = ""))
35 | #     W <- simulate_simple_data(matrix(0, nrow = 1, ncol = 2),
36 | #                               distrib = "nb10",
37 | #                               n = n,
38 | #                               gamma_mean = 11)
39 | #
40 | #     full_model <- fit_simple_model(W,
41 | #                                    B_fixed_at_zero = FALSE)
42 | #
43 | #     null_model <- fit_simple_model(W,
44 | #                                    B_fixed_at_zero = TRUE)
45 | #
46 | #
47 | #
48 | #
49 | #     sim_p_10_nb[sim] <- universal_test(W,
50 | #                                     full_model,
51 | #                                     null_model,
52 | #                                     parallelize = TRUE)
53 | #
54 | #     print(sim_p_10_nb[sim])
55 | #
56 | #
57 | #   }
58 | #
59 | #   qqplot(sim_p_10,runif(10000),type = "s")
60 | # })
61 | 


--------------------------------------------------------------------------------