├── LICENSE ├── README.md ├── first_process ├── README.md ├── config.R ├── docs │ └── post-analysis.md ├── examples │ ├── process_continuous_results.R │ ├── upload_continuous_results.R │ ├── upload_dichotomous_results.R │ └── upload_loglinear_results.R ├── run_pipeline.R └── src │ ├── 01_create_template.R │ ├── 02_loglinear_models.R │ ├── 03_covariate_selection.R │ ├── 04_mixed_effects_models.R │ ├── 05_evidence_score_continuous.R │ ├── 05_evidence_score_dichotomous.R │ ├── 05_evidence_score_legacy.R │ ├── 05_evidence_score_loglinear.R │ ├── 05_evidence_score_mixed.R │ ├── upload_continuous.R │ ├── upload_dichotomous.R │ ├── upload_loglinear.R │ └── utils │ ├── continuous_functions.R │ ├── dichotomous_functions.R │ ├── egger_functions.R │ ├── extract_old_results.R │ ├── loglinear_functions.R │ ├── mixed_functions.R │ ├── plot_3_curves.R │ ├── prep_diet_data_function.R │ └── qsub_function.R ├── limetr ├── .gitignore ├── .travis.yml ├── LICENSE ├── Makefile ├── README.md ├── check_requirements.py ├── conda_pkg │ ├── build.sh │ ├── conda_build_config.yaml │ ├── meta.yaml │ └── run_test.sh ├── experiments │ └── test_trimming_with_certain_inlier.ipynb ├── setup.py ├── src │ └── limetr │ │ ├── Makefile │ │ ├── __init__.py │ │ ├── special_mat.f90 │ │ └── utils.py └── tests │ ├── check_limetr.py │ ├── check_utils.py │ ├── izmat_block_izdiag.py │ ├── izmat_block_izmm.py │ ├── izmat_block_izmv.py │ ├── izmat_izdiag.py │ ├── izmat_izeig.py │ ├── izmat_izmm.py │ ├── izmat_izmv.py │ ├── izmat_lsvd.py │ ├── izmat_zdecomp.py │ ├── limetr_gradient.py │ ├── limetr_gradientTrimming.py │ ├── limetr_lasso.py │ ├── limetr_objective.py │ ├── limetr_objectiveTrimming.py │ ├── projCappedSimplex.py │ ├── varmat_diag.py │ ├── varmat_dot.py │ ├── varmat_invDiag.py │ ├── varmat_invDot.py │ └── varmat_logDet.py ├── mrtool ├── .github │ └── workflows │ │ └── python-build.yml ├── .gitignore ├── .readthedocs.yml ├── LICENSE ├── Makefile ├── README.rst ├── docs │ ├── Makefile │ ├── make.bat │ ├── requirements.txt │ └── source │ │ ├── _static │ │ └── css │ │ │ └── custom.css │ │ ├── api_reference │ │ ├── index.rst │ │ ├── mrtool.core.rst │ │ ├── mrtool.cov_selection.rst │ │ └── mrtool.evidence_score.rst │ │ ├── concepts │ │ ├── data_gen │ │ │ ├── index.rst │ │ │ ├── range_exposure.rst │ │ │ ├── rr1_binary.rst │ │ │ └── rr2_log_linear.rst │ │ ├── index.rst │ │ ├── optimization │ │ │ └── index.rst │ │ └── priors │ │ │ └── index.rst │ │ ├── conf.py │ │ ├── examples │ │ ├── example_linear.rst │ │ └── index.rst │ │ └── index.rst ├── setup.py ├── src │ └── mrtool │ │ ├── __about__.py │ │ ├── __init__.py │ │ ├── core │ │ ├── __init__.py │ │ ├── cov_model.py │ │ ├── data.py │ │ ├── model.py │ │ ├── other_sampling.py │ │ ├── plots.py │ │ └── utils.py │ │ ├── cov_selection │ │ ├── __init__.py │ │ └── covfinder.py │ │ └── evidence_score │ │ ├── __init__.py │ │ ├── continuous.py │ │ ├── dichotomous.py │ │ ├── loglinear.py │ │ ├── mixed.py │ │ └── scorelator.py └── tests │ ├── test_covmodel.py │ ├── test_data.py │ └── test_utils.py ├── risks ├── README.md ├── alcohol_ihd │ ├── cleaning_1.R │ ├── cleaning_2.R │ ├── cleaning_3.R │ └── tables.R ├── chewing_tobacco │ ├── 01.0_rr_data_cleaning.R │ └── 02.0_rr_data_formatting.R ├── ipv_csa │ ├── 01_clean_all_extracted_data.R │ ├── 02_prep_csa_anorexia.R │ ├── 02_prep_csa_anx.R │ ├── 02_prep_csa_asthma.R │ ├── 02_prep_csa_aud.R │ ├── 02_prep_csa_bulimia.R │ ├── 02_prep_csa_conduct.R │ ├── 02_prep_csa_diabetes.R │ ├── 02_prep_csa_druguse.R │ ├── 02_prep_csa_hiv.R │ ├── 02_prep_csa_ihd.R │ ├── 
02_prep_csa_mat_abort_mis.R │ ├── 02_prep_csa_mdd.R │ ├── 02_prep_csa_schizophrenia.R │ ├── 02_prep_csa_selfharm.R │ ├── 02_prep_csa_sti.R │ ├── 02_prep_ipv_anx.R │ ├── 02_prep_ipv_hiv.R │ ├── 02_prep_ipv_mat_abort_mis.R │ ├── 02_prep_ipv_mdd.R │ ├── 02_prep_ipv_selfharm.R │ ├── 03_csa_main_forest_plots.R │ ├── 03_ipv_main_forest_plot.R │ └── README.md ├── processed_foods │ ├── Bop_summary_table.R │ ├── Data_cleaning_and_formatting.R │ ├── code_tocnvert_per_unitEffectisize_measures.R │ ├── create_bc_gamma_table.R │ └── create_parameter_table.R ├── red_meat │ ├── README.md │ ├── model_functions.R │ ├── prep_diet_data_function.R │ └── redmeat_aggregate_curve.R ├── smoking │ ├── binary_risk │ │ ├── 00_data_cleaning_binary.R │ │ ├── 01_binary_rr_pipeline.R │ │ ├── 02_upload_dichotomous_launcher.R │ │ ├── 03_forest_plot.R │ │ └── upload_dichotomous.R │ ├── config.R │ ├── continous_risk │ │ ├── 00_data_cleaning_formatting.R │ │ ├── 00_data_cleaning_new_extraction.R │ │ ├── 01_full_rr_pipeline.R │ │ ├── 02_upload_continuous_launcher.R │ │ ├── 03_create_draws.R │ │ ├── 04_format_rr_draws_non_cvd.R │ │ ├── 05_00_age_rr_data_cleaning.R │ │ ├── 05_01_age_rr_trend.R │ │ ├── 05_02_age_specific_rr_curves.R │ │ ├── age_rr_utils.R │ │ ├── helper_functions.R │ │ └── upload_continuous.R │ └── prep_data_function.R └── vegetables │ ├── Prep bias covariates for diet MR_BRT.r │ ├── README.md │ ├── config.R │ └── veg_TMREL.R └── second_process ├── .gitignore ├── LICENSE ├── README.md ├── examples ├── gbd2020_continuous_risk.py └── summary.ipynb ├── setup.py └── src └── espipeline ├── __about__.py ├── __init__.py ├── continuous.py ├── dichotomous.py ├── filemanager.py ├── loglinear.py ├── pipline.py ├── process.py ├── utils.py └── validator.py /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2022, IHME Math Sciences 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # burden-of-proof 2 | Time capsule for burden of proof paper. 
3 | This repository includes:
4 |
5 | * `limetr` is the meta-regression engine
6 | * `mrtool` provides the model specification interface
7 | * `first_process` is the first part of the data processing
8 | * `second_process` is the second part of the data processing
9 | * `risks` is risk-specific custom code
10 |
--------------------------------------------------------------------------------
/first_process/README.md:
--------------------------------------------------------------------------------
 1 | # Evidence score pipeline
 2 |
 3 | **Note: The pipeline is still under development.**
 4 |
 5 | ## Configuration
 6 |
 7 | The variables in `config.R` are global variables used by the scripts in the pipeline. Users can change their values to suit the task at hand.
 8 |
 9 | Users need to change some settings before running the pipeline, including `WORK_DIR`, `PROJ`, `VERSION_ID`, and `OUT_DIR`. Other settings are task-specific, including, but not limited to, `INPUT_DATA_DIR`, `OBS_VAR`, and an array of settings for the models in each stage. Users are advised to read through `config.R` to make sure the settings match their needs.
10 |
11 | ## Run script
12 |
13 | `run_pipeline.R` is the main script for running the pipeline. Users need to change `WORK_DIR` to the location where the scripts are saved.
14 |
15 | The pipeline consists of five stages:
16 | 1. Ensemble model with exposure only to get the signal; no random effects
17 | 2. Log-linear model to get the slope prior for covariate selection
18 | 3. Covariate selection model
19 | 4. Final mixed effects model that combines the signal and the selected covariates
20 | 5. Evidence score from the signal model and the final model; plots
21 |
22 | The stages run sequentially, but each stage is parallelized across risk-outcome pairs. Note that Stage 5 cannot be run in parallel on the cluster; it needs to be run manually after typing `repl_python()` and then `esc`. Details can be found in the script, and a sketch follows the Results section below.
23 |
24 | ## Results
25 |
26 | The evidence score plots will be saved in `05_evidence_score` under the specified `OUT_DIR`.
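A minimal sketch of the manual Stage 5 run, following `src/05_evidence_score_legacy.R` (the pair name and the paths under `OUT_DIR` are illustrative placeholders):

```R
library(reticulate)
library(mrbrt001, lib.loc = "/ihme/code/mscm/R/packages/")

# Load the fitted models saved by Stage 1 and Stage 4.
signal_model <- py_load_object("01_template_pkl_files/lpa_ihd.pkl", pickle = "dill")
final_model  <- py_load_object("04_mixed_effects_pkl_files/lpa_ihd.pkl", pickle = "dill")

# Open an interactive Python interpreter, then immediately type 'exit'
# (or hit escape) to get back to R -- this loads a required Python package.
repl_python()

# Score the pair and write the diagnostic plots.
evidence_score <- import("mrtool.evidence_score.scorelator")
scorelator <- evidence_score$ContinuousScorelator(
  signal_model = signal_model, final_model = final_model,
  alt_cov_names = as.list(c("b_0", "b_1")),
  ref_cov_names = as.list(c("a_0", "a_1")),
  name = "lpa_ihd"
)
scorelator$plot_model(folder = "05_evidence_score/")
score <- scorelator$get_score()
```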
27 | 28 | ## Useful links 29 | [GBD 2020 Guidance on Evidence Score](https://docs.google.com/document/d/1gP7-T6cxah2rLfjaTxWZaO0ejRw7Wk4eVFDGoszetj4/edit) 30 | 31 | [Introduction to MR-BRT](https://rpubs.com/rsoren/mrbrt_gbd2020) 32 | 33 | [MR-BRT examples](https://rpubs.com/rsoren/mrbrt_examples_gbd2020) -------------------------------------------------------------------------------- /first_process/config.R: -------------------------------------------------------------------------------- 1 | # Configuration of pipeline 2 | 3 | # User settings 4 | # ------------------------------------------------------------------------------ 5 | USER <- Sys.getenv("USER") 6 | WORK_DIR <- "/ihme/homes/jiaweihe/msca/mrbrt/evidence_score_pipeline" 7 | CODE_PATH <- paste0(WORK_DIR, "/src/") 8 | ARCHIVE <- "/mnt/team/msca/pub/archive/evidence-score/gbd2020" 9 | 10 | # Cluster settings 11 | # ------------------------------------------------------------------------------ 12 | PROJ <- "proj_mscm" 13 | SINGULARITY_IMG <- "/ihme/singularity-images/rstudio/ihme_rstudio_3631.img" 14 | 15 | # Version settings 16 | # ------------------------------------------------------------------------------ 17 | VERSION_ID <- "prod" 18 | 19 | # Directory settings 20 | # ------------------------------------------------------------------------------ 21 | OUT_DIR <- paste0("/ihme/scratch/users/", USER, "/evidence_score_pipeline/", VERSION_ID, "/") 22 | INPUT_DATA_DIR = "/home/j/temp/hkl1/mr_brt/03_evidence_score/input_data/for_ryan/version15" 23 | 24 | # Output directory for each stage 25 | SUB_DIRS <- c( 26 | paste0(OUT_DIR, "00_prepped_data"), 27 | paste0(OUT_DIR, "01_template_pkl_files"), 28 | paste0(OUT_DIR, "01_template_models"), 29 | paste0(OUT_DIR, "02_loglinear_models"), 30 | paste0(OUT_DIR, "02_loglinear_pkl_files"), 31 | paste0(OUT_DIR, "03_covariate_selection_models"), 32 | paste0(OUT_DIR, "03_covariate_selection_pkl_files"), 33 | paste0(OUT_DIR, "04_mixed_effects_models"), 34 | paste0(OUT_DIR, "04_mixed_effects_pkl_files"), 35 | paste0(OUT_DIR, "05_evidence_score") 36 | ) 37 | 38 | # data settings 39 | # ------------------------------------------------------------------------------ 40 | ALL_RO_PAIRS <- gsub(".csv", "", list.files(INPUT_DATA_DIR)) 41 | EXCLUDED_RO_PAIRS <- c("dairy_stroke", "fruit_oral", "fruit_larynx") 42 | RO_PAIRS <- ALL_RO_PAIRS[!(ALL_RO_PAIRS %in% EXCLUDED_RO_PAIRS)] 43 | RO_PAIRS <- c("alcohol") 44 | RO_PAIRS <- c("redmeat_colorectal") 45 | RO_PAIRS <- c("lpa_ihd", "bmi_diabetes", "bmi_uter_canc", 46 | "bmi_leuk", "fruit_ihd", "redmeat_colorectal", 47 | "nuts_ihd", "wholegrain_ihd", "fiber_stroke", 48 | "alcohol_lri", "alcohol_tb", "lung_cancer", 49 | "copd", "ihd_19", "diabetes", "peptic_ulcer") 50 | 51 | OBS_VAR <- "ln_effect" 52 | OBS_SE_VAR <- "ln_se" 53 | STUDY_ID_VAR <- "nid" 54 | 55 | ALT_EXPOSURE_COLS <- c("b_0", "b_1") 56 | REF_EXPOSURE_COLS <- c("a_0", "a_1") 57 | 58 | # Sarah's 59 | # RO_PAIRS <- c("air_pmhap_neo_lung", "air_pmhap_lri", "air_pmhap_t2_dm", 60 | # "air_pmhap_resp_copd", "air_pmhap_cvd_stroke_60") 61 | # ALT_EXPOSURE_COLS <- c("conc") 62 | # REF_EXPOSURE_COLS <- c("conc_den") 63 | 64 | 65 | # model settings 66 | # ------------------------------------------------------------------------------ 67 | BIAS_COVARIATES_AS_INTX <- TRUE 68 | 69 | # For diet 70 | # DIRECTION = list( 71 | # calcium = "decreasing", 72 | # cheese = "decreasing", 73 | # dairy = "decreasing", 74 | # fiber = "decreasing", 75 | # fish = "decreasing", 76 | # fruit = "decreasing", 77 | # legumes = "decreasing", 
78 | #   milk = "decreasing",
79 | #   nuts = "decreasing",
80 | #   omega3 = "decreasing",
81 | #   veg = "decreasing",
82 | #   wholegrain = "decreasing",
83 | #   pufa = "decreasing",
84 | #   yogurt = "decreasing",
85 | #   procmeat = "increasing",
86 | #   redmeat = "increasing",
87 | #   sodium = "increasing",
88 | #   ssb = "increasing",
89 | #   sugar = "increasing",
90 | #   transfat = "increasing"
91 | # )
92 |
93 | # Get monotonicity direction.
94 | # tmp <- read.csv("/ihme/homes/jiaweihe/msca/mrbrt/evidence_score_pipeline/all_pairs.csv")
95 | # tmp$mono <- ifelse(tmp$type=='protective', 'decreasing', 'increasing')
96 | # DIRECTION <- setNames(as.character(tmp$mono), tmp$ro_pair)
97 | DIRECTION = list(
98 |   lpa_ihd = "increasing"
99 | )
100 |
101 |
102 | BETA_PRIOR_MULTIPLIER = 0.1
103 | COV_FINDER_CONFIG = list(
104 |   pre_selected_covs = list("exposure_linear"),
105 |   num_samples = 1000L,
106 |   power_range = list(-4, 4),
107 |   power_step_size = 0.05,
108 |   laplace_threshold = 1e-5,
109 |   inlier_pct = 1.0,
110 |   bias_zero = TRUE
111 | )
112 |
113 | # Not used by new pipeline
114 |
115 | # N_I_KNOTS <- 3
116 | # PRIOR_VAR_RSLOPE = 1e-6
117 | # PRIOR_VAR_MAXDER <- 1e-4
118 | # MONOSPLINE_SLOPE_MULTIPLIER <- 2
119 |
120 | # MONOSPLINE_CONFIG = list(
121 | #   use_re = TRUE,
122 | #   use_spline = TRUE,
123 | #   spline_degree = 3L,
124 | #   spline_knots_type = 'domain',
125 | #   spline_r_linear = TRUE,
126 | #   prior_spline_funval_uniform = array(c(-1 + 1e-6, 19)),
127 | #   prior_spline_num_constraint_points = 150L,
128 | #   spline_knots = array(seq(0, 1, length.out = N_I_KNOTS + 2)),
129 | #   prior_spline_maxder_gaussian = cbind(rbind(rep(0, N_I_KNOTS),
130 | #     rep(sqrt(PRIOR_VAR_MAXDER), N_I_KNOTS)), c(0, sqrt(PRIOR_VAR_RSLOPE))),
131 | #   prior_spline_der2val_gaussian = NULL,
132 | #   prior_spline_der2val_gaussian_domain = array(c(0.0, 1.0)),
133 | #   name = "exposure"
134 | # )
135 |
136 | # MONOSPLINE_BIAS_CONFIG = list(
137 | #   spline_degree = 3L
138 | # )
139 |
140 | # LOGLINEAR_BIAS_CONFIG = list(
141 | #   spline_degree = 3L
142 | # )
143 |
--------------------------------------------------------------------------------
/first_process/docs/post-analysis.md:
--------------------------------------------------------------------------------
 1 | # Post Analysis
 2 |
 3 | Here we document the processes that follow model fitting, including
 4 |
 5 | * detecting and adjusting for publication bias
 6 | * computing the evidence score
 7 | * outputting diagnostic figures and data
 8 |
 9 | ## Detect Publication-Bias
10 |
11 | Publication-bias analysis is an important part of any systematic review.
12 | As a metric for evaluating the evidence in a dataset, the evidence score needs to take
13 | publication bias into account.
14 |
15 | To detect publication bias, we use a data-driven approach known as [Egger's Regression](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2127453/).
16 | The idea is simple: we test whether there is a significant correlation between the
17 | residuals and their standard errors. Egger's regression function can be found [here](https://stash.ihme.washington.edu/users/jiaweihe/repos/evidence_score_pipeline/browse/src/utils/continuous_functions.R#106-112), and a condensed version is sketched below.
18 |
19 | Interestingly, we find that the trimming algorithm helps protect against publication bias.
20 | In the process, we apply Egger's Regression to both the untrimmed and the trimmed data.
21 | Examples can be found [here](https://stash.ihme.washington.edu/users/jiaweihe/repos/evidence_score_pipeline/browse/src/05_evidence_score_continuous.R#39-42).
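Since the test is only a few lines, here is a condensed sketch of it, mirroring `egger_regression` in `src/utils/egger_functions.R`:

```R
# Egger-style test on SE-weighted residuals: a significantly nonzero mean
# indicates funnel-plot asymmetry, i.e. potential publication bias.
egger_regression <- function(residual, residual_sd, one_sided = TRUE) {
  weighted_residual <- residual / residual_sd
  r_mean <- mean(weighted_residual)
  r_sd <- 1 / sqrt(length(weighted_residual))  # SD of the mean under the null
  zscore <- abs(r_mean / r_sd)
  pval <- if (one_sided) 1 - pnorm(zscore) else 2 * (1 - pnorm(zscore))
  list(mean = r_mean, sd = r_sd, pval = pval)
}

# Usage, as in src/05_evidence_score_continuous.R:
# egger_model <- egger_regression(df$residual, df$residual_se)
# has_pub_bias <- egger_model$pval < 0.05
```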
22 |
23 | ## Adjust for Publication-Bias
24 |
25 | If publication bias has been detected, we adjust for it using an algorithm called [The Trim-and-Fill Method](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6571372/).
26 | The method is based on the assumption that the residuals should be distributed symmetrically; asymmetry suggests that certain studies are missing.
27 | The algorithm involves two major steps: iteratively trimming the data to get an accurate mean estimate, and filling in the "missing" data based on that estimate to get the final result.
28 |
29 | Our cases are slightly different from the ones considered in "The Trim-and-Fill Method".
30 | Nonetheless, the method can be modified and applied to our problem.
31 | The one major change we make is removing the "trim" step, since we have our own trimming and we trust our mean estimate. Our adjustment process then involves
32 |
33 | * using the rank statistics and the residuals to compute the number of points that need to be filled
34 | * filling the data and re-fitting the model (a condensed sketch is given at the end of this document)
35 |
36 | The function that creates the filled data can be found [here](https://stash.ihme.washington.edu/users/jiaweihe/repos/evidence_score_pipeline/browse/src/utils/continuous_functions.R#90-104),
37 | and the re-fitting step can be found [here](https://stash.ihme.washington.edu/users/jiaweihe/repos/evidence_score_pipeline/browse/src/05_evidence_score_continuous.R#53-70).
38 |
39 | ## Get Scores and Diagnostics
40 |
41 | Finally, we need to get [the evidence scores](https://stash.ihme.washington.edu/users/jiaweihe/repos/evidence_score_pipeline/browse/src/05_evidence_score_continuous.R#73-78) and diagnostics, including the [risk function plot](https://stash.ihme.washington.edu/users/jiaweihe/repos/evidence_score_pipeline/browse/src/05_evidence_score_continuous.R#88-93), the [residual funnel plot](https://stash.ihme.washington.edu/users/jiaweihe/repos/evidence_score_pipeline/browse/src/05_evidence_score_continuous.R#86), and the [summary dataframe](https://stash.ihme.washington.edu/users/jiaweihe/repos/evidence_score_pipeline/browse/src/05_evidence_score_continuous.R#96-103).
42 | Note that for dichotomous outcomes there is no residual plot step, because it would be exactly the same as the model plot.
43 |
44 | ## File Structure
45 |
46 | These processes are organized in `src/05_evidence_score_*.R`, and their corresponding functions are in `src/utils/*_functions.R`. Currently we have scripts for continuous and dichotomous outcomes.
47 | The old evidence score step is saved as `src/05_evidence_score_legacy.R`.
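For reference, here is the fill-and-refit step, condensed from `src/05_evidence_score_continuous.R` (`egger_regression` and `get_df_fill` live under `src/utils/`):

```R
# Detect bias on the non-outlier residuals, then fill and refit if needed.
df_no_outlier <- df[!df$outlier, ]
egger_model <- egger_regression(df_no_outlier$residual, df_no_outlier$residual_se)

if (egger_model$pval < 0.05) {
  # Add the rank-based "fill" points, then refit the linear model.
  df <- rbind(df, get_df_fill(df_no_outlier))

  data <- MRData()
  data$load_df(
    data = df[!df$outlier, ],
    col_obs = "obs",
    col_obs_se = "obs_se",
    col_covs = as.list(linear_model$cov_names),
    col_study_id = "study_id"
  )
  linear_model_fill <- MRBRT(data, cov_models = linear_model$cov_models)
  linear_model_fill$fit_model()
}
```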
48 | 49 | -------------------------------------------------------------------------------- /first_process/examples/process_continuous_results.R: -------------------------------------------------------------------------------- 1 | # unpack information from results and create draws 2 | rm(list = ls()) 3 | library(mrbrt001, lib.loc = "/ihme/code/mscm/R/packages/") 4 | 5 | # define functions 6 | # ================================================================================================= 7 | get_cov_names <- function(signal_model) { 8 | cov_model <- signal_model$sub_models[[1]]$cov_models[[1]] 9 | list(alt_covs = cov_model$alt_cov, 10 | ref_covs = cov_model$ref_cov) 11 | } 12 | 13 | get_risk_limits <- function(signal_model) { 14 | cov_names <- get_cov_names(signal_model) 15 | risk_data <- signal_model$data$get_covs(unlist(cov_names)) 16 | c(min(risk_data), max(risk_data)) 17 | } 18 | 19 | get_signal <- function(signal_model, risk) { 20 | cov_names <- get_cov_names(signal_model) 21 | risk_limits <- get_risk_limits(signal_model) 22 | df_covs <- data.frame( 23 | c(sapply(cov_names$ref_covs, function(x) rep(risk_limits[1], length.out = length(risk)), 24 | simplify = FALSE, USE.NAMES = TRUE), 25 | sapply(cov_names$alt_covs, function(x) risk, 26 | simplify = FALSE, USE.NAMES = TRUE)) 27 | ) 28 | data <- MRData() 29 | data$load_df(df_covs, col_covs=unlist(cov_names)) 30 | signal_model$predict(data) 31 | } 32 | 33 | get_beta <- function(linear_model) { 34 | beta <- linear_model$beta_soln 35 | names(beta) <- linear_model$cov_names 36 | specs <- mrbrt001::core$other_sampling$extract_simple_lme_specs(linear_model) 37 | beta_hessian <- mrbrt001::core$other_sampling$extract_simple_lme_hessian(specs) 38 | beta_sd <- 1/sqrt(diag(beta_hessian)) 39 | names(beta_sd) <- linear_model$cov_names 40 | c(beta["signal"], beta_sd["signal"]) 41 | } 42 | 43 | get_gamma <- function(linear_model) { 44 | gamma <- linear_model$gamma_soln[[1]] 45 | gamma_fisher <- linear_model$lt$get_gamma_fisher(linear_model$gamma_soln) 46 | gamma_sd <- 1/sqrt(diag(gamma_fisher)[[1]]) 47 | c(gamma, gamma_sd) 48 | } 49 | 50 | get_soln <- function(linear_model) { 51 | list( 52 | beta_soln = get_beta(linear_model), 53 | gamma_soln = get_gamma(linear_model) 54 | ) 55 | } 56 | 57 | get_ln_rr_draws <- function(signal_model, 58 | linear_model, 59 | risk, 60 | num_draws = 1000L, 61 | normalize_to_tmrel = FALSE, 62 | include_re = TRUE) { 63 | # set seed inside function 64 | set.seed(1234) 65 | 66 | signal <- get_signal(signal_model, risk) 67 | re_signal <- signal 68 | soln <- get_soln(linear_model) 69 | 70 | fe_samples <- rnorm(num_draws, mean=soln$beta[1], sd=soln$beta[2]) 71 | re_samples <- rnorm(num_draws, mean=0, sd=sqrt(soln$gamma[1] + 2*soln$gamma[2])) 72 | 73 | draws <- outer(signal, fe_samples) 74 | if (include_re) { 75 | draws <- draws + outer(re_signal, re_samples) 76 | } 77 | 78 | if (normalize_to_tmrel) { 79 | tmrel_index <- which.min(signal) 80 | draws <- apply(draws, 2, function(x) x - x[tmrel_index]) 81 | } 82 | 83 | df <- as.data.frame(cbind(risk, draws)) 84 | names(df) <- c("risk", sapply(1:num_draws, function(i) paste0("draw_", i))) 85 | return(df) 86 | } 87 | 88 | # process results 89 | # ================================================================================================= 90 | # load models 91 | signal_model_path <- "/mnt/team/msca/pub/archive/evidence-score/gbd2020-process/nuts_ihd/signal_model.pkl" 92 | linear_model_path <- 
"/mnt/team/msca/pub/archive/evidence-score/gbd2020-process/nuts_ihd/new_linear_model.pkl" 93 | 94 | signal_model <- py_load_object(filename = signal_model_path, pickle = "dill") 95 | linear_model <- py_load_object(filename = linear_model_path, pickle = "dill") 96 | 97 | # specify risk, you need to input the exposures that you want to predict 98 | risk <- 0:100 99 | 100 | # get_draws 101 | df <- get_ln_rr_draws(signal_model, 102 | linear_model, 103 | risk, 104 | num_draws = 1000L, 105 | normalize_to_tmrel = FALSE) 106 | 107 | # visual check draws 108 | draws <- df[, 2:ncol(df)] 109 | draw_mean <- apply(draws, 1, function(x) mean(x)) 110 | draw_lower <- apply(draws, 1, function(x) quantile(x, probs=.05)) 111 | draw_upper <- apply(draws, 1, function(x) quantile(x, probs=.95)) 112 | 113 | lines(risk, draw_mean) 114 | lines(risk, draw_lower) 115 | lines(risk, draw_upper) 116 | -------------------------------------------------------------------------------- /first_process/examples/upload_continuous_results.R: -------------------------------------------------------------------------------- 1 | rm(list = ls()) 2 | source("src/upload_continuous.R") 3 | 4 | results_folder <- "/mnt/team/msca/pub/archive/evidence-score-test/gbd2020-results" 5 | 6 | pair_info <- list( 7 | dairy_diabetes = list( 8 | rei_id = "unknown", 9 | cause_id = "unkown", 10 | risk_unit = "unknown", 11 | signal_model_path = file.path(results_folder, "dairy_diabetes", "signal_model.pkl"), 12 | linear_model_path = file.path(results_folder, "dairy_diabetes", "linear_model.pkl") 13 | ), 14 | air_pmhap_lri = list( 15 | rei_id = "unknown", 16 | cause_id = "unkown", 17 | risk_unit = "unknown", 18 | signal_model_path = file.path(results_folder, "air_pmhap_lri", "signal_model.pkl"), 19 | linear_model_path = file.path(results_folder, "air_pmhap_lri", "linear_model.pkl") 20 | ), 21 | fpg_neo_liver = list( 22 | rei_id = "unknown", 23 | cause_id = "unkown", 24 | risk_unit = "unknown", 25 | signal_model_path = file.path(results_folder, "fpg_neo_liver", "signal_model.pkl"), 26 | linear_model_path = file.path(results_folder, "fpg_neo_liver", "linear_model.pkl") 27 | ) 28 | ) 29 | 30 | for (pair in names(pair_info)) { 31 | print(paste0("upload pair=", pair)) 32 | results_folder <- file.path(ARCHIVE, pair) 33 | if (!dir.exists(results_folder)) { 34 | dir.create(results_folder) 35 | } 36 | do.call(upload_results, c(pair_info[[pair]], list(results_folder = results_folder))) 37 | } 38 | -------------------------------------------------------------------------------- /first_process/examples/upload_dichotomous_results.R: -------------------------------------------------------------------------------- 1 | rm(list = ls()) 2 | source("src/upload_dichotomous.R") 3 | 4 | results_folder <- "/mnt/team/msca/pub/archive/evidence-score-test/gbd2020-results" 5 | 6 | pair_info <- list( 7 | opioid_suicide = list( 8 | rei_id = "unknown", 9 | cause_id = "unkown", 10 | model_path = file.path(results_folder, "opioid_suicide", "model.pkl") 11 | ), 12 | idu_hepB = list( 13 | rei_id = "unknown", 14 | cause_id = "unkown", 15 | model_path = file.path(results_folder, "idu_hepB", "model.pkl") 16 | ), 17 | idu_hepC = list( 18 | rei_id = "unknown", 19 | cause_id = "unkown", 20 | model_path = file.path(results_folder, "idu_hepC", "model.pkl") 21 | ) 22 | ) 23 | 24 | for (pair in names(pair_info)) { 25 | print(paste0("upload pair=", pair)) 26 | results_folder <- file.path(ARCHIVE, pair) 27 | if (!dir.exists(results_folder)) { 28 | dir.create(results_folder) 29 | } 30 | 
do.call(upload_results, c(pair_info[[pair]], list(results_folder = results_folder))) 31 | } 32 | -------------------------------------------------------------------------------- /first_process/examples/upload_loglinear_results.R: -------------------------------------------------------------------------------- 1 | rm(list = ls()) 2 | source("src/upload_loglinear.R") 3 | 4 | 5 | pair_info <- list( 6 | air_no2_resp_asthma = list( 7 | rei_id = 404, 8 | cause_id = 515, 9 | risk_unit = "ppb", 10 | model_path = "/ihme/erf/GBD2020/air_no2/rr/models/20/model.pkl" 11 | ) 12 | ) 13 | 14 | 15 | for (pair in names(pair_info)) { 16 | print(paste0("upload pair=", pair)) 17 | results_folder <- file.path(ARCHIVE, pair) 18 | if (!dir.exists(results_folder)) { 19 | dir.create(results_folder) 20 | } 21 | do.call(upload_results, c(pair_info[[pair]], list(results_folder = results_folder))) 22 | } 23 | -------------------------------------------------------------------------------- /first_process/run_pipeline.R: -------------------------------------------------------------------------------- 1 | # 2 | # run_pipeline_parallel.R 3 | # 4 | 5 | library(dplyr) 6 | library(parallel) 7 | 8 | ##### 9 | # user params 10 | # 11 | WORK_DIR <- "/ihme/homes/jiaweihe/msca/mrbrt/evidence_score_pipeline" 12 | source(paste0(WORK_DIR, "/config.R")) 13 | source(paste0(WORK_DIR, "/src/utils/prep_diet_data_function.R")) 14 | source(paste0(WORK_DIR, "/src/utils/qsub_function.R")) 15 | 16 | 17 | ##### 18 | # create directories 19 | # 20 | 21 | if (!dir.exists(OUT_DIR)) { 22 | if (!dir.exists(dirname(OUT_DIR))) { 23 | dir.create(dirname(OUT_DIR)) 24 | } 25 | dir.create(OUT_DIR) 26 | } else { 27 | warning("Directory '", OUT_DIR, "' already exists") 28 | } 29 | 30 | for (dir in SUB_DIRS) { 31 | if (!dir.exists(dir)) { 32 | dir.create(dir) 33 | } else { 34 | warning("Directory '", dir, "' already exists") 35 | } 36 | } 37 | 38 | 39 | submit_jobs <- function(pair, WORK_DIR) { 40 | # stage 1, create signal 41 | submit_sub_job(pair, "01_create_template.R", "_01_template", WORK_DIR) 42 | qwait("01_create_template_models", pair) 43 | 44 | # # stage 2, loglinear model 45 | submit_sub_job(pair, "02_loglinear_models.R", "_02_loglinear", WORK_DIR) 46 | qwait("02_loglinear_models", pair) 47 | 48 | # stage 3, covariate selection 49 | submit_sub_job(pair, "03_covariate_selection.R", "_03_cov_selection", WORK_DIR) 50 | qwait("03_covariate_selection_models", pair) 51 | 52 | # stage 4, final model 53 | submit_sub_job(pair, "04_mixed_effects_models.R", "_04_mixed_effects", WORK_DIR) 54 | } 55 | 56 | ##### 57 | # data prep for diet risks 58 | # 59 | 60 | stage0_results <- lapply(RO_PAIRS, function(ro_pair) { 61 | x <- try({ 62 | prep_diet_data( 63 | ro_pair = ro_pair, 64 | obs_var = OBS_VAR, 65 | obs_se_var = OBS_SE_VAR, 66 | ref_vars = REF_EXPOSURE_COLS, 67 | alt_vars = ALT_EXPOSURE_COLS, 68 | allow_ref_gt_alt = FALSE, 69 | diet_dir = INPUT_DATA_DIR, 70 | study_id_var = "nid", 71 | verbose = TRUE 72 | ) 73 | }) 74 | 75 | saveRDS(x, paste0(OUT_DIR, "00_prepped_data/", ro_pair, ".RDS")) 76 | return(x) 77 | }) 78 | 79 | names(stage0_results) <- RO_PAIRS 80 | saveRDS(stage0_results, paste0(OUT_DIR, "stage0_results.RDS")) 81 | 82 | # Submit stage jobs for each pair 83 | mclapply(RO_PAIRS, function(pair) { 84 | submit_jobs(pair, WORK_DIR) 85 | }, mc.cores = length(RO_PAIRS)) 86 | 87 | -------------------------------------------------------------------------------- /first_process/src/01_create_template.R: 
-------------------------------------------------------------------------------- 1 | # 2 | # 01_create_template.R 3 | # 4 | # 5 | library(dplyr) 6 | library(mrbrt001, lib.loc = "/ihme/code/mscm/R/packages/") 7 | 8 | args <- commandArgs(trailingOnly = TRUE) 9 | 10 | ro_pair <- args[1] 11 | out_dir <- args[2] 12 | WORK_DIR <- args[3] 13 | setwd(WORK_DIR) 14 | source("./config.R") 15 | 16 | # xiaochen's example 17 | # model <- py_load_object( 18 | # filename="/ihme/homes/xdai88/gbd_tobacco/gbd2019_alcohol/evidence_score/testing/test_run1_2020_09_05/04_monospline_pkl_files/lung_cancer_0.9_ensemble.pkl", 19 | # pickle = "dill") 20 | # data = model$data 21 | # df <- data$to_df() 22 | 23 | # diet example 24 | # data <- readRDS(paste0(out_dir, "00_prepped_data/", ro_pair, ".RDS")) 25 | # df <- data$df 26 | 27 | library(readxl) 28 | df_meta <- read_excel("/ihme/homes/jiaweihe/msca/mrbrt/evidence_score_pipeline/evidence_score.xlsx") 29 | infile <- df_meta[df_meta$ro_pair==ro_pair, 'data'] 30 | df <- read.csv(infile[[1]]) 31 | 32 | # Deal with inconsistency of naming 33 | OBS_VAR <- "ln_effect" 34 | OBS_SE_VAR <- "ln_se" 35 | STUDY_ID_VAR <- "nid" 36 | 37 | if (!(STUDY_ID_VAR %in% names(df))){ 38 | STUDY_ID_VAR <- "study_id" 39 | } 40 | 41 | if (!(OBS_VAR %in% names(df))){ 42 | OBS_VAR <- "obs" 43 | if (!(OBS_VAR %in% names(df))){ 44 | OBS_VAR <- "log_rr" 45 | } 46 | } 47 | 48 | if (!(OBS_SE_VAR %in% names(df))){ 49 | OBS_SE_VAR <- "obs_se" 50 | if (!(OBS_SE_VAR %in% names(df))){ 51 | OBS_SE_VAR <- "log_se" 52 | } 53 | } 54 | 55 | # Specify all the columns you need for your application 56 | cov_names <- c(REF_EXPOSURE_COLS, ALT_EXPOSURE_COLS) 57 | 58 | mrdata <- MRData() 59 | 60 | mrdata$load_df( 61 | data = df, 62 | col_obs = OBS_VAR, 63 | col_obs_se = OBS_SE_VAR, 64 | col_study_id = STUDY_ID_VAR, 65 | col_covs = as.list(cov_names) 66 | ) 67 | 68 | monotonicity <- DIRECTION[ro_pair][[1]] 69 | # if (is.na(monotonicity)){ 70 | # monotonicity <- NULL 71 | # } 72 | 73 | N_I_KNOTS <- 3 74 | PRIOR_VAR_RSLOPE = 1e-6 75 | PRIOR_VAR_MAXDER <- 1e-4 76 | 77 | ensemble_cov_model <- LogCovModel( 78 | alt_cov = ALT_EXPOSURE_COLS, 79 | ref_cov = REF_EXPOSURE_COLS, 80 | use_spline = TRUE, 81 | use_re = FALSE, 82 | spline_degree = 3L, 83 | spline_knots_type = 'domain', 84 | spline_r_linear = TRUE, 85 | prior_spline_funval_uniform = array(c(-1 + 1e-6, 19)), 86 | prior_spline_num_constraint_points = 150L, 87 | spline_knots = array(seq(0, 1, length.out = N_I_KNOTS + 2)), 88 | prior_spline_maxder_gaussian = cbind(rbind(rep(0, N_I_KNOTS), 89 | rep(sqrt(PRIOR_VAR_MAXDER), N_I_KNOTS)), c(0, sqrt(PRIOR_VAR_RSLOPE))), 90 | prior_spline_der2val_gaussian = NULL, 91 | prior_spline_der2val_gaussian_domain = array(c(0.0, 1.0)), 92 | prior_spline_monotonicity = monotonicity 93 | ) 94 | 95 | # Create knot samples 96 | knots <- import("mrtool.core.model") 97 | knots_samples <- knots$create_knots_samples( 98 | data = mrdata, l_zero = TRUE, num_splines = 50L, 99 | num_knots = 5L, width_pct = 0.2, 100 | alt_cov_names = ALT_EXPOSURE_COLS, 101 | ref_cov_names = REF_EXPOSURE_COLS 102 | ) 103 | 104 | # Ensemble model with exposure only 105 | signal_model <- MRBeRT(mrdata, 106 | ensemble_cov_model=ensemble_cov_model, 107 | ensemble_knots=knots_samples, 108 | inlier_pct=0.9) 109 | 110 | signal_model$fit_model(inner_print_level=5L, inner_max_iter=200L, 111 | outer_step_size=200L, outer_max_iter=100L) 112 | 113 | # create "new covariates" for later use 114 | signal <- signal_model$predict(mrdata, predict_for_study=FALSE) 115 | 116 | # Extract 
weights of data point
117 | w <- t(do.call(rbind,
118 |   lapply(1:length(signal_model$sub_models),
119 |     function(i){signal_model$sub_models[[i]]$w_soln}))
120 | ) %*% signal_model$weights
121 |
122 | df_data <- mrdata$to_df()
123 | # Assign signal to the data for use in later stages
124 | df_data$signal <- signal
125 | # Drop trimmed data (ensemble weight below 0.1)
126 | df_data <- df_data[w >= 0.1,]
127 |
128 | # Save data and model
129 | py_save_object(object = signal_model,
130 |                filename = paste0(out_dir, "01_template_pkl_files/", ro_pair, ".pkl"),
131 |                pickle = "dill")
132 |
133 | # NOTE: the prepped-data list `data` (from the commented-out "diet example"
134 | # above) is never created on the Excel input path, so build the output
135 | # directly from `df`.
136 | out <- list(df = df, df_data = df_data)
137 | saveRDS(out, paste0(out_dir, "01_template_models/", ro_pair, ".RDS"))
--------------------------------------------------------------------------------
/first_process/src/02_loglinear_models.R:
--------------------------------------------------------------------------------
 1 | #
 2 | # 02_loglinear_models.R
 3 | #
 4 | #
 5 | library(dplyr)
 6 | library(mrbrt001, lib.loc = "/ihme/code/mscm/R/packages/")
 7 |
 8 | args <- commandArgs(trailingOnly = TRUE)
 9 |
10 | ro_pair <- args[1]
11 | out_dir <- args[2]
12 | WORK_DIR <- args[3]
13 | setwd(WORK_DIR)
14 | source("./config.R")
15 |
16 |
17 | data <- readRDS(paste0(out_dir, "01_template_models/", ro_pair, ".RDS"))
18 | df_data <- data$df_data
19 |
20 | mrdata <- MRData()
21 |
22 | mrdata$load_df(
23 |   data = df_data,
24 |   col_obs = c('obs'),
25 |   col_obs_se = c('obs_se'),
26 |   col_study_id = c('study_id'),
27 |   col_covs = as.list(c("signal"))
28 | )
29 |
30 | # Fit a linear covariate model on the signal; the uniform prior [1, 1] fixes beta at 1
31 | cov_models <- list(LinearCovModel(
32 |   alt_cov = "signal",
33 |   use_re = TRUE,
34 |   prior_beta_uniform=array(c(1.0, 1.0))
35 | ))
36 |
37 | # No trimming
38 | model <- MRBRT(
39 |   data = mrdata,
40 |   cov_models = cov_models,
41 |   inlier_pct = 1.0
42 | )
43 |
44 | model$fit_model(inner_print_level=5L, inner_max_iter=200L,
45 |                 outer_step_size=200L, outer_max_iter=100L)
46 |
47 | # Sample betas to use as priors for covariate selection.
48 | sampling <- import("mrtool.core.other_sampling")
49 | beta_samples <- sampling$sample_simple_lme_beta(1000L, model)
50 | beta_std <- sd(beta_samples)
51 |
52 | # Save data and model
53 | py_save_object(object = model,
54 |                filename = paste0(out_dir, "02_loglinear_pkl_files/", ro_pair, ".pkl"),
55 |                pickle = "dill")
56 |
57 | out <- append(data, list(beta_std = beta_std))
58 | saveRDS(out, paste0(out_dir, "02_loglinear_models/", ro_pair, ".RDS"))
59 |
60 |
--------------------------------------------------------------------------------
/first_process/src/03_covariate_selection.R:
--------------------------------------------------------------------------------
 1 | #
 2 | # 03_covariate_selection.R
 3 | #
 4 | #
 5 | library(dplyr)
 6 | library(mrbrt001, lib.loc = "/ihme/code/mscm/R/packages/")
 7 |
 8 | args <- commandArgs(trailingOnly = TRUE)
 9 |
10 | ro_pair <- args[1]
11 | out_dir <- args[2]
12 | WORK_DIR <- args[3]
13 | setwd(WORK_DIR)
14 | source("./config.R")
15 |
16 | # Read data
17 | data <- readRDS(paste0(out_dir, "02_loglinear_models/", ro_pair, ".RDS"))
18 | df_data <- data$df_data
19 | df_tmp <- data$df
20 | df_tmp <- df_tmp[as.numeric(rownames(df_data)),]  # keep only rows not trimmed in stage 1
21 |
22 | cov_names <- c("exposure_linear", data$x_covs)
23 |
24 | # Delete in future development; skip covariate selection for now.
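# With `cov_names` reduced to just "exposure_linear" below, `candidate_covs`
# comes out empty, so CovFinder keeps only the pre-selected signal covariate
# (see `pre_selected_covs` in COV_FINDER_CONFIG in config.R) and no bias
# covariates are selected.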
25 | cov_names <- c("exposure_linear") 26 | candidate_covs <- cov_names[!cov_names == "exposure_linear"] 27 | 28 | # Interaction with signal 29 | if (BIAS_COVARIATES_AS_INTX){ 30 | for (cov in candidate_covs) df_data[, cov] <- df_data$signal * df_tmp[, cov] 31 | } 32 | 33 | # Change the name of signal to exposure_linear, since some 34 | # underlying code deal with column name `exposure_linear` 35 | df_data$exposure_linear <- df_data$signal 36 | mrdata <- MRData() 37 | 38 | mrdata$load_df( 39 | data = df_data, 40 | col_obs = c('obs'), 41 | col_obs_se = c('obs_se'), 42 | col_study_id = c('study_id'), 43 | col_covs = as.list(cov_names) 44 | ) 45 | 46 | loglinear_model <- readRDS(paste0(out_dir, "02_loglinear_models/", ro_pair, ".RDS")) 47 | 48 | # Beta prior from first loglinear model results. 49 | beta_gprior_std <- loglinear_model$beta_std 50 | covfinder <- do.call( 51 | CovFinder, 52 | c(COV_FINDER_CONFIG, 53 | list( 54 | data = mrdata, 55 | covs = as.list(candidate_covs)), 56 | beta_gprior_std = BETA_PRIOR_MULTIPLIER * beta_gprior_std 57 | ) 58 | ) 59 | 60 | covfinder$select_covs(verbose = TRUE) 61 | 62 | selected_covs <- covfinder$selected_covs 63 | selected_covs 64 | 65 | # Save data and selected covariates 66 | out <- append(data, list(df_cov_selection=df_data, selected_covs=selected_covs)) 67 | saveRDS(out, paste0(out_dir, "03_covariate_selection_models/", ro_pair, ".RDS")) -------------------------------------------------------------------------------- /first_process/src/04_mixed_effects_models.R: -------------------------------------------------------------------------------- 1 | # 2 | # 04_mixed_effects_models.R 3 | # 4 | # 5 | library(dplyr) 6 | library(mrbrt001, lib.loc = "/ihme/code/mscm/R/packages/") 7 | 8 | args <- commandArgs(trailingOnly = TRUE) 9 | 10 | ro_pair <- args[1] 11 | out_dir <- args[2] 12 | WORK_DIR <- args[3] 13 | setwd(WORK_DIR) 14 | source("./config.R") 15 | 16 | # Extract selected covariates 17 | data <- readRDS(paste0(out_dir, "03_covariate_selection_models/", ro_pair, ".RDS")) 18 | df_data <- data$df_data 19 | df_tmp <- data$df 20 | # Only keep rows that are not trimmed 21 | df_tmp <- df_tmp[as.numeric(rownames(df_data)),] 22 | 23 | cov_names <- data$selected_covs 24 | bias_covs <- cov_names[!cov_names == "exposure_linear"] 25 | 26 | # Add interaction 27 | for (cov in bias_covs) df_data[, cov] <- df_data$signal * df_tmp[, cov] 28 | 29 | # Selected bias covariates plus signal 30 | covs <- c("signal", bias_covs) 31 | 32 | mrdata <- MRData() 33 | mrdata$load_df( 34 | df_data, 35 | col_obs = c('obs'), 36 | col_obs_se = c('obs_se'), 37 | col_study_id = c('study_id'), 38 | col_covs=as.list(covs) 39 | ) 40 | 41 | 42 | loglinear_model <- readRDS(paste0(out_dir, "02_loglinear_models/", ro_pair, ".RDS")) 43 | 44 | # Beta prior from first loglinear model results. 
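# `beta_std` below is the SD of 1000 betas sampled from the stage-2 log-linear
# model (see 02_loglinear_models.R); BETA_PRIOR_MULTIPLIER (0.1 in config.R)
# tightens the resulting Gaussian prior placed on each bias covariate.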
45 | beta_gprior_std <- loglinear_model$beta_std 46 | 47 | # Combine cov models 48 | cov_models <- list() 49 | for (cov in bias_covs) cov_models <- append(cov_models, 50 | list( 51 | do.call( 52 | LinearCovModel, 53 | list( 54 | alt_cov=cov, 55 | beta_gprior_std=BETA_PRIOR_MULTIPLIER * beta_gprior_std 56 | ) 57 | ) 58 | ) 59 | ) 60 | 61 | # Mixed effects model 62 | cov_models <- append(cov_models, LinearCovModel('signal', use_re=TRUE, 63 | prior_beta_uniform=array(c(1.0, 1.0)))) 64 | 65 | model <- MRBRT( 66 | data=mrdata, 67 | cov_models = cov_models, 68 | inlier_pct = 1.0 69 | ) 70 | 71 | model$fit_model(inner_print_level=5L, inner_max_iter=200L, 72 | outer_step_size=200L, outer_max_iter=100L) 73 | 74 | # Load signal model and data in Stage 1 75 | signal_model <- py_load_object(filename=paste0(out_dir, "01_template_pkl_files/", ro_pair, ".pkl"), 76 | pickle = "dill") 77 | orig_data <- readRDS(paste0(out_dir, "01_template_models/", ro_pair, ".RDS")) 78 | df <- orig_data$df 79 | 80 | # This should be provided by the user 81 | NUM_POINTS <- 100L 82 | exposure_lower <- min(df[,c(REF_EXPOSURE_COLS, ALT_EXPOSURE_COLS)]) 83 | exposure_upper <- max(df[,c(REF_EXPOSURE_COLS, ALT_EXPOSURE_COLS)]) 84 | exposure <- seq(exposure_lower, exposure_upper, length.out=NUM_POINTS) 85 | min_cov <- rep(exposure_lower, NUM_POINTS) 86 | 87 | # Deal with Sarah's data 88 | if ('a_0' %in% REF_EXPOSURE_COLS){ 89 | df_signal_pred <- data.frame(a_0=min_cov, a_1=min_cov, b_0=exposure, b_1=exposure) 90 | } else { 91 | df_signal_pred <- data.frame(a_0=min_cov, b_0=exposure) 92 | names(df_signal_pred) <- c(REF_EXPOSURE_COLS, ALT_EXPOSURE_COLS) 93 | } 94 | 95 | # Predict using signal model and gridded exposure 96 | data_signal_pred <- MRData() 97 | data_signal_pred$load_df( 98 | df_signal_pred, 99 | col_covs = as.list(c(REF_EXPOSURE_COLS, ALT_EXPOSURE_COLS)) 100 | ) 101 | signal_pred <- signal_model$predict(data_signal_pred) 102 | 103 | # TODO: data of selected covariates to be added 104 | df_final_pred <- data.frame(signal=signal_pred) 105 | data_final_pred <- MRData() 106 | data_final_pred$load_df( 107 | df_final_pred, 108 | col_covs = as.list(c("signal")) 109 | ) 110 | 111 | # create draws and prediction 112 | sampling <- import("mrtool.core.other_sampling") 113 | num_samples <- 1000L 114 | beta_samples <- sampling$sample_simple_lme_beta(num_samples, model) 115 | gamma_samples <- rep(model$gamma_soln, num_samples) * matrix(1, num_samples) 116 | 117 | curve <- model$predict(data_final_pred) 118 | draws <- model$create_draws( 119 | data_final_pred, 120 | beta_samples=beta_samples, 121 | gamma_samples=gamma_samples 122 | ) 123 | 124 | # Save model 125 | py_save_object(object = model, 126 | filename = paste0(out_dir, "04_mixed_effects_pkl_files/", ro_pair, ".pkl"), 127 | pickle = "dill") 128 | 129 | # OBS_VAR <- "ln_effect" 130 | # OBS_SE_VAR <- "ln_se" 131 | 132 | # if (!(OBS_VAR %in% names(df))){ 133 | # OBS_VAR <- "obs" 134 | # } 135 | 136 | # if (!(OBS_SE_VAR %in% names(df))){ 137 | # OBS_SE_VAR <- "obs_se" 138 | # } 139 | 140 | # Sanity check 141 | # pdf(paste0(out_dir, "04_mixed_effects_models/", ro_pair, ".pdf")) 142 | 143 | # if (length(ALT_EXPOSURE_COLS) == 1){ 144 | # plot(df[,ALT_EXPOSURE_COLS] - df[,REF_EXPOSURE_COLS], 145 | # df[, OBS_VAR], cex=1/(7*df[, OBS_SE_VAR]), xlab="exposure", ylab="ln_effect", 146 | # main=ro_pair, col=c('blue')) 147 | # } else { 148 | # plot(apply(df[,ALT_EXPOSURE_COLS], 1, mean) - apply(df[,REF_EXPOSURE_COLS], 1, mean), 149 | # df[, OBS_VAR], cex=1/(7*df[, OBS_SE_VAR]), 
xlab="exposure", ylab="ln_effect", 150 | # main=ro_pair, col=c('blue')) 151 | # } 152 | 153 | # lines(exposure, curve) 154 | # dev.off() -------------------------------------------------------------------------------- /first_process/src/05_evidence_score_continuous.R: -------------------------------------------------------------------------------- 1 | # 2 | # 06_publication_bias.R 3 | # 4 | # 5 | library(mrbrt001, lib.loc = "/ihme/code/mscm/R/packages/") 6 | args <- commandArgs(trailingOnly = TRUE) 7 | 8 | 9 | ### Running settings 10 | # ro_pair <- args[1] 11 | # out_dir <- args[2] 12 | # WORK_DIR <- args[3] 13 | ro_pair <- c("lpa_ihd") 14 | out_dir <- "" 15 | work_dir <- "/ihme/homes/zhengp/Repositories/evidence_score_pipeline" 16 | 17 | setwd(work_dir) 18 | source("./config.R") 19 | source("./src/utils/continuous_functions.R") 20 | 21 | linear_model_path <- paste0("/home/j/temp/zhengp/escore/", ro_pair, "_linear.pkl") 22 | signal_model_path <- paste0("/home/j/temp/zhengp/escore/", ro_pair, "_signal.pkl") 23 | ref_covs <- c("a_0", "a_1") 24 | alt_covs <- c("b_0", "b_1") 25 | 26 | 27 | ### Load model objects 28 | linear_model <- py_load_object(filename = linear_model_path, pickle = "dill") 29 | signal_model <- py_load_object(filename = signal_model_path, pickle = "dill") 30 | 31 | data_info <- extract_data_info(signal_model, 32 | linear_model, 33 | ref_covs = ref_covs, 34 | alt_covs = alt_covs) 35 | data_info$ro_pair <- ro_pair 36 | df <- data_info$df 37 | 38 | ### Detect publication bias 39 | df_no_outlier <- df[!df$outlier,] 40 | egger_model_all <- egger_regression(df$residual, df$residual_se) 41 | egger_model <- egger_regression(df_no_outlier$residual, df_no_outlier$residual_se) 42 | has_pub_bias <- egger_model$pval < 0.05 43 | 44 | ### Adjust for publication bias 45 | if (has_pub_bias) { 46 | df_fill <- get_df_fill(df[!df$outlier,]) 47 | num_fill <- nrow(df_fill) 48 | } else { 49 | num_fill <- 0 50 | } 51 | 52 | # fill the data if needed and refit the model 53 | if (num_fill > 0) { 54 | df <- rbind(df, df_fill) 55 | data_info$df <- df 56 | 57 | # refit the model 58 | data = MRData() 59 | data$load_df( 60 | data=df[!df$outlier,], 61 | col_obs='obs', 62 | col_obs_se='obs_se', 63 | col_covs=as.list(linear_model$cov_names), 64 | col_study_id='study_id' 65 | ) 66 | linear_model_fill <- MRBRT(data, cov_models=linear_model$cov_models) 67 | linear_model_fill$fit_model() 68 | } else { 69 | linear_model_fill <- NULL 70 | } 71 | 72 | ### Extract scores 73 | uncertainty_info <- get_uncertainty_info(data_info, linear_model) 74 | if (is.null(linear_model_fill)) { 75 | uncertainty_info_fill <- NULL 76 | } else { 77 | uncertainty_info_fill <- get_uncertainty_info(data_info, linear_model_fill) 78 | } 79 | 80 | 81 | ### Output diagnostics 82 | # figures 83 | title <- paste0(ro_pair, ": egger_mean=", round(egger_model$mean, 3), 84 | ", egger_sd=", round(egger_model$sd,3), ", egger_pval=", 85 | round(egger_model$pval, 3)) 86 | plot_residual(df, title) 87 | 88 | plot_model(data_info, 89 | uncertainty_info, 90 | linear_model, 91 | signal_model, 92 | uncertainty_info_fill, 93 | linear_model_fill) 94 | 95 | # summary 96 | summary <- summarize_model(data_info, 97 | uncertainty_info, 98 | linear_model, 99 | signal_model, 100 | egger_model, 101 | egger_model_all, 102 | uncertainty_info_fill, 103 | linear_model_fill) 104 | 105 | draws <- get_draws(data_info, linear_model) 106 | -------------------------------------------------------------------------------- /first_process/src/05_evidence_score_dichotomous.R: 
-------------------------------------------------------------------------------- 1 | # 2 | # 06_publication_bias.R 3 | # 4 | # 5 | library(mrbrt001, lib.loc = "/ihme/code/mscm/R/packages/") 6 | args <- commandArgs(trailingOnly = TRUE) 7 | 8 | 9 | ### Running settings 10 | # ro_pair <- args[1] 11 | # out_dir <- args[2] 12 | # WORK_DIR <- args[3] 13 | ro_pair <- c("sim") 14 | out_dir <- "" 15 | work_dir <- "/ihme/homes/zhengp/Repositories/evidence_score_pipeline" 16 | 17 | setwd(work_dir) 18 | source("./config.R") 19 | source("./src/utils/dichotomous_functions.R") 20 | 21 | model_path <- "/home/j/temp/zhengp/escore/sim_dicho.pkl" 22 | 23 | 24 | ### Load model objects 25 | model <- py_load_object(filename = model_path, pickle = "dill") 26 | 27 | 28 | ### Extract data 29 | df <- extract_data_info(model) 30 | 31 | 32 | ### Detect publication bias 33 | egger_model_all <- egger_regression(df$residual, df$residual_se) 34 | egger_model <- egger_regression(df[!df$outlier,]$residual, df[!df$outlier,]$residual_se) 35 | has_pub_bias <- egger_model$pval < 0.05 36 | 37 | 38 | ### Adjust for publication bias 39 | if (has_pub_bias) { 40 | df_fill <- get_df_fill(df) 41 | num_fill <- nrow(df_fill) 42 | } else { 43 | num_fill <- 0 44 | } 45 | 46 | # fill the data if needed and refit the model 47 | if (num_fill > 0) { 48 | df <- rbind(df, df_fill) 49 | 50 | # refit the model 51 | data = MRData() 52 | data$load_df( 53 | data=df[!df$outlier,], 54 | col_obs='obs', 55 | col_obs_se='obs_se', 56 | col_covs=as.list(model$cov_names), 57 | col_study_id='study_id' 58 | ) 59 | model_fill <- MRBRT(data, cov_models=model$cov_models) 60 | model_fill$fit_model() 61 | } else { 62 | model_fill <- NULL 63 | } 64 | 65 | 66 | ### Extract scores 67 | uncertainty_info <- get_uncertainty_info(model) 68 | if (is.null(model_fill)) { 69 | uncertainty_info_fill <- NULL 70 | } else { 71 | uncertainty_info_fill <- get_uncertainty_info(model_fill) 72 | } 73 | 74 | 75 | ### Output diagnostics 76 | plot_model(df, uncertainty_info, model, uncertainty_info_fill, model_fill, ro_pair) 77 | summary <- summarize_model(ro_pair, model, model_fill, egger_model, egger_model_all, uncertainty_info) 78 | draws <- get_draws(model) 79 | -------------------------------------------------------------------------------- /first_process/src/05_evidence_score_legacy.R: -------------------------------------------------------------------------------- 1 | # 2 | # 05_evidence_score.R 3 | # 4 | # 5 | library(dplyr) 6 | library(mrbrt001, lib.loc = "/ihme/code/mscm/R/packages/") 7 | 8 | args <- commandArgs(trailingOnly = TRUE) 9 | 10 | ro_pair <- args[1] 11 | out_dir <- args[2] 12 | WORK_DIR <- args[3] 13 | setwd(WORK_DIR) 14 | source("./config.R") 15 | 16 | 17 | # Load signal_model and final_model 18 | signal_model <- py_load_object(filename=paste0(out_dir, "01_template_pkl_files/", ro_pair, ".pkl"), 19 | pickle = "dill") 20 | 21 | final_model <- py_load_object(filename=paste0(out_dir, "04_mixed_effects_pkl_files/", ro_pair, ".pkl"), 22 | pickle = "dill") 23 | 24 | # using the scorelator 25 | 26 | # need to run 'repl_python()' to open an interactive Python interpreter, 27 | # then immediately type 'exit' to get back to the R interpreter 28 | # -- this helps to load a required Python package 29 | repl_python() 30 | # -- type 'exit' or hit escape 31 | 32 | evidence_score <- import("mrtool.evidence_score.scorelator") 33 | scorelator <- evidence_score$ContinuousScorelator(signal_model = signal_model, final_model = final_model, 34 | alt_cov_names= 
as.list(ALT_EXPOSURE_COLS), 35 | ref_cov_names = as.list(REF_EXPOSURE_COLS), 36 | name=ro_pair) 37 | scorelator$plot_model(folder = paste0(out_dir, "05_evidence_score/")) 38 | score <- scorelator$get_score() 39 | low_score <- scorelator$get_score(use_gamma_ub=TRUE) 40 | -------------------------------------------------------------------------------- /first_process/src/05_evidence_score_loglinear.R: -------------------------------------------------------------------------------- 1 | # 2 | # 06_publication_bias.R 3 | # 4 | # 5 | library(mrbrt001, lib.loc = "/ihme/code/mscm/R/packages/") 6 | args <- commandArgs(trailingOnly = TRUE) 7 | 8 | 9 | ### Running settings 10 | # ro_pair <- args[1] 11 | # out_dir <- args[2] 12 | # WORK_DIR <- args[3] 13 | ro_pair <- c("lpa_ihd") 14 | out_dir <- "" 15 | work_dir <- "/ihme/homes/zhengp/Repositories/evidence_score_pipeline" 16 | 17 | setwd(work_dir) 18 | source("./config.R") 19 | source("./src/utils/loglinear_functions.R") 20 | 21 | model_path <- paste0("/home/j/temp/zhengp/escore/sim_loglinear.pkl") 22 | ref_covs <- NULL 23 | alt_covs <- c("exp") 24 | 25 | 26 | ### Load model objects 27 | model <- py_load_object(filename = model_path, pickle = "dill") 28 | 29 | data_info <- extract_data_info(model, 30 | ref_covs = ref_covs, 31 | alt_covs = alt_covs) 32 | data_info$ro_pair <- ro_pair 33 | df <- data_info$df 34 | 35 | ### Detect publication bias 36 | df_no_outlier <- df[!df$outlier,] 37 | egger_model_all <- egger_regression(df$residual, df$residual_se) 38 | egger_model <- egger_regression(df_no_outlier$residual, df_no_outlier$residual_se) 39 | has_pub_bias <- egger_model$pval < 0.05 40 | 41 | ### Adjust for publication bias 42 | if (has_pub_bias) { 43 | df_fill <- get_df_fill(df[!df$outlier,]) 44 | num_fill <- nrow(df_fill) 45 | } else { 46 | num_fill <- 0 47 | } 48 | 49 | # fill the data if needed and refit the model 50 | if (num_fill > 0) { 51 | df <- rbind(df, df_fill) 52 | data_info$df <- df 53 | 54 | # refit the model 55 | data = MRData() 56 | data$load_df( 57 | data=df[!df$outlier,], 58 | col_obs='obs', 59 | col_obs_se='obs_se', 60 | col_covs=as.list(model$cov_names), 61 | col_study_id='study_id' 62 | ) 63 | model_fill <- MRBRT(data, cov_models=model$cov_models) 64 | model_fill$fit_model() 65 | } else { 66 | model_fill <- NULL 67 | } 68 | 69 | ### Extract scores 70 | uncertainty_info <- get_uncertainty_info(data_info, model) 71 | if (is.null(model_fill)) { 72 | uncertainty_info_fill <- NULL 73 | } else { 74 | uncertainty_info_fill <- get_uncertainty_info(data_info, model_fill) 75 | } 76 | 77 | 78 | ### Output diagnostics 79 | # figures 80 | title <- paste0(ro_pair, ": egger_mean=", round(egger_model$mean, 3), 81 | ", egger_sd=", round(egger_model$sd,3), ", egger_pval=", 82 | round(egger_model$pval, 3)) 83 | plot_residual(df, title) 84 | 85 | plot_model(data_info, 86 | uncertainty_info, 87 | model, 88 | uncertainty_info_fill, 89 | model_fill) 90 | 91 | # summary 92 | summary <- summarize_model(data_info, 93 | uncertainty_info, 94 | model, 95 | egger_model, 96 | egger_model_all, 97 | uncertainty_info_fill, 98 | model_fill) 99 | summary 100 | 101 | draws <- get_draws(data_info, model) 102 | -------------------------------------------------------------------------------- /first_process/src/05_evidence_score_mixed.R: -------------------------------------------------------------------------------- 1 | # 2 | # 06_publication_bias.R 3 | # 4 | # 5 | library(mrbrt001, lib.loc = "/ihme/code/mscm/R/packages/") 6 | args <- commandArgs(trailingOnly = TRUE) 7 | 8 
| 9 | ### Running settings 10 | # ro_pair <- args[1] 11 | # out_dir <- args[2] 12 | # WORK_DIR <- args[3] 13 | ro_pair <- c("sim") 14 | out_dir <- "" 15 | work_dir <- "/ihme/homes/zhengp/Repositories/evidence_score_pipeline" 16 | 17 | setwd(work_dir) 18 | source("./config.R") 19 | source("./src/utils/mixed_functions.R") 20 | 21 | model_path <- "/ihme/code/qwr/ckd_qwr/evidence_score_pipeline/mrbrt_model_outputs/vmixed/stage3_str_bp_vmixed/mod1.pkl" 22 | 23 | 24 | ### Load model objects 25 | model <- py_load_object(filename = model_path, pickle = "dill") 26 | 27 | 28 | ### Extract data 29 | data_info <- extract_data_info(model, cont_cov = "age_mean") 30 | df <- data_info$df 31 | data_info$ro_pair <- ro_pair 32 | 33 | ### Detect publication bias 34 | egger_model_all <- egger_regression(df$residual, df$residual_se) 35 | egger_model <- egger_regression(df[!df$outlier,]$residual, df[!df$outlier,]$residual_se) 36 | has_pub_bias <- egger_model$pval < 0.05 37 | 38 | 39 | ### Adjust for publication bias 40 | if (has_pub_bias) { 41 | df_fill <- get_df_fill(df) 42 | num_fill <- nrow(df_fill) 43 | } else { 44 | num_fill <- 0 45 | } 46 | 47 | # fill the data if needed and refit the model 48 | if (num_fill > 0) { 49 | df <- rbind(df, df_fill) 50 | data_info$df <- df 51 | 52 | # refit the model 53 | data = MRData() 54 | data$load_df( 55 | data=df[!df$outlier,], 56 | col_obs='obs', 57 | col_obs_se='obs_se', 58 | col_covs=as.list(model$cov_names), 59 | col_study_id='study_id' 60 | ) 61 | model_fill <- MRBRT(data, cov_models=model$cov_models) 62 | model_fill$fit_model() 63 | } else { 64 | model_fill <- NULL 65 | } 66 | 67 | 68 | ### Extract scores 69 | uncertainty_info <- get_uncertainty_info(data_info, model) 70 | if (is.null(model_fill)) { 71 | uncertainty_info_fill <- NULL 72 | } else { 73 | uncertainty_info_fill <- get_uncertainty_info(data_info, model_fill) 74 | } 75 | 76 | 77 | ### Output diagnostics 78 | title <- paste0("sim", ": egger_mean=", round(egger_model$mean, 3), 79 | ", egger_sd=", round(egger_model$sd,3), ", egger_pval=", 80 | round(egger_model$pval, 3)) 81 | plot_residual(df, title) 82 | 83 | plot_model(data_info, 84 | uncertainty_info, 85 | model, 86 | uncertainty_info_fill, 87 | model_fill) 88 | summary <- summarize_model(data_info, 89 | uncertainty_info, 90 | model, 91 | uncertainty_info_fill, 92 | model_fill) 93 | 94 | draws <- get_draws(data_info, model) 95 | -------------------------------------------------------------------------------- /first_process/src/utils/egger_functions.R: -------------------------------------------------------------------------------- 1 | egger_regression <- function(residual, residual_sd, one_sided = TRUE) { 2 | weighted_residual <- residual/residual_sd 3 | r_mean <- mean(weighted_residual) 4 | r_sd <- 1/sqrt(length(weighted_residual)) 5 | r_pval <- get_pval(r_mean, r_sd, one_sided = one_sided) 6 | list(mean = r_mean, sd = r_sd, pval = r_pval) 7 | } 8 | 9 | get_pval <- function(beta, beta_sd, one_sided = FALSE) { 10 | zscore <- abs(beta/beta_sd) 11 | if (one_sided) { 12 | pval <- 1 - pnorm(zscore) 13 | } else { 14 | pval <- 2*(1 - pnorm(zscore)) 15 | } 16 | pval 17 | } -------------------------------------------------------------------------------- /first_process/src/utils/extract_old_results.R: -------------------------------------------------------------------------------- 1 | # 2 | # extract_old_results.R 3 | # 4 | # Reed Sorensen 5 | # June 2020 6 | # 7 | 8 | 9 | library(reticulate) 10 | library(dplyr) 11 | use_condaenv(condaenv="mr_brt_refactor_env", 
conda="/ihme/code/evidence_score/miniconda3/bin/conda", required = TRUE) 12 | 13 | py_cmds <- c( 14 | "import sys", 15 | "import os", 16 | "import dill as pickle", 17 | "import argparse", 18 | "import numpy as np", 19 | "import pandas as pd", 20 | "sys.path.append(os.path.dirname('/home/j/temp/reed/prog/repos/mr_brt_ihme/refactor/'))", 21 | "from mrbrt.__init__ import MR_BRT, MR_BeRT", 22 | "from mrbrt.utils import ratioInit, sampleKnots" 23 | ) 24 | 25 | 26 | 27 | 28 | 29 | for (cmd in py_cmds) py_run_string(cmd) 30 | 31 | path1 <- "/home/j/temp/rmbarber/red_meat_paper/diet_model_pipeline_2020_01_23" 32 | dirs1 <- list.dirs(path1)[-c(1:2)] 33 | 34 | path2 <- "/home/j/temp/reed/jiawei/red_meat_paper/diet_model_pipeline_2020_06_29_100_iters" 35 | dirs2 <- list.dirs(path2)[-c(1)] 36 | 37 | path3 <- "/home/j/temp/reed/jiawei/red_meat_paper/diet_model_pipeline_2020_06_29_200_iters" 38 | dirs3 <- list.dirs(path3)[-c(1)] 39 | 40 | 41 | get_old_diet_results <- function(dir, verbose = TRUE) { 42 | 43 | dev <- FALSE 44 | if (dev) { 45 | dir <- dirs1[27] 46 | } 47 | if (verbose) cat(dir, "\n") 48 | 49 | try({ 50 | path_stage1 <- paste0(dir, "/stage1.pkl") 51 | 52 | if (file.exists(path_stage1)) { 53 | py_run_string(paste0("with open('", path_stage1, "', 'rb') as fopen: model1 = pickle.load(fopen)")) 54 | x_covs <- py$model1$ratio_x_covs 55 | z_covs <- py$model1$ratio_z_covs 56 | } else { 57 | x_covs <- z_covs <- NA 58 | } 59 | 60 | path_mod_mono <- paste0(dir, "/ratio_mod_mono.pkl") 61 | if (file.exists(path_mod_mono)) { 62 | py_run_string(paste0("with open('", path_mod_mono, "', 'rb') as fopen: model2 = pickle.load(fopen)")) 63 | 64 | knots_tmp = py$model2$mr$spline_list[[1]]$knots 65 | k0 = knots_tmp[1] 66 | k1 = knots_tmp[length(knots_tmp)] 67 | # pred_x_cov_list, pred_z_cov_list, y_samples, y_samples_fe 68 | py_run_string(paste0("a, b, c, d = model2.mr_predict(domain = [", k0, ", ", k1, "])")) 69 | # pred = y_samples.mean(axis = 1) 70 | pred = apply(py$c, 1, mean) 71 | pred_fe = apply(py$d, 1, mean) 72 | # exp_tmp = pred_x_cov_list[0]['mat'] 73 | exp_tmp = py$a[[1]][['mat']] 74 | df <- py$model2$df 75 | } else { 76 | knots_tmp <- pred <- exp_tmp <- df <- NA 77 | } 78 | 79 | ro_pair_tmp <- strsplit(dir, "\\/")[[1]] 80 | ro_pair <- ro_pair_tmp[length(ro_pair_tmp)] 81 | 82 | out <- list( 83 | ro_pair=ro_pair, x_covs=x_covs, z_covs=z_covs, 84 | knots_tmp=knots_tmp, pred=pred, pred_fe=pred_fe, exp_tmp=exp_tmp, df=df 85 | ) 86 | return(out) 87 | 88 | }) 89 | } 90 | 91 | ##### 92 | 93 | old_results <- lapply(dirs1, get_old_diet_results) 94 | old_results <- old_results[!sapply(old_results, function(x) class(x) == "try-error")] 95 | names(old_results) <- sapply(old_results, function(x) x$ro_pair) 96 | # saveRDS(old_results, "/home/j/temp/reed/misc/old_results_fe.RDS") 97 | 98 | old_results <- readRDS("/home/j/temp/reed/misc/old_results_fe.RDS") 99 | tmp1 <- lapply(old_results, function(x) { 100 | # x <- old_results[[1]] # dev 101 | list( 102 | n_rows = nrow(x$df), 103 | col_names = names(x$df), 104 | x_covs = x$x_covs 105 | ) 106 | }) 107 | names(tmp11) <- names(old_results) 108 | 109 | new_dir <- "/ihme/scratch/users/rsoren/evidence_score_diet/v5_test/04_monospline_models/" 110 | new_results <- lapply(list.files(new_dir, full.names = TRUE), readRDS) 111 | tmp2 <- lapply(new_results, function(x) { 112 | # x <- new_results[[1]] # dev 113 | list( 114 | n_rows = nrow(x$df), 115 | col_names = names(x$df), 116 | x_covs = x$selected_covs[x$selected_covs != "exposure_linear"] 117 | ) 118 | }) 119 | names(tmp2) 
<- gsub(".RDS", "", list.files(new_dir)) 120 | 121 | tmp3 <- do.call("rbind", lapply(gsub(".RDS", "", list.files(new_dir)), function(x) { 122 | # x <- gsub(".RDS", "", list.files(new_dir))[4] # dev 123 | data.frame( 124 | pair = x, 125 | nrows_old = tmp1[[x]]$n_rows, 126 | nrows_new = tmp2[[x]]$n_rows, 127 | xcovs_old = paste(tmp1[[x]]$x_covs, collapse = ","), 128 | xcovs_new = paste(tmp2[[x]]$x_covs, collapse = ","), 129 | colnames_old = paste(tmp1[[x]]$col_names, collapse = ","), 130 | colnames_new = paste(tmp2[[x]]$col_names, collapse = ",") 131 | ) 132 | })) 133 | write.csv(tmp3, "/home/j/temp/reed/misc/comparison_with_ryans_data_prep.csv") 134 | 135 | 136 | # get pairs without selected covs in Ryan's model 137 | tmp <- sapply(old_results, function(x) x$x_covs) 138 | tmp2 <- names(tmp)[sapply(tmp, function(x) length(x) == 0)] 139 | saveRDS(tmp2, "/home/j/temp/reed/misc/pairs_with_no_selectedcovs.RDS") 140 | 141 | ##### jiawei's results with 100 iterations 142 | old_results2 <- lapply(dirs2, get_old_diet_results) 143 | old_results2 <- old_results2[!sapply(old_results2, function(x) class(x) == "try-error")] 144 | names(old_results2) <- sapply(old_results2, function(x) x$ro_pair) 145 | saveRDS(old_results2, "/home/j/temp/reed/misc/old_results_fe2.RDS") 146 | 147 | ##### jiawei's results with 200 iterations 148 | old_results3 <- lapply(dirs3, get_old_diet_results) 149 | old_results3 <- old_results3[!sapply(old_results3, function(x) class(x) == "try-error")] 150 | names(old_results3) <- sapply(old_results3, function(x) x$ro_pair) 151 | saveRDS(old_results3, "/home/j/temp/reed/misc/old_results_fe3.RDS") 152 | 153 | -------------------------------------------------------------------------------- /first_process/src/utils/plot_3_curves.R: -------------------------------------------------------------------------------- 1 | # 2 | # plot_3_curves.R 3 | # 4 | # Reed Sorensen 5 | # June 2020 6 | # 7 | CURDIR <- "/ihme/homes/jiaweihe/msca/mrbrt/evidence_score_diet" 8 | CODE_PATH <- paste0(CURDIR, "/parallel/") 9 | source(paste0(CODE_PATH, "00_globals.R")) 10 | 11 | library(dplyr) 12 | library(mrbrt001, lib.loc = "/ihme/code/mscm/R/packages/") 13 | 14 | pairs_without_selectedcovs <- readRDS(paste0(J_DIR, "/pairs_with_no_selectedcovs.RDS")) 15 | output_dir <- paste0(RESULTS_DIR, VERSION_ID, "/04_monospline_pkl_files/") 16 | ro_pairs <- gsub(".pkl", "", list.files(output_dir)) 17 | rds_dir <- paste0(RESULTS_DIR, VERSION_ID, "/04_monospline_models/") 18 | old_dir <- "/home/j/temp/jiaweihe/red_meat_paper/predict_draws/" 19 | 20 | pdf(paste0(RESULTS_DIR, VERSION_ID, "/04_monospline_pdf1/compare_curves.pdf")) 21 | for (pair in ro_pairs) { 22 | # for (pair in ro_pairs[ro_pairs %in% pairs_without_selectedcovs]) { 23 | dev <- FALSE 24 | if (dev) { 25 | pair <- "redmeat_diabetes" 26 | } 27 | cat(pair, "\n") 28 | 29 | # try({ 30 | # Sys.sleep(2) 31 | x <- readRDS(paste0(rds_dir, pair, ".RDS")) 32 | mod2 <- py_load_object(paste0(output_dir, pair, ".pkl")) 33 | 34 | get_knots_ensemble <- function(model) { 35 | cov_name_tmp <- model$ensemble_cov_model_name 36 | tmp <- model$sub_models[[1]] 37 | tmp2 <- tmp$get_cov_model(name = cov_name_tmp) 38 | tmp2$spline_knots 39 | } 40 | 41 | draws_path <- paste0( 42 | paste0(RESULTS_DIR, VERSION_ID, "/04_monospline_pdf1/"), 43 | x$ro_pair, "_y_draws_fe.pkl" 44 | ) 45 | 46 | draws_dat <- py_load_object(draws_path) 47 | draws_mean <- exp(apply(draws_dat, 1, mean)) 48 | # draws_mean <- exp(apply(draws_dat, 1, function(x) quantile(x, 0.5))) 49 | pred_lo <- exp(apply(draws_dat, 1, 
function(x) quantile(x, 0.025))) 50 | pred_hi <- exp(apply(draws_dat, 1, function(x) quantile(x, 0.975))) 51 | 52 | df_pred2 <- x$df_pred2 53 | 54 | old_results <- readRDS(paste0(J_DIR, "old_results_fe.RDS")) 55 | names(old_results) <- sapply(old_results, function(x) x$ro_pair) 56 | tmp <- old_results[[x$ro_pair]] 57 | 58 | tmp_vec <- c(draws_mean, tmp$pred_fe) 59 | x_data <- seq(min(tmp$exp_tmp), max(tmp$exp_tmp), length.out = length(draws_mean)) 60 | new_df <- data.frame(exposure=x_data, pred_lo=pred_lo, pred_hi=pred_hi) 61 | 62 | bias_covs <- x$selected_covs[x$selected_covs != "exposure_linear"] 63 | 64 | old_draws_path <- paste0(old_dir, x$ro_pair, "_y_draws_fe.pkl") 65 | old_draws_dat <- py_load_object(old_draws_path) 66 | old_draws_mean <- apply(old_draws_dat, 1, mean) 67 | old_pred_lo <- apply(old_draws_dat, 1, function(x) quantile(x, 0.025)) 68 | old_pred_hi <- apply(old_draws_dat, 1, function(x) quantile(x, 0.975)) 69 | old_x_data <- seq(min(tmp$exp_tmp), max(tmp$exp_tmp), length.out = length(old_draws_mean)) 70 | old_df <- data.frame(exposure=old_x_data, pred_lo=old_pred_lo, pred_hi=old_pred_hi) 71 | 72 | if (length(bias_covs) == 0) { 73 | covlabel <- "[None]" 74 | } else { 75 | covlabel <- paste(bias_covs, collapse = ", ") 76 | } 77 | 78 | covlabel1 <- paste0("c(", paste(bias_covs, collapse = ", "), ")") 79 | covlabel2 <- paste0("[", paste(tmp$x_covs, collapse = ", "), "]") 80 | 81 | min_y <- min(min(pred_lo), min(tmp_vec), min(old_pred_lo)) 82 | max_y <- max(max(pred_hi), max(tmp_vec), max(old_pred_hi)) 83 | 84 | with(df_pred2, plot( 85 | x_data, draws_mean, 86 | lwd = 3, type = "l", col="blue", 87 | main = paste0(x$ro_pair), 88 | ylim = c(min_y, max_y) 89 | )) 90 | abline(h = 1, lwd = 2, lty = 2) 91 | mtext(text = paste0(paste0(covlabel1, "; ", covlabel2)), side = 3, line = 0.4) 92 | 93 | lines(tmp$exp_tmp, tmp$pred_fe, col = adjustcolor("red", 0.6), lwd = 2) 94 | 95 | # function for plotting uncertainty intervals 96 | add_ui <- function(dat, x_var, lo_var, hi_var, color = "darkblue", opacity = 0.1) { 97 | polygon( 98 | x = c(dat[, x_var], rev(dat[, x_var])), 99 | y = c(dat[, lo_var], rev(dat[, hi_var])), 100 | col = adjustcolor(col = color, alpha.f = opacity), 101 | border = FALSE 102 | ) 103 | } 104 | 105 | add_ui(new_df, 'exposure', 'pred_lo', 'pred_hi') 106 | 107 | add_ui(old_df, 'exposure', 'pred_lo', 'pred_hi', color="firebrick1") 108 | 109 | if (min(pred_hi) < 1) { 110 | legend_pos <- "bottomleft" 111 | } else if (max(pred_lo) > 1) { 112 | legend_pos <- "topleft" 113 | } else legend_pos <- "topright"  # fallback so legend_pos is always defined when the curve straddles 1 114 | 115 | legend(legend_pos, 116 | legend = c("New model", "Old model, 20 iter."), 117 | lwd = 2, 118 | col = c("blue", "red"), 119 | cex = 0.85 120 | ) 121 | # }) 122 | 123 | } 124 | 125 | dev.off() 126 | -------------------------------------------------------------------------------- /first_process/src/utils/qsub_function.R: -------------------------------------------------------------------------------- 1 | 2 | run_script <- function(script, img, args = c("")) { 3 | cmd <- paste( 4 | "/ihme/singularity-images/rstudio/shells/execRscript.sh", 5 | "-i", img, "-s", script, paste(args, collapse = " ") 6 | ) 7 | system(cmd) 8 | } 9 | 10 | submit_qsub <- function(script, job_name, img, proj = PROJ, 11 | queue = "long.q", hours = 6, threads = 1, 12 | error_logs = paste0("/share/temp/sgeoutput/", USER, "/errors"), 13 | output_logs = paste0("/share/temp/sgeoutput/", USER, "/output"), 14 | memory = "8G", args = "", verbose = TRUE) { 15 | cmd <- paste( 16 | "qsub -terse -N", job_name, 17 | "-q", queue,
paste0("-l fthread=", threads), 19 | paste0("-l m_mem_free=", memory), 20 | paste0("-l h_rt=", hours, ":00:00"), 21 | paste0("-l archive=TRUE"), 22 | "-P", proj, 23 | "-e", error_logs, 24 | "-o", output_logs, 25 | "/ihme/singularity-images/rstudio/shells/execRscript.sh ", 26 | "-i", img, 27 | "-s", script, 28 | paste(args, collapse = " ") 29 | ) 30 | 31 | if (verbose) cat(cmd, "\n") 32 | system(cmd) 33 | } 34 | 35 | # Wait for upstream job to finish for each pair 36 | qwait <- function(sub_dir, pair){ 37 | outfile <- paste0(OUT_DIR, sub_dir, "/", pair, ".RDS") 38 | while (!file.exists(outfile)) { 39 | Sys.sleep(1) 40 | } 41 | } 42 | 43 | # Submit job for each stage for each pair 44 | submit_sub_job <- function(pair, script, job_name_suffix, script_dir) { 45 | submit_qsub( 46 | script = paste0(CODE_PATH, script), 47 | job_name = paste0(pair, job_name_suffix), 48 | img = SINGULARITY_IMG, 49 | args = c(pair, OUT_DIR, script_dir) 50 | ) 51 | } 52 | 53 | # Submit job for plotting risk function and derivative fit. 54 | submit_plot_job <- function(pair) { 55 | cmd <- paste0( 56 | paste0("sh ", CODE_PATH, "submit_qsub_python.sh "), 57 | paste0(CODE_PATH, " "), 58 | paste0(OUT_DIR, "04_monospline_pkl_files/ "), 59 | paste0(OUT_DIR, "05_monospline_pdf/ "), 60 | paste0(pair, " "), 61 | paste0(USER, " "), 62 | PROJ 63 | ) 64 | system(cmd) 65 | } 66 | 67 | # Submit job for generating evidence score. 68 | submit_score_job <- function(pair) { 69 | cmd <- paste0( 70 | paste0("sh ", CODE_PATH, "submit_score_qsub_python.sh "), 71 | paste0(CODE_PATH, " "), 72 | paste0(OUT_DIR, "05_monospline_pdf/ "), 73 | paste0(OUT_DIR, "06_evidence_score/ "), 74 | paste0(pair, " "), 75 | paste0(USER, " "), 76 | PROJ 77 | ) 78 | system(cmd) 79 | } -------------------------------------------------------------------------------- /limetr/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | notebooks/ 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | .python-version 87 | 88 | # pipenv 89 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
90 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 91 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 92 | # install all needed dependencies. 93 | #Pipfile.lock 94 | 95 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 96 | __pypackages__/ 97 | 98 | # Celery stuff 99 | celerybeat-schedule 100 | celerybeat.pid 101 | 102 | # SageMath parsed files 103 | *.sage.py 104 | 105 | # Environments 106 | .env 107 | .venv 108 | env/ 109 | venv/ 110 | ENV/ 111 | env.bak/ 112 | venv.bak/ 113 | 114 | # Spyder project settings 115 | .spyderproject 116 | .spyproject 117 | 118 | # Rope project settings 119 | .ropeproject 120 | 121 | # mkdocs documentation 122 | /site 123 | 124 | # mypy 125 | .mypy_cache/ 126 | .dmypy.json 127 | dmypy.json 128 | 129 | # Pyre type checker 130 | .pyre/ 131 | 132 | # vscode 133 | .vscode/ 134 | 135 | # mac 136 | .DS_Store 137 | -------------------------------------------------------------------------------- /limetr/.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | os: linux 3 | dist: xenial 4 | 5 | jobs: 6 | name: "Conda/ Python Linux" 7 | python: 3.7 8 | # env: no env. variables needed 9 | 10 | before_install: 11 | # Here we just install Miniconda, which you shouldn't have to change. 12 | - wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh 13 | - chmod +x miniconda.sh 14 | - ./miniconda.sh -b 15 | - export PATH=/home/travis/miniconda3/bin:$PATH 16 | - conda update --yes conda 17 | 18 | install: 19 | # We just set up a conda environment with the right Python version. 20 | - conda create --yes -n limetr_conda python=$TRAVIS_PYTHON_VERSION 21 | - source activate limetr_conda 22 | - conda env list 23 | - conda install --yes -c defaults -c conda-forge conda-build conda-verify 24 | - conda install --yes -c conda-forge -c defaults gcc_linux-64 gfortran_linux-64 gxx_linux-64 numpy==1.19.1 scipy==1.5.2 cyipopt 25 | - conda update --yes conda 26 | - make build 27 | - make install 28 | 29 | script: 30 | - make tests 31 | - make sdist 32 | - make package 33 | 34 | -------------------------------------------------------------------------------- /limetr/LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2020, Peng Zheng 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | -------------------------------------------------------------------------------- /limetr/Makefile: -------------------------------------------------------------------------------- 1 | # make file for pynlme class 2 | OUTPUT_DIR=output 3 | CONDA_PKG_DIR=conda_pkg 4 | NUMPY_VER=1.19.1 5 | 6 | .PHONY: clean, tests 7 | 8 | build: setup.py 9 | python setup.py build 10 | 11 | install: setup.py 12 | python check_requirements.py 13 | python setup.py install 14 | 15 | sdist: setup.py 16 | python setup.py sdist 17 | 18 | tests: 19 | python tests/check_utils.py 20 | python tests/check_limetr.py 21 | 22 | clean: 23 | find . -name "*.so*" | xargs rm -rf 24 | find . -name "*.pyc" | xargs rm -rf 25 | find . -name "__pycache__" | xargs rm -rf 26 | find . -name "build" | xargs rm -rf 27 | find . -name "dist" | xargs rm -rf 28 | find . -name "MANIFEST" | xargs rm -rf 29 | rm -rf ./lib ./$(OUTPUT_DIR) 30 | 31 | uninstall: 32 | find $(CONDA_PREFIX)/lib/ -name "*limetr*" | xargs rm -rf 33 | 34 | package: src/limetr/Makefile src/limetr/special_mat.f90 src/limetr/utils.py 35 | @echo "### Ensure version number in $(CONDA_PKG_DIR) matches with version in setup.py" 36 | @echo "Currently only tested for linux" 37 | @echo "Installing conda pre-requirements" 38 | @conda install --yes --strict-channel-priority -c conda-forge -c defaults conda-build conda-verify 39 | @echo "Installing additional conda dependencies" 40 | @conda install --yes --strict-channel-priority -c conda-forge -c defaults numpy==1.19.1 scipy==1.5.2 cyipopt 41 | @echo "Building conda package for limetr (from $(CONDA_PKG_DIR) folder)" 42 | conda build -k --no-anaconda-upload --verify --numpy $(NUMPY_VER) --output-folder "$(OUTPUT_DIR)" --cache-dir /tmp/limetrcache ./$(CONDA_PKG_DIR)/ 43 | @echo "conda build status:'$?'" 44 | @echo "Generated conda package file is: $(OUTPUT_DIR)/linux-64/limetr*.tar.bz2" 45 | @ls -l $(OUTPUT_DIR)/linux-64/limetr*.tar.bz2 46 | 47 | -------------------------------------------------------------------------------- /limetr/README.md: -------------------------------------------------------------------------------- 1 | # Linear Mixed Effects Model with Trimming 2 | 3 | [![License](https://img.shields.io/badge/License-BSD%202--Clause-orange.svg)](https://opensource.org/licenses/BSD-2-Clause) 4 | [![Travis CI/ Build Status](https://travis-ci.org/ramittal/limetr.svg?branch=master)](https://travis-ci.org/ramittal/limetr) 5 | [![Coverage Status](https://coveralls.io/repos/github/ramittal/limetr/badge.svg?branch=master)](https://coveralls.io/github/ramittal/limetr?branch=master) 6 | [![Scrutinizer Code Quality](https://scrutinizer-ci.com/g/ramittal/limetr/badges/quality-score.png?b=master)](https://scrutinizer-ci.com/g/ramittal/limetr/?branch=master) 7 | [![PyPI](https://img.shields.io/pypi/v/ramittal.svg)](https://badge.fury.io/py/ramittal) 8 | 9 | 10 | -------------------------------------------------------------------------------- /limetr/check_requirements.py: 
-------------------------------------------------------------------------------- 1 | # check if the required packages are installed 2 | import os 3 | import pathlib 4 | import importlib 5 | from sys import platform 6 | 7 | 8 | # installation of the required packages 9 | required_modules = [('numpy', 10 | 'conda install -y numpy'), 11 | ('scipy', 12 | 'conda install -y scipy'), 13 | ('ipopt', 14 | 'conda install -y -c conda-forge cyipopt')] 15 | 16 | 17 | def check_module(module_name, install_command): 18 | try: 19 | importlib.import_module(module_name) 20 | except ImportError: 21 | os.system(install_command) 22 | 23 | 24 | def extract_lib(lib_name, des_lib_folder): 25 | conda_lib = os.path.join(os.getenv("CONDA_PREFIX"), "lib") 26 | pathlib.Path(des_lib_folder).mkdir(exist_ok=True) 27 | lib_files = [file_name for file_name in os.listdir(conda_lib) 28 | if lib_name in file_name] 29 | 30 | if not lib_files: 31 | raise FileNotFoundError(lib_name + " not found!") 32 | 33 | for file in lib_files: 34 | os.system(" ".join(["cp -L", 35 | os.path.join(conda_lib, file), 36 | des_lib_folder])) 37 | 38 | if platform == "linux" or platform == "linux2": 39 | required_lib_name = lib_name + ".so" 40 | related_lib_files = [file_name for file_name in lib_files 41 | if required_lib_name in file_name] 42 | assert any(related_lib_files) 43 | if not pathlib.Path(os.path.join(des_lib_folder, 44 | required_lib_name)).exists(): 45 | os.system(" ".join(["ln -s", 46 | related_lib_files[-1], 47 | os.path.join(des_lib_folder, 48 | required_lib_name)])) 49 | 50 | 51 | for module_name, install_command in required_modules: 52 | check_module(module_name, install_command) 53 | 54 | # create the library of blas and lapack 55 | des_lib_folder = "./lib" 56 | extract_lib("libblas", des_lib_folder) 57 | extract_lib("liblapack", des_lib_folder) 58 | -------------------------------------------------------------------------------- /limetr/conda_pkg/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # original author: ramittal@uw.edu, 2020-12-10 4 | # Usage: build.sh Invoked from "conda build" to build conda package from meta.yaml 5 | # ../build/lib.linux-x86_64-3.7/limetr/special_mat.cpython-37m-x86_64-linux-gnu.so 6 | # must have been built and available before invoking this script 7 | # PREFIX, RECIPE_DIR, SP_DIR, PY_VER are defined by 'conda build' 8 | # Not using any custom variables from Makefile, since this is invoked 9 | # from conda-build, not from make.
conda-build can be invoked by make 10 | # https://docs.conda.io/projects/conda-build/en/latest/user-guide/environment-variables.html 11 | # 12 | # Purpose: Produces conda package including .so file for limetr 13 | # 14 | 15 | cwd="$(pwd)" 16 | bname="$(basename "${cwd}" )" 17 | 18 | # check and display environment variables used, never null from conda-build 19 | # shellcheck disable=SC2154 20 | if [ "" = "${RECIPE_DIR}" ] ; then 21 | echo "RECIPE_DIR is required but found empty" 22 | exit 1 23 | else 24 | echo "RECIPE_DIR is '${RECIPE_DIR}'" 25 | fi 26 | # shellcheck disable=SC2154 27 | if [ "" = "${PREFIX}" ] ; then 28 | echo "PREFIX is required but found empty" 29 | exit 1 30 | else 31 | echo "PREFIX is '${PREFIX}'" 32 | fi 33 | # shellcheck disable=SC2154 34 | if [ "" = "${SP_DIR}" ] ; then 35 | echo "SP_DIR is required but found empty" 36 | exit 1 37 | else 38 | echo "SP_DIR is '${SP_DIR}'" 39 | fi 40 | 41 | SOURCE_DIR="${RECIPE_DIR}/../src/limetr" 42 | filename="special_mat.cpython-37m-x86_64-linux-gnu.so" 43 | SHARED_OBJFILE="${RECIPE_DIR}/../build/lib.linux-x86_64-3.7/limetr/${filename}" 44 | 45 | # set target folder for SO file 46 | # shellcheck disable=SC2154 47 | TARGET_DIR="${SP_DIR}/${PKG_NAME}" 48 | 49 | echo "Starting build in working folder :${bname}: at $(date) in '${cwd}'" 50 | echo "Current working directory: '${cwd}'" 51 | echo "Working Conda Environment Variables: PREFIX: '${PREFIX}', RECIPE_DIR='${RECIPE_DIR}'" 52 | echo "Python's site-packages location: '${TARGET_DIR}'" 53 | # shellcheck disable=SC2154 54 | echo "Package Name: '${PKG_NAME}', Version:'${PKG_VERSION}'" 55 | # shellcheck disable=SC2154 56 | echo "Python version: '${PY_VER}'" 57 | echo "Contents of recipe directory: '${RECIPE_DIR}'" 58 | ls -l "${RECIPE_DIR}/" 59 | echo "Target path for package is:'${TARGET_DIR}/'" 60 | echo "Contents of SP_DIR (limetr) dir before copy" 61 | ls -l "${SP_DIR}/" 62 | 63 | # validate valid package and version names 64 | if [ "none" = "${PKG_NAME}" ] || [ "None" = "${PKG_VERSION}" ] ; then 65 | echo "***Error Invalid Package or version values " >&2 66 | exit 1 67 | fi 68 | 69 | echo "Starting copy of artifacts to '${TARGET_DIR}/'" 70 | if [ -d "${TARGET_DIR}" ] ; then 71 | echo "Target dir '${TARGET_DIR}' exists" 72 | else 73 | echo "Creating missing target dir '${TARGET_DIR}'" 74 | mkdir -p "${TARGET_DIR}" 75 | fi 76 | # copy shared library to target location 77 | if [ -f "${SHARED_OBJFILE}" ] ; then 78 | echo "Copying '${SHARED_OBJFILE}'" 79 | cp -f "${SHARED_OBJFILE}" "${TARGET_DIR}/" 80 | exit_status="$?" 81 | if [ "${exit_status}" -ne 0 ] ; then 82 | echo "***Error '${exit_status}'*** during copy of '${SHARED_OBJFILE}' file into site-packages dir " >&2 83 | exit 1 84 | fi 85 | else 86 | echo "***Error '${SHARED_OBJFILE}' does not exist at source " >&2 87 | exit 1 88 | fi 89 | echo "Copying source files from '${SOURCE_DIR}' to '${TARGET_DIR}'" 90 | ls -l "${SOURCE_DIR}" 91 | for afile in "${SOURCE_DIR}"/*.py 92 | do 93 | echo "Copying ${afile}" 94 | cp -f "${afile}" "${TARGET_DIR}/". 95 | exit_status="$?"
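# stop the packaging run as soon as any source file fails to copy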
96 | if [ "0" = "${exit_status}" ] ; then 97 | echo "${afile} copied successfully" 98 | else 99 | echo "*** Copy of ${afile} failed with status: '${exit_status}' into '${TARGET_DIR}' dir " >&2 100 | exit 1 101 | fi 102 | done 103 | 104 | echo "Contents of SP_DIR (limetr) dir after copy" 105 | ls -l "${SP_DIR}" 106 | if [ -d "${TARGET_DIR}" ] ; then 107 | echo "Contents of '${TARGET_DIR}'" 108 | ls -l "${TARGET_DIR}" 109 | else 110 | echo "*** Error: expected '${TARGET_DIR}' does not exist" 111 | exit 1 112 | fi 113 | 114 | echo "Completing build of :${PKG_NAME}: at $(date)" 115 | exit "0" 116 | 117 | -------------------------------------------------------------------------------- /limetr/conda_pkg/conda_build_config.yaml: -------------------------------------------------------------------------------- 1 | # - are replaced by _ to allow conda build to parse. 2 | 3 | numpy: 4 | - 1.19.1 5 | scipy: 6 | - 1.5.2 7 | cyipopt: 8 | 9 | # cyipopt: 10 | # - 0.2.0 11 | # scikit_sparse: 12 | # - 0.4.4 13 | -------------------------------------------------------------------------------- /limetr/conda_pkg/meta.yaml: -------------------------------------------------------------------------------- 1 | # {% set data = load_setup_py_data() %} 2 | # {% set name = data.get('name') %} 3 | # {% set version = data.get('version') %} 4 | # above dynamic loading did not work, so using hard coded values 5 | {% set name = 'limetr' %} 6 | {% set version = '0.0.2' %} 7 | 8 | package: 9 | name: "{{ name|lower }}" 10 | version: "{{ version }}" 11 | 12 | source: 13 | path: . 14 | 15 | build: 16 | # build steps are in build.sh 17 | # script: python check_requirements.py setup.py install 18 | 19 | requirements: 20 | host: 21 | - pip 22 | - python 23 | - wheel 24 | 25 | run: 26 | - pip 27 | - python 28 | - numpy 29 | 30 | test: 31 | # test steps are in run_test.sh 32 | # script: python tests/check_utils.py tests/check_limetr.py 33 | 34 | about: 35 | description: linear mixed effects model with trimming 36 | dev_url: https://github.com/zhengp0/limetr/ 37 | doc_url: https://github.com/zhengp0/limetr/ 38 | home: https://github.com/zhengp0/limetr/ 39 | license: BSD 2-Clause License 40 | license_family: BSD 41 | summary: Linear Mixed Effects Model with Trimming 42 | To copy file from build/lib.linux-x86_64-3.7/limetr/special_mat.cpython-37m-x86_64-linux-gnu.so 43 | 44 | -------------------------------------------------------------------------------- /limetr/conda_pkg/run_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # original author: ramittal@uw.edu, 2020-12-10 4 | # Usage: run_test.sh Invoked from "conda build" to test conda package from meta.yaml 5 | # Variables are based on temporary conda env. created and activated 6 | # by conda build. All paths used should be in context of conda 7 | # special_mat.cpython-37m-x86_64-linux-gnu.so is packaged during 8 | # build phase and made available 9 | # PREFIX, RECIPE_DIR, SP_DIR, PY_VER are defined by 'conda build' 10 | # Not using any custom variables from Makefile, since this is invoked 11 | # from conda-build, not from make. 
conda-build can be invoked by make 12 | # https://docs.conda.io/projects/conda-build/en/latest/user-guide/environment-variables.html 13 | # 14 | # Purpose: Validates presence of limetr files (.so) in python site-packages folder 15 | # ToDo: Add additional validations to actually invoke limetr code 16 | # 17 | 18 | cwd="$(pwd)" 19 | bname="$(basename "${cwd}" )" 20 | 21 | # check and display environment variables used, never null from conda-build 22 | # shellcheck disable=SC2154 23 | if [ "" = "${RECIPE_DIR}" ] ; then 24 | echo "RECIPE_DIR is required but found empty" 25 | exit 1 26 | else 27 | echo "RECIPE_DIR is '${RECIPE_DIR}'" 28 | fi 29 | # shellcheck disable=SC2154 30 | if [ "" = "${PREFIX}" ] ; then 31 | echo "PREFIX is required but found empty" 32 | exit 1 33 | else 34 | echo "PREFIX is '${PREFIX}'" 35 | fi 36 | # shellcheck disable=SC2154 37 | if [ "" = "${SP_DIR}" ] ; then 38 | echo "SP_DIR is required but found empty" 39 | exit 1 40 | else 41 | echo "SP_DIR is '${SP_DIR}'" 42 | fi 43 | 44 | filename="special_mat.cpython-37m-x86_64-linux-gnu.so" 45 | 46 | echo "Starting build in working dir :${bname}: at $(date) in '${cwd}'" 47 | echo "Current working directory: '${cwd}'" 48 | echo "Working Conda Environment Variables: PREFIX: '${PREFIX}', RECIPE_DIR='${RECIPE_DIR}'" 49 | # shellcheck disable=SC2154 50 | echo "Package Name: '${PKG_NAME}', Version:'${PKG_VERSION}'" 51 | # shellcheck disable=SC2154 52 | echo "Python version: '${PY_VER}'" 53 | echo "Contents of recipe directory: '${RECIPE_DIR}'" 54 | ls -l "${RECIPE_DIR}/" 55 | echo "Contents of SP_DIR (limetr) dir before test" 56 | ls -l "${SP_DIR}/" 57 | 58 | # set expected folder for SO file 59 | SHAREDOBJ_DIR="${SP_DIR}/${PKG_NAME}" 60 | echo "Python's site-packages location: '${SHAREDOBJ_DIR}'" 61 | echo "Target path for package is:'${SHAREDOBJ_DIR}/'" 62 | 63 | echo "## Listing conda environment" 64 | conda env list 65 | 66 | echo "## Listing installed conda packages" 67 | conda list | tee /tmp/conda_pkg_installed.txt 68 | 69 | echo "Validating existence of files at target location" 70 | SHARED_OBJFILE="${SHAREDOBJ_DIR}/${filename}" 71 | echo "Looking for '${SHARED_OBJFILE}'" 72 | if [ -f "${SHARED_OBJFILE}" ] ; then 73 | echo "File '${SHARED_OBJFILE}' exists at site-packages" 74 | ls -l "${SHARED_OBJFILE}" 75 | else 76 | echo "***Error '${SHARED_OBJFILE}' does not exist " >&2 77 | echo "########## NOT EXITING exit 1" 78 | fi 79 | 80 | # validate installation of package 81 | pkgver_file="/tmp/conda-${PKG_NAME}.txt" 82 | grep -i "^${PKG_NAME}" /tmp/conda_pkg_installed.txt > "${pkgver_file}" 83 | exit_status=$? 84 | if [ "0" = "${exit_status}" ] ; then 85 | # validate version 86 | version="$(cut -f 2- -d' ' "${pkgver_file}" | xargs | cut -f1 -d' ' | xargs )" 87 | if [ "${version}" = "${PKG_VERSION}" ] ; then 88 | echo "Correct version of Package Name: '${PKG_NAME}-${PKG_VERSION}' installed in env." 89 | else 90 | echo "***Error Incorrect version of '${PKG_NAME}' is installed in build/test conda env. " >&2 91 | echo "***Required '${PKG_VERSION}', found: '${version}' " >&2 92 | exit 1 93 | fi 94 | else 95 | echo "***Error '${PKG_NAME}' is not installed in current conda env. 
" >&2 96 | exit 1 97 | fi 98 | 99 | echo "Checking for existence of tests" 100 | if [ -d "${RECIPE_DIR}/../tests" ] ; then 101 | test_result=$(python "${RECIPE_DIR}/../tests/check_utils.py" "${RECIPE_DIR}/../tests/check_limetr.py") 102 | echo "Test result is '${test_result}'" 103 | else 104 | echo "Unable to find test sources, no test executed" 105 | fi 106 | 107 | echo "Completing test of :${PKG_NAME}: at $(date) with exit status: :${exit_status}:" 108 | exit "${exit_status}" 109 | 110 | -------------------------------------------------------------------------------- /limetr/experiments/test_trimming_with_certain_inlier.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Test Trimming with Certain Inliers" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "import matplotlib.pyplot as plt\n", 18 | "from limetr import LimeTr" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "## create test data" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "N = 101\n", 35 | "X = np.linspace(0.0, 2.0, N)\n", 36 | "X = np.insert(X[:, None], 0, 1.0, axis=1)\n", 37 | "Z = np.ones((N, 1))\n", 38 | "\n", 39 | "k_beta = 2\n", 40 | "k_gamma = 1\n", 41 | "\n", 42 | "n = np.array([50, 51])\n", 43 | "beta_true = np.array([1.0, 2.0])\n", 44 | "gamma_true = np.array([0.0])\n", 45 | "S = np.repeat(0.1, N)\n", 46 | "\n", 47 | "u = np.random.randn(n.size, k_gamma)*np.sqrt(gamma_true)\n", 48 | "E = np.random.randn(N)*S\n", 49 | "Y = X.dot(beta_true) + np.sum(Z*np.repeat(u, n, axis=0), axis=1) + E\n", 50 | "\n", 51 | "# add outlier\n", 52 | "num_outliers = 5\n", 53 | "outlier_id = np.random.choice(N, num_outliers, replace=False)\n", 54 | "Y[outlier_id] += 10.0" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "## without pre-select inlier" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "lt = LimeTr(n, k_beta, k_gamma,\n", 71 | " Y,\n", 72 | " lambda beta: X.dot(beta),\n", 73 | " lambda beta: X,\n", 74 | " Z,\n", 75 | " S=S,\n", 76 | " inlier_percentage = 1.0 - num_outliers/N)" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "beta_soln, gamma_soln, w_soln = lt.fitModel()" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "w_soln[outlier_id]" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "plt.scatter(X[:, 1], Y, marker='.')\n", 104 | "plt.scatter(X[w_soln == 0.0, 1], Y[w_soln == 0.0], marker='x', color='r')\n", 105 | "plt.plot(X[:, 1], X.dot(beta_true), 'k')\n", 106 | "plt.plot(X[:, 1], X.dot(beta_soln))" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": {}, 112 | "source": [ 113 | "## pre-select inlier" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "lt = LimeTr(n, k_beta, k_gamma,\n", 123 | " 
Y,\n", 124 | " lambda beta: X.dot(beta),\n", 125 | " lambda beta: X,\n", 126 | " Z,\n", 127 | " S=S,\n", 128 | " certain_inlier_id = np.array([outlier_id[0]]),\n", 129 | " inlier_percentage = 1.0 - num_outliers/N)" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "beta_soln, gamma_soln, w_soln = lt.fitModel()" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "w_soln[outlier_id]" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": null, 153 | "metadata": {}, 154 | "outputs": [], 155 | "source": [ 156 | "plt.scatter(X[:, 1], Y, marker='.')\n", 157 | "plt.scatter(X[outlier_id[0], 1], Y[outlier_id[0]], marker='o', color='g', facecolors='none')\n", 158 | "plt.scatter(X[w_soln == 0.0, 1], Y[w_soln == 0.0], marker='x', color='r')\n", 159 | "plt.plot(X[:, 1], X.dot(beta_true), 'k')\n", 160 | "plt.plot(X[:, 1], X.dot(beta_soln))" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": null, 166 | "metadata": {}, 167 | "outputs": [], 168 | "source": [] 169 | } 170 | ], 171 | "metadata": { 172 | "kernelspec": { 173 | "display_name": "Python 3", 174 | "language": "python", 175 | "name": "python3" 176 | }, 177 | "language_info": { 178 | "codemirror_mode": { 179 | "name": "ipython", 180 | "version": 3 181 | }, 182 | "file_extension": ".py", 183 | "mimetype": "text/x-python", 184 | "name": "python", 185 | "nbconvert_exporter": "python", 186 | "pygments_lexer": "ipython3", 187 | "version": "3.7.3" 188 | } 189 | }, 190 | "nbformat": 4, 191 | "nbformat_minor": 2 192 | } 193 | -------------------------------------------------------------------------------- /limetr/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from numpy.distutils.core import setup 3 | from numpy.distutils.core import Extension 4 | 5 | # fortran extension module 6 | ext = Extension(name='limetr.special_mat', 7 | sources=['src/limetr/special_mat.f90'], 8 | library_dirs=['./lib'], 9 | libraries=['lapack', 'blas']) 10 | 11 | setup(name='limetr', 12 | version='0.0.2', 13 | description='linear mixed effects model with trimming', 14 | url='https://github.com/zhengp0/limetr', 15 | author='Peng Zheng', 16 | author_email='zhengp@uw.edu', 17 | license='MIT', 18 | packages=['limetr'], 19 | package_dir={'limetr': 'src/limetr'}, 20 | ext_modules=[ext], 21 | install_requires=['numpy', 'scipy', 'ipopt'], 22 | zip_safe=False) 23 | -------------------------------------------------------------------------------- /limetr/src/limetr/Makefile: -------------------------------------------------------------------------------- 1 | # make file for the fortran module 2 | 3 | .PHONY: clean 4 | 5 | special_mat: special_mat.f90 6 | f2py -c -m special_mat special_mat.f90 -L/usr/local/lib -llapack -lblas 7 | 8 | clean: 9 | find . -name '__pycache__' | xargs rm -rf 10 | find . -name '*.so' | xargs rm -rf 11 | -------------------------------------------------------------------------------- /limetr/tests/check_limetr.py: -------------------------------------------------------------------------------- 1 | # test suit for limetr 2 | import os 3 | import sys 4 | # add current directory 5 | sys.path.append('./') 6 | 7 | 8 | def run_test(name): 9 | namespace = {} 10 | exec('import ' + name, namespace) 11 | exec('ok = ' + name + '.' 
+ name + '()', namespace) 12 | ok = namespace['ok'] 13 | if ok: 14 | print(name + ': OK') 15 | else: 16 | print(name + ': Error') 17 | return ok 18 | 19 | 20 | fun_list = [ 21 | 'limetr_objective', 22 | 'limetr_gradient', 23 | 'limetr_objectiveTrimming', 24 | 'limetr_gradientTrimming', 25 | 'limetr_lasso' 26 | ] 27 | 28 | error_count = 0 29 | 30 | for name in fun_list: 31 | ok = run_test(name) 32 | if not ok: 33 | error_count += 1 34 | 35 | if error_count > 0: 36 | print('check_limetr: error_count =', error_count) 37 | sys.exit(1) 38 | else: 39 | print('check_limetr: OK') 40 | sys.exit(0) 41 | -------------------------------------------------------------------------------- /limetr/tests/check_utils.py: -------------------------------------------------------------------------------- 1 | # test suite for limetr 2 | import os 3 | import sys 4 | # add current directory 5 | sys.path.append('./') 6 | 7 | 8 | def run_test(name): 9 | namespace = {} 10 | exec('import ' + name, namespace) 11 | exec('ok = ' + name + '.' + name + '()', namespace) 12 | ok = namespace['ok'] 13 | if ok: 14 | print(name + ': OK') 15 | else: 16 | print(name + ': Error') 17 | return ok 18 | 19 | 20 | fun_list = [ 21 | 'izmat_lsvd', 22 | 'izmat_zdecomp', 23 | 'izmat_block_izmv', 24 | 'izmat_izmv', 25 | 'izmat_block_izmm', 26 | 'izmat_izmm', 27 | 'izmat_izeig', 28 | 'izmat_block_izdiag', 29 | 'izmat_izdiag', 30 | 'varmat_dot', 31 | 'varmat_invDot', 32 | 'varmat_diag', 33 | 'varmat_invDiag', 34 | 'varmat_logDet', 35 | 'projCappedSimplex' 36 | ] 37 | 38 | error_count = 0 39 | 40 | for name in fun_list: 41 | ok = run_test(name) 42 | if not ok: 43 | error_count += 1 44 | 45 | if error_count > 0: 46 | print('check_utils: error_count =', error_count) 47 | sys.exit(1) 48 | else: 49 | print('check_utils: OK') 50 | sys.exit(0) 51 | -------------------------------------------------------------------------------- /limetr/tests/izmat_block_izdiag.py: -------------------------------------------------------------------------------- 1 | # check utils block_izdiag 2 | 3 | 4 | def izmat_block_izdiag(): 5 | import numpy as np 6 | from limetr.special_mat import izmat 7 | 8 | ok = True 9 | tol = 1e-10 10 | # problem 1, tall matrix 11 | # ------------------------------------------------------------------------- 12 | n, k = 6, 3 13 | l = min(n, k) 14 | 15 | z = np.random.randn(n, k) 16 | 17 | my_u = np.zeros(n*l) 18 | my_s = np.zeros(l) 19 | izmat.lsvd(z, my_u, my_s) 20 | 21 | tr_y = np.diag(np.eye(n) + z.dot(z.T)) 22 | my_y = np.zeros(n) 23 | izmat.block_izdiag(my_u, my_s**2, my_y) 24 | 25 | err = np.linalg.norm(tr_y - my_y) 26 | ok = ok and err < tol 27 | 28 | if not ok: 29 | print('err in block_izdiag tall matrix') 30 | print('err:', err) 31 | 32 | # problem 2, fat matrix 33 | # ------------------------------------------------------------------------- 34 | n, k = 3, 6 35 | l = min(n, k) 36 | 37 | z = np.random.randn(n, k) 38 | 39 | my_u = np.zeros(n*l) 40 | my_s = np.zeros(l) 41 | izmat.lsvd(z, my_u, my_s) 42 | 43 | tr_y = np.diag(np.eye(n) + z.dot(z.T)) 44 | my_y = np.zeros(n) 45 | izmat.block_izdiag(my_u, my_s**2, my_y) 46 | 47 | err = np.linalg.norm(tr_y - my_y) 48 | ok = ok and err < tol 49 | 50 | if not ok: 51 | print('err in block_izdiag fat matrix') 52 | print('err:', err) 53 | 54 | return ok 55 | -------------------------------------------------------------------------------- /limetr/tests/izmat_block_izmm.py: -------------------------------------------------------------------------------- 1 | # check utils block_izmm 2 | 3 | 4 | def 
izmat_block_izmm(): 5 | import numpy as np 6 | from limetr.special_mat import izmat 7 | 8 | ok = True 9 | tol = 1e-10 10 | # problem 1, tall matrix 11 | # ------------------------------------------------------------------------- 12 | n, k = 6, 3 13 | l = min(n, k) 14 | 15 | z = np.random.randn(n, k) 16 | x = np.random.randn(n, 5) 17 | 18 | my_u = np.zeros(n*l) 19 | my_s = np.zeros(l) 20 | izmat.lsvd(z, my_u, my_s) 21 | 22 | tr_y = x + z.dot(z.T.dot(x)) 23 | my_y = np.zeros((n, 5), order='F') 24 | izmat.block_izmm(my_u, my_s**2, x, my_y) 25 | 26 | err = np.linalg.norm(tr_y - my_y) 27 | ok = ok and err < tol 28 | 29 | if not ok: 30 | print('err in block_izmm tall matrix') 31 | print('err:', err) 32 | 33 | # problem 2, fat matrix 34 | # ------------------------------------------------------------------------- 35 | n, k = 3, 6 36 | l = min(n, k) 37 | 38 | z = np.random.randn(n, k) 39 | x = np.random.randn(n, 5) 40 | 41 | my_u = np.zeros(n*l) 42 | my_s = np.zeros(l) 43 | izmat.lsvd(z, my_u, my_s) 44 | 45 | tr_y = x + z.dot(z.T.dot(x)) 46 | my_y = np.zeros((n, 5), order='F') 47 | izmat.block_izmm(my_u, my_s**2, x, my_y) 48 | 49 | err = np.linalg.norm(tr_y - my_y) 50 | ok = ok and err < tol 51 | 52 | if not ok: 53 | print('err in block_izmm fat matrix') 54 | print('err:', err) 55 | 56 | return ok 57 | -------------------------------------------------------------------------------- /limetr/tests/izmat_block_izmv.py: -------------------------------------------------------------------------------- 1 | # check utils block_izmv 2 | 3 | 4 | def izmat_block_izmv(): 5 | import numpy as np 6 | from limetr.special_mat import izmat 7 | 8 | ok = True 9 | tol = 1e-10 10 | # problem 1, tall matrix 11 | # ------------------------------------------------------------------------- 12 | n, k = 6, 3 13 | l = min(n, k) 14 | 15 | z = np.random.randn(n, k) 16 | x = np.random.randn(n) 17 | 18 | my_u = np.zeros(n*l) 19 | my_s = np.zeros(l) 20 | izmat.lsvd(z, my_u, my_s) 21 | 22 | tr_y = x + z.dot(z.T.dot(x)) 23 | my_y = np.zeros(n) 24 | izmat.block_izmv(my_u, my_s**2, x, my_y) 25 | 26 | err = np.linalg.norm(tr_y - my_y) 27 | ok = ok and err < tol 28 | 29 | if not ok: 30 | print('err in block_izmv tall matrix') 31 | print('err:', err) 32 | 33 | # problem 2, fat matrix 34 | # ------------------------------------------------------------------------- 35 | n, k = 3, 6 36 | l = min(n, k) 37 | 38 | z = np.random.randn(n, k) 39 | x = np.random.randn(n) 40 | 41 | my_u = np.zeros(n*l) 42 | my_s = np.zeros(l) 43 | izmat.lsvd(z, my_u, my_s) 44 | 45 | tr_y = x + z.dot(z.T.dot(x)) 46 | my_y = np.zeros(n) 47 | izmat.block_izmv(my_u, my_s**2, x, my_y) 48 | 49 | err = np.linalg.norm(tr_y - my_y) 50 | ok = ok and err < tol 51 | 52 | if not ok: 53 | print('err in block_izmv fat matrix') 54 | print('err:', err) 55 | 56 | return ok 57 | -------------------------------------------------------------------------------- /limetr/tests/izmat_izdiag.py: -------------------------------------------------------------------------------- 1 | # check utils izdiag 2 | 3 | 4 | def izmat_izdiag(): 5 | import numpy as np 6 | from limetr.special_mat import izmat 7 | 8 | ok = True 9 | tol = 1e-10 10 | # setup problem 11 | # ------------------------------------------------------------------------- 12 | k = 3 13 | n = np.array([5, 2, 4]) 14 | m = n.size 15 | 16 | z_list = [np.random.randn(n[i], k) for i in range(m)] 17 | 18 | z = np.vstack(z_list) 19 | 20 | ns = np.minimum(n, k) 21 | nu = ns*n 22 | nx = n 23 | nz = n 24 | 25 | u = np.zeros(nu.sum()) 26 | s = 
np.zeros(ns.sum()) 27 | 28 | izmat.zdecomp(nz, nu, ns, z, u, s) 29 | my_y = izmat.izdiag(n.sum(), nu, ns, nx, u, s**2) 30 | 31 | y_list = [np.diag(np.eye(n[i]) + z_list[i].dot(z_list[i].T)) 32 | for i in range(m)] 33 | 34 | tr_y = np.hstack(y_list) 35 | 36 | err = np.linalg.norm(tr_y - my_y) 37 | ok = ok and err < tol 38 | if not ok: 39 | print('err in izdiag') 40 | print('err:', err) 41 | 42 | return ok 43 | -------------------------------------------------------------------------------- /limetr/tests/izmat_izeig.py: -------------------------------------------------------------------------------- 1 | # check utils izeig 2 | 3 | 4 | def izmat_izeig(): 5 | import numpy as np 6 | from scipy.linalg import block_diag 7 | from limetr.special_mat import izmat 8 | 9 | ok = True 10 | tol = 1e-10 11 | # setup problem 12 | # ------------------------------------------------------------------------- 13 | k = 3 14 | n = np.array([5, 2, 4]) 15 | m = n.size 16 | 17 | z_list = [np.random.randn(n[i], k) for i in range(m)] 18 | 19 | z = np.vstack(z_list) 20 | 21 | ns = np.minimum(n, k) 22 | nu = ns*n 23 | nx = n 24 | nz = n 25 | 26 | u = np.zeros(nu.sum()) 27 | s = np.zeros(ns.sum()) 28 | 29 | izmat.zdecomp(nz, nu, ns, z, u, s) 30 | 31 | my_eig = izmat.izeig(sum(n), n, ns, s**2) 32 | tr_eig, vec = np.linalg.eig(block_diag(*[ 33 | np.eye(n[i]) + z_list[i].dot(z_list[i].T) 34 | for i in range(len(n)) 35 | ])) 36 | 37 | err = np.linalg.norm(np.sort(tr_eig) - np.sort(my_eig))  # eigenvalue order is arbitrary, so compare sorted values 38 | ok = ok and err < tol 39 | if not ok: 40 | print('err in izeig') 41 | print('err:', err) 42 | 43 | return ok 44 | -------------------------------------------------------------------------------- /limetr/tests/izmat_izmm.py: -------------------------------------------------------------------------------- 1 | # check utils izmm 2 | 3 | 4 | def izmat_izmm(): 5 | import numpy as np 6 | from limetr.special_mat import izmat 7 | 8 | ok = True 9 | tol = 1e-10 10 | # setup problem 11 | # ------------------------------------------------------------------------- 12 | k = 3 13 | n = np.array([5, 2, 4]) 14 | m = n.size 15 | 16 | z_list = [np.random.randn(n[i], k) for i in range(m)] 17 | x_list = [np.random.randn(n[i], 5) for i in range(m)] 18 | 19 | z = np.vstack(z_list) 20 | x = np.vstack(x_list) 21 | 22 | ns = np.minimum(n, k) 23 | nu = ns*n 24 | nx = n 25 | nz = n 26 | 27 | u = np.zeros(nu.sum()) 28 | s = np.zeros(ns.sum()) 29 | 30 | izmat.zdecomp(nz, nu, ns, z, u, s) 31 | my_y = izmat.izmm(nu, ns, nx, u, s**2, x) 32 | 33 | y_list = [x_list[i] + z_list[i].dot(z_list[i].T.dot(x_list[i])) 34 | for i in range(m)] 35 | 36 | tr_y = np.vstack(y_list) 37 | 38 | err = np.linalg.norm(tr_y - my_y) 39 | ok = ok and err < tol 40 | if not ok: 41 | print('err in izmm') 42 | print('err:', err) 43 | 44 | return ok 45 | -------------------------------------------------------------------------------- /limetr/tests/izmat_izmv.py: -------------------------------------------------------------------------------- 1 | # check utils izmv 2 | 3 | 4 | def izmat_izmv(): 5 | import numpy as np 6 | from limetr.special_mat import izmat 7 | 8 | ok = True 9 | tol = 1e-10 10 | # setup problem 11 | # ------------------------------------------------------------------------- 12 | k = 3 13 | n = np.array([5, 2, 4]) 14 | m = n.size 15 | 16 | z_list = [np.random.randn(n[i], k) for i in range(m)] 17 | x_list = [np.random.randn(n[i]) for i in range(m)] 18 | 19 | z = np.vstack(z_list) 20 | x = np.hstack(x_list) 21 | 22 | ns = np.minimum(n, k) 23 | nu = ns*n 24 | nx = n 25 | nz = n 26 | 27 | u = np.zeros(nu.sum()) 28 | s = np.zeros(ns.sum()) 29 | 30 | izmat.zdecomp(nz,
nu, ns, z, u, s) 31 | my_y = izmat.izmv(nu, ns, nx, u, s**2, x) 32 | 33 | y_list = [x_list[i] + z_list[i].dot(z_list[i].T.dot(x_list[i])) 34 | for i in range(m)] 35 | 36 | tr_y = np.hstack(y_list) 37 | 38 | err = np.linalg.norm(tr_y - my_y) 39 | ok = ok and err < tol 40 | if not ok: 41 | print('err in izmv') 42 | print('err:', err) 43 | 44 | return ok 45 | -------------------------------------------------------------------------------- /limetr/tests/izmat_lsvd.py: -------------------------------------------------------------------------------- 1 | # check utils lsvd 2 | 3 | 4 | def izmat_lsvd(): 5 | import numpy as np 6 | from limetr.special_mat import izmat 7 | 8 | ok = True 9 | tol = 1e-10 10 | # problem 1, tall matrix 11 | # ------------------------------------------------------------------------- 12 | n, k = 6, 3 13 | z = np.random.randn(n, k) 14 | tr_u, tr_s, tr_vt = np.linalg.svd(z, full_matrices=False) 15 | my_u = np.zeros(tr_u.size) 16 | my_s = np.zeros(tr_s.size) 17 | izmat.lsvd(z, my_u, my_s) 18 | 19 | err = np.linalg.norm(my_u.reshape(k, n).T - tr_u) 20 | ok = ok and err < tol 21 | 22 | if not ok: 23 | print('err in lsvd tall matrix') 24 | print('err:', err) 25 | 26 | # problem 2, fat matrix 27 | # ------------------------------------------------------------------------- 28 | n, k = 3, 6 29 | z = np.random.randn(n, k) 30 | tr_u, tr_s, tr_vt = np.linalg.svd(z, full_matrices=False) 31 | my_u = np.zeros(tr_u.size) 32 | my_s = np.zeros(tr_s.size) 33 | izmat.lsvd(z, my_u, my_s) 34 | 35 | err = np.linalg.norm(np.abs(my_u.reshape(n, n).T) - np.abs(tr_u)) 36 | ok = ok and err < tol 37 | 38 | if not ok: 39 | print('err in lsvd fat matrix') 40 | print('err:', err) 41 | 42 | return ok 43 | -------------------------------------------------------------------------------- /limetr/tests/izmat_zdecomp.py: -------------------------------------------------------------------------------- 1 | # check utils zdecomp 2 | 3 | 4 | def izmat_zdecomp(): 5 | import numpy as np 6 | from limetr.special_mat import izmat 7 | 8 | ok = True 9 | tol = 1e-10 10 | # setup problem 11 | # ------------------------------------------------------------------------- 12 | k = 3 13 | n = [5, 2, 4] 14 | 15 | z_list = [] 16 | tr_u_list = [] 17 | tr_s_list = [] 18 | for i in range(len(n)): 19 | z_list.append(np.random.randn(n[i], k)) 20 | u, s, vt = np.linalg.svd(z_list[-1], full_matrices=False) 21 | tr_u_list.append(u) 22 | tr_s_list.append(s) 23 | 24 | z = np.vstack(z_list) 25 | tr_u = np.hstack([u.reshape(u.size, order='F') for u in tr_u_list]) 26 | tr_s = np.hstack(tr_s_list) 27 | 28 | my_u = np.zeros(tr_u.size) 29 | my_s = np.zeros(tr_s.size) 30 | 31 | nz = [z_sub.shape[0] for z_sub in z_list] 32 | nu = [u_sub.size for u_sub in tr_u_list] 33 | ns = [s_sub.size for s_sub in tr_s_list] 34 | 35 | izmat.zdecomp(nz, nu, ns, z, my_u, my_s) 36 | err = np.linalg.norm(np.abs(my_u) - np.abs(tr_u)) + np.linalg.norm(my_s - tr_s)  # abs(): singular vectors are sign-ambiguous 37 | ok = ok and err < tol 38 | if not ok: 39 | print('err in zdecomp') 40 | print('err:', err) 41 | 42 | return ok 43 | -------------------------------------------------------------------------------- /limetr/tests/limetr_gradient.py: -------------------------------------------------------------------------------- 1 | # test function gradient 2 | 3 | 4 | def limetr_gradient(): 5 | import numpy as np 6 | from limetr.__init__ import LimeTr 7 | 8 | ok = True 9 | # setup test problem 10 | # ------------------------------------------------------------------------- 11 | model = LimeTr.testProblem(use_trimming=True, 12 | use_constraints=True, 13 | use_regularizer=True, 14 | use_uprior=True, 15 | use_gprior=True, 16 | know_obs_std=False,
17 | share_obs_std=True) 18 | 19 | tol = 1e-6 20 | 21 | # test the gradient 22 | # ------------------------------------------------------------------------- 23 | x = np.random.randn(model.k) 24 | x[model.idx_gamma] = 0.1 25 | x[model.idx_delta] = 0.1 26 | 27 | tr_grad = model.gradient(x, use_ad=True) 28 | my_grad = model.gradient(x) 29 | 30 | err = np.linalg.norm(tr_grad - my_grad) 31 | ok = ok and err < tol 32 | 33 | if not ok: 34 | print('err', err) 35 | print('tr_grad', tr_grad) 36 | print('my_grad', my_grad) 37 | 38 | return ok 39 | -------------------------------------------------------------------------------- /limetr/tests/limetr_gradientTrimming.py: -------------------------------------------------------------------------------- 1 | # test function gradientTrimming 2 | 3 | 4 | def limetr_gradientTrimming(): 5 | import numpy as np 6 | from limetr.__init__ import LimeTr 7 | 8 | ok = True 9 | # setup test problem 10 | # ------------------------------------------------------------------------- 11 | model = LimeTr.testProblem(use_trimming=True) 12 | 13 | # decouple all the studies 14 | model.n = np.array([1]*model.N) 15 | 16 | tol = 1e-8 17 | 18 | # test gradientTrimming 19 | # ------------------------------------------------------------------------- 20 | x = np.hstack((model.beta, model.gamma)) 21 | w = model.w 22 | 23 | tr_grad = model.gradientTrimming(w, use_ad=True) 24 | my_grad = model.gradientTrimming(w) 25 | 26 | err = np.linalg.norm(tr_grad - my_grad) 27 | ok = ok and err < tol 28 | 29 | if not ok: 30 | print('err', err) 31 | print('tr_grad', tr_grad) 32 | print('my_grad', my_grad) 33 | 34 | return ok 35 | -------------------------------------------------------------------------------- /limetr/tests/limetr_lasso.py: -------------------------------------------------------------------------------- 1 | # test function lprior 2 | 3 | 4 | def limetr_lasso(): 5 | import numpy as np 6 | from limetr.__init__ import LimeTr 7 | 8 | ok = True 9 | # setup test problem 10 | # ------------------------------------------------------------------------- 11 | model = LimeTr.testProblemLasso() 12 | 13 | tol = 1e-6 14 | 15 | # test lasso 16 | # ------------------------------------------------------------------------- 17 | model.optimize() 18 | beta = model.beta 19 | zero_idx = np.abs(beta) <= 1e-8 20 | beta[zero_idx] = 0.0 21 | 22 | # calculate the gradient 23 | g_beta = -model.JF(beta).T.dot(model.Y - model.F(beta)) 24 | for i in range(model.k_beta): 25 | if beta[i] == 0.0 and np.abs(g_beta[i]) < model.lw[i]: 26 | g_beta[i] = 0.0 27 | else: 28 | g_beta[i] += np.sign(beta[i])*model.lw[i] 29 | 30 | err = np.linalg.norm(g_beta) 31 | ok = ok and err < tol 32 | 33 | if not ok: 34 | print('err', err) 35 | 36 | return ok 37 | -------------------------------------------------------------------------------- /limetr/tests/limetr_objective.py: -------------------------------------------------------------------------------- 1 | # test function objective 2 | 3 | 4 | def limetr_objective(): 5 | import numpy as np 6 | from limetr.__init__ import LimeTr 7 | 8 | ok = True 9 | # setup test problem 10 | # ------------------------------------------------------------------------- 11 | model = LimeTr.testProblem(use_constraints=True, 12 | use_regularizer=True, 13 | use_uprior=True, 14 | use_gprior=True, 15 | know_obs_std=False) 16 | 17 | tol = 1e-8 18 | 19 | # test objective 20 | # ------------------------------------------------------------------------- 21 | x = np.random.randn(model.k) 22 | x[model.idx_gamma] = 0.1 23 
| x[model.idx_delta] = 0.1 24 | 25 | tr_obj = model.objective(x, use_ad=True) 26 | my_obj = model.objective(x) 27 | 28 | err = np.abs(tr_obj - my_obj) 29 | ok = ok and err < tol 30 | 31 | if not ok: 32 | print('err', err) 33 | print('tr_obj', tr_obj) 34 | print('my_obj', my_obj) 35 | 36 | return ok 37 | -------------------------------------------------------------------------------- /limetr/tests/limetr_objectiveTrimming.py: -------------------------------------------------------------------------------- 1 | # test function objectiveTrimming 2 | 3 | 4 | def limetr_objectiveTrimming(): 5 | import numpy as np 6 | from limetr.__init__ import LimeTr 7 | 8 | ok = True 9 | # setup test problem 10 | # ------------------------------------------------------------------------- 11 | model = LimeTr.testProblem(use_trimming=True) 12 | 13 | tol = 1e-8 14 | 15 | # test objectiveTrimming 16 | # ------------------------------------------------------------------------- 17 | x = np.hstack((model.beta, model.gamma)) 18 | w = model.w 19 | 20 | r = model.Y - model.F(model.beta) 21 | t = (model.Z**2).dot(model.gamma) 22 | d = model.V + t 23 | 24 | tr_obj = 0.5*np.sum(r**2*w/d) + 0.5*model.N*np.log(2.0*np.pi)\ 25 | + 0.5*w.dot(np.log(d)) 26 | my_obj = model.objectiveTrimming(w) 27 | 28 | err = np.abs(tr_obj - my_obj) 29 | ok = ok and err < tol 30 | 31 | if not ok: 32 | print('err', err) 33 | print('tr_obj', tr_obj) 34 | print('my_obj', my_obj) 35 | 36 | return ok 37 | -------------------------------------------------------------------------------- /limetr/tests/projCappedSimplex.py: -------------------------------------------------------------------------------- 1 | # test function dot 2 | 3 | 4 | def projCappedSimplex(): 5 | import numpy as np 6 | from limetr.utils import projCappedSimplex 7 | 8 | ok = True 9 | # setup test problem 10 | # ------------------------------------------------------------------------- 11 | w = np.ones(10) 12 | sum_w = 9.0 13 | 14 | tr_w = np.repeat(0.9, 10) 15 | my_w = projCappedSimplex(w, sum_w) 16 | 17 | tol = 1e-10 18 | err = np.linalg.norm(tr_w - my_w) 19 | 20 | ok = ok and err < tol 21 | 22 | if not ok: 23 | print('tr_w', tr_w) 24 | print('my_w', my_w) 25 | 26 | return ok 27 | -------------------------------------------------------------------------------- /limetr/tests/varmat_diag.py: -------------------------------------------------------------------------------- 1 | # check utils diag 2 | 3 | 4 | def varmat_diag(): 5 | import numpy as np 6 | from limetr.utils import VarMat 7 | 8 | ok = True 9 | tol = 1e-10 10 | # setup problem 11 | # ------------------------------------------------------------------------- 12 | mat = VarMat.testProblem() 13 | D = mat.varMat() 14 | 15 | tr_y = np.diag(D) 16 | 17 | my_y = mat.diag() 18 | 19 | err = np.linalg.norm(tr_y - my_y) 20 | ok = ok and err < tol 21 | 22 | if not ok: 23 | print('err in diag') 24 | print('err:', err) 25 | 26 | return ok 27 | -------------------------------------------------------------------------------- /limetr/tests/varmat_dot.py: -------------------------------------------------------------------------------- 1 | # check utils dot 2 | 3 | 4 | def varmat_dot(): 5 | import numpy as np 6 | from limetr.utils import VarMat 7 | 8 | ok = True 9 | tol = 1e-10 10 | # setup problem 11 | # ------------------------------------------------------------------------- 12 | mat = VarMat.testProblem() 13 | D = mat.varMat() 14 | x = np.random.randn(mat.N) 15 | X = np.random.randn(mat.N, 5) 16 | 17 | tr_y = D.dot(x) 18 | tr_Y = D.dot(X) 19 | 
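# the structured VarMat product must agree with the dense matrix product for both a vector and a matrix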
20 | my_y = mat.dot(x) 21 | my_Y = mat.dot(X) 22 | 23 | err = np.linalg.norm(tr_y - my_y) + np.linalg.norm(tr_Y - my_Y) 24 | ok = ok and err < tol 25 | 26 | if not ok: 27 | print('err in dot') 28 | print('err:', err) 29 | 30 | return ok 31 | -------------------------------------------------------------------------------- /limetr/tests/varmat_invDiag.py: -------------------------------------------------------------------------------- 1 | # check utils invDiag 2 | 3 | 4 | def varmat_invDiag(): 5 | import numpy as np 6 | from limetr.utils import VarMat 7 | 8 | ok = True 9 | tol = 1e-10 10 | # setup problem 11 | # ------------------------------------------------------------------------- 12 | mat = VarMat.testProblem() 13 | inv_D = mat.invVarMat() 14 | 15 | tr_y = np.diag(inv_D) 16 | 17 | my_y = mat.invDiag() 18 | 19 | err = np.linalg.norm(tr_y - my_y) 20 | ok = ok and err < tol 21 | 22 | if not ok: 23 | print('err in invDiag') 24 | print('err:', err) 25 | 26 | return ok 27 | -------------------------------------------------------------------------------- /limetr/tests/varmat_invDot.py: -------------------------------------------------------------------------------- 1 | # check utils dot 2 | 3 | 4 | def varmat_invDot(): 5 | import numpy as np 6 | from limetr.utils import VarMat 7 | 8 | ok = True 9 | tol = 1e-10 10 | # setup problem 11 | # ------------------------------------------------------------------------- 12 | mat = VarMat.testProblem() 13 | inv_D = mat.invVarMat() 14 | x = np.random.randn(mat.N) 15 | X = np.random.randn(mat.N, 5) 16 | 17 | tr_y = inv_D.dot(x) 18 | tr_Y = inv_D.dot(X) 19 | 20 | my_y = mat.invDot(x) 21 | my_Y = mat.invDot(X) 22 | 23 | err = np.linalg.norm(tr_y - my_y) + np.linalg.norm(tr_Y - my_Y) 24 | ok = ok and err < tol 25 | 26 | if not ok: 27 | print('err in invDot') 28 | print('err:', err) 29 | 30 | return ok 31 | -------------------------------------------------------------------------------- /limetr/tests/varmat_logDet.py: -------------------------------------------------------------------------------- 1 | # check utils logDet 2 | 3 | 4 | def varmat_logDet(): 5 | import numpy as np 6 | from limetr.utils import VarMat 7 | 8 | ok = True 9 | tol = 1e-10 10 | # setup problem 11 | # ------------------------------------------------------------------------- 12 | mat = VarMat.testProblem() 13 | D = mat.varMat() 14 | 15 | tr_y = np.log(np.linalg.det(D)) 16 | my_y = mat.logDet() 17 | 18 | err = np.linalg.norm(tr_y - my_y) 19 | ok = ok and err < tol 20 | 21 | if not ok: 22 | print('err in logDet') 23 | print('err:', err) 24 | 25 | return ok 26 | -------------------------------------------------------------------------------- /mrtool/.github/workflows/python-build.yml: -------------------------------------------------------------------------------- 1 | name: python-build 2 | on: [push] 3 | jobs: 4 | build: 5 | 6 | runs-on: ubuntu-latest 7 | 8 | steps: 9 | - uses: actions/checkout@v2 10 | - name: Set up Python 3.8 11 | uses: actions/setup-python@v2 12 | with: 13 | python-version: 3.8 14 | - name: Install dependencies 15 | run: python -m pip install .[dev] --upgrade pip 16 | - name: Test with pytest 17 | run: pytest 18 | - name: Build package distribution 19 | if: startsWith(github.ref, 'refs/tags') 20 | run: | 21 | python -m pip install build 22 | python -m build --sdist --wheel --outdir dist/ . 
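      # note: the build step above and the publish step below are both gated on 'refs/tags', so ordinary branch pushes stop after the pytest stage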
23 |     - name: Publish package distribution to PyPI 24 |       if: startsWith(github.ref, 'refs/tags') 25 |       uses: pypa/gh-action-pypi-publish@master 26 |       with: 27 |         skip_existing: true 28 |         user: __token__ 29 |         password: ${{ secrets.PYPI_API_TOKEN }} 30 | 31 |
-------------------------------------------------------------------------------- /mrtool/.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | .vscode 3 | .DS_Store
-------------------------------------------------------------------------------- /mrtool/.readthedocs.yml: -------------------------------------------------------------------------------- 1 | build: 2 |   image: latest 3 | python: 4 |   version: 3.7 5 |   setup_py_install: true 6 |   install: 7 |     - requirements: docs/requirements.txt 8 | sphinx: 9 |   configuration: docs/source/conf.py
-------------------------------------------------------------------------------- /mrtool/LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2020, IHME Math Sciences 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 |    list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 |    this list of conditions and the following disclaimer in the documentation 14 |    and/or other materials provided with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 |
-------------------------------------------------------------------------------- /mrtool/Makefile: -------------------------------------------------------------------------------- 1 | # makefile for easy package management 2 | .PHONY: clean tests 3 | 4 | build: setup.py 5 | 	python setup.py build 6 | 7 | install: setup.py 8 | 	python setup.py install 9 | 10 | sdist: setup.py 11 | 	python setup.py sdist 12 | 13 | tests: 14 | 	pytest tests 15 | 16 | clean: 17 | 	find . -name "*.so*" | xargs rm -rf 18 | 	find . -name "*.pyc" | xargs rm -rf 19 | 	find . -name "__pycache__" | xargs rm -rf 20 | 	find . -name "build" | xargs rm -rf 21 | 	find . -name "dist" | xargs rm -rf 22 | 	find . -name "MANIFEST" | xargs rm -rf 23 | 	find . -name "*.egg-info" | xargs rm -rf 24 | 	find . -name ".pytest_cache" | xargs rm -rf 25 | 26 | uninstall: 27 | 	find $(CONDA_PREFIX)/lib/ -name "*mrtool*" | xargs rm -rf
-------------------------------------------------------------------------------- /mrtool/README.rst: -------------------------------------------------------------------------------- 1 | ====== 2 | MRTool 3 | ====== 4 | 5 | .. image:: https://img.shields.io/badge/License-BSD%202--Clause-orange.svg 6 |     :target: https://opensource.org/licenses/BSD-2-Clause 7 |     :alt: License 8 | 9 | .. image:: https://readthedocs.org/projects/mrtool/badge/?version=latest 10 |     :target: https://mrtool.readthedocs.io/en/latest/ 11 |     :alt: Documentation 12 | 13 | .. image:: https://github.com/ramittal/MRTool/workflows/build/badge.svg?branch=master 14 |     :target: https://github.com/ramittal/MRTool/actions?query=workflow%3Abuild 15 |     :alt: BuildStatus 16 | 17 | .. image:: https://badge.fury.io/py/MRTool.svg 18 |     :target: https://badge.fury.io/py/mrtool 19 |     :alt: PyPI 20 | 21 | .. image:: https://coveralls.io/repos/github/ramittal/MRTool/badge.svg?branch=master 22 |     :target: https://coveralls.io/github/ramittal/MRTool?branch=master 23 |     :alt: Coverage 24 | 25 | .. image:: https://www.codefactor.io/repository/github/ramittal/mrtool/badge/master 26 |     :target: https://www.codefactor.io/repository/github/ramittal/mrtool/overview/master 27 |     :alt: CodeFactor 28 | 29 | 30 | The **MRTool** (Meta-Regression Tool) package is designed to solve general meta-regression problems. 31 | The most interesting features include, 32 | 33 | * linear and log prediction functions, 34 | * spline extension for covariates, 35 | * direct Gaussian, Uniform and Laplace priors on fixed and random effects, 36 | * shape constraints (monotonicity and convexity) for splines. 37 | 38 | Advanced features include, 39 | 40 | * spline knots ensemble, 41 | * automatic covariate selection. 42 | 43 | 44 | Installation 45 | ------------ 46 | 47 | Required packages include, 48 | 49 | * basic scientific computing suite, Numpy, Scipy and Pandas, 50 | * main optimization engine, `IPOPT `_, 51 | * customized packages, `LimeTr `_ and 52 |   `XSpline `_, 53 | * testing tool, Pytest. 54 | 55 | After installing the required packages, clone the repository and install MRTool. 56 | 57 | .. code-block:: shell 58 | 59 |    git clone https://github.com/ihmeuw-msca/MRTool.git 60 |    cd MRTool && python setup.py install 61 | 62 | 63 | For more information please check the `documentation `_. 64 | 65 |
-------------------------------------------------------------------------------- /mrtool/docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS    ?= 7 | SPHINXBUILD   ?= sphinx-build 8 | SPHINXPROJ    = mrtool 9 | SOURCEDIR     = source 10 | BUILDDIR      = build 11 | 12 | # Put it first so that "make" without argument is like "make help". 13 | help: 14 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 15 | 16 | .PHONY: help Makefile 17 | 18 | # Catch-all target: route all unknown targets to Sphinx using the new 19 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
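# For example, `make html` is forwarded as `sphinx-build -M html source build`.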
20 | %: Makefile 21 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 22 | 23 | clean: 24 | rm -rf $(BUILDDIR) -------------------------------------------------------------------------------- /mrtool/docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /mrtool/docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx-autodoc-typehints==1.4.0 2 | matplotlib 3 | -------------------------------------------------------------------------------- /mrtool/docs/source/_static/css/custom.css: -------------------------------------------------------------------------------- 1 | /* font face */ 2 | body, p { 3 | font-family: BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol";; 4 | font-size: 11pt; 5 | line-height: 1.5; 6 | } 7 | 8 | code { 9 | font-family: SFMono, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", Courier,monospace; 10 | } 11 | 12 | h1, h2, h3 { 13 | font-family: BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol";; 14 | font-weight: normal; 15 | } -------------------------------------------------------------------------------- /mrtool/docs/source/api_reference/index.rst: -------------------------------------------------------------------------------- 1 | ============= 2 | API Reference 3 | ============= 4 | 5 | .. toctree:: 6 | :maxdepth: 2 7 | :glob: 8 | 9 | * 10 | -------------------------------------------------------------------------------- /mrtool/docs/source/api_reference/mrtool.core.rst: -------------------------------------------------------------------------------- 1 | =================== 2 | mrtool.core package 3 | =================== 4 | 5 | .. automodule:: mrtool.core.data 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | 10 | 11 | .. automodule:: mrtool.core.cov_model 12 | :members: 13 | :undoc-members: 14 | :show-inheritance: 15 | 16 | 17 | .. automodule:: mrtool.core.model 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | 22 | 23 | .. 
automodule:: mrtool.core.utils 24 |    :members: 25 |    :undoc-members: 26 |    :show-inheritance: 27 |
-------------------------------------------------------------------------------- /mrtool/docs/source/api_reference/mrtool.cov_selection.rst: -------------------------------------------------------------------------------- 1 | ============================ 2 | mrtool.cov_selection package 3 | ============================ 4 | 5 | .. automodule:: mrtool.cov_selection.covfinder 6 |    :members: 7 |    :undoc-members: 8 |    :show-inheritance: 9 |
-------------------------------------------------------------------------------- /mrtool/docs/source/api_reference/mrtool.evidence_score.rst: -------------------------------------------------------------------------------- 1 | ============================= 2 | mrtool.evidence_score package 3 | ============================= 4 | 5 | .. automodule:: mrtool.evidence_score.scorelator 6 |    :members: 7 |    :undoc-members: 8 |    :show-inheritance: 9 |
-------------------------------------------------------------------------------- /mrtool/docs/source/concepts/data_gen/index.rst: -------------------------------------------------------------------------------- 1 | .. _data_gen: 2 | 3 | ========================= 4 | Data Generating Mechanism 5 | ========================= 6 | 7 | During the modeling process, the first question that needs to be 8 | answered is how the data are generated; the data generating mechanism 9 | is about using the given information to create a predictive model. 10 | 11 | 12 | .. toctree:: 13 |    :maxdepth: 2 14 |    :glob: 15 | 16 |    *
-------------------------------------------------------------------------------- /mrtool/docs/source/concepts/data_gen/range_exposure.rst: -------------------------------------------------------------------------------- 1 | .. _range_exposure: 2 | 3 | ============== 4 | Range Exposure 5 | ============== 6 | 7 | Very often, data are collected over cohorts or different 8 | groups of people, and therefore one data point can be interpreted as an average. 9 | 10 | For example, if we are interested in the relation between smoking and the relative risk 11 | of getting lung cancer, one data point is measured by the relative risk between the smoking and the non-smoking group. 12 | Within the smoking group, subjects have different exposures to smoking. 13 | So what the data point measures is the average relative risk for the corresponding range of exposures. 14 | 15 | If we denote :math:`x` as the exposure and :math:`f(x)` as the function between the outcome and exposure, 16 | one measurement :math:`y` over a range of exposures :math:`x \in [a, b]` can be expressed as, 17 | 18 | .. math:: 19 | 20 |    y = \frac{1}{b - a}\int_a^b f(x)\,\mathrm{d}x. 21 | 22 | A special case is when the function :math:`f` is linear, 23 | :math:`f(x) = \beta x`, and the expression can be simplified as, 24 | 25 | .. math:: 26 | 27 |    y = \frac{1}{b - a}\int_a^b f(x)\,\mathrm{d}x = \frac{1}{2}(a + b) \beta. 28 | 29 | In this case it is equivalent to using the midpoint of the exposure range as the covariate. 30 | 31 | 32 | Sample Code 33 | ----------- 34 | 35 | In the code, you can tell the program that you have a range exposure by passing in a pair of covariates 36 | instead of one. 37 | 38 | ..
code-block:: python 39 | 40 |     cov_model = CovModel('exposure', alt_cov=['exposure_start', 'exposure_end']) 41 |
-------------------------------------------------------------------------------- /mrtool/docs/source/concepts/data_gen/rr1_binary.rst: -------------------------------------------------------------------------------- 1 | .. _rr1_binary: 2 | 3 | ======================= 4 | Relative Risk 1: Binary 5 | ======================= 6 | 7 | Relative risk (RR) is the most common measurement type for the applications of ``MRTool``. 8 | Here we take the chance to introduce the basic concepts regarding relative risk, and 9 | how we build different types of relative risk models in ``MRTool``. 10 | 11 | Relative risk is the probability ratio of a certain outcome between the exposed and unexposed groups. 12 | For more information please check the `wiki page `_. 13 | Here we use smoking and lung cancer as a risk-outcome pair to explain the idea. 14 | 15 | Imagine the experiment is conducted with two groups, the smoking (e) and non-smoking (u) groups. 16 | We record the probabilities of getting lung cancer in the two groups, :math:`P_e` and :math:`P_u`, 17 | and the relative risk can be expressed as, 18 | 19 | .. math:: 20 | 21 |    RR = \frac{P_e}{P_u}. 22 | 23 | To implement meta-analysis on the effect of smoking, we often convert the collected relative risks from different 24 | studies (`longitudinal `_ or not) to log space, 25 | which conveniently removes the sign restriction, 26 | 27 | .. math:: 28 | 29 |    \ln(RR) = \ln(P_e) - \ln(P_u). 30 | 31 | To set up the binary model, we simply parametrize the log relative risk with an intercept, 32 | 33 | .. math:: 34 | 35 |    \ln(RR) = \mathbf{1} (\beta + u), 36 | 37 | where :math:`\beta` is the fixed effect for the intercept and :math:`u` is the random effect. 38 | When :math:`\beta` is `significantly `_ 39 | greater than zero, we say that the risk is harmful. 40 | For other risk-outcome pairs, there is a possibility that :math:`\beta` is significantly less than zero, 41 | in which case we call the risk protective. 42 | 43 | Very often, instead of only considering smoking vs non-smoking (binary), we also want to study the effects 44 | under different exposures to smoking. The most common assumption is log linear; please check 45 | :ref:`rr2_log_linear` for the details. 46 | 47 | 48 | 49 | Sample Code 50 | ----------- 51 | 52 | To set up the problem, we only need ``LinearCovModel``. 53 | 54 | .. code-block:: python 55 | 56 |     from mrtool import MRData, LinearCovModel, MRBRT 57 | 58 |     data = MRData() 59 |     # `intercept` is automatically added to the data 60 |     # no need to pass it in `col_covs` 61 |     data.load_df( 62 |         df=df, 63 |         col_obs='ln_rr', 64 |         col_obs_se='ln_rr_se', 65 |         col_study_id='study_id' 66 |     ) 67 |     cov_model = LinearCovModel('intercept', use_re=True) 68 |     model = MRBRT(data, cov_models=[cov_model]) 69 |
-------------------------------------------------------------------------------- /mrtool/docs/source/concepts/data_gen/rr2_log_linear.rst: -------------------------------------------------------------------------------- 1 | .. _rr2_log_linear: 2 | 3 | =========================== 4 | Relative Risk 2: Log Linear 5 | =========================== 6 | 7 | When analyzing relative risk across different exposure levels, 8 | the most widely used assumption is that the model is log linear. 9 | We parametrize the log risk as a linear function of exposure, 10 | 11 | .. math:: 12 | 13 |    \ln(RR) = \ln(R_e) - \ln(R_u) = x_a (\beta + u) - x_r (\beta + u) = (x_a - x_r)(\beta + u), 14 | 15 | where :math:`x` is the exposure, :math:`\beta`, :math:`u` are the fixed and random effects, 16 | and :math:`a`, :math:`r` refer to the "alternative" and "reference" groups. 17 | These are consistent with the previous notation, "exposed" and "unexposed". 18 | 19 | **Remark 1**: **No intercept!** 20 | 21 | Notice that in this model, we do NOT include an intercept to model the log risk. 22 | It is not possible to infer the absolute position of the risk curve using relative risk data, 23 | only the relative position. 24 | 25 | To see this, first assume that we have an intercept in the log risk formulation, 26 | :math:`\ln(R) = (\beta_0 + u_0) + x (\beta_1 + u_1)`, 27 | when we construct the log relative risk, 28 | 29 | .. math:: 30 | 31 |    \begin{aligned} 32 |    \ln(RR) =& \ln(R_e) - \ln(R_u) \\ 33 |    =& (\beta_0 + u_0) + x_a (\beta_1 + u_1) - ((\beta_0 + u_0) + x_r (\beta_1 + u_1)) \\ 34 |    =& (x_a - x_r)(\beta_1 + u_1) 35 |    \end{aligned} 36 | 37 | the intercept cancels and we return to the original formula. 38 | 39 | **Remark 2**: **No intercept! Again!** 40 | 41 | The other possible use of the intercept is to directly model 42 | the log relative risk, instead of the log risk, 43 | 44 | .. math:: 45 | 46 |    \ln(RR) = (\beta_0 + u_0) + (x_a - x_r)(\beta_1 + u_1). 47 | 48 | This does NOT work because when :math:`x_a` is equal to :math:`x_r`, 49 | we expect the log relative risk to be zero. 50 | 51 | Compared with :ref:`rr1_binary`, where we use the intercept to model the log relative risk, 52 | 53 | * In the binary model, we directly model the log relative risk instead of the log risk. 54 | * In the binary model, we never have the case where the exposures for the two groups are the same. 55 | 56 | 57 | Sample Code 58 | ----------- 59 | 60 | To set up the problem, we only need ``LinearCovModel``, just as in :ref:`rr1_binary`. 61 | 62 | If there is already a column in the data frame corresponding to the exposure differences, 63 | we can simply use it as the covariate. 64 | 65 | .. code-block:: python 66 | 67 |     from mrtool import MRData, LinearCovModel, MRBRT 68 | 69 |     data = MRData() 70 |     data.load_df( 71 |         df=df, 72 |         col_obs='ln_rr', 73 |         col_obs_se='ln_rr_se', 74 |         col_covs=['exposure_diff'], 75 |         col_study_id='study_id' 76 |     ) 77 |     cov_model = LinearCovModel('exposure_diff', use_re=True) 78 |     model = MRBRT(data, cov_models=[cov_model]) 79 | 80 | Otherwise, if you pass in the exposures for the "alternative" and "reference" groups, 81 | ``LinearCovModel`` will set up the model for you. 82 | 83 | .. code-block:: python 84 | 85 |     data.load_df( 86 |         df=df, 87 |         col_obs='ln_rr', 88 |         col_obs_se='ln_rr_se', 89 |         col_covs=['exposure_alt', 'exposure_ref'], 90 |         col_study_id='study_id' 91 |     ) 92 |     cov_model = LinearCovModel(alt_cov='exposure_alt', ref_cov='exposure_ref', use_re=True) 93 |
-------------------------------------------------------------------------------- /mrtool/docs/source/concepts/index.rst: -------------------------------------------------------------------------------- 1 | .. _concepts: 2 | 3 | ======== 4 | Concepts 5 | ======== 6 | 7 | In ``MRTool`` there are many important concepts and definitions. 8 | We list them here under the topics of **data generating mechanisms**, 9 | **priors** and **optimization**. 10 | 11 | ..
toctree:: 12 | :maxdepth: 2 13 | :glob: 14 | 15 | data_gen/index 16 | priors/index 17 | optimization/index 18 | -------------------------------------------------------------------------------- /mrtool/docs/source/concepts/optimization/index.rst: -------------------------------------------------------------------------------- 1 | .. _optimization: 2 | 3 | ============ 4 | Optimization 5 | ============ 6 | 7 | -------------------------------------------------------------------------------- /mrtool/docs/source/concepts/priors/index.rst: -------------------------------------------------------------------------------- 1 | .. _priors: 2 | 3 | ====== 4 | Priors 5 | ====== 6 | 7 | -------------------------------------------------------------------------------- /mrtool/docs/source/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Configuration file for the Sphinx documentation builder. 3 | # 4 | # This file only contains a selection of the most common options. For a full 5 | # list see the documentation: 6 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 7 | 8 | # -- Path setup -------------------------------------------------------------- 9 | 10 | # If extensions (or modules to document with autodoc) are in another directory, 11 | # add these directories to sys.path here. If the directory is relative to the 12 | # documentation root, use os.path.abspath to make it absolute, like shown here. 13 | # 14 | 15 | from pathlib import Path 16 | import sys 17 | 18 | import mrtool 19 | base_dir = Path(mrtool.__file__).parent 20 | 21 | about = {} 22 | with (base_dir / '__about__.py').open() as f: 23 | exec(f.read(), about) 24 | 25 | sys.path.insert(0, Path('..').resolve()) 26 | 27 | 28 | # -- Project information ----------------------------------------------------- 29 | 30 | project = about['__title__'] 31 | copyright = f"2020, {about['__author__']}" 32 | author = about['__author__'] 33 | 34 | # The short X.Y version. 35 | version = about['__version__'] 36 | # The full version, including alpha/beta/rc tags. 37 | release = about['__version__'] 38 | 39 | # -- General configuration --------------------------------------------------- 40 | 41 | # Add any Sphinx extension module names here, as strings. They can be 42 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 43 | # ones. 44 | 45 | needs_sphinx = '1.5' 46 | 47 | extensions = [ 48 | 'sphinx.ext.autodoc', 49 | 'sphinx.ext.intersphinx', 50 | 'sphinx.ext.doctest', 51 | 'sphinx.ext.todo', 52 | 'sphinx.ext.coverage', 53 | 'sphinx.ext.mathjax', 54 | 'sphinx.ext.napoleon', 55 | 'sphinx.ext.viewcode', 56 | 'sphinx_autodoc_typehints', 57 | 'matplotlib.sphinxext.plot_directive', 58 | ] 59 | 60 | # Add any paths that contain templates here, relative to this directory. 61 | templates_path = ['_templates'] 62 | 63 | source_suffix = '.rst' 64 | master_doc = 'index' 65 | 66 | # List of patterns, relative to source directory, that match files and 67 | # directories to ignore when looking for source files. 68 | # This pattern also affects html_static_path and html_extra_path. 69 | exclude_patterns = [] 70 | 71 | 72 | # -- Options for HTML output ------------------------------------------------- 73 | 74 | # The theme to use for HTML and HTML Help pages. See the documentation for 75 | # a list of builtin themes. 
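# NOTE: sphinx_rtd_theme is not bundled with Sphinx itself; in this repo it is
# installed through the package's `docs` extra in setup.py (docs/requirements.txt
# does not list it).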
76 | # 77 | html_theme = 'sphinx_rtd_theme' 78 | 79 | # Add any paths that contain custom static files (such as style sheets) here, 80 | # relative to this directory. They are copied after the builtin static files, 81 | # so a file named "default.css" will overwrite the builtin "default.css". 82 | html_static_path = ['_static'] 83 | html_css_files = [ 84 |     'css/custom.css', 85 | ] 86 | 87 | add_module_names = False 88 |
-------------------------------------------------------------------------------- /mrtool/docs/source/examples/example_linear.rst: -------------------------------------------------------------------------------- 1 | .. _example_linear: 2 | 3 | ============================ 4 | Example: Simple Linear Model 5 | ============================ 6 | 7 | In the following, we will go through a simple example of how to solve 8 | a linear mixed effects model. Consider the following setup, 9 | 10 | .. math:: 11 | 12 |    y_{ij} = (\beta_0 + u_{0i}) + x \beta_1 + \epsilon_{ij} 13 | 14 | where :math:`y` is the measurement, :math:`x` is the covariate, :math:`\beta_0` and :math:`\beta_1` are the fixed 15 | effects, :math:`u_0` is the random intercept and :math:`\epsilon` is the measurement error. 16 | Here :math:`i` is the index for the study and :math:`j` is the index for the observation within the study. 17 | 18 | Assume our data frame looks like, 19 | 20 | .. csv-table:: 21 |    :header: y, x, y_se, study_id 22 |    :widths: 10, 10, 10, 10 23 |    :align: center 24 | 25 |    0.20, 0.0, 0.1, A 26 |    0.29, 0.1, 0.1, A 27 |    0.09, 0.2, 0.1, B 28 |    0.14, 0.3, 0.1, C 29 |    0.40, 0.4, 0.1, D 30 | 31 | and our goal is to obtain the fixed effects and random effects for each study. 32 | 33 | 34 | Create Data Object 35 | ------------------ 36 | The first step is to create an ``MRData`` object to carry the data information. 37 | 38 | .. code-block:: python 39 | 40 |     from mrtool import MRData 41 | 42 |     data = MRData() 43 |     data.load_df( 44 |         df, 45 |         col_obs='y', 46 |         col_covs=['x'], 47 |         col_obs_se='y_se', 48 |         col_study_id='study_id' 49 |     ) 50 | 51 | Notice that ``MRData`` will automatically create an ``intercept`` in the covariate list. 52 | 53 | Configure Covariate Models 54 | -------------------------- 55 | The second step is to create covariate models. 56 | 57 | .. code-block:: python 58 | 59 |     from mrtool import LinearCovModel 60 | 61 |     cov_intercept = LinearCovModel('intercept', use_re=True) 62 |     cov_x = LinearCovModel('x') 63 | 64 | 65 | Create Model and Fit Model 66 | -------------------------- 67 | The third step is to create the model that combines the data and covariate models, 68 | and use the optimization routine to find the result. 69 | 70 | .. code-block:: python 71 | 72 |     from mrtool import MRBRT 73 | 74 |     model = MRBRT( 75 |         data, 76 |         [cov_intercept, cov_x] 77 |     ) 78 |     model.fit_model() 79 | 80 | You can get the fixed effects and random effects by calling ``model.beta_soln`` and ``model.re_soln``. 81 | 82 | 83 | Predict and Create Draws 84 | ------------------------ 85 | The last step is to predict and create draws. 86 | 87 | .. code-block:: python 88 | 89 |     # first create the data object used for prediction 90 |     # the new data frame has to provide the same covariates as in the fitting 91 |     data_pred = MRData() 92 |     data_pred.load_df( 93 |         df_pred, 94 |         col_covs=['x'] 95 |     ) 96 | 97 |     # create point prediction 98 |     y_pred = model.predict(data_pred) 99 | 100 |     # sampling solutions 101 |     beta_samples, gamma_samples = model.sample_soln(sample_size=1000) 102 | 103 |     # create draws 104 |     y_draws = model.create_draws( 105 |         data_pred, 106 |         beta_samples, 107 |         gamma_samples 108 |     ) 109 | 110 | Here ``y_pred`` is the point prediction and ``y_draws`` contains ``1000`` draws of the outcome.
-------------------------------------------------------------------------------- /mrtool/docs/source/examples/index.rst: -------------------------------------------------------------------------------- 1 | .. _examples: 2 | 3 | ================== 4 | Examples and Demos 5 | ================== 6 | 7 | In this part of the documentation, we organize all the useful examples and demos. 8 | 9 | 10 | .. toctree:: 11 |    :maxdepth: 2 12 |    :hidden: 13 |    :glob: 14 | 15 |    example_linear
-------------------------------------------------------------------------------- /mrtool/docs/source/index.rst: -------------------------------------------------------------------------------- 1 | ===================== 2 | MRTool Documentation 3 | ===================== 4 | 5 | The **MRTool** (Meta-Regression Tool) package is designed to solve general meta-regression problems. 6 | The most common features include, 7 | 8 | * linear and log prediction functions, 9 | * spline extension for covariates, 10 | * direct Gaussian, Uniform and Laplace priors on fixed and random effects, 11 | * shape constraints (monotonicity and convexity) for splines. 12 | 13 | Advanced features include, 14 | 15 | * spline knots ensemble, 16 | * automatic covariate selection. 17 | 18 | 19 | Installation 20 | ------------ 21 | This package uses `data class `_, and therefore requires ``python>=3.7``. 22 | 23 | Required packages include, 24 | 25 | * basic scientific computing suite, Numpy, Scipy and Pandas, 26 | * main optimization engine, `IPOPT `_, 27 | * customized packages, `LimeTr `_ and 28 |   `XSpline `_, 29 | * testing tool, Pytest. 30 | 31 | After installing the required packages, clone the repository and install MRTool. 32 | 33 | .. code-block:: shell 34 | 35 |    git clone https://github.com/ihmeuw-msca/MRTool.git 36 |    cd MRTool && python setup.py install 37 | 38 | 39 | Getting Started 40 | --------------- 41 | 42 | To build and run a model, we only need four steps, 43 | 44 | 1. create an ``MRData`` object and load data from a data frame 45 | 2. configure the ``CovModel`` with covariates and priors 46 | 3. create an ``MRModel`` object with the data object and covariate models, and fit the model 47 | 4. predict or create draws with new data and the model result 48 | 49 | In the following, we will list a set of examples to help users get familiar with 50 | the syntax. 51 | 52 | * :ref:`simple linear model ` 53 | 54 | 55 | Important Concepts 56 | ------------------ 57 | 58 | To correctly set up the model and solve problems, 59 | it is very important to understand some key :ref:`concepts `. 60 | We introduce them under three categories, 61 | 62 | * How can we match the data generating mechanism? 63 | * How can we incorporate prior knowledge? 64 | * How do the underlying optimization algorithms work? 65 | 66 | 67 | ..
toctree:: 68 | :maxdepth: 2 69 | :hidden: 70 | 71 | examples/index 72 | concepts/index 73 | api_reference/index 74 | -------------------------------------------------------------------------------- /mrtool/setup.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from setuptools import setup, find_packages 3 | 4 | 5 | if __name__ == '__main__': 6 | base_dir = Path(__file__).parent 7 | src_dir = base_dir/'src' 8 | 9 | about = {} 10 | with (src_dir/'mrtool'/'__about__.py').open() as f: 11 | exec(f.read(), about) 12 | 13 | with (base_dir/'README.rst').open() as f: 14 | long_description = f.read() 15 | 16 | install_requirements = [ 17 | 'numpy', 18 | 'pandas', 19 | 'scipy', 20 | 'xspline', 21 | 'xarray' 22 | ] 23 | 24 | unsolved_requirements = [ 25 | 'ipopt', 26 | 'limetr', 27 | 'pycddlib' 28 | ] 29 | 30 | test_requirements = [ 31 | 'pytest', 32 | 'pytest-mock' 33 | ] 34 | 35 | doc_requirements = [ 36 | 'sphinx>3.0', 37 | 'sphinx-autodoc-typehints', 38 | 'sphinx-rtd-theme', 39 | 'IPython', 40 | 'matplotlib' 41 | ] 42 | 43 | setup(name=about['__title__'], 44 | version=about['__version__'], 45 | 46 | description=about['__summary__'], 47 | long_description=long_description, 48 | license=about['__license__'], 49 | url=about['__uri__'], 50 | 51 | author=about['__author__'], 52 | author_email=about['__email__'], 53 | 54 | package_dir={'': 'src'}, 55 | packages=find_packages(where='src'), 56 | include_package_data=True, 57 | 58 | install_requires=install_requirements, 59 | tests_require=test_requirements, 60 | extras_require={ 61 | 'docs': doc_requirements, 62 | 'test': test_requirements, 63 | 'dev': doc_requirements + test_requirements 64 | }, 65 | zip_safe=False,) 66 | -------------------------------------------------------------------------------- /mrtool/src/mrtool/__about__.py: -------------------------------------------------------------------------------- 1 | __all__ = [ 2 | "__title__", "__summary__", "__uri__", "__version__", "__author__", 3 | "__email__", "__license__", "__copyright__", 4 | ] 5 | 6 | __title__ = "mrtool" 7 | __summary__ = "MRTool: Featured Nonlinear Mixed effects Models" 8 | __uri__ = "https://github.com/ihmeuw/mrtool" 9 | 10 | __version__ = "0.0.1" 11 | 12 | __author__ = "Peng Zheng" 13 | __email__ = "zhengp@uw.edu" 14 | 15 | __license__ = "MIT License" 16 | __copyright__ = f"Copyright 2020 {__author__}" -------------------------------------------------------------------------------- /mrtool/src/mrtool/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | mrtool 4 | ~~~~~~ 5 | 6 | `mrtool` package. 
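Exposes MRData, the CovModel variants, MRBRT/MRBeRT, utils and CovFinder.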
7 | """ 8 | from .core.data import MRData 9 | from .core.cov_model import CovModel, LinearCovModel, LogCovModel 10 | from .core.model import MRBRT, MRBeRT 11 | from .core import utils 12 | from .cov_selection.covfinder import CovFinder 13 | -------------------------------------------------------------------------------- /mrtool/src/mrtool/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ihmeuw-msca/burden-of-proof/3f2e8e81f35889b14a52b421b532226d127412fe/mrtool/src/mrtool/core/__init__.py -------------------------------------------------------------------------------- /mrtool/src/mrtool/core/other_sampling.py: -------------------------------------------------------------------------------- 1 | """ 2 | other_sampling 3 | ~~~~~~~~~~~~~~ 4 | """ 5 | from warnings import warn 6 | from typing import Union 7 | from dataclasses import dataclass 8 | import numpy as np 9 | from .model import MRBRT 10 | from .cov_model import LinearCovModel 11 | 12 | 13 | try: 14 | from limetr.utils import VarMat 15 | except: 16 | class VarMat: 17 | pass 18 | 19 | 20 | @dataclass 21 | class SimpleLMESpecs: 22 | obs: np.ndarray 23 | obs_se: np.ndarray 24 | study_sizes: np.ndarray 25 | fe_mat: np.ndarray 26 | re_mat: np.ndarray 27 | beta_soln: np.ndarray 28 | gamma_soln: np.ndarray 29 | fe_gprior: Union[np.ndarray, None] = None 30 | trimming_weights: np.ndarray = None 31 | 32 | def __post_init__(self): 33 | self.num_obs = len(self.obs) 34 | self.num_x_vars = self.fe_mat.shape[1] 35 | self.num_z_vars = self.re_mat.shape[1] 36 | 37 | if self.fe_gprior is not None and np.isinf(self.fe_gprior[1]).all(): 38 | self.fe_gprior = None 39 | 40 | if self.trimming_weights is None: 41 | self.trimming_weights = np.ones(self.num_obs) 42 | 43 | 44 | def is_simple_linear_mixed_effects_model(model: MRBRT) -> bool: 45 | """Test if a model is simple linear mixed effects model, where 46 | * covmodel is linear 47 | * no constraints 48 | * no uniform prior for fixed effects. 49 | 50 | Args: 51 | model (MRBRT): Model to be tested. 52 | 53 | Returns: 54 | bool: 55 | True if model is linear mixed effects model. 56 | """ 57 | ok = all([isinstance(cov_model, LinearCovModel) 58 | for cov_model in model.cov_models]) 59 | 60 | uprior = model.create_uprior() 61 | fe_uprior = uprior[:, :model.num_x_vars] 62 | ok = ok and np.isneginf(fe_uprior[0]).all() and np.isposinf(fe_uprior[1]).all() 63 | 64 | lprior = model.create_lprior() 65 | fe_lprior = lprior[:, :model.num_x_vars] 66 | ok = ok and np.isinf(fe_lprior[1]).all() 67 | 68 | ok = ok and (model.num_constraints == 0) 69 | return ok 70 | 71 | 72 | def extract_simple_lme_specs(model: MRBRT) -> SimpleLMESpecs: 73 | """Extract the simple mixed effects model specs. 74 | 75 | Args: 76 | model (MRBRT): Simple mixed effects model 77 | 78 | Returns: 79 | SimpleLMESpecs: 80 | Data object contains information of the simple linear mixed effects model. 81 | """ 82 | if not is_simple_linear_mixed_effects_model(model): 83 | warn("Model is not a simple mixed effects model. 
Uncertainty might not be accurate.") 84 | 85 |     x_fun, x_jac_fun = model.create_x_fun() 86 |     x_mat = x_jac_fun(model.beta_soln) 87 |     z_mat = model.create_z_mat() 88 |     gprior = model.create_gprior() 89 | 90 |     beta_soln = model.lt.beta.copy() 91 |     gamma_soln = model.lt.gamma.copy() 92 |     w_soln = model.lt.w.copy() 93 | 94 |     return SimpleLMESpecs( 95 |         obs=model.data.obs, 96 |         obs_se=model.data.obs_se, 97 |         study_sizes=model.data.study_sizes, 98 |         fe_mat=x_mat, 99 |         re_mat=z_mat, 100 |         beta_soln=beta_soln, 101 |         gamma_soln=gamma_soln, 102 |         fe_gprior=gprior[:, :model.num_x_vars], 103 |         trimming_weights=w_soln 104 |     ) 105 | 106 | 107 | def extract_simple_lme_hessian(model_specs: SimpleLMESpecs) -> np.ndarray: 108 |     """Extract the Hessian matrix from the simple linear mixed effects model. 109 | 110 |     Args: 111 |         model_specs (SimpleLMESpecs): Model specifications. 112 | 113 |     Returns: 114 |         np.ndarray: Hessian matrix. 115 |     """ 116 |     sqrt_weights = np.sqrt(model_specs.trimming_weights) 117 |     x = model_specs.fe_mat*sqrt_weights[:, None] 118 |     z = model_specs.re_mat*sqrt_weights[:, None] 119 |     d = model_specs.obs_se**(2*model_specs.trimming_weights) 120 |     v = VarMat(d, z, model_specs.gamma_soln, model_specs.study_sizes) 121 | 122 |     hessian = x.T.dot(v.invDot(x)) 123 |     if model_specs.fe_gprior is not None: 124 |         hessian += np.diag(1.0/model_specs.fe_gprior[1]**2) 125 | 126 |     return hessian 127 | 128 | 129 | def sample_simple_lme_beta(sample_size: int, model: MRBRT) -> np.ndarray: 130 |     """Sample beta from the simple linear mixed effects model. 131 | 132 |     Args: 133 |         sample_size (int): Sample size. 134 |         model (MRBRT): Simple linear mixed effects model. 135 | 136 |     Returns: 137 |         np.ndarray: 138 |             Beta samples from the linear mixed effects model. 139 |     """ 140 |     # extract information 141 |     model_specs = extract_simple_lme_specs(model) 142 | 143 |     # compute the mean and variance matrix for sampling 144 |     beta_mean = model_specs.beta_soln 145 |     beta_var = np.linalg.inv(extract_simple_lme_hessian(model_specs)) 146 | 147 |     # sample the solutions 148 |     beta_samples = np.random.multivariate_normal( 149 |         beta_mean, 150 |         beta_var, 151 |         size=sample_size 152 |     ) 153 | 154 |     return beta_samples 155 |
-------------------------------------------------------------------------------- /mrtool/src/mrtool/cov_selection/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ihmeuw-msca/burden-of-proof/3f2e8e81f35889b14a52b421b532226d127412fe/mrtool/src/mrtool/cov_selection/__init__.py
-------------------------------------------------------------------------------- /mrtool/src/mrtool/evidence_score/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ihmeuw-msca/burden-of-proof/3f2e8e81f35889b14a52b421b532226d127412fe/mrtool/src/mrtool/evidence_score/__init__.py
-------------------------------------------------------------------------------- /mrtool/src/mrtool/evidence_score/dichotomous.py: -------------------------------------------------------------------------------- 1 | """ 2 | Dichotomous scorelator 3 | """ 4 | import os 5 | from pathlib import Path 6 | from typing import Tuple, Union 7 | import numpy as np 8 | from scipy.stats import norm 9 | import matplotlib.pyplot as plt 10 | from mrtool import MRBRT 11 | from mrtool.core.other_sampling import extract_simple_lme_specs, extract_simple_lme_hessian 12 | 13 | 14 | class DichotomousScorelator: 15 |     def __init__(self, 16 |
model: MRBRT, 17 | cov_name: str = 'intercept', 18 | draw_bounds: Tuple[float, float] = (0.05, 0.95), 19 | name: str = 'unknown'): 20 | self.model = model 21 | self.cov_name = cov_name 22 | self.draw_bounds = draw_bounds 23 | self.cov_index = self.model.get_cov_model_index(self.cov_name) 24 | self.name = name 25 | 26 | x_ids = self.model.x_vars_indices[self.cov_index] 27 | z_ids = self.model.z_vars_indices[self.cov_index] 28 | self.beta = self.model.beta_soln[x_ids][0] 29 | self.gamma = self.model.gamma_soln[z_ids][0] 30 | 31 | # compute the fixed effects uncertainty 32 | model_specs = extract_simple_lme_specs(self.model) 33 | beta_var = np.linalg.inv(extract_simple_lme_hessian(model_specs)) 34 | self.beta_var = beta_var[np.ix_(x_ids, x_ids)][0, 0] 35 | 36 | # compute the random effects uncertainty 37 | lt = self.model.lt 38 | gamma_fisher = lt.get_gamma_fisher(lt.gamma) 39 | gamma_var = np.linalg.inv(gamma_fisher) 40 | self.gamma_var = gamma_var[np.ix_(z_ids, z_ids)][0, 0] 41 | 42 | # compute score 43 | gamma_ub = self.gamma + 2.0*np.sqrt(self.gamma_var) 44 | self.draw_lb = self.beta + norm.ppf(self.draw_bounds[0], scale=np.sqrt(self.gamma + self.beta_var)) 45 | self.draw_ub = self.beta + norm.ppf(self.draw_bounds[1], scale=np.sqrt(self.gamma + self.beta_var)) 46 | self.wider_draw_lb = self.beta + norm.ppf(self.draw_bounds[0], scale=np.sqrt(gamma_ub + self.beta_var)) 47 | self.wider_draw_ub = self.beta + norm.ppf(self.draw_bounds[1], scale=np.sqrt(gamma_ub + self.beta_var)) 48 | 49 | def is_harmful(self) -> bool: 50 | return self.beta > 0.0 51 | 52 | def get_score(self, use_gamma_ub: bool = False) -> float: 53 | if use_gamma_ub: 54 | score = self.wider_draw_lb if self.is_harmful() else -self.wider_draw_ub 55 | else: 56 | score = self.draw_lb if self.is_harmful() else -self.draw_ub 57 | return score 58 | 59 | def plot_model(self, 60 | ax=None, 61 | title: str = None, 62 | xlabel: str = 'ln relative risk', 63 | ylabel: str = 'ln relative risk se', 64 | xlim: tuple = None, 65 | ylim: tuple = None, 66 | xscale: str = None, 67 | yscale: str = None, 68 | folder: Union[str, Path] = None): 69 | if ax is None: 70 | fig = plt.figure() 71 | ax = fig.add_subplot() 72 | data = self.model.data 73 | trim_index = self.model.w_soln <= 0.1 74 | max_obs_se = np.max(data.obs_se)*1.1 75 | ax.set_ylim(max_obs_se, 0.0) 76 | ax.fill_betweenx([0.0, max_obs_se], 77 | [self.beta, self.beta - 1.96*max_obs_se], 78 | [self.beta, self.beta + 1.96*max_obs_se], color='#B0E0E6', alpha=0.4) 79 | obs = data.obs.copy() 80 | for i, cov_name in enumerate(self.model.cov_names): 81 | if cov_name == 'intercept': 82 | continue 83 | obs -= data.covs[cov_name]*self.model.beta_soln[i] 84 | ax.scatter(obs, data.obs_se, color='gray', alpha=0.4) 85 | ax.scatter(obs[trim_index], 86 | data.obs_se[trim_index], color='red', marker='x', alpha=0.4) 87 | ax.plot([self.beta, self.beta - 1.96*max_obs_se], [0.0, max_obs_se], 88 | linewidth=1, color='#87CEFA') 89 | ax.plot([self.beta, self.beta + 1.96*max_obs_se], [0.0, max_obs_se], 90 | linewidth=1, color='#87CEFA') 91 | 92 | ax.axvline(0.0, color='r', linewidth=1, linestyle='--') 93 | ax.axvline(self.beta, color='k', linewidth=1, linestyle='--') 94 | ax.axvline(self.draw_lb, color='#69b3a2', linewidth=1) 95 | ax.axvline(self.draw_ub, color='#69b3a2', linewidth=1) 96 | ax.axvline(self.wider_draw_lb, color='#256b5f', linewidth=1) 97 | ax.axvline(self.wider_draw_ub, color='#256b5f', linewidth=1) 98 | 99 | title = self.name if title is None else title 100 | score = self.get_score() 101 | low_score = 
self.get_score(use_gamma_ub=True) 102 |         ax.set_xlabel(xlabel) 103 |         ax.set_ylabel(ylabel) 104 |         ax.set_title(f"{title}: score = ({low_score: .3f}, {score: .3f})", loc='left') 105 | 106 |         if xlim is not None: 107 |             ax.set_xlim(*xlim) 108 |         if ylim is not None: 109 |             ax.set_ylim(*ylim) 110 |         if xscale is not None: 111 |             ax.set_xscale(xscale) 112 |         if yscale is not None: 113 |             ax.set_yscale(yscale) 114 | 115 |         if folder is not None: 116 |             folder = Path(folder) 117 |             if not folder.exists(): 118 |                 os.mkdir(folder) 119 |             plt.savefig(folder/f"{self.name}.pdf", bbox_inches='tight') 120 | 121 |         return ax 122 |
-------------------------------------------------------------------------------- /mrtool/tests/test_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 |     test_utils 4 |     ~~~~~~~~~~ 5 |     Test `utils` module of `mrtool` package. 6 | """ 7 | import numpy as np 8 | import pandas as pd 9 | import pytest 10 | from mrtool import utils 11 | 12 | 13 | @pytest.mark.parametrize('df', [pd.DataFrame({'alpha': np.ones(5), 14 |                                               'beta': np.zeros(5)})]) 15 | @pytest.mark.parametrize(('cols', 'col_shape'), 16 |                          [('alpha', (5,)), 17 |                           ('beta', (5,)), 18 |                           (['alpha'], (5, 1)), 19 |                           (['beta'], (5, 1)), 20 |                           (['alpha', 'beta'], (5, 2)), 21 |                           (None, (5, 0))]) 22 | def test_get_cols(df, cols, col_shape): 23 |     col = utils.get_cols(df, cols) 24 |     assert col.shape == col_shape 25 | 26 | 27 | @pytest.mark.parametrize(('cols', 'ok'), 28 |                          [('col0', True), 29 |                           (['col0', 'col1'], True), 30 |                           ([], True), 31 |                           (None, True), 32 |                           (1, False)]) 33 | def test_is_cols(cols, ok): 34 |     assert ok == utils.is_cols(cols) 35 | 36 | 37 | @pytest.mark.parametrize('cols', [None, 'col0', ['col0', 'col1']]) 38 | @pytest.mark.parametrize('default', [None, 'col0', ['col0', 'col1']]) 39 | def test_input_cols_default(cols, default): 40 |     result_cols = utils.input_cols(cols, default=default) 41 |     if cols is None: 42 |         assert result_cols == [] if default is None else default 43 |     else: 44 |         assert result_cols == cols 45 | 46 | 47 | @pytest.mark.parametrize('cols', [None, 'col0', ['col0', 'col1']]) 48 | @pytest.mark.parametrize('full_cols', [None, ['col2']]) 49 | def test_input_cols_append_to(cols, full_cols): 50 |     cols = utils.input_cols(cols, append_to=full_cols) 51 |     if full_cols is not None and cols: 52 |         assert 'col0' in full_cols and 'col2' in full_cols 53 |         if isinstance(cols, list): 54 |             assert 'col1' in full_cols 55 | 56 | 63 | @pytest.mark.parametrize('sizes', [np.array([1, 2, 3])]) 64 | @pytest.mark.parametrize('indices', [[np.arange(0, 1), 65 |                                       np.arange(1, 3), 66 |                                       np.arange(3, 6)]]) 67 | def test_sizes_to_indices(sizes, indices): 68 |     my_indices = utils.sizes_to_indices(sizes) 69 |     assert all([np.allclose(my_indices[i], indices[i]) 70 |                 for i in range(len(sizes))]) 71 | 72 | 73 | @pytest.mark.parametrize(('prior', 'result'), 74 |                          [(np.array([0.0, 1.0]), True), 75 |                           (np.array([[0.0]*2, [1.0]*2]), True), 76 |                           (np.array([0.0, -1.0]), False), 77 |                           (np.array([[0.0]*2, [-1.0]*2]), False), 78 |                           (None, True), 79 |                           ('gaussian_prior', False)]) 80 | def test_is_gaussian_prior(prior, result): 81 |     assert utils.is_gaussian_prior(prior) == result 82 | 83 | 84 | @pytest.mark.parametrize(('prior', 'result'), 85 |                          [(np.array([0.0, 1.0]), True), 86 |                           (np.array([[0.0]*2, [1.0]*2]),
True), 87 |                           (np.array([0.0, -1.0]), False), 88 |                           (np.array([[0.0]*2, [-1.0]*2]), False), 89 |                           (None, True), 90 |                           ('uniform_prior', False)]) 91 | def test_is_uniform_prior(prior, result): 92 |     assert utils.is_uniform_prior(prior) == result 93 | 94 | 95 | @pytest.mark.parametrize('obj', [1, 1.0, 'a', True, [1], [1.0], ['a'], [True]]) 96 | def test_to_list(obj): 97 |     obj_list = utils.to_list(obj) 98 |     if isinstance(obj, list): 99 |         assert obj_list is obj 100 |     else: 101 |         assert isinstance(obj_list, list) 102 |
-------------------------------------------------------------------------------- /risks/README.md: -------------------------------------------------------------------------------- 1 | 2 | This folder contains custom code for individual risk factors in the burden of proof publications. 3 |
-------------------------------------------------------------------------------- /risks/ipv_csa/README.md: -------------------------------------------------------------------------------- 1 | This sub-directory contains code specific to the IPV and CSA risk factors. 2 |
-------------------------------------------------------------------------------- /risks/processed_foods/Data_cleaning_and_formatting.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Clean the environment 4 | rm(list = ls()) 5 | 6 | ## Load libraries 7 | library(data.table) 8 | library(tidyverse) 9 | library(ggplot2) 10 | library(openxlsx) 11 | library(purrr) 12 | 13 | ## Edit the filepath 14 | data_dir <- "FILEPATH" 15 | 16 | ## Create covariates to assess the study design characteristics (assumes the extracted data have already been read into `dt` as a data.table) 17 | dt$representative <- ifelse(dt$rep_geography == 1, 0, 1) 18 | dt$exp_assess_level[dt$exp_assess_level == "At the individual "] <- "At the individual" 19 | dt$exposure_1 <- ifelse(dt$exp_assess_level == "At the individual", 0, 1) 20 | dt$exposure_2 <- ifelse(dt$exp_method_1 == "Self-report (human/environment)", 0, 1) 21 | dt$exposure_3 <- ifelse(dt$exp_assess_period == "only at baseline", 1, 0) 22 | dt$outcome_1 <- ifelse(dt$outcome_assess_1 == "Self-report", 1, 0) 23 | dt$outcome_2 <- "0" 24 | dt$confounder_1 <- ifelse(dt$design %in% c("Prospective cohort", "prospective cohort", "case-cohort", "Nested case-control"), 1, 0) 25 | dt$incidence <- ifelse(dt$outcome_type %in% c("Incidence", "Incidence & Mortality", "Incidence, mortality", "Incidence and mortality"), 0, 1) 26 | dt$mortality <- ifelse(dt$outcome_type %in% c("Mortality", "Incidence & Mortality", "Incidence, mortality", "Incidence and mortality"), 1, 0) 27 | dt$cov_odds_ratio <- ifelse(dt$effect_size_measure == "Odds ratio (OR)", 1, 0) 28 | 29 | ## Adjust certain variables 30 | dt <- dt %>% 31 |   mutate(outcome_2 = 0, reverse_causation = 1, washout_years = NA, seq = NA, selection_bias = NA) 32 | 33 | ## Make the effect size and CI bound variables numeric 34 | dt$effect_size <- as.numeric(dt$effect_size) 35 | dt$upper <- as.numeric(dt$upper) 36 | dt$lower <- as.numeric(dt$lower) 37 | 38 | ## Create the ln_effect and ln_se variables 39 | dt <- dt %>% 40 |   mutate(ln_effect = log(effect_size), ln_se = (log(upper) - log(lower)) / 3.92) 41 | 42 | ## Flag studies with more than 10 years of follow-up 43 | dt$value_of_duration_fup <- as.numeric(dt$value_of_duration_fup) 44 | dt <- dt[, cov_follow_up := ifelse(value_of_duration_fup > 10, 1, 0)] 45 | 46 | ## Rename the effect size and dose response columns as required by the pipeline 47 | setnames(dt, old = c("ln_effect", "ln_se", "b_0", "b_1", "a_0", "a_1"), new = c("ln_rr", "ln_rr_se", "alt_risk_lower", "alt_risk_upper", "ref_risk_lower", "ref_risk_upper")) 48 | 49 | ## Convert selected columns to numeric 50 | columns_to_convert <- c("ln_rr", "ln_rr_se", "ref_risk_lower", "ref_risk_upper", "alt_risk_lower", "alt_risk_upper") 51 | dt[, (columns_to_convert) := lapply(.SD, as.numeric), .SDcols = columns_to_convert] 52 | 53 | ## Rename the bias covariates according to the BoP guideline 54 | names(dt) <- gsub("^(cofounder|confounder|confounders)", "cov", names(dt)) 55 | 56 | ## Drop non-required variables 57 | dt <- dt[, c("cov_other_dietary_components", "cov_other") := NULL] 58 | dt <- dt[, measure := "relrisk"] 59 | 60 | ## Prepare the data for bundle upload 61 | dt$sex <- ifelse(dt$percent_male == 1, "Male", ifelse(dt$percent_male == 0, "Female", "Both")) 62 | dt$cov_exposure_definition <- ifelse(dt$Exposure_definition_reported == 1, 1, 0) 63 | dt$cov_outcome_def <- ifelse(dt$outcome_mapping == "aggregate", 0, 1) 64 | dt$cov_outcome_def[is.na(dt$cov_outcome_def)] <- 0 ## Set the missing values to 0 65 | 66 | ## Format the dataset for upload 67 | dt$design[dt$design == "Prospective cohort"] <- "prospective cohort" 68 | dt$design[dt$design == "case-cohort"] <- "case-cohort" 69 | dt$design[dt$design == "Nested case-control"] <- "nested case-control" 70 | dt$effect_size_measure[dt$effect_size_measure == "Hazard ratio (HR)"] <- "hazard ratio" 71 | dt$effect_size_measure[dt$effect_size_measure == "Relative risk (RR)"] <- "relative risk" 72 | dt$effect_size_measure[dt$effect_size_measure == "Odds ratio (OR)"] <- "odds ratio" 73 | setnames(dt, c("year_end_study", "year_start_study", "effect_size"), c("year_end", "year_start", "mean")) 74 | 75 | ## Get the location data 76 | source("FILEPATH/get_location_metadata.R") 77 | loc <- get_location_metadata(location_set_id = 35, release_id = 9) 78 | loc$location_id <- as.numeric(loc$location_id) 79 | dt$location_id <- as.numeric(dt$location_id) 80 | 81 | ## Merge 82 | dt_final <- merge(dt, loc, by = c("location_id", "location_name"), all.x = TRUE) 83 | 84 | ## Save the dataset 85 | write_csv(dt_final, "FILEPATH/FILENAME.csv")
-------------------------------------------------------------------------------- /risks/processed_foods/create_bc_gamma_table.R: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Title: Create table for selected bias covariates and gamma solution for a BoP study 3 | # Author: 4 | ####################################################################### 5 | rm(list = ls()) 6 | 7 | ## create custom R library folder 8 | user <- Sys.getenv("USER") 9 | user_rlibs <- file.path("/homes", user, "rlibs") 10 | 11 | if (!dir.exists(user_rlibs)) { 12 |   dir.create(user_rlibs) 13 | } 14 | 15 | ## install/load packages 16 | packages <- c("yaml", "data.table", "stringr", "officer", "flextable") 17 | for (p in packages) { 18 |   if (!require(p, character.only = TRUE)) { 19 |     install.packages(p, lib = user_rlibs) 20 |     library(p, lib.loc = user_rlibs, character.only = TRUE) 21 |   } 22 | } 23 | 24 | ## load cc functions 25 | source("FILEPATH/r/get_ids.R") 26 | 27 | ## main function 28 | #' This function creates a table for selected bias covariates and gamma solution to be shown in a Burden of Proof study 29 | #' 30 | #' @param input is the path to the folder where the results of the BoP pipeline are stored 31 | #' @param output is the path to the folder where the table should be saved 32 | #' @param heading optional heading of the
table; "Table SX. Selected bias covariates and gamma solution" by default. 33 | #' @param footnote optional footnote of the table; vector(mode = "character") by default. 34 | #' 35 | #' @return 36 | #' @export 37 | #' 38 | #' @examples 39 | #' bc_gamma_table(input = "/homes/shcarr/bop_pipeline/results/dichotomous/", 40 | #' output = "/homes/shcarr/bop_pipeline/tables/", 41 | #' heading = "Table S4. Selected bias covariates and gamma solution") 42 | 43 | 44 | risk_folder = "processed_meat" 45 | trimming = "no_trimming/" 46 | exposure = paste0(risk_folder, "consumption") 47 | 48 | input = paste0("FILEPATH/", risk_folder, "/mrbrt_run/BoP_summary_table/", trimming, "/risk_outcome_pair/") 49 | output = paste0("FILEPATH/", risk_folder, "/mrbrt_run/BoP_summary_table/", trimming, "/summary_table/") 50 | heading = paste0("Table:", exposure ," Selected bias covariates and gamma solution") 51 | 52 | 53 | bc_gamma_table <- function(input, output, heading = "Table SX. Selected bias covariates and gamma solution", footnote = vector(mode = "character")){ 54 | 55 | if (!dir.exists(input)) stop("Please provide a valid input path") 56 | if (!dir.exists(output)) stop("Please provide a valid output path") 57 | 58 | # load cause_ids 59 | cause_ids <- get_ids("cause") 60 | 61 | results <- data.table() 62 | 63 | # loop through all risk-outcome pairs 64 | for (ro_pair in list.files(input)) { 65 | ro_path <- file.path(input, ro_pair) 66 | 67 | # load files 68 | summary <- yaml.load_file(file.path(ro_path, "summary.yaml")) 69 | cov_finder_result <- yaml.load_file(file.path(ro_path, "cov_finder_result.yaml")) 70 | 71 | # bias covariate(s) 72 | selected_bc_covs <- cov_finder_result$selected_covs 73 | selected_bc_covs <- gsub("cov_", "", selected_bc_covs) 74 | selected_bc_covs <- ifelse(length(selected_bc_covs) > 0, paste(selected_bc_covs, collapse = ", "), "None") 75 | 76 | # gamma solution 77 | gamma <- paste0(round(summary$gamma[1], 10), " (", round(summary$gamma[2], 5), ")") 78 | 79 | # exposure 80 | risk <- stringr::str_split(ro_pair, "-")[[1]][1] 81 | 82 | # health outcome 83 | cause <- stringr::str_split(ro_pair, "-")[[1]][2] 84 | health_outcome <- ifelse(cause %in% cause_ids$acause, cause_ids$cause_name[cause_ids$acause == cause], cause) 85 | 86 | # compile results 87 | temp <- data.frame( 88 | cbind( 89 | risk, 90 | health_outcome, 91 | selected_bc_covs, 92 | gamma 93 | ) 94 | ) 95 | results <- rbind(results, temp) 96 | } 97 | 98 | # rename columns 99 | colnames(results) <- c("Risk", "Health outcome", "Selected bias covariates", "Gamma solution (mean and sd)") 100 | 101 | # check if multiple exposures 102 | if (length(unique(results$`Risk`)) == 1) { 103 | results <- results[, -c("Risk")] 104 | } 105 | 106 | # define table settings 107 | if ("Risk" %in% colnames(results)) { 108 | cols <- c(1:4) 109 | width <- c(1.8, 1.8, 1.8, 1.3) 110 | } else { 111 | cols <- c(1:3) 112 | width <- c(1.8, 1.8, 1.3) 113 | } 114 | 115 | # create table 116 | flextable(results) %>% 117 | add_header_lines(values = heading) %>% 118 | add_footer_lines(footnote) %>% 119 | width(j = cols, width = width) %>% 120 | style(part = "header", pr_p = fp_par(text.align = "center")) %>% 121 | style(j = 1, part = "header", pr_p = fp_par(text.align = "left")) %>% 122 | style(part = "body", pr_p = fp_par(text.align = "center")) %>% 123 | style(j = 1, part = "body", pr_p = fp_par(text.align = "left")) %>% 124 | font(fontname = "Calibri", part = "all") %>% 125 | fontsize(size = 10, part = "body") %>% 126 | fontsize(i = 1, size = 12, part = "header") 
%>% 127 | line_spacing(space = 1.15) %>% 128 | save_as_docx(path = file.path(output, "bc_gamma_table.docx"), align = "center") 129 | } 130 | 131 | ## run the function 132 | 133 | 134 | bc_gamma_table(input, output, heading) 135 | 136 | -------------------------------------------------------------------------------- /risks/red_meat/README.md: -------------------------------------------------------------------------------- 1 | 2 | This folder contains custom code for the diet high in red meat burden of proof analysis. 3 | -------------------------------------------------------------------------------- /risks/red_meat/model_functions.R: -------------------------------------------------------------------------------- 1 | # Functions to pull draws from the modified model objects 2 | 3 | 4 | get_cov_names <- function(signal_model) { 5 | cov_model <- signal_model$sub_models[[1]]$cov_models[[1]] 6 | list(alt_covs = cov_model$alt_cov, 7 | ref_covs = cov_model$ref_cov) 8 | } 9 | 10 | get_risk_limits <- function(signal_model) { 11 | cov_names <- get_cov_names(signal_model) 12 | risk_data <- signal_model$data$get_covs(unlist(cov_names)) 13 | c(min(risk_data), max(risk_data)) 14 | } 15 | 16 | get_signal <- function(signal_model, risk) { 17 | cov_names <- get_cov_names(signal_model) 18 | risk_limits <- get_risk_limits(signal_model) 19 | df_covs <- data.frame( 20 | c(sapply(cov_names$ref_covs, function(x) rep(risk_limits[1], length.out = length(risk)), 21 | simplify = FALSE, USE.NAMES = TRUE), 22 | sapply(cov_names$alt_covs, function(x) risk, 23 | simplify = FALSE, USE.NAMES = TRUE)) 24 | ) 25 | data <- MRData() 26 | data$load_df(df_covs, col_covs=unlist(cov_names)) 27 | signal_model$predict(data) 28 | } 29 | 30 | get_beta <- function(linear_model) { 31 | beta <- linear_model$beta_soln 32 | names(beta) <- linear_model$cov_names 33 | specs <- mrbrt002::core$other_sampling$extract_simple_lme_specs(linear_model) 34 | beta_hessian <- mrbrt002::core$other_sampling$extract_simple_lme_hessian(specs) 35 | beta_sd <- 1/sqrt(diag(beta_hessian)) 36 | names(beta_sd) <- linear_model$cov_names 37 | c(beta["signal"], beta_sd["signal"]) 38 | } 39 | 40 | get_gamma <- function(linear_model) { 41 | gamma <- linear_model$gamma_soln[[1]] 42 | gamma_fisher <- linear_model$lt$get_gamma_fisher(linear_model$gamma_soln) 43 | gamma_sd <- 1/sqrt(diag(gamma_fisher)) 44 | c(gamma, gamma_sd) 45 | } 46 | 47 | get_soln <- function(linear_model) { 48 | list( 49 | beta_soln = get_beta(linear_model), 50 | gamma_soln = get_gamma(linear_model) 51 | ) 52 | } 53 | 54 | get_ln_rr_draws <- function(signal_model, 55 | linear_model, 56 | risk, 57 | num_draws = 1000L, 58 | normalize_to_tmrel = FALSE, 59 | fe_only = FALSE) { 60 | signal <- get_signal(signal_model, risk) 61 | re_signal <- signal 62 | soln <- get_soln(linear_model) 63 | 64 | fe_samples <- rnorm(num_draws, mean=soln$beta_soln[1], sd=soln$beta_soln[2]) # draws of the fixed ("signal") effect 65 | re_samples <- rnorm(num_draws, mean=0, sd=sqrt(soln$gamma_soln[1] + 2*soln$gamma_soln[2])) # between-study heterogeneity, with gamma inflated by 2 sd 66 | 67 | if(fe_only){ 68 | draws <- outer(signal, fe_samples) 69 | }else{ 70 | draws <- outer(signal, fe_samples) + outer(re_signal, re_samples) 71 | } 72 | 73 | if (normalize_to_tmrel) { 74 | tmrel_index <- which.min(signal) 75 | draws <- sweep(draws, 2, draws[tmrel_index, ], "-") # shift each draw (column) so its TMREL row is zero 76 | } 77 | 78 | df <- as.data.frame(cbind(risk, draws)) 79 | names(df) <- c("risk", sapply(1:num_draws, function(i) paste0("draw_", i))) 80 | return(df) 81 | } 82 | 83 | 84 | summarize_draws <- function(data){ 85 | 86 | df <- as.data.table(copy(data)) 87 | draw_cols <- colnames(df)[grepl("draw_",
colnames(df))] 88 | 89 | df[, mean := apply(.SD, 1, mean), .SDcols = draw_cols] 90 | df[, upper := apply(.SD, 1, quantile, 0.975), .SDcols = draw_cols] 91 | df[, lower := apply(.SD, 1, quantile, 0.025), .SDcols = draw_cols] 92 | 93 | df[, (draw_cols) := NULL] 94 | return(df) 95 | 96 | } -------------------------------------------------------------------------------- /risks/smoking/binary_risk/02_upload_dichotomous_launcher.R: -------------------------------------------------------------------------------- 1 | rm(list = ls()) 2 | source("/ihme/homes/xdai88/gbd_tobacco/gbd2020_smoking/evidence_score_pipeline/src/upload_dichotomous.R") 3 | 4 | ARCHIVE <- "[directory to the archive folder]" 5 | out_dir <- "[directory to the outputs folder]" 6 | 7 | pair_info <- list( 8 | smoking_hip_fracture = list( 9 | rei_id = "99", 10 | cause_id = "878", 11 | model_path = paste0(out_dir,"fracture_model.pkl") 12 | ), 13 | smoking_non_hip_fracture = list( 14 | rei_id = "99", 15 | cause_id = "923", 16 | model_path = paste0(out_dir,"fracture_model.pkl") 17 | ) 18 | ) 19 | 20 | for (pair in names(pair_info)) { 21 | print(paste0("upload pair=", pair)) 22 | results_folder <- file.path(ARCHIVE, pair) 23 | if (!dir.exists(results_folder)) { 24 | dir.create(results_folder) 25 | } 26 | do.call(upload_results, c(pair_info[[pair]], list(results_folder = results_folder))) 27 | } 28 | -------------------------------------------------------------------------------- /risks/smoking/binary_risk/03_forest_plot.R: -------------------------------------------------------------------------------- 1 | #################################################################################################################################################################### 2 | # 3 | # Author: Xiaochen Dai 4 | # Purpose: Plot mr-brt results 5 | # 6 | #################################################################################################################################################################### 7 | 8 | rm(list=ls()) 9 | 10 | library(data.table) 11 | library(dplyr) 12 | library(openxlsx) 13 | library(ggplot2) 14 | # library(crosswalk, lib.loc = "/ihme/code/mscm/R/packages/") 15 | library(mrbrt002, lib.loc = "/ihme/code/mscm/Rv4/packages/") 16 | 17 | args <- commandArgs(trailingOnly = TRUE) 18 | 19 | ## NEED TO CHANGE THE RO_PAIR HERE 20 | 21 | if(interactive()){ 22 | # NOTE: the ro_pair for this script does not include age-specific info 23 | ro_pair <- "fractures" # only works for fractures now 24 | cov_setting <- "cov_finder_no_sex" # option: ['cov_finder', 'cov_finder_no_sex', 'no_cov','percent_male_only','self_selected'(no percent_male)] 25 | trim <- 0.9 26 | #out_dir <- "/ihme/homes/xdai88/gbd_tobacco/gbd2019_alcohol/evidence_score/testing/test_run1_2020_09_05/" 27 | out_dir <- "/mnt/team/team/pub/sub_risks/tobacco/code/xdai88/gbd2020_smoking/relative_risk_curves/binary_risk/fracture_binary/" 28 | } else { 29 | ro_pair <- args[1] 30 | cov_setting <- args[2] 31 | trim <- args[3] 32 | out_dir <- args[4] 33 | } 34 | 35 | # 1. 
plotting the results --------------------------------------------------------------------------------------------------------------------------------------------------- 36 | 37 | #obs_data <- fread(paste0('/ihme/homes/xdai88/gbd_tobacco/gbd2020_smoking/test_run_2020_10_25/fracture_binary/mrbrt_output_', cov_setting, '_', trim, '.csv')) 38 | #mod1 <- py_load_object(filename = paste0('/ihme/homes/xdai88/gbd_tobacco/gbd2020_smoking/test_run_2020_10_25/fracture_binary/', ro_pair, '_', cov_setting,'_', trim, '.pkl'), pickle = "dill") 39 | 40 | obs_data <- fread(paste0(out_dir, ro_pair,"_",cov_setting, '_', trim, '.csv')) 41 | mod1 <- py_load_object(filename = paste0(out_dir, ro_pair, '_', cov_setting,'_', trim, '.pkl'), pickle = "dill") 42 | 43 | cov_names <- mod1$cov_names[!mod1$cov_names=="intercept"] 44 | header <- paste0(as.character(mod1$data),"\ncovariates: ",paste0(cov_names, collapse=", ") ,"\nexp(beta): ", round(exp(mod1$beta_soln[1]), digits = 3)," gamma: ", mod1$gamma_soln) 45 | 46 | # 47 | test <- obs_data[!is.na(se)] 48 | test[, val := exp(val)]; test[, lower := exp(lower)]; test[, upper := exp(upper)] 49 | results <- obs_data[is.na(se)] 50 | 51 | #lin_effect <- as.data.table(delta_transform(mean=test$val, sd=test$se, transformation='log_to_linear')) 52 | #data <- cbind(lin_effect, test) 53 | #data[, upper:=mean_linear+1.96*sd_linear] 54 | #data[, lower:=mean_linear-1.96*sd_linear] 55 | 56 | plot_data <- rbind(test, results, fill=T) 57 | plot_data[, mean_linear:=val] 58 | plot_data[study=='2019 Result', data:=3] 59 | 60 | plot_data[included==0, data:=4] 61 | 62 | # forest plot of the data 63 | color_vals <- c("black", "blue", "darksalmon", "red") 64 | names(color_vals) <- c(1,2,3,4) 65 | 66 | if (cov_setting %in% c('cov_finder', 'percent_male_only')) { 67 | 68 | plot_data[sample_sex==0, sex:='female'] 69 | plot_data[sample_sex==1, sex:='male'] 70 | plot_data[!sample_sex%in%c(0,1) & !is.na(sample_sex), sex:='both'] 71 | 72 | 73 | alpha_vals <- c(1, 0.75, 0.75, 1) 74 | names(alpha_vals) <- c('NA', 'male', 'female', 'both') 75 | 76 | 77 | pdf(paste0(out_dir, ro_pair, '_simple_forest_plot_', cov_setting, '_', trim, '.pdf'), 78 | height = 12) 79 | 80 | p <- ggplot(data=plot_data, 81 | aes(x = row,y = mean_linear, ymin = lower, ymax = upper, color = as.factor(data)))+ 82 | geom_pointrange(aes(shape = as.factor(data)))+ 83 | scale_color_manual("", values = color_vals, guide = F) + 84 | # scale_alpha_manual(values=c('NA'=1, 'male'=0.3, 'female'=0.3, 'both'=1)) + 85 | geom_hline(yintercept =1, linetype=2, color = "black")+ 86 | xlab('study id')+ ylab(paste0('Relative Risk', " (95% Confidence Interval)"))+ 87 | scale_x_continuous(label = obs_data$study, breaks = obs_data$row)+ 88 | geom_errorbar(aes(ymin=lower, ymax=upper),width=0.5,cex=1)+ 89 | labs(subtitle = header, color = "")+ 90 | scale_shape_discrete(guide = F)+ 91 | theme_bw()+ 92 | coord_flip() 93 | 94 | print(p) 95 | 96 | dev.off() 97 | } else { 98 | pdf(paste0(out_dir, ro_pair, '_simple_forest_plot_', cov_setting, '_', trim, '.pdf'), 99 | height=15) 100 | 101 | p <- ggplot(data=plot_data, 102 | aes(x = row,y = mean_linear, ymin = lower, ymax = upper, color = as.factor(data)))+ 103 | geom_pointrange(aes(shape = as.factor(data)))+ 104 | scale_color_manual("", values = color_vals, guide = F) + 105 | geom_hline(yintercept =1, linetype=2, color = "black")+ 106 | xlab('study id')+ ylab(paste0('Relative Risk', " (95% Confidence Interval)"))+ 107 | scale_x_continuous(label = obs_data$study, breaks = obs_data$row)+ 108 | 
geom_errorbar(aes(ymin=lower, ymax=upper),width=0.5,cex=1)+ 109 | labs(subtitle = header, color = "")+ 110 | scale_shape_discrete(guide = F)+ 111 | theme_bw()+ 112 | coord_flip() 113 | 114 | print(p) 115 | 116 | dev.off() 117 | } 118 | -------------------------------------------------------------------------------- /risks/smoking/config.R: -------------------------------------------------------------------------------- 1 | # Configuration of pipeline 2 | 3 | # User settings 4 | # ------------------------------------------------------------------------------ 5 | USER <- Sys.getenv("USER") 6 | WORK_DIR <- "[working directory]" 7 | CODE_PATH <- paste0(WORK_DIR, "/src/") 8 | 9 | # Cluster settings 10 | # ------------------------------------------------------------------------------ 11 | PROJ <- "[project name]" 12 | SINGULARITY_IMG <- "[R image directory]" 13 | 14 | # Version settings 15 | # ------------------------------------------------------------------------------ 16 | VERSION_ID <- "prod" 17 | 18 | # Directory settings 19 | # ------------------------------------------------------------------------------ 20 | OUT_DIR <- "[output directory]" 21 | INPUT_DATA_DIR = "[input data directory]" 22 | 23 | # Output directory for each stage 24 | SUB_DIRS <- c( 25 | paste0(OUT_DIR, "00_prepped_data"), 26 | paste0(OUT_DIR, "01_template_pkl_files"), 27 | paste0(OUT_DIR, "01_template_models"), 28 | paste0(OUT_DIR, "02_loglinear_models"), 29 | paste0(OUT_DIR, "02_loglinear_pkl_files"), 30 | paste0(OUT_DIR, "03_covariate_selection_models"), 31 | paste0(OUT_DIR, "03_covariate_selection_pkl_files"), 32 | paste0(OUT_DIR, "04_mixed_effects_models"), 33 | paste0(OUT_DIR, "04_mixed_effects_pkl_files"), 34 | paste0(OUT_DIR, "05_evidence_score") 35 | ) 36 | 37 | # create directories 38 | for (direc in SUB_DIRS){ 39 | dir.create(direc, showWarnings = F) 40 | } 41 | 42 | # data settings 43 | # ------------------------------------------------------------------------------ 44 | ALL_RO_PAIRS <- gsub(".csv", "", list.files(INPUT_DATA_DIR)) 45 | EXCLUDED_RO_PAIRS <- c("dairy_stroke", "fruit_oral", "fruit_larynx") 46 | RO_PAIRS <- ALL_RO_PAIRS[!(ALL_RO_PAIRS %in% EXCLUDED_RO_PAIRS)] 47 | 48 | OBS_VAR <- "ln_effect" 49 | OBS_SE_VAR <- "ln_se" 50 | STUDY_ID_VAR <- "nid" 51 | 52 | ALT_EXPOSURE_COLS <- c("b_0", "b_1") 53 | REF_EXPOSURE_COLS <- c("a_0", "a_1") 54 | 55 | # model settings 56 | # ------------------------------------------------------------------------------ 57 | BIAS_COVARIATES_AS_INTX <- TRUE 58 | 59 | BETA_PRIOR_MULTIPLIER = 0.1 60 | COV_FINDER_CONFIG = list( 61 | pre_selected_covs = list("exposure_linear"), 62 | num_samples = 1000L, 63 | power_range = list(-4, 4), 64 | power_step_size = 0.05, 65 | laplace_threshold = 1e-5, 66 | inlier_pct = 1.0, 67 | bias_zero = TRUE 68 | ) 69 | 70 | PRIOR_VAR_RSLOPE = 1e-6 71 | PRIOR_VAR_MAXDER <- 1e-4 72 | MONOSPLINE_SLOPE_MULTIPLIER <- 2 73 | 74 | MONOSPLINE_BIAS_CONFIG = list( 75 | spline_degree = 3L 76 | ) 77 | 78 | LOGLINEAR_BIAS_CONFIG = list( 79 | spline_degree = 3L 80 | ) 81 | -------------------------------------------------------------------------------- /risks/smoking/continous_risk/04_format_rr_draws_non_cvd.R: -------------------------------------------------------------------------------- 1 | #--------------------------------------------------- 2 | # Purpose: format draws for non-cvd outcomes only 3 | # Author: Xiaochen Dai 4 | # Date: 07/25/2022 5 | #--------------------------------------------------- 6 | 7 | rm(list = ls()) 8 | 9 | # System info 10 | os <- 
Sys.info()[1] 11 | user <- Sys.info()[7] 12 | 13 | # Drives 14 | j <- if (os == "Linux") "/home/j/" else if (os == "Windows") "J:/" 15 | h <- if (os == "Linux") paste0("/homes/", user, "/") else if (os == "Windows") "H:/" 16 | 17 | library(dplyr) 18 | library(ggplot2) 19 | library(data.table) 20 | source("get_ids.R") 21 | source("get_age_metadata.R") 22 | 23 | # Set up arguments 24 | if(interactive()){ 25 | ro_pair <- "[enter risk-outcome pair of interest]" 26 | level_100 <- F 27 | } else { 28 | args <- commandArgs(trailingOnly = TRUE) 29 | ro_pair <- args[1] 30 | level_100 <- as.logical(args[2]) 31 | } 32 | 33 | if(level_100){ 34 | message("100 exposure levels") 35 | rr_dir <- "[path to raw draws for 100 exposure levels]" 36 | save_dir <- "[path to final formated draws for 100 exposure levels]" 37 | } else { 38 | message("1000 exposure levels") 39 | rr_dir <- "[path to raw draws for 1000 exposure levels]" 40 | save_dir <- "[path to final formated draws for 1000 exposure levels]" 41 | } 42 | 43 | ages <- get_age_metadata(19) 44 | setnames(ages, c("age_group_years_start", "age_group_years_end"), c("age_start", "age_end")) 45 | ages <- ages[,.(age_start, age_end, age_group_id)] 46 | 47 | # expand age group and sex 48 | age_group_ids <- c(6:20, 30:32, 235) 49 | 50 | # for fractures 51 | if(ro_pair=="fractures"){ 52 | rr_frac <- fread(paste0(rr_dir, "smoking_", ro_pair, ".csv")) 53 | rr_frac[, rr := exp(rr)] 54 | 55 | rr_full <- expand.grid(cause_id=c(878,923), draw=0:999, sex_id=1:2, age_group_id=age_group_ids) %>% as.data.table 56 | rr_full <- merge(rr_full, rr_frac, by="draw") 57 | 58 | } else { 59 | # reshape the data 60 | rr <- fread(paste0(rr_dir, "smoking_", ro_pair, ".csv")) 61 | setnames(rr, "risk", "exposure") 62 | rr_long <- melt(rr, id.vars = "exposure", variable.name = "draw", value.name = "rr") 63 | rr_long <- rr_long[order(exposure)] 64 | rr_long[, rr:=exp(rr)] 65 | rr_long[, draw := as.numeric(draw)-1] 66 | 67 | rr_full <- expand.grid(exposure=seq(0,100,0.1), draw=0:999, sex_id=1:2, age_group_id=age_group_ids) %>% as.data.table 68 | rr_full <- merge(rr_full, rr_long, by=c("exposure", "draw")) 69 | setorder(rr_full, "exposure","draw", "sex_id", "age_group_id") 70 | 71 | if(ro_pair %in% c("breast_cancer", "cervical_cancer")){ 72 | rr_full <- rr_full[sex_id==2] 73 | } 74 | 75 | if(ro_pair %in% c("prostate_cancer")){ 76 | rr_full <- rr_full[sex_id==1] 77 | } 78 | } 79 | 80 | # save the draws 81 | write.csv(rr_full, paste0(save_dir, ro_pair, ".csv"), row.names = F) 82 | 83 | 84 | 85 | 86 | 87 | -------------------------------------------------------------------------------- /risks/smoking/continous_risk/05_02_age_specific_rr_curves.R: -------------------------------------------------------------------------------- 1 | #--------------------------------------------------- 2 | # Purpose: create final age_specific relative risk curves for CVD outcomes 3 | # Author: Xiaochen Dai, adapted from Haley's codes 4 | # Date: 07/25/2022 5 | #--------------------------------------------------- 6 | 7 | rm(list = ls()) 8 | 9 | # System info 10 | os <- Sys.info()[1] 11 | user <- Sys.info()[7] 12 | 13 | # Drives 14 | j <- if (os == "Linux") "/home/j/" else if (os == "Windows") "J:/" 15 | h <- if (os == "Linux") paste0("/homes/", user, "/") else if (os == "Windows") "H:/" 16 | 17 | code_dir <- '[path to age_rr_utils.R]' 18 | save_dir <- "[path to results]" 19 | age_rr_dt_dir <- '[path to cleaned age-stratified data and AF draws]' 20 | plot_dir <- "[path to plots]" 21 | 22 | library(dplyr) 23 | 
library(ggplot2) 24 | library(data.table) 25 | library(mrbrt002, lib.loc = "/ihme/code/mscm/Rv4/packages/") 26 | source("get_ids.R") 27 | source("get_age_metadata.R") 28 | source(paste0(code_dir, "age_rr_utils.R")) 29 | source("helper_functions.R") 30 | np <- import("numpy") 31 | np$random$seed(as.integer(123)) 32 | 33 | # Set up arguments 34 | if(interactive()){ 35 | ro_pair <- "[enter CVD risk-outcome pair of interest]" 36 | mean_factor <- F 37 | log_af <- F # using AF calculated based on log_rr 38 | level_100 <- F 39 | } else { 40 | args <- commandArgs(trailingOnly = TRUE) 41 | ro_pair <- args[1] 42 | mean_factor <- as.logical(args[2]) 43 | log_af <- as.logical(args[3]) 44 | level_100 <- as.logical(args[4]) 45 | } 46 | 47 | if(level_100){ 48 | output_dir <- "[path to raw draws for 100 exposure levels]" 49 | new_dir <- "[path to final formated draws for 100 exposure levels]" 50 | } else { 51 | output_dir <- "[path to raw draws for 1000 exposure levels]" 52 | new_dir <- "[path to final formated draws for 1000 exposure levels]" 53 | } 54 | 55 | ages <- get_age_metadata(19) 56 | setnames(ages, c("age_group_years_start", "age_group_years_end"), c("age_start", "age_end")) 57 | ages <- ages[,.(age_start, age_end, age_group_id)] 58 | 59 | # get the reference age group 60 | data <- readRDS(paste0(save_dir, "01_template_models/", ro_pair, ".RDS")) 61 | df_data <- data$df_data 62 | age_ref <- df_data$age_ref %>% mean 63 | age_ref_group <- ages[age_start <= age_ref & age_end >= age_ref, age_group_id] 64 | 65 | # load rr draws and age pattern draws 66 | rr_draws <- fread(paste0(output_dir, "smoking_", ro_pair, ".csv")) 67 | setnames(rr_draws, "risk", "exposure") 68 | age_pattern_draws <- fread(file.path(age_rr_dt_dir, paste0("attenuation_pct_draws_", ro_pair, "_",age_ref_group,".csv"))) 69 | 70 | col_names <- c("exposure", paste0("draw_", 0:999)) 71 | 72 | # change variable name 73 | names(rr_draws) <- col_names 74 | 75 | # load mean of age pattern 76 | age_pattern_mean <- fread(file.path(age_rr_dt_dir, paste0("attenuation_pct_summary_", ro_pair, "_",age_ref_group,".csv"))) 77 | 78 | # apply attenuation factors 79 | if(mean_factor){ 80 | # apply mean of attenuation factors only 81 | plot_path <- paste0(plot_dir, "age_spec_smoking_", ro_pair,"_af_mean.pdf") 82 | age_spec_risk_curve <- apply_age_pattern_mean_af(ro_pair =ro_pair, 83 | risk_curve_draws_df = rr_draws, 84 | age_pattern_mean_df = age_pattern_mean, 85 | age_pattern_mean_log_df=age_pattern_mean_log, 86 | log_af = log_af, 87 | plot = T, 88 | plot_path = plot_path) 89 | 90 | } else { 91 | plot_path <- paste0(plot_dir, "age_spec_smoking_", ro_pair,"_af_draws_no_gamma.pdf") 92 | age_spec_risk_curve <- apply_age_pattern(ro_pair =ro_pair, 93 | risk_curve_draws_df = rr_draws, 94 | age_pattern_draws_df = age_pattern_draws, 95 | #draws_in_log = T, 96 | #return_draws_log = F, 97 | plot = T, 98 | plot_path = plot_path) 99 | 100 | } 101 | 102 | # re-shape the dataset 103 | age_spec_rr <- melt(age_spec_risk_curve, id.vars = c("exposure", "age_group_id"), variable.name = "draw", value.name = "rr") 104 | age_spec_rr[, draw:= as.numeric(draw)-1] 105 | 106 | age_spec_rr_full <- copy(age_spec_rr) 107 | for(age_id in c(6:8)){ 108 | temp <- age_spec_rr[age_group_id==9] 109 | temp[, age_group_id := age_id] 110 | age_spec_rr_full <- rbindlist(list(temp, age_spec_rr_full), use.names = T) 111 | } 112 | age_spec_rr_full[, age_group_id] %>% unique 113 | 114 | # add sex_id 115 | age_spec_rr_full_m <- copy(age_spec_rr_full) 116 | age_spec_rr_full_f <- 
copy(age_spec_rr_full) 117 | age_spec_rr_full_m[, sex_id := 1] 118 | age_spec_rr_full_f[, sex_id := 2] 119 | 120 | age_spec_rr_full <- rbindlist(list(age_spec_rr_full_m, age_spec_rr_full_f), use.names = T) 121 | 122 | setorder(age_spec_rr_full, "exposure","draw", "sex_id", "age_group_id") 123 | 124 | # save the draws 125 | message("saving draws...") 126 | write.csv(age_spec_rr_full, paste0(new_dir, ro_pair, ".csv"), row.names = F) 127 | -------------------------------------------------------------------------------- /risks/smoking/prep_data_function.R: -------------------------------------------------------------------------------- 1 | 2 | prep_diet_data <- function( 3 | ro_pair, obs_var, obs_se_var, ref_vars, alt_vars, allow_ref_gt_alt = FALSE, 4 | study_id_var = NA, 5 | drop_x_covs = NA, keep_x_covs = NA, drop_z_covs = NA, keep_z_covs = NA, 6 | diet_dir = NA, 7 | verbose = TRUE) { 8 | 9 | require(dplyr) 10 | require(rlang) 11 | 12 | if (verbose) cat(ro_pair, "\n") 13 | if (!all(is.na(drop_x_covs)) && !all(is.na(keep_x_covs))) stop("Cannot specify both drop and keep X-covs") 14 | if (!all(is.na(drop_z_covs)) && !all(is.na(keep_z_covs))) stop("Cannot specify both drop and keep Z-covs") 15 | 16 | df <- read.csv(paste0(diet_dir, "/", ro_pair, ".csv")) %>% 17 | filter(complete.cases(.[, c(ref_vars, alt_vars)])) 18 | 19 | if (nrow(df) == 0) stop("No observations with non-missing exposure columns") 20 | 21 | # # convert non-binary covariates into dummy variables 22 | # # and create list of bias covariates for the analysis 23 | create_dummy_vars <- function(dat, varname, reference_level) { 24 | dev <- FALSE 25 | if (dev) { 26 | dat <- data.frame(x1 = sample(c("a", "b", "c"), 30, TRUE)) 27 | varname <- "x1" 28 | reference_level <- "a" 29 | } 30 | vec <- as.data.frame(dat)[, varname] 31 | lvls <- unique(vec)[!unique(vec) == reference_level] 32 | dat2 <- as.data.frame(do.call("cbind", lapply(lvls, function(x) as.integer(vec == x)))) 33 | if(!is_empty(dat2)){ 34 | names(dat2) <- paste0(varname, "_", lvls) 35 | } 36 | return(dat2) 37 | } 38 | 39 | confounders <- names(df)[grepl('confounders_', names(df))] 40 | cvs <- names(df)[grepl('cv_', names(df))] 41 | 42 | data_cols <- c(cvs) 43 | 44 | bias_covs <- c() 45 | for (cov in data_cols[data_cols %in% names(df)]) { 46 | 47 | dev <- FALSE 48 | if (dev) { 49 | cov <- "follow_up" 50 | } 51 | 52 | if (any(is.na(df[, cov]))) next 53 | 54 | if (all(df[, cov] == round(df[, cov]))) { 55 | df[, cov] <- as.integer(df[, cov]) 56 | } else { 57 | stop(paste0("Bias covariate '", cov, "' is not of type integer")) 58 | } 59 | bias_covs <- c(bias_covs, cov) 60 | } 61 | 62 | # use SVD to prevent adding collinear variables 63 | bias_covs_tmp <- c() 64 | 65 | # sort the bias_covs to make sure the cv_adj is always included 66 | bias_covs <- sort(bias_covs) 67 | 68 | for (bias_cov in bias_covs) { 69 | dev <- FALSE 70 | if (dev) { 71 | bias_cov <- "exposure_3" 72 | } 73 | d <- svd(cbind(df[, bias_covs_tmp], df[, bias_cov]))$d 74 | if (d[length(d)] > 1e-10) bias_covs_tmp <- c(bias_covs_tmp, bias_cov) 75 | } 76 | 77 | bias_covs <- bias_covs_tmp 78 | 79 | # warn if cv_adj is not selected 80 | if(!'cv_adj' %in% bias_covs) message("Warning: cv_adj is not selected") 81 | 82 | # dataset 83 | # NOTE: these covs cannot have missingness!
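# A minimal standalone sketch of the SVD screen used above (toy data, not pipeline data):
# a candidate column survives only if the smallest singular value of the running
# design matrix stays above 1e-10, i.e. it is not a linear combination of the
# covariates already kept.
#   X <- data.frame(cv_adj = c(1L, 0L, 1L, 0L), cv_dup = c(1L, 0L, 1L, 0L))
#   min(svd(cbind(X$cv_adj))$d)            # > 1e-10, so cv_adj is kept
#   min(svd(cbind(X$cv_adj, X$cv_dup))$d)  # 0, so cv_dup is dropped as collinear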
84 | df <- df[, c("nid", "ln_effect", "ln_se", ALT_EXPOSURE_COLS, REF_EXPOSURE_COLS, 'percent_male', 'age_start', 'age_end', 'age_ref', bias_covs)] %>% 85 | filter(complete.cases(.)) %>% 86 | arrange(nid) 87 | 88 | ##cov inclusion/exclusion 89 | # -- X 90 | if (!is.na(keep_x_covs)) { 91 | if (!all(keep_x_covs %in% bias_covs)) { 92 | stop("One or more provided X-covs not allowed.") 93 | } else { 94 | x_covs <- keep_x_covs 95 | } 96 | } else if (!is.na(drop_x_covs)) { 97 | x_covs <- bias_covs[!bias_covs %in% drop_x_covs] 98 | } else { 99 | x_covs <- bias_covs 100 | } 101 | 102 | #-- Z 103 | if (!is.na(keep_z_covs)) { 104 | if (!all(keep_z_covs %in% bias_covs)) { 105 | stop("One or more provided Z-covs not allowed.") 106 | } else { 107 | z_covs <- keep_z_covs 108 | } 109 | } else if (!is.na(drop_z_covs)) { 110 | z_covs <- bias_covs[!bias_covs %in% drop_z_covs] 111 | } else { 112 | z_covs <- bias_covs 113 | } 114 | 115 | out <- list( 116 | df=df, ro_pair=ro_pair, x_covs=x_covs, z_covs=z_covs, 117 | obs_var=obs_var, obs_se_var=obs_se_var, 118 | ref_vars=ref_vars, alt_vars=alt_vars, 119 | study_id_var=study_id_var, 120 | allow_ref_gt_alt=allow_ref_gt_alt 121 | ) 122 | return(out) 123 | } 124 | 125 | -------------------------------------------------------------------------------- /risks/vegetables/README.md: -------------------------------------------------------------------------------- 1 | This folder contains custom code for diet low in vegetables burden of proof analysis. 2 | -------------------------------------------------------------------------------- /risks/vegetables/config.R: -------------------------------------------------------------------------------- 1 | # Configuration of pipeline 2 | 3 | # Directory settings 4 | # ------------------------------------------------------------------------------ 5 | OUT_DIR <- FILEPATH 6 | INPUT_DATA_DIR <- FILEPATH 7 | 8 | # Output directory for each stage 9 | SUB_DIRS <- c( 10 | paste0(OUT_DIR, "00_prepped_data"), 11 | paste0(OUT_DIR, "01_template_pkl_files"), 12 | paste0(OUT_DIR, "01_template_models"), 13 | paste0(OUT_DIR, "02_loglinear_models"), 14 | paste0(OUT_DIR, "02_loglinear_pkl_files"), 15 | paste0(OUT_DIR, "03_covariate_selection_models"), 16 | paste0(OUT_DIR, "04_mixed_effects_pkl_files"), 17 | paste0(OUT_DIR, "05_evidence_score"), 18 | paste0(OUT_DIR, "05_all_plots"), 19 | paste0(OUT_DIR, "05_all_csvs"), 20 | paste0(OUT_DIR, "05_pub_bias"), 21 | paste0(OUT_DIR, "05_draw_csvs") 22 | ) 23 | 24 | # data settings 25 | # ------------------------------------------------------------------------------ 26 | ALL_RO_PAIRS <- gsub(".csv", "", list.files(INPUT_DATA_DIR)) 27 | EXCLUDED_RO_PAIRS <- c("sugar_cvd", "sugar_obesity", "fruit_oral", "fruit_larynx", 28 | ALL_RO_PAIRS[grepl("original", ALL_RO_PAIRS)], 29 | ALL_RO_PAIRS[grepl("_stroke", ALL_RO_PAIRS)], 30 | ALL_RO_PAIRS[grepl("sugar", ALL_RO_PAIRS)]) 31 | RO_PAIRS <- ALL_RO_PAIRS[!(ALL_RO_PAIRS %in% EXCLUDED_RO_PAIRS)] 32 | RO_PAIRS <- ALL_RO_PAIRS[grepl("veg", ALL_RO_PAIRS)] # Alternative Option 1: select a subset of RO pairs to run 33 | 34 | 35 | OBS_VAR <- "ln_effect" 36 | OBS_SE_VAR <- "ln_se" 37 | STUDY_ID_VAR <- "nid" 38 | 39 | ALT_EXPOSURE_COLS <- c("b_0", "b_1") 40 | REF_EXPOSURE_COLS <- c("a_0", "a_1") 41 | 42 | USE_GLOBAL_DIST_PREDICT <- F # use the data to predict = F; use the exposure model to predict = T 43 | 44 | 45 | # model settings 46 | # ------------------------------------------------------------------------------ 47 | BIAS_COVARIATES_AS_INTX <- TRUE 48 | 49 | # For diet 50 
| DIRECTION = list( 51 | veg = "decreasing" 52 | ) 53 | 54 | BETA_PRIOR_MULTIPLIER = 0.1 # used in covfinder and final model on covs 55 | 56 | 57 | colnames(PRE_SELECTED_COVS) <- c("ro_pair", "cov") # PRE_SELECTED_COVS (a ro_pair/cov table) must be defined before this config is sourced 58 | 59 | COV_FINDER_CONFIG = list( 60 | #pre_selected_covs = list("signal"), 61 | num_samples = 1000L, 62 | power_range = list(-4, 4), 63 | power_step_size = 0.05, 64 | laplace_threshold = 1e-5, 65 | inlier_pct = 1, # since we trim in stage 1 66 | bias_zero = TRUE 67 | ) 68 | 69 | INLIER_PCT <- 0.9 # 0.9 standard trimming 70 | 71 | 72 | N_I_KNOTS <- 2L 73 | PRIOR_VAR_RSLOPE = 1e-6 # originally 1e-6 74 | PRIOR_VAR_MAXDER <- 1e-4 75 | 76 | # Monotonic risks will have monotonicity constraint included 77 | CONFIG = list( 78 | use_spline = TRUE, 79 | use_re = FALSE, 80 | spline_degree = 2L, 81 | spline_knots_type = 'domain', 82 | spline_r_linear = TRUE, 83 | spline_l_linear = FALSE, 84 | prior_spline_funval_uniform = array(c(-1 + 1e-6, 19)), 85 | prior_spline_num_constraint_points = 150L, 86 | spline_knots = array(seq(0, 1, length.out = N_I_KNOTS + 2)), 87 | prior_spline_maxder_gaussian = cbind(rbind(rep(0, N_I_KNOTS), 88 | rep(Inf, N_I_KNOTS)), 89 | c(0, sqrt(PRIOR_VAR_RSLOPE))) 90 | ) 91 | 92 | 93 | J_N_I_KNOTS <- 3L 94 | # 95 | J_SHAPED_CONFIG = list( 96 | use_spline = TRUE, 97 | use_re = FALSE, 98 | spline_degree = 2L, 99 | spline_knots_type = 'domain', 100 | spline_r_linear = TRUE, 101 | spline_l_linear = TRUE, 102 | prior_spline_funval_uniform = array(c(-1 + 1e-6, 19)), 103 | prior_spline_num_constraint_points = 150L, 104 | spline_knots = array(seq(0, 1, length.out = J_N_I_KNOTS + 2)), 105 | prior_spline_maxder_gaussian = cbind(c(0, sqrt(PRIOR_VAR_RSLOPE)), 106 | rbind(rep(0, J_N_I_KNOTS-1), 107 | rep(Inf, J_N_I_KNOTS-1)), 108 | c(0, sqrt(PRIOR_VAR_RSLOPE))) 109 | ) 110 | 111 | -------------------------------------------------------------------------------- /risks/vegetables/veg_TMREL.R: -------------------------------------------------------------------------------- 1 | # Drives 2 | os <- Sys.info()["sysname"] 3 | user <- Sys.info()[7] 4 | j <- if (os == "Linux") "/home/j/" else if (os == "Windows") "J:/" 5 | h <- if (os == "Linux") paste0("/homes/", user, "/") else if (os == "Windows") "H:/" 6 | 7 | out_dir <- "FILEPATH" 8 | WORK_DIR <- "FILEPATH" 9 | central_model_output_folder <- "FILEPATH" 10 | 11 | library(dplyr) 12 | library(ggplot2) 13 | library(data.table) 14 | 15 | 16 | df <- fread("FILEPATH") 17 | 18 | # FLIP COMPARISON GROUPS IF LOW INTAKE GROUP WAS CODED AS ALTERNATIVE # 19 | df[a_0>b_0, b_0 := a_0] 20 | df[a_1>b_1, b_1 := a_1] 21 | 22 | df[, index := 1:.N, by = "nid"] 23 | 24 | alt_exp_study <- unique(df[a_0!=b_0, ][,.(nid, b_1,b_0)]) 25 | alt_exp_study[, b_midpoint := b_0 + (b_1-b_0)/2] 26 | 27 | lower <- as.numeric(quantile(alt_exp_study$b_0, 0.85)) 28 | upper <- as.numeric(quantile(alt_exp_study$b_midpoint, 0.85)) 29 | 30 | p <- "veg" # risk-outcome label stored in the "ro" column below 31 | tmrel_draws <- data.table("tmrel" = runif(1000, min = lower, max = upper)) 32 | tmrel_draws[, `:=`(ro = p, draw = paste0("draw_",0:999), lower = lower, upper = upper)] 33 | write.csv(tmrel_draws, "FILEPATH", row.names = F) 34 | -------------------------------------------------------------------------------- /second_process/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 |
lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | notebooks/ 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | .python-version 87 | 88 | # pipenv 89 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 90 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 91 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 92 | # install all needed dependencies. 93 | #Pipfile.lock 94 | 95 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 96 | __pypackages__/ 97 | 98 | # Celery stuff 99 | celerybeat-schedule 100 | celerybeat.pid 101 | 102 | # SageMath parsed files 103 | *.sage.py 104 | 105 | # Environments 106 | .env 107 | .venv 108 | env/ 109 | venv/ 110 | ENV/ 111 | env.bak/ 112 | venv.bak/ 113 | 114 | # Spyder project settings 115 | .spyderproject 116 | .spyproject 117 | 118 | # Rope project settings 119 | .ropeproject 120 | 121 | # mkdocs documentation 122 | /site 123 | 124 | # mypy 125 | .mypy_cache/ 126 | .dmypy.json 127 | dmypy.json 128 | 129 | # Pyre type checker 130 | .pyre/ -------------------------------------------------------------------------------- /second_process/LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2021, IHME Math Sciences 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /second_process/README.md: -------------------------------------------------------------------------------- 1 | # Evidence Score Pipeline 2 | 3 | Used for pipeline to obtain the results from `mrtool` for the purpose of risk score. 4 | -------------------------------------------------------------------------------- /second_process/examples/gbd2020_continuous_risk.py: -------------------------------------------------------------------------------- 1 | from espipeline.main import PostContinuousProcess, ContinuousPipeline 2 | from espipeline.filemanager import FileManager 3 | import warnings 4 | warnings.filterwarnings("ignore") 5 | 6 | 7 | def main(): 8 | i_folder = "/mnt/team/msca/pub/archive/evidence-score/gbd2020" 9 | o_folder = "/mnt/team/msca/pub/archive/evidence-score/gbd2020-process" 10 | 11 | fm = FileManager(i_folder, o_folder) 12 | pipeline = ContinuousPipeline(fm, PostContinuousProcess) 13 | 14 | for pair in pipeline.pairs: 15 | print(pair) 16 | if "metab_bmi_adult" in pair: 17 | continue 18 | process = pipeline.get_process(pair) 19 | process.run() 20 | 21 | 22 | if __name__ == "__main__": 23 | main() 24 | -------------------------------------------------------------------------------- /second_process/setup.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | from pathlib import Path 3 | from setuptools import setup, find_packages 4 | 5 | 6 | if __name__ == "__main__": 7 | base_dir = Path(__file__).parent 8 | spec = importlib.util.spec_from_file_location( 9 | "__about__", 10 | base_dir / "src" / "espipeline" / "__about__.py" 11 | ) 12 | about = importlib.util.module_from_spec(spec) 13 | spec.loader.exec_module(about) 14 | 15 | with (base_dir/"README.md").open() as f: 16 | long_description = f.read() 17 | 18 | install_requirements = [ 19 | "numpy", 20 | "scipy", 21 | "pandas", 22 | ] 23 | 24 | test_requirements = [ 25 | "pytest", 26 | "pytest-mock", 27 | ] 28 | 29 | doc_requirements = [] 30 | 31 | setup(name=about.__title__, 32 | version=about.__version__, 33 | 34 | description=about.__summary__, 35 | long_description=long_description, 36 | license=about.__license__, 37 | url=about.__uri__, 38 | 39 | author=about.__author__, 40 | author_email=about.__email__, 41 | 42 | package_dir={"": "src"}, 43 | packages=find_packages(where="src"), 44 | include_package_data=True, 45 | 46 | install_requires=install_requirements, 47 | tests_require=test_requirements, 48 | extras_require={ 49 | "docs": doc_requirements, 50 | "test": test_requirements, 51 | "dev": doc_requirements + test_requirements 52 | }, 53 | zip_safe=False,) 54 | -------------------------------------------------------------------------------- /second_process/src/espipeline/__about__.py: -------------------------------------------------------------------------------- 1 | __all__ = [ 2 | "__title__", "__summary__", "__uri__", "__version__", "__author__", 3 | 
"__email__", "__license__", "__copyright__", 4 | ] 5 | 6 | __title__ = "espipeline" 7 | __summary__ = "Evidence score pipeline" 8 | __uri__ = "https://stash.ihme.washington.edu/projects/MSCA/repos/escore-pipeline" 9 | 10 | __version__ = "0.0.0" 11 | 12 | __author__ = "IHME Math Sciences" 13 | __email__ = "ihme.math.sciences@gmail.com" 14 | 15 | __license__ = "BSD 2-Clause License" 16 | __copyright__ = f"Copyright 2021 {__author__}" 17 | -------------------------------------------------------------------------------- /second_process/src/espipeline/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipline import Pipeline 2 | from .process import Process 3 | from threadpoolctl import threadpool_limits 4 | 5 | 6 | threadpool_limits(limits=1, user_api='blas') 7 | threadpool_limits(limits=1, user_api='openmp') 8 | -------------------------------------------------------------------------------- /second_process/src/espipeline/dichotomous.py: -------------------------------------------------------------------------------- 1 | """ 2 | Main GBD Evience Score Pipeline 3 | """ 4 | import matplotlib.pyplot as plt 5 | 6 | from espipeline.process import Process 7 | 8 | 9 | class PostDichotomousProcess(Process): 10 | """ 11 | Post process for GBD 2020 dichotomous risk 12 | """ 13 | 14 | def plot_model(self): 15 | _, ax = plt.subplots(figsize=(8, 5)) 16 | 17 | # plot data 18 | ax.scatter(self.study_data.log_rr, 19 | self.study_data.log_rr_se, 20 | color="grey", alpha=0.4) 21 | outlier_index = self.study_data.is_outlier == 1 22 | ax.scatter(self.study_data.log_rr[outlier_index], 23 | self.study_data.log_rr_se[outlier_index], 24 | color="grey", alpha=0.4) 25 | 26 | # plot funnel 27 | beta = self.model.fe_soln["intercept"][0] 28 | se_max = self.study_data.log_rr_se.max() 29 | ax.fill_betweenx( 30 | [0.0, se_max], 31 | [beta, beta - 1.96*se_max], 32 | [beta, beta + 1.96*se_max], 33 | color="#B0E0E6", alpha=0.4 34 | ) 35 | ax.plot([beta, beta - 1.96*se_max], 36 | [0.0, se_max], 37 | linewidth=1, color="#87CEFA") 38 | ax.plot([beta, beta + 1.96*se_max], 39 | [0.0, se_max], 40 | linewidth=1, color="#87CEFA") 41 | ax.set_ylim([se_max, 0.0]) 42 | 43 | # plot vertical lines 44 | ax.axvline(0.0, color="k") 45 | ax.axvline(beta, color="#008080") 46 | ax.fill_betweenx([0.0, se_max], 47 | [self.output_data.outer_log_cause_lower.values[0]]*2, 48 | [self.output_data.outer_log_cause_upper.values[0]]*2, 49 | color="#008080", alpha=0.2) 50 | ax.fill_betweenx([0.0, se_max], 51 | [self.output_data.inner_log_cause_lower.values[0]]*2, 52 | [self.output_data.inner_log_cause_upper.values[0]]*2, 53 | color="#008080", alpha=0.2) 54 | 55 | # set title and labels 56 | title = (f"name={self.name}, " 57 | f"score={self.risk_cause_metadata.score.values[0]: .3f}") 58 | ax.set_title(title, loc="left") 59 | ax.set_xlabel("log_rr") 60 | ax.set_ylabel("log_rr_se") 61 | 62 | plt.savefig(self.o_path / "model_figure.pdf", bbox_inches="tight") 63 | plt.close("all") 64 | 65 | def run(self): 66 | super().run() 67 | self.plot_model() 68 | -------------------------------------------------------------------------------- /second_process/src/espipeline/filemanager.py: -------------------------------------------------------------------------------- 1 | """ 2 | File manager: organize file paths 3 | """ 4 | from collections import defaultdict 5 | from dataclasses import dataclass, field 6 | from pathlib import Path 7 | from typing import Dict, List 8 | 9 | import pandas as pd 10 | 11 | 12 | @dataclass 13 
| class FileManager: 14 | """ 15 | Manages files for all risk-outcome pairs in the folder 16 | """ 17 | 18 | i_path: Path 19 | o_path: Path 20 | pairs: List[str] = field(init=False) 21 | pair_paths: Dict[str, Path] = field(init=False) 22 | pair_types: List[str] = field(init=False) 23 | pairs_by_type: Dict[str, List[str]] = field(init=False) 24 | 25 | def __post_init__(self): 26 | self.i_path = Path(self.i_path) 27 | self.o_path = Path(self.o_path) 28 | if not self.i_path.exists(): 29 | raise FileNotFoundError(str(self.i_path)) 30 | if not self.o_path.exists(): 31 | self.o_path.mkdir() 32 | 33 | self.pairs = [path.name 34 | for path in self.i_path.iterdir() if path.is_dir()] 35 | self.pair_paths = {pair: self.i_path / pair for pair in self.pairs} 36 | self.pairs_by_type = defaultdict(list) 37 | self.sort_pairs() 38 | self.pair_types = list(self.pairs_by_type.keys()) 39 | 40 | def sort_pairs(self): 41 | for pair, path in self.pair_paths.items(): 42 | meta = path / "risk_cause_metadata.csv" 43 | if not meta.exists(): 44 | raise FileNotFoundError(str(meta)) 45 | df_meta = pd.read_csv(meta) 46 | self.pairs_by_type[df_meta.risk_type[0]].append(pair) 47 | 48 | def __getitem__(self, pair: str) -> Path: 49 | return self.pair_paths[pair] 50 | 51 | def __repr__(self) -> str: 52 | return f"{type(self).__name__}(num_pairs={len(self.pairs)})" 53 | -------------------------------------------------------------------------------- /second_process/src/espipeline/loglinear.py: -------------------------------------------------------------------------------- 1 | """ 2 | Main GBD Evidence Score Pipeline 3 | """ 4 | from typing import List 5 | 6 | import matplotlib.pyplot as plt 7 | import numpy as np 8 | from pandas import DataFrame 9 | 10 | from mrtool import MRBRT 11 | 12 | from espipeline.process import Process 13 | 14 | 15 | class PostLogLinearProcess(Process): 16 | """ 17 | Post process for GBD 2020 loglinear risk 18 | """ 19 | 20 | def plot_models(self): 21 | # plot original model 22 | plot_model(self.study_data, 23 | self.output_data, 24 | self.risk_cause_metadata, 25 | self.model, 26 | self.name) 27 | plt.savefig(self.o_path / "model_figure.pdf", bbox_inches="tight") 28 | plt.close("all") 29 | 30 | def run(self): 31 | super().run() 32 | self.plot_models() 33 | 34 | 35 | def get_data_signal(model: MRBRT) -> np.ndarray: 36 | for cov_model in model.cov_models: 37 | if len(cov_model.ref_cov) != 0: 38 | break 39 | alt_cov = model.data.get_covs(cov_model.alt_cov).mean(axis=1) 40 | ref_cov = model.data.get_covs(cov_model.ref_cov).mean(axis=1) 41 | return alt_cov - ref_cov 42 | 43 | 44 | def plot_model(study_data: DataFrame, 45 | output_data: DataFrame, 46 | risk_cause_metadata: DataFrame, 47 | model: MRBRT, 48 | name: str, 49 | ax: List[plt.Axes] = None): 50 | if ax is None: 51 | _, ax = plt.subplots(1, 2, figsize=(16, 5)) 52 | 53 | # plot data 54 | ax[0].scatter( 55 | study_data.alt_risk, 56 | study_data.log_alt_cause, 57 | s=5.0/study_data.log_rr_se, 58 | color="gray", 59 | alpha=0.5 60 | ) 61 | outlier_index = study_data.is_outlier == 1 62 | ax[0].scatter( 63 | study_data.alt_risk[outlier_index], 64 | study_data.log_alt_cause[outlier_index], 65 | s=5.0/study_data.log_rr_se[outlier_index], 66 | color="red", 67 | alpha=0.5, 68 | marker="x" 69 | ) 70 | 71 | # plot prediction 72 | ax[0].plot(output_data.risk, output_data.log_cause, 73 | color="#008080", linewidth=1) 74 | 75 | # plot uncertainties 76 | ax[0].fill_between(output_data.risk, 77 | output_data.inner_log_cause_lower, 78
output_data.inner_log_cause_upper, 79 | color="#008080", 80 | alpha=0.2) 81 | ax[0].fill_between(output_data.risk, 82 | output_data.outer_log_cause_lower, 83 | output_data.outer_log_cause_upper, 84 | color="#008080", 85 | alpha=0.2) 86 | 87 | # plot bounds 88 | for b in [risk_cause_metadata.risk_lower.values[0], 89 | risk_cause_metadata.risk_upper.values[0]]: 90 | ax[0].axvline(b, linestyle="--", linewidth=1, color="k") 91 | 92 | # plot 0 line 93 | ax[0].axhline(0.0, linestyle="-", linewidth=1, color="k") 94 | 95 | # add unit to the xaxis 96 | ax[0].set_xlabel(risk_cause_metadata.risk_unit.values[0]) 97 | 98 | # title 99 | title = (f"name={name}, " 100 | f"score={risk_cause_metadata.score.values[0]: .3f}") 101 | ax[0].set_title(title, loc="left") 102 | 103 | # plot residual 104 | residual = model.data.obs - model.predict(model.data) 105 | residual_sd = np.sqrt(model.data.obs_se**2 + get_data_signal(model)**2*model.gamma_soln[0]) 106 | outlier_index = model.w_soln < 0.1 107 | ax[1].set_ylim(residual_sd.max(), 0.0) 108 | ax[1].scatter(residual, residual_sd, 109 | color="gray", alpha=0.4) 110 | ax[1].scatter(residual[outlier_index], 111 | residual_sd[outlier_index], 112 | color="red", alpha=0.4, marker="x") 113 | ax[1].fill_betweenx( 114 | [0.0, residual_sd.max()], 115 | [0.0, -1.96*residual_sd.max()], 116 | [0.0, 1.96*residual_sd.max()], 117 | color="#B0E0E6", alpha=0.4 118 | ) 119 | ax[1].plot([0.0, -1.96*residual_sd.max()], 120 | [0.0, residual_sd.max()], 121 | linewidth=1, color="#87CEFA") 122 | ax[1].plot([0.0, 1.96*residual_sd.max()], 123 | [0.0, residual_sd.max()], 124 | linewidth=1, color="#87CEFA") 125 | ax[1].axvline(0.0, color="k", linewidth=1, linestyle="--") 126 | ax[1].set_xlabel("residual") 127 | ax[1].set_ylabel("residual sd") 128 | -------------------------------------------------------------------------------- /second_process/src/espipeline/pipline.py: -------------------------------------------------------------------------------- 1 | """ 2 | Pipeline Class 3 | """ 4 | from typing import Callable, List 5 | 6 | import numpy as np 7 | 8 | from espipeline.filemanager import FileManager 9 | from espipeline.utils import list_all_files 10 | 11 | 12 | class Pipeline: 13 | """ 14 | Main Pipeline class 15 | """ 16 | 17 | def __init__(self, 18 | name: str, 19 | fm: FileManager, 20 | process_constructor: Callable, 21 | pairs: List[str] = None): 22 | self.name = name 23 | self.pairs = fm.pairs_by_type[self.name] if pairs is None else pairs 24 | self.i_pair_paths = { 25 | pair: fm.pair_paths[pair] 26 | for pair in self.pairs 27 | } 28 | self.o_pair_paths = { 29 | pair: fm.o_path / pair 30 | for pair in self.pairs 31 | } 32 | self.process_constructor = process_constructor 33 | 34 | @property 35 | def num_pairs(self) -> int: 36 | return len(self.pairs) 37 | 38 | def get_process(self, pair: str) -> "Process": 39 | return self.process_constructor(self.i_pair_paths[pair], 40 | self.o_pair_paths[pair]) 41 | 42 | def run(self): 43 | for pair in self.pairs: 44 | i_path = self.i_pair_paths[pair] 45 | o_path = self.o_pair_paths[pair] 46 | i_time = max(f.stat().st_mtime for f in list_all_files(i_path)) 47 | if (not o_path.exists() or o_path.stat().st_size == 0): 48 | o_time = -np.inf 49 | else: 50 | o_time = max(f.stat().st_mtime for f in list_all_files(o_path)) 51 | if i_time > o_time: 52 | print(pair) 53 | process = self.get_process(pair) 54 | process.run() 55 | 56 | def __repr__(self) -> str: 57 | return (f"{type(self).__name__}(name={self.name}, " 58 | f"num_pairs={self.num_pairs})") 59 | 
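# Usage sketch (not part of the pipeline): the paths are placeholders, and the
# "continuous" label is assumed to match the risk_type column recorded in the
# archive's risk_cause_metadata.csv files.
#
#     from espipeline.filemanager import FileManager
#     from espipeline.pipline import Pipeline
#     from espipeline.process import Process
#
#     fm = FileManager("/path/to/archive", "/path/to/processed")
#     pipeline = Pipeline("continuous", fm, Process)
#     pipeline.run()  # processes only pairs whose inputs are newer than their outputs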
-------------------------------------------------------------------------------- /second_process/src/espipeline/process.py: -------------------------------------------------------------------------------- 1 | """ 2 | Process Class 3 | """ 4 | from pathlib import Path 5 | import shutil 6 | 7 | import pickle5 as pkl 8 | import pandas as pd 9 | 10 | from espipeline.utils import list_all_files 11 | 12 | 13 | class Process: 14 | """ 15 | Process function 16 | """ 17 | 18 | def __init__(self, i_path: Path, o_path: Path): 19 | self.i_path = Path(i_path) 20 | self.o_path = Path(o_path) 21 | self.name = i_path.name 22 | 23 | # load all files 24 | for f in list_all_files(self.i_path): 25 | stem, suffix = f.stem, f.suffix 26 | if suffix == ".pkl": 27 | setattr(self, stem, pkl.load(open(f, "rb"))) 28 | elif suffix == ".csv": 29 | setattr(self, stem, pd.read_csv(f)) 30 | 31 | def run(self): 32 | if self.o_path.exists(): 33 | shutil.rmtree(self.o_path) 34 | shutil.copytree(self.i_path, self.o_path) 35 | -------------------------------------------------------------------------------- /second_process/src/espipeline/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility Functions 3 | """ 4 | from itertools import chain 5 | from pathlib import Path 6 | from typing import Dict, List, Tuple 7 | 8 | import numpy as np 9 | from mrtool import MRBRT 10 | from mrtool.core.other_sampling import (extract_simple_lme_hessian, 11 | extract_simple_lme_specs) 12 | from numpy import ndarray 13 | from scipy.stats import norm 14 | 15 | 16 | def get_fe_hessian(model: MRBRT) -> ndarray: 17 | specs = extract_simple_lme_specs(model) 18 | return extract_simple_lme_hessian(specs) 19 | 20 | 21 | def get_re_fisher(model: MRBRT) -> ndarray: 22 | lt = model.lt 23 | return lt.get_gamma_fisher(lt.gamma) 24 | 25 | 26 | def get_beta_info(model: MRBRT, name: str = "signal") -> Tuple[float]: 27 | # get beta solution 28 | cov_index = model.cov_names.index(name) 29 | beta = model.beta_soln[cov_index] 30 | beta_hessian = get_fe_hessian(model) 31 | beta_sd = 1.0/np.sqrt(np.diag(beta_hessian))[cov_index] 32 | return (beta, beta_sd) 33 | 34 | 35 | def get_gamma_info(model: MRBRT) -> Tuple[float]: 36 | # get gamma solution 37 | gamma = model.gamma_soln[0] 38 | gamma_fisher = get_re_fisher(model) 39 | gamma_sd = 1.0/np.sqrt(gamma_fisher[0, 0]) 40 | return (gamma, gamma_sd) 41 | 42 | 43 | def get_pval(mean, sd, one_sided: bool = False) -> float: 44 | zscore = np.abs(mean/sd) 45 | if one_sided: 46 | pval = 1 - norm.cdf(zscore) 47 | else: 48 | pval = 2*(1 - norm.cdf(zscore)) 49 | return pval 50 | 51 | 52 | def egger_regression(residual, residual_sd, one_sided: bool = True) -> Dict[str, float]: 53 | weighted_residual = residual/residual_sd 54 | r_mean = weighted_residual.mean() 55 | r_sd = 1/np.sqrt(weighted_residual.size) 56 | r_pval = get_pval(r_mean, r_sd, one_sided=one_sided) 57 | return { 58 | "mean": r_mean, 59 | "sd": r_sd, 60 | "pval": r_pval 61 | } 62 | 63 | 64 | def get_pub_bias(*args, **kwargs) -> int: 65 | result = egger_regression(*args, **kwargs) 66 | return int(result["pval"] < 0.05) 67 | 68 | 69 | def list_all_files(path: Path) -> List[Path]: 70 | if path.is_file(): 71 | return [path] 72 | return chain.from_iterable( 73 | list_all_files(sub_path) for sub_path in path.iterdir() 74 | ) 75 | --------------------------------------------------------------------------------
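# Usage sketch for the publication-bias helpers in espipeline/utils.py, run on
# synthetic residuals (in the pipeline these come from the fitted MRBRT models):

import numpy as np

from espipeline.utils import egger_regression, get_pub_bias

rng = np.random.default_rng(0)
residual_sd = rng.uniform(0.1, 0.5, size=50)
residual = rng.normal(0.0, residual_sd)         # centered residuals, no systematic shift
print(egger_regression(residual, residual_sd))  # large pval expected for unbiased residuals
print(get_pub_bias(residual, residual_sd))      # 0, i.e. no publication bias flagged at 0.05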