├── LICENSE ├── README.md ├── first_process ├── README.md ├── config.R ├── docs │ └── post-analysis.md ├── examples │ ├── process_continuous_results.R │ ├── upload_continuous_results.R │ ├── upload_dichotomous_results.R │ └── upload_loglinear_results.R ├── run_pipeline.R └── src │ ├── 01_create_template.R │ ├── 02_loglinear_models.R │ ├── 03_covariate_selection.R │ ├── 04_mixed_effects_models.R │ ├── 05_evidence_score_continuous.R │ ├── 05_evidence_score_dichotomous.R │ ├── 05_evidence_score_legacy.R │ ├── 05_evidence_score_loglinear.R │ ├── 05_evidence_score_mixed.R │ ├── upload_continuous.R │ ├── upload_dichotomous.R │ ├── upload_loglinear.R │ └── utils │ ├── continuous_functions.R │ ├── dichotomous_functions.R │ ├── egger_functions.R │ ├── extract_old_results.R │ ├── loglinear_functions.R │ ├── mixed_functions.R │ ├── plot_3_curves.R │ ├── prep_diet_data_function.R │ └── qsub_function.R ├── limetr ├── .gitignore ├── .travis.yml ├── LICENSE ├── Makefile ├── README.md ├── check_requirements.py ├── conda_pkg │ ├── build.sh │ ├── conda_build_config.yaml │ ├── meta.yaml │ └── run_test.sh ├── experiments │ └── test_trimming_with_certain_inlier.ipynb ├── setup.py ├── src │ └── limetr │ │ ├── Makefile │ │ ├── __init__.py │ │ ├── special_mat.f90 │ │ └── utils.py └── tests │ ├── check_limetr.py │ ├── check_utils.py │ ├── izmat_block_izdiag.py │ ├── izmat_block_izmm.py │ ├── izmat_block_izmv.py │ ├── izmat_izdiag.py │ ├── izmat_izeig.py │ ├── izmat_izmm.py │ ├── izmat_izmv.py │ ├── izmat_lsvd.py │ ├── izmat_zdecomp.py │ ├── limetr_gradient.py │ ├── limetr_gradientTrimming.py │ ├── limetr_lasso.py │ ├── limetr_objective.py │ ├── limetr_objectiveTrimming.py │ ├── projCappedSimplex.py │ ├── varmat_diag.py │ ├── varmat_dot.py │ ├── varmat_invDiag.py │ ├── varmat_invDot.py │ └── varmat_logDet.py ├── mrtool ├── .github │ └── workflows │ │ └── python-build.yml ├── .gitignore ├── .readthedocs.yml ├── LICENSE ├── Makefile ├── README.rst ├── docs │ ├── Makefile │ ├── make.bat │ ├── requirements.txt │ └── source │ │ ├── _static │ │ └── css │ │ │ └── custom.css │ │ ├── api_reference │ │ ├── index.rst │ │ ├── mrtool.core.rst │ │ ├── mrtool.cov_selection.rst │ │ └── mrtool.evidence_score.rst │ │ ├── concepts │ │ ├── data_gen │ │ │ ├── index.rst │ │ │ ├── range_exposure.rst │ │ │ ├── rr1_binary.rst │ │ │ └── rr2_log_linear.rst │ │ ├── index.rst │ │ ├── optimization │ │ │ └── index.rst │ │ └── priors │ │ │ └── index.rst │ │ ├── conf.py │ │ ├── examples │ │ ├── example_linear.rst │ │ └── index.rst │ │ └── index.rst ├── setup.py ├── src │ └── mrtool │ │ ├── __about__.py │ │ ├── __init__.py │ │ ├── core │ │ ├── __init__.py │ │ ├── cov_model.py │ │ ├── data.py │ │ ├── model.py │ │ ├── other_sampling.py │ │ ├── plots.py │ │ └── utils.py │ │ ├── cov_selection │ │ ├── __init__.py │ │ └── covfinder.py │ │ └── evidence_score │ │ ├── __init__.py │ │ ├── continuous.py │ │ ├── dichotomous.py │ │ ├── loglinear.py │ │ ├── mixed.py │ │ └── scorelator.py └── tests │ ├── test_covmodel.py │ ├── test_data.py │ └── test_utils.py ├── risks ├── README.md ├── alcohol_ihd │ ├── cleaning_1.R │ ├── cleaning_2.R │ ├── cleaning_3.R │ └── tables.R ├── chewing_tobacco │ ├── 01.0_rr_data_cleaning.R │ └── 02.0_rr_data_formatting.R ├── ipv_csa │ ├── 01_clean_all_extracted_data.R │ ├── 02_prep_csa_anorexia.R │ ├── 02_prep_csa_anx.R │ ├── 02_prep_csa_asthma.R │ ├── 02_prep_csa_aud.R │ ├── 02_prep_csa_bulimia.R │ ├── 02_prep_csa_conduct.R │ ├── 02_prep_csa_diabetes.R │ ├── 02_prep_csa_druguse.R │ ├── 02_prep_csa_hiv.R │ ├── 02_prep_csa_ihd.R │ ├── 
02_prep_csa_mat_abort_mis.R │ ├── 02_prep_csa_mdd.R │ ├── 02_prep_csa_schizophrenia.R │ ├── 02_prep_csa_selfharm.R │ ├── 02_prep_csa_sti.R │ ├── 02_prep_ipv_anx.R │ ├── 02_prep_ipv_hiv.R │ ├── 02_prep_ipv_mat_abort_mis.R │ ├── 02_prep_ipv_mdd.R │ ├── 02_prep_ipv_selfharm.R │ ├── 03_csa_main_forest_plots.R │ ├── 03_ipv_main_forest_plot.R │ └── README.md ├── processed_foods │ ├── Bop_summary_table.R │ ├── Data_cleaning_and_formatting.R │ ├── code_tocnvert_per_unitEffectisize_measures.R │ ├── create_bc_gamma_table.R │ └── create_parameter_table.R ├── red_meat │ ├── README.md │ ├── model_functions.R │ ├── prep_diet_data_function.R │ └── redmeat_aggregate_curve.R ├── smoking │ ├── binary_risk │ │ ├── 00_data_cleaning_binary.R │ │ ├── 01_binary_rr_pipeline.R │ │ ├── 02_upload_dichotomous_launcher.R │ │ ├── 03_forest_plot.R │ │ └── upload_dichotomous.R │ ├── config.R │ ├── continous_risk │ │ ├── 00_data_cleaning_formatting.R │ │ ├── 00_data_cleaning_new_extraction.R │ │ ├── 01_full_rr_pipeline.R │ │ ├── 02_upload_continuous_launcher.R │ │ ├── 03_create_draws.R │ │ ├── 04_format_rr_draws_non_cvd.R │ │ ├── 05_00_age_rr_data_cleaning.R │ │ ├── 05_01_age_rr_trend.R │ │ ├── 05_02_age_specific_rr_curves.R │ │ ├── age_rr_utils.R │ │ ├── helper_functions.R │ │ └── upload_continuous.R │ └── prep_data_function.R └── vegetables │ ├── Prep bias covariates for diet MR_BRT.r │ ├── README.md │ ├── config.R │ └── veg_TMREL.R └── second_process ├── .gitignore ├── LICENSE ├── README.md ├── examples ├── gbd2020_continuous_risk.py └── summary.ipynb ├── setup.py └── src └── espipeline ├── __about__.py ├── __init__.py ├── continuous.py ├── dichotomous.py ├── filemanager.py ├── loglinear.py ├── pipline.py ├── process.py ├── utils.py └── validator.py /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2022, IHME Math Sciences 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # burden-of-proof 2 | Time capsule for burden of proof paper. 
3 | This repository includes:
4 |
5 | * `limetr` is the meta-regression engine
6 | * `mrtool` provides the model specification interface
7 | * `first_process` is the first part of the data processing
8 | * `second_process` is the second part of the data processing
9 | * `risks` is risk-specific custom code
10 |
--------------------------------------------------------------------------------
/first_process/README.md:
--------------------------------------------------------------------------------
 1 | # Evidence score pipeline
 2 |
 3 | **Note: The pipeline is still under development.**
 4 |
 5 | ## Configuration
 6 |
 7 | The variables in `config.R` are global variables used by the scripts in the pipeline. Users can change their values to suit the task at hand.
 8 |
 9 | Users need to change some settings before running the pipeline, including `WORK_DIR`, `PROJ`, `VERSION_ID`, and `OUT_DIR`. Other settings are task-specific, including, but not limited to, `INPUT_DATA_DIR`, `OBS_VAR`, and an array of settings for the models in each stage. Users are advised to read through `config.R` to make sure the settings match their needs.
10 |
11 | ## Run script
12 |
13 | `run_pipeline.R` is the main script for running the pipeline. Users need to change `WORK_DIR` to the location where the scripts are saved.
14 |
15 | The pipeline consists of five stages:
16 | 1. Ensemble model with exposure only to get the signal; no random effects
17 | 2. Log-linear model to get the slope prior for covariate selection
18 | 3. Covariate selection model
19 | 4. Final mixed effects model that combines the signal and the selected covariates
20 | 5. Evidence score from the signal model and the final model; plots
21 |
22 | The stages run sequentially, but each stage is parallelized across risk-outcome pairs. Note that Stage 5 cannot be run in parallel on the cluster; it needs to be run manually after typing `repl_python()` and then `esc`. Details can be found in the script, and a sketch follows the Results section below.
23 |
24 | ## Results
25 |
26 | The evidence score plots will be saved in `05_evidence_score` under the specified `OUT_DIR`.
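A minimal sketch of the manual Stage 5 run, following `src/05_evidence_score_legacy.R` (the pair name and the paths under `OUT_DIR` are illustrative placeholders):

```R
library(reticulate)
library(mrbrt001, lib.loc = "/ihme/code/mscm/R/packages/")

# Load the fitted models saved by Stage 1 and Stage 4.
signal_model <- py_load_object("01_template_pkl_files/lpa_ihd.pkl", pickle = "dill")
final_model  <- py_load_object("04_mixed_effects_pkl_files/lpa_ihd.pkl", pickle = "dill")

# Open an interactive Python interpreter, then immediately type 'exit'
# (or hit escape) to get back to R -- this loads a required Python package.
repl_python()

# Score the pair and write the diagnostic plots.
evidence_score <- import("mrtool.evidence_score.scorelator")
scorelator <- evidence_score$ContinuousScorelator(
  signal_model = signal_model, final_model = final_model,
  alt_cov_names = as.list(c("b_0", "b_1")),
  ref_cov_names = as.list(c("a_0", "a_1")),
  name = "lpa_ihd"
)
scorelator$plot_model(folder = "05_evidence_score/")
score <- scorelator$get_score()
```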
27 | 28 | ## Useful links 29 | [GBD 2020 Guidance on Evidence Score](https://docs.google.com/document/d/1gP7-T6cxah2rLfjaTxWZaO0ejRw7Wk4eVFDGoszetj4/edit) 30 | 31 | [Introduction to MR-BRT](https://rpubs.com/rsoren/mrbrt_gbd2020) 32 | 33 | [MR-BRT examples](https://rpubs.com/rsoren/mrbrt_examples_gbd2020) -------------------------------------------------------------------------------- /first_process/config.R: -------------------------------------------------------------------------------- 1 | # Configuration of pipeline 2 | 3 | # User settings 4 | # ------------------------------------------------------------------------------ 5 | USER <- Sys.getenv("USER") 6 | WORK_DIR <- "/ihme/homes/jiaweihe/msca/mrbrt/evidence_score_pipeline" 7 | CODE_PATH <- paste0(WORK_DIR, "/src/") 8 | ARCHIVE <- "/mnt/team/msca/pub/archive/evidence-score/gbd2020" 9 | 10 | # Cluster settings 11 | # ------------------------------------------------------------------------------ 12 | PROJ <- "proj_mscm" 13 | SINGULARITY_IMG <- "/ihme/singularity-images/rstudio/ihme_rstudio_3631.img" 14 | 15 | # Version settings 16 | # ------------------------------------------------------------------------------ 17 | VERSION_ID <- "prod" 18 | 19 | # Directory settings 20 | # ------------------------------------------------------------------------------ 21 | OUT_DIR <- paste0("/ihme/scratch/users/", USER, "/evidence_score_pipeline/", VERSION_ID, "/") 22 | INPUT_DATA_DIR = "/home/j/temp/hkl1/mr_brt/03_evidence_score/input_data/for_ryan/version15" 23 | 24 | # Output directory for each stage 25 | SUB_DIRS <- c( 26 | paste0(OUT_DIR, "00_prepped_data"), 27 | paste0(OUT_DIR, "01_template_pkl_files"), 28 | paste0(OUT_DIR, "01_template_models"), 29 | paste0(OUT_DIR, "02_loglinear_models"), 30 | paste0(OUT_DIR, "02_loglinear_pkl_files"), 31 | paste0(OUT_DIR, "03_covariate_selection_models"), 32 | paste0(OUT_DIR, "03_covariate_selection_pkl_files"), 33 | paste0(OUT_DIR, "04_mixed_effects_models"), 34 | paste0(OUT_DIR, "04_mixed_effects_pkl_files"), 35 | paste0(OUT_DIR, "05_evidence_score") 36 | ) 37 | 38 | # data settings 39 | # ------------------------------------------------------------------------------ 40 | ALL_RO_PAIRS <- gsub(".csv", "", list.files(INPUT_DATA_DIR)) 41 | EXCLUDED_RO_PAIRS <- c("dairy_stroke", "fruit_oral", "fruit_larynx") 42 | RO_PAIRS <- ALL_RO_PAIRS[!(ALL_RO_PAIRS %in% EXCLUDED_RO_PAIRS)] 43 | RO_PAIRS <- c("alcohol") 44 | RO_PAIRS <- c("redmeat_colorectal") 45 | RO_PAIRS <- c("lpa_ihd", "bmi_diabetes", "bmi_uter_canc", 46 | "bmi_leuk", "fruit_ihd", "redmeat_colorectal", 47 | "nuts_ihd", "wholegrain_ihd", "fiber_stroke", 48 | "alcohol_lri", "alcohol_tb", "lung_cancer", 49 | "copd", "ihd_19", "diabetes", "peptic_ulcer") 50 | 51 | OBS_VAR <- "ln_effect" 52 | OBS_SE_VAR <- "ln_se" 53 | STUDY_ID_VAR <- "nid" 54 | 55 | ALT_EXPOSURE_COLS <- c("b_0", "b_1") 56 | REF_EXPOSURE_COLS <- c("a_0", "a_1") 57 | 58 | # Sarah's 59 | # RO_PAIRS <- c("air_pmhap_neo_lung", "air_pmhap_lri", "air_pmhap_t2_dm", 60 | # "air_pmhap_resp_copd", "air_pmhap_cvd_stroke_60") 61 | # ALT_EXPOSURE_COLS <- c("conc") 62 | # REF_EXPOSURE_COLS <- c("conc_den") 63 | 64 | 65 | # model settings 66 | # ------------------------------------------------------------------------------ 67 | BIAS_COVARIATES_AS_INTX <- TRUE 68 | 69 | # For diet 70 | # DIRECTION = list( 71 | # calcium = "decreasing", 72 | # cheese = "decreasing", 73 | # dairy = "decreasing", 74 | # fiber = "decreasing", 75 | # fish = "decreasing", 76 | # fruit = "decreasing", 77 | # legumes = "decreasing", 
78 | #   milk = "decreasing",
79 | #   nuts = "decreasing",
80 | #   omega3 = "decreasing",
81 | #   veg = "decreasing",
82 | #   wholegrain = "decreasing",
83 | #   pufa = "decreasing",
84 | #   yogurt = "decreasing",
85 | #   procmeat = "increasing",
86 | #   redmeat = "increasing",
87 | #   sodium = "increasing",
88 | #   ssb = "increasing",
89 | #   sugar = "increasing",
90 | #   transfat = "increasing"
91 | # )
92 |
93 | # Get monotonicity direction.
94 | # tmp <- read.csv("/ihme/homes/jiaweihe/msca/mrbrt/evidence_score_pipeline/all_pairs.csv")
95 | # tmp$mono <- ifelse(tmp$type=='protective', 'decreasing', 'increasing')
96 | # DIRECTION <- setNames(as.character(tmp$mono), tmp$ro_pair)
97 | DIRECTION = list(
98 |   lpa_ihd = "increasing"
99 | )
100 |
101 |
102 | BETA_PRIOR_MULTIPLIER = 0.1
103 | COV_FINDER_CONFIG = list(
104 |   pre_selected_covs = list("exposure_linear"),
105 |   num_samples = 1000L,
106 |   power_range = list(-4, 4),
107 |   power_step_size = 0.05,
108 |   laplace_threshold = 1e-5,
109 |   inlier_pct = 1.0,
110 |   bias_zero = TRUE
111 | )
112 |
113 | # Not used by new pipeline
114 |
115 | # N_I_KNOTS <- 3
116 | # PRIOR_VAR_RSLOPE = 1e-6
117 | # PRIOR_VAR_MAXDER <- 1e-4
118 | # MONOSPLINE_SLOPE_MULTIPLIER <- 2
119 |
120 | # MONOSPLINE_CONFIG = list(
121 | #   use_re = TRUE,
122 | #   use_spline = TRUE,
123 | #   spline_degree = 3L,
124 | #   spline_knots_type = 'domain',
125 | #   spline_r_linear = TRUE,
126 | #   prior_spline_funval_uniform = array(c(-1 + 1e-6, 19)),
127 | #   prior_spline_num_constraint_points = 150L,
128 | #   spline_knots = array(seq(0, 1, length.out = N_I_KNOTS + 2)),
129 | #   prior_spline_maxder_gaussian = cbind(rbind(rep(0, N_I_KNOTS),
130 | #     rep(sqrt(PRIOR_VAR_MAXDER), N_I_KNOTS)), c(0, sqrt(PRIOR_VAR_RSLOPE))),
131 | #   prior_spline_der2val_gaussian = NULL,
132 | #   prior_spline_der2val_gaussian_domain = array(c(0.0, 1.0)),
133 | #   name = "exposure"
134 | # )
135 |
136 | # MONOSPLINE_BIAS_CONFIG = list(
137 | #   spline_degree = 3L
138 | # )
139 |
140 | # LOGLINEAR_BIAS_CONFIG = list(
141 | #   spline_degree = 3L
142 | # )
143 |
--------------------------------------------------------------------------------
/first_process/docs/post-analysis.md:
--------------------------------------------------------------------------------
 1 | # Post Analysis
 2 |
 3 | Here we document the processes that follow model fitting, including
 4 |
 5 | * detecting and adjusting for publication bias
 6 | * computing the evidence score
 7 | * outputting diagnostic figures and data
 8 |
 9 | ## Detect Publication-Bias
10 |
11 | Publication-bias analysis is an important part of any systematic review.
12 | As a metric for evaluating the evidence in a dataset, the evidence score needs to take
13 | publication bias into account.
14 |
15 | To detect publication bias, we use a data-driven approach known as [Egger's Regression](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2127453/).
16 | The idea is simple: we test whether there is a significant correlation between the
17 | residuals and their standard errors. Egger's regression function can be found [here](https://stash.ihme.washington.edu/users/jiaweihe/repos/evidence_score_pipeline/browse/src/utils/continuous_functions.R#106-112), and a condensed version is sketched below.
18 |
19 | Interestingly, we find that the trimming algorithm helps protect against publication bias.
20 | In the process, we apply Egger's Regression to both the untrimmed and the trimmed data.
21 | Examples can be found [here](https://stash.ihme.washington.edu/users/jiaweihe/repos/evidence_score_pipeline/browse/src/05_evidence_score_continuous.R#39-42).
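Since the test is only a few lines, here is a condensed sketch of it, mirroring `egger_regression` in `src/utils/egger_functions.R`:

```R
# Egger-style test on SE-weighted residuals: a significantly nonzero mean
# indicates funnel-plot asymmetry, i.e. potential publication bias.
egger_regression <- function(residual, residual_sd, one_sided = TRUE) {
  weighted_residual <- residual / residual_sd
  r_mean <- mean(weighted_residual)
  r_sd <- 1 / sqrt(length(weighted_residual))  # SD of the mean under the null
  zscore <- abs(r_mean / r_sd)
  pval <- if (one_sided) 1 - pnorm(zscore) else 2 * (1 - pnorm(zscore))
  list(mean = r_mean, sd = r_sd, pval = pval)
}

# Usage, as in src/05_evidence_score_continuous.R:
# egger_model <- egger_regression(df$residual, df$residual_se)
# has_pub_bias <- egger_model$pval < 0.05
```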
22 |
23 | ## Adjust for Publication-Bias
24 |
25 | If publication bias has been detected, we adjust for it using an algorithm called [The Trim-and-Fill Method](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6571372/).
26 | The method is based on the assumption that the residuals should be distributed symmetrically; asymmetry suggests that certain studies are missing.
27 | The algorithm involves two major steps: iteratively trimming the data to get an accurate mean estimate, and filling in the "missing" data based on that estimate to get the final result.
28 |
29 | Our cases are slightly different from the ones considered in "The Trim-and-Fill Method".
30 | Nonetheless, the method can be modified and applied to our problem.
31 | The one major change we make is removing the "trim" step, since we have our own trimming and we trust our mean estimate. Our adjustment process then involves
32 |
33 | * using the rank statistics and the residuals to compute the number of points that need to be filled
34 | * filling the data and re-fitting the model (a condensed sketch is given at the end of this document)
35 |
36 | The function that creates the filled data can be found [here](https://stash.ihme.washington.edu/users/jiaweihe/repos/evidence_score_pipeline/browse/src/utils/continuous_functions.R#90-104),
37 | and the re-fitting step can be found [here](https://stash.ihme.washington.edu/users/jiaweihe/repos/evidence_score_pipeline/browse/src/05_evidence_score_continuous.R#53-70).
38 |
39 | ## Get Scores and Diagnostics
40 |
41 | Finally, we need to get [the evidence scores](https://stash.ihme.washington.edu/users/jiaweihe/repos/evidence_score_pipeline/browse/src/05_evidence_score_continuous.R#73-78) and diagnostics, including the [risk function plot](https://stash.ihme.washington.edu/users/jiaweihe/repos/evidence_score_pipeline/browse/src/05_evidence_score_continuous.R#88-93), the [residual funnel plot](https://stash.ihme.washington.edu/users/jiaweihe/repos/evidence_score_pipeline/browse/src/05_evidence_score_continuous.R#86), and the [summary dataframe](https://stash.ihme.washington.edu/users/jiaweihe/repos/evidence_score_pipeline/browse/src/05_evidence_score_continuous.R#96-103).
42 | Note that for dichotomous outcomes there is no residual plot step, because it would be exactly the same as the model plot.
43 |
44 | ## File Structure
45 |
46 | These processes are organized in `src/05_evidence_score_*.R`, and their corresponding functions are in `src/utils/*_functions.R`. Currently we have scripts for continuous and dichotomous outcomes.
47 | The old evidence score step is saved as `src/05_evidence_score_legacy.R`.
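For reference, here is the fill-and-refit step, condensed from `src/05_evidence_score_continuous.R` (`egger_regression` and `get_df_fill` live under `src/utils/`):

```R
# Detect bias on the non-outlier residuals, then fill and refit if needed.
df_no_outlier <- df[!df$outlier, ]
egger_model <- egger_regression(df_no_outlier$residual, df_no_outlier$residual_se)

if (egger_model$pval < 0.05) {
  # Add the rank-based "fill" points, then refit the linear model.
  df <- rbind(df, get_df_fill(df_no_outlier))

  data <- MRData()
  data$load_df(
    data = df[!df$outlier, ],
    col_obs = "obs",
    col_obs_se = "obs_se",
    col_covs = as.list(linear_model$cov_names),
    col_study_id = "study_id"
  )
  linear_model_fill <- MRBRT(data, cov_models = linear_model$cov_models)
  linear_model_fill$fit_model()
}
```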
48 | 49 | -------------------------------------------------------------------------------- /first_process/examples/process_continuous_results.R: -------------------------------------------------------------------------------- 1 | # unpack information from results and create draws 2 | rm(list = ls()) 3 | library(mrbrt001, lib.loc = "/ihme/code/mscm/R/packages/") 4 | 5 | # define functions 6 | # ================================================================================================= 7 | get_cov_names <- function(signal_model) { 8 | cov_model <- signal_model$sub_models[[1]]$cov_models[[1]] 9 | list(alt_covs = cov_model$alt_cov, 10 | ref_covs = cov_model$ref_cov) 11 | } 12 | 13 | get_risk_limits <- function(signal_model) { 14 | cov_names <- get_cov_names(signal_model) 15 | risk_data <- signal_model$data$get_covs(unlist(cov_names)) 16 | c(min(risk_data), max(risk_data)) 17 | } 18 | 19 | get_signal <- function(signal_model, risk) { 20 | cov_names <- get_cov_names(signal_model) 21 | risk_limits <- get_risk_limits(signal_model) 22 | df_covs <- data.frame( 23 | c(sapply(cov_names$ref_covs, function(x) rep(risk_limits[1], length.out = length(risk)), 24 | simplify = FALSE, USE.NAMES = TRUE), 25 | sapply(cov_names$alt_covs, function(x) risk, 26 | simplify = FALSE, USE.NAMES = TRUE)) 27 | ) 28 | data <- MRData() 29 | data$load_df(df_covs, col_covs=unlist(cov_names)) 30 | signal_model$predict(data) 31 | } 32 | 33 | get_beta <- function(linear_model) { 34 | beta <- linear_model$beta_soln 35 | names(beta) <- linear_model$cov_names 36 | specs <- mrbrt001::core$other_sampling$extract_simple_lme_specs(linear_model) 37 | beta_hessian <- mrbrt001::core$other_sampling$extract_simple_lme_hessian(specs) 38 | beta_sd <- 1/sqrt(diag(beta_hessian)) 39 | names(beta_sd) <- linear_model$cov_names 40 | c(beta["signal"], beta_sd["signal"]) 41 | } 42 | 43 | get_gamma <- function(linear_model) { 44 | gamma <- linear_model$gamma_soln[[1]] 45 | gamma_fisher <- linear_model$lt$get_gamma_fisher(linear_model$gamma_soln) 46 | gamma_sd <- 1/sqrt(diag(gamma_fisher)[[1]]) 47 | c(gamma, gamma_sd) 48 | } 49 | 50 | get_soln <- function(linear_model) { 51 | list( 52 | beta_soln = get_beta(linear_model), 53 | gamma_soln = get_gamma(linear_model) 54 | ) 55 | } 56 | 57 | get_ln_rr_draws <- function(signal_model, 58 | linear_model, 59 | risk, 60 | num_draws = 1000L, 61 | normalize_to_tmrel = FALSE, 62 | include_re = TRUE) { 63 | # set seed inside function 64 | set.seed(1234) 65 | 66 | signal <- get_signal(signal_model, risk) 67 | re_signal <- signal 68 | soln <- get_soln(linear_model) 69 | 70 | fe_samples <- rnorm(num_draws, mean=soln$beta[1], sd=soln$beta[2]) 71 | re_samples <- rnorm(num_draws, mean=0, sd=sqrt(soln$gamma[1] + 2*soln$gamma[2])) 72 | 73 | draws <- outer(signal, fe_samples) 74 | if (include_re) { 75 | draws <- draws + outer(re_signal, re_samples) 76 | } 77 | 78 | if (normalize_to_tmrel) { 79 | tmrel_index <- which.min(signal) 80 | draws <- apply(draws, 2, function(x) x - x[tmrel_index]) 81 | } 82 | 83 | df <- as.data.frame(cbind(risk, draws)) 84 | names(df) <- c("risk", sapply(1:num_draws, function(i) paste0("draw_", i))) 85 | return(df) 86 | } 87 | 88 | # process results 89 | # ================================================================================================= 90 | # load models 91 | signal_model_path <- "/mnt/team/msca/pub/archive/evidence-score/gbd2020-process/nuts_ihd/signal_model.pkl" 92 | linear_model_path <- 
"/mnt/team/msca/pub/archive/evidence-score/gbd2020-process/nuts_ihd/new_linear_model.pkl" 93 | 94 | signal_model <- py_load_object(filename = signal_model_path, pickle = "dill") 95 | linear_model <- py_load_object(filename = linear_model_path, pickle = "dill") 96 | 97 | # specify risk, you need to input the exposures that you want to predict 98 | risk <- 0:100 99 | 100 | # get_draws 101 | df <- get_ln_rr_draws(signal_model, 102 | linear_model, 103 | risk, 104 | num_draws = 1000L, 105 | normalize_to_tmrel = FALSE) 106 | 107 | # visual check draws 108 | draws <- df[, 2:ncol(df)] 109 | draw_mean <- apply(draws, 1, function(x) mean(x)) 110 | draw_lower <- apply(draws, 1, function(x) quantile(x, probs=.05)) 111 | draw_upper <- apply(draws, 1, function(x) quantile(x, probs=.95)) 112 | 113 | lines(risk, draw_mean) 114 | lines(risk, draw_lower) 115 | lines(risk, draw_upper) 116 | -------------------------------------------------------------------------------- /first_process/examples/upload_continuous_results.R: -------------------------------------------------------------------------------- 1 | rm(list = ls()) 2 | source("src/upload_continuous.R") 3 | 4 | results_folder <- "/mnt/team/msca/pub/archive/evidence-score-test/gbd2020-results" 5 | 6 | pair_info <- list( 7 | dairy_diabetes = list( 8 | rei_id = "unknown", 9 | cause_id = "unkown", 10 | risk_unit = "unknown", 11 | signal_model_path = file.path(results_folder, "dairy_diabetes", "signal_model.pkl"), 12 | linear_model_path = file.path(results_folder, "dairy_diabetes", "linear_model.pkl") 13 | ), 14 | air_pmhap_lri = list( 15 | rei_id = "unknown", 16 | cause_id = "unkown", 17 | risk_unit = "unknown", 18 | signal_model_path = file.path(results_folder, "air_pmhap_lri", "signal_model.pkl"), 19 | linear_model_path = file.path(results_folder, "air_pmhap_lri", "linear_model.pkl") 20 | ), 21 | fpg_neo_liver = list( 22 | rei_id = "unknown", 23 | cause_id = "unkown", 24 | risk_unit = "unknown", 25 | signal_model_path = file.path(results_folder, "fpg_neo_liver", "signal_model.pkl"), 26 | linear_model_path = file.path(results_folder, "fpg_neo_liver", "linear_model.pkl") 27 | ) 28 | ) 29 | 30 | for (pair in names(pair_info)) { 31 | print(paste0("upload pair=", pair)) 32 | results_folder <- file.path(ARCHIVE, pair) 33 | if (!dir.exists(results_folder)) { 34 | dir.create(results_folder) 35 | } 36 | do.call(upload_results, c(pair_info[[pair]], list(results_folder = results_folder))) 37 | } 38 | -------------------------------------------------------------------------------- /first_process/examples/upload_dichotomous_results.R: -------------------------------------------------------------------------------- 1 | rm(list = ls()) 2 | source("src/upload_dichotomous.R") 3 | 4 | results_folder <- "/mnt/team/msca/pub/archive/evidence-score-test/gbd2020-results" 5 | 6 | pair_info <- list( 7 | opioid_suicide = list( 8 | rei_id = "unknown", 9 | cause_id = "unkown", 10 | model_path = file.path(results_folder, "opioid_suicide", "model.pkl") 11 | ), 12 | idu_hepB = list( 13 | rei_id = "unknown", 14 | cause_id = "unkown", 15 | model_path = file.path(results_folder, "idu_hepB", "model.pkl") 16 | ), 17 | idu_hepC = list( 18 | rei_id = "unknown", 19 | cause_id = "unkown", 20 | model_path = file.path(results_folder, "idu_hepC", "model.pkl") 21 | ) 22 | ) 23 | 24 | for (pair in names(pair_info)) { 25 | print(paste0("upload pair=", pair)) 26 | results_folder <- file.path(ARCHIVE, pair) 27 | if (!dir.exists(results_folder)) { 28 | dir.create(results_folder) 29 | } 30 | 
do.call(upload_results, c(pair_info[[pair]], list(results_folder = results_folder))) 31 | } 32 | -------------------------------------------------------------------------------- /first_process/examples/upload_loglinear_results.R: -------------------------------------------------------------------------------- 1 | rm(list = ls()) 2 | source("src/upload_loglinear.R") 3 | 4 | 5 | pair_info <- list( 6 | air_no2_resp_asthma = list( 7 | rei_id = 404, 8 | cause_id = 515, 9 | risk_unit = "ppb", 10 | model_path = "/ihme/erf/GBD2020/air_no2/rr/models/20/model.pkl" 11 | ) 12 | ) 13 | 14 | 15 | for (pair in names(pair_info)) { 16 | print(paste0("upload pair=", pair)) 17 | results_folder <- file.path(ARCHIVE, pair) 18 | if (!dir.exists(results_folder)) { 19 | dir.create(results_folder) 20 | } 21 | do.call(upload_results, c(pair_info[[pair]], list(results_folder = results_folder))) 22 | } 23 | -------------------------------------------------------------------------------- /first_process/run_pipeline.R: -------------------------------------------------------------------------------- 1 | # 2 | # run_pipeline_parallel.R 3 | # 4 | 5 | library(dplyr) 6 | library(parallel) 7 | 8 | ##### 9 | # user params 10 | # 11 | WORK_DIR <- "/ihme/homes/jiaweihe/msca/mrbrt/evidence_score_pipeline" 12 | source(paste0(WORK_DIR, "/config.R")) 13 | source(paste0(WORK_DIR, "/src/utils/prep_diet_data_function.R")) 14 | source(paste0(WORK_DIR, "/src/utils/qsub_function.R")) 15 | 16 | 17 | ##### 18 | # create directories 19 | # 20 | 21 | if (!dir.exists(OUT_DIR)) { 22 | if (!dir.exists(dirname(OUT_DIR))) { 23 | dir.create(dirname(OUT_DIR)) 24 | } 25 | dir.create(OUT_DIR) 26 | } else { 27 | warning("Directory '", OUT_DIR, "' already exists") 28 | } 29 | 30 | for (dir in SUB_DIRS) { 31 | if (!dir.exists(dir)) { 32 | dir.create(dir) 33 | } else { 34 | warning("Directory '", dir, "' already exists") 35 | } 36 | } 37 | 38 | 39 | submit_jobs <- function(pair, WORK_DIR) { 40 | # stage 1, create signal 41 | submit_sub_job(pair, "01_create_template.R", "_01_template", WORK_DIR) 42 | qwait("01_create_template_models", pair) 43 | 44 | # # stage 2, loglinear model 45 | submit_sub_job(pair, "02_loglinear_models.R", "_02_loglinear", WORK_DIR) 46 | qwait("02_loglinear_models", pair) 47 | 48 | # stage 3, covariate selection 49 | submit_sub_job(pair, "03_covariate_selection.R", "_03_cov_selection", WORK_DIR) 50 | qwait("03_covariate_selection_models", pair) 51 | 52 | # stage 4, final model 53 | submit_sub_job(pair, "04_mixed_effects_models.R", "_04_mixed_effects", WORK_DIR) 54 | } 55 | 56 | ##### 57 | # data prep for diet risks 58 | # 59 | 60 | stage0_results <- lapply(RO_PAIRS, function(ro_pair) { 61 | x <- try({ 62 | prep_diet_data( 63 | ro_pair = ro_pair, 64 | obs_var = OBS_VAR, 65 | obs_se_var = OBS_SE_VAR, 66 | ref_vars = REF_EXPOSURE_COLS, 67 | alt_vars = ALT_EXPOSURE_COLS, 68 | allow_ref_gt_alt = FALSE, 69 | diet_dir = INPUT_DATA_DIR, 70 | study_id_var = "nid", 71 | verbose = TRUE 72 | ) 73 | }) 74 | 75 | saveRDS(x, paste0(OUT_DIR, "00_prepped_data/", ro_pair, ".RDS")) 76 | return(x) 77 | }) 78 | 79 | names(stage0_results) <- RO_PAIRS 80 | saveRDS(stage0_results, paste0(OUT_DIR, "stage0_results.RDS")) 81 | 82 | # Submit stage jobs for each pair 83 | mclapply(RO_PAIRS, function(pair) { 84 | submit_jobs(pair, WORK_DIR) 85 | }, mc.cores = length(RO_PAIRS)) 86 | 87 | -------------------------------------------------------------------------------- /first_process/src/01_create_template.R: 
-------------------------------------------------------------------------------- 1 | # 2 | # 01_create_template.R 3 | # 4 | # 5 | library(dplyr) 6 | library(mrbrt001, lib.loc = "/ihme/code/mscm/R/packages/") 7 | 8 | args <- commandArgs(trailingOnly = TRUE) 9 | 10 | ro_pair <- args[1] 11 | out_dir <- args[2] 12 | WORK_DIR <- args[3] 13 | setwd(WORK_DIR) 14 | source("./config.R") 15 | 16 | # xiaochen's example 17 | # model <- py_load_object( 18 | # filename="/ihme/homes/xdai88/gbd_tobacco/gbd2019_alcohol/evidence_score/testing/test_run1_2020_09_05/04_monospline_pkl_files/lung_cancer_0.9_ensemble.pkl", 19 | # pickle = "dill") 20 | # data = model$data 21 | # df <- data$to_df() 22 | 23 | # diet example 24 | # data <- readRDS(paste0(out_dir, "00_prepped_data/", ro_pair, ".RDS")) 25 | # df <- data$df 26 | 27 | library(readxl) 28 | df_meta <- read_excel("/ihme/homes/jiaweihe/msca/mrbrt/evidence_score_pipeline/evidence_score.xlsx") 29 | infile <- df_meta[df_meta$ro_pair==ro_pair, 'data'] 30 | df <- read.csv(infile[[1]]) 31 | 32 | # Deal with inconsistency of naming 33 | OBS_VAR <- "ln_effect" 34 | OBS_SE_VAR <- "ln_se" 35 | STUDY_ID_VAR <- "nid" 36 | 37 | if (!(STUDY_ID_VAR %in% names(df))){ 38 | STUDY_ID_VAR <- "study_id" 39 | } 40 | 41 | if (!(OBS_VAR %in% names(df))){ 42 | OBS_VAR <- "obs" 43 | if (!(OBS_VAR %in% names(df))){ 44 | OBS_VAR <- "log_rr" 45 | } 46 | } 47 | 48 | if (!(OBS_SE_VAR %in% names(df))){ 49 | OBS_SE_VAR <- "obs_se" 50 | if (!(OBS_SE_VAR %in% names(df))){ 51 | OBS_SE_VAR <- "log_se" 52 | } 53 | } 54 | 55 | # Specify all the columns you need for your application 56 | cov_names <- c(REF_EXPOSURE_COLS, ALT_EXPOSURE_COLS) 57 | 58 | mrdata <- MRData() 59 | 60 | mrdata$load_df( 61 | data = df, 62 | col_obs = OBS_VAR, 63 | col_obs_se = OBS_SE_VAR, 64 | col_study_id = STUDY_ID_VAR, 65 | col_covs = as.list(cov_names) 66 | ) 67 | 68 | monotonicity <- DIRECTION[ro_pair][[1]] 69 | # if (is.na(monotonicity)){ 70 | # monotonicity <- NULL 71 | # } 72 | 73 | N_I_KNOTS <- 3 74 | PRIOR_VAR_RSLOPE = 1e-6 75 | PRIOR_VAR_MAXDER <- 1e-4 76 | 77 | ensemble_cov_model <- LogCovModel( 78 | alt_cov = ALT_EXPOSURE_COLS, 79 | ref_cov = REF_EXPOSURE_COLS, 80 | use_spline = TRUE, 81 | use_re = FALSE, 82 | spline_degree = 3L, 83 | spline_knots_type = 'domain', 84 | spline_r_linear = TRUE, 85 | prior_spline_funval_uniform = array(c(-1 + 1e-6, 19)), 86 | prior_spline_num_constraint_points = 150L, 87 | spline_knots = array(seq(0, 1, length.out = N_I_KNOTS + 2)), 88 | prior_spline_maxder_gaussian = cbind(rbind(rep(0, N_I_KNOTS), 89 | rep(sqrt(PRIOR_VAR_MAXDER), N_I_KNOTS)), c(0, sqrt(PRIOR_VAR_RSLOPE))), 90 | prior_spline_der2val_gaussian = NULL, 91 | prior_spline_der2val_gaussian_domain = array(c(0.0, 1.0)), 92 | prior_spline_monotonicity = monotonicity 93 | ) 94 | 95 | # Create knot samples 96 | knots <- import("mrtool.core.model") 97 | knots_samples <- knots$create_knots_samples( 98 | data = mrdata, l_zero = TRUE, num_splines = 50L, 99 | num_knots = 5L, width_pct = 0.2, 100 | alt_cov_names = ALT_EXPOSURE_COLS, 101 | ref_cov_names = REF_EXPOSURE_COLS 102 | ) 103 | 104 | # Ensemble model with exposure only 105 | signal_model <- MRBeRT(mrdata, 106 | ensemble_cov_model=ensemble_cov_model, 107 | ensemble_knots=knots_samples, 108 | inlier_pct=0.9) 109 | 110 | signal_model$fit_model(inner_print_level=5L, inner_max_iter=200L, 111 | outer_step_size=200L, outer_max_iter=100L) 112 | 113 | # create "new covariates" for later use 114 | signal <- signal_model$predict(mrdata, predict_for_study=FALSE) 115 | 116 | # Extract 
weights of data point
117 | w <- t(do.call(rbind,
118 |   lapply(1:length(signal_model$sub_models),
119 |     function(i){signal_model$sub_models[[i]]$w_soln}))
120 | ) %*% signal_model$weights
121 |
122 | df_data <- mrdata$to_df()
123 | # Assign signal to the data for use in later stages
124 | df_data$signal <- signal
125 | # Drop trimmed data (ensemble weight below 0.1)
126 | df_data <- df_data[w >= 0.1,]
127 |
128 | # Save data and model
129 | py_save_object(object = signal_model,
130 |                filename = paste0(out_dir, "01_template_pkl_files/", ro_pair, ".pkl"),
131 |                pickle = "dill")
132 |
133 | # NOTE: the prepped-data list `data` (from the commented-out "diet example"
134 | # above) is never created on the Excel input path, so build the output
135 | # directly from `df`.
136 | out <- list(df = df, df_data = df_data)
137 | saveRDS(out, paste0(out_dir, "01_template_models/", ro_pair, ".RDS"))
--------------------------------------------------------------------------------
/first_process/src/02_loglinear_models.R:
--------------------------------------------------------------------------------
 1 | #
 2 | # 02_loglinear_models.R
 3 | #
 4 | #
 5 | library(dplyr)
 6 | library(mrbrt001, lib.loc = "/ihme/code/mscm/R/packages/")
 7 |
 8 | args <- commandArgs(trailingOnly = TRUE)
 9 |
10 | ro_pair <- args[1]
11 | out_dir <- args[2]
12 | WORK_DIR <- args[3]
13 | setwd(WORK_DIR)
14 | source("./config.R")
15 |
16 |
17 | data <- readRDS(paste0(out_dir, "01_template_models/", ro_pair, ".RDS"))
18 | df_data <- data$df_data
19 |
20 | mrdata <- MRData()
21 |
22 | mrdata$load_df(
23 |   data = df_data,
24 |   col_obs = c('obs'),
25 |   col_obs_se = c('obs_se'),
26 |   col_study_id = c('study_id'),
27 |   col_covs = as.list(c("signal"))
28 | )
29 |
30 | # Fit a linear covariate model on the signal; the uniform prior [1, 1] fixes beta at 1
31 | cov_models <- list(LinearCovModel(
32 |   alt_cov = "signal",
33 |   use_re = TRUE,
34 |   prior_beta_uniform=array(c(1.0, 1.0))
35 | ))
36 |
37 | # No trimming
38 | model <- MRBRT(
39 |   data = mrdata,
40 |   cov_models = cov_models,
41 |   inlier_pct = 1.0
42 | )
43 |
44 | model$fit_model(inner_print_level=5L, inner_max_iter=200L,
45 |                 outer_step_size=200L, outer_max_iter=100L)
46 |
47 | # Sample betas to use as priors for covariate selection.
48 | sampling <- import("mrtool.core.other_sampling")
49 | beta_samples <- sampling$sample_simple_lme_beta(1000L, model)
50 | beta_std <- sd(beta_samples)
51 |
52 | # Save data and model
53 | py_save_object(object = model,
54 |                filename = paste0(out_dir, "02_loglinear_pkl_files/", ro_pair, ".pkl"),
55 |                pickle = "dill")
56 |
57 | out <- append(data, list(beta_std = beta_std))
58 | saveRDS(out, paste0(out_dir, "02_loglinear_models/", ro_pair, ".RDS"))
59 |
60 |
--------------------------------------------------------------------------------
/first_process/src/03_covariate_selection.R:
--------------------------------------------------------------------------------
 1 | #
 2 | # 03_covariate_selection.R
 3 | #
 4 | #
 5 | library(dplyr)
 6 | library(mrbrt001, lib.loc = "/ihme/code/mscm/R/packages/")
 7 |
 8 | args <- commandArgs(trailingOnly = TRUE)
 9 |
10 | ro_pair <- args[1]
11 | out_dir <- args[2]
12 | WORK_DIR <- args[3]
13 | setwd(WORK_DIR)
14 | source("./config.R")
15 |
16 | # Read data
17 | data <- readRDS(paste0(out_dir, "02_loglinear_models/", ro_pair, ".RDS"))
18 | df_data <- data$df_data
19 | df_tmp <- data$df
20 | df_tmp <- df_tmp[as.numeric(rownames(df_data)),]  # keep only rows not trimmed in stage 1
21 |
22 | cov_names <- c("exposure_linear", data$x_covs)
23 |
24 | # Delete in future development; skip covariate selection for now.
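# With `cov_names` reduced to just "exposure_linear" below, `candidate_covs`
# comes out empty, so CovFinder keeps only the pre-selected signal covariate
# (see `pre_selected_covs` in COV_FINDER_CONFIG in config.R) and no bias
# covariates are selected.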
25 | cov_names <- c("exposure_linear") 26 | candidate_covs <- cov_names[!cov_names == "exposure_linear"] 27 | 28 | # Interaction with signal 29 | if (BIAS_COVARIATES_AS_INTX){ 30 | for (cov in candidate_covs) df_data[, cov] <- df_data$signal * df_tmp[, cov] 31 | } 32 | 33 | # Change the name of signal to exposure_linear, since some 34 | # underlying code deal with column name `exposure_linear` 35 | df_data$exposure_linear <- df_data$signal 36 | mrdata <- MRData() 37 | 38 | mrdata$load_df( 39 | data = df_data, 40 | col_obs = c('obs'), 41 | col_obs_se = c('obs_se'), 42 | col_study_id = c('study_id'), 43 | col_covs = as.list(cov_names) 44 | ) 45 | 46 | loglinear_model <- readRDS(paste0(out_dir, "02_loglinear_models/", ro_pair, ".RDS")) 47 | 48 | # Beta prior from first loglinear model results. 49 | beta_gprior_std <- loglinear_model$beta_std 50 | covfinder <- do.call( 51 | CovFinder, 52 | c(COV_FINDER_CONFIG, 53 | list( 54 | data = mrdata, 55 | covs = as.list(candidate_covs)), 56 | beta_gprior_std = BETA_PRIOR_MULTIPLIER * beta_gprior_std 57 | ) 58 | ) 59 | 60 | covfinder$select_covs(verbose = TRUE) 61 | 62 | selected_covs <- covfinder$selected_covs 63 | selected_covs 64 | 65 | # Save data and selected covariates 66 | out <- append(data, list(df_cov_selection=df_data, selected_covs=selected_covs)) 67 | saveRDS(out, paste0(out_dir, "03_covariate_selection_models/", ro_pair, ".RDS")) -------------------------------------------------------------------------------- /first_process/src/04_mixed_effects_models.R: -------------------------------------------------------------------------------- 1 | # 2 | # 04_mixed_effects_models.R 3 | # 4 | # 5 | library(dplyr) 6 | library(mrbrt001, lib.loc = "/ihme/code/mscm/R/packages/") 7 | 8 | args <- commandArgs(trailingOnly = TRUE) 9 | 10 | ro_pair <- args[1] 11 | out_dir <- args[2] 12 | WORK_DIR <- args[3] 13 | setwd(WORK_DIR) 14 | source("./config.R") 15 | 16 | # Extract selected covariates 17 | data <- readRDS(paste0(out_dir, "03_covariate_selection_models/", ro_pair, ".RDS")) 18 | df_data <- data$df_data 19 | df_tmp <- data$df 20 | # Only keep rows that are not trimmed 21 | df_tmp <- df_tmp[as.numeric(rownames(df_data)),] 22 | 23 | cov_names <- data$selected_covs 24 | bias_covs <- cov_names[!cov_names == "exposure_linear"] 25 | 26 | # Add interaction 27 | for (cov in bias_covs) df_data[, cov] <- df_data$signal * df_tmp[, cov] 28 | 29 | # Selected bias covariates plus signal 30 | covs <- c("signal", bias_covs) 31 | 32 | mrdata <- MRData() 33 | mrdata$load_df( 34 | df_data, 35 | col_obs = c('obs'), 36 | col_obs_se = c('obs_se'), 37 | col_study_id = c('study_id'), 38 | col_covs=as.list(covs) 39 | ) 40 | 41 | 42 | loglinear_model <- readRDS(paste0(out_dir, "02_loglinear_models/", ro_pair, ".RDS")) 43 | 44 | # Beta prior from first loglinear model results. 
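# `beta_std` below is the SD of 1000 betas sampled from the stage-2 log-linear
# model (see 02_loglinear_models.R); BETA_PRIOR_MULTIPLIER (0.1 in config.R)
# tightens the resulting Gaussian prior placed on each bias covariate.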
45 | beta_gprior_std <- loglinear_model$beta_std 46 | 47 | # Combine cov models 48 | cov_models <- list() 49 | for (cov in bias_covs) cov_models <- append(cov_models, 50 | list( 51 | do.call( 52 | LinearCovModel, 53 | list( 54 | alt_cov=cov, 55 | beta_gprior_std=BETA_PRIOR_MULTIPLIER * beta_gprior_std 56 | ) 57 | ) 58 | ) 59 | ) 60 | 61 | # Mixed effects model 62 | cov_models <- append(cov_models, LinearCovModel('signal', use_re=TRUE, 63 | prior_beta_uniform=array(c(1.0, 1.0)))) 64 | 65 | model <- MRBRT( 66 | data=mrdata, 67 | cov_models = cov_models, 68 | inlier_pct = 1.0 69 | ) 70 | 71 | model$fit_model(inner_print_level=5L, inner_max_iter=200L, 72 | outer_step_size=200L, outer_max_iter=100L) 73 | 74 | # Load signal model and data in Stage 1 75 | signal_model <- py_load_object(filename=paste0(out_dir, "01_template_pkl_files/", ro_pair, ".pkl"), 76 | pickle = "dill") 77 | orig_data <- readRDS(paste0(out_dir, "01_template_models/", ro_pair, ".RDS")) 78 | df <- orig_data$df 79 | 80 | # This should be provided by the user 81 | NUM_POINTS <- 100L 82 | exposure_lower <- min(df[,c(REF_EXPOSURE_COLS, ALT_EXPOSURE_COLS)]) 83 | exposure_upper <- max(df[,c(REF_EXPOSURE_COLS, ALT_EXPOSURE_COLS)]) 84 | exposure <- seq(exposure_lower, exposure_upper, length.out=NUM_POINTS) 85 | min_cov <- rep(exposure_lower, NUM_POINTS) 86 | 87 | # Deal with Sarah's data 88 | if ('a_0' %in% REF_EXPOSURE_COLS){ 89 | df_signal_pred <- data.frame(a_0=min_cov, a_1=min_cov, b_0=exposure, b_1=exposure) 90 | } else { 91 | df_signal_pred <- data.frame(a_0=min_cov, b_0=exposure) 92 | names(df_signal_pred) <- c(REF_EXPOSURE_COLS, ALT_EXPOSURE_COLS) 93 | } 94 | 95 | # Predict using signal model and gridded exposure 96 | data_signal_pred <- MRData() 97 | data_signal_pred$load_df( 98 | df_signal_pred, 99 | col_covs = as.list(c(REF_EXPOSURE_COLS, ALT_EXPOSURE_COLS)) 100 | ) 101 | signal_pred <- signal_model$predict(data_signal_pred) 102 | 103 | # TODO: data of selected covariates to be added 104 | df_final_pred <- data.frame(signal=signal_pred) 105 | data_final_pred <- MRData() 106 | data_final_pred$load_df( 107 | df_final_pred, 108 | col_covs = as.list(c("signal")) 109 | ) 110 | 111 | # create draws and prediction 112 | sampling <- import("mrtool.core.other_sampling") 113 | num_samples <- 1000L 114 | beta_samples <- sampling$sample_simple_lme_beta(num_samples, model) 115 | gamma_samples <- rep(model$gamma_soln, num_samples) * matrix(1, num_samples) 116 | 117 | curve <- model$predict(data_final_pred) 118 | draws <- model$create_draws( 119 | data_final_pred, 120 | beta_samples=beta_samples, 121 | gamma_samples=gamma_samples 122 | ) 123 | 124 | # Save model 125 | py_save_object(object = model, 126 | filename = paste0(out_dir, "04_mixed_effects_pkl_files/", ro_pair, ".pkl"), 127 | pickle = "dill") 128 | 129 | # OBS_VAR <- "ln_effect" 130 | # OBS_SE_VAR <- "ln_se" 131 | 132 | # if (!(OBS_VAR %in% names(df))){ 133 | # OBS_VAR <- "obs" 134 | # } 135 | 136 | # if (!(OBS_SE_VAR %in% names(df))){ 137 | # OBS_SE_VAR <- "obs_se" 138 | # } 139 | 140 | # Sanity check 141 | # pdf(paste0(out_dir, "04_mixed_effects_models/", ro_pair, ".pdf")) 142 | 143 | # if (length(ALT_EXPOSURE_COLS) == 1){ 144 | # plot(df[,ALT_EXPOSURE_COLS] - df[,REF_EXPOSURE_COLS], 145 | # df[, OBS_VAR], cex=1/(7*df[, OBS_SE_VAR]), xlab="exposure", ylab="ln_effect", 146 | # main=ro_pair, col=c('blue')) 147 | # } else { 148 | # plot(apply(df[,ALT_EXPOSURE_COLS], 1, mean) - apply(df[,REF_EXPOSURE_COLS], 1, mean), 149 | # df[, OBS_VAR], cex=1/(7*df[, OBS_SE_VAR]), 
xlab="exposure", ylab="ln_effect", 150 | # main=ro_pair, col=c('blue')) 151 | # } 152 | 153 | # lines(exposure, curve) 154 | # dev.off() -------------------------------------------------------------------------------- /first_process/src/05_evidence_score_continuous.R: -------------------------------------------------------------------------------- 1 | # 2 | # 06_publication_bias.R 3 | # 4 | # 5 | library(mrbrt001, lib.loc = "/ihme/code/mscm/R/packages/") 6 | args <- commandArgs(trailingOnly = TRUE) 7 | 8 | 9 | ### Running settings 10 | # ro_pair <- args[1] 11 | # out_dir <- args[2] 12 | # WORK_DIR <- args[3] 13 | ro_pair <- c("lpa_ihd") 14 | out_dir <- "" 15 | work_dir <- "/ihme/homes/zhengp/Repositories/evidence_score_pipeline" 16 | 17 | setwd(work_dir) 18 | source("./config.R") 19 | source("./src/utils/continuous_functions.R") 20 | 21 | linear_model_path <- paste0("/home/j/temp/zhengp/escore/", ro_pair, "_linear.pkl") 22 | signal_model_path <- paste0("/home/j/temp/zhengp/escore/", ro_pair, "_signal.pkl") 23 | ref_covs <- c("a_0", "a_1") 24 | alt_covs <- c("b_0", "b_1") 25 | 26 | 27 | ### Load model objects 28 | linear_model <- py_load_object(filename = linear_model_path, pickle = "dill") 29 | signal_model <- py_load_object(filename = signal_model_path, pickle = "dill") 30 | 31 | data_info <- extract_data_info(signal_model, 32 | linear_model, 33 | ref_covs = ref_covs, 34 | alt_covs = alt_covs) 35 | data_info$ro_pair <- ro_pair 36 | df <- data_info$df 37 | 38 | ### Detect publication bias 39 | df_no_outlier <- df[!df$outlier,] 40 | egger_model_all <- egger_regression(df$residual, df$residual_se) 41 | egger_model <- egger_regression(df_no_outlier$residual, df_no_outlier$residual_se) 42 | has_pub_bias <- egger_model$pval < 0.05 43 | 44 | ### Adjust for publication bias 45 | if (has_pub_bias) { 46 | df_fill <- get_df_fill(df[!df$outlier,]) 47 | num_fill <- nrow(df_fill) 48 | } else { 49 | num_fill <- 0 50 | } 51 | 52 | # fill the data if needed and refit the model 53 | if (num_fill > 0) { 54 | df <- rbind(df, df_fill) 55 | data_info$df <- df 56 | 57 | # refit the model 58 | data = MRData() 59 | data$load_df( 60 | data=df[!df$outlier,], 61 | col_obs='obs', 62 | col_obs_se='obs_se', 63 | col_covs=as.list(linear_model$cov_names), 64 | col_study_id='study_id' 65 | ) 66 | linear_model_fill <- MRBRT(data, cov_models=linear_model$cov_models) 67 | linear_model_fill$fit_model() 68 | } else { 69 | linear_model_fill <- NULL 70 | } 71 | 72 | ### Extract scores 73 | uncertainty_info <- get_uncertainty_info(data_info, linear_model) 74 | if (is.null(linear_model_fill)) { 75 | uncertainty_info_fill <- NULL 76 | } else { 77 | uncertainty_info_fill <- get_uncertainty_info(data_info, linear_model_fill) 78 | } 79 | 80 | 81 | ### Output diagnostics 82 | # figures 83 | title <- paste0(ro_pair, ": egger_mean=", round(egger_model$mean, 3), 84 | ", egger_sd=", round(egger_model$sd,3), ", egger_pval=", 85 | round(egger_model$pval, 3)) 86 | plot_residual(df, title) 87 | 88 | plot_model(data_info, 89 | uncertainty_info, 90 | linear_model, 91 | signal_model, 92 | uncertainty_info_fill, 93 | linear_model_fill) 94 | 95 | # summary 96 | summary <- summarize_model(data_info, 97 | uncertainty_info, 98 | linear_model, 99 | signal_model, 100 | egger_model, 101 | egger_model_all, 102 | uncertainty_info_fill, 103 | linear_model_fill) 104 | 105 | draws <- get_draws(data_info, linear_model) 106 | -------------------------------------------------------------------------------- /first_process/src/05_evidence_score_dichotomous.R: 
-------------------------------------------------------------------------------- 1 | # 2 | # 06_publication_bias.R 3 | # 4 | # 5 | library(mrbrt001, lib.loc = "/ihme/code/mscm/R/packages/") 6 | args <- commandArgs(trailingOnly = TRUE) 7 | 8 | 9 | ### Running settings 10 | # ro_pair <- args[1] 11 | # out_dir <- args[2] 12 | # WORK_DIR <- args[3] 13 | ro_pair <- c("sim") 14 | out_dir <- "" 15 | work_dir <- "/ihme/homes/zhengp/Repositories/evidence_score_pipeline" 16 | 17 | setwd(work_dir) 18 | source("./config.R") 19 | source("./src/utils/dichotomous_functions.R") 20 | 21 | model_path <- "/home/j/temp/zhengp/escore/sim_dicho.pkl" 22 | 23 | 24 | ### Load model objects 25 | model <- py_load_object(filename = model_path, pickle = "dill") 26 | 27 | 28 | ### Extract data 29 | df <- extract_data_info(model) 30 | 31 | 32 | ### Detect publication bias 33 | egger_model_all <- egger_regression(df$residual, df$residual_se) 34 | egger_model <- egger_regression(df[!df$outlier,]$residual, df[!df$outlier,]$residual_se) 35 | has_pub_bias <- egger_model$pval < 0.05 36 | 37 | 38 | ### Adjust for publication bias 39 | if (has_pub_bias) { 40 | df_fill <- get_df_fill(df) 41 | num_fill <- nrow(df_fill) 42 | } else { 43 | num_fill <- 0 44 | } 45 | 46 | # fill the data if needed and refit the model 47 | if (num_fill > 0) { 48 | df <- rbind(df, df_fill) 49 | 50 | # refit the model 51 | data = MRData() 52 | data$load_df( 53 | data=df[!df$outlier,], 54 | col_obs='obs', 55 | col_obs_se='obs_se', 56 | col_covs=as.list(model$cov_names), 57 | col_study_id='study_id' 58 | ) 59 | model_fill <- MRBRT(data, cov_models=model$cov_models) 60 | model_fill$fit_model() 61 | } else { 62 | model_fill <- NULL 63 | } 64 | 65 | 66 | ### Extract scores 67 | uncertainty_info <- get_uncertainty_info(model) 68 | if (is.null(model_fill)) { 69 | uncertainty_info_fill <- NULL 70 | } else { 71 | uncertainty_info_fill <- get_uncertainty_info(model_fill) 72 | } 73 | 74 | 75 | ### Output diagnostics 76 | plot_model(df, uncertainty_info, model, uncertainty_info_fill, model_fill, ro_pair) 77 | summary <- summarize_model(ro_pair, model, model_fill, egger_model, egger_model_all, uncertainty_info) 78 | draws <- get_draws(model) 79 | -------------------------------------------------------------------------------- /first_process/src/05_evidence_score_legacy.R: -------------------------------------------------------------------------------- 1 | # 2 | # 05_evidence_score.R 3 | # 4 | # 5 | library(dplyr) 6 | library(mrbrt001, lib.loc = "/ihme/code/mscm/R/packages/") 7 | 8 | args <- commandArgs(trailingOnly = TRUE) 9 | 10 | ro_pair <- args[1] 11 | out_dir <- args[2] 12 | WORK_DIR <- args[3] 13 | setwd(WORK_DIR) 14 | source("./config.R") 15 | 16 | 17 | # Load signal_model and final_model 18 | signal_model <- py_load_object(filename=paste0(out_dir, "01_template_pkl_files/", ro_pair, ".pkl"), 19 | pickle = "dill") 20 | 21 | final_model <- py_load_object(filename=paste0(out_dir, "04_mixed_effects_pkl_files/", ro_pair, ".pkl"), 22 | pickle = "dill") 23 | 24 | # using the scorelator 25 | 26 | # need to run 'repl_python()' to open an interactive Python interpreter, 27 | # then immediately type 'exit' to get back to the R interpreter 28 | # -- this helps to load a required Python package 29 | repl_python() 30 | # -- type 'exit' or hit escape 31 | 32 | evidence_score <- import("mrtool.evidence_score.scorelator") 33 | scorelator <- evidence_score$ContinuousScorelator(signal_model = signal_model, final_model = final_model, 34 | alt_cov_names= 
as.list(ALT_EXPOSURE_COLS), 35 | ref_cov_names = as.list(REF_EXPOSURE_COLS), 36 | name=ro_pair) 37 | scorelator$plot_model(folder = paste0(out_dir, "05_evidence_score/")) 38 | score <- scorelator$get_score() 39 | low_score <- scorelator$get_score(use_gamma_ub=TRUE) 40 | -------------------------------------------------------------------------------- /first_process/src/05_evidence_score_loglinear.R: -------------------------------------------------------------------------------- 1 | # 2 | # 06_publication_bias.R 3 | # 4 | # 5 | library(mrbrt001, lib.loc = "/ihme/code/mscm/R/packages/") 6 | args <- commandArgs(trailingOnly = TRUE) 7 | 8 | 9 | ### Running settings 10 | # ro_pair <- args[1] 11 | # out_dir <- args[2] 12 | # WORK_DIR <- args[3] 13 | ro_pair <- c("lpa_ihd") 14 | out_dir <- "" 15 | work_dir <- "/ihme/homes/zhengp/Repositories/evidence_score_pipeline" 16 | 17 | setwd(work_dir) 18 | source("./config.R") 19 | source("./src/utils/loglinear_functions.R") 20 | 21 | model_path <- paste0("/home/j/temp/zhengp/escore/sim_loglinear.pkl") 22 | ref_covs <- NULL 23 | alt_covs <- c("exp") 24 | 25 | 26 | ### Load model objects 27 | model <- py_load_object(filename = model_path, pickle = "dill") 28 | 29 | data_info <- extract_data_info(model, 30 | ref_covs = ref_covs, 31 | alt_covs = alt_covs) 32 | data_info$ro_pair <- ro_pair 33 | df <- data_info$df 34 | 35 | ### Detect publication bias 36 | df_no_outlier <- df[!df$outlier,] 37 | egger_model_all <- egger_regression(df$residual, df$residual_se) 38 | egger_model <- egger_regression(df_no_outlier$residual, df_no_outlier$residual_se) 39 | has_pub_bias <- egger_model$pval < 0.05 40 | 41 | ### Adjust for publication bias 42 | if (has_pub_bias) { 43 | df_fill <- get_df_fill(df[!df$outlier,]) 44 | num_fill <- nrow(df_fill) 45 | } else { 46 | num_fill <- 0 47 | } 48 | 49 | # fill the data if needed and refit the model 50 | if (num_fill > 0) { 51 | df <- rbind(df, df_fill) 52 | data_info$df <- df 53 | 54 | # refit the model 55 | data = MRData() 56 | data$load_df( 57 | data=df[!df$outlier,], 58 | col_obs='obs', 59 | col_obs_se='obs_se', 60 | col_covs=as.list(model$cov_names), 61 | col_study_id='study_id' 62 | ) 63 | model_fill <- MRBRT(data, cov_models=model$cov_models) 64 | model_fill$fit_model() 65 | } else { 66 | model_fill <- NULL 67 | } 68 | 69 | ### Extract scores 70 | uncertainty_info <- get_uncertainty_info(data_info, model) 71 | if (is.null(model_fill)) { 72 | uncertainty_info_fill <- NULL 73 | } else { 74 | uncertainty_info_fill <- get_uncertainty_info(data_info, model_fill) 75 | } 76 | 77 | 78 | ### Output diagnostics 79 | # figures 80 | title <- paste0(ro_pair, ": egger_mean=", round(egger_model$mean, 3), 81 | ", egger_sd=", round(egger_model$sd,3), ", egger_pval=", 82 | round(egger_model$pval, 3)) 83 | plot_residual(df, title) 84 | 85 | plot_model(data_info, 86 | uncertainty_info, 87 | model, 88 | uncertainty_info_fill, 89 | model_fill) 90 | 91 | # summary 92 | summary <- summarize_model(data_info, 93 | uncertainty_info, 94 | model, 95 | egger_model, 96 | egger_model_all, 97 | uncertainty_info_fill, 98 | model_fill) 99 | summary 100 | 101 | draws <- get_draws(data_info, model) 102 | -------------------------------------------------------------------------------- /first_process/src/05_evidence_score_mixed.R: -------------------------------------------------------------------------------- 1 | # 2 | # 06_publication_bias.R 3 | # 4 | # 5 | library(mrbrt001, lib.loc = "/ihme/code/mscm/R/packages/") 6 | args <- commandArgs(trailingOnly = TRUE) 7 | 8 
| 9 | ### Running settings 10 | # ro_pair <- args[1] 11 | # out_dir <- args[2] 12 | # WORK_DIR <- args[3] 13 | ro_pair <- c("sim") 14 | out_dir <- "" 15 | work_dir <- "/ihme/homes/zhengp/Repositories/evidence_score_pipeline" 16 | 17 | setwd(work_dir) 18 | source("./config.R") 19 | source("./src/utils/mixed_functions.R") 20 | 21 | model_path <- "/ihme/code/qwr/ckd_qwr/evidence_score_pipeline/mrbrt_model_outputs/vmixed/stage3_str_bp_vmixed/mod1.pkl" 22 | 23 | 24 | ### Load model objects 25 | model <- py_load_object(filename = model_path, pickle = "dill") 26 | 27 | 28 | ### Extract data 29 | data_info <- extract_data_info(model, cont_cov = "age_mean") 30 | df <- data_info$df 31 | data_info$ro_pair <- ro_pair 32 | 33 | ### Detect publication bias 34 | egger_model_all <- egger_regression(df$residual, df$residual_se) 35 | egger_model <- egger_regression(df[!df$outlier,]$residual, df[!df$outlier,]$residual_se) 36 | has_pub_bias <- egger_model$pval < 0.05 37 | 38 | 39 | ### Adjust for publication bias 40 | if (has_pub_bias) { 41 | df_fill <- get_df_fill(df) 42 | num_fill <- nrow(df_fill) 43 | } else { 44 | num_fill <- 0 45 | } 46 | 47 | # fill the data if needed and refit the model 48 | if (num_fill > 0) { 49 | df <- rbind(df, df_fill) 50 | data_info$df <- df 51 | 52 | # refit the model 53 | data = MRData() 54 | data$load_df( 55 | data=df[!df$outlier,], 56 | col_obs='obs', 57 | col_obs_se='obs_se', 58 | col_covs=as.list(model$cov_names), 59 | col_study_id='study_id' 60 | ) 61 | model_fill <- MRBRT(data, cov_models=model$cov_models) 62 | model_fill$fit_model() 63 | } else { 64 | model_fill <- NULL 65 | } 66 | 67 | 68 | ### Extract scores 69 | uncertainty_info <- get_uncertainty_info(data_info, model) 70 | if (is.null(model_fill)) { 71 | uncertainty_info_fill <- NULL 72 | } else { 73 | uncertainty_info_fill <- get_uncertainty_info(data_info, model_fill) 74 | } 75 | 76 | 77 | ### Output diagnostics 78 | title <- paste0("sim", ": egger_mean=", round(egger_model$mean, 3), 79 | ", egger_sd=", round(egger_model$sd,3), ", egger_pval=", 80 | round(egger_model$pval, 3)) 81 | plot_residual(df, title) 82 | 83 | plot_model(data_info, 84 | uncertainty_info, 85 | model, 86 | uncertainty_info_fill, 87 | model_fill) 88 | summary <- summarize_model(data_info, 89 | uncertainty_info, 90 | model, 91 | uncertainty_info_fill, 92 | model_fill) 93 | 94 | draws <- get_draws(data_info, model) 95 | -------------------------------------------------------------------------------- /first_process/src/utils/egger_functions.R: -------------------------------------------------------------------------------- 1 | egger_regression <- function(residual, residual_sd, one_sided = TRUE) { 2 | weighted_residual <- residual/residual_sd 3 | r_mean <- mean(weighted_residual) 4 | r_sd <- 1/sqrt(length(weighted_residual)) 5 | r_pval <- get_pval(r_mean, r_sd, one_sided = one_sided) 6 | list(mean = r_mean, sd = r_sd, pval = r_pval) 7 | } 8 | 9 | get_pval <- function(beta, beta_sd, one_sided = FALSE) { 10 | zscore <- abs(beta/beta_sd) 11 | if (one_sided) { 12 | pval <- 1 - pnorm(zscore) 13 | } else { 14 | pval <- 2*(1 - pnorm(zscore)) 15 | } 16 | pval 17 | } -------------------------------------------------------------------------------- /first_process/src/utils/extract_old_results.R: -------------------------------------------------------------------------------- 1 | # 2 | # extract_old_results.R 3 | # 4 | # Reed Sorensen 5 | # June 2020 6 | # 7 | 8 | 9 | library(reticulate) 10 | library(dplyr) 11 | use_condaenv(condaenv="mr_brt_refactor_env", 
conda="/ihme/code/evidence_score/miniconda3/bin/conda", required = TRUE) 12 | 13 | py_cmds <- c( 14 | "import sys", 15 | "import os", 16 | "import dill as pickle", 17 | "import argparse", 18 | "import numpy as np", 19 | "import pandas as pd", 20 | "sys.path.append(os.path.dirname('/home/j/temp/reed/prog/repos/mr_brt_ihme/refactor/'))", 21 | "from mrbrt.__init__ import MR_BRT, MR_BeRT", 22 | "from mrbrt.utils import ratioInit, sampleKnots" 23 | ) 24 | 25 | 26 | 27 | 28 | 29 | for (cmd in py_cmds) py_run_string(cmd) 30 | 31 | path1 <- "/home/j/temp/rmbarber/red_meat_paper/diet_model_pipeline_2020_01_23" 32 | dirs1 <- list.dirs(path1)[-c(1:2)] 33 | 34 | path2 <- "/home/j/temp/reed/jiawei/red_meat_paper/diet_model_pipeline_2020_06_29_100_iters" 35 | dirs2 <- list.dirs(path2)[-c(1)] 36 | 37 | path3 <- "/home/j/temp/reed/jiawei/red_meat_paper/diet_model_pipeline_2020_06_29_200_iters" 38 | dirs3 <- list.dirs(path3)[-c(1)] 39 | 40 | 41 | get_old_diet_results <- function(dir, verbose = TRUE) { 42 | 43 | dev <- FALSE 44 | if (dev) { 45 | dir <- dirs1[27] 46 | } 47 | if (verbose) cat(dir, "\n") 48 | 49 | try({ 50 | path_stage1 <- paste0(dir, "/stage1.pkl") 51 | 52 | if (file.exists(path_stage1)) { 53 | py_run_string(paste0("with open('", path_stage1, "', 'rb') as fopen: model1 = pickle.load(fopen)")) 54 | x_covs <- py$model1$ratio_x_covs 55 | z_covs <- py$model1$ratio_z_covs 56 | } else { 57 | x_covs <- z_covs <- NA 58 | } 59 | 60 | path_mod_mono <- paste0(dir, "/ratio_mod_mono.pkl") 61 | if (file.exists(path_mod_mono)) { 62 | py_run_string(paste0("with open('", path_mod_mono, "', 'rb') as fopen: model2 = pickle.load(fopen)")) 63 | 64 | knots_tmp = py$model2$mr$spline_list[[1]]$knots 65 | k0 = knots_tmp[1] 66 | k1 = knots_tmp[length(knots_tmp)] 67 | # pred_x_cov_list, pred_z_cov_list, y_samples, y_samples_fe 68 | py_run_string(paste0("a, b, c, d = model2.mr_predict(domain = [", k0, ", ", k1, "])")) 69 | # pred = y_samples.mean(axis = 1) 70 | pred = apply(py$c, 1, mean) 71 | pred_fe = apply(py$d, 1, mean) 72 | # exp_tmp = pred_x_cov_list[0]['mat'] 73 | exp_tmp = py$a[[1]][['mat']] 74 | df <- py$model2$df 75 | } else { 76 | knots_tmp <- pred <- exp_tmp <- df <- NA 77 | } 78 | 79 | ro_pair_tmp <- strsplit(dir, "\\/")[[1]] 80 | ro_pair <- ro_pair_tmp[length(ro_pair_tmp)] 81 | 82 | out <- list( 83 | ro_pair=ro_pair, x_covs=x_covs, z_covs=z_covs, 84 | knots_tmp=knots_tmp, pred=pred, pred_fe=pred_fe, exp_tmp=exp_tmp, df=df 85 | ) 86 | return(out) 87 | 88 | }) 89 | } 90 | 91 | ##### 92 | 93 | old_results <- lapply(dirs1, get_old_diet_results) 94 | old_results <- old_results[!sapply(old_results, function(x) class(x) == "try-error")] 95 | names(old_results) <- sapply(old_results, function(x) x$ro_pair) 96 | # saveRDS(old_results, "/home/j/temp/reed/misc/old_results_fe.RDS") 97 | 98 | old_results <- readRDS("/home/j/temp/reed/misc/old_results_fe.RDS") 99 | tmp1 <- lapply(old_results, function(x) { 100 | # x <- old_results[[1]] # dev 101 | list( 102 | n_rows = nrow(x$df), 103 | col_names = names(x$df), 104 | x_covs = x$x_covs 105 | ) 106 | }) 107 | names(tmp11) <- names(old_results) 108 | 109 | new_dir <- "/ihme/scratch/users/rsoren/evidence_score_diet/v5_test/04_monospline_models/" 110 | new_results <- lapply(list.files(new_dir, full.names = TRUE), readRDS) 111 | tmp2 <- lapply(new_results, function(x) { 112 | # x <- new_results[[1]] # dev 113 | list( 114 | n_rows = nrow(x$df), 115 | col_names = names(x$df), 116 | x_covs = x$selected_covs[x$selected_covs != "exposure_linear"] 117 | ) 118 | }) 119 | names(tmp2) 
<- gsub(".RDS", "", list.files(new_dir)) 120 | 121 | tmp3 <- do.call("rbind", lapply(gsub(".RDS", "", list.files(new_dir)), function(x) { 122 | # x <- gsub(".RDS", "", list.files(new_dir))[4] # dev 123 | data.frame( 124 | pair = x, 125 | nrows_old = tmp1[[x]]$n_rows, 126 | nrows_new = tmp2[[x]]$n_rows, 127 | xcovs_old = paste(tmp1[[x]]$x_covs, collapse = ","), 128 | xcovs_new = paste(tmp2[[x]]$x_covs, collapse = ","), 129 | colnames_old = paste(tmp1[[x]]$col_names, collapse = ","), 130 | colnames_new = paste(tmp2[[x]]$col_names, collapse = ",") 131 | ) 132 | })) 133 | write.csv(tmp3, "/home/j/temp/reed/misc/comparison_with_ryans_data_prep.csv") 134 | 135 | 136 | # get pairs without selected covs in Ryan's model 137 | tmp <- sapply(old_results, function(x) x$x_covs) 138 | tmp2 <- names(tmp)[sapply(tmp, function(x) length(x) == 0)] 139 | saveRDS(tmp2, "/home/j/temp/reed/misc/pairs_with_no_selectedcovs.RDS") 140 | 141 | ##### jiawei's results with 100 iterations 142 | old_results2 <- lapply(dirs2, get_old_diet_results) 143 | old_results2 <- old_results2[!sapply(old_results2, function(x) class(x) == "try-error")] 144 | names(old_results2) <- sapply(old_results2, function(x) x$ro_pair) 145 | saveRDS(old_results2, "/home/j/temp/reed/misc/old_results_fe2.RDS") 146 | 147 | ##### jiawei's results with 200 iterations 148 | old_results3 <- lapply(dirs3, get_old_diet_results) 149 | old_results3 <- old_results3[!sapply(old_results3, function(x) class(x) == "try-error")] 150 | names(old_results3) <- sapply(old_results3, function(x) x$ro_pair) 151 | saveRDS(old_results3, "/home/j/temp/reed/misc/old_results_fe3.RDS") 152 | 153 | -------------------------------------------------------------------------------- /first_process/src/utils/plot_3_curves.R: -------------------------------------------------------------------------------- 1 | # 2 | # plot_3_curves.R 3 | # 4 | # Reed Sorensen 5 | # June 2020 6 | # 7 | CURDIR <- "/ihme/homes/jiaweihe/msca/mrbrt/evidence_score_diet" 8 | CODE_PATH <- paste0(CURDIR, "/parallel/") 9 | source(paste0(CODE_PATH, "00_globals.R")) 10 | 11 | library(dplyr) 12 | library(mrbrt001, lib.loc = "/ihme/code/mscm/R/packages/") 13 | 14 | pairs_without_selectedcovs <- readRDS(paste0(J_DIR, "/pairs_with_no_selectedcovs.RDS")) 15 | output_dir <- paste0(RESULTS_DIR, VERSION_ID, "/04_monospline_pkl_files/") 16 | ro_pairs <- gsub(".pkl", "", list.files(output_dir)) 17 | rds_dir <- paste0(RESULTS_DIR, VERSION_ID, "/04_monospline_models/") 18 | old_dir <- "/home/j/temp/jiaweihe/red_meat_paper/predict_draws/" 19 | 20 | pdf(paste0(RESULTS_DIR, VERSION_ID, "/04_monospline_pdf1/compare_curves.pdf")) 21 | for (pair in ro_pairs) { 22 | # for (pair in ro_pairs[ro_pairs %in% pairs_without_selectedcovs]) { 23 | dev <- FALSE 24 | if (dev) { 25 | pair <- "redmeat_diabetes" 26 | } 27 | cat(pair, "\n") 28 | 29 | # try({ 30 | # Sys.sleep(2) 31 | x <- readRDS(paste0(rds_dir, pair, ".RDS")) 32 | mod2 <- py_load_object(paste0(output_dir, pair, ".pkl")) 33 | 34 | get_knots_ensemble <- function(model) { 35 | cov_name_tmp <- model$ensemble_cov_model_name 36 | tmp <- model$sub_models[[1]] 37 | tmp2 <- tmp$get_cov_model(name = cov_name_tmp) 38 | tmp2$spline_knots 39 | } 40 | 41 | draws_path <- paste0( 42 | paste0(RESULTS_DIR, VERSION_ID, "/04_monospline_pdf1/"), 43 | x$ro_pair, "_y_draws_fe.pkl" 44 | ) 45 | 46 | draws_dat <- py_load_object(draws_path) 47 | draws_mean <- exp(apply(draws_dat, 1, mean)) 48 | # draws_mean <- exp(apply(draws_dat, 1, function(x) quantile(x, 0.5))) 49 | pred_lo <- exp(apply(draws_dat, 1, 
function(x) quantile(x, 0.025))) 50 | pred_hi <- exp(apply(draws_dat, 1, function(x) quantile(x, 0.975))) 51 | 52 | df_pred2 <- x$df_pred2 53 | 54 | old_results <- readRDS(paste0(J_DIR, "old_results_fe.RDS")) 55 | names(old_results) <- sapply(old_results, function(x) x$ro_pair) 56 | tmp <- old_results[[x$ro_pair]] 57 | 58 | tmp_vec <- c(draws_mean, tmp$pred_fe) 59 | x_data <- seq(min(tmp$exp_tmp), max(tmp$exp_tmp), length.out = length(draws_mean)) 60 | new_df <- data.frame(exposure=x_data, pred_lo=pred_lo, pred_hi=pred_hi) 61 | 62 | bias_covs <- x$selected_covs[x$selected_covs != "exposure_linear"] 63 | 64 | old_draws_path <- paste0(old_dir, x$ro_pair, "_y_draws_fe.pkl") 65 | old_draws_dat <- py_load_object(old_draws_path) 66 | old_draws_mean <- apply(old_draws_dat, 1, mean) 67 | old_pred_lo <- apply(old_draws_dat, 1, function(x) quantile(x, 0.025)) 68 | old_pred_hi <- apply(old_draws_dat, 1, function(x) quantile(x, 0.975)) 69 | old_x_data <- seq(min(tmp$exp_tmp), max(tmp$exp_tmp), length.out = length(old_draws_mean)) 70 | old_df <- data.frame(exposure=old_x_data, pred_lo=old_pred_lo, pred_hi=old_pred_hi) 71 | 72 | if (length(bias_covs) == 0) { 73 | covlabel <- "[None]" 74 | } else { 75 | covlabel <- paste(bias_covs, collapse = ", ") 76 | } 77 | 78 | covlabel1 <- paste0("c(", paste(bias_covs, collapse = ", "), ")") 79 | covlabel2 <- paste0("[", paste(tmp$x_covs, collapse = ", "), "]") 80 | 81 | min_y <- min(min(pred_lo), min(tmp_vec), min(old_pred_lo)) 82 | max_y <- max(max(pred_hi), max(tmp_vec), max(old_pred_hi)) 83 | 84 | with(df_pred2, plot( 85 | x_data, draws_mean, 86 | lwd = 3, type = "l", col="blue", 87 | main = paste0(x$ro_pair), 88 | ylim = c(min_y, max_y) 89 | )) 90 | abline(h = 1, lwd = 2, lty = 2) 91 | mtext(text = paste0(paste0(covlabel1, "; ", covlabel2)), side = 3, line = 0.4) 92 | 93 | lines(tmp$exp_tmp, tmp$pred_fe, col = adjustcolor("red", 0.6), lwd = 2) 94 | 95 | # function for plotting uncertainty intervals 96 | add_ui <- function(dat, x_var, lo_var, hi_var, color = "darkblue", opacity = 0.1) { 97 | polygon( 98 | x = c(dat[, x_var], rev(dat[, x_var])), 99 | y = c(dat[, lo_var], rev(dat[, hi_var])), 100 | col = adjustcolor(col = color, alpha.f = opacity), 101 | border = FALSE 102 | ) 103 | } 104 | 105 | add_ui(new_df, 'exposure', 'pred_lo', 'pred_hi') 106 | 107 | add_ui(old_df, 'exposure', 'pred_lo', 'pred_hi', color="firebrick1") 108 | 109 | if (min(pred_hi) < 1) { 110 | legend_pos <- "bottomleft" 111 | } else if (max(pred_lo) > 1) { 112 | legend_pos <- "topleft" 113 | } else legend_pos <- "topright"  # fallback so legend_pos is always defined when the curve straddles 1 114 | 115 | legend(legend_pos, 116 | legend = c("New model", "Old model, 20 iter."), 117 | lwd = 2, 118 | col = c("blue", "red"), 119 | cex = 0.85 120 | ) 121 | # }) 122 | 123 | } 124 | 125 | dev.off() 126 | -------------------------------------------------------------------------------- /first_process/src/utils/qsub_function.R: -------------------------------------------------------------------------------- 1 | 2 | run_script <- function(script, img, args = c("")) { 3 | cmd <- paste( 4 | "/ihme/singularity-images/rstudio/shells/execRscript.sh", 5 | "-i", img, "-s", script, paste(args, collapse = " ") 6 | ) 7 | system(cmd) 8 | } 9 | 10 | submit_qsub <- function(script, job_name, img, proj = PROJ, 11 | queue = "long.q", hours = 6, threads = 1, 12 | error_logs = paste0("/share/temp/sgeoutput/", USER, "/errors"), 13 | output_logs = paste0("/share/temp/sgeoutput/", USER, "/output"), 14 | memory = "8G", args = "", verbose = TRUE) { 15 | cmd <- paste( 16 | "qsub -terse -N", job_name, 17 | "-q", queue,
paste0("-l fthread=", threads), 19 | paste0("-l m_mem_free=", memory), 20 | paste0("-l h_rt=", hours, ":00:00"), 21 | paste0("-l archive=TRUE"), 22 | "-P", proj, 23 | "-e", error_logs, 24 | "-o", output_logs, 25 | "/ihme/singularity-images/rstudio/shells/execRscript.sh ", 26 | "-i", img, 27 | "-s", script, 28 | paste(args, collapse = " ") 29 | ) 30 | 31 | if (verbose) cat(cmd, "\n") 32 | system(cmd) 33 | } 34 | 35 | # Wait for upstream job to finish for each pair 36 | qwait <- function(sub_dir, pair){ 37 | outfile <- paste0(OUT_DIR, sub_dir, "/", pair, ".RDS") 38 | while (!file.exists(outfile)) { 39 | Sys.sleep(1) 40 | } 41 | } 42 | 43 | # Submit job for each stage for each pair 44 | submit_sub_job <- function(pair, script, job_name_suffix, script_dir) { 45 | submit_qsub( 46 | script = paste0(CODE_PATH, script), 47 | job_name = paste0(pair, job_name_suffix), 48 | img = SINGULARITY_IMG, 49 | args = c(pair, OUT_DIR, script_dir) 50 | ) 51 | } 52 | 53 | # Submit job for plotting risk function and derivative fit. 54 | submit_plot_job <- function(pair) { 55 | cmd <- paste0( 56 | paste0("sh ", CODE_PATH, "submit_qsub_python.sh "), 57 | paste0(CODE_PATH, " "), 58 | paste0(OUT_DIR, "04_monospline_pkl_files/ "), 59 | paste0(OUT_DIR, "05_monospline_pdf/ "), 60 | paste0(pair, " "), 61 | paste0(USER, " "), 62 | PROJ 63 | ) 64 | system(cmd) 65 | } 66 | 67 | # Submit job for generating evidence score. 68 | submit_score_job <- function(pair) { 69 | cmd <- paste0( 70 | paste0("sh ", CODE_PATH, "submit_score_qsub_python.sh "), 71 | paste0(CODE_PATH, " "), 72 | paste0(OUT_DIR, "05_monospline_pdf/ "), 73 | paste0(OUT_DIR, "06_evidence_score/ "), 74 | paste0(pair, " "), 75 | paste0(USER, " "), 76 | PROJ 77 | ) 78 | system(cmd) 79 | } -------------------------------------------------------------------------------- /limetr/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | notebooks/ 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | .python-version 87 | 88 | # pipenv 89 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
90 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 91 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 92 | # install all needed dependencies. 93 | #Pipfile.lock 94 | 95 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 96 | __pypackages__/ 97 | 98 | # Celery stuff 99 | celerybeat-schedule 100 | celerybeat.pid 101 | 102 | # SageMath parsed files 103 | *.sage.py 104 | 105 | # Environments 106 | .env 107 | .venv 108 | env/ 109 | venv/ 110 | ENV/ 111 | env.bak/ 112 | venv.bak/ 113 | 114 | # Spyder project settings 115 | .spyderproject 116 | .spyproject 117 | 118 | # Rope project settings 119 | .ropeproject 120 | 121 | # mkdocs documentation 122 | /site 123 | 124 | # mypy 125 | .mypy_cache/ 126 | .dmypy.json 127 | dmypy.json 128 | 129 | # Pyre type checker 130 | .pyre/ 131 | 132 | # vscode 133 | .vscode/ 134 | 135 | # mac 136 | .DS_Store 137 | -------------------------------------------------------------------------------- /limetr/.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | os: linux 3 | dist: xenial 4 | 5 | jobs: 6 | name: "Conda/ Python Linux" 7 | python: 3.7 8 | # env: no env. variables needed 9 | 10 | before_install: 11 | # Here we just install Miniconda, which you shouldn't have to change. 12 | - wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh 13 | - chmod +x miniconda.sh 14 | - ./miniconda.sh -b 15 | - export PATH=/home/travis/miniconda3/bin:$PATH 16 | - conda update --yes conda 17 | 18 | install: 19 | # We just set up a conda environment with the right Python version. 20 | - conda create --yes -n limetr_conda python=$TRAVIS_PYTHON_VERSION 21 | - source activate limetr_conda 22 | - conda env list 23 | - conda install --yes -c defaults -c conda-forge conda-build conda-verify 24 | - conda install --yes -c conda-forge -c defaults gcc_linux-64 gfortran_linux-64 gxx_linux-64 numpy==1.19.1 scipy==1.5.2 cyipopt 25 | - conda update --yes conda 26 | - make build 27 | - make install 28 | 29 | script: 30 | - make tests 31 | - make sdist 32 | - make package 33 | 34 | -------------------------------------------------------------------------------- /limetr/LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2020, Peng Zheng 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | -------------------------------------------------------------------------------- /limetr/Makefile: -------------------------------------------------------------------------------- 1 | # make file for pynlme class 2 | OUTPUT_DIR=output 3 | CONDA_PKG_DIR=conda_pkg 4 | NUMPY_VER=1.19.1 5 | 6 | .PHONY: clean, tests 7 | 8 | build: setup.py 9 | python setup.py build 10 | 11 | install: setup.py 12 | python check_requirements.py 13 | python setup.py install 14 | 15 | sdist: setup.py 16 | python setup.py sdist 17 | 18 | tests: 19 | python tests/check_utils.py 20 | python tests/check_limetr.py 21 | 22 | clean: 23 | find . -name "*.so*" | xargs rm -rf 24 | find . -name "*.pyc" | xargs rm -rf 25 | find . -name "__pycache__" | xargs rm -rf 26 | find . -name "build" | xargs rm -rf 27 | find . -name "dist" | xargs rm -rf 28 | find . -name "MANIFEST" | xargs rm -rf 29 | rm -rf ./lib ./$(OUTPUT_DIR) 30 | 31 | uninstall: 32 | find $(CONDA_PREFIX)/lib/ -name "*limetr*" | xargs rm -rf 33 | 34 | package: src/limetr/Makefile src/limetr/special_mat.f90 src/limetr/utils.py 35 | @echo "### Ensure version number in $(CONDA_PKG_DIR) matches with version in setup.py" 36 | @echo "Currently only tested for linux" 37 | @echo "Installing conda pre-requirements" 38 | @conda install --yes --strict-channel-priority -c conda-forge -c defaults conda-build conda-verify 39 | @echo "Installing additional conda dependencies" 40 | @conda install --yes --strict-channel-priority -c conda-forge -c defaults numpy==1.19.1 scipy==1.5.2 cyipopt 41 | @echo "Building conda package for limetr (from $(CONDA_PKG_DIR) folder)" 42 | conda build -k --no-anaconda-upload --verify --numpy $(NUMPY_VER) --output-folder "$(OUTPUT_DIR)" --cache-dir /tmp/limetrcache ./$(CONDA_PKG_DIR)/ 43 | @echo "conda build status:'$?'" 44 | @echo "Generated conda package file is: $(OUTPUT_DIR)/linux-64/limetr*.tar.bz2" 45 | @ls -l $(OUTPUT_DIR)/linux-64/limetr*.tar.bz2 46 | 47 | -------------------------------------------------------------------------------- /limetr/README.md: -------------------------------------------------------------------------------- 1 | # Linear Mixed Effects Model with Trimming 2 | 3 | [![License](https://img.shields.io/badge/License-BSD%202--Clause-orange.svg)](https://opensource.org/licenses/BSD-2-Clause) 4 | [![Travis CI/ Build Status](https://travis-ci.org/ramittal/limetr.svg?branch=master)](https://travis-ci.org/ramittal/limetr) 5 | [![Coverage Status](https://coveralls.io/repos/github/ramittal/limetr/badge.svg?branch=master)](https://coveralls.io/github/ramittal/limetr?branch=master) 6 | [![Scrutinizer Code Quality](https://scrutinizer-ci.com/g/ramittal/limetr/badges/quality-score.png?b=master)](https://scrutinizer-ci.com/g/ramittal/limetr/?branch=master) 7 | [![PyPI](https://img.shields.io/pypi/v/ramittal.svg)](https://badge.fury.io/py/ramittal) 8 | 9 | 10 | -------------------------------------------------------------------------------- /limetr/check_requirements.py: 
-------------------------------------------------------------------------------- 1 | # check if the required packages are installed 2 | import os 3 | import pathlib 4 | import importlib 5 | from sys import platform 6 | 7 | 8 | # installation of the required packages 9 | required_modules = [('numpy', 10 | 'conda install -y numpy'), 11 | ('scipy', 12 | 'conda install -y scipy'), 13 | ('ipopt', 14 | 'conda install -y -c conda-forge cyipopt')] 15 | 16 | 17 | def check_module(module_name, install_command): 18 | try: 19 | importlib.import_module(module_name) 20 | except ImportError: 21 | os.system(install_command) 22 | 23 | 24 | def extract_lib(lib_name, des_lib_folder): 25 | conda_lib = os.path.join(os.getenv("CONDA_PREFIX"), "lib") 26 | pathlib.Path(des_lib_folder).mkdir(exist_ok=True) 27 | lib_files = [file_name for file_name in os.listdir(conda_lib) 28 | if lib_name in file_name] 29 | 30 | if not lib_files: 31 | raise FileNotFoundError(lib_name + " not found!") 32 | 33 | for file in lib_files: 34 | os.system(" ".join(["cp -L", 35 | os.path.join(conda_lib, file), 36 | des_lib_folder])) 37 | 38 | if platform == "linux" or platform == "linux2": 39 | required_lib_name = lib_name + ".so" 40 | related_lib_files = [file_name for file_name in lib_files 41 | if required_lib_name in file_name] 42 | assert any(related_lib_files) 43 | if not pathlib.Path(os.path.join(des_lib_folder, 44 | required_lib_name)).exists(): 45 | os.system(" ".join(["ln -s", 46 | related_lib_files[-1], 47 | os.path.join(des_lib_folder, 48 | required_lib_name)])) 49 | 50 | 51 | for module_name, install_command in required_modules: 52 | check_module(module_name, install_command) 53 | 54 | # create the library of blas and lapack 55 | des_lib_folder = "./lib" 56 | extract_lib("libblas", des_lib_folder) 57 | extract_lib("liblapack", des_lib_folder) 58 | -------------------------------------------------------------------------------- /limetr/conda_pkg/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # original author: ramittal@uw.edu, 2020-12-10 4 | # Usage: build.sh Invoked from "conda build" to build conda package from meta.yaml 5 | # ../build/lib.linux-x86_64-3.7/limetr/special_mat.cpython-37m-x86_64-linux-gnu.so 6 | # must have been built and available before invoking this script 7 | # PREFIX, RECIPE_DIR, SP_DIR, PY_VER are defined by 'conda build' 8 | # Not using any custom variables from Makefile, since this is invoked 9 | # from conda-build, not from make.
conda-build can be invoked by make 10 | # https://docs.conda.io/projects/conda-build/en/latest/user-guide/environment-variables.html 11 | # 12 | # Purpose: Produces conda package including .so file for limetr 13 | # 14 | 15 | cwd="$(pwd)" 16 | bname="$(basename "${cwd}" )" 17 | 18 | # check and display environment variables used, never null from conda-build 19 | # shellcheck disable=SC2154 20 | if [ "" = "${RECIPE_DIR}" ] ; then 21 | echo "RECIPE_DIR is required but found empty" 22 | exit 1 23 | else 24 | echo "RECIPE_DIR is '${RECIPE_DIR}'" 25 | fi 26 | # shellcheck disable=SC2154 27 | if [ "" = "${PREFIX}" ] ; then 28 | echo "PREFIX is required but found empty" 29 | exit 1 30 | else 31 | echo "PREFIX is '${PREFIX}'" 32 | fi 33 | # shellcheck disable=SC2154 34 | if [ "" = "${SP_DIR}" ] ; then 35 | echo "SP_DIR is required but found empty" 36 | exit 1 37 | else 38 | echo "SP_DIR is '${SP_DIR}'" 39 | fi 40 | 41 | SOURCE_DIR="${RECIPE_DIR}/../src/limetr" 42 | filename="special_mat.cpython-37m-x86_64-linux-gnu.so" 43 | SHARED_OBJFILE="${RECIPE_DIR}/../build/lib.linux-x86_64-3.7/limetr/${filename}" 44 | 45 | # set target folder for SO file 46 | # shellcheck disable=SC2154 47 | TARGET_DIR="${SP_DIR}/${PKG_NAME}" 48 | 49 | echo "Starting build in working folder :${bname}: at $(date) in '${cwd}'" 50 | echo "Current working directory: '${cwd}'" 51 | echo "Working Conda Environment Variables: PREFIX: '${PREFIX}', RECIPE_DIR='${RECIPE_DIR}'" 52 | echo "Python's site-packages location: '${TARGET_DIR}'" 53 | # shellcheck disable=SC2154 54 | echo "Package Name: '${PKG_NAME}', Version:'${PKG_VERSION}'" 55 | # shellcheck disable=SC2154 56 | echo "Python version: '${PY_VER}'" 57 | echo "Contents of recipe directory: '${RECIPE_DIR}'" 58 | ls -l "${RECIPE_DIR}/" 59 | echo "Target path for package is:'${TARGET_DIR}/'" 60 | echo "Contents of SP_DIR (limetr) dir before copy" 61 | ls -l "${SP_DIR}/" 62 | 63 | # validate valid package and version names 64 | if [ "none" = "${PKG_NAME}" ] || [ "None" = "${PKG_VERSION}" ] ; then 65 | echo "***Error Invalid Package or version values " >&2 66 | exit 1 67 | fi 68 | 69 | echo "Starting copy of artifacts to '${TARGET_DIR}/'" 70 | if [ -d "${TARGET_DIR}" ] ; then 71 | echo "Target dir '${TARGET_DIR}' exists" 72 | else 73 | echo "Creating missing target dir '${TARGET_DIR}'" 74 | mkdir -p "${TARGET_DIR}" 75 | fi 76 | # copy shared library to target location 77 | if [ -f "${SHARED_OBJFILE}" ] ; then 78 | echo "Copying '${SHARED_OBJFILE}'" 79 | cp -f "${SHARED_OBJFILE}" "${TARGET_DIR}/" 80 | exit_status="$?" 81 | if [ "${exit_status}" -ne 0 ] ; then 82 | echo "***Error '${exit_status}'*** during copy of '${SHARED_OBJFILE}' file into site-packages dir " >&2 83 | exit 1 84 | fi 85 | else 86 | echo "***Error '${SHARED_OBJFILE}' does not exist at source " >&2 87 | exit 1 88 | fi 89 | echo "Copying source files from '${SOURCE_DIR}' to '${TARGET_DIR}'" 90 | ls -l "${SOURCE_DIR}" 91 | for afile in "${SOURCE_DIR}"/*.py 92 | do 93 | echo "Copying ${afile}" 94 | cp -f "${afile}" "${TARGET_DIR}/". 95 | exit_status="$?"
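# stop the packaging run as soon as any source file fails to copy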
96 | if [ "0" = "${exit_status}" ] ; then 97 | echo "${afile} copied successfully" 98 | else 99 | echo "*** Copy of ${afile} failed with status: '${exit_status}' into '${TARGET_DIR}' dir " >&2 100 | exit 1 101 | fi 102 | done 103 | 104 | echo "Contents of SP_DIR (limetr) dir after copy" 105 | ls -l "${SP_DIR}" 106 | if [ -d "${TARGET_DIR}" ] ; then 107 | echo "Contents of '${TARGET_DIR}'" 108 | ls -l "${TARGET_DIR}" 109 | else 110 | echo "*** Error: expected '${TARGET_DIR}' does not exist" 111 | exit 1 112 | fi 113 | 114 | echo "Completing build of :${PKG_NAME}: at $(date)" 115 | exit "0" 116 | 117 | -------------------------------------------------------------------------------- /limetr/conda_pkg/conda_build_config.yaml: -------------------------------------------------------------------------------- 1 | # - are replaced by _ to allow conda build to parse. 2 | 3 | numpy: 4 | - 1.19.1 5 | scipy: 6 | - 1.5.2 7 | cyipopt: 8 | 9 | # cyipopt: 10 | # - 0.2.0 11 | # scikit_sparse: 12 | # - 0.4.4 13 | -------------------------------------------------------------------------------- /limetr/conda_pkg/meta.yaml: -------------------------------------------------------------------------------- 1 | # {% set data = load_setup_py_data() %} 2 | # {% set name = data.get('name') %} 3 | # {% set version = data.get('version') %} 4 | # above dynamic loading did not work, so using hard coded values 5 | {% set name = 'limetr' %} 6 | {% set version = '0.0.2' %} 7 | 8 | package: 9 | name: "{{ name|lower }}" 10 | version: "{{ version }}" 11 | 12 | source: 13 | path: . 14 | 15 | build: 16 | # build steps are in build.sh 17 | # script: python check_requirements.py setup.py install 18 | 19 | requirements: 20 | host: 21 | - pip 22 | - python 23 | - wheel 24 | 25 | run: 26 | - pip 27 | - python 28 | - numpy 29 | 30 | test: 31 | # test steps are in run_test.sh 32 | # script: python tests/check_utils.py tests/check_limetr.py 33 | 34 | about: 35 | description: linear mixed effects model with trimming 36 | dev_url: https://github.com/zhengp0/limetr/ 37 | doc_url: https://github.com/zhengp0/limetr/ 38 | home: https://github.com/zhengp0/limetr/ 39 | license: BSD 2-Clause License 40 | license_family: BSD 41 | summary: Linear Mixed Effects Model with Trimming 42 | To copy file from build/lib.linux-x86_64-3.7/limetr/special_mat.cpython-37m-x86_64-linux-gnu.so 43 | 44 | -------------------------------------------------------------------------------- /limetr/conda_pkg/run_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # original author: ramittal@uw.edu, 2020-12-10 4 | # Usage: run_test.sh Invoked from "conda build" to test conda package from meta.yaml 5 | # Variables are based on temporary conda env. created and activated 6 | # by conda build. All paths used should be in context of conda 7 | # special_mat.cpython-37m-x86_64-linux-gnu.so is packaged during 8 | # build phase and made available 9 | # PREFIX, RECIPE_DIR, SP_DIR, PY_VER are defined by 'conda build' 10 | # Not using any custom variables from Makefile, since this is invoked 11 | # from conda-build, not from make. 
conda-build can be invoked by make 12 | # https://docs.conda.io/projects/conda-build/en/latest/user-guide/environment-variables.html 13 | # 14 | # Purpose: Validates presence of limetr files (.so) in python site-packages folder 15 | # ToDo: Add additional validations to actually invoke limetr code 16 | # 17 | 18 | cwd="$(pwd)" 19 | bname="$(basename "${cwd}" )" 20 | 21 | # check and display environment variables used, never null from conda-build 22 | # shellcheck disable=SC2154 23 | if [ "" = "${RECIPE_DIR}" ] ; then 24 | echo "RECIPE_DIR is required but found empty" 25 | exit 1 26 | else 27 | echo "RECIPE_DIR is '${RECIPE_DIR}'" 28 | fi 29 | # shellcheck disable=SC2154 30 | if [ "" = "${PREFIX}" ] ; then 31 | echo "PREFIX is required but found empty" 32 | exit 1 33 | else 34 | echo "PREFIX is '${PREFIX}'" 35 | fi 36 | # shellcheck disable=SC2154 37 | if [ "" = "${SP_DIR}" ] ; then 38 | echo "SP_DIR is required but found empty" 39 | exit 1 40 | else 41 | echo "SP_DIR is '${SP_DIR}'" 42 | fi 43 | 44 | filename="special_mat.cpython-37m-x86_64-linux-gnu.so" 45 | 46 | echo "Starting build in working dir :${bname}: at $(date) in '${cwd}'" 47 | echo "Current working directory: '${cwd}'" 48 | echo "Working Conda Environment Variables: PREFIX: '${PREFIX}', RECIPE_DIR='${RECIPE_DIR}'" 49 | # shellcheck disable=SC2154 50 | echo "Package Name: '${PKG_NAME}', Version:'${PKG_VERSION}'" 51 | # shellcheck disable=SC2154 52 | echo "Python version: '${PY_VER}'" 53 | echo "Contents of recipe directory: '${RECIPE_DIR}'" 54 | ls -l "${RECIPE_DIR}/" 55 | echo "Contents of SP_DIR (limetr) dir before test" 56 | ls -l "${SP_DIR}/" 57 | 58 | # set expected folder for SO file 59 | SHAREDOBJ_DIR="${SP_DIR}/${PKG_NAME}" 60 | echo "Python's site-packages location: '${SHAREDOBJ_DIR}'" 61 | echo "Target path for package is:'${SHAREDOBJ_DIR}/'" 62 | 63 | echo "## Listing conda environment" 64 | conda env list 65 | 66 | echo "## Listing installed conda packages" 67 | conda list | tee /tmp/conda_pkg_installed.txt 68 | 69 | echo "Validating existence of files at target location" 70 | SHARED_OBJFILE="${SHAREDOBJ_DIR}/${filename}" 71 | echo "Looking for '${SHARED_OBJFILE}'" 72 | if [ -f "${SHARED_OBJFILE}" ] ; then 73 | echo "File '${SHARED_OBJFILE}' exists at site-packages" 74 | ls -l "${SHARED_OBJFILE}" 75 | else 76 | echo "***Error '${SHARED_OBJFILE}' does not exist " >&2 77 | echo "########## NOT EXITING exit 1" 78 | fi 79 | 80 | # validate installation of package 81 | pkgver_file="/tmp/conda-${PKG_NAME}.txt" 82 | grep -i "^${PKG_NAME}" /tmp/conda_pkg_installed.txt > "${pkgver_file}" 83 | exit_status=$? 84 | if [ "0" = "${exit_status}" ] ; then 85 | # validate version 86 | version="$(cut -f 2- -d' ' "${pkgver_file}" | xargs | cut -f1 -d' ' | xargs )" 87 | if [ "${version}" = "${PKG_VERSION}" ] ; then 88 | echo "Correct version of Package Name: '${PKG_NAME}-${PKG_VERSION}' installed in env." 89 | else 90 | echo "***Error Incorrect version of '${PKG_NAME}' is installed in build/test conda env. " >&2 91 | echo "***Required '${PKG_VERSION}', found: '${version}' " >&2 92 | exit 1 93 | fi 94 | else 95 | echo "***Error '${PKG_NAME}' is not installed in current conda env. 
" >&2 96 | exit 1 97 | fi 98 | 99 | echo "Checking for existence of tests" 100 | if [ -d "${RECIPE_DIR}/../tests" ] ; then 101 | test_result=$(python "${RECIPE_DIR}/../tests/check_utils.py" "${RECIPE_DIR}/../tests/check_limetr.py") 102 | echo "Test result is '${test_result}'" 103 | else 104 | echo "Unable to find test sources, no test executed" 105 | fi 106 | 107 | echo "Completing test of :${PKG_NAME}: at $(date) with exit status: :${exit_status}:" 108 | exit "${exit_status}" 109 | 110 | -------------------------------------------------------------------------------- /limetr/experiments/test_trimming_with_certain_inlier.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Test Trimming with Certain Inliers" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "import matplotlib.pyplot as plt\n", 18 | "from limetr import LimeTr" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "## create test data" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "N = 101\n", 35 | "X = np.linspace(0.0, 2.0, N)\n", 36 | "X = np.insert(X[:, None], 0, 1.0, axis=1)\n", 37 | "Z = np.ones((N, 1))\n", 38 | "\n", 39 | "k_beta = 2\n", 40 | "k_gamma = 1\n", 41 | "\n", 42 | "n = np.array([50, 51])\n", 43 | "beta_true = np.array([1.0, 2.0])\n", 44 | "gamma_true = np.array([0.0])\n", 45 | "S = np.repeat(0.1, N)\n", 46 | "\n", 47 | "u = np.random.randn(n.size, k_gamma)*np.sqrt(gamma_true)\n", 48 | "E = np.random.randn(N)*S\n", 49 | "Y = X.dot(beta_true) + np.sum(Z*np.repeat(u, n, axis=0), axis=1) + E\n", 50 | "\n", 51 | "# add outlier\n", 52 | "num_outliers = 5\n", 53 | "outlier_id = np.random.choice(N, num_outliers, replace=False)\n", 54 | "Y[outlier_id] += 10.0" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "## without pre-select inlier" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "lt = LimeTr(n, k_beta, k_gamma,\n", 71 | " Y,\n", 72 | " lambda beta: X.dot(beta),\n", 73 | " lambda beta: X,\n", 74 | " Z,\n", 75 | " S=S,\n", 76 | " inlier_percentage = 1.0 - num_outliers/N)" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "beta_soln, gamma_soln, w_soln = lt.fitModel()" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "w_soln[outlier_id]" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "plt.scatter(X[:, 1], Y, marker='.')\n", 104 | "plt.scatter(X[w_soln == 0.0, 1], Y[w_soln == 0.0], marker='x', color='r')\n", 105 | "plt.plot(X[:, 1], X.dot(beta_true), 'k')\n", 106 | "plt.plot(X[:, 1], X.dot(beta_soln))" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": {}, 112 | "source": [ 113 | "## pre-select inlier" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "lt = LimeTr(n, k_beta, k_gamma,\n", 123 | " 
Y,\n", 124 | " lambda beta: X.dot(beta),\n", 125 | " lambda beta: X,\n", 126 | " Z,\n", 127 | " S=S,\n", 128 | " certain_inlier_id = np.array([outlier_id[0]]),\n", 129 | " inlier_percentage = 1.0 - num_outliers/N)" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "beta_soln, gamma_soln, w_soln = lt.fitModel()" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "w_soln[outlier_id]" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": null, 153 | "metadata": {}, 154 | "outputs": [], 155 | "source": [ 156 | "plt.scatter(X[:, 1], Y, marker='.')\n", 157 | "plt.scatter(X[outlier_id[0], 1], Y[outlier_id[0]], marker='o', color='g', facecolors='none')\n", 158 | "plt.scatter(X[w_soln == 0.0, 1], Y[w_soln == 0.0], marker='x', color='r')\n", 159 | "plt.plot(X[:, 1], X.dot(beta_true), 'k')\n", 160 | "plt.plot(X[:, 1], X.dot(beta_soln))" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": null, 166 | "metadata": {}, 167 | "outputs": [], 168 | "source": [] 169 | } 170 | ], 171 | "metadata": { 172 | "kernelspec": { 173 | "display_name": "Python 3", 174 | "language": "python", 175 | "name": "python3" 176 | }, 177 | "language_info": { 178 | "codemirror_mode": { 179 | "name": "ipython", 180 | "version": 3 181 | }, 182 | "file_extension": ".py", 183 | "mimetype": "text/x-python", 184 | "name": "python", 185 | "nbconvert_exporter": "python", 186 | "pygments_lexer": "ipython3", 187 | "version": "3.7.3" 188 | } 189 | }, 190 | "nbformat": 4, 191 | "nbformat_minor": 2 192 | } 193 | -------------------------------------------------------------------------------- /limetr/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from numpy.distutils.core import setup 3 | from numpy.distutils.core import Extension 4 | 5 | # fortran extension module 6 | ext = Extension(name='limetr.special_mat', 7 | sources=['src/limetr/special_mat.f90'], 8 | library_dirs=['./lib'], 9 | libraries=['lapack', 'blas']) 10 | 11 | setup(name='limetr', 12 | version='0.0.2', 13 | description='linear mixed effects model with trimming', 14 | url='https://github.com/zhengp0/limetr', 15 | author='Peng Zheng', 16 | author_email='zhengp@uw.edu', 17 | license='MIT', 18 | packages=['limetr'], 19 | package_dir={'limetr': 'src/limetr'}, 20 | ext_modules=[ext], 21 | install_requires=['numpy', 'scipy', 'ipopt'], 22 | zip_safe=False) 23 | -------------------------------------------------------------------------------- /limetr/src/limetr/Makefile: -------------------------------------------------------------------------------- 1 | # make file for the fortran module 2 | 3 | .PHONY: clean 4 | 5 | special_mat: special_mat.f90 6 | f2py -c -m special_mat special_mat.f90 -L/usr/local/lib -llapack -lblas 7 | 8 | clean: 9 | find . -name '__pycache__' | xargs rm -rf 10 | find . -name '*.so' | xargs rm -rf 11 | -------------------------------------------------------------------------------- /limetr/tests/check_limetr.py: -------------------------------------------------------------------------------- 1 | # test suit for limetr 2 | import os 3 | import sys 4 | # add current directory 5 | sys.path.append('./') 6 | 7 | 8 | def run_test(name): 9 | namespace = {} 10 | exec('import ' + name, namespace) 11 | exec('ok = ' + name + '.' 
+ name + '()', namespace) 12 | ok = namespace['ok'] 13 | if ok: 14 | print(name + ': OK') 15 | else: 16 | print(name + ': Error') 17 | return ok 18 | 19 | 20 | fun_list = [ 21 | 'limetr_objective', 22 | 'limetr_gradient', 23 | 'limetr_objectiveTrimming', 24 | 'limetr_gradientTrimming', 25 | 'limetr_lasso' 26 | ] 27 | 28 | error_count = 0 29 | 30 | for name in fun_list: 31 | ok = run_test(name) 32 | if not ok: 33 | error_count += 1 34 | 35 | if error_count > 0: 36 | print('check_limetr: error_count =', error_count) 37 | sys.exit(1) 38 | else: 39 | print('check_limetr: OK') 40 | sys.exit(0) 41 | -------------------------------------------------------------------------------- /limetr/tests/check_utils.py: -------------------------------------------------------------------------------- 1 | # test suite for limetr 2 | import os 3 | import sys 4 | # add current directory 5 | sys.path.append('./') 6 | 7 | 8 | def run_test(name): 9 | namespace = {} 10 | exec('import ' + name, namespace) 11 | exec('ok = ' + name + '.' + name + '()', namespace) 12 | ok = namespace['ok'] 13 | if ok: 14 | print(name + ': OK') 15 | else: 16 | print(name + ': Error') 17 | return ok 18 | 19 | 20 | fun_list = [ 21 | 'izmat_lsvd', 22 | 'izmat_zdecomp', 23 | 'izmat_block_izmv', 24 | 'izmat_izmv', 25 | 'izmat_block_izmm', 26 | 'izmat_izmm', 27 | 'izmat_izeig', 28 | 'izmat_block_izdiag', 29 | 'izmat_izdiag', 30 | 'varmat_dot', 31 | 'varmat_invDot', 32 | 'varmat_diag', 33 | 'varmat_invDiag', 34 | 'varmat_logDet', 35 | 'projCappedSimplex' 36 | ] 37 | 38 | error_count = 0 39 | 40 | for name in fun_list: 41 | ok = run_test(name) 42 | if not ok: 43 | error_count += 1 44 | 45 | if error_count > 0: 46 | print('check_utils: error_count =', error_count) 47 | sys.exit(1) 48 | else: 49 | print('check_utils: OK') 50 | sys.exit(0) 51 | -------------------------------------------------------------------------------- /limetr/tests/izmat_block_izdiag.py: -------------------------------------------------------------------------------- 1 | # check utils block_izdiag 2 | 3 | 4 | def izmat_block_izdiag(): 5 | import numpy as np 6 | from limetr.special_mat import izmat 7 | 8 | ok = True 9 | tol = 1e-10 10 | # problem 1, tall matrix 11 | # ------------------------------------------------------------------------- 12 | n, k = 6, 3 13 | l = min(n, k) 14 | 15 | z = np.random.randn(n, k) 16 | 17 | my_u = np.zeros(n*l) 18 | my_s = np.zeros(l) 19 | izmat.lsvd(z, my_u, my_s) 20 | 21 | tr_y = np.diag(np.eye(n) + z.dot(z.T)) 22 | my_y = np.zeros(n) 23 | izmat.block_izdiag(my_u, my_s**2, my_y) 24 | 25 | err = np.linalg.norm(tr_y - my_y) 26 | ok = ok and err < tol 27 | 28 | if not ok: 29 | print('err in block_izdiag tall matrix') 30 | print('err:', err) 31 | 32 | # problem 2, fat matrix 33 | # ------------------------------------------------------------------------- 34 | n, k = 3, 6 35 | l = min(n, k) 36 | 37 | z = np.random.randn(n, k) 38 | 39 | my_u = np.zeros(n*l) 40 | my_s = np.zeros(l) 41 | izmat.lsvd(z, my_u, my_s) 42 | 43 | tr_y = np.diag(np.eye(n) + z.dot(z.T)) 44 | my_y = np.zeros(n) 45 | izmat.block_izdiag(my_u, my_s**2, my_y) 46 | 47 | err = np.linalg.norm(tr_y - my_y) 48 | ok = ok and err < tol 49 | 50 | if not ok: 51 | print('err in block_izdiag fat matrix') 52 | print('err:', err) 53 | 54 | return ok 55 | -------------------------------------------------------------------------------- /limetr/tests/izmat_block_izmm.py: -------------------------------------------------------------------------------- 1 | # check utils block_izmm 2 | 3 | 4 | def 
izmat_block_izmm(): 5 | import numpy as np 6 | from limetr.special_mat import izmat 7 | 8 | ok = True 9 | tol = 1e-10 10 | # problem 1, tall matrix 11 | # ------------------------------------------------------------------------- 12 | n, k = 6, 3 13 | l = min(n, k) 14 | 15 | z = np.random.randn(n, k) 16 | x = np.random.randn(n, 5) 17 | 18 | my_u = np.zeros(n*l) 19 | my_s = np.zeros(l) 20 | izmat.lsvd(z, my_u, my_s) 21 | 22 | tr_y = x + z.dot(z.T.dot(x)) 23 | my_y = np.zeros((n, 5), order='F') 24 | izmat.block_izmm(my_u, my_s**2, x, my_y) 25 | 26 | err = np.linalg.norm(tr_y - my_y) 27 | ok = ok and err < tol 28 | 29 | if not ok: 30 | print('err in block_izmm tall matrix') 31 | print('err:', err) 32 | 33 | # problem 2, fat matrix 34 | # ------------------------------------------------------------------------- 35 | n, k = 3, 6 36 | l = min(n, k) 37 | 38 | z = np.random.randn(n, k) 39 | x = np.random.randn(n, 5) 40 | 41 | my_u = np.zeros(n*l) 42 | my_s = np.zeros(l) 43 | izmat.lsvd(z, my_u, my_s) 44 | 45 | tr_y = x + z.dot(z.T.dot(x)) 46 | my_y = np.zeros((n, 5), order='F') 47 | izmat.block_izmm(my_u, my_s**2, x, my_y) 48 | 49 | err = np.linalg.norm(tr_y - my_y) 50 | ok = ok and err < tol 51 | 52 | if not ok: 53 | print('err in block_izmm fat matrix') 54 | print('err:', err) 55 | 56 | return ok 57 | -------------------------------------------------------------------------------- /limetr/tests/izmat_block_izmv.py: -------------------------------------------------------------------------------- 1 | # check utils block_izmv 2 | 3 | 4 | def izmat_block_izmv(): 5 | import numpy as np 6 | from limetr.special_mat import izmat 7 | 8 | ok = True 9 | tol = 1e-10 10 | # problem 1, tall matrix 11 | # ------------------------------------------------------------------------- 12 | n, k = 6, 3 13 | l = min(n, k) 14 | 15 | z = np.random.randn(n, k) 16 | x = np.random.randn(n) 17 | 18 | my_u = np.zeros(n*l) 19 | my_s = np.zeros(l) 20 | izmat.lsvd(z, my_u, my_s) 21 | 22 | tr_y = x + z.dot(z.T.dot(x)) 23 | my_y = np.zeros(n) 24 | izmat.block_izmv(my_u, my_s**2, x, my_y) 25 | 26 | err = np.linalg.norm(tr_y - my_y) 27 | ok = ok and err < tol 28 | 29 | if not ok: 30 | print('err in block_izmv tall matrix') 31 | print('err:', err) 32 | 33 | # problem 2, fat matrix 34 | # ------------------------------------------------------------------------- 35 | n, k = 3, 6 36 | l = min(n, k) 37 | 38 | z = np.random.randn(n, k) 39 | x = np.random.randn(n) 40 | 41 | my_u = np.zeros(n*l) 42 | my_s = np.zeros(l) 43 | izmat.lsvd(z, my_u, my_s) 44 | 45 | tr_y = x + z.dot(z.T.dot(x)) 46 | my_y = np.zeros(n) 47 | izmat.block_izmv(my_u, my_s**2, x, my_y) 48 | 49 | err = np.linalg.norm(tr_y - my_y) 50 | ok = ok and err < tol 51 | 52 | if not ok: 53 | print('err in block_izmv fat matrix') 54 | print('err:', err) 55 | 56 | return ok 57 | -------------------------------------------------------------------------------- /limetr/tests/izmat_izdiag.py: -------------------------------------------------------------------------------- 1 | # check utils izdiag 2 | 3 | 4 | def izmat_izdiag(): 5 | import numpy as np 6 | from limetr.special_mat import izmat 7 | 8 | ok = True 9 | tol = 1e-10 10 | # setup problem 11 | # ------------------------------------------------------------------------- 12 | k = 3 13 | n = np.array([5, 2, 4]) 14 | m = n.size 15 | 16 | z_list = [np.random.randn(n[i], k) for i in range(m)] 17 | 18 | z = np.vstack(z_list) 19 | 20 | ns = np.minimum(n, k) 21 | nu = ns*n 22 | nx = n 23 | nz = n 24 | 25 | u = np.zeros(nu.sum()) 26 | s = 
np.zeros(ns.sum()) 27 | 28 | izmat.zdecomp(nz, nu, ns, z, u, s) 29 | my_y = izmat.izdiag(n.sum(), nu, ns, nx, u, s**2) 30 | 31 | y_list = [np.diag(np.eye(n[i]) + z_list[i].dot(z_list[i].T)) 32 | for i in range(m)] 33 | 34 | tr_y = np.hstack(y_list) 35 | 36 | err = np.linalg.norm(tr_y - my_y) 37 | ok = ok and err < tol 38 | if not ok: 39 | print('err in izdiag') 40 | print('err:', err) 41 | 42 | return ok 43 | -------------------------------------------------------------------------------- /limetr/tests/izmat_izeig.py: -------------------------------------------------------------------------------- 1 | # check utils izeig 2 | 3 | 4 | def izmat_izeig(): 5 | import numpy as np 6 | from scipy.linalg import block_diag 7 | from limetr.special_mat import izmat 8 | 9 | ok = True 10 | tol = 1e-10 11 | # setup problem 12 | # ------------------------------------------------------------------------- 13 | k = 3 14 | n = np.array([5, 2, 4]) 15 | m = n.size 16 | 17 | z_list = [np.random.randn(n[i], k) for i in range(m)] 18 | 19 | z = np.vstack(z_list) 20 | 21 | ns = np.minimum(n, k) 22 | nu = ns*n 23 | nx = n 24 | nz = n 25 | 26 | u = np.zeros(nu.sum()) 27 | s = np.zeros(ns.sum()) 28 | 29 | izmat.zdecomp(nz, nu, ns, z, u, s) 30 | 31 | my_eig = izmat.izeig(sum(n), n, ns, s**2) 32 | tr_eig, vec = np.linalg.eig(block_diag(*[ 33 | np.eye(n[i]) + z_list[i].dot(z_list[i].T) 34 | for i in range(len(n)) 35 | ])) 36 | 37 | err = np.linalg.norm(np.sort(tr_eig) - np.sort(my_eig))  # eigenvalue order is arbitrary, so compare sorted values 38 | ok = ok and err < tol 39 | if not ok: 40 | print('err in izeig') 41 | print('err:', err) 42 | 43 | return ok 44 | -------------------------------------------------------------------------------- /limetr/tests/izmat_izmm.py: -------------------------------------------------------------------------------- 1 | # check utils izmm 2 | 3 | 4 | def izmat_izmm(): 5 | import numpy as np 6 | from limetr.special_mat import izmat 7 | 8 | ok = True 9 | tol = 1e-10 10 | # setup problem 11 | # ------------------------------------------------------------------------- 12 | k = 3 13 | n = np.array([5, 2, 4]) 14 | m = n.size 15 | 16 | z_list = [np.random.randn(n[i], k) for i in range(m)] 17 | x_list = [np.random.randn(n[i], 5) for i in range(m)] 18 | 19 | z = np.vstack(z_list) 20 | x = np.vstack(x_list) 21 | 22 | ns = np.minimum(n, k) 23 | nu = ns*n 24 | nx = n 25 | nz = n 26 | 27 | u = np.zeros(nu.sum()) 28 | s = np.zeros(ns.sum()) 29 | 30 | izmat.zdecomp(nz, nu, ns, z, u, s) 31 | my_y = izmat.izmm(nu, ns, nx, u, s**2, x) 32 | 33 | y_list = [x_list[i] + z_list[i].dot(z_list[i].T.dot(x_list[i])) 34 | for i in range(m)] 35 | 36 | tr_y = np.vstack(y_list) 37 | 38 | err = np.linalg.norm(tr_y - my_y) 39 | ok = ok and err < tol 40 | if not ok: 41 | print('err in izmm') 42 | print('err:', err) 43 | 44 | return ok 45 | -------------------------------------------------------------------------------- /limetr/tests/izmat_izmv.py: -------------------------------------------------------------------------------- 1 | # check utils izmv 2 | 3 | 4 | def izmat_izmv(): 5 | import numpy as np 6 | from limetr.special_mat import izmat 7 | 8 | ok = True 9 | tol = 1e-10 10 | # setup problem 11 | # ------------------------------------------------------------------------- 12 | k = 3 13 | n = np.array([5, 2, 4]) 14 | m = n.size 15 | 16 | z_list = [np.random.randn(n[i], k) for i in range(m)] 17 | x_list = [np.random.randn(n[i]) for i in range(m)] 18 | 19 | z = np.vstack(z_list) 20 | x = np.hstack(x_list) 21 | 22 | ns = np.minimum(n, k) 23 | nu = ns*n 24 | nx = n 25 | nz = n 26 | 27 | u = np.zeros(nu.sum()) 28 | s = np.zeros(ns.sum()) 29 | 30 | izmat.zdecomp(nz,
nu, ns, z, u, s) 31 | my_y = izmat.izmv(nu, ns, nx, u, s**2, x) 32 | 33 | y_list = [x_list[i] + z_list[i].dot(z_list[i].T.dot(x_list[i])) 34 | for i in range(m)] 35 | 36 | tr_y = np.hstack(y_list) 37 | 38 | err = np.linalg.norm(tr_y - my_y) 39 | ok = ok and err < tol 40 | if not ok: 41 | print('err in izmv') 42 | print('err:', err) 43 | 44 | return ok 45 | -------------------------------------------------------------------------------- /limetr/tests/izmat_lsvd.py: -------------------------------------------------------------------------------- 1 | # check utils lsvd 2 | 3 | 4 | def izmat_lsvd(): 5 | import numpy as np 6 | from limetr.special_mat import izmat 7 | 8 | ok = True 9 | tol = 1e-10 10 | # problem 1, tall matrix 11 | # ------------------------------------------------------------------------- 12 | n, k = 6, 3 13 | z = np.random.randn(n, k) 14 | tr_u, tr_s, tr_vt = np.linalg.svd(z, full_matrices=False) 15 | my_u = np.zeros(tr_u.size) 16 | my_s = np.zeros(tr_s.size) 17 | izmat.lsvd(z, my_u, my_s) 18 | 19 | err = np.linalg.norm(my_u.reshape(k, n).T - tr_u) 20 | ok = ok and err < tol 21 | 22 | if not ok: 23 | print('err in lsvd tall matrix') 24 | print('err:', err) 25 | 26 | # problem 2, fat matrix 27 | # ------------------------------------------------------------------------- 28 | n, k = 3, 6 29 | z = np.random.randn(n, k) 30 | tr_u, tr_s, tr_vt = np.linalg.svd(z, full_matrices=False) 31 | my_u = np.zeros(tr_u.size) 32 | my_s = np.zeros(tr_s.size) 33 | izmat.lsvd(z, my_u, my_s) 34 | 35 | err = np.linalg.norm(np.abs(my_u.reshape(n, n).T) - np.abs(tr_u)) 36 | ok = ok and err < tol 37 | 38 | if not ok: 39 | print('err in lsvd fat matrix') 40 | print('err:', err) 41 | 42 | return ok 43 | -------------------------------------------------------------------------------- /limetr/tests/izmat_zdecomp.py: -------------------------------------------------------------------------------- 1 | # check utils zdecomp 2 | 3 | 4 | def izmat_zdecomp(): 5 | import numpy as np 6 | from limetr.special_mat import izmat 7 | 8 | ok = True 9 | tol = 1e-10 10 | # setup problem 11 | # ------------------------------------------------------------------------- 12 | k = 3 13 | n = [5, 2, 4] 14 | 15 | z_list = [] 16 | tr_u_list = [] 17 | tr_s_list = [] 18 | for i in range(len(n)): 19 | z_list.append(np.random.randn(n[i], k)) 20 | u, s, vt = np.linalg.svd(z_list[-1], full_matrices=False) 21 | tr_u_list.append(u) 22 | tr_s_list.append(s) 23 | 24 | z = np.vstack(z_list) 25 | tr_u = np.hstack([u.reshape(u.size, order='F') for u in tr_u_list]) 26 | tr_s = np.hstack(tr_s_list) 27 | 28 | my_u = np.zeros(tr_u.size) 29 | my_s = np.zeros(tr_s.size) 30 | 31 | nz = [z_sub.shape[0] for z_sub in z_list] 32 | nu = [u_sub.size for u_sub in tr_u_list] 33 | ns = [s_sub.size for s_sub in tr_s_list] 34 | 35 | izmat.zdecomp(nz, nu, ns, z, my_u, my_s) 36 | err = np.linalg.norm(np.abs(my_u) - np.abs(tr_u)) + np.linalg.norm(my_s - tr_s)  # abs(): singular vectors are sign-ambiguous 37 | ok = ok and err < tol 38 | if not ok: 39 | print('err in zdecomp') 40 | print('err:', err) 41 | 42 | return ok 43 | -------------------------------------------------------------------------------- /limetr/tests/limetr_gradient.py: -------------------------------------------------------------------------------- 1 | # test function gradient 2 | 3 | 4 | def limetr_gradient(): 5 | import numpy as np 6 | from limetr.__init__ import LimeTr 7 | 8 | ok = True 9 | # setup test problem 10 | # ------------------------------------------------------------------------- 11 | model = LimeTr.testProblem(use_trimming=True, 12 | use_constraints=True, 13 | use_regularizer=True, 14 | use_uprior=True, 15 | use_gprior=True, 16 | know_obs_std=False,
17 | share_obs_std=True) 18 | 19 | tol = 1e-6 20 | 21 | # test the gradient 22 | # ------------------------------------------------------------------------- 23 | x = np.random.randn(model.k) 24 | x[model.idx_gamma] = 0.1 25 | x[model.idx_delta] = 0.1 26 | 27 | tr_grad = model.gradient(x, use_ad=True) 28 | my_grad = model.gradient(x) 29 | 30 | err = np.linalg.norm(tr_grad - my_grad) 31 | ok = ok and err < tol 32 | 33 | if not ok: 34 | print('err', err) 35 | print('tr_grad', tr_grad) 36 | print('my_grad', my_grad) 37 | 38 | return ok 39 | -------------------------------------------------------------------------------- /limetr/tests/limetr_gradientTrimming.py: -------------------------------------------------------------------------------- 1 | # test function gradientTrimming 2 | 3 | 4 | def limetr_gradientTrimming(): 5 | import numpy as np 6 | from limetr.__init__ import LimeTr 7 | 8 | ok = True 9 | # setup test problem 10 | # ------------------------------------------------------------------------- 11 | model = LimeTr.testProblem(use_trimming=True) 12 | 13 | # decouple all the studies 14 | model.n = np.array([1]*model.N) 15 | 16 | tol = 1e-8 17 | 18 | # test gradientTrimming 19 | # ------------------------------------------------------------------------- 20 | x = np.hstack((model.beta, model.gamma)) 21 | w = model.w 22 | 23 | tr_grad = model.gradientTrimming(w, use_ad=True) 24 | my_grad = model.gradientTrimming(w) 25 | 26 | err = np.linalg.norm(tr_grad - my_grad) 27 | ok = ok and err < tol 28 | 29 | if not ok: 30 | print('err', err) 31 | print('tr_grad', tr_grad) 32 | print('my_grad', my_grad) 33 | 34 | return ok 35 | -------------------------------------------------------------------------------- /limetr/tests/limetr_lasso.py: -------------------------------------------------------------------------------- 1 | # test function lprior 2 | 3 | 4 | def limetr_lasso(): 5 | import numpy as np 6 | from limetr.__init__ import LimeTr 7 | 8 | ok = True 9 | # setup test problem 10 | # ------------------------------------------------------------------------- 11 | model = LimeTr.testProblemLasso() 12 | 13 | tol = 1e-6 14 | 15 | # test lasso 16 | # ------------------------------------------------------------------------- 17 | model.optimize() 18 | beta = model.beta 19 | zero_idx = np.abs(beta) <= 1e-8 20 | beta[zero_idx] = 0.0 21 | 22 | # calculate the gradient 23 | g_beta = -model.JF(beta).T.dot(model.Y - model.F(beta)) 24 | for i in range(model.k_beta): 25 | if beta[i] == 0.0 and np.abs(g_beta[i]) < model.lw[i]: 26 | g_beta[i] = 0.0 27 | else: 28 | g_beta[i] += np.sign(beta[i])*model.lw[i] 29 | 30 | err = np.linalg.norm(g_beta) 31 | ok = ok and err < tol 32 | 33 | if not ok: 34 | print('err', err) 35 | 36 | return ok 37 | -------------------------------------------------------------------------------- /limetr/tests/limetr_objective.py: -------------------------------------------------------------------------------- 1 | # test function objective 2 | 3 | 4 | def limetr_objective(): 5 | import numpy as np 6 | from limetr.__init__ import LimeTr 7 | 8 | ok = True 9 | # setup test problem 10 | # ------------------------------------------------------------------------- 11 | model = LimeTr.testProblem(use_constraints=True, 12 | use_regularizer=True, 13 | use_uprior=True, 14 | use_gprior=True, 15 | know_obs_std=False) 16 | 17 | tol = 1e-8 18 | 19 | # test objective 20 | # ------------------------------------------------------------------------- 21 | x = np.random.randn(model.k) 22 | x[model.idx_gamma] = 0.1 23 
| x[model.idx_delta] = 0.1 24 | 25 | tr_obj = model.objective(x, use_ad=True) 26 | my_obj = model.objective(x) 27 | 28 | err = np.abs(tr_obj - my_obj) 29 | ok = ok and err < tol 30 | 31 | if not ok: 32 | print('err', err) 33 | print('tr_obj', tr_obj) 34 | print('my_obj', my_obj) 35 | 36 | return ok 37 | -------------------------------------------------------------------------------- /limetr/tests/limetr_objectiveTrimming.py: -------------------------------------------------------------------------------- 1 | # test function objectiveTrimming 2 | 3 | 4 | def limetr_objectiveTrimming(): 5 | import numpy as np 6 | from limetr.__init__ import LimeTr 7 | 8 | ok = True 9 | # setup test problem 10 | # ------------------------------------------------------------------------- 11 | model = LimeTr.testProblem(use_trimming=True) 12 | 13 | tol = 1e-8 14 | 15 | # test objectiveTrimming 16 | # ------------------------------------------------------------------------- 17 | x = np.hstack((model.beta, model.gamma)) 18 | w = model.w 19 | 20 | r = model.Y - model.F(model.beta) 21 | t = (model.Z**2).dot(model.gamma) 22 | d = model.V + t 23 | 24 | tr_obj = 0.5*np.sum(r**2*w/d) + 0.5*model.N*np.log(2.0*np.pi)\ 25 | + 0.5*w.dot(np.log(d)) 26 | my_obj = model.objectiveTrimming(w) 27 | 28 | err = np.abs(tr_obj - my_obj) 29 | ok = ok and err < tol 30 | 31 | if not ok: 32 | print('err', err) 33 | print('tr_obj', tr_obj) 34 | print('my_obj', my_obj) 35 | 36 | return ok 37 | -------------------------------------------------------------------------------- /limetr/tests/projCappedSimplex.py: -------------------------------------------------------------------------------- 1 | # test function dot 2 | 3 | 4 | def projCappedSimplex(): 5 | import numpy as np 6 | from limetr.utils import projCappedSimplex 7 | 8 | ok = True 9 | # setup test problem 10 | # ------------------------------------------------------------------------- 11 | w = np.ones(10) 12 | sum_w = 9.0 13 | 14 | tr_w = np.repeat(0.9, 10) 15 | my_w = projCappedSimplex(w, sum_w) 16 | 17 | tol = 1e-10 18 | err = np.linalg.norm(tr_w - my_w) 19 | 20 | ok = ok and err < tol 21 | 22 | if not ok: 23 | print('tr_w', tr_w) 24 | print('my_w', my_w) 25 | 26 | return ok 27 | -------------------------------------------------------------------------------- /limetr/tests/varmat_diag.py: -------------------------------------------------------------------------------- 1 | # check utils diag 2 | 3 | 4 | def varmat_diag(): 5 | import numpy as np 6 | from limetr.utils import VarMat 7 | 8 | ok = True 9 | tol = 1e-10 10 | # setup problem 11 | # ------------------------------------------------------------------------- 12 | mat = VarMat.testProblem() 13 | D = mat.varMat() 14 | 15 | tr_y = np.diag(D) 16 | 17 | my_y = mat.diag() 18 | 19 | err = np.linalg.norm(tr_y - my_y) 20 | ok = ok and err < tol 21 | 22 | if not ok: 23 | print('err in diag') 24 | print('err:', err) 25 | 26 | return ok 27 | -------------------------------------------------------------------------------- /limetr/tests/varmat_dot.py: -------------------------------------------------------------------------------- 1 | # check utils dot 2 | 3 | 4 | def varmat_dot(): 5 | import numpy as np 6 | from limetr.utils import VarMat 7 | 8 | ok = True 9 | tol = 1e-10 10 | # setup problem 11 | # ------------------------------------------------------------------------- 12 | mat = VarMat.testProblem() 13 | D = mat.varMat() 14 | x = np.random.randn(mat.N) 15 | X = np.random.randn(mat.N, 5) 16 | 17 | tr_y = D.dot(x) 18 | tr_Y = D.dot(X) 19 | 
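# the structured VarMat product must agree with the dense matrix product for both a vector and a matrix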
20 | my_y = mat.dot(x) 21 | my_Y = mat.dot(X) 22 | 23 | err = np.linalg.norm(tr_y - my_y) + np.linalg.norm(tr_Y - my_Y) 24 | ok = ok and err < tol 25 | 26 | if not ok: 27 | print('err in dot') 28 | print('err:', err) 29 | 30 | return ok 31 | -------------------------------------------------------------------------------- /limetr/tests/varmat_invDiag.py: -------------------------------------------------------------------------------- 1 | # check utils invDiag 2 | 3 | 4 | def varmat_invDiag(): 5 | import numpy as np 6 | from limetr.utils import VarMat 7 | 8 | ok = True 9 | tol = 1e-10 10 | # setup problem 11 | # ------------------------------------------------------------------------- 12 | mat = VarMat.testProblem() 13 | inv_D = mat.invVarMat() 14 | 15 | tr_y = np.diag(inv_D) 16 | 17 | my_y = mat.invDiag() 18 | 19 | err = np.linalg.norm(tr_y - my_y) 20 | ok = ok and err < tol 21 | 22 | if not ok: 23 | print('err in invDiag') 24 | print('err:', err) 25 | 26 | return ok 27 | -------------------------------------------------------------------------------- /limetr/tests/varmat_invDot.py: -------------------------------------------------------------------------------- 1 | # check utils dot 2 | 3 | 4 | def varmat_invDot(): 5 | import numpy as np 6 | from limetr.utils import VarMat 7 | 8 | ok = True 9 | tol = 1e-10 10 | # setup problem 11 | # ------------------------------------------------------------------------- 12 | mat = VarMat.testProblem() 13 | inv_D = mat.invVarMat() 14 | x = np.random.randn(mat.N) 15 | X = np.random.randn(mat.N, 5) 16 | 17 | tr_y = inv_D.dot(x) 18 | tr_Y = inv_D.dot(X) 19 | 20 | my_y = mat.invDot(x) 21 | my_Y = mat.invDot(X) 22 | 23 | err = np.linalg.norm(tr_y - my_y) + np.linalg.norm(tr_Y - my_Y) 24 | ok = ok and err < tol 25 | 26 | if not ok: 27 | print('err in invDot') 28 | print('err:', err) 29 | 30 | return ok 31 | -------------------------------------------------------------------------------- /limetr/tests/varmat_logDet.py: -------------------------------------------------------------------------------- 1 | # check utils logDet 2 | 3 | 4 | def varmat_logDet(): 5 | import numpy as np 6 | from limetr.utils import VarMat 7 | 8 | ok = True 9 | tol = 1e-10 10 | # setup problem 11 | # ------------------------------------------------------------------------- 12 | mat = VarMat.testProblem() 13 | D = mat.varMat() 14 | 15 | tr_y = np.log(np.linalg.det(D)) 16 | my_y = mat.logDet() 17 | 18 | err = np.linalg.norm(tr_y - my_y) 19 | ok = ok and err < tol 20 | 21 | if not ok: 22 | print('err in logDet') 23 | print('err:', err) 24 | 25 | return ok 26 | -------------------------------------------------------------------------------- /mrtool/.github/workflows/python-build.yml: -------------------------------------------------------------------------------- 1 | name: python-build 2 | on: [push] 3 | jobs: 4 | build: 5 | 6 | runs-on: ubuntu-latest 7 | 8 | steps: 9 | - uses: actions/checkout@v2 10 | - name: Set up Python 3.8 11 | uses: actions/setup-python@v2 12 | with: 13 | python-version: 3.8 14 | - name: Install dependencies 15 | run: python -m pip install .[dev] --upgrade pip 16 | - name: Test with pytest 17 | run: pytest 18 | - name: Build package distribution 19 | if: startsWith(github.ref, 'refs/tags') 20 | run: | 21 | python -m pip install build 22 | python -m build --sdist --wheel --outdir dist/ . 
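      # note: the build step above and the publish step below are both gated on 'refs/tags', so ordinary branch pushes stop after the pytest stage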
23 |     - name: Publish package distribution to PyPI 24 |       if: startsWith(github.ref, 'refs/tags') 25 |       uses: pypa/gh-action-pypi-publish@master 26 |       with: 27 |         skip_existing: true 28 |         user: __token__ 29 |         password: ${{ secrets.PYPI_API_TOKEN }} 30 | 31 |
-------------------------------------------------------------------------------- /mrtool/.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | .vscode 3 | .DS_Store
-------------------------------------------------------------------------------- /mrtool/.readthedocs.yml: -------------------------------------------------------------------------------- 1 | build: 2 |   image: latest 3 | python: 4 |   version: 3.7 5 |   setup_py_install: true 6 |   install: 7 |     - requirements: docs/requirements.txt 8 | sphinx: 9 |   configuration: docs/source/conf.py
-------------------------------------------------------------------------------- /mrtool/LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2020, IHME Math Sciences 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 |    list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 |    this list of conditions and the following disclaimer in the documentation 14 |    and/or other materials provided with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 |
-------------------------------------------------------------------------------- /mrtool/Makefile: -------------------------------------------------------------------------------- 1 | # makefile for easy package management 2 | .PHONY: clean tests 3 | 4 | build: setup.py 5 | 	python setup.py build 6 | 7 | install: setup.py 8 | 	python setup.py install 9 | 10 | sdist: setup.py 11 | 	python setup.py sdist 12 | 13 | tests: 14 | 	pytest tests 15 | 16 | clean: 17 | 	find . -name "*.so*" | xargs rm -rf 18 | 	find . -name "*.pyc" | xargs rm -rf 19 | 	find . -name "__pycache__" | xargs rm -rf 20 | 	find . -name "build" | xargs rm -rf 21 | 	find . -name "dist" | xargs rm -rf 22 | 	find . -name "MANIFEST" | xargs rm -rf 23 | 	find . -name "*.egg-info" | xargs rm -rf 24 | 	find . -name ".pytest_cache" | xargs rm -rf 25 | 26 | uninstall: 27 | 	find $(CONDA_PREFIX)/lib/ -name "*mrtool*" | xargs rm -rf
-------------------------------------------------------------------------------- /mrtool/README.rst: -------------------------------------------------------------------------------- 1 | ====== 2 | MRTool 3 | ====== 4 | 5 | .. image:: https://img.shields.io/badge/License-BSD%202--Clause-orange.svg 6 |     :target: https://opensource.org/licenses/BSD-2-Clause 7 |     :alt: License 8 | 9 | .. image:: https://readthedocs.org/projects/mrtool/badge/?version=latest 10 |     :target: https://mrtool.readthedocs.io/en/latest/ 11 |     :alt: Documentation 12 | 13 | .. image:: https://github.com/ramittal/MRTool/workflows/build/badge.svg?branch=master 14 |     :target: https://github.com/ramittal/MRTool/actions?query=workflow%3Abuild 15 |     :alt: BuildStatus 16 | 17 | .. image:: https://badge.fury.io/py/MRTool.svg 18 |     :target: https://badge.fury.io/py/mrtool 19 |     :alt: PyPI 20 | 21 | .. image:: https://coveralls.io/repos/github/ramittal/MRTool/badge.svg?branch=master 22 |     :target: https://coveralls.io/github/ramittal/MRTool?branch=master 23 |     :alt: Coverage 24 | 25 | .. image:: https://www.codefactor.io/repository/github/ramittal/mrtool/badge/master 26 |     :target: https://www.codefactor.io/repository/github/ramittal/mrtool/overview/master 27 |     :alt: CodeFactor 28 | 29 | 30 | The **MRTool** (Meta-Regression Tool) package is designed to solve general meta-regression problems. 31 | The most interesting features include, 32 | 33 | * linear and log prediction functions, 34 | * spline extension for covariates, 35 | * direct Gaussian, Uniform and Laplace priors on fixed and random effects, 36 | * shape constraints (monotonicity and convexity) for splines. 37 | 38 | Advanced features include, 39 | 40 | * spline knots ensemble, 41 | * automatic covariate selection. 42 | 43 | 44 | Installation 45 | ------------ 46 | 47 | Required packages include, 48 | 49 | * basic scientific computing suite, Numpy, Scipy and Pandas, 50 | * main optimization engine, `IPOPT `_, 51 | * customized packages, `LimeTr `_ and 52 |   `XSpline `_, 53 | * testing tool, Pytest. 54 | 55 | After installing the required packages, clone the repository and install MRTool. 56 | 57 | .. code-block:: shell 58 | 59 |    git clone https://github.com/ihmeuw-msca/MRTool.git 60 |    cd MRTool && python setup.py install 61 | 62 | 63 | For more information please check the `documentation `_. 64 | 65 |
-------------------------------------------------------------------------------- /mrtool/docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS    ?= 7 | SPHINXBUILD   ?= sphinx-build 8 | SPHINXPROJ    = mrtool 9 | SOURCEDIR     = source 10 | BUILDDIR      = build 11 | 12 | # Put it first so that "make" without argument is like "make help". 13 | help: 14 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 15 | 16 | .PHONY: help Makefile 17 | 18 | # Catch-all target: route all unknown targets to Sphinx using the new 19 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
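# For example, `make html` is forwarded as `sphinx-build -M html source build`.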
20 | %: Makefile 21 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 22 | 23 | clean: 24 | rm -rf $(BUILDDIR) -------------------------------------------------------------------------------- /mrtool/docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /mrtool/docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx-autodoc-typehints==1.4.0 2 | matplotlib 3 | -------------------------------------------------------------------------------- /mrtool/docs/source/_static/css/custom.css: -------------------------------------------------------------------------------- 1 | /* font face */ 2 | body, p { 3 | font-family: BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol";; 4 | font-size: 11pt; 5 | line-height: 1.5; 6 | } 7 | 8 | code { 9 | font-family: SFMono, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", Courier,monospace; 10 | } 11 | 12 | h1, h2, h3 { 13 | font-family: BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol";; 14 | font-weight: normal; 15 | } -------------------------------------------------------------------------------- /mrtool/docs/source/api_reference/index.rst: -------------------------------------------------------------------------------- 1 | ============= 2 | API Reference 3 | ============= 4 | 5 | .. toctree:: 6 | :maxdepth: 2 7 | :glob: 8 | 9 | * 10 | -------------------------------------------------------------------------------- /mrtool/docs/source/api_reference/mrtool.core.rst: -------------------------------------------------------------------------------- 1 | =================== 2 | mrtool.core package 3 | =================== 4 | 5 | .. automodule:: mrtool.core.data 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | 10 | 11 | .. automodule:: mrtool.core.cov_model 12 | :members: 13 | :undoc-members: 14 | :show-inheritance: 15 | 16 | 17 | .. automodule:: mrtool.core.model 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | 22 | 23 | .. 
automodule:: mrtool.core.utils 24 |    :members: 25 |    :undoc-members: 26 |    :show-inheritance: 27 |
-------------------------------------------------------------------------------- /mrtool/docs/source/api_reference/mrtool.cov_selection.rst: -------------------------------------------------------------------------------- 1 | ============================ 2 | mrtool.cov_selection package 3 | ============================ 4 | 5 | .. automodule:: mrtool.cov_selection.covfinder 6 |    :members: 7 |    :undoc-members: 8 |    :show-inheritance: 9 |
-------------------------------------------------------------------------------- /mrtool/docs/source/api_reference/mrtool.evidence_score.rst: -------------------------------------------------------------------------------- 1 | ============================= 2 | mrtool.evidence_score package 3 | ============================= 4 | 5 | .. automodule:: mrtool.evidence_score.scorelator 6 |    :members: 7 |    :undoc-members: 8 |    :show-inheritance: 9 |
-------------------------------------------------------------------------------- /mrtool/docs/source/concepts/data_gen/index.rst: -------------------------------------------------------------------------------- 1 | .. _data_gen: 2 | 3 | ========================= 4 | Data Generating Mechanism 5 | ========================= 6 | 7 | During the modeling process, the first question that needs to be 8 | answered is how the data are generated; the data generating mechanism 9 | is about using the given information to create a predictive model. 10 | 11 | 12 | .. toctree:: 13 |    :maxdepth: 2 14 |    :glob: 15 | 16 |    *
-------------------------------------------------------------------------------- /mrtool/docs/source/concepts/data_gen/range_exposure.rst: -------------------------------------------------------------------------------- 1 | .. _range_exposure: 2 | 3 | ============== 4 | Range Exposure 5 | ============== 6 | 7 | Very often, data are collected over cohorts or different 8 | groups of people, and therefore one data point can be interpreted as an average. 9 | 10 | For example, if we are interested in the relation between smoking and the relative risk 11 | of getting lung cancer, one data point is measured by the relative risk between the smoking and the non-smoking group. 12 | Within the smoking group, subjects have different exposures to smoking. 13 | So what the data point measures is the average relative risk for the corresponding range of exposures. 14 | 15 | If we denote :math:`x` as the exposure and :math:`f(x)` as the function between the outcome and exposure, 16 | one measurement :math:`y` over a range of exposures :math:`x \in [a, b]` can be expressed as, 17 | 18 | .. math:: 19 | 20 |    y = \frac{1}{b - a}\int_a^b f(x)\,\mathrm{d}x. 21 | 22 | A special case is when the function :math:`f` is linear, 23 | :math:`f(x) = \beta x`, and the expression can be simplified as, 24 | 25 | .. math:: 26 | 27 |    y = \frac{1}{b - a}\int_a^b f(x)\,\mathrm{d}x = \frac{1}{2}(a + b) \beta. 28 | 29 | In this case it is equivalent to using the midpoint of the exposure range as the covariate. 30 | 31 | 32 | Sample Code 33 | ----------- 34 | 35 | In the code, you can tell the program that you have a range exposure by passing in a pair of covariates 36 | instead of one. 37 | 38 | ..
code-block:: python 39 | 40 |     cov_model = CovModel('exposure', alt_cov=['exposure_start', 'exposure_end']) 41 |
-------------------------------------------------------------------------------- /mrtool/docs/source/concepts/data_gen/rr1_binary.rst: -------------------------------------------------------------------------------- 1 | .. _rr1_binary: 2 | 3 | ======================= 4 | Relative Risk 1: Binary 5 | ======================= 6 | 7 | Relative risk (RR) is the most common measurement type for the applications of ``MRTool``. 8 | Here we take the chance to introduce the basic concepts regarding relative risk, and 9 | how we build different types of relative risk models in ``MRTool``. 10 | 11 | Relative risk is the probability ratio of a certain outcome between the exposed and unexposed groups. 12 | For more information please check the `wiki page `_. 13 | Here we use smoking and lung cancer as a risk-outcome pair to explain the idea. 14 | 15 | Imagine the experiment is conducted with two groups, the smoking (e) and non-smoking (u) groups. 16 | We record the probabilities of getting lung cancer in the two groups, :math:`P_e` and :math:`P_u`, 17 | and the relative risk can be expressed as, 18 | 19 | .. math:: 20 | 21 |    RR = \frac{P_e}{P_u}. 22 | 23 | To implement meta-analysis on the effect of smoking, we often convert the collected relative risks from different 24 | studies (`longitudinal `_ or not) to log space, 25 | which conveniently removes the sign restriction, 26 | 27 | .. math:: 28 | 29 |    \ln(RR) = \ln(P_e) - \ln(P_u). 30 | 31 | To set up the binary model, we simply parametrize the log relative risk with an intercept, 32 | 33 | .. math:: 34 | 35 |    \ln(RR) = \mathbf{1} (\beta + u), 36 | 37 | where :math:`\beta` is the fixed effect for the intercept and :math:`u` is the random effect. 38 | When :math:`\beta` is `significantly `_ 39 | greater than zero, we say that the risk is harmful. 40 | For other risk-outcome pairs, there is a possibility that :math:`\beta` is significantly less than zero, 41 | in which case we call the risk protective. 42 | 43 | Very often, instead of only considering smoking vs non-smoking (binary), we also want to study the effects 44 | under different exposures to smoking. The most common assumption is log linear; please check 45 | :ref:`rr2_log_linear` for the details. 46 | 47 | 48 | 49 | Sample Code 50 | ----------- 51 | 52 | To set up the problem, we only need ``LinearCovModel``. 53 | 54 | .. code-block:: python 55 | 56 |     from mrtool import MRData, LinearCovModel, MRBRT 57 | 58 |     data = MRData() 59 |     # `intercept` is automatically added to the data 60 |     # no need to pass it in `col_covs` 61 |     data.load_df( 62 |         df=df, 63 |         col_obs='ln_rr', 64 |         col_obs_se='ln_rr_se', 65 |         col_study_id='study_id' 66 |     ) 67 |     cov_model = LinearCovModel('intercept', use_re=True) 68 |     model = MRBRT(data, cov_models=[cov_model]) 69 |
-------------------------------------------------------------------------------- /mrtool/docs/source/concepts/data_gen/rr2_log_linear.rst: -------------------------------------------------------------------------------- 1 | .. _rr2_log_linear: 2 | 3 | =========================== 4 | Relative Risk 2: Log Linear 5 | =========================== 6 | 7 | When analyzing relative risk across different exposure levels, 8 | the most widely used assumption is that the model is log linear. 9 | We parametrize the log risk as a linear function of exposure, 10 | 11 | .. math:: 12 | 13 |    \ln(RR) = \ln(R_e) - \ln(R_u) = x_a (\beta + u) - x_r (\beta + u) = (x_a - x_r)(\beta + u), 14 | 15 | where :math:`x` is the exposure, :math:`\beta`, :math:`u` are the fixed and random effects, 16 | and :math:`a`, :math:`r` refer to the "alternative" and "reference" groups. 17 | These are consistent with the previous notation, "exposed" and "unexposed". 18 | 19 | **Remark 1**: **No intercept!** 20 | 21 | Notice that in this model, we do NOT include an intercept to model the log risk. 22 | It is not possible to infer the absolute position of the risk curve using relative risk data, 23 | only the relative position. 24 | 25 | To see this, first assume that we have an intercept in the log risk formulation, 26 | :math:`\ln(R) = (\beta_0 + u_0) + x (\beta_1 + u_1)`, 27 | when we construct the log relative risk, 28 | 29 | .. math:: 30 | 31 |    \begin{aligned} 32 |    \ln(RR) =& \ln(R_e) - \ln(R_u) \\ 33 |    =& (\beta_0 + u_0) + x_a (\beta_1 + u_1) - ((\beta_0 + u_0) + x_r (\beta_1 + u_1)) \\ 34 |    =& (x_a - x_r)(\beta_1 + u_1) 35 |    \end{aligned} 36 | 37 | the intercept cancels and we return to the original formula. 38 | 39 | **Remark 2**: **No intercept! Again!** 40 | 41 | The other possible use of the intercept is to directly model 42 | the log relative risk, instead of the log risk, 43 | 44 | .. math:: 45 | 46 |    \ln(RR) = (\beta_0 + u_0) + (x_a - x_r)(\beta_1 + u_1). 47 | 48 | This does NOT work because when :math:`x_a` is equal to :math:`x_r`, 49 | we expect the log relative risk to be zero. 50 | 51 | Compared with :ref:`rr1_binary`, where we use the intercept to model the log relative risk, 52 | 53 | * In the binary model, we directly model the log relative risk instead of the log risk. 54 | * In the binary model, we never have the case where the exposures for the two groups are the same. 55 | 56 | 57 | Sample Code 58 | ----------- 59 | 60 | To set up the problem, we only need ``LinearCovModel``, just as in :ref:`rr1_binary`. 61 | 62 | If there is already a column in the data frame corresponding to the exposure differences, 63 | we can simply use it as the covariate. 64 | 65 | .. code-block:: python 66 | 67 |     from mrtool import MRData, LinearCovModel, MRBRT 68 | 69 |     data = MRData() 70 |     data.load_df( 71 |         df=df, 72 |         col_obs='ln_rr', 73 |         col_obs_se='ln_rr_se', 74 |         col_covs=['exposure_diff'], 75 |         col_study_id='study_id' 76 |     ) 77 |     cov_model = LinearCovModel('exposure_diff', use_re=True) 78 |     model = MRBRT(data, cov_models=[cov_model]) 79 | 80 | Otherwise, if you pass in the exposures for the "alternative" and "reference" groups, 81 | ``LinearCovModel`` will set up the model for you. 82 | 83 | .. code-block:: python 84 | 85 |     data.load_df( 86 |         df=df, 87 |         col_obs='ln_rr', 88 |         col_obs_se='ln_rr_se', 89 |         col_covs=['exposure_alt', 'exposure_ref'], 90 |         col_study_id='study_id' 91 |     ) 92 |     cov_model = LinearCovModel(alt_cov='exposure_alt', ref_cov='exposure_ref', use_re=True) 93 |
-------------------------------------------------------------------------------- /mrtool/docs/source/concepts/index.rst: -------------------------------------------------------------------------------- 1 | .. _concepts: 2 | 3 | ======== 4 | Concepts 5 | ======== 6 | 7 | In ``MRTool`` there are many important concepts and definitions. 8 | We list them here under the topics of **data generating mechanisms**, 9 | **priors** and **optimization**. 10 | 11 | ..
toctree:: 12 | :maxdepth: 2 13 | :glob: 14 | 15 | data_gen/index 16 | priors/index 17 | optimization/index 18 | -------------------------------------------------------------------------------- /mrtool/docs/source/concepts/optimization/index.rst: -------------------------------------------------------------------------------- 1 | .. _optimization: 2 | 3 | ============ 4 | Optimization 5 | ============ 6 | 7 | -------------------------------------------------------------------------------- /mrtool/docs/source/concepts/priors/index.rst: -------------------------------------------------------------------------------- 1 | .. _priors: 2 | 3 | ====== 4 | Priors 5 | ====== 6 | 7 | -------------------------------------------------------------------------------- /mrtool/docs/source/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Configuration file for the Sphinx documentation builder. 3 | # 4 | # This file only contains a selection of the most common options. For a full 5 | # list see the documentation: 6 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 7 | 8 | # -- Path setup -------------------------------------------------------------- 9 | 10 | # If extensions (or modules to document with autodoc) are in another directory, 11 | # add these directories to sys.path here. If the directory is relative to the 12 | # documentation root, use os.path.abspath to make it absolute, like shown here. 13 | # 14 | 15 | from pathlib import Path 16 | import sys 17 | 18 | import mrtool 19 | base_dir = Path(mrtool.__file__).parent 20 | 21 | about = {} 22 | with (base_dir / '__about__.py').open() as f: 23 | exec(f.read(), about) 24 | 25 | sys.path.insert(0, Path('..').resolve()) 26 | 27 | 28 | # -- Project information ----------------------------------------------------- 29 | 30 | project = about['__title__'] 31 | copyright = f"2020, {about['__author__']}" 32 | author = about['__author__'] 33 | 34 | # The short X.Y version. 35 | version = about['__version__'] 36 | # The full version, including alpha/beta/rc tags. 37 | release = about['__version__'] 38 | 39 | # -- General configuration --------------------------------------------------- 40 | 41 | # Add any Sphinx extension module names here, as strings. They can be 42 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 43 | # ones. 44 | 45 | needs_sphinx = '1.5' 46 | 47 | extensions = [ 48 | 'sphinx.ext.autodoc', 49 | 'sphinx.ext.intersphinx', 50 | 'sphinx.ext.doctest', 51 | 'sphinx.ext.todo', 52 | 'sphinx.ext.coverage', 53 | 'sphinx.ext.mathjax', 54 | 'sphinx.ext.napoleon', 55 | 'sphinx.ext.viewcode', 56 | 'sphinx_autodoc_typehints', 57 | 'matplotlib.sphinxext.plot_directive', 58 | ] 59 | 60 | # Add any paths that contain templates here, relative to this directory. 61 | templates_path = ['_templates'] 62 | 63 | source_suffix = '.rst' 64 | master_doc = 'index' 65 | 66 | # List of patterns, relative to source directory, that match files and 67 | # directories to ignore when looking for source files. 68 | # This pattern also affects html_static_path and html_extra_path. 69 | exclude_patterns = [] 70 | 71 | 72 | # -- Options for HTML output ------------------------------------------------- 73 | 74 | # The theme to use for HTML and HTML Help pages. See the documentation for 75 | # a list of builtin themes. 
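# NOTE: sphinx_rtd_theme is not bundled with Sphinx itself; in this repo it is
# installed through the package's `docs` extra in setup.py (docs/requirements.txt
# does not list it).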
76 | # 77 | html_theme = 'sphinx_rtd_theme' 78 | 79 | # Add any paths that contain custom static files (such as style sheets) here, 80 | # relative to this directory. They are copied after the builtin static files, 81 | # so a file named "default.css" will overwrite the builtin "default.css". 82 | html_static_path = ['_static'] 83 | html_css_files = [ 84 |     'css/custom.css', 85 | ] 86 | 87 | add_module_names = False 88 |
-------------------------------------------------------------------------------- /mrtool/docs/source/examples/example_linear.rst: -------------------------------------------------------------------------------- 1 | .. _example_linear: 2 | 3 | ============================ 4 | Example: Simple Linear Model 5 | ============================ 6 | 7 | In the following, we will go through a simple example of how to solve 8 | a linear mixed effects model. Consider the following setup, 9 | 10 | .. math:: 11 | 12 |    y_{ij} = (\beta_0 + u_{0i}) + x \beta_1 + \epsilon_{ij} 13 | 14 | where :math:`y` is the measurement, :math:`x` is the covariate, :math:`\beta_0` and :math:`\beta_1` are the fixed 15 | effects, :math:`u_0` is the random intercept and :math:`\epsilon` is the measurement error. 16 | Here :math:`i` is the index for the study and :math:`j` is the index for the observation within the study. 17 | 18 | Assume our data frame looks like, 19 | 20 | .. csv-table:: 21 |    :header: y, x, y_se, study_id 22 |    :widths: 10, 10, 10, 10 23 |    :align: center 24 | 25 |    0.20, 0.0, 0.1, A 26 |    0.29, 0.1, 0.1, A 27 |    0.09, 0.2, 0.1, B 28 |    0.14, 0.3, 0.1, C 29 |    0.40, 0.4, 0.1, D 30 | 31 | and our goal is to obtain the fixed effects and random effects for each study. 32 | 33 | 34 | Create Data Object 35 | ------------------ 36 | The first step is to create an ``MRData`` object to carry the data information. 37 | 38 | .. code-block:: python 39 | 40 |     from mrtool import MRData 41 | 42 |     data = MRData() 43 |     data.load_df( 44 |         df, 45 |         col_obs='y', 46 |         col_covs=['x'], 47 |         col_obs_se='y_se', 48 |         col_study_id='study_id' 49 |     ) 50 | 51 | Notice that ``MRData`` will automatically create an ``intercept`` in the covariate list. 52 | 53 | Configure Covariate Models 54 | -------------------------- 55 | The second step is to create covariate models. 56 | 57 | .. code-block:: python 58 | 59 |     from mrtool import LinearCovModel 60 | 61 |     cov_intercept = LinearCovModel('intercept', use_re=True) 62 |     cov_x = LinearCovModel('x') 63 | 64 | 65 | Create Model and Fit Model 66 | -------------------------- 67 | The third step is to create the model that combines the data and covariate models, 68 | and use the optimization routine to find the result. 69 | 70 | .. code-block:: python 71 | 72 |     from mrtool import MRBRT 73 | 74 |     model = MRBRT( 75 |         data, 76 |         [cov_intercept, cov_x] 77 |     ) 78 |     model.fit_model() 79 | 80 | You can get the fixed effects and random effects by calling ``model.beta_soln`` and ``model.re_soln``. 81 | 82 | 83 | Predict and Create Draws 84 | ------------------------ 85 | The last step is to predict and create draws. 86 | 87 | .. code-block:: python 88 | 89 |     # first create the data object used for prediction 90 |     # the new data frame has to provide the same covariates as in the fitting 91 |     data_pred = MRData() 92 |     data_pred.load_df( 93 |         df_pred, 94 |         col_covs=['x'] 95 |     ) 96 | 97 |     # create point prediction 98 |     y_pred = model.predict(data_pred) 99 | 100 |     # sampling solutions 101 |     beta_samples, gamma_samples = model.sample_soln(sample_size=1000) 102 | 103 |     # create draws 104 |     y_draws = model.create_draws( 105 |         data_pred, 106 |         beta_samples, 107 |         gamma_samples 108 |     ) 109 | 110 | Here ``y_pred`` is the point prediction and ``y_draws`` contains ``1000`` draws of the outcome.
-------------------------------------------------------------------------------- /mrtool/docs/source/examples/index.rst: -------------------------------------------------------------------------------- 1 | .. _examples: 2 | 3 | ================== 4 | Examples and Demos 5 | ================== 6 | 7 | In this part of the documentation, we organize all the useful examples and demos. 8 | 9 | 10 | .. toctree:: 11 |    :maxdepth: 2 12 |    :hidden: 13 |    :glob: 14 | 15 |    example_linear
-------------------------------------------------------------------------------- /mrtool/docs/source/index.rst: -------------------------------------------------------------------------------- 1 | ===================== 2 | MRTool Documentation 3 | ===================== 4 | 5 | The **MRTool** (Meta-Regression Tool) package is designed to solve general meta-regression problems. 6 | The most common features include, 7 | 8 | * linear and log prediction functions, 9 | * spline extension for covariates, 10 | * direct Gaussian, Uniform and Laplace priors on fixed and random effects, 11 | * shape constraints (monotonicity and convexity) for splines. 12 | 13 | Advanced features include, 14 | 15 | * spline knots ensemble, 16 | * automatic covariate selection. 17 | 18 | 19 | Installation 20 | ------------ 21 | This package uses `data class `_, and therefore requires ``python>=3.7``. 22 | 23 | Required packages include, 24 | 25 | * basic scientific computing suite, Numpy, Scipy and Pandas, 26 | * main optimization engine, `IPOPT `_, 27 | * customized packages, `LimeTr `_ and 28 |   `XSpline `_, 29 | * testing tool, Pytest. 30 | 31 | After installing the required packages, clone the repository and install MRTool. 32 | 33 | .. code-block:: shell 34 | 35 |    git clone https://github.com/ihmeuw-msca/MRTool.git 36 |    cd MRTool && python setup.py install 37 | 38 | 39 | Getting Started 40 | --------------- 41 | 42 | To build and run a model, we only need four steps, 43 | 44 | 1. create an ``MRData`` object and load data from a data frame 45 | 2. configure the ``CovModel`` with covariates and priors 46 | 3. create an ``MRModel`` object with the data object and covariate models, and fit the model 47 | 4. predict or create draws with new data and the model result 48 | 49 | In the following, we will list a set of examples to help users get familiar with 50 | the syntax. 51 | 52 | * :ref:`simple linear model ` 53 | 54 | 55 | Important Concepts 56 | ------------------ 57 | 58 | To correctly set up the model and solve problems, 59 | it is very important to understand some key :ref:`concepts `. 60 | We introduce them under three categories, 61 | 62 | * How can we match the data generating mechanism? 63 | * How can we incorporate prior knowledge? 64 | * How do the underlying optimization algorithms work? 65 | 66 | 67 | ..
toctree:: 68 | :maxdepth: 2 69 | :hidden: 70 | 71 | examples/index 72 | concepts/index 73 | api_reference/index 74 | -------------------------------------------------------------------------------- /mrtool/setup.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from setuptools import setup, find_packages 3 | 4 | 5 | if __name__ == '__main__': 6 | base_dir = Path(__file__).parent 7 | src_dir = base_dir/'src' 8 | 9 | about = {} 10 | with (src_dir/'mrtool'/'__about__.py').open() as f: 11 | exec(f.read(), about) 12 | 13 | with (base_dir/'README.rst').open() as f: 14 | long_description = f.read() 15 | 16 | install_requirements = [ 17 | 'numpy', 18 | 'pandas', 19 | 'scipy', 20 | 'xspline', 21 | 'xarray' 22 | ] 23 | 24 | unsolved_requirements = [ 25 | 'ipopt', 26 | 'limetr', 27 | 'pycddlib' 28 | ] 29 | 30 | test_requirements = [ 31 | 'pytest', 32 | 'pytest-mock' 33 | ] 34 | 35 | doc_requirements = [ 36 | 'sphinx>3.0', 37 | 'sphinx-autodoc-typehints', 38 | 'sphinx-rtd-theme', 39 | 'IPython', 40 | 'matplotlib' 41 | ] 42 | 43 | setup(name=about['__title__'], 44 | version=about['__version__'], 45 | 46 | description=about['__summary__'], 47 | long_description=long_description, 48 | license=about['__license__'], 49 | url=about['__uri__'], 50 | 51 | author=about['__author__'], 52 | author_email=about['__email__'], 53 | 54 | package_dir={'': 'src'}, 55 | packages=find_packages(where='src'), 56 | include_package_data=True, 57 | 58 | install_requires=install_requirements, 59 | tests_require=test_requirements, 60 | extras_require={ 61 | 'docs': doc_requirements, 62 | 'test': test_requirements, 63 | 'dev': doc_requirements + test_requirements 64 | }, 65 | zip_safe=False,) 66 | -------------------------------------------------------------------------------- /mrtool/src/mrtool/__about__.py: -------------------------------------------------------------------------------- 1 | __all__ = [ 2 | "__title__", "__summary__", "__uri__", "__version__", "__author__", 3 | "__email__", "__license__", "__copyright__", 4 | ] 5 | 6 | __title__ = "mrtool" 7 | __summary__ = "MRTool: Featured Nonlinear Mixed effects Models" 8 | __uri__ = "https://github.com/ihmeuw/mrtool" 9 | 10 | __version__ = "0.0.1" 11 | 12 | __author__ = "Peng Zheng" 13 | __email__ = "zhengp@uw.edu" 14 | 15 | __license__ = "MIT License" 16 | __copyright__ = f"Copyright 2020 {__author__}" -------------------------------------------------------------------------------- /mrtool/src/mrtool/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | mrtool 4 | ~~~~~~ 5 | 6 | `mrtool` package. 
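Exposes MRData, the CovModel variants, MRBRT/MRBeRT, utils and CovFinder.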
7 | """ 8 | from .core.data import MRData 9 | from .core.cov_model import CovModel, LinearCovModel, LogCovModel 10 | from .core.model import MRBRT, MRBeRT 11 | from .core import utils 12 | from .cov_selection.covfinder import CovFinder 13 | -------------------------------------------------------------------------------- /mrtool/src/mrtool/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ihmeuw-msca/burden-of-proof/3f2e8e81f35889b14a52b421b532226d127412fe/mrtool/src/mrtool/core/__init__.py -------------------------------------------------------------------------------- /mrtool/src/mrtool/core/other_sampling.py: -------------------------------------------------------------------------------- 1 | """ 2 | other_sampling 3 | ~~~~~~~~~~~~~~ 4 | """ 5 | from warnings import warn 6 | from typing import Union 7 | from dataclasses import dataclass 8 | import numpy as np 9 | from .model import MRBRT 10 | from .cov_model import LinearCovModel 11 | 12 | 13 | try: 14 | from limetr.utils import VarMat 15 | except: 16 | class VarMat: 17 | pass 18 | 19 | 20 | @dataclass 21 | class SimpleLMESpecs: 22 | obs: np.ndarray 23 | obs_se: np.ndarray 24 | study_sizes: np.ndarray 25 | fe_mat: np.ndarray 26 | re_mat: np.ndarray 27 | beta_soln: np.ndarray 28 | gamma_soln: np.ndarray 29 | fe_gprior: Union[np.ndarray, None] = None 30 | trimming_weights: np.ndarray = None 31 | 32 | def __post_init__(self): 33 | self.num_obs = len(self.obs) 34 | self.num_x_vars = self.fe_mat.shape[1] 35 | self.num_z_vars = self.re_mat.shape[1] 36 | 37 | if self.fe_gprior is not None and np.isinf(self.fe_gprior[1]).all(): 38 | self.fe_gprior = None 39 | 40 | if self.trimming_weights is None: 41 | self.trimming_weights = np.ones(self.num_obs) 42 | 43 | 44 | def is_simple_linear_mixed_effects_model(model: MRBRT) -> bool: 45 | """Test if a model is simple linear mixed effects model, where 46 | * covmodel is linear 47 | * no constraints 48 | * no uniform prior for fixed effects. 49 | 50 | Args: 51 | model (MRBRT): Model to be tested. 52 | 53 | Returns: 54 | bool: 55 | True if model is linear mixed effects model. 56 | """ 57 | ok = all([isinstance(cov_model, LinearCovModel) 58 | for cov_model in model.cov_models]) 59 | 60 | uprior = model.create_uprior() 61 | fe_uprior = uprior[:, :model.num_x_vars] 62 | ok = ok and np.isneginf(fe_uprior[0]).all() and np.isposinf(fe_uprior[1]).all() 63 | 64 | lprior = model.create_lprior() 65 | fe_lprior = lprior[:, :model.num_x_vars] 66 | ok = ok and np.isinf(fe_lprior[1]).all() 67 | 68 | ok = ok and (model.num_constraints == 0) 69 | return ok 70 | 71 | 72 | def extract_simple_lme_specs(model: MRBRT) -> SimpleLMESpecs: 73 | """Extract the simple mixed effects model specs. 74 | 75 | Args: 76 | model (MRBRT): Simple mixed effects model 77 | 78 | Returns: 79 | SimpleLMESpecs: 80 | Data object contains information of the simple linear mixed effects model. 81 | """ 82 | if not is_simple_linear_mixed_effects_model(model): 83 | warn("Model is not a simple mixed effects model. 
Uncertainty might not be accurate.") 84 | 85 |     x_fun, x_jac_fun = model.create_x_fun() 86 |     x_mat = x_jac_fun(model.beta_soln) 87 |     z_mat = model.create_z_mat() 88 |     gprior = model.create_gprior() 89 | 90 |     beta_soln = model.lt.beta.copy() 91 |     gamma_soln = model.lt.gamma.copy() 92 |     w_soln = model.lt.w.copy() 93 | 94 |     return SimpleLMESpecs( 95 |         obs=model.data.obs, 96 |         obs_se=model.data.obs_se, 97 |         study_sizes=model.data.study_sizes, 98 |         fe_mat=x_mat, 99 |         re_mat=z_mat, 100 |         beta_soln=beta_soln, 101 |         gamma_soln=gamma_soln, 102 |         fe_gprior=gprior[:, :model.num_x_vars], 103 |         trimming_weights=w_soln 104 |     ) 105 | 106 | 107 | def extract_simple_lme_hessian(model_specs: SimpleLMESpecs) -> np.ndarray: 108 |     """Extract the Hessian matrix from the simple linear mixed effects model. 109 | 110 |     Args: 111 |         model_specs (SimpleLMESpecs): Model specifications. 112 | 113 |     Returns: 114 |         np.ndarray: Hessian matrix. 115 |     """ 116 |     sqrt_weights = np.sqrt(model_specs.trimming_weights) 117 |     x = model_specs.fe_mat*sqrt_weights[:, None] 118 |     z = model_specs.re_mat*sqrt_weights[:, None] 119 |     d = model_specs.obs_se**(2*model_specs.trimming_weights) 120 |     v = VarMat(d, z, model_specs.gamma_soln, model_specs.study_sizes) 121 | 122 |     hessian = x.T.dot(v.invDot(x)) 123 |     if model_specs.fe_gprior is not None: 124 |         hessian += np.diag(1.0/model_specs.fe_gprior[1]**2) 125 | 126 |     return hessian 127 | 128 | 129 | def sample_simple_lme_beta(sample_size: int, model: MRBRT) -> np.ndarray: 130 |     """Sample beta from the simple linear mixed effects model. 131 | 132 |     Args: 133 |         sample_size (int): Sample size. 134 |         model (MRBRT): Simple linear mixed effects model. 135 | 136 |     Returns: 137 |         np.ndarray: 138 |             Beta samples from the linear mixed effects model. 139 |     """ 140 |     # extract information 141 |     model_specs = extract_simple_lme_specs(model) 142 | 143 |     # compute the mean and variance matrix for sampling 144 |     beta_mean = model_specs.beta_soln 145 |     beta_var = np.linalg.inv(extract_simple_lme_hessian(model_specs)) 146 | 147 |     # sample the solutions 148 |     beta_samples = np.random.multivariate_normal( 149 |         beta_mean, 150 |         beta_var, 151 |         size=sample_size 152 |     ) 153 | 154 |     return beta_samples 155 |
-------------------------------------------------------------------------------- /mrtool/src/mrtool/cov_selection/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ihmeuw-msca/burden-of-proof/3f2e8e81f35889b14a52b421b532226d127412fe/mrtool/src/mrtool/cov_selection/__init__.py
-------------------------------------------------------------------------------- /mrtool/src/mrtool/evidence_score/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ihmeuw-msca/burden-of-proof/3f2e8e81f35889b14a52b421b532226d127412fe/mrtool/src/mrtool/evidence_score/__init__.py
-------------------------------------------------------------------------------- /mrtool/src/mrtool/evidence_score/dichotomous.py: -------------------------------------------------------------------------------- 1 | """ 2 | Dichotomous scorelator 3 | """ 4 | import os 5 | from pathlib import Path 6 | from typing import Tuple, Union 7 | import numpy as np 8 | from scipy.stats import norm 9 | import matplotlib.pyplot as plt 10 | from mrtool import MRBRT 11 | from mrtool.core.other_sampling import extract_simple_lme_specs, extract_simple_lme_hessian 12 | 13 | 14 | class DichotomousScorelator: 15 |     def __init__(self, 16 |
model: MRBRT, 17 | cov_name: str = 'intercept', 18 | draw_bounds: Tuple[float, float] = (0.05, 0.95), 19 | name: str = 'unknown'): 20 | self.model = model 21 | self.cov_name = cov_name 22 | self.draw_bounds = draw_bounds 23 | self.cov_index = self.model.get_cov_model_index(self.cov_name) 24 | self.name = name 25 | 26 | x_ids = self.model.x_vars_indices[self.cov_index] 27 | z_ids = self.model.z_vars_indices[self.cov_index] 28 | self.beta = self.model.beta_soln[x_ids][0] 29 | self.gamma = self.model.gamma_soln[z_ids][0] 30 | 31 | # compute the fixed effects uncertainty 32 | model_specs = extract_simple_lme_specs(self.model) 33 | beta_var = np.linalg.inv(extract_simple_lme_hessian(model_specs)) 34 | self.beta_var = beta_var[np.ix_(x_ids, x_ids)][0, 0] 35 | 36 | # compute the random effects uncertainty 37 | lt = self.model.lt 38 | gamma_fisher = lt.get_gamma_fisher(lt.gamma) 39 | gamma_var = np.linalg.inv(gamma_fisher) 40 | self.gamma_var = gamma_var[np.ix_(z_ids, z_ids)][0, 0] 41 | 42 | # compute score 43 | gamma_ub = self.gamma + 2.0*np.sqrt(self.gamma_var) 44 | self.draw_lb = self.beta + norm.ppf(self.draw_bounds[0], scale=np.sqrt(self.gamma + self.beta_var)) 45 | self.draw_ub = self.beta + norm.ppf(self.draw_bounds[1], scale=np.sqrt(self.gamma + self.beta_var)) 46 | self.wider_draw_lb = self.beta + norm.ppf(self.draw_bounds[0], scale=np.sqrt(gamma_ub + self.beta_var)) 47 | self.wider_draw_ub = self.beta + norm.ppf(self.draw_bounds[1], scale=np.sqrt(gamma_ub + self.beta_var)) 48 | 49 | def is_harmful(self) -> bool: 50 | return self.beta > 0.0 51 | 52 | def get_score(self, use_gamma_ub: bool = False) -> float: 53 | if use_gamma_ub: 54 | score = self.wider_draw_lb if self.is_harmful() else -self.wider_draw_ub 55 | else: 56 | score = self.draw_lb if self.is_harmful() else -self.draw_ub 57 | return score 58 | 59 | def plot_model(self, 60 | ax=None, 61 | title: str = None, 62 | xlabel: str = 'ln relative risk', 63 | ylabel: str = 'ln relative risk se', 64 | xlim: tuple = None, 65 | ylim: tuple = None, 66 | xscale: str = None, 67 | yscale: str = None, 68 | folder: Union[str, Path] = None): 69 | if ax is None: 70 | fig = plt.figure() 71 | ax = fig.add_subplot() 72 | data = self.model.data 73 | trim_index = self.model.w_soln <= 0.1 74 | max_obs_se = np.max(data.obs_se)*1.1 75 | ax.set_ylim(max_obs_se, 0.0) 76 | ax.fill_betweenx([0.0, max_obs_se], 77 | [self.beta, self.beta - 1.96*max_obs_se], 78 | [self.beta, self.beta + 1.96*max_obs_se], color='#B0E0E6', alpha=0.4) 79 | obs = data.obs.copy() 80 | for i, cov_name in enumerate(self.model.cov_names): 81 | if cov_name == 'intercept': 82 | continue 83 | obs -= data.covs[cov_name]*self.model.beta_soln[i] 84 | ax.scatter(obs, data.obs_se, color='gray', alpha=0.4) 85 | ax.scatter(obs[trim_index], 86 | data.obs_se[trim_index], color='red', marker='x', alpha=0.4) 87 | ax.plot([self.beta, self.beta - 1.96*max_obs_se], [0.0, max_obs_se], 88 | linewidth=1, color='#87CEFA') 89 | ax.plot([self.beta, self.beta + 1.96*max_obs_se], [0.0, max_obs_se], 90 | linewidth=1, color='#87CEFA') 91 | 92 | ax.axvline(0.0, color='r', linewidth=1, linestyle='--') 93 | ax.axvline(self.beta, color='k', linewidth=1, linestyle='--') 94 | ax.axvline(self.draw_lb, color='#69b3a2', linewidth=1) 95 | ax.axvline(self.draw_ub, color='#69b3a2', linewidth=1) 96 | ax.axvline(self.wider_draw_lb, color='#256b5f', linewidth=1) 97 | ax.axvline(self.wider_draw_ub, color='#256b5f', linewidth=1) 98 | 99 | title = self.name if title is None else title 100 | score = self.get_score() 101 | low_score = 
self.get_score(use_gamma_ub=True) 102 |         ax.set_xlabel(xlabel) 103 |         ax.set_ylabel(ylabel) 104 |         ax.set_title(f"{title}: score = ({low_score: .3f}, {score: .3f})", loc='left') 105 | 106 |         if xlim is not None: 107 |             ax.set_xlim(*xlim) 108 |         if ylim is not None: 109 |             ax.set_ylim(*ylim) 110 |         if xscale is not None: 111 |             ax.set_xscale(xscale) 112 |         if yscale is not None: 113 |             ax.set_yscale(yscale) 114 | 115 |         if folder is not None: 116 |             folder = Path(folder) 117 |             if not folder.exists(): 118 |                 os.mkdir(folder) 119 |             plt.savefig(folder/f"{self.name}.pdf", bbox_inches='tight') 120 | 121 |         return ax 122 |
-------------------------------------------------------------------------------- /mrtool/tests/test_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 |     test_utils 4 |     ~~~~~~~~~~ 5 |     Test `utils` module of `mrtool` package. 6 | """ 7 | import numpy as np 8 | import pandas as pd 9 | import pytest 10 | from mrtool import utils 11 | 12 | 13 | @pytest.mark.parametrize('df', [pd.DataFrame({'alpha': np.ones(5), 14 |                                               'beta': np.zeros(5)})]) 15 | @pytest.mark.parametrize(('cols', 'col_shape'), 16 |                          [('alpha', (5,)), 17 |                           ('beta', (5,)), 18 |                           (['alpha'], (5, 1)), 19 |                           (['beta'], (5, 1)), 20 |                           (['alpha', 'beta'], (5, 2)), 21 |                           (None, (5, 0))]) 22 | def test_get_cols(df, cols, col_shape): 23 |     col = utils.get_cols(df, cols) 24 |     assert col.shape == col_shape 25 | 26 | 27 | @pytest.mark.parametrize(('cols', 'ok'), 28 |                          [('col0', True), 29 |                           (['col0', 'col1'], True), 30 |                           ([], True), 31 |                           (None, True), 32 |                           (1, False)]) 33 | def test_is_cols(cols, ok): 34 |     assert ok == utils.is_cols(cols) 35 | 36 | 37 | @pytest.mark.parametrize('cols', [None, 'col0', ['col0', 'col1']]) 38 | @pytest.mark.parametrize('default', [None, 'col0', ['col0', 'col1']]) 39 | def test_input_cols_default(cols, default): 40 |     result_cols = utils.input_cols(cols, default=default) 41 |     if cols is None: 42 |         assert result_cols == [] if default is None else default 43 |     else: 44 |         assert result_cols == cols 45 | 46 | 47 | @pytest.mark.parametrize('cols', [None, 'col0', ['col0', 'col1']]) 48 | @pytest.mark.parametrize('full_cols', [None, ['col2']]) 49 | def test_input_cols_append_to(cols, full_cols): 50 |     cols = utils.input_cols(cols, append_to=full_cols) 51 |     if full_cols is not None and cols: 52 |         assert 'col0' in full_cols and 'col2' in full_cols 53 |         if isinstance(cols, list): 54 |             assert 'col1' in full_cols 55 | 56 | 63 | @pytest.mark.parametrize('sizes', [np.array([1, 2, 3])]) 64 | @pytest.mark.parametrize('indices', [[np.arange(0, 1), 65 |                                       np.arange(1, 3), 66 |                                       np.arange(3, 6)]]) 67 | def test_sizes_to_indices(sizes, indices): 68 |     my_indices = utils.sizes_to_indices(sizes) 69 |     assert all([np.allclose(my_indices[i], indices[i]) 70 |                 for i in range(len(sizes))]) 71 | 72 | 73 | @pytest.mark.parametrize(('prior', 'result'), 74 |                          [(np.array([0.0, 1.0]), True), 75 |                           (np.array([[0.0]*2, [1.0]*2]), True), 76 |                           (np.array([0.0, -1.0]), False), 77 |                           (np.array([[0.0]*2, [-1.0]*2]), False), 78 |                           (None, True), 79 |                           ('gaussian_prior', False)]) 80 | def test_is_gaussian_prior(prior, result): 81 |     assert utils.is_gaussian_prior(prior) == result 82 | 83 | 84 | @pytest.mark.parametrize(('prior', 'result'), 85 |                          [(np.array([0.0, 1.0]), True), 86 |                           (np.array([[0.0]*2, [1.0]*2]),
True), 87 |                           (np.array([0.0, -1.0]), False), 88 |                           (np.array([[0.0]*2, [-1.0]*2]), False), 89 |                           (None, True), 90 |                           ('uniform_prior', False)]) 91 | def test_is_uniform_prior(prior, result): 92 |     assert utils.is_uniform_prior(prior) == result 93 | 94 | 95 | @pytest.mark.parametrize('obj', [1, 1.0, 'a', True, [1], [1.0], ['a'], [True]]) 96 | def test_to_list(obj): 97 |     obj_list = utils.to_list(obj) 98 |     if isinstance(obj, list): 99 |         assert obj_list is obj 100 |     else: 101 |         assert isinstance(obj_list, list) 102 |
-------------------------------------------------------------------------------- /risks/README.md: -------------------------------------------------------------------------------- 1 | 2 | This folder contains custom code for individual risk factors in the burden of proof publications. 3 |
-------------------------------------------------------------------------------- /risks/ipv_csa/README.md: -------------------------------------------------------------------------------- 1 | This sub-directory contains code specific to the IPV and CSA risk factors. 2 |
-------------------------------------------------------------------------------- /risks/processed_foods/Data_cleaning_and_formatting.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Clean the environment 4 | rm(list = ls()) 5 | 6 | ## Load libraries 7 | library(data.table) 8 | library(tidyverse) 9 | library(ggplot2) 10 | library(openxlsx) 11 | library(purrr) 12 | 13 | ## Edit the filepath 14 | data_dir <- "FILEPATH" 15 | 16 | ## Create covariates to assess the study design characteristics (assumes the extracted data have already been read into `dt` as a data.table) 17 | dt$representative <- ifelse(dt$rep_geography == 1, 0, 1) 18 | dt$exp_assess_level[dt$exp_assess_level == "At the individual "] <- "At the individual" 19 | dt$exposure_1 <- ifelse(dt$exp_assess_level == "At the individual", 0, 1) 20 | dt$exposure_2 <- ifelse(dt$exp_method_1 == "Self-report (human/environment)", 0, 1) 21 | dt$exposure_3 <- ifelse(dt$exp_assess_period == "only at baseline", 1, 0) 22 | dt$outcome_1 <- ifelse(dt$outcome_assess_1 == "Self-report", 1, 0) 23 | dt$outcome_2 <- "0" 24 | dt$confounder_1 <- ifelse(dt$design %in% c("Prospective cohort", "prospective cohort", "case-cohort", "Nested case-control"), 1, 0) 25 | dt$incidence <- ifelse(dt$outcome_type %in% c("Incidence", "Incidence & Mortality", "Incidence, mortality", "Incidence and mortality"), 0, 1) 26 | dt$mortality <- ifelse(dt$outcome_type %in% c("Mortality", "Incidence & Mortality", "Incidence, mortality", "Incidence and mortality"), 1, 0) 27 | dt$cov_odds_ratio <- ifelse(dt$effect_size_measure == "Odds ratio (OR)", 1, 0) 28 | 29 | ## Adjust certain variables 30 | dt <- dt %>% 31 |   mutate(outcome_2 = 0, reverse_causation = 1, washout_years = NA, seq = NA, selection_bias = NA) 32 | 33 | ## Make the effect size and CI bound variables numeric 34 | dt$effect_size <- as.numeric(dt$effect_size) 35 | dt$upper <- as.numeric(dt$upper) 36 | dt$lower <- as.numeric(dt$lower) 37 | 38 | ## Create the ln_effect and ln_se variables 39 | dt <- dt %>% 40 |   mutate(ln_effect = log(effect_size), ln_se = (log(upper) - log(lower)) / 3.92) 41 | 42 | ## Flag studies with more than 10 years of follow-up 43 | dt$value_of_duration_fup <- as.numeric(dt$value_of_duration_fup) 44 | dt <- dt[, cov_follow_up := ifelse(value_of_duration_fup > 10, 1, 0)] 45 | 46 | ## Rename the effect size and dose response columns as required by the pipeline 47 | setnames(dt, old = c("ln_effect", "ln_se", "b_0", "b_1", "a_0", "a_1"), new = c("ln_rr", "ln_rr_se", "alt_risk_lower", "alt_risk_upper", "ref_risk_lower", "ref_risk_upper")) 48 | 49 | ## Convert selected columns to numeric 50 | columns_to_convert <- c("ln_rr", "ln_rr_se", "ref_risk_lower", "ref_risk_upper", "alt_risk_lower", "alt_risk_upper") 51 | dt[, (columns_to_convert) := lapply(.SD, as.numeric), .SDcols = columns_to_convert] 52 | 53 | ## Rename the bias covariates according to the BoP guideline 54 | names(dt) <- gsub("^(cofounder|confounder|confounders)", "cov", names(dt)) 55 | 56 | ## Drop non-required variables 57 | dt <- dt[, c("cov_other_dietary_components", "cov_other") := NULL] 58 | dt <- dt[, measure := "relrisk"] 59 | 60 | ## Prepare the data for bundle upload 61 | dt$sex <- ifelse(dt$percent_male == 1, "Male", ifelse(dt$percent_male == 0, "Female", "Both")) 62 | dt$cov_exposure_definition <- ifelse(dt$Exposure_definition_reported == 1, 1, 0) 63 | dt$cov_outcome_def <- ifelse(dt$outcome_mapping == "aggregate", 0, 1) 64 | dt$cov_outcome_def[is.na(dt$cov_outcome_def)] <- 0 ## Set the missing values to 0 65 | 66 | ## Format the dataset for upload 67 | dt$design[dt$design == "Prospective cohort"] <- "prospective cohort" 68 | dt$design[dt$design == "case-cohort"] <- "case-cohort" 69 | dt$design[dt$design == "Nested case-control"] <- "nested case-control" 70 | dt$effect_size_measure[dt$effect_size_measure == "Hazard ratio (HR)"] <- "hazard ratio" 71 | dt$effect_size_measure[dt$effect_size_measure == "Relative risk (RR)"] <- "relative risk" 72 | dt$effect_size_measure[dt$effect_size_measure == "Odds ratio (OR)"] <- "odds ratio" 73 | setnames(dt, c("year_end_study", "year_start_study", "effect_size"), c("year_end", "year_start", "mean")) 74 | 75 | ## Get the location data 76 | source("FILEPATH/get_location_metadata.R") 77 | loc <- get_location_metadata(location_set_id = 35, release_id = 9) 78 | loc$location_id <- as.numeric(loc$location_id) 79 | dt$location_id <- as.numeric(dt$location_id) 80 | 81 | ## Merge 82 | dt_final <- merge(dt, loc, by = c("location_id", "location_name"), all.x = TRUE) 83 | 84 | ## Save the dataset 85 | write_csv(dt_final, "FILEPATH/FILENAME.csv")
-------------------------------------------------------------------------------- /risks/processed_foods/create_bc_gamma_table.R: -------------------------------------------------------------------------------- 1 | ####################################################################### 2 | # Title: Create table for selected bias covariates and gamma solution for a BoP study 3 | # Author: 4 | ####################################################################### 5 | rm(list = ls()) 6 | 7 | ## create custom R library folder 8 | user <- Sys.getenv("USER") 9 | user_rlibs <- file.path("/homes", user, "rlibs") 10 | 11 | if (!dir.exists(user_rlibs)) { 12 |   dir.create(user_rlibs) 13 | } 14 | 15 | ## install/load packages 16 | packages <- c("yaml", "data.table", "stringr", "officer", "flextable") 17 | for (p in packages) { 18 |   if (!require(p, character.only = TRUE)) { 19 |     install.packages(p, lib = user_rlibs) 20 |     library(p, lib.loc = user_rlibs, character.only = TRUE) 21 |   } 22 | } 23 | 24 | ## load cc functions 25 | source("FILEPATH/r/get_ids.R") 26 | 27 | ## main function 28 | #' This function creates a table for selected bias covariates and gamma solution to be shown in a Burden of Proof study 29 | #' 30 | #' @param input is the path to the folder where the results of the BoP pipeline are stored 31 | #' @param output is the path to the folder where the table should be saved 32 | #' @param heading optional heading of the
table; "Table SX. Selected bias covariates and gamma solution" by default. 33 | #' @param footnote optional footnote of the table; vector(mode = "character") by default. 34 | #' 35 | #' @return 36 | #' @export 37 | #' 38 | #' @examples 39 | #' bc_gamma_table(input = "/homes/shcarr/bop_pipeline/results/dichotomous/", 40 | #' output = "/homes/shcarr/bop_pipeline/tables/", 41 | #' heading = "Table S4. Selected bias covariates and gamma solution") 42 | 43 | 44 | risk_folder = "processed_meat" 45 | trimming = "no_trimming/" 46 | exposure = paste0(risk_folder, "consumption") 47 | 48 | input = paste0("FILEPATH/", risk_folder, "/mrbrt_run/BoP_summary_table/", trimming, "/risk_outcome_pair/") 49 | output = paste0("FILEPATH/", risk_folder, "/mrbrt_run/BoP_summary_table/", trimming, "/summary_table/") 50 | heading = paste0("Table:", exposure ," Selected bias covariates and gamma solution") 51 | 52 | 53 | bc_gamma_table <- function(input, output, heading = "Table SX. Selected bias covariates and gamma solution", footnote = vector(mode = "character")){ 54 | 55 | if (!dir.exists(input)) stop("Please provide a valid input path") 56 | if (!dir.exists(output)) stop("Please provide a valid output path") 57 | 58 | # load cause_ids 59 | cause_ids <- get_ids("cause") 60 | 61 | results <- data.table() 62 | 63 | # loop through all risk-outcome pairs 64 | for (ro_pair in list.files(input)) { 65 | ro_path <- file.path(input, ro_pair) 66 | 67 | # load files 68 | summary <- yaml.load_file(file.path(ro_path, "summary.yaml")) 69 | cov_finder_result <- yaml.load_file(file.path(ro_path, "cov_finder_result.yaml")) 70 | 71 | # bias covariate(s) 72 | selected_bc_covs <- cov_finder_result$selected_covs 73 | selected_bc_covs <- gsub("cov_", "", selected_bc_covs) 74 | selected_bc_covs <- ifelse(length(selected_bc_covs) > 0, paste(selected_bc_covs, collapse = ", "), "None") 75 | 76 | # gamma solution 77 | gamma <- paste0(round(summary$gamma[1], 10), " (", round(summary$gamma[2], 5), ")") 78 | 79 | # exposure 80 | risk <- stringr::str_split(ro_pair, "-")[[1]][1] 81 | 82 | # health outcome 83 | cause <- stringr::str_split(ro_pair, "-")[[1]][2] 84 | health_outcome <- ifelse(cause %in% cause_ids$acause, cause_ids$cause_name[cause_ids$acause == cause], cause) 85 | 86 | # compile results 87 | temp <- data.frame( 88 | cbind( 89 | risk, 90 | health_outcome, 91 | selected_bc_covs, 92 | gamma 93 | ) 94 | ) 95 | results <- rbind(results, temp) 96 | } 97 | 98 | # rename columns 99 | colnames(results) <- c("Risk", "Health outcome", "Selected bias covariates", "Gamma solution (mean and sd)") 100 | 101 | # check if multiple exposures 102 | if (length(unique(results$`Risk`)) == 1) { 103 | results <- results[, -c("Risk")] 104 | } 105 | 106 | # define table settings 107 | if ("Risk" %in% colnames(results)) { 108 | cols <- c(1:4) 109 | width <- c(1.8, 1.8, 1.8, 1.3) 110 | } else { 111 | cols <- c(1:3) 112 | width <- c(1.8, 1.8, 1.3) 113 | } 114 | 115 | # create table 116 | flextable(results) %>% 117 | add_header_lines(values = heading) %>% 118 | add_footer_lines(footnote) %>% 119 | width(j = cols, width = width) %>% 120 | style(part = "header", pr_p = fp_par(text.align = "center")) %>% 121 | style(j = 1, part = "header", pr_p = fp_par(text.align = "left")) %>% 122 | style(part = "body", pr_p = fp_par(text.align = "center")) %>% 123 | style(j = 1, part = "body", pr_p = fp_par(text.align = "left")) %>% 124 | font(fontname = "Calibri", part = "all") %>% 125 | fontsize(size = 10, part = "body") %>% 126 | fontsize(i = 1, size = 12, part = "header") 
%>% 127 | line_spacing(space = 1.15) %>% 128 | save_as_docx(path = file.path(output, "bc_gamma_table.docx"), align = "center") 129 | } 130 | 131 | ## run the function 132 | 133 | 134 | bc_gamma_table(input, output, heading) 135 | 136 | -------------------------------------------------------------------------------- /risks/red_meat/README.md: -------------------------------------------------------------------------------- 1 | 2 | This folder contains custom code for the diet high in red meat burden of proof analysis. 3 | -------------------------------------------------------------------------------- /risks/red_meat/model_functions.R: -------------------------------------------------------------------------------- 1 | # Functions to pull draws from the modified model objects 2 | 3 | 4 | get_cov_names <- function(signal_model) { 5 | cov_model <- signal_model$sub_models[[1]]$cov_models[[1]] 6 | list(alt_covs = cov_model$alt_cov, 7 | ref_covs = cov_model$ref_cov) 8 | } 9 | 10 | get_risk_limits <- function(signal_model) { 11 | cov_names <- get_cov_names(signal_model) 12 | risk_data <- signal_model$data$get_covs(unlist(cov_names)) 13 | c(min(risk_data), max(risk_data)) 14 | } 15 | 16 | get_signal <- function(signal_model, risk) { 17 | cov_names <- get_cov_names(signal_model) 18 | risk_limits <- get_risk_limits(signal_model) 19 | df_covs <- data.frame( 20 | c(sapply(cov_names$ref_covs, function(x) rep(risk_limits[1], length.out = length(risk)), 21 | simplify = FALSE, USE.NAMES = TRUE), 22 | sapply(cov_names$alt_covs, function(x) risk, 23 | simplify = FALSE, USE.NAMES = TRUE)) 24 | ) 25 | data <- MRData() 26 | data$load_df(df_covs, col_covs=unlist(cov_names)) 27 | signal_model$predict(data) 28 | } 29 | 30 | get_beta <- function(linear_model) { 31 | beta <- linear_model$beta_soln 32 | names(beta) <- linear_model$cov_names 33 | specs <- mrbrt002::core$other_sampling$extract_simple_lme_specs(linear_model) 34 | beta_hessian <- mrbrt002::core$other_sampling$extract_simple_lme_hessian(specs) 35 | beta_sd <- 1/sqrt(diag(beta_hessian)) 36 | names(beta_sd) <- linear_model$cov_names 37 | c(beta["signal"], beta_sd["signal"]) 38 | } 39 | 40 | get_gamma <- function(linear_model) { 41 | gamma <- linear_model$gamma_soln[[1]] 42 | gamma_fisher <- linear_model$lt$get_gamma_fisher(linear_model$gamma_soln) 43 | gamma_sd <- 1/sqrt(diag(gamma_fisher)) 44 | c(gamma, gamma_sd) 45 | } 46 | 47 | get_soln <- function(linear_model) { 48 | list( 49 | beta_soln = get_beta(linear_model), 50 | gamma_soln = get_gamma(linear_model) 51 | ) 52 | } 53 | 54 | get_ln_rr_draws <- function(signal_model, 55 | linear_model, 56 | risk, 57 | num_draws = 1000L, 58 | normalize_to_tmrel = FALSE, 59 | fe_only = FALSE) { 60 | signal <- get_signal(signal_model, risk) 61 | re_signal <- signal 62 | soln <- get_soln(linear_model) 63 | 64 | fe_samples <- rnorm(num_draws, mean=soln$beta_soln[1], sd=soln$beta_soln[2]) # draws of the fixed ("signal") effect 65 | re_samples <- rnorm(num_draws, mean=0, sd=sqrt(soln$gamma_soln[1] + 2*soln$gamma_soln[2])) # between-study heterogeneity, with gamma inflated by 2 sd 66 | 67 | if(fe_only){ 68 | draws <- outer(signal, fe_samples) 69 | }else{ 70 | draws <- outer(signal, fe_samples) + outer(re_signal, re_samples) 71 | } 72 | 73 | if (normalize_to_tmrel) { 74 | tmrel_index <- which.min(signal) 75 | draws <- sweep(draws, 2, draws[tmrel_index, ], "-") # shift each draw (column) so its TMREL row is zero 76 | } 77 | 78 | df <- as.data.frame(cbind(risk, draws)) 79 | names(df) <- c("risk", sapply(1:num_draws, function(i) paste0("draw_", i))) 80 | return(df) 81 | } 82 | 83 | 84 | summarize_draws <- function(data){ 85 | 86 | df <- as.data.table(copy(data)) 87 | draw_cols <- colnames(df)[grepl("draw_",
colnames(df))] 88 | 89 | df[, mean := apply(.SD, 1, mean), .SDcols = draw_cols] 90 | df[, upper := apply(.SD, 1, quantile, 0.975), .SDcols = draw_cols] 91 | df[, lower := apply(.SD, 1, quantile, 0.025), .SDcols = draw_cols] 92 | 93 | df[, (draw_cols) := NULL] 94 | return(df) 95 | 96 | } -------------------------------------------------------------------------------- /risks/smoking/binary_risk/02_upload_dichotomous_launcher.R: -------------------------------------------------------------------------------- 1 | rm(list = ls()) 2 | source("/ihme/homes/xdai88/gbd_tobacco/gbd2020_smoking/evidence_score_pipeline/src/upload_dichotomous.R") 3 | 4 | ARCHIVE <- "[directory to the archive folder]" 5 | out_dir <- "[directory to the outputs folder]" 6 | 7 | pair_info <- list( 8 | smoking_hip_fracture = list( 9 | rei_id = "99", 10 | cause_id = "878", 11 | model_path = paste0(out_dir,"fracture_model.pkl") 12 | ), 13 | smoking_non_hip_fracture = list( 14 | rei_id = "99", 15 | cause_id = "923", 16 | model_path = paste0(out_dir,"fracture_model.pkl") 17 | ) 18 | ) 19 | 20 | for (pair in names(pair_info)) { 21 | print(paste0("upload pair=", pair)) 22 | results_folder <- file.path(ARCHIVE, pair) 23 | if (!dir.exists(results_folder)) { 24 | dir.create(results_folder) 25 | } 26 | do.call(upload_results, c(pair_info[[pair]], list(results_folder = results_folder))) 27 | } 28 | -------------------------------------------------------------------------------- /risks/smoking/binary_risk/03_forest_plot.R: -------------------------------------------------------------------------------- 1 | #################################################################################################################################################################### 2 | # 3 | # Author: Xiaochen Dai 4 | # Purpose: Plot mr-brt results 5 | # 6 | #################################################################################################################################################################### 7 | 8 | rm(list=ls()) 9 | 10 | library(data.table) 11 | library(dplyr) 12 | library(openxlsx) 13 | library(ggplot2) 14 | # library(crosswalk, lib.loc = "/ihme/code/mscm/R/packages/") 15 | library(mrbrt002, lib.loc = "/ihme/code/mscm/Rv4/packages/") 16 | 17 | args <- commandArgs(trailingOnly = TRUE) 18 | 19 | ## NEED TO CHANGE THE RO_PAIR HERE 20 | 21 | if(interactive()){ 22 | # NOTE: the ro_pair for this script does not include age-specific info 23 | ro_pair <- "fractures" # only works for fractures now 24 | cov_setting <- "cov_finder_no_sex" # option: ['cov_finder', 'cov_finder_no_sex', 'no_cov','percent_male_only','self_selected'(no percent_male)] 25 | trim <- 0.9 26 | #out_dir <- "/ihme/homes/xdai88/gbd_tobacco/gbd2019_alcohol/evidence_score/testing/test_run1_2020_09_05/" 27 | out_dir <- "/mnt/team/team/pub/sub_risks/tobacco/code/xdai88/gbd2020_smoking/relative_risk_curves/binary_risk/fracture_binary/" 28 | } else { 29 | ro_pair <- args[1] 30 | cov_setting <- args[2] 31 | trim <- args[3] 32 | out_dir <- args[4] 33 | } 34 | 35 | # 1. 
plotting the results --------------------------------------------------------------------------------------------------------------------------------------------------- 36 | 37 | #obs_data <- fread(paste0('/ihme/homes/xdai88/gbd_tobacco/gbd2020_smoking/test_run_2020_10_25/fracture_binary/mrbrt_output_', cov_setting, '_', trim, '.csv')) 38 | #mod1 <- py_load_object(filename = paste0('/ihme/homes/xdai88/gbd_tobacco/gbd2020_smoking/test_run_2020_10_25/fracture_binary/', ro_pair, '_', cov_setting,'_', trim, '.pkl'), pickle = "dill") 39 | 40 | obs_data <- fread(paste0(out_dir, ro_pair,"_",cov_setting, '_', trim, '.csv')) 41 | mod1 <- py_load_object(filename = paste0(out_dir, ro_pair, '_', cov_setting,'_', trim, '.pkl'), pickle = "dill") 42 | 43 | cov_names <- mod1$cov_names[!mod1$cov_names=="intercept"] 44 | header <- paste0(as.character(mod1$data),"\ncovariates: ",paste0(cov_names, collapse=", ") ,"\nexp(beta): ", round(exp(mod1$beta_soln[1]), digits = 3)," gamma: ", mod1$gamma_soln) 45 | 46 | # 47 | test <- obs_data[!is.na(se)] 48 | test[, val := exp(val)]; test[, lower := exp(lower)]; test[, upper := exp(upper)] 49 | results <- obs_data[is.na(se)] 50 | 51 | #lin_effect <- as.data.table(delta_transform(mean=test$val, sd=test$se, transformation='log_to_linear')) 52 | #data <- cbind(lin_effect, test) 53 | #data[, upper:=mean_linear+1.96*sd_linear] 54 | #data[, lower:=mean_linear-1.96*sd_linear] 55 | 56 | plot_data <- rbind(test, results, fill=T) 57 | plot_data[, mean_linear:=val] 58 | plot_data[study=='2019 Result', data:=3] 59 | 60 | plot_data[included==0, data:=4] 61 | 62 | # forest plot of the data 63 | color_vals <- c("black", "blue", "darksalmon", "red") 64 | names(color_vals) <- c(1,2,3,4) 65 | 66 | if (cov_setting %in% c('cov_finder', 'percent_male_only')) { 67 | 68 | plot_data[sample_sex==0, sex:='female'] 69 | plot_data[sample_sex==1, sex:='male'] 70 | plot_data[!sample_sex%in%c(0,1) & !is.na(sample_sex), sex:='both'] 71 | 72 | 73 | alpha_vals <- c(1, 0.75, 0.75, 1) 74 | names(alpha_vals) <- c('NA', 'male', 'female', 'both') 75 | 76 | 77 | pdf(paste0(out_dir, ro_pair, '_simple_forest_plot_', cov_setting, '_', trim, '.pdf'), 78 | height = 12) 79 | 80 | p <- ggplot(data=plot_data, 81 | aes(x = row,y = mean_linear, ymin = lower, ymax = upper, color = as.factor(data)))+ 82 | geom_pointrange(aes(shape = as.factor(data)))+ 83 | scale_color_manual("", values = color_vals, guide = F) + 84 | # scale_alpha_manual(values=c('NA'=1, 'male'=0.3, 'female'=0.3, 'both'=1)) + 85 | geom_hline(yintercept =1, linetype=2, color = "black")+ 86 | xlab('study id')+ ylab(paste0('Relative Risk', " (95% Confidence Interval)"))+ 87 | scale_x_continuous(label = obs_data$study, breaks = obs_data$row)+ 88 | geom_errorbar(aes(ymin=lower, ymax=upper),width=0.5,cex=1)+ 89 | labs(subtitle = header, color = "")+ 90 | scale_shape_discrete(guide = F)+ 91 | theme_bw()+ 92 | coord_flip() 93 | 94 | print(p) 95 | 96 | dev.off() 97 | } else { 98 | pdf(paste0(out_dir, ro_pair, '_simple_forest_plot_', cov_setting, '_', trim, '.pdf'), 99 | height=15) 100 | 101 | p <- ggplot(data=plot_data, 102 | aes(x = row,y = mean_linear, ymin = lower, ymax = upper, color = as.factor(data)))+ 103 | geom_pointrange(aes(shape = as.factor(data)))+ 104 | scale_color_manual("", values = color_vals, guide = F) + 105 | geom_hline(yintercept =1, linetype=2, color = "black")+ 106 | xlab('study id')+ ylab(paste0('Relative Risk', " (95% Confidence Interval)"))+ 107 | scale_x_continuous(label = obs_data$study, breaks = obs_data$row)+ 108 | 
geom_errorbar(aes(ymin=lower, ymax=upper),width=0.5,cex=1)+ 109 | labs(subtitle = header, color = "")+ 110 | scale_shape_discrete(guide = F)+ 111 | theme_bw()+ 112 | coord_flip() 113 | 114 | print(p) 115 | 116 | dev.off() 117 | } 118 | -------------------------------------------------------------------------------- /risks/smoking/config.R: -------------------------------------------------------------------------------- 1 | # Configuration of pipeline 2 | 3 | # User settings 4 | # ------------------------------------------------------------------------------ 5 | USER <- Sys.getenv("USER") 6 | WORK_DIR <- "[working directory]" 7 | CODE_PATH <- paste0(WORK_DIR, "/src/") 8 | 9 | # Cluster settings 10 | # ------------------------------------------------------------------------------ 11 | PROJ <- "[project name]" 12 | SINGULARITY_IMG <- "[R image directory]" 13 | 14 | # Version settings 15 | # ------------------------------------------------------------------------------ 16 | VERSION_ID <- "prod" 17 | 18 | # Directory settings 19 | # ------------------------------------------------------------------------------ 20 | OUT_DIR <- "[output directory]" 21 | INPUT_DATA_DIR = "[input data directory]" 22 | 23 | # Output directory for each stage 24 | SUB_DIRS <- c( 25 | paste0(OUT_DIR, "00_prepped_data"), 26 | paste0(OUT_DIR, "01_template_pkl_files"), 27 | paste0(OUT_DIR, "01_template_models"), 28 | paste0(OUT_DIR, "02_loglinear_models"), 29 | paste0(OUT_DIR, "02_loglinear_pkl_files"), 30 | paste0(OUT_DIR, "03_covariate_selection_models"), 31 | paste0(OUT_DIR, "03_covariate_selection_pkl_files"), 32 | paste0(OUT_DIR, "04_mixed_effects_models"), 33 | paste0(OUT_DIR, "04_mixed_effects_pkl_files"), 34 | paste0(OUT_DIR, "05_evidence_score") 35 | ) 36 | 37 | # create directories 38 | for (direc in SUB_DIRS){ 39 | dir.create(direc, showWarnings = F) 40 | } 41 | 42 | # data settings 43 | # ------------------------------------------------------------------------------ 44 | ALL_RO_PAIRS <- gsub(".csv", "", list.files(INPUT_DATA_DIR)) 45 | EXCLUDED_RO_PAIRS <- c("dairy_stroke", "fruit_oral", "fruit_larynx") 46 | RO_PAIRS <- ALL_RO_PAIRS[!(ALL_RO_PAIRS %in% EXCLUDED_RO_PAIRS)] 47 | 48 | OBS_VAR <- "ln_effect" 49 | OBS_SE_VAR <- "ln_se" 50 | STUDY_ID_VAR <- "nid" 51 | 52 | ALT_EXPOSURE_COLS <- c("b_0", "b_1") 53 | REF_EXPOSURE_COLS <- c("a_0", "a_1") 54 | 55 | # model settings 56 | # ------------------------------------------------------------------------------ 57 | BIAS_COVARIATES_AS_INTX <- TRUE 58 | 59 | BETA_PRIOR_MULTIPLIER = 0.1 60 | COV_FINDER_CONFIG = list( 61 | pre_selected_covs = list("exposure_linear"), 62 | num_samples = 1000L, 63 | power_range = list(-4, 4), 64 | power_step_size = 0.05, 65 | laplace_threshold = 1e-5, 66 | inlier_pct = 1.0, 67 | bias_zero = TRUE 68 | ) 69 | 70 | PRIOR_VAR_RSLOPE = 1e-6 71 | PRIOR_VAR_MAXDER <- 1e-4 72 | MONOSPLINE_SLOPE_MULTIPLIER <- 2 73 | 74 | MONOSPLINE_BIAS_CONFIG = list( 75 | spline_degree = 3L 76 | ) 77 | 78 | LOGLINEAR_BIAS_CONFIG = list( 79 | spline_degree = 3L 80 | ) 81 | -------------------------------------------------------------------------------- /risks/smoking/continous_risk/04_format_rr_draws_non_cvd.R: -------------------------------------------------------------------------------- 1 | #--------------------------------------------------- 2 | # Purpose: format draws for non-cvd outcomes only 3 | # Author: Xiaochen Dai 4 | # Date: 07/25/2022 5 | #--------------------------------------------------- 6 | 7 | rm(list = ls()) 8 | 9 | # System info 10 | os <- 
Sys.info()[1] 11 | user <- Sys.info()[7] 12 | 13 | # Drives 14 | j <- if (os == "Linux") "/home/j/" else if (os == "Windows") "J:/" 15 | h <- if (os == "Linux") paste0("/homes/", user, "/") else if (os == "Windows") "H:/" 16 | 17 | library(dplyr) 18 | library(ggplot2) 19 | library(data.table) 20 | source("get_ids.R") 21 | source("get_age_metadata.R") 22 | 23 | # Set up arguments 24 | if(interactive()){ 25 | ro_pair <- "[enter risk-outcome pair of interest]" 26 | level_100 <- F 27 | } else { 28 | args <- commandArgs(trailingOnly = TRUE) 29 | ro_pair <- args[1] 30 | level_100 <- as.logical(args[2]) 31 | } 32 | 33 | if(level_100){ 34 | message("100 exposure levels") 35 | rr_dir <- "[path to raw draws for 100 exposure levels]" 36 | save_dir <- "[path to final formated draws for 100 exposure levels]" 37 | } else { 38 | message("1000 exposure levels") 39 | rr_dir <- "[path to raw draws for 1000 exposure levels]" 40 | save_dir <- "[path to final formated draws for 1000 exposure levels]" 41 | } 42 | 43 | ages <- get_age_metadata(19) 44 | setnames(ages, c("age_group_years_start", "age_group_years_end"), c("age_start", "age_end")) 45 | ages <- ages[,.(age_start, age_end, age_group_id)] 46 | 47 | # expand age group and sex 48 | age_group_ids <- c(6:20, 30:32, 235) 49 | 50 | # for fractures 51 | if(ro_pair=="fractures"){ 52 | rr_frac <- fread(paste0(rr_dir, "smoking_", ro_pair, ".csv")) 53 | rr_frac[, rr := exp(rr)] 54 | 55 | rr_full <- expand.grid(cause_id=c(878,923), draw=0:999, sex_id=1:2, age_group_id=age_group_ids) %>% as.data.table 56 | rr_full <- merge(rr_full, rr_frac, by="draw") 57 | 58 | } else { 59 | # reshape the data 60 | rr <- fread(paste0(rr_dir, "smoking_", ro_pair, ".csv")) 61 | setnames(rr, "risk", "exposure") 62 | rr_long <- melt(rr, id.vars = "exposure", variable.name = "draw", value.name = "rr") 63 | rr_long <- rr_long[order(exposure)] 64 | rr_long[, rr:=exp(rr)] 65 | rr_long[, draw := as.numeric(draw)-1] 66 | 67 | rr_full <- expand.grid(exposure=seq(0,100,0.1), draw=0:999, sex_id=1:2, age_group_id=age_group_ids) %>% as.data.table 68 | rr_full <- merge(rr_full, rr_long, by=c("exposure", "draw")) 69 | setorder(rr_full, "exposure","draw", "sex_id", "age_group_id") 70 | 71 | if(ro_pair %in% c("breast_cancer", "cervical_cancer")){ 72 | rr_full <- rr_full[sex_id==2] 73 | } 74 | 75 | if(ro_pair %in% c("prostate_cancer")){ 76 | rr_full <- rr_full[sex_id==1] 77 | } 78 | } 79 | 80 | # save the draws 81 | write.csv(rr_full, paste0(save_dir, ro_pair, ".csv"), row.names = F) 82 | 83 | 84 | 85 | 86 | 87 | -------------------------------------------------------------------------------- /risks/smoking/continous_risk/05_02_age_specific_rr_curves.R: -------------------------------------------------------------------------------- 1 | #--------------------------------------------------- 2 | # Purpose: create final age_specific relative risk curves for CVD outcomes 3 | # Author: Xiaochen Dai, adapted from Haley's codes 4 | # Date: 07/25/2022 5 | #--------------------------------------------------- 6 | 7 | rm(list = ls()) 8 | 9 | # System info 10 | os <- Sys.info()[1] 11 | user <- Sys.info()[7] 12 | 13 | # Drives 14 | j <- if (os == "Linux") "/home/j/" else if (os == "Windows") "J:/" 15 | h <- if (os == "Linux") paste0("/homes/", user, "/") else if (os == "Windows") "H:/" 16 | 17 | code_dir <- '[path to age_rr_utils.R]' 18 | save_dir <- "[path to results]" 19 | age_rr_dt_dir <- '[path to cleaned age-stratified data and AF draws]' 20 | plot_dir <- "[path to plots]" 21 | 22 | library(dplyr) 23 | 
library(ggplot2) 24 | library(data.table) 25 | library(mrbrt002, lib.loc = "/ihme/code/mscm/Rv4/packages/") 26 | source("get_ids.R") 27 | source("get_age_metadata.R") 28 | source(paste0(code_dir, "age_rr_utils.R")) 29 | source("helper_functions.R") 30 | np <- import("numpy") 31 | np$random$seed(as.integer(123)) 32 | 33 | # Set up arguments 34 | if(interactive()){ 35 | ro_pair <- "[enter CVD risk-outcome pair of interest]" 36 | mean_factor <- F 37 | log_af <- F # using AF calculated based on log_rr 38 | level_100 <- F 39 | } else { 40 | args <- commandArgs(trailingOnly = TRUE) 41 | ro_pair <- args[1] 42 | mean_factor <- as.logical(args[2]) 43 | log_af <- as.logical(args[3]) 44 | level_100 <- as.logical(args[4]) 45 | } 46 | 47 | if(level_100){ 48 | output_dir <- "[path to raw draws for 100 exposure levels]" 49 | new_dir <- "[path to final formated draws for 100 exposure levels]" 50 | } else { 51 | output_dir <- "[path to raw draws for 1000 exposure levels]" 52 | new_dir <- "[path to final formated draws for 1000 exposure levels]" 53 | } 54 | 55 | ages <- get_age_metadata(19) 56 | setnames(ages, c("age_group_years_start", "age_group_years_end"), c("age_start", "age_end")) 57 | ages <- ages[,.(age_start, age_end, age_group_id)] 58 | 59 | # get the reference age group 60 | data <- readRDS(paste0(save_dir, "01_template_models/", ro_pair, ".RDS")) 61 | df_data <- data$df_data 62 | age_ref <- df_data$age_ref %>% mean 63 | age_ref_group <- ages[age_start <= age_ref & age_end >= age_ref, age_group_id] 64 | 65 | # load rr draws and age pattern draws 66 | rr_draws <- fread(paste0(output_dir, "smoking_", ro_pair, ".csv")) 67 | setnames(rr_draws, "risk", "exposure") 68 | age_pattern_draws <- fread(file.path(age_rr_dt_dir, paste0("attenuation_pct_draws_", ro_pair, "_",age_ref_group,".csv"))) 69 | 70 | col_names <- c("exposure", paste0("draw_", 0:999)) 71 | 72 | # change variable name 73 | names(rr_draws) <- col_names 74 | 75 | # load mean of age pattern 76 | age_pattern_mean <- fread(file.path(age_rr_dt_dir, paste0("attenuation_pct_summary_", ro_pair, "_",age_ref_group,".csv"))) 77 | 78 | # apply attenuation factors 79 | if(mean_factor){ 80 | # apply mean of attenuation factors only 81 | plot_path <- paste0(plot_dir, "age_spec_smoking_", ro_pair,"_af_mean.pdf") 82 | age_spec_risk_curve <- apply_age_pattern_mean_af(ro_pair =ro_pair, 83 | risk_curve_draws_df = rr_draws, 84 | age_pattern_mean_df = age_pattern_mean, 85 | age_pattern_mean_log_df=age_pattern_mean_log, 86 | log_af = log_af, 87 | plot = T, 88 | plot_path = plot_path) 89 | 90 | } else { 91 | plot_path <- paste0(plot_dir, "age_spec_smoking_", ro_pair,"_af_draws_no_gamma.pdf") 92 | age_spec_risk_curve <- apply_age_pattern(ro_pair =ro_pair, 93 | risk_curve_draws_df = rr_draws, 94 | age_pattern_draws_df = age_pattern_draws, 95 | #draws_in_log = T, 96 | #return_draws_log = F, 97 | plot = T, 98 | plot_path = plot_path) 99 | 100 | } 101 | 102 | # re-shape the dataset 103 | age_spec_rr <- melt(age_spec_risk_curve, id.vars = c("exposure", "age_group_id"), variable.name = "draw", value.name = "rr") 104 | age_spec_rr[, draw:= as.numeric(draw)-1] 105 | 106 | age_spec_rr_full <- copy(age_spec_rr) 107 | for(age_id in c(6:8)){ 108 | temp <- age_spec_rr[age_group_id==9] 109 | temp[, age_group_id := age_id] 110 | age_spec_rr_full <- rbindlist(list(temp, age_spec_rr_full), use.names = T) 111 | } 112 | age_spec_rr_full[, age_group_id] %>% unique 113 | 114 | # add sex_id 115 | age_spec_rr_full_m <- copy(age_spec_rr_full) 116 | age_spec_rr_full_f <- 
copy(age_spec_rr_full) 117 | age_spec_rr_full_m[, sex_id := 1] 118 | age_spec_rr_full_f[, sex_id := 2] 119 | 120 | age_spec_rr_full <- rbindlist(list(age_spec_rr_full_m, age_spec_rr_full_f), use.names = T) 121 | 122 | setorder(age_spec_rr_full, "exposure","draw", "sex_id", "age_group_id") 123 | 124 | # save the draws 125 | message("saving draws...") 126 | write.csv(age_spec_rr_full, paste0(new_dir, ro_pair, ".csv"), row.names = F) 127 | -------------------------------------------------------------------------------- /risks/smoking/prep_data_function.R: -------------------------------------------------------------------------------- 1 | 2 | prep_diet_data <- function( 3 | ro_pair, obs_var, obs_se_var, ref_vars, alt_vars, allow_ref_gt_alt = FALSE, 4 | study_id_var = NA, 5 | drop_x_covs = NA, keep_x_covs = NA, drop_z_covs = NA, keep_z_covs = NA, 6 | diet_dir = NA, 7 | verbose = TRUE) { 8 | 9 | require(dplyr) 10 | require(rlang) 11 | 12 | if (verbose) cat(ro_pair, "\n") 13 | if (!all(is.na(drop_x_covs)) && !all(is.na(keep_x_covs))) stop("Cannot specify both drop and keep X-covs") 14 | if (!all(is.na(drop_z_covs)) && !all(is.na(keep_z_covs))) stop("Cannot specify both drop and keep Z-covs") 15 | 16 | df <- read.csv(paste0(diet_dir, "/", ro_pair, ".csv")) %>% 17 | filter(complete.cases(.[, c(ref_vars, alt_vars)])) 18 | 19 | if (nrow(df) == 0) stop("No observations with non-missing exposure columns") 20 | 21 | # # convert non-binary covariates into dummy variables 22 | # # and create list of bias covariates for the analysis 23 | create_dummy_vars <- function(dat, varname, reference_level) { 24 | dev <- FALSE 25 | if (dev) { 26 | dat <- data.frame(x1 = sample(c("a", "b", "c"), 30, TRUE)) 27 | varname <- "x1" 28 | reference_level <- "a" 29 | } 30 | vec <- as.data.frame(dat)[, varname] 31 | lvls <- unique(vec)[!unique(vec) == reference_level] 32 | dat2 <- as.data.frame(do.call("cbind", lapply(lvls, function(x) as.integer(vec == x)))) 33 | if(!is_empty(dat2)){ 34 | names(dat2) <- paste0(varname, "_", lvls) 35 | } 36 | return(dat2) 37 | } 38 | 39 | confounders <- names(df)[grepl('confounders_', names(df))] 40 | cvs <- names(df)[grepl('cv_', names(df))] 41 | 42 | data_cols <- c(cvs) 43 | 44 | bias_covs <- c() 45 | for (cov in data_cols[data_cols %in% names(df)]) { 46 | 47 | dev <- FALSE 48 | if (dev) { 49 | cov <- "follow_up" 50 | } 51 | 52 | if (any(is.na(df[, cov]))) next 53 | 54 | if (all(df[, cov] == round(df[, cov]))) { 55 | df[, cov] <- as.integer(df[, cov]) 56 | } else { 57 | stop(paste0("Bias covariate '", cov, "' is not of type integer")) 58 | } 59 | bias_covs <- c(bias_covs, cov) 60 | } 61 | 62 | # use SVD to prevent adding collinear variables 63 | bias_covs_tmp <- c() 64 | 65 | # sort the bias_covs to make sure the cv_adj is always included 66 | bias_covs <- sort(bias_covs) 67 | 68 | for (bias_cov in bias_covs) { 69 | dev <- FALSE 70 | if (dev) { 71 | bias_cov <- "exposure_3" 72 | } 73 | d <- svd(cbind(df[, bias_covs_tmp], df[, bias_cov]))$d 74 | if (d[length(d)] > 1e-10) bias_covs_tmp <- c(bias_covs_tmp, bias_cov) 75 | } 76 | 77 | bias_covs <- bias_covs_tmp 78 | 79 | # warn if cv_adj is not selected 80 | if(!'cv_adj' %in% bias_covs) message("Warning: cv_adj is not selected") 81 | 82 | # dataset 83 | # NOTE: these covs cannot have missingness!
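# A minimal standalone sketch of the SVD screen used above (toy data, not pipeline data):
# a candidate column survives only if the smallest singular value of the running
# design matrix stays above 1e-10, i.e. it is not a linear combination of the
# covariates already kept.
#   X <- data.frame(cv_adj = c(1L, 0L, 1L, 0L), cv_dup = c(1L, 0L, 1L, 0L))
#   min(svd(cbind(X$cv_adj))$d)            # > 1e-10, so cv_adj is kept
#   min(svd(cbind(X$cv_adj, X$cv_dup))$d)  # 0, so cv_dup is dropped as collinear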
84 | df <- df[, c("nid", "ln_effect", "ln_se", ALT_EXPOSURE_COLS, REF_EXPOSURE_COLS, 'percent_male', 'age_start', 'age_end', 'age_ref', bias_covs)] %>% 85 | filter(complete.cases(.)) %>% 86 | arrange(nid) 87 | 88 | ##cov inclusion/exclusion 89 | # -- X 90 | if (!is.na(keep_x_covs)) { 91 | if (!all(keep_x_covs %in% bias_covs)) { 92 | stop("One or more provided X-covs not allowed.") 93 | } else { 94 | x_covs <- keep_x_covs 95 | } 96 | } else if (!is.na(drop_x_covs)) { 97 | x_covs <- bias_covs[!bias_covs %in% drop_x_covs] 98 | } else { 99 | x_covs <- bias_covs 100 | } 101 | 102 | #-- Z 103 | if (!is.na(keep_z_covs)) { 104 | if (!all(keep_z_covs %in% bias_covs)) { 105 | stop("One or more provided Z-covs not allowed.") 106 | } else { 107 | z_covs <- keep_z_covs 108 | } 109 | } else if (!is.na(drop_z_covs)) { 110 | z_covs <- bias_covs[!bias_covs %in% drop_z_covs] 111 | } else { 112 | z_covs <- bias_covs 113 | } 114 | 115 | out <- list( 116 | df=df, ro_pair=ro_pair, x_covs=x_covs, z_covs=z_covs, 117 | obs_var=obs_var, obs_se_var=obs_se_var, 118 | ref_vars=ref_vars, alt_vars=alt_vars, 119 | study_id_var=study_id_var, 120 | allow_ref_gt_alt=allow_ref_gt_alt 121 | ) 122 | return(out) 123 | } 124 | 125 | -------------------------------------------------------------------------------- /risks/vegetables/README.md: -------------------------------------------------------------------------------- 1 | This folder contains custom code for diet low in vegetables burden of proof analysis. 2 | -------------------------------------------------------------------------------- /risks/vegetables/config.R: -------------------------------------------------------------------------------- 1 | # Configuration of pipeline 2 | 3 | # Directory settings 4 | # ------------------------------------------------------------------------------ 5 | OUT_DIR <- FILEPATH 6 | INPUT_DATA_DIR <- FILEPATH 7 | 8 | # Output directory for each stage 9 | SUB_DIRS <- c( 10 | paste0(OUT_DIR, "00_prepped_data"), 11 | paste0(OUT_DIR, "01_template_pkl_files"), 12 | paste0(OUT_DIR, "01_template_models"), 13 | paste0(OUT_DIR, "02_loglinear_models"), 14 | paste0(OUT_DIR, "02_loglinear_pkl_files"), 15 | paste0(OUT_DIR, "03_covariate_selection_models"), 16 | paste0(OUT_DIR, "04_mixed_effects_pkl_files"), 17 | paste0(OUT_DIR, "05_evidence_score"), 18 | paste0(OUT_DIR, "05_all_plots"), 19 | paste0(OUT_DIR, "05_all_csvs"), 20 | paste0(OUT_DIR, "05_pub_bias"), 21 | paste0(OUT_DIR, "05_draw_csvs") 22 | ) 23 | 24 | # data settings 25 | # ------------------------------------------------------------------------------ 26 | ALL_RO_PAIRS <- gsub(".csv", "", list.files(INPUT_DATA_DIR)) 27 | EXCLUDED_RO_PAIRS <- c("sugar_cvd", "sugar_obesity", "fruit_oral", "fruit_larynx", 28 | ALL_RO_PAIRS[grepl("original", ALL_RO_PAIRS)], 29 | ALL_RO_PAIRS[grepl("_stroke", ALL_RO_PAIRS)], 30 | ALL_RO_PAIRS[grepl("sugar", ALL_RO_PAIRS)]) 31 | RO_PAIRS <- ALL_RO_PAIRS[!(ALL_RO_PAIRS %in% EXCLUDED_RO_PAIRS)] 32 | RO_PAIRS <- ALL_RO_PAIRS[grepl("veg", ALL_RO_PAIRS)] # Alternative Option 1: select a subset of RO pairs to run 33 | 34 | 35 | OBS_VAR <- "ln_effect" 36 | OBS_SE_VAR <- "ln_se" 37 | STUDY_ID_VAR <- "nid" 38 | 39 | ALT_EXPOSURE_COLS <- c("b_0", "b_1") 40 | REF_EXPOSURE_COLS <- c("a_0", "a_1") 41 | 42 | USE_GLOBAL_DIST_PREDICT <- F # use the data to predict = F; use the exposure model to predict = T 43 | 44 | 45 | # model settings 46 | # ------------------------------------------------------------------------------ 47 | BIAS_COVARIATES_AS_INTX <- TRUE 48 | 49 | # For diet 50 
| DIRECTION = list( 51 | veg = "decreasing" 52 | ) 53 | 54 | BETA_PRIOR_MULTIPLIER = 0.1 # used in covfinder and final model on covs 55 | 56 | 57 | colnames(PRE_SELECTED_COVS) <- c("ro_pair", "cov") # PRE_SELECTED_COVS (a ro_pair/cov table) must be defined before this config is sourced 58 | 59 | COV_FINDER_CONFIG = list( 60 | #pre_selected_covs = list("signal"), 61 | num_samples = 1000L, 62 | power_range = list(-4, 4), 63 | power_step_size = 0.05, 64 | laplace_threshold = 1e-5, 65 | inlier_pct = 1, # since we trim in stage 1 66 | bias_zero = TRUE 67 | ) 68 | 69 | INLIER_PCT <- 0.9 # 0.9 standard trimming 70 | 71 | 72 | N_I_KNOTS <- 2L 73 | PRIOR_VAR_RSLOPE = 1e-6 # originally 1e-6 74 | PRIOR_VAR_MAXDER <- 1e-4 75 | 76 | # Monotonic risks will have monotonicity constraint included 77 | CONFIG = list( 78 | use_spline = TRUE, 79 | use_re = FALSE, 80 | spline_degree = 2L, 81 | spline_knots_type = 'domain', 82 | spline_r_linear = TRUE, 83 | spline_l_linear = FALSE, 84 | prior_spline_funval_uniform = array(c(-1 + 1e-6, 19)), 85 | prior_spline_num_constraint_points = 150L, 86 | spline_knots = array(seq(0, 1, length.out = N_I_KNOTS + 2)), 87 | prior_spline_maxder_gaussian = cbind(rbind(rep(0, N_I_KNOTS), 88 | rep(Inf, N_I_KNOTS)), 89 | c(0, sqrt(PRIOR_VAR_RSLOPE))) 90 | ) 91 | 92 | 93 | J_N_I_KNOTS <- 3L 94 | # 95 | J_SHAPED_CONFIG = list( 96 | use_spline = TRUE, 97 | use_re = FALSE, 98 | spline_degree = 2L, 99 | spline_knots_type = 'domain', 100 | spline_r_linear = TRUE, 101 | spline_l_linear = TRUE, 102 | prior_spline_funval_uniform = array(c(-1 + 1e-6, 19)), 103 | prior_spline_num_constraint_points = 150L, 104 | spline_knots = array(seq(0, 1, length.out = J_N_I_KNOTS + 2)), 105 | prior_spline_maxder_gaussian = cbind(c(0, sqrt(PRIOR_VAR_RSLOPE)), 106 | rbind(rep(0, J_N_I_KNOTS-1), 107 | rep(Inf, J_N_I_KNOTS-1)), 108 | c(0, sqrt(PRIOR_VAR_RSLOPE))) 109 | ) 110 | 111 | -------------------------------------------------------------------------------- /risks/vegetables/veg_TMREL.R: -------------------------------------------------------------------------------- 1 | # Drives 2 | os <- Sys.info()["sysname"] 3 | user <- Sys.info()[7] 4 | j <- if (os == "Linux") "/home/j/" else if (os == "Windows") "J:/" 5 | h <- if (os == "Linux") paste0("/homes/", user, "/") else if (os == "Windows") "H:/" 6 | 7 | out_dir <- "FILEPATH" 8 | WORK_DIR <- "FILEPATH" 9 | central_model_output_folder <- "FILEPATH" 10 | 11 | library(dplyr) 12 | library(ggplot2) 13 | library(data.table) 14 | 15 | 16 | df <- fread("FILEPATH") 17 | 18 | # FLIP COMPARISON GROUPS IF LOW INTAKE GROUP WAS CODED AS ALTERNATIVE # 19 | df[a_0>b_0, b_0 := a_0] 20 | df[a_1>b_1, b_1 := a_1] 21 | 22 | df[, index := 1:.N, by = "nid"] 23 | 24 | alt_exp_study <- unique(df[a_0!=b_0, ][,.(nid, b_1,b_0)]) 25 | alt_exp_study[, b_midpoint := b_0 + (b_1-b_0)/2] 26 | 27 | lower <- as.numeric(quantile(alt_exp_study$b_0, 0.85)) 28 | upper <- as.numeric(quantile(alt_exp_study$b_midpoint, 0.85)) 29 | 30 | p <- "veg" # risk-outcome label stored in the "ro" column below 31 | tmrel_draws <- data.table("tmrel" = runif(1000, min = lower, max = upper)) 32 | tmrel_draws[, `:=`(ro = p, draw = paste0("draw_",0:999), lower = lower, upper = upper)] 33 | write.csv(tmrel_draws, "FILEPATH", row.names = F) 34 | -------------------------------------------------------------------------------- /second_process/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 |
lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | notebooks/ 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | .python-version 87 | 88 | # pipenv 89 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 90 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 91 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 92 | # install all needed dependencies. 93 | #Pipfile.lock 94 | 95 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 96 | __pypackages__/ 97 | 98 | # Celery stuff 99 | celerybeat-schedule 100 | celerybeat.pid 101 | 102 | # SageMath parsed files 103 | *.sage.py 104 | 105 | # Environments 106 | .env 107 | .venv 108 | env/ 109 | venv/ 110 | ENV/ 111 | env.bak/ 112 | venv.bak/ 113 | 114 | # Spyder project settings 115 | .spyderproject 116 | .spyproject 117 | 118 | # Rope project settings 119 | .ropeproject 120 | 121 | # mkdocs documentation 122 | /site 123 | 124 | # mypy 125 | .mypy_cache/ 126 | .dmypy.json 127 | dmypy.json 128 | 129 | # Pyre type checker 130 | .pyre/ -------------------------------------------------------------------------------- /second_process/LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2021, IHME Math Sciences 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /second_process/README.md: -------------------------------------------------------------------------------- 1 | # Evidence Score Pipeline 2 | 3 | Used for pipeline to obtain the results from `mrtool` for the purpose of risk score. 4 | -------------------------------------------------------------------------------- /second_process/examples/gbd2020_continuous_risk.py: -------------------------------------------------------------------------------- 1 | from espipeline.main import PostContinuousProcess, ContinuousPipeline 2 | from espipeline.filemanager import FileManager 3 | import warnings 4 | warnings.filterwarnings("ignore") 5 | 6 | 7 | def main(): 8 | i_folder = "/mnt/team/msca/pub/archive/evidence-score/gbd2020" 9 | o_folder = "/mnt/team/msca/pub/archive/evidence-score/gbd2020-process" 10 | 11 | fm = FileManager(i_folder, o_folder) 12 | pipeline = ContinuousPipeline(fm, PostContinuousProcess) 13 | 14 | for pair in pipeline.pairs: 15 | print(pair) 16 | if "metab_bmi_adult" in pair: 17 | continue 18 | process = pipeline.get_process(pair) 19 | process.run() 20 | 21 | 22 | if __name__ == "__main__": 23 | main() 24 | -------------------------------------------------------------------------------- /second_process/setup.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | from pathlib import Path 3 | from setuptools import setup, find_packages 4 | 5 | 6 | if __name__ == "__main__": 7 | base_dir = Path(__file__).parent 8 | spec = importlib.util.spec_from_file_location( 9 | "__about__", 10 | base_dir / "src" / "espipeline" / "__about__.py" 11 | ) 12 | about = importlib.util.module_from_spec(spec) 13 | spec.loader.exec_module(about) 14 | 15 | with (base_dir/"README.md").open() as f: 16 | long_description = f.read() 17 | 18 | install_requirements = [ 19 | "numpy", 20 | "scipy", 21 | "pandas", 22 | ] 23 | 24 | test_requirements = [ 25 | "pytest", 26 | "pytest-mock", 27 | ] 28 | 29 | doc_requirements = [] 30 | 31 | setup(name=about.__title__, 32 | version=about.__version__, 33 | 34 | description=about.__summary__, 35 | long_description=long_description, 36 | license=about.__license__, 37 | url=about.__uri__, 38 | 39 | author=about.__author__, 40 | author_email=about.__email__, 41 | 42 | package_dir={"": "src"}, 43 | packages=find_packages(where="src"), 44 | include_package_data=True, 45 | 46 | install_requires=install_requirements, 47 | tests_require=test_requirements, 48 | extras_require={ 49 | "docs": doc_requirements, 50 | "test": test_requirements, 51 | "dev": doc_requirements + test_requirements 52 | }, 53 | zip_safe=False,) 54 | -------------------------------------------------------------------------------- /second_process/src/espipeline/__about__.py: -------------------------------------------------------------------------------- 1 | __all__ = [ 2 | "__title__", "__summary__", "__uri__", "__version__", "__author__", 3 | 
"__email__", "__license__", "__copyright__", 4 | ] 5 | 6 | __title__ = "espipeline" 7 | __summary__ = "Evidence score pipeline" 8 | __uri__ = "https://stash.ihme.washington.edu/projects/MSCA/repos/escore-pipeline" 9 | 10 | __version__ = "0.0.0" 11 | 12 | __author__ = "IHME Math Sciences" 13 | __email__ = "ihme.math.sciences@gmail.com" 14 | 15 | __license__ = "BSD 2-Clause License" 16 | __copyright__ = f"Copyright 2021 {__author__}" 17 | -------------------------------------------------------------------------------- /second_process/src/espipeline/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipline import Pipeline 2 | from .process import Process 3 | from threadpoolctl import threadpool_limits 4 | 5 | 6 | threadpool_limits(limits=1, user_api='blas') 7 | threadpool_limits(limits=1, user_api='openmp') 8 | -------------------------------------------------------------------------------- /second_process/src/espipeline/dichotomous.py: -------------------------------------------------------------------------------- 1 | """ 2 | Main GBD Evience Score Pipeline 3 | """ 4 | import matplotlib.pyplot as plt 5 | 6 | from espipeline.process import Process 7 | 8 | 9 | class PostDichotomousProcess(Process): 10 | """ 11 | Post process for GBD 2020 dichotomous risk 12 | """ 13 | 14 | def plot_model(self): 15 | _, ax = plt.subplots(figsize=(8, 5)) 16 | 17 | # plot data 18 | ax.scatter(self.study_data.log_rr, 19 | self.study_data.log_rr_se, 20 | color="grey", alpha=0.4) 21 | outlier_index = self.study_data.is_outlier == 1 22 | ax.scatter(self.study_data.log_rr[outlier_index], 23 | self.study_data.log_rr_se[outlier_index], 24 | color="grey", alpha=0.4) 25 | 26 | # plot funnel 27 | beta = self.model.fe_soln["intercept"][0] 28 | se_max = self.study_data.log_rr_se.max() 29 | ax.fill_betweenx( 30 | [0.0, se_max], 31 | [beta, beta - 1.96*se_max], 32 | [beta, beta + 1.96*se_max], 33 | color="#B0E0E6", alpha=0.4 34 | ) 35 | ax.plot([beta, beta - 1.96*se_max], 36 | [0.0, se_max], 37 | linewidth=1, color="#87CEFA") 38 | ax.plot([beta, beta + 1.96*se_max], 39 | [0.0, se_max], 40 | linewidth=1, color="#87CEFA") 41 | ax.set_ylim([se_max, 0.0]) 42 | 43 | # plot vertical lines 44 | ax.axvline(0.0, color="k") 45 | ax.axvline(beta, color="#008080") 46 | ax.fill_betweenx([0.0, se_max], 47 | [self.output_data.outer_log_cause_lower.values[0]]*2, 48 | [self.output_data.outer_log_cause_upper.values[0]]*2, 49 | color="#008080", alpha=0.2) 50 | ax.fill_betweenx([0.0, se_max], 51 | [self.output_data.inner_log_cause_lower.values[0]]*2, 52 | [self.output_data.inner_log_cause_upper.values[0]]*2, 53 | color="#008080", alpha=0.2) 54 | 55 | # set title and labels 56 | title = (f"name={self.name}, " 57 | f"score={self.risk_cause_metadata.score.values[0]: .3f}") 58 | ax.set_title(title, loc="left") 59 | ax.set_xlabel("log_rr") 60 | ax.set_ylabel("log_rr_se") 61 | 62 | plt.savefig(self.o_path / "model_figure.pdf", bbox_inches="tight") 63 | plt.close("all") 64 | 65 | def run(self): 66 | super().run() 67 | self.plot_model() 68 | -------------------------------------------------------------------------------- /second_process/src/espipeline/filemanager.py: -------------------------------------------------------------------------------- 1 | """ 2 | File manager: organize file paths 3 | """ 4 | from collections import defaultdict 5 | from dataclasses import dataclass, field 6 | from pathlib import Path 7 | from typing import Dict, List 8 | 9 | import pandas as pd 10 | 11 | 12 | @dataclass 13 
| class FileManager: 14 | """ 15 | Manages files for all risk-outcome pairs in the folder 16 | """ 17 | 18 | i_path: Path 19 | o_path: Path 20 | pairs: List[str] = field(init=False) 21 | pair_paths: Dict[str, Path] = field(init=False) 22 | pair_types: List[str] = field(init=False) 23 | pairs_by_type: Dict[str, List[str]] = field(init=False) 24 | 25 | def __post_init__(self): 26 | self.i_path = Path(self.i_path) 27 | self.o_path = Path(self.o_path) 28 | if not self.i_path.exists(): 29 | raise FileNotFoundError(str(self.i_path)) 30 | if not self.o_path.exists(): 31 | self.o_path.mkdir() 32 | 33 | self.pairs = [path.name 34 | for path in self.i_path.iterdir() if path.is_dir()] 35 | self.pair_paths = {pair: self.i_path / pair for pair in self.pairs} 36 | self.pairs_by_type = defaultdict(list) 37 | self.sort_pairs() 38 | self.pair_types = list(self.pairs_by_type.keys()) 39 | 40 | def sort_pairs(self): 41 | for pair, path in self.pair_paths.items(): 42 | meta = path / "risk_cause_metadata.csv" 43 | if not meta.exists(): 44 | raise FileNotFoundError(str(meta)) 45 | df_meta = pd.read_csv(meta) 46 | self.pairs_by_type[df_meta.risk_type[0]].append(pair) 47 | 48 | def __getitem__(self, pair: str) -> Path: 49 | return self.pair_paths[pair] 50 | 51 | def __repr__(self) -> str: 52 | return f"{type(self).__name__}(num_pairs={len(self.pairs)})" 53 | -------------------------------------------------------------------------------- /second_process/src/espipeline/loglinear.py: -------------------------------------------------------------------------------- 1 | """ 2 | Main GBD Evidence Score Pipeline 3 | """ 4 | from typing import List 5 | 6 | import matplotlib.pyplot as plt 7 | import numpy as np 8 | from pandas import DataFrame 9 | 10 | from mrtool import MRBRT 11 | 12 | from espipeline.process import Process 13 | 14 | 15 | class PostLogLinearProcess(Process): 16 | """ 17 | Post process for GBD 2020 loglinear risk 18 | """ 19 | 20 | def plot_models(self): 21 | # plot original model 22 | plot_model(self.study_data, 23 | self.output_data, 24 | self.risk_cause_metadata, 25 | self.model, 26 | self.name) 27 | plt.savefig(self.o_path / "model_figure.pdf", bbox_inches="tight") 28 | plt.close("all") 29 | 30 | def run(self): 31 | super().run() 32 | self.plot_models() 33 | 34 | 35 | def get_data_signal(model: MRBRT) -> np.ndarray: 36 | for cov_model in model.cov_models: 37 | if len(cov_model.ref_cov) != 0: 38 | break 39 | alt_cov = model.data.get_covs(cov_model.alt_cov).mean(axis=1) 40 | ref_cov = model.data.get_covs(cov_model.ref_cov).mean(axis=1) 41 | return alt_cov - ref_cov 42 | 43 | 44 | def plot_model(study_data: DataFrame, 45 | output_data: DataFrame, 46 | risk_cause_metadata: DataFrame, 47 | model: MRBRT, 48 | name: str, 49 | ax: List[plt.Axes] = None): 50 | if ax is None: 51 | _, ax = plt.subplots(1, 2, figsize=(16, 5)) 52 | 53 | # plot data 54 | ax[0].scatter( 55 | study_data.alt_risk, 56 | study_data.log_alt_cause, 57 | s=5.0/study_data.log_rr_se, 58 | color="gray", 59 | alpha=0.5 60 | ) 61 | outlier_index = study_data.is_outlier == 1 62 | ax[0].scatter( 63 | study_data.alt_risk[outlier_index], 64 | study_data.log_alt_cause[outlier_index], 65 | s=5.0/study_data.log_rr_se[outlier_index], 66 | color="red", 67 | alpha=0.5, 68 | marker="x" 69 | ) 70 | 71 | # plot prediction 72 | ax[0].plot(output_data.risk, output_data.log_cause, 73 | color="#008080", linewidth=1) 74 | 75 | # plot uncertainties 76 | ax[0].fill_between(output_data.risk, 77 | output_data.inner_log_cause_lower, 78
output_data.inner_log_cause_upper, 79 | color="#008080", 80 | alpha=0.2) 81 | ax[0].fill_between(output_data.risk, 82 | output_data.outer_log_cause_lower, 83 | output_data.outer_log_cause_upper, 84 | color="#008080", 85 | alpha=0.2) 86 | 87 | # plot bounds 88 | for b in [risk_cause_metadata.risk_lower.values[0], 89 | risk_cause_metadata.risk_upper.values[0]]: 90 | ax[0].axvline(b, linestyle="--", linewidth=1, color="k") 91 | 92 | # plot 0 line 93 | ax[0].axhline(0.0, linestyle="-", linewidth=1, color="k") 94 | 95 | # add unit to the xaxis 96 | ax[0].set_xlabel(risk_cause_metadata.risk_unit.values[0]) 97 | 98 | # title 99 | title = (f"name={name}, " 100 | f"score={risk_cause_metadata.score.values[0]: .3f}") 101 | ax[0].set_title(title, loc="left") 102 | 103 | # plot residual 104 | residual = model.data.obs - model.predict(model.data) 105 | residual_sd = np.sqrt(model.data.obs_se**2 + get_data_signal(model)**2*model.gamma_soln[0]) 106 | outlier_index = model.w_soln < 0.1 107 | ax[1].set_ylim(residual_sd.max(), 0.0) 108 | ax[1].scatter(residual, residual_sd, 109 | color="gray", alpha=0.4) 110 | ax[1].scatter(residual[outlier_index], 111 | residual_sd[outlier_index], 112 | color="red", alpha=0.4, marker="x") 113 | ax[1].fill_betweenx( 114 | [0.0, residual_sd.max()], 115 | [0.0, -1.96*residual_sd.max()], 116 | [0.0, 1.96*residual_sd.max()], 117 | color="#B0E0E6", alpha=0.4 118 | ) 119 | ax[1].plot([0.0, -1.96*residual_sd.max()], 120 | [0.0, residual_sd.max()], 121 | linewidth=1, color="#87CEFA") 122 | ax[1].plot([0.0, 1.96*residual_sd.max()], 123 | [0.0, residual_sd.max()], 124 | linewidth=1, color="#87CEFA") 125 | ax[1].axvline(0.0, color="k", linewidth=1, linestyle="--") 126 | ax[1].set_xlabel("residual") 127 | ax[1].set_ylabel("residual sd") 128 | -------------------------------------------------------------------------------- /second_process/src/espipeline/pipline.py: -------------------------------------------------------------------------------- 1 | """ 2 | Pipeline Class 3 | """ 4 | from typing import Callable, List 5 | 6 | import numpy as np 7 | 8 | from espipeline.filemanager import FileManager 9 | from espipeline.utils import list_all_files 10 | 11 | 12 | class Pipeline: 13 | """ 14 | Main Pipeline class 15 | """ 16 | 17 | def __init__(self, 18 | name: str, 19 | fm: FileManager, 20 | process_constructor: Callable, 21 | pairs: List[str] = None): 22 | self.name = name 23 | self.pairs = fm.pairs_by_type[self.name] if pairs is None else pairs 24 | self.i_pair_paths = { 25 | pair: fm.pair_paths[pair] 26 | for pair in self.pairs 27 | } 28 | self.o_pair_paths = { 29 | pair: fm.o_path / pair 30 | for pair in self.pairs 31 | } 32 | self.process_constructor = process_constructor 33 | 34 | @property 35 | def num_pairs(self) -> int: 36 | return len(self.pairs) 37 | 38 | def get_process(self, pair: str) -> "Process": 39 | return self.process_constructor(self.i_pair_paths[pair], 40 | self.o_pair_paths[pair]) 41 | 42 | def run(self): 43 | for pair in self.pairs: 44 | i_path = self.i_pair_paths[pair] 45 | o_path = self.o_pair_paths[pair] 46 | i_time = max(f.stat().st_mtime for f in list_all_files(i_path)) 47 | if (not o_path.exists() or o_path.stat().st_size == 0): 48 | o_time = -np.inf 49 | else: 50 | o_time = max(f.stat().st_mtime for f in list_all_files(o_path)) 51 | if i_time > o_time: 52 | print(pair) 53 | process = self.get_process(pair) 54 | process.run() 55 | 56 | def __repr__(self) -> str: 57 | return (f"{type(self).__name__}(name={self.name}, " 58 | f"num_pairs={self.num_pairs})") 59 | 
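# Usage sketch (not part of the pipeline): the paths are placeholders, and the
# "continuous" label is assumed to match the risk_type column recorded in the
# archive's risk_cause_metadata.csv files.
#
#     from espipeline.filemanager import FileManager
#     from espipeline.pipline import Pipeline
#     from espipeline.process import Process
#
#     fm = FileManager("/path/to/archive", "/path/to/processed")
#     pipeline = Pipeline("continuous", fm, Process)
#     pipeline.run()  # processes only pairs whose inputs are newer than their outputs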
-------------------------------------------------------------------------------- /second_process/src/espipeline/process.py: -------------------------------------------------------------------------------- 1 | """ 2 | Process Class 3 | """ 4 | from pathlib import Path 5 | import shutil 6 | 7 | import pickle5 as pkl 8 | import pandas as pd 9 | 10 | from espipeline.utils import list_all_files 11 | 12 | 13 | class Process: 14 | """ 15 | Process function 16 | """ 17 | 18 | def __init__(self, i_path: Path, o_path: Path): 19 | self.i_path = Path(i_path) 20 | self.o_path = Path(o_path) 21 | self.name = i_path.name 22 | 23 | # load all files 24 | for f in list_all_files(self.i_path): 25 | stem, suffix = f.stem, f.suffix 26 | if suffix == ".pkl": 27 | setattr(self, stem, pkl.load(open(f, "rb"))) 28 | elif suffix == ".csv": 29 | setattr(self, stem, pd.read_csv(f)) 30 | 31 | def run(self): 32 | if self.o_path.exists(): 33 | shutil.rmtree(self.o_path) 34 | shutil.copytree(self.i_path, self.o_path) 35 | -------------------------------------------------------------------------------- /second_process/src/espipeline/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility Functions 3 | """ 4 | from itertools import chain 5 | from pathlib import Path 6 | from typing import Dict, List, Tuple 7 | 8 | import numpy as np 9 | from mrtool import MRBRT 10 | from mrtool.core.other_sampling import (extract_simple_lme_hessian, 11 | extract_simple_lme_specs) 12 | from numpy import ndarray 13 | from scipy.stats import norm 14 | 15 | 16 | def get_fe_hessian(model: MRBRT) -> ndarray: 17 | specs = extract_simple_lme_specs(model) 18 | return extract_simple_lme_hessian(specs) 19 | 20 | 21 | def get_re_fisher(model: MRBRT) -> ndarray: 22 | lt = model.lt 23 | return lt.get_gamma_fisher(lt.gamma) 24 | 25 | 26 | def get_beta_info(model: MRBRT, name: str = "signal") -> Tuple[float]: 27 | # get beta solution 28 | cov_index = model.cov_names.index(name) 29 | beta = model.beta_soln[cov_index] 30 | beta_hessian = get_fe_hessian(model) 31 | beta_sd = 1.0/np.sqrt(np.diag(beta_hessian))[cov_index] 32 | return (beta, beta_sd) 33 | 34 | 35 | def get_gamma_info(model: MRBRT) -> Tuple[float]: 36 | # get gamma solution 37 | gamma = model.gamma_soln[0] 38 | gamma_fisher = get_re_fisher(model) 39 | gamma_sd = 1.0/np.sqrt(gamma_fisher[0, 0]) 40 | return (gamma, gamma_sd) 41 | 42 | 43 | def get_pval(mean, sd, one_sided: bool = False) -> float: 44 | zscore = np.abs(mean/sd) 45 | if one_sided: 46 | pval = 1 - norm.cdf(zscore) 47 | else: 48 | pval = 2*(1 - norm.cdf(zscore)) 49 | return pval 50 | 51 | 52 | def egger_regression(residual, residual_sd, one_sided: bool = True) -> Dict[str, float]: 53 | weighted_residual = residual/residual_sd 54 | r_mean = weighted_residual.mean() 55 | r_sd = 1/np.sqrt(weighted_residual.size) 56 | r_pval = get_pval(r_mean, r_sd, one_sided=one_sided) 57 | return { 58 | "mean": r_mean, 59 | "sd": r_sd, 60 | "pval": r_pval 61 | } 62 | 63 | 64 | def get_pub_bias(*args, **kwargs) -> int: 65 | result = egger_regression(*args, **kwargs) 66 | return int(result["pval"] < 0.05) 67 | 68 | 69 | def list_all_files(path: Path) -> List[Path]: 70 | if path.is_file(): 71 | return [path] 72 | return chain.from_iterable( 73 | list_all_files(sub_path) for sub_path in path.iterdir() 74 | ) 75 | --------------------------------------------------------------------------------
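# Usage sketch for the publication-bias helpers in espipeline/utils.py, run on
# synthetic residuals (in the pipeline these come from the fitted MRBRT models):

import numpy as np

from espipeline.utils import egger_regression, get_pub_bias

rng = np.random.default_rng(0)
residual_sd = rng.uniform(0.1, 0.5, size=50)
residual = rng.normal(0.0, residual_sd)         # centered residuals, no systematic shift
print(egger_regression(residual, residual_sd))  # large pval expected for unbiased residuals
print(get_pub_bias(residual, residual_sd))      # 0, i.e. no publication bias flagged at 0.05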