├── .Rbuildignore
├── .github
    ├── dependabot.yml
    ├── environment
    │   └── pixi.toml
    ├── rattler-build_container.df
    ├── recipe
    │   ├── recipe.yaml
    │   ├── variant_r43.yaml
    │   └── variant_r44.yaml
    └── workflows
    │   ├── README.md
    │   ├── ci.yml
    │   ├── conda_build.yml
    │   ├── create_toml_from_yaml.sh
    │   ├── dependabot_automerge.yml
    │   ├── merge.yml
    │   ├── release.yml
    │   └── update-documentation.yml
├── .gitignore
├── DESCRIPTION
├── LICENSE
├── NAMESPACE
├── R
    ├── LD.R
    ├── RcppExports.R
    ├── allele_qc.R
    ├── colocboost_pipeline.R
    ├── compute_qtl_enrichment.R
    ├── ctwas_wrapper.R
    ├── dentist_qc.R
    ├── encoloc.R
    ├── file_utils.R
    ├── fsusie_wrapper.R
    ├── mash_wrapper.R
    ├── misc.R
    ├── mr.R
    ├── mrmash_wrapper.R
    ├── multigene_udr.R
    ├── multitrait_data.R
    ├── multivariate_pipeline.R
    ├── plot.R
    ├── quail_rank_score.R
    ├── quail_vqtl.R
    ├── quantile_twas.R
    ├── quantile_twas_weight.R
    ├── raiss.R
    ├── regularized_regression.R
    ├── slalom.R
    ├── sumstats_qc.R
    ├── susie_wrapper.R
    ├── twas.R
    ├── twas_weights.R
    ├── univariate_pipeline.R
    └── univariate_rss_diagnostics.R
├── README.md
├── cleanup
├── configure
├── data
    └── multitrait_data.RData
├── inst
    ├── code
    │   ├── fastenloc_archive
    │   │   ├── README.md
    │   │   ├── controller.cc
    │   │   ├── controller.h
    │   │   ├── main.cc
    │   │   ├── sigCluster.cc
    │   │   └── sigCluster.h
    │   └── tensorqtl_postprocessor.R
    ├── misc
    │   ├── format_r_code.sh
    │   ├── post-commit.sh
    │   ├── pre-commit.sh
    │   └── uncrustify_default.cfg
    └── prototype
    │   ├── Dentist_R_cpp_comparison.ipynb
    │   ├── RAISS_R_python_comparison.ipynb
    │   ├── SDPR_testing
    │       ├── SDPR_chr22.txt
    │       ├── SDPR_testing.ipynb
    │       ├── chr10_region_posterior.csv
    │       ├── marginal_rss.txt
    │       └── output_matrix_1.txt
    │   ├── enrich_coloc_example.ipynb
    │   ├── generate_test_for_prs_cs.R
    │   ├── raiss_prototype.ipynb
    │   ├── run_prs_cs_test.py
    │   └── slalom_R_python_comparison.ipynb
├── man
    ├── QUAIL_pipeline.Rd
    ├── QUAIL_rank_score_pipeline.Rd
    ├── adjust_susie_weights.Rd
    ├── align_variant_names.Rd
    ├── allele_qc.Rd
    ├── auto_decision.Rd
    ├── batch_load_twas_weights.Rd
    ├── bayes_a_rss_weights.Rd
    ├── bayes_a_weights.Rd
    ├── bayes_alphabet_rss_weights.Rd
    ├── bayes_alphabet_weights.Rd
    ├── bayes_c_rss_weights.Rd
    ├── bayes_c_weights.Rd
    ├── bayes_l_rss_weights.Rd
    ├── bayes_l_weights.Rd
    ├── bayes_n_rss_weights.Rd
    ├── bayes_n_weights.Rd
    ├── bayes_r_rss_weights.Rd
    ├── bayes_r_weights.Rd
    ├── coloc_post_processor.Rd
    ├── coloc_wrapper.Rd
    ├── colocboost_analysis_pipeline.Rd
    ├── compute_qtl_enrichment.Rd
    ├── corr_filter.Rd
    ├── ctwas_ld_loader.Rd
    ├── dentist.Rd
    ├── dentist_single_window.Rd
    ├── extract_LD_for_region.Rd
    ├── extract_cs_info.Rd
    ├── extract_flatten_sumstats_from_nested.Rd
    ├── extract_top_pip_info.Rd
    ├── filter_X_with_Y.Rd
    ├── filter_molecular_events.Rd
    ├── filter_variants_by_ld_reference.Rd
    ├── find_data.Rd
    ├── find_duplicate_variants.Rd
    ├── find_valid_file_path.Rd
    ├── fsusie_get_cs.Rd
    ├── fsusie_wrapper.Rd
    ├── gbayes_rss.Rd
    ├── get_cormat.Rd
    ├── get_ctwas_meta_data.Rd
    ├── get_susie_result.Rd
    ├── harmonize_twas.Rd
    ├── lbf_to_alpha.Rd
    ├── load_LD_matrix.Rd
    ├── load_genotype_region.Rd
    ├── load_multicontext_sumstats.Rd
    ├── load_multitask_regional_data.Rd
    ├── load_quantile_twas_weights.Rd
    ├── load_regional_association_data.Rd
    ├── load_regional_functional_data.Rd
    ├── load_regional_multivariate_data.Rd
    ├── load_regional_regression_data.Rd
    ├── load_regional_univariate_data.Rd
    ├── load_rss_data.Rd
    ├── load_tsv_region.Rd
    ├── load_twas_weights.Rd
    ├── manhattan_plot.Rd
    ├── merge_sumstats_matrices.Rd
    ├── mr_analysis.Rd
    ├── mr_ash_rss.Rd
    ├── mr_ash_rss_weights.Rd
    ├── mr_format.Rd
    ├── mrash_weights.Rd
    ├── mrmash_wrapper.Rd
    ├── multicontext_ld_clumping.Rd
    ├── multigene_udr.Rd
    ├── multivariate_analysis_pipeline.Rd
    ├── parse_cs_corr.Rd
    ├── perform_qr_analysis.Rd
    ├── prs_cs.Rd
    ├── prs_cs_weights.Rd
    ├── qr_screen.Rd
    ├── quantile_twas_weight_pipeline.Rd
    ├── raiss.Rd
    ├── region_to_df.Rd
    ├── rescale_cov_w0.Rd
    ├── rss_analysis_pipeline.Rd
    ├── rss_basic_qc.Rd
    ├── sdpr.Rd
    ├── sdpr_weights.Rd
    ├── slalom.Rd
    ├── summary_stats_qc.Rd
    ├── susie_post_processor.Rd
    ├── susie_rss_pipeline.Rd
    ├── susie_rss_qc.Rd
    ├── susie_rss_wrapper.Rd
    ├── trim_ctwas_variants.Rd
    ├── twas_analysis.Rd
    ├── twas_joint_z.Rd
    ├── twas_multivariate_weights_pipeline.Rd
    ├── twas_pipeline.Rd
    ├── twas_predict.Rd
    ├── twas_weights.Rd
    ├── twas_weights_cv.Rd
    ├── twas_weights_pipeline.Rd
    ├── twas_z.Rd
    ├── univariate_analysis_pipeline.Rd
    ├── venn.Rd
    ├── xqtl_enrichment_wrapper.Rd
    └── z_to_pvalue.Rd
├── src
    ├── Makevars.in
    ├── RcppExports.cpp
    ├── dentist_iterative_impute.cpp
    ├── function_pool.cpp
    ├── function_pool.h
    ├── mr_ash.cpp
    ├── mr_ash.h
    ├── prscs_mcmc.cpp
    ├── prscs_mcmc.h
    ├── qtl_enrichment.cpp
    ├── qtl_enrichment.hpp
    ├── sdpr.cpp
    ├── sdpr_mcmc.cpp
    ├── sdpr_mcmc.h
    ├── simde
    │   ├── arm
    │   │   ├── neon.h
    │   │   └── sve.h
    │   └── x86
    │   │   ├── aes.h
    │   │   ├── avx.h
    │   │   ├── avx2.h
    │   │   ├── avx512.h
    │   │   ├── clmul.h
    │   │   ├── f16c.h
    │   │   ├── fma.h
    │   │   ├── gfni.h
    │   │   ├── mmx.h
    │   │   ├── sse.h
    │   │   ├── sse2.h
    │   │   ├── sse3.h
    │   │   ├── sse4.1.h
    │   │   ├── sse4.2.h
    │   │   ├── ssse3.h
    │   │   ├── svml.h
    │   │   └── xop.h
    └── sse_mathfun.h
├── tests
    ├── testthat.R
    └── testthat
    │   ├── test_LD.R
    │   ├── test_allele_qc.R
    │   ├── test_compute_qtl_enrichment.R
    │   ├── test_data
    │       ├── LD_block_1.chr1_1000_1200.float16.bim
    │       ├── LD_block_1.chr1_1000_1200.float16.txt.xz
    │       ├── LD_block_2.chr1_1200_1400.float16.bim
    │       ├── LD_block_2.chr1_1200_1400.float16.txt.xz
    │       ├── LD_block_3.chr1_1400_1600.float16.bim
    │       ├── LD_block_3.chr1_1400_1600.float16.txt.xz
    │       ├── LD_block_4.chr1_1600_1800.float16.bim
    │       ├── LD_block_4.chr1_1600_1800.float16.txt.xz
    │       ├── LD_block_5.chr1_1800_2000.float16.bim
    │       ├── LD_block_5.chr1_1800_2000.float16.txt.xz
    │       ├── dummy_data.pgen
    │       ├── dummy_data.psam
    │       ├── dummy_data.pvar
    │       ├── protocol_example.genotype.bed
    │       ├── protocol_example.genotype.bim
    │       └── protocol_example.genotype.fam
    │   ├── test_dentist_qc.R
    │   ├── test_encoloc.R
    │   ├── test_file_utils.R
    │   ├── test_mash_wrapper.R
    │   ├── test_misc.R
    │   ├── test_mr.R
    │   ├── test_mrmash_wrapper.R
    │   ├── test_raiss.R
    │   ├── test_regularized_regression.R
    │   ├── test_slalom.R
    │   ├── test_sumstats_qc.R
    │   ├── test_twas.R
    │   └── test_twas_scan.R
└── vignettes
    ├── cis-analysis.Rmd
    ├── mrmash-intro.Rmd
    ├── qtl-gwas-resources.Rmd
    ├── susie-rss-qc.Rmd
    └── xqtl_enrichment.Rmd


/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | ^\.git
4 | ^\.github
5 | ^\.gitignore
6 | ^pixi.toml
7 | ^pixi.lock
8 | ^\.pixi
9 | 


--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 | 
4 |   - package-ecosystem: "github-actions"
5 |     directory: "/"
6 |     schedule:
7 |       interval: "weekly"
8 | 


--------------------------------------------------------------------------------
/.github/environment/pixi.toml:
--------------------------------------------------------------------------------
 1 | [project]
 2 | name = "r-pecotmr"
 3 | channels = ["dnachun", "conda-forge", "bioconda"]
 4 | platforms = ["linux-64", "osx-64", "osx-arm64"]
 5 | 
 6 | [system-requirements]
 7 | libc = { family="glibc", version="2.17" }
 8 | 
 9 | [tasks]
10 | devtools_document = "R -e 'devtools::document()'"
11 | devtools_test = "R -e 'devtools::test()'"
12 | codecov = "R -e 'covr::codecov(quiet = FALSE)'"
13 | rcmdcheck = "R -e 'rcmdcheck::rcmdcheck()'"
14 | bioccheck_git_clone = "R -e 'BiocCheck::BiocCheckGitClone()'"
15 | bioccheck = "R -e 'BiocCheck::BiocCheck()'"
16 | use_major_version = "R -e 'usethis::use_version(which = \"major\", push = FALSE)'"
17 | use_minor_version = "R -e 'usethis::use_version(which = \"minor\", push = FALSE)'"
18 | use_patch_version = "R -e 'usethis::use_version(which = \"patch\", push = FALSE)'"
19 | 
20 | [feature.r43]
21 | dependencies = {"r-base" = "4.3.*"}
22 | 
23 | [feature.r44]
24 | dependencies = {"r-base" = "4.4.*"}
25 | 
26 | [environments]
27 | r43 = {features = ["r43"]}
28 | r44 = {features = ["r44"]}
29 | 
30 | [dependencies]
31 | "gsl" = "*"
32 | "bioconductor-bioccheck" = "*"
33 | "r-devtools" = "*"
34 | "r-rcmdcheck" = "*"
35 | "r-covr" = "*"
36 | "r-tidyverse" = "*"
37 | 


--------------------------------------------------------------------------------
/.github/rattler-build_container.df:
--------------------------------------------------------------------------------
 1 | FROM ghcr.io/prefix-dev/pixi:latest
 2 | 
 3 | SHELL ["/bin/bash", "-c"]
 4 | RUN apt-get update
 5 | RUN apt-get install -y libgl1 ca-certificates
 6 | RUN groupadd -g 118 github
 7 | RUN useradd -m -u 1001 -g 118 -s /bin/bash runner
 8 | USER runner
 9 | RUN pixi global install rattler-build git patch
10 | ENV PATH=/home/runner/.pixi/bin:${PATH}
11 | 


--------------------------------------------------------------------------------
/.github/recipe/recipe.yaml:
--------------------------------------------------------------------------------
  1 | context:
  2 |   version: VERSION_PLACEHOLDER
  3 | 
  4 | package:
  5 |   name: r-pecotmr
  6 |   version: ${{ version }}
  7 | 
  8 | source:
  9 |   path: pecotmr-${{ version }}.tar.gz
 10 |   sha256: SHA256SUM_PLACEHOLDER
 11 | 
 12 | build:
 13 |   number: BUILD_PLACEHOLDER
 14 |   dynamic_linking:
 15 |     rpaths:
 16 |       - lib/R/lib/
 17 |       - lib/
 18 |   script: R CMD INSTALL --build .
 19 | 
 20 | requirements:
 21 |   build:
 22 |     - ${{ compiler('c') }}
 23 |     - ${{ compiler('cxx') }}
 24 |   host:
 25 |     - bioconductor-iranges
 26 |     - bioconductor-qvalue
 27 |     - bioconductor-snpstats
 28 |     - r-base
 29 |     - r-bigsnpr
 30 |     - r-bigstatsr
 31 |     - r-coloc
 32 |     - r-data.table
 33 |     - r-dofuture
 34 |     - r-dplyr
 35 |     - r-fsusier
 36 |     - r-furrr
 37 |     - r-gbj
 38 |     - r-glmnet
 39 |     - r-harmonicmeanp
 40 |     - r-magrittr
 41 |     - r-matrixstats
 42 |     - r-mr.ash.alpha
 43 |     - r-mr.mash.alpha
 44 |     - r-mvsusier
 45 |     - r-pgenlibr
 46 |     - r-purrr
 47 |     - r-qgg
 48 |     - r-quadprog
 49 |     - r-quantreg
 50 |     - r-r6
 51 |     - r-rcpp
 52 |     - r-readr
 53 |     - r-rfast
 54 |     - r-stringr
 55 |     - r-susier
 56 |     - r-tidyr
 57 |     - r-vctrs
 58 |   run:
 59 |     - bioconductor-iranges
 60 |     - bioconductor-qvalue
 61 |     - bioconductor-snpstats
 62 |     - r-base
 63 |     - r-bigsnpr
 64 |     - r-bigstatsr
 65 |     - r-coloc
 66 |     - r-data.table
 67 |     - r-dofuture
 68 |     - r-dplyr
 69 |     - r-fsusier
 70 |     - r-furrr
 71 |     - r-gbj
 72 |     - r-glmnet
 73 |     - r-harmonicmeanp
 74 |     - r-magrittr
 75 |     - r-matrixstats
 76 |     - r-mr.ash.alpha
 77 |     - r-mr.mash.alpha
 78 |     - r-mvsusier
 79 |     - r-pgenlibr
 80 |     - r-purrr
 81 |     - r-qgg
 82 |     - r-quadprog
 83 |     - r-quantreg
 84 |     - r-r6
 85 |     - r-rcpp
 86 |     - r-readr
 87 |     - r-rfast
 88 |     - r-stringr
 89 |     - r-susier
 90 |     - r-tidyr
 91 |     - r-vctrs
 92 | 
 93 | tests:
 94 |   - script:
 95 |       - R -e "library('pecotmr')"
 96 | 
 97 | about:
 98 |   license: GPL-3.0-or-later
 99 |   license_file: LICENSE
100 |   summary: pecotmr implements pair-wise enrichment, colocalization, TWAS and Mendelian Randomization to integrate QTL and GWAS analysis based on a fine-mapped single effects model.
101 |   homepage: https://github.com/StatFunGen/pecotmr
102 | 
103 | extra:
104 |   recipe-maintainers:
105 |     - danielnachun
106 | 


--------------------------------------------------------------------------------
/.github/recipe/variant_r43.yaml:
--------------------------------------------------------------------------------
1 | r_base:
2 |   - 4.3
3 | 


--------------------------------------------------------------------------------
/.github/recipe/variant_r44.yaml:
--------------------------------------------------------------------------------
1 | r_base:
2 |   - 4.4
3 | 


--------------------------------------------------------------------------------
/.github/workflows/README.md:
--------------------------------------------------------------------------------
 1 | ## How to tag new releases with the "Upload new release" GitHub Action
 2 | When we are ready to tag a new release, we use this action to create in the repository and upload the source archive as release available on the [Releases](https://github.com/StatFunGen/pecotmr/releases) page.
 3 | 1. Go to "Actions" at the top of the repository page.
 4 | 2. Go to the "Upload new release" action on the side bar.
 5 | 3. Click on the "Run workflow" drop down menu in the blue highlighted area
 6 | 4. In the drop down menu, you do not need to modify any values if you just want to increment the patch version automatically.
 7 |     - We use semantic versioning of the form version X.Y.Z, where X is the major version, Y is the minor version and Z is the patch version
 8 |     - By default, the patch version is incremented because `increase_patch_version` is set to true
 9 |     - If you need to increment the minor or major version, this can be done by setting `increase_minor_version` or `increase_major_version` to true.  Please ask before changing these values.
10 |     - A custom version can be specified instead, but please ask before using this.
11 |     - In all cases the DESCRIPTION file will be updated automatically to the right version - do not manually change this file without asking.
12 |     - The commit checksum is optional --- by default it will use the latest commit to repository, but you can specify an older commit if needed.  Do **not** use a commit that is older than the commit used for the current version.
13 | 5. Click the green "Run workflow" button to dispatch the workflow.
14 | ## How to build a new conda package with the "Build conda package" GitHub Action
15 | When we have tagged a new release, we use this action to build a new conda package.  The conda packages are currently uploaded to the [personal channel](https://anaconda.org/dnachun) of Daniel Nachun.  Upon submission to Bioconductor, a recipe will be submitted to bioconda and this workflow will be replaced by one to submit releases to Bioconductor, and bioconda will automatically update the package.
16 | 1. Use the "Upload new release" GitHub Action to tag a new release.  This workflow will fail if you try to build a package for a version which is not already tagged.
17 | 2. Go to "Actions" at the top of the repository page.
18 | 3. Go to the "Build conda package" action on the side bar.
19 | 4. Click on the "Run workflow" drop down menu in the blue highlighted area
20 | 5. In the drop down menu, you do not need to modify any values if you just want to build the latest version of package.  If you need to build an older version of the package, you can enter a custom version, but please ask before doing this.
21 |     - The build version is optional and defaults to 0.  This can be incremented if the conda recipe has been changed but the tagged release has not been changed.  Do **not** change this setting without asking first - in most cases a new version needs to be tagged!
22 | 6. Click the green "Run workflow" button to dispatch the workflow.
23 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
  1 | name: Continuous Integration
  2 | 
  3 | on:
  4 |   pull_request:
  5 |     paths-ignore:
  6 |       - .github/*
  7 |       - .gitignore
  8 |       - README.md
  9 | 
 10 | jobs:
 11 |   ci_linux-64:
 12 |     name: linux-64 CI
 13 |     runs-on: ubuntu-latest
 14 |     env:
 15 |       CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
 16 |     strategy:
 17 |       fail-fast: false
 18 |       matrix:
 19 |         environment: ["r43", "r44"]
 20 |     
 21 |     steps:
 22 |       - name: Checkout pull request branch
 23 |         uses: actions/checkout@v4
 24 |         with:
 25 |           fetch-depth: 0
 26 | 
 27 |       - name: Create TOML from recipe
 28 |         run: .github/workflows/create_toml_from_yaml.sh ${GITHUB_WORKSPACE}
 29 | 
 30 |       - name: Setup pixi
 31 |         uses: prefix-dev/setup-pixi@v0.8.10
 32 | 
 33 |       - name: Run unit tests
 34 |         run: pixi run --environment ${{ matrix.environment }} devtools_test
 35 | 
 36 |       - name: Check unit test code coverage
 37 |         run: pixi run --environment ${{ matrix.environment }} codecov
 38 | 
 39 |       #- name: Run R CMD CHECK
 40 |         #run: pixi run rcmdcheck
 41 | 
 42 |       #- name: Run BiocCheckGitClone
 43 |         #run: pixi run bioccheck_git_clone
 44 | 
 45 |       #- name: Run BiocCheck
 46 |         #run: pixi run bioccheck
 47 |         
 48 |   ci_osx-64:
 49 |     name: osx-64 CI
 50 |     runs-on: macos-13
 51 |     strategy:
 52 |       fail-fast: false
 53 |       matrix:
 54 |         environment: ["r43", "r44"]
 55 |     
 56 |     steps:
 57 |       - name: Checkout pull request branch
 58 |         uses: actions/checkout@v4
 59 |         with:
 60 |           fetch-depth: 0
 61 | 
 62 |       - name: Create TOML from recipe
 63 |         run: .github/workflows/create_toml_from_yaml.sh ${GITHUB_WORKSPACE}
 64 | 
 65 |       - name: Setup pixi
 66 |         uses: prefix-dev/setup-pixi@v0.8.10
 67 | 
 68 |       - name: Run unit tests
 69 |         run: pixi run --environment ${{ matrix.environment }} devtools_test
 70 | 
 71 |       #- name: Run R CMD CHECK
 72 |         #run: pixi run rcmdcheck
 73 | 
 74 |       #- name: Run BiocCheckGitClone
 75 |         #run: pixi run bioccheck_git_clone
 76 | 
 77 |       #- name: Run BiocCheck
 78 |         #run: pixi run bioccheck
 79 |   
 80 |   ci_osx-arm64:
 81 |     name: osx-arm64 CI
 82 |     runs-on: macos-14
 83 |     strategy:
 84 |       fail-fast: false
 85 |       matrix:
 86 |         environment: ["r43", "r44"]
 87 |     
 88 |     steps:
 89 |       - name: Checkout pull request branch
 90 |         uses: actions/checkout@v4
 91 |         with:
 92 |           fetch-depth: 0
 93 | 
 94 |       - name: Create TOML from recipe
 95 |         run: .github/workflows/create_toml_from_yaml.sh ${GITHUB_WORKSPACE}
 96 | 
 97 |       - name: Setup pixi
 98 |         uses: prefix-dev/setup-pixi@v0.8.10
 99 | 
100 |       - name: Run unit tests
101 |         run: pixi run --environment ${{ matrix.environment }} devtools_test
102 | 
103 |       #- name: Run R CMD CHECK
104 |         #run: pixi run rcmdcheck
105 | 
106 |       #- name: Run BiocCheckGitClone
107 |         #run: pixi run bioccheck_git_clone
108 | 
109 |       #- name: Run BiocCheck
110 |         #run: pixi run bioccheck
111 | 


--------------------------------------------------------------------------------
/.github/workflows/create_toml_from_yaml.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -o xtrace -o nounset -o pipefail -o errexit
 4 | 
 5 | github_workspace=$1
 6 | 
 7 | cp ${github_workspace}/.github/environment/pixi.toml ${github_workspace}/pixi.toml
 8 | yq .requirements.host < ${github_workspace}/.github/recipe/recipe.yaml | \
 9 |     sed 's/- //' | sed 's/^/"/' | sed 's/$/" = "*"/'  >> ${github_workspace}/pixi.toml
10 | 


--------------------------------------------------------------------------------
/.github/workflows/dependabot_automerge.yml:
--------------------------------------------------------------------------------
 1 | name: Dependabot auto-merge
 2 | on:
 3 |   pull_request:
 4 |     paths:
 5 |       - .github/workflows/*
 6 | 
 7 | permissions:
 8 |   contents: write
 9 |   pull-requests: write
10 | 
11 | jobs:
12 |   dependabot:
13 |     runs-on: ubuntu-latest
14 |     if: github.event.pull_request.user.login == 'dependabot[bot]'
15 |     steps:
16 |       - name: Enable auto-merge for Dependabot PRs
17 |         run: gh pr merge --auto --merge "$PR_URL"
18 |         env:
19 |           PR_URL: ${{github.event.pull_request.html_url}}
20 |           GH_TOKEN: ${{secrets.GITHUB_TOKEN}}
21 | 


--------------------------------------------------------------------------------
/.github/workflows/merge.yml:
--------------------------------------------------------------------------------
 1 | name: Post-merge actions
 2 | 
 3 | on:
 4 |   pull_request_target:
 5 |     types: [closed]
 6 |     paths-ignore:
 7 |       - .github/*
 8 |       - .gitignore
 9 |       - README.md
10 | 
11 | jobs:
12 |   update_coverage_badge:
13 |     if: github.event.pull_request.merged == true
14 |     runs-on: ubuntu-latest
15 |     strategy:
16 |       fail-fast: false
17 |     env:
18 |       CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
19 | 
20 |     steps:
21 |       - name: Checkout main
22 |         uses: actions/checkout@v4
23 |         with:
24 |           ref: main
25 | 
26 |       - name: Create TOML from recipe
27 |         run: .github/workflows/create_toml_from_yaml.sh ${GITHUB_WORKSPACE}
28 | 
29 |       - name: Setup pixi
30 |         uses: prefix-dev/setup-pixi@v0.8.10
31 | 
32 |       - name: Check unit test code coverage
33 |         run: pixi run codecov
34 | 


--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
  1 | name: Upload new release
  2 | 
  3 | on:
  4 |   workflow_dispatch:
  5 |     inputs:
  6 |       tag:
  7 |         description: Version to use for release tag
  8 |         default: auto
  9 |         required: true
 10 |       commit:
 11 |         description: Commit to use for tag
 12 |         default: auto
 13 |         required: true
 14 |       increment_major_version:
 15 |         description: Increment major version
 16 |         default: false
 17 |         required: true
 18 |       increment_minor_version:
 19 |         description: Increment minor version
 20 |         default: false
 21 |         required: true
 22 |       increment_patch_version:
 23 |         description: Increment patch version
 24 |         default: true
 25 |         required: true
 26 | 
 27 | jobs:
 28 |   update_version:
 29 |     outputs:
 30 |       commit: ${{ steps.commit-changes.outputs.commit_long_sha }}
 31 |     runs-on: ubuntu-latest
 32 |     if: ${{ github.event.inputs.commit }} == "auto"
 33 |     steps:
 34 |       - name: Checkout repository
 35 |         uses: actions/checkout@v4
 36 |         with:
 37 |           token: ${{ secrets.CI_TOKEN }}
 38 |           fetch-depth: 0
 39 |           repository: ${{ github.repository }}
 40 |           ref: main
 41 | 
 42 |       - name: Create TOML from recipe
 43 |         run: .github/workflows/create_toml_from_yaml.sh ${GITHUB_WORKSPACE}
 44 | 
 45 |       - name: Setup pixi
 46 |         uses: prefix-dev/setup-pixi@v0.8.10
 47 | 
 48 |       - name: Update version
 49 |         run: |
 50 |           if [[ ${{ github.event.inputs.tag }} != "auto" ]]; then
 51 |             sed -i 's/Version: .*$/Version: ${{ github.event.inputs.tag }}/' DESCRIPTION
 52 |           elif [[ ${{ github.event.inputs.increment_major_version }} == "true" ]]; then
 53 |             pixi run use_major_version
 54 |           elif [[ ${{ github.event.inputs.increment_minor_version }} == "true" ]]; then
 55 |             pixi run use_minor_version
 56 |           elif [[ ${{ github.event.inputs.increment_patch_version }} == "true" ]]; then
 57 |             pixi run use_patch_version
 58 |           fi
 59 | 
 60 |       - name: Commit changes to version
 61 |         id: commit-changes
 62 |         uses: EndBug/add-and-commit@v9
 63 |         with:
 64 |           push: true
 65 |           message: Update version
 66 | 
 67 |   create_release:
 68 |     needs: update_version
 69 |     runs-on: ubuntu-latest
 70 |     permissions:
 71 |       contents: write
 72 |     env:
 73 |       INPUT_COMMIT: ${{ github.event.inputs.commit }}
 74 |     steps:
 75 |       - name: Determine commit
 76 |         id: determine-commit
 77 |         run: |
 78 |           if [[ ${INPUT_COMMIT} != 'auto' ]]; then
 79 |             echo "commit=${{ github.event.inputs.commit }}" >> "$GITHUB_OUTPUT"
 80 |           else
 81 |             echo "commit=${{ needs.update_version.outputs.commit }}" >> "$GITHUB_OUTPUT"
 82 |           fi
 83 | 
 84 |       - name: Checkout HEAD
 85 |         uses: actions/checkout@v4
 86 |         with:
 87 |           ref: ${{ steps.determine-commit.outputs.commit }}
 88 | 
 89 |       - name: Set tag
 90 |         id: set-tag
 91 |         run: |
 92 |           if [[ ${{ github.event.inputs.tag }} != auto ]]; then
 93 |             tag=${{ github.event.inputs.tag }}
 94 |           else
 95 |             tag=$(grep "Version:" < DESCRIPTION | cut -d ' ' -f 2)
 96 |           fi
 97 |           echo "tag=${tag}" >> "$GITHUB_OUTPUT"
 98 | 
 99 |       - name: Create new tag
100 |         id: tag-version
101 |         uses: mathieudutour/github-tag-action@v6.2
102 |         with:
103 |           default_bump: false
104 |           default_prerelease_bump: false
105 |           github_token: ${{ secrets.GITHUB_TOKEN }}
106 |           custom_tag: ${{ steps.set-tag.outputs.tag }}
107 |           commit_sha: ${{ steps.determine-commit.outputs.commit }}
108 |           tag_prefix: ""
109 | 
110 |       - name: Create a GitHub release
111 |         uses: ncipollo/release-action@v1
112 |         with:
113 |           tag: ${{ steps.tag-version.outputs.new_tag }}
114 |           name: Release ${{ steps.tag-version.outputs.new_tag }}
115 |           body: ${{ steps.tag-version.outputs.changelog }}
116 | 


--------------------------------------------------------------------------------
/.github/workflows/update-documentation.yml:
--------------------------------------------------------------------------------
 1 | name: Update documentation
 2 | 
 3 | on:
 4 |   pull_request_target:
 5 |     paths-ignore:
 6 |       - .github/*
 7 |       - .gitignore
 8 |       - README.md
 9 | 
10 | jobs:
11 |   update_documentation:
12 |     runs-on: ubuntu-latest
13 |     
14 |     steps:
15 |       - name: Checkout pull request branch
16 |         uses: actions/checkout@v4
17 |         with:
18 |           token: ${{ secrets.CI_TOKEN }}
19 |           fetch-depth: 0
20 |           repository: ${{ github.event.pull_request.head.repo.full_name }}
21 |           ref: ${{ github.event.pull_request.head.ref }}
22 | 
23 |       - name: Create TOML from recipe
24 |         run: .github/workflows/create_toml_from_yaml.sh ${GITHUB_WORKSPACE}
25 | 
26 |       - name: Setup pixi
27 |         uses: prefix-dev/setup-pixi@v0.8.10
28 | 
29 |       - name: Run unit tests
30 |         run: pixi run devtools_document
31 | 
32 |       - name: Commit changes to documentation
33 |         uses: EndBug/add-and-commit@v9
34 |         with:
35 |           push: true
36 |           message: Update documentation
37 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | src/*.o
 2 | src/*.so
 3 | src/*.dylib
 4 | src/Makevars
 5 | **/.ipynb_checkpoints
 6 | .Rproj.user
 7 | **/.DS_Store
 8 | .Rhistory
 9 | pecotmr.Rproj
10 | /pixi.toml
11 | /pixi.lock
12 | /.pixi
13 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Encoding: UTF-8
 2 | Type: Package
 3 | Package: pecotmr
 4 | Version: 0.3.15
 5 | Date: 2025-07-02
 6 | Title: Pair-wise enrichment, colocalization, TWAS and Mendelian Randomization to integrate molecular QTL and GWAS.
 7 | Description: The majority of the statistical models in pecotmr are based on fine-mapped single effects described in Wang G et al (2020) JRSS-B. It also incorporates wrappers to a series of useful TWAS methods as well as utility functions for QTL and GWAS integration.
 8 | URL: https://github.com/StatFunGen/pecotmr
 9 | BugReports: https://github.com/StatFunGen/pecotmr/issues
10 | Authors@R: c(person("Gao Wang",role = c("cre","aut"),
11 |                   email = "wang.gaow@columbia.edu"),
12 |                   person("Daniel Nachun", role = "aut",
13 |                   email = "dnachun@stanford.edu"))
14 | License: MIT + file LICENSE
15 | Imports:
16 |     IRanges,
17 |     R6,
18 |     Rcpp,
19 |     S4Vectors,
20 |     bigsnpr,
21 |     bigstatsr,
22 |     coloc,
23 |     data.table,
24 |     doFuture,
25 |     dplyr,
26 |     furrr,
27 |     future,
28 |     magrittr,
29 |     matrixStats,
30 |     purrr,
31 |     readr,
32 |     rlang,
33 |     stringr,
34 |     susieR,
35 |     tibble,
36 |     tidyr,
37 |     vctrs,
38 |     vroom
39 | Suggests:
40 |     GBJ,
41 |     Rfast,
42 |     flashier,
43 |     fsusieR,
44 |     glmnet,
45 |     harmonicmeanp,
46 |     knitr,
47 |     mashr,
48 |     mr.ash.alpha,
49 |     mr.mash.alpha,
50 |     mvsusieR,
51 |     pgenlibr,
52 |     qgg,
53 |     quadprog,
54 |     quantreg,
55 |     qvalue,
56 |     rmarkdown,
57 |     snpStats,
58 |     testthat
59 | LinkingTo: 
60 |     Rcpp,
61 |     RcppArmadillo,
62 |     RcppGSL
63 | NeedsCompilation: yes
64 | VignetteBuilder: knitr
65 | RoxygenNote: 7.3.2
66 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2023
2 | COPYRIGHT HOLDER: Gao Wang
3 | 


--------------------------------------------------------------------------------
/R/RcppExports.R:
--------------------------------------------------------------------------------
 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand
 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 3 | 
 4 | dentist_iterative_impute <- function(LD_mat, nSample, zScore, pValueThreshold, propSVD, gcControl, nIter, gPvalueThreshold, ncpus, seed, correct_chen_et_al_bug, verbose = FALSE) {
 5 |     .Call('_pecotmr_dentist_iterative_impute', PACKAGE = 'pecotmr', LD_mat, nSample, zScore, pValueThreshold, propSVD, gcControl, nIter, gPvalueThreshold, ncpus, seed, correct_chen_et_al_bug, verbose)
 6 | }
 7 | 
 8 | rcpp_mr_ash_rss <- function(bhat, shat, z, R, var_y, n, sigma2_e, s0, w0, mu1_init, tol = 1e-8, max_iter = 1e5L, update_w0 = TRUE, update_sigma = TRUE, compute_ELBO = TRUE, standardize = FALSE, ncpus = 1L) {
 9 |     .Call('_pecotmr_rcpp_mr_ash_rss', PACKAGE = 'pecotmr', bhat, shat, z, R, var_y, n, sigma2_e, s0, w0, mu1_init, tol, max_iter, update_w0, update_sigma, compute_ELBO, standardize, ncpus)
10 | }
11 | 
12 | prs_cs_rcpp <- function(a, b, phi, bhat, maf, n, ld_blk, n_iter, n_burnin, thin, verbose, seed) {
13 |     .Call('_pecotmr_prs_cs_rcpp', PACKAGE = 'pecotmr', a, b, phi, bhat, maf, n, ld_blk, n_iter, n_burnin, thin, verbose, seed)
14 | }
15 | 
16 | qtl_enrichment_rcpp <- function(r_gwas_pip, r_qtl_susie_fit, pi_gwas = 0, pi_qtl = 0, ImpN = 25L, shrinkage_lambda = 1.0, num_threads = 1L) {
17 |     .Call('_pecotmr_qtl_enrichment_rcpp', PACKAGE = 'pecotmr', r_gwas_pip, r_qtl_susie_fit, pi_gwas, pi_qtl, ImpN, shrinkage_lambda, num_threads)
18 | }
19 | 
20 | sdpr_rcpp <- function(bhat, LD, n, per_variant_sample_size = NULL, array = NULL, a = 0.1, c = 1.0, M = 1000L, a0k = 0.5, b0k = 0.5, iter = 1000L, burn = 200L, thin = 5L, n_threads = 1L, opt_llk = 1L, verbose = TRUE) {
21 |     .Call('_pecotmr_sdpr_rcpp', PACKAGE = 'pecotmr', bhat, LD, n, per_variant_sample_size, array, a, c, M, a0k, b0k, iter, burn, thin, n_threads, opt_llk, verbose)
22 | }
23 | 
24 | 


--------------------------------------------------------------------------------
/R/multigene_udr.R:
--------------------------------------------------------------------------------
 1 | #' Perform udr Analysis on Multigene Data
 2 | #'
 3 | #' @param combined_susie_list A list containing the combined SuSiE and summary statistics results.
 4 | #' @param coverage A numeric vector representing the coverage values.
 5 | #' @param independent_variant_list A list of independent variants to be used as a filter.
 6 | #' @param n_random An integer specifying the number of random samples to generate.
 7 | #' @param n_null An integer specifying the number of null samples to generate.
 8 | #' @param seed An integer specifying the seed for random number generation.
 9 | #' @param exclude_condition A character vector specifying conditions to be excluded from the analysis. Defaults to NULL.
10 | #'
11 | #' @return A list containing the results of the prior, or NULL if conditions are not met.
12 | #' @importFrom dplyr filter mutate group_by
13 | #' @export
14 | multigene_udr <- function(combined_susie_list, coverage, independent_variant_list, n_random, n_null, seed, exclude_condition = NULL) {
15 |   # Default to an empty vector if exclude_condition is NULL
16 |   if (is.null(exclude_condition)) {
17 |     exclude_condition <- c()
18 |   }
19 |   reformat_data <- function(dat) {
20 |     res <- list(
21 |       strong.b = dat$strong$bhat,
22 |       random.b = dat$random$bhat,
23 |       null.b = dat$null$bhat,
24 |       strong.s = dat$strong$sbhat,
25 |       null.s = dat$null$sbhat,
26 |       random.s = dat$random$sbhat
27 |     )
28 |     return(res)
29 |   }
30 |   # Load strong and random null summary statistics
31 |   strong_file <- load_multitrait_R_sumstat(
32 |     combined_susie_list$extracted_regional_window_combined_susie_result,
33 |     combined_susie_list$extracted_regional_window_combined_sumstats_result,
34 |     coverage,
35 |     top_loci = TRUE,
36 |     exclude_condition = exclude_condition
37 |   )
38 | 
39 |   ran_null_file <- load_multitrait_R_sumstat(
40 |     combined_susie_list$extracted_regional_window_combined_susie_result,
41 |     combined_susie_list$extracted_regional_window_combined_sumstats_result,
42 |     filter_file = independent_variant_list,
43 |     exclude_condition = exclude_condition
44 |   )
45 | 
46 |   # Generate random null samples
47 |   ran_null <- mash_rand_null_sample(
48 |     ran_null_file,
49 |     n_random = n_random,
50 |     n_null = n_null,
51 |     exclude_condition = exclude_condition,
52 |     seed = seed
53 |   )
54 | 
55 | 
56 |   # Prepare the strong summary statistics
57 |   strong <- list(strong = list(bhat = strong_file$bhat, sbhat = strong_file$sbhat))
58 | 
59 |   # Combine strong and random null samples
60 |   res <- c(strong, ran_null)
61 | 
62 |   # Reformat data for MASH analysis
63 |   mash_input <- reformat_data(res)
64 | 
65 |   # Filter invalid summary statistics for each condition
66 |   conditions <- c("strong", "random", "null")
67 |   for (cond in conditions) {
68 |     mash_input <- filter_invalid_summary_stat(
69 |       mash_input,
70 |       bhat = paste0(cond, ".b"),
71 |       sbhat = paste0(cond, ".s"),
72 |       btoz = TRUE,
73 |       filter_by_missing_rate = NULL,
74 |       sig_p_cutoff = NULL
75 |     )
76 |   }
77 | 
78 |   # Calculate ZtZ matrix
79 |   mash_input$ZtZ <- t(as.matrix(mash_input$strong.z)) %*% as.matrix(mash_input$strong.z) / nrow(mash_input$strong.z)
80 | 
81 |   # Perform MASH analysis if conditions are met
82 |   dd_prior <- if (nrow(mash_input$strong.b) < 2 || ncol(mash_input$strong.b) < 2) {
83 |     NULL
84 |   } else {
85 |     mash_pipeline(mash_input, alpha = 1)
86 |   }
87 |   return(dd_prior)
88 | }
89 | 


--------------------------------------------------------------------------------
/R/multitrait_data.R:
--------------------------------------------------------------------------------
 1 | #' @name multitraite_data
 2 | #'
 3 | #' @title Simulated Multi-condition Data for TWAS analysis
 4 | #'
 5 | #' @docType data
 6 | #'
 7 | #' @description Simulated data of a gene with multi-conditions (cell-type/tissues)
 8 | #' gene expression level matrix(Y) and genotype matrix(X) from 400 individuals,
 9 | #' plus mixure prior matrices, prior grid, as well as summary statistics from
10 | #' univariate regression and GWAS summary statistics that is ready for use for
11 | #' TWAS analysis. Genotype matrix is centered and scaled, expression matrix is
12 | #' normalized.
13 | #'
14 | #' @format \code{multitraite_data} is a list with the following elements:
15 | #'
16 | #' \describe{
17 | #'
18 | #'   \item{X}{Centered and scaled n x p matrix of genotype, where n is the total
19 | #'       number of individuals and p denotes the number of SNPs.}
20 | #'
21 | #'   \item{Y}{Normalized n x r matrix of residual for expression, where n is the
22 | #'       total number of individuals and r is the total number of conditions
23 | #'       (tissue/cell-types).}
24 | #'
25 | #'   \item{prior_matrices}{A list of data-driven covariance matrices.}
26 | #'
27 | #'   \item{prior_grid}{A vector of scaling factors to be used in fitting
28 | #'         mr.mash model.}
29 | #'
30 | #'   \item{prior_matrices_cv}{A list of list containing data-driven covariance
31 | #'         matrices for 5-fold cross validation.}
32 | #'
33 | #'   \item{prior_grid_cv}{A list of vectors of scaling factors for 5-fold
34 | #'         cross validation via sample partition.}
35 | #'
36 | #'   \item{gwas_sumstats}{A data frame for GWAS summary statistics.}
37 | #'
38 | #'   \item{sumstat}{Summary statistics of Bhat and Sbhat from univariate
39 | #'         regression for a gene.}
40 | #'
41 | #'    \item{sumstat_cv}{A list of 5 fold cross-validation summary statistics based
42 | #'         on sample partition for a gene.}
43 | #' }
44 | #'
45 | #' @keywords data
46 | #'
47 | #' @references
48 | #' Morgante, F., Carbonetto, P., Wang, G., Zou, Y., Sarkar, A. & Stephens, M. (2023).
49 | #'   A flexible empirical Bayes approach to multivariate multiple regression, and
50 | #'   its improved accuracy in predicting multi-tissue gene expression from genotypes.
51 | #'   PLoS Genetics 19(7): e1010539. https://doi.org/10.1371/journal.pgen.1010539
52 | #'
53 | #' @examples
54 | #' data(multitraite_data)
55 | #'
56 | 


--------------------------------------------------------------------------------
/R/slalom.R:
--------------------------------------------------------------------------------
 1 | #' Slalom Function for Summary Statistics QC for Fine-Mapping Analysis
 2 | #'
 3 | #' Performs Approximate Bayesian Factor (ABF) analysis, identifies credible sets,
 4 | #' and annotates lead variants based on fine-mapping results. It computes p-values
 5 | #' from z-scores assuming a two-sided standard normal distribution.
 6 | #'
 7 | #' @param zScore Numeric vector of z-scores corresponding to each variant.
 8 | #' @param LD_mat Square matrix representing linkage disequilibrium (LD) information
 9 | #'   between variants. Must have dimensions matching the length of `zScore`.
10 | #' @param standard_error Optional numeric vector of standard errors corresponding
11 | #'   to each z-score. If not provided, a default value of 1 is assumed for all variants.
12 | #' @param abf_prior_variance Numeric, the prior effect size variance for ABF calculations.
13 | #'   Default is 0.04.
14 | #' @param nlog10p_dentist_s_threshold Numeric, the -log10 DENTIST-S P value threshold
15 | #'   for identifying outlier variants for prediction. Default is 4.0.
16 | #' @param r2_threshold Numeric, the r2 threshold for DENTIST-S outlier variants
17 | #'   for prediction. Default is 0.6.
18 | #' @param lead_variant_choice Character, method to choose the lead variant, either
19 | #'   "pvalue" or "abf", with default "pvalue".
20 | #' @return A list containing the annotated LD matrix with ABF results, credible sets,
21 | #'   lead variant, and DENTIST-S statistics; and a summary dataframe with aggregate statistics.
22 | #' @examples
23 | #' # Assuming `zScore` is your vector of z-scores, `LD_mat` is your LD matrix,
24 | #' # and optionally `standard_error` is your vector of standard errors:
25 | #' results <- slalom(zScore, LD_mat, standard_error)
26 | #' @export
27 | #'
28 | slalom <- function(zScore, LD_mat, standard_error = rep(1, length(zScore)), abf_prior_variance = 0.04,
29 |                    nlog10p_dentist_s_threshold = 4.0, r2_threshold = 0.6, lead_variant_choice = "pvalue") {
30 |   if (!is.matrix(LD_mat) || nrow(LD_mat) != ncol(LD_mat) || nrow(LD_mat) != length(zScore)) {
31 |     stop("LD_mat must be a square matrix matching the length of zScore.")
32 |   }
33 | 
34 |   pvalue <- 2 * pnorm(abs(zScore), lower.tail = FALSE)
35 | 
36 |   logSumExp <- function(x) {
37 |     max_x <- max(x, na.rm = TRUE)
38 |     sum_exp <- sum(exp(x - max_x), na.rm = TRUE)
39 |     return(max_x + log(sum_exp))
40 |   }
41 | 
42 |   abf <- function(z, se, W = 0.04) {
43 |     V <- se^2
44 |     r <- W / (W + V)
45 |     lbf <- 0.5 * (log(1 - r) + (r * z^2))
46 |     denom <- logSumExp(lbf)
47 |     prob <- exp(lbf - denom)
48 |     return(list(lbf = lbf, prob = prob))
49 |   }
50 | 
51 |   abf_results <- abf(zScore, standard_error, W = abf_prior_variance)
52 |   lbf <- abf_results$lbf
53 |   prob <- abf_results$prob
54 | 
55 |   get_cs <- function(prob, coverage = 0.95) {
56 |     ordering <- order(prob, decreasing = TRUE)
57 |     cumprob <- cumsum(prob[ordering])
58 |     idx <- which(cumprob > coverage)[1]
59 |     cs <- ordering[1:idx]
60 |     return(cs)
61 |   }
62 | 
63 |   cs <- get_cs(prob, coverage = 0.95)
64 |   cs_99 <- get_cs(prob, coverage = 0.99)
65 | 
66 |   lead_idx <- if (lead_variant_choice == "pvalue") {
67 |     which.min(pvalue)
68 |   } else {
69 |     which.max(prob)
70 |   }
71 | 
72 |   r2 <- LD_mat^2
73 |   t_dentist_s <- (zScore - LD_mat[, lead_idx] * zScore[lead_idx])^2 / (1 - r2[, lead_idx])
74 |   t_dentist_s[t_dentist_s < 0] <- Inf
75 |   nlog10p_dentist_s <- -log10(1 - pchisq(t_dentist_s, df = 1))
76 |   outliers <- (r2[, lead_idx] > r2_threshold) & (nlog10p_dentist_s > nlog10p_dentist_s_threshold)
77 | 
78 |   n_r2 <- sum(r2[, lead_idx] > r2_threshold)
79 |   n_dentist_s_outlier <- sum(outliers, na.rm = TRUE)
80 |   max_pip <- max(prob)
81 | 
82 |   summary <- list(
83 |     lead_pip_variant = lead_idx,
84 |     n_total = length(zScore),
85 |     n_r2 = n_r2,
86 |     n_dentist_s_outlier = n_dentist_s_outlier,
87 |     fraction = ifelse(n_r2 > 0, n_dentist_s_outlier / n_r2, 0),
88 |     max_pip = max_pip,
89 |     cs_95 = cs,
90 |     cs_99 = cs_99
91 |   )
92 |   result <- as.data.frame(list(original_z = zScore, prob = prob, pvalue = pvalue, outliers = outliers, nlog10p_dentist_s = nlog10p_dentist_s))
93 | 
94 |   return(list(data = result, summary = summary))
95 | }
96 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # pecotmr [![codecov](https://codecov.io/gh/cumc/pecotmr/graph/badge.svg?token=NFH9R1OPG7)](https://codecov.io/gh/cumc/pecotmr)
 2 | 
 3 | This R package, `pecotmr` implements **p**ost-GWAS **e**nrichment, **co**localization, **T**WAS and **M**endelian **R**andomization to integrate QTL and GWAS analysis, mostly based on fine-mapped single effects model although a number of other approaches are included for handy availiability in daily research.
 4 | `pecotmr` also stands for a **p**robabilistic **eco**system consolidating a range of established and new models into a streamlined, user-friendly interface complete with well-documented examples that will help getting your analysis done by **t**o**m**o**r**row if you start working on it today!
 5 | 
 6 | This project was developed by NIH/NIA FunGen-xQTL Project Working Group members with applications to FunGen-xQTL data analysis.
 7 | 
 8 | ## Quick Start
 9 | 
10 | To install the latest version of the pecotmr package
11 | from GitHub, use [BiocManager][BiocManager]:
12 | 
13 | ```R
14 | install.packages(c("BiocManager", "remotes"))
15 | BiocManager::install("StatFunGen/pecotmr")
16 | ```
17 | 
18 | If you have cloned the repository locally, you can install the package
19 | with the `install_local` function from devtools. Assuming your working
20 | directory contains the `pecotmr` repository, run this code to
21 | install the package:
22 | 
23 | ```R
24 | devtools::install_local("pecotmr",build_vignettes = TRUE)
25 | ```
26 | 
27 | `devtools` should automatically install all required packages if
28 | they are not installed already. **If automatic installation fails for some packages and you are unable to fix them, please try to use our conda package instead.**
29 | 
30 | To install using `conda` or `micromamba` for our released version,
31 | 
32 | ```
33 | micromamba install -c dnachun -c conda-forge -c bioconda r-pecotmr
34 | ```
35 | 
36 | To still use `devtools` to build `pecotmr` from source we suggest you build a local `conda` environment for pecotmr and all its dependencies,
37 | 
38 | ```
39 | cp .github/environment/environment.yaml /tmp/environment.yaml
40 | grep -v '{%' < .github/recipe/meta.yaml | yq .requirements.host | sed 's/^/  /' >> /tmp/environment.yaml
41 | micromamba create -f /tmp/environment.yaml
42 | ```
43 | 
44 | then use 
45 | 
46 | ```
47 | micromamba activate pecotmr
48 | ```
49 | 
50 | to activate it. Now you should be able to build `pecotmr` from source.
51 | 
52 | ## Developer's notes
53 | 
54 | + When any changes are made to `roxygen2` markup or the C++ code in
55 | the src directory, run `devtools::document()` to update the
56 | [RcppExports.cpp](src/RcppExports.cpp), the package namespaces (see
57 | [NAMESPACE](NAMESPACE)), and the package documentation files (in the
58 | "man" subdirectory),
59 | 
60 | + These are the R commands to build the website (make sure you are
61 | connected to Internet while running these commands):
62 | 
63 |    ```R
64 |    pkgdown::build_site(lazy=TRUE, examples=FALSE)
65 |    ```
66 | 
67 | + To format R codes in the `R` folder,
68 | 
69 |    ```bash
70 |    for i in `ls R/*.R`; do bash inst/misc/format_r_code.sh $i; done
71 |    ```
72 | 
73 | + After editing C++ code in the `src` directory, please use
74 | [uncrustify][uncrustify] (version >=0.74.0, available from conda-forge) 
75 | to format the code using configuration file
76 | `inst/misc/uncrustify_default.cfg`. For example:
77 | 
78 |    ```bash
79 |    uncrustify -c inst/misc/uncrustify_default.cfg --replace --no-backup -l CPP src/qtl_enrichment.cpp
80 |    uncrustify -c inst/misc/uncrustify_default.cfg --replace --no-backup -l CPP src/qtl_enrichment.hpp
81 |    ```
82 | 
83 | + Prior to submitting the package to Bioconductor, the following modifications
84 | need to be made: (1) remove the `Remotes:` entry in `DESCRIPTION`; and
85 | (2) remove the `fastenloc.Rmd` vignette.
86 | 
87 | [BiocManager]: https://github.com/Bioconductor/BiocManager
88 | [uncrustify]: https://github.com/uncrustify/uncrustify
89 | 


--------------------------------------------------------------------------------
/cleanup:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | rm -f config.log config.status confdefs.h src/*.o src/*.so src/Makevars


--------------------------------------------------------------------------------
/configure:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | 
3 | GSL_CFLAGS=`${R_HOME}/bin/Rscript -e "RcppGSL:::CFlags()"`
4 | GSL_LIBS=`${R_HOME}/bin/Rscript -e "RcppGSL:::LdFlags()"`
5 | 
6 | sed -e "s|@GSL_LIBS@|${GSL_LIBS}|" \
7 |     -e "s|@GSL_CFLAGS@|${GSL_CFLAGS}|" \
8 |     src/Makevars.in > src/Makevars


--------------------------------------------------------------------------------
/data/multitrait_data.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StatFunGen/pecotmr/50e15f85e5b398c1831a42cee5b6e40308c880b4/data/multitrait_data.RData


--------------------------------------------------------------------------------
/inst/code/fastenloc_archive/README.md:
--------------------------------------------------------------------------------
1 | Downloaded from https://github.com/xqwen/fastenloc/tree/d4e2db1a7f17404c8267573acc69fc9b874e4c38


--------------------------------------------------------------------------------
/inst/code/fastenloc_archive/controller.h:
--------------------------------------------------------------------------------
 1 | using namespace std;
 2 | 
 3 | #include "sigCluster.h"
 4 | #include <vector>
 5 | #include <map>
 6 | #include <string>
 7 | 
 8 | class controller {
 9 | 
10 |     private:
11 |         
12 |         vector<sigCluster> eqtl_vec;
13 |         vector<sigCluster> gwas_vec;
14 |         map<string, int> eqtl_sig_index;
15 |         map<string, int> gwas_sig_index;
16 | 
17 | 	map<string, string> snp2gwas_locus;
18 | 
19 |         
20 |         map<string, int> snp_index;
21 |         vector<string> snp_vec;
22 |         vector<double> gwas_pip_vec;
23 | 
24 |         string prefix;
25 | 
26 |         int ImpN;
27 |         int nthread;
28 |         
29 |         int total_snp;
30 | 
31 |         double pi1;
32 |         double pi1_e;
33 |         double pi1_ne;
34 |         
35 |         double a0_est;
36 |         double a1_est;
37 |         
38 |         // for enrichment prior
39 |         double prior_variance;
40 |         double P_eqtl;
41 |         double P_gwas;
42 | 
43 |         // threshold value to output signal/snp coloc probs
44 |         double output_thresh;
45 | 
46 | 
47 |     public:
48 | 
49 |         void set_imp_num(int imp){
50 |             ImpN = imp;
51 |         }
52 | 
53 |         void set_snp_size(int size){
54 |             total_snp = size;
55 |         }
56 | 
57 |         void set_prior_variance (double pv){
58 |             prior_variance = pv;
59 |         }
60 | 
61 |         void set_thread(int thread){
62 |             nthread = thread;
63 |         }
64 | 
65 |         void set_prefix(char *str);
66 |         
67 |         void set_enrich_params(double p1, double p2, double p12);
68 |         void set_enrich_params(double a0, double a1);
69 | 
70 |         void load_eqtl(char *eqtl_file, char *tissue);
71 |         void load_gwas_torus(char *gwas_file);
72 | 
73 | 
74 |         void set_output_thresh(double value){
75 |             output_thresh = value;
76 |         }
77 | 
78 |         void enrich_est();
79 |         void compute_coloc_prob();
80 | 
81 |     private:
82 | 
83 |         vector<double> run_EM(vector<int> & eqtl_sample);
84 | };
85 | 


--------------------------------------------------------------------------------
/inst/code/fastenloc_archive/sigCluster.cc:
--------------------------------------------------------------------------------
 1 | #include "sigCluster.h"
 2 | #include <gsl/gsl_randist.h>
 3 | 
 4 | int sigCluster::impute_qtn(const gsl_rng *r){
 5 | 
 6 |     if(pip_prob==0){
 7 |         if(pip_vec.size() == 0)
 8 |             return -1;
 9 |         else{
10 |             pip_prob = new double[pip_vec.size()+1];
11 |             for(int i=0;i<pip_vec.size();i++){
12 |                 pip_prob[i] = pip_vec[i];
13 |             }
14 |             pip_prob[pip_vec.size()] = 1 - cpip;
15 |         }   
16 |     }
17 |     size_t K = pip_vec.size()+1;
18 |     unsigned int *sample = new unsigned int[pip_vec.size()+1];
19 |     gsl_ran_multinomial(r,  K, 1, pip_prob, sample);
20 |     for(int i=0;i<pip_vec.size();i++){
21 |         if(sample[i] == 1)
22 |             return i;
23 |     }
24 | 
25 |     return -1;
26 | 
27 | }
28 | 


--------------------------------------------------------------------------------
/inst/code/fastenloc_archive/sigCluster.h:
--------------------------------------------------------------------------------
 1 | using namespace std;
 2 | #include <string>
 3 | #include <vector>
 4 | #include <gsl/gsl_rng.h>
 5 | 
 6 | class sigCluster {
 7 | 
 8 |     public:
 9 | 
10 |         string gene;
11 |         string id;  //signal id of the gene
12 |         double cpip;
13 |         vector<double> pip_vec;
14 |         vector<string> snp_vec;
15 |         vector<double> coloc_vec;    
16 | 
17 |         double coloc_prob; 
18 |         double locus_coloc_prob; 
19 | 
20 |     private:
21 |         double *pip_prob;
22 | 
23 |     public:
24 | 
25 |         sigCluster(){
26 |             pip_prob = 0;
27 |             coloc_prob = locus_coloc_prob = cpip = 0;
28 |         }
29 | 
30 |         int impute_qtn(const gsl_rng *r);
31 | 
32 | };
33 | 


--------------------------------------------------------------------------------
/inst/misc/format_r_code.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Function to display error messages and exit the script
 4 | display_error() {
 5 |     echo "Error: $1"
 6 |     exit 1
 7 | }
 8 | 
 9 | # Check if the R file is provided as an argument
10 | if [ $# -eq 0 ]; then
11 |     display_error "Please provide the path to the R file as an argument."
12 | fi
13 | 
14 | # Get the R file path from the argument
15 | r_file="$1"
16 | 
17 | # Check if the R file exists
18 | if [ ! -f "$r_file" ]; then
19 |     display_error "The specified R file '$r_file' does not exist."
20 | fi
21 | 
22 | echo "Formatting R code in file: $r_file"
23 | 
24 | # Format the R code using formatR and capture the output
25 | # output=$(echo "library(formatR); tryCatch(tidy_source(\"$r_file\", file = \"$r_file\", indent = 2, args.newline = TRUE, arrow = TRUE), error = function(e) {message(\"Error formatting R code:\"); print(e)})" | R --slave --no-save 2>&1)
26 | # Format the R code using styler and capture the output
27 | output=$(echo "tryCatch(styler::style_file(\"$r_file\"), error = function(e) {message(\"Error formatting R code:\"); print(e)})" | R --slave --no-save 2>&1)
28 | 
29 | # Check if the formatting was successful
30 | if echo "$output" | grep -q "Error formatting R code:"; then
31 |     echo "Formatting failed. Please check the R code for syntax errors."
32 |     echo -e "\033[0;31m$(echo "$output" | head -n -3)\033[0m"
33 |     echo -e "\033[1;31m$(echo "$output" | tail -n 3)\033[0m"
34 |     exit 1
35 | else
36 |     echo "R code formatting completed successfully."
37 | fi


--------------------------------------------------------------------------------
/inst/misc/post-commit.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # This script will be executed every time you run "git commit". It
 4 | # will commit changes made to package DESCRIPTION by the pre-commit hook
 5 | #
 6 | # To use this script, copy it to the .git/hooks directory of your
 7 | # local repository to filename `post-commit`, and make it executable.
 8 | #
 9 | ROOT_DIR=`git rev-parse --show-toplevel`
10 | # Only commit DESCRIPTION file when it is not staged (due to changes by pre-commit hook)
11 | if [[ -z `git diff HEAD` ]] || [[ ! -f $ROOT_DIR/DESCRIPTION ]] || [[ -z `git diff $ROOT_DIR/DESCRIPTION` ]]; then
12 |     exit 0
13 | else
14 |     git add $ROOT_DIR/DESCRIPTION
15 |     git commit --amend -C HEAD --no-verify
16 |     echo "Amend current commit to incorporate version bump"
17 |     exit 0
18 | fi
19 | 


--------------------------------------------------------------------------------
/inst/misc/pre-commit.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # This script will be executed every time you run "git commit". It
 4 | # will update the 4th digit of package version by revision number.
 5 | #
 6 | # To use this script, copy it to the .git/hooks directory of your
 7 | # local repository to filename `pre-commit`, and make it executable.
 8 | #
 9 | ROOT_DIR=`git rev-parse --show-toplevel`
10 | MSG="[WARNING] Auto-versioning disabled because string 'Version: x.y.z.r' cannot be found in DESCRIPTION file."
11 | GREP_REGEX='^Version: [0-9]*\.[0-9]*\.[0-9]*\.[0-9]*'
12 | SED_REGEX='^Version: \([0-9]*\.[0-9]*\.[0-9]*\)\.[0-9]*'
13 | # `git diff HEAD` shows both staged and unstaged changes
14 | if [[ -z `git diff HEAD` ]] || [[ ! -f $ROOT_DIR/DESCRIPTION ]]; then
15 |     exit 0
16 | elif [[ -z `grep "$GREP_REGEX" $ROOT_DIR/DESCRIPTION` ]]; then
17 |     echo -e "\e[1;31m$MSG\e[0m"
18 |     exit 0
19 | else
20 |     REV_ID=`git log --oneline | wc -l`
21 |     REV_ID=`printf "%04d\n" $((REV_ID+1))`
22 |     DATE=`date +%Y-%m-%d`
23 |     echo "Version string bumped to revision $REV_ID on $DATE"
24 |     sed -i "s/$SED_REGEX/Version: \1.$REV_ID/" $ROOT_DIR/DESCRIPTION
25 |     sed -i "s/^Date: .*/Date: $DATE/" $ROOT_DIR/DESCRIPTION
26 |     if [[ `git rev-parse --abbrev-ref HEAD` -eq "master" ]]; then
27 |         cd $ROOT_DIR
28 |         echo "Updating documentation ..."
29 |         R --slave -e 'devtools::document()' &> /dev/null && git add man/*.Rd
30 |         echo "Documentation updated!"
31 |         echo "Running unit tests ..."
32 |         R --slave -e 'devtools::test()'
33 |         echo "Unit test completed!"
34 |     fi
35 |     exit 0
36 | fi
37 | 


--------------------------------------------------------------------------------
/inst/prototype/generate_test_for_prs_cs.R:
--------------------------------------------------------------------------------
 1 |      set.seed(985115)
 2 |      n <- 350
 3 |      p <- 16
 4 |      sigmasq_error <- 0.5
 5 |      zeroes <- rbinom(p, 1, 0.6)
 6 |      beta.true <- rnorm(p, 1, sd = 4)
 7 |      beta.true[zeroes] <- 0
 8 |      
 9 |      X <- cbind(matrix(rnorm(n * p), nrow = n))
10 |      X <- scale(X, center = TRUE, scale = FALSE)
11 |      y <- X %*% matrix(beta.true, ncol = 1) + rnorm(n, 0, sqrt(sigmasq_error))
12 |      y <- scale(y, center = TRUE, scale = FALSE)
13 |      
14 |      # Calculate sufficient statistics
15 |      XtX <- t(X) %*% X
16 |      Xty <- t(X) %*% y
17 |      yty <- t(y) %*% y
18 |      
19 |      # Set the prior
20 |      K <- 9
21 |      sigma0 <- c(0.001, .1, .5, 1, 5, 10, 20, 30, .005)
22 |      omega0 <- rep(1/K, K)
23 |      
24 |      # Calculate summary statistics
25 |      b.hat <- sapply(1:p, function(j) { summary(lm(y ~ X[, j]))$coefficients[-1, 1] })
26 |      s.hat <- sapply(1:p, function(j) { summary(lm(y ~ X[, j]))$coefficients[-1, 2] })
27 |      R.hat <- cor(X)
28 |      var_y <- var(y)
29 |      sigmasq_init <- 1.5
30 |      
31 |      # Run PRS CS
32 |      maf = rep(0.5, length(b.hat)) # fake MAF
33 |      LD <- list(blk1 = R.hat)
34 |      sumstats <- list(BETA=b.hat, MAF=maf)
35 |      write.table(data.frame(sumstats), "sumstats.txt", sep = "\t", row.names = FALSE, col.names = TRUE)
36 |      write.table(LD$blk1, "LD.txt", sep = "\t", row.names = FALSE, col.names = FALSE)
37 | 
38 |      out2 <- pecotmr::prs_cs(b.hat, LD, n, maf=maf, verbose = TRUE, seed=999)
39 |      print(out2$beta_est)
40 | 
41 | 
42 | #Rscript generate_test_for_prs_cs.R 
43 | #Running Markov Chain Monte Carlo (MCMC) sampler...
44 | #Iteration  100 of 1000
45 | #Iteration  200 of 1000
46 | #Iteration  300 of 1000
47 | #Iteration  400 of 1000
48 | #Iteration  500 of 1000
49 | #Iteration  600 of 1000
50 | #Iteration  700 of 1000
51 | #Iteration  800 of 1000
52 | #Iteration  900 of 1000
53 | #Iteration 1000 of 1000
54 | #Estimated global shrinkage parameter: 0.573608
55 | #MCMC sampling completed.
56 | #            [,1]
57 | # [1,]  0.2014556
58 | # [2,]  1.0877955
59 | # [3,] -1.5719068
60 | # [4,]  1.1523724
61 | # [5,] -0.8181124
62 | # [6,] -0.8719565
63 | # [7,] -1.5151863
64 | # [8,]  1.7940113
65 | # [9,]  2.0403245
66 | #[10,] -1.7775449
67 | #[11,] -0.8175015
68 | #[12,]  6.1956311
69 | #[13,] -0.6421237
70 | #[14,] -1.9703685
71 | #[15,]  0.8190806
72 | #[16,] -0.3304925
73 | 


--------------------------------------------------------------------------------
/man/QUAIL_pipeline.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/quail_vqtl.R
 3 | \name{QUAIL_pipeline}
 4 | \alias{QUAIL_pipeline}
 5 | \title{Main QUAIL pipeline
 6 | QUAIL vQTL Analysis Pipeline}
 7 | \usage{
 8 | QUAIL_pipeline(genotype, rank_score, phenotype = NULL, covariates = NULL)
 9 | }
10 | \arguments{
11 | \item{genotype}{numeric matrix (n x p) of genotypes.}
12 | 
13 | \item{rank_score}{numeric vector (n x 1) of rank scores from Step 1.}
14 | 
15 | \item{phenotype}{optional numeric vector (n x 1) of original phenotype values.}
16 | 
17 | \item{covariates}{optional numeric matrix (n x k) of covariates.}
18 | }
19 | \value{
20 | A data frame containing vQTL results.
21 | }
22 | \description{
23 | Main QUAIL pipeline
24 | QUAIL vQTL Analysis Pipeline
25 | }
26 | \examples{
27 | \dontrun{
28 | results <- QUAIL_pipeline(genotype, rank_score, covariates = covariates)
29 | }
30 | }
31 | 


--------------------------------------------------------------------------------
/man/QUAIL_rank_score_pipeline.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/quail_rank_score.R
 3 | \name{QUAIL_rank_score_pipeline}
 4 | \alias{QUAIL_rank_score_pipeline}
 5 | \title{Main QUAIL Rank Score Pipeline}
 6 | \usage{
 7 | QUAIL_rank_score_pipeline(
 8 |   phenotype,
 9 |   covariates,
10 |   num_tau_levels = 19,
11 |   method = "equal",
12 |   num_cores = 1
13 | )
14 | }
15 | \arguments{
16 | \item{phenotype}{numeric vector of phenotype values}
17 | 
18 | \item{covariates}{matrix/data.frame of covariates}
19 | 
20 | \item{num_tau_levels}{integer number of quantile levels}
21 | 
22 | \item{method}{character "equal" or "ivw"}
23 | 
24 | \item{num_cores}{integer number of cores for parallel processing}
25 | }
26 | \value{
27 | data.frame with integrated rank scores
28 | }
29 | \description{
30 | Main QUAIL Rank Score Pipeline
31 | }
32 | 


--------------------------------------------------------------------------------
/man/adjust_susie_weights.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/susie_wrapper.R
 3 | \name{adjust_susie_weights}
 4 | \alias{adjust_susie_weights}
 5 | \title{Adjust SuSiE Weights}
 6 | \usage{
 7 | adjust_susie_weights(
 8 |   twas_weights_results,
 9 |   keep_variants,
10 |   allele_qc = TRUE,
11 |   variable_name_obj = c("susie_results", context, "variant_names"),
12 |   susie_obj = c("susie_results", context, "susie_result_trimmed"),
13 |   twas_weights_table = c("weights", context),
14 |   combined_LD_variants,
15 |   match_min_prop = 0.2
16 | )
17 | }
18 | \arguments{
19 | \item{keep_variants}{Vector of variant names to keep.}
20 | 
21 | \item{allele_qc}{Optional}
22 | 
23 | \item{weight_db_file}{A RDS file containing TWAS weights.}
24 | 
25 | \item{condition}{specific condition.}
26 | }
27 | \value{
28 | A list of adjusted xQTL coefficients and remained variants ids
29 | }
30 | \description{
31 | This function adjusts the SuSiE weights based on a set of intersected variants.
32 | It subsets various components like lbf_matrix, mu, and scale factors based on these variants.
33 | }
34 | 


--------------------------------------------------------------------------------
/man/align_variant_names.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/allele_qc.R
 3 | \name{align_variant_names}
 4 | \alias{align_variant_names}
 5 | \title{Align Variant Names}
 6 | \usage{
 7 | align_variant_names(source, reference, remove_indels = FALSE)
 8 | }
 9 | \arguments{
10 | \item{source}{A character vector of variant names in the format "chr:pos:A2:A1" or "chr:pos_A2_A1".}
11 | 
12 | \item{reference}{A character vector of variant names in the format "chr:pos:A2:A1" or "chr:pos_A2_A1".}
13 | }
14 | \value{
15 | A list with two elements:
16 | - aligned_variants: A character vector of aligned variant names.
17 | - unmatched_indices: A vector of indices for the variants in the source that could not be matched.
18 | }
19 | \description{
20 | This function aligns variant names from two strings containing variant names in the format of
21 | "chr:pos:A1:A2" or "chr:pos_A1_A2". The first string should be the "source" and the second
22 | should be the "reference".
23 | }
24 | \examples{
25 | source <- c("1:123:A:C", "2:456:G:T", "3:789:C:A")
26 | reference <- c("1:123:A:C", "2:456:T:G", "4:101:G:C")
27 | align_variant_names(source, reference)
28 | 
29 | }
30 | 


--------------------------------------------------------------------------------
/man/allele_qc.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/allele_qc.R
 3 | \name{allele_qc}
 4 | \alias{allele_qc}
 5 | \title{Match alleles between target data and reference variants}
 6 | \usage{
 7 | allele_qc(
 8 |   target_data,
 9 |   ref_variants,
10 |   col_to_flip = NULL,
11 |   match_min_prop = 0.2,
12 |   remove_dups = TRUE,
13 |   remove_indels = FALSE,
14 |   remove_strand_ambiguous = TRUE,
15 |   flip_strand = FALSE,
16 |   remove_unmatched = TRUE,
17 |   remove_same_vars = FALSE
18 | )
19 | }
20 | \arguments{
21 | \item{target_data}{A data frame with columns "chrom", "pos", "A2", "A1" (and optionally other columns like "beta" or "z"),
22 | or a vector of strings in the format of "chr:pos:A2:A1"/"chr:pos_A2_A1". Can be automatically converted to a data frame if a vector.}
23 | 
24 | \item{ref_variants}{A data frame with columns "chrom", "pos", "A2", "A1" or strings in the format of "chr:pos:A2:A1"/"chr:pos_A2_A1".}
25 | 
26 | \item{col_to_flip}{The name of the column in target_data where flips are to be applied.}
27 | 
28 | \item{match_min_prop}{Minimum proportion of variants in the smallest data
29 | to be matched, otherwise stops with an error. Default is 20%.}
30 | 
31 | \item{remove_dups}{Whether to remove duplicates, default is TRUE.}
32 | 
33 | \item{remove_indels}{Whether to remove INDELs, default is FALSE.}
34 | 
35 | \item{remove_strand_ambiguous}{Whether to remove strand SNPs (if any). Default is `TRUE`.}
36 | 
37 | \item{flip_strand}{Whether to output the variants after strand flip. Default is `FALSE`.}
38 | 
39 | \item{remove_unmatched}{Whether to remove unmatched variants. Default is `TRUE`.}
40 | 
41 | \item{flip}{Whether the alleles must be flipped: A <--> T & C <--> G, in which case
42 | corresponding `col_to_flip` are multiplied by -1. Default is `TRUE`.}
43 | }
44 | \value{
45 | A single data frame with matched variants.
46 | }
47 | \description{
48 | Match by ("chrom", "A1", "A2" and "pos"), accounting for possible
49 | strand flips and major/minor allele flips (opposite effects and zscores).
50 | }
51 | 


--------------------------------------------------------------------------------
/man/auto_decision.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/univariate_rss_diagnostics.R
 3 | \name{auto_decision}
 4 | \alias{auto_decision}
 5 | \title{Process Credible Set Information and Determine Updating Strategy}
 6 | \usage{
 7 | auto_decision(df, high_corr_cols)
 8 | }
 9 | \arguments{
10 | \item{df}{Data frame. Contains information about Credible Sets for a specific study and block.}
11 | 
12 | \item{high_corr_cols}{Character vector. Names of columns in df that represent high correlations.}
13 | }
14 | \value{
15 | A modified data frame with additional columns attached to the diagnostic table:
16 |   \item{top_cs}{Logical. TRUE for the CS with the highest absolute Z-score.}
17 |   \item{tagged_cs}{Logical. TRUE for CS that are considered "tagged" based on p-value and correlation criteria.}
18 |   \item{method}{Character. The determined updating strategy ("BVSR", "SER", or "BCR").}
19 | }
20 | \description{
21 | This function categorizes Credible Sets (CS) within a study block into different 
22 | updating strategies based on their statistical properties and correlations.
23 | }
24 | \details{
25 | This function performs the following steps:
26 | 1. Identifies the top CS based on the highest absolute Z-score.
27 | 2. Identifies tagged CS based on high p-value and high correlations.
28 | 3. Counts total, tagged, and remaining CS.
29 | 4. Determines the appropriate updating method based on these counts.
30 | 
31 | The updating methods are:
32 | - BVSR (Bayesian Variable Selection Regression): Used when there's only one CS or all CS are accounted for.
33 | - SER (Single Effect Regression): Used when there are tagged CS but no remaining untagged CS.
34 | - BCR (Bayesian Conditional Regression): Used when there are remaining untagged CS.
35 | }
36 | \note{
37 | This function is part of a developing methodology for automatically handling 
38 | finemapping results. The thresholds and criteria used (e.g., p-value > 1e-4 for tagging) 
39 | are subject to refinement and may change in future versions.
40 | }
41 | 


--------------------------------------------------------------------------------
/man/batch_load_twas_weights.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/file_utils.R
 3 | \name{batch_load_twas_weights}
 4 | \alias{batch_load_twas_weights}
 5 | \title{Split loaded twas_weights_results into batches based on maximum memory usage}
 6 | \usage{
 7 | batch_load_twas_weights(
 8 |   twas_weights_results,
 9 |   meta_data_df,
10 |   max_memory_per_batch = 750
11 | )
12 | }
13 | \arguments{
14 | \item{twas_weights_results}{List of loaded gene data by load_twas_weights()}
15 | 
16 | \item{meta_data_df}{Dataframe containing gene metadata with region_id and TSS columns}
17 | 
18 | \item{max_memory_per_batch}{Maximum memory per batch in MB (default: 750)}
19 | }
20 | \value{
21 | List of batches, where each batch contains a subset of twas_weights_results
22 | }
23 | \description{
24 | Split loaded twas_weights_results into batches based on maximum memory usage
25 | }
26 | 


--------------------------------------------------------------------------------
/man/bayes_a_rss_weights.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/regularized_regression.R
 3 | \name{bayes_a_rss_weights}
 4 | \alias{bayes_a_rss_weights}
 5 | \title{Use t-distribution as prior.}
 6 | \usage{
 7 | bayes_a_rss_weights(sumstats, LD, ...)
 8 | }
 9 | \description{
10 | Use t-distribution as prior.
11 | }
12 | 


--------------------------------------------------------------------------------
/man/bayes_a_weights.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/regularized_regression.R
 3 | \name{bayes_a_weights}
 4 | \alias{bayes_a_weights}
 5 | \title{Use t-distribution as prior.}
 6 | \usage{
 7 | bayes_a_weights(X, y, Z = NULL, ...)
 8 | }
 9 | \description{
10 | Use t-distribution as prior.
11 | }
12 | 


--------------------------------------------------------------------------------
/man/bayes_alphabet_rss_weights.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/regularized_regression.R
 3 | \name{bayes_alphabet_rss_weights}
 4 | \alias{bayes_alphabet_rss_weights}
 5 | \title{Extract weights from gbayes_rss function}
 6 | \usage{
 7 | bayes_alphabet_rss_weights(sumstats, LD, method, ...)
 8 | }
 9 | \value{
10 | A numeric vector of the posterior mean of the coefficients.
11 | }
12 | \description{
13 | Extract weights from gbayes_rss function
14 | }
15 | 


--------------------------------------------------------------------------------
/man/bayes_alphabet_weights.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/regularized_regression.R
 3 | \name{bayes_alphabet_weights}
 4 | \alias{bayes_alphabet_weights}
 5 | \title{Extract Coefficients From Bayesian Linear Regression}
 6 | \usage{
 7 | bayes_alphabet_weights(
 8 |   X,
 9 |   y,
10 |   method,
11 |   Z = NULL,
12 |   nit = 5000,
13 |   nburn = 1000,
14 |   nthin = 5,
15 |   ...
16 | )
17 | }
18 | \arguments{
19 | \item{X}{A numeric matrix of genotypes.}
20 | 
21 | \item{y}{A numeric vector of phenotypes.}
22 | 
23 | \item{method}{A character string declaring the method/prior to be used. Options are
24 | bayesN, bayesL, bayesA, bayesC, or bayesR.}
25 | 
26 | \item{Z}{An optional numeric matrix of covariates.}
27 | }
28 | \value{
29 | A vector containing the weights to be applied to each genotype in
30 |   predicting the phenotype.
31 | }
32 | \description{
33 | This function performs Bayesian linear regression using the `gbayes` function from
34 | the `qgg` package. It then returns the estimated slopes.
35 | }
36 | \details{
37 | This function fits a Bayesian linear regression model with a range of priors.
38 | }
39 | \examples{
40 | X <- matrix(rnorm(100000), nrow = 1000)
41 | Z <- matrix(round(runif(3000, 0, 0.8), 0), nrow = 1000)
42 | set1 <- sample(1:ncol(X), 5)
43 | set2 <- sample(1:ncol(X), 5)
44 | sets <- list(set1, set2)
45 | g <- rowSums(X[, c(set1, set2)])
46 | e <- rnorm(nrow(X), mean = 0, sd = 1)
47 | y <- g + e
48 | bayes_l_weights(y = y, X = X, Z = Z)
49 | bayes_r_weights(y = y, X = X, Z = Z)
50 | }
51 | 


--------------------------------------------------------------------------------
/man/bayes_c_rss_weights.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/regularized_regression.R
 3 | \name{bayes_c_rss_weights}
 4 | \alias{bayes_c_rss_weights}
 5 | \title{Use a rounded spike prior (low-variance Gaussian).}
 6 | \usage{
 7 | bayes_c_rss_weights(sumstats, LD, ...)
 8 | }
 9 | \description{
10 | Use a rounded spike prior (low-variance Gaussian).
11 | }
12 | 


--------------------------------------------------------------------------------
/man/bayes_c_weights.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/regularized_regression.R
 3 | \name{bayes_c_weights}
 4 | \alias{bayes_c_weights}
 5 | \title{Use a rounded spike prior (low-variance Gaussian).}
 6 | \usage{
 7 | bayes_c_weights(X, y, Z = NULL, ...)
 8 | }
 9 | \description{
10 | Use a rounded spike prior (low-variance Gaussian).
11 | }
12 | 


--------------------------------------------------------------------------------
/man/bayes_l_rss_weights.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/regularized_regression.R
 3 | \name{bayes_l_rss_weights}
 4 | \alias{bayes_l_rss_weights}
 5 | \title{Use laplace/double exponential distribution as prior. This is equivalent to Bayesian LASSO.}
 6 | \usage{
 7 | bayes_l_rss_weights(sumstats, LD, ...)
 8 | }
 9 | \description{
10 | Use laplace/double exponential distribution as prior. This is equivalent to Bayesian LASSO.
11 | }
12 | 


--------------------------------------------------------------------------------
/man/bayes_l_weights.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/regularized_regression.R
 3 | \name{bayes_l_weights}
 4 | \alias{bayes_l_weights}
 5 | \title{Use laplace/double exponential distribution as prior. This is equivalent to Bayesian LASSO.}
 6 | \usage{
 7 | bayes_l_weights(X, y, Z = NULL, ...)
 8 | }
 9 | \description{
10 | Use laplace/double exponential distribution as prior. This is equivalent to Bayesian LASSO.
11 | }
12 | 


--------------------------------------------------------------------------------
/man/bayes_n_rss_weights.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/regularized_regression.R
 3 | \name{bayes_n_rss_weights}
 4 | \alias{bayes_n_rss_weights}
 5 | \title{Use Gaussian distribution as prior. Posterior means will be BLUP, equivalent to Ridge Regression.}
 6 | \usage{
 7 | bayes_n_rss_weights(sumstats, LD, ...)
 8 | }
 9 | \description{
10 | Use Gaussian distribution as prior. Posterior means will be BLUP, equivalent to Ridge Regression.
11 | }
12 | 


--------------------------------------------------------------------------------
/man/bayes_n_weights.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/regularized_regression.R
 3 | \name{bayes_n_weights}
 4 | \alias{bayes_n_weights}
 5 | \title{Use Gaussian distribution as prior. Posterior means will be BLUP, equivalent to Ridge Regression.}
 6 | \usage{
 7 | bayes_n_weights(X, y, Z = NULL, ...)
 8 | }
 9 | \description{
10 | Use Gaussian distribution as prior. Posterior means will be BLUP, equivalent to Ridge Regression.
11 | }
12 | 


--------------------------------------------------------------------------------
/man/bayes_r_rss_weights.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/regularized_regression.R
 3 | \name{bayes_r_rss_weights}
 4 | \alias{bayes_r_rss_weights}
 5 | \title{Use a hierarchical Bayesian mixture model with four Gaussian components. Variances are scaled
 6 | by 0, 0.0001 , 0.001 , and 0.01 .}
 7 | \usage{
 8 | bayes_r_rss_weights(sumstats, LD, ...)
 9 | }
10 | \description{
11 | Use a hierarchical Bayesian mixture model with four Gaussian components. Variances are scaled
12 | by 0, 0.0001 , 0.001 , and 0.01 .
13 | }
14 | 


--------------------------------------------------------------------------------
/man/bayes_r_weights.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/regularized_regression.R
 3 | \name{bayes_r_weights}
 4 | \alias{bayes_r_weights}
 5 | \title{Use a hierarchical Bayesian mixture model with four Gaussian components. Variances are scaled
 6 | by 0, 0.0001 , 0.001 , and 0.01 .}
 7 | \usage{
 8 | bayes_r_weights(X, y, Z = NULL, ...)
 9 | }
10 | \description{
11 | Use a hierarchical Bayesian mixture model with four Gaussian components. Variances are scaled
12 | by 0, 0.0001 , 0.001 , and 0.01 .
13 | }
14 | 


--------------------------------------------------------------------------------
/man/coloc_post_processor.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/encoloc.R
 3 | \name{coloc_post_processor}
 4 | \alias{coloc_post_processor}
 5 | \title{coloc_post_processor function}
 6 | \usage{
 7 | coloc_post_processor(
 8 |   coloc_res,
 9 |   LD_meta_file_path = NULL,
10 |   analysis_region = NULL,
11 |   ...
12 | )
13 | }
14 | \arguments{
15 | \item{coloc_res}{coloc results from coloc.susie.}
16 | 
17 | \item{LD_meta_file_path}{Path to the metadata of LD reference.}
18 | 
19 | \item{analysis_region}{Path to the analysis region of coloc result.}
20 | }
21 | \value{
22 | A list containing the coloc results and post processed coloc sets.
23 | }
24 | \description{
25 | coloc_post_processor function
26 | }
27 | 


--------------------------------------------------------------------------------
/man/coloc_wrapper.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/encoloc.R
 3 | \name{coloc_wrapper}
 4 | \alias{coloc_wrapper}
 5 | \title{Colocalization Analysis Wrapper}
 6 | \usage{
 7 | coloc_wrapper(
 8 |   xqtl_file,
 9 |   gwas_files,
10 |   xqtl_finemapping_obj = NULL,
11 |   xqtl_varname_obj = NULL,
12 |   xqtl_region_obj = NULL,
13 |   gwas_finemapping_obj = NULL,
14 |   gwas_varname_obj = NULL,
15 |   gwas_region_obj = NULL,
16 |   filter_lbf_cs = FALSE,
17 |   filter_lbf_cs_secondary = NULL,
18 |   prior_tol = 1e-09,
19 |   p1 = 1e-04,
20 |   p2 = 1e-04,
21 |   p12 = 5e-06,
22 |   ...
23 | )
24 | }
25 | \arguments{
26 | \item{xqtl_file}{Path to the xQTL RDS file.}
27 | 
28 | \item{gwas_files}{Vector of paths to GWAS RDS files.}
29 | 
30 | \item{xqtl_finemapping_obj}{Optional table name in xQTL RDS files (default 'susie_fit').}
31 | 
32 | \item{xqtl_varname_obj}{Optional table name in xQTL RDS files (default 'susie_fit').}
33 | 
34 | \item{xqtl_region_obj}{Optional table name in xQTL RDS files (default 'susie_fit').}
35 | 
36 | \item{gwas_finemapping_obj}{Optional table name in GWAS RDS files (default 'susie_fit').}
37 | 
38 | \item{gwas_varname_obj}{Optional table name in GWAS RDS files (default 'susie_fit').}
39 | 
40 | \item{gwas_region_obj}{Optional table name in GWAS RDS files (default 'susie_fit').}
41 | 
42 | \item{prior_tol}{When the prior variance is estimated, compare the estimated value to \code{prior_tol} at the end of the computation,
43 | and exclude a single effect from PIP computation if the estimated prior variance is smaller than this tolerance value.}
44 | 
45 | \item{p1, }{p2, and p12 are results from xqtl_enrichment_wrapper (default 'p1=1e-4, p2=1e-4, p12=5e-6', same as coloc.bf_bf).}
46 | 
47 | \item{region_obj}{Optional table name of region info in susie_twas output filess (default 'region_info').}
48 | }
49 | \value{
50 | A list containing the coloc results and the summarized sets.
51 | }
52 | \description{
53 | This function processes xQTL and multiple GWAS finemapped data files for colocalization analysis.
54 | }
55 | \examples{
56 | xqtl_file <- "xqtl_file.rds"
57 | gwas_files <- c("gwas_file1.rds", "gwas_file2.rds")
58 | result <- coloc_wrapper(xqtl_file, gwas_files, LD_meta_file_path)
59 | }
60 | 


--------------------------------------------------------------------------------
/man/colocboost_analysis_pipeline.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/colocboost_pipeline.R
 3 | \name{colocboost_analysis_pipeline}
 4 | \alias{colocboost_analysis_pipeline}
 5 | \title{Multi-trait colocalization analysis pipeline}
 6 | \usage{
 7 | colocboost_analysis_pipeline(
 8 |   region_data,
 9 |   focal_trait = NULL,
10 |   event_filters = NULL,
11 |   xqtl_coloc = TRUE,
12 |   joint_gwas = FALSE,
13 |   separate_gwas = FALSE,
14 |   maf_cutoff = 5e-04,
15 |   pip_cutoff_to_skip_ind = 0,
16 |   remove_indels = FALSE,
17 |   pip_cutoff_to_skip_sumstat = 0,
18 |   qc_method = c("rss_qc", "dentist", "slalom"),
19 |   impute = TRUE,
20 |   impute_opts = list(rcond = 0.01, R2_threshold = 0.6, minimum_ld = 5, lamb = 0.01),
21 |   ...
22 | )
23 | }
24 | \arguments{
25 | \item{region_data}{A region data loaded from \code{load_regional_data}.}
26 | 
27 | \item{focal_trait}{Name of trait if perform focaled ColocBoost}
28 | 
29 | \item{event_filters}{A list of pattern for filtering events based on context names. Example: for sQTL, list(type_pattern = ".*clu_(\\d+_[+-?]).*",valid_pattern = "clu_(\\d+_[+-?]):PR:",exclude_pattern = "clu_(\\d+_[+-?]):IN:")}
30 | 
31 | \item{maf_cutoff}{A scalar to remove variants with maf < maf_cutoff, dafault is 0.005.}
32 | 
33 | \item{pip_cutoff_to_skip_ind}{A vector of cutoff values for skipping analysis based on PIP values for each context. Default is 0.}
34 | 
35 | \item{pip_cutoff_to_skip_sumstat}{A vector of cutoff values for skipping analysis based on PIP values for each sumstat Default is 0.}
36 | 
37 | \item{qc_method}{Quality control method to use. Options are "rss_qc", "dentist", or "slalom" (default: "rss_qc").}
38 | 
39 | \item{impute}{Logical; if TRUE, performs imputation for outliers identified in the analysis (default: TRUE).}
40 | 
41 | \item{impute_opts}{A list of imputation options including rcond, R2_threshold, and minimum_ld (default: list(rcond = 0.01, R2_threshold = 0.6, minimum_ld = 5)).}
42 | }
43 | \value{
44 | A list containing the individual_data and sumstat_data after QC:
45 | individual_data contains the following components if exist
46 | \itemize{
47 |   \item Y: A list of residualized phenotype values for all tasks.
48 |   \item X: A list of residualized genotype matrices all tasks.
49 | }
50 | sumstat_data contains the following components if exist
51 | \itemize{
52 |   \item sumstats: A list of summary statistics for the matched LD_info, each sublist contains sumstats, n, var_y from \code{load_rss_data}.
53 |   \item LD_info: A list of LD information, each sublist contains combined_LD_variants, combined_LD_matrix, ref_panel  \code{load_LD_matrix}.
54 | }
55 | }
56 | \description{
57 | This function perform a multi-trait colocalization using ColocBoost
58 | }
59 | 


--------------------------------------------------------------------------------
/man/compute_qtl_enrichment.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compute_qtl_enrichment.R
 3 | \name{compute_qtl_enrichment}
 4 | \alias{compute_qtl_enrichment}
 5 | \title{Implementation of enrichment analysis described in https://doi.org/10.1371/journal.pgen.1006646}
 6 | \usage{
 7 | compute_qtl_enrichment(
 8 |   gwas_pip,
 9 |   susie_qtl_regions,
10 |   num_gwas = NULL,
11 |   pi_qtl = NULL,
12 |   lambda = 1,
13 |   ImpN = 25,
14 |   num_threads = 1,
15 |   verbose = TRUE
16 | )
17 | }
18 | \arguments{
19 | \item{gwas_pip}{This is a vector of GWAS PIP, genome-wide.}
20 | 
21 | \item{susie_qtl_regions}{This is a list of SuSiE fitted objects per QTL unit analyzed}
22 | 
23 | \item{num_gwas}{This parameter is highly important if GWAS input does not contain all SNPs interrogated (e.g., in some cases, only fine-mapped geomic regions are included).
24 | Then users must pick a value of total_variants and estimate pi_gwas beforehand by: sum(gwas_pip$pip)/num_gwas. If num_gwas is null, pi_gwas would be sum(gwas_pip$pip)/total_variants.}
25 | 
26 | \item{pi_qtl}{This parameter can be safely left to default if your input QTL data has enough regions to estimate it.}
27 | 
28 | \item{lambda}{Similar to the shrinkage parameter used in ridge regression. It takes any non-negative value and shrinks the enrichment estimate towards 0.
29 | When it is set to 0, no shrinkage will be applied. A large value indicates strong shrinkage. The default value is set to 1.0.}
30 | 
31 | \item{ImpN}{Rounds of multiple imputation to draw QTL from, default is 25.}
32 | 
33 | \item{num_threads}{Number of Simultaneous running CPU threads for multiple imputation, default is 1.}
34 | }
35 | \value{
36 | A list of enrichment parameter estimates
37 | }
38 | \description{
39 | Largely follows from fastenloc https://github.com/xqwen/fastenloc
40 | but uses `susieR` fitted objects as input to estimate prior for use with `coloc` package (coloc v5, aka SuSiE-coloc).
41 | The main differences are 1) now enrichment is based on all QTL variants whether or not they are inside signal clusters;
42 | 2) Causal QTL are sampled from SuSiE single effects, not signal clusters;
43 | 3) Allow a variant to be QTL for not only multiple conditions (eg cell types) but also multiple regions (eg genes).
44 | Other minor improvements include 1) Make GSL RNG thread-safe; 2) Release memory from QTL binary annotation samples immediately after they are used.
45 | }
46 | \details{
47 | Uses output of \code{\link[susieR]{susie}} from the
48 |   \code{susieR} package.
49 | }
50 | \examples{
51 | 
52 | # Simulate fake data for gwas_pip
53 | n_gwas_pip <- 1000
54 | gwas_pip <- runif(n_gwas_pip)
55 | names(gwas_pip) <- paste0("snp", 1:n_gwas_pip)
56 | gwas_fit <- list(pip = gwas_pip)
57 | # Simulate fake data for a single SuSiEFit object
58 | simulate_susiefit <- function(n, p) {
59 |   pip <- runif(n)
60 |   names(pip) <- paste0("snp", 1:n)
61 |   alpha <- t(matrix(runif(n * p), nrow = n))
62 |   alpha <- t(apply(alpha, 1, function(row) row / sum(row)))
63 |   list(
64 |     pip = pip,
65 |     alpha = alpha,
66 |     prior_variance = runif(p)
67 |   )
68 | }
69 | # Simulate multiple SuSiEFit objects
70 | n_susie_fits <- 2
71 | susie_fits <- replicate(n_susie_fits, simulate_susiefit(n_gwas_pip, 10), simplify = FALSE)
72 | # Add these fits to a list, providing names to each element
73 | names(susie_fits) <- paste0("fit", 1:length(susie_fits))
74 | # Set other parameters
75 | ImpN <- 10
76 | lambda <- 1
77 | num_threads <- 1
78 | library(pecotmr)
79 | en <- compute_qtl_enrichment(gwas_fit, susie_fits, lambda = lambda, ImpN = ImpN, num_threads = num_threads)
80 | 
81 | }
82 | \seealso{
83 | \code{\link[susieR]{susie}}
84 | }
85 | 


--------------------------------------------------------------------------------
/man/corr_filter.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/quantile_twas_weight.R
 3 | \name{corr_filter}
 4 | \alias{corr_filter}
 5 | \title{Filter Highly Correlated SNPs}
 6 | \usage{
 7 | corr_filter(X, cor_thres = 0.8)
 8 | }
 9 | \arguments{
10 | \item{X}{Matrix of genotypes}
11 | 
12 | \item{cor_thres}{Correlation threshold for filtering}
13 | }
14 | \value{
15 | A list containing filtered X matrix and filter IDs
16 | }
17 | \description{
18 | Filter Highly Correlated SNPs
19 | }
20 | 


--------------------------------------------------------------------------------
/man/ctwas_ld_loader.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/ctwas_wrapper.R
 3 | \name{ctwas_ld_loader}
 4 | \alias{ctwas_ld_loader}
 5 | \title{Utility function to load LD in ctwas analyses, to interface with cTWAS package}
 6 | \usage{
 7 | ctwas_ld_loader(ld_matrix_file_path)
 8 | }
 9 | \arguments{
10 | \item{ld_matrix_file_path}{A string of file path to the LD matrix.}
11 | }
12 | \description{
13 | Utility function to load LD in ctwas analyses, to interface with cTWAS package
14 | }
15 | 


--------------------------------------------------------------------------------
/man/dentist_single_window.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/dentist_qc.R
 3 | \name{dentist_single_window}
 4 | \alias{dentist_single_window}
 5 | \title{Perform DENTIST on a single window}
 6 | \usage{
 7 | dentist_single_window(
 8 |   zScore,
 9 |   LD_mat,
10 |   nSample,
11 |   pValueThreshold = 5e-08,
12 |   propSVD = 0.4,
13 |   gcControl = FALSE,
14 |   nIter = 10,
15 |   gPvalueThreshold = 0.05,
16 |   duprThreshold = 0.99,
17 |   ncpus = 1,
18 |   seed = 999,
19 |   correct_chen_et_al_bug = TRUE
20 | )
21 | }
22 | \arguments{
23 | \item{zScore}{A numeric vector containing the z-score values for variants within the window.}
24 | 
25 | \item{LD_mat}{A square matrix containing linkage disequilibrium (LD) information for variants within the window.}
26 | 
27 | \item{nSample}{The total number of samples.}
28 | 
29 | \item{pValueThreshold}{The p-value threshold for significance. Default is 5e-8.}
30 | 
31 | \item{propSVD}{The proportion of singular value decomposition (SVD) to use. Default is 0.4.}
32 | 
33 | \item{gcControl}{Logical indicating whether genomic control should be applied. Default is FALSE.}
34 | 
35 | \item{nIter}{The number of iterations for the Dentist algorithm. Default is 10.}
36 | 
37 | \item{gPvalueThreshold}{The genomic p-value threshold for significance. Default is 0.05.}
38 | 
39 | \item{duprThreshold}{The absolute correlation r value threshold to be considered duplicate. Default is 0.99.}
40 | 
41 | \item{ncpus}{The number of CPU cores to use for parallel processing. Default is 1.}
42 | 
43 | \item{seed}{The random seed for reproducibility. Default is 999.}
44 | 
45 | \item{correct_chen_et_al_bug}{Logical indicating whether to correct the Chen et al. bug. Default is TRUE.}
46 | }
47 | \value{
48 | data frame includes columns representing the imputed summary statistics and outlier detected.
49 | }
50 | \description{
51 | This function performs imputation of summary statistics for a single genomic window
52 | using the Dentist algorithm.
53 | }
54 | \examples{
55 | # Example usage of dentist_impute_single_window
56 | library(MASS)
57 | library(corpcor)
58 | set.seed(999)
59 | # Set the number of SNPs, sample size, and number of outliers
60 | n_snps <- 1000
61 | sample_size <- 10000
62 | n_outliers <- 5
63 | 
64 | # Generate a correlation matrix with more off-diagonal correlation
65 | cor_matrix <- matrix(0, nrow = n_snps, ncol = n_snps)
66 | for (i in 1:(n_snps - 1)) {
67 |   for (j in (i + 1):n_snps) {
68 |     cor_matrix[i, j] <- runif(1, 0.2, 0.8) # Generate random correlations between 0.2 and 0.8
69 |     cor_matrix[j, i] <- cor_matrix[i, j]
70 |   }
71 | }
72 | diag(cor_matrix) <- 1
73 | 
74 | # Convert the correlation matrix to a positive definite matrix
75 | ld_matrix <- cov2cor(make.positive.definite(cor_matrix))
76 | 
77 | # Simulate Z-scores based on the LD matrix
78 | z_scores <- mvrnorm(n = 1, mu = rep(0, n_snps), Sigma = ld_matrix)
79 | 
80 | # Introduce outliers
81 | outlier_indices <- sample(1:n_snps, n_outliers)
82 | z_scores[outlier_indices] <- rnorm(n_outliers, mean = 0, sd = 5)
83 | dentist_single_window(zScore, LD_mat, nSample)
84 | 
85 | }
86 | \references{
87 | https://github.com/Yves-CHEN/DENTIST
88 | }
89 | \seealso{
90 | \code{\link{dentist}} for detecting outliers using the Dentist algorithm.
91 | }
92 | 


--------------------------------------------------------------------------------
/man/extract_LD_for_region.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/LD.R
 3 | \name{extract_LD_for_region}
 4 | \alias{extract_LD_for_region}
 5 | \title{Extract LD matrix and variants for a specific region}
 6 | \usage{
 7 | extract_LD_for_region(LD_matrix, variants, region, extract_coordinates)
 8 | }
 9 | \description{
10 | Extract LD matrix and variants for a specific region
11 | }
12 | 


--------------------------------------------------------------------------------
/man/extract_cs_info.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/univariate_rss_diagnostics.R
 3 | \name{extract_cs_info}
 4 | \alias{extract_cs_info}
 5 | \title{Process Credible Sets (CS) from Finemapping Results}
 6 | \usage{
 7 | extract_cs_info(con_data, cs_names, top_loci_table)
 8 | }
 9 | \arguments{
10 | \item{con_data}{List. The method layer data from a finemapping RDS file that is not empty.}
11 | 
12 | \item{cs_names}{Character vector. Names of the Credible Sets, usually in the format "L_<number>".}
13 | 
14 | \item{top_loci_table}{Data frame. The $top_loci layer data from the finemapping results.}
15 | }
16 | \value{
17 | A data frame with one row per CS, containing the following columns:
18 |   \item{cs_name}{Name of the Credible Set}
19 |   \item{variants_per_cs}{Number of variants in the CS}
20 |   \item{top_variant}{ID of the variant with the highest PIP in the CS}
21 |   \item{top_variant_index}{Global index of the top variant}
22 |   \item{top_pip}{Highest Posterior Inclusion Probability (PIP) in the CS}
23 |   \item{top_z}{Z-score of the top variant}
24 |   \item{p_value}{P-value calculated from the top Z-score}
25 |   \item{cs_corr}{Pairwise correlations of other CSs in this RDS with the CS of 
26 |     the current row, delimited by '|', if there is more than one CS in this RDS file}
27 | }
28 | \description{
29 | This function extracts and processes information for each Credible Set (CS) 
30 | from finemapping results, typically obtained from a finemapping RDS file.
31 | }
32 | \details{
33 | This function is designed to be used only when there is at least one Credible Set 
34 | in the finemapping results usually for a given study and block. It processes each CS, 
35 | extracting key information such as the top variant, its statistics, and 
36 | correlation information between multiple CS if available.
37 | }
38 | 


--------------------------------------------------------------------------------
/man/extract_flatten_sumstats_from_nested.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/mash_wrapper.R
 3 | \name{extract_flatten_sumstats_from_nested}
 4 | \alias{extract_flatten_sumstats_from_nested}
 5 | \title{Extract Summary Statistics from Nested Data Structure}
 6 | \usage{
 7 | extract_flatten_sumstats_from_nested(data, extract_inf = "z", max_depth = 3)
 8 | }
 9 | \arguments{
10 | \item{data}{A nested list structure potentially containing `variant_names` and `sumstats`.}
11 | 
12 | \item{extract_inf}{Character. One of `"z"`, `"beta"`, or `"se"`.}
13 | 
14 | \item{max_depth}{Integer. Maximum depth to search within the list. Default is 3.}
15 | }
16 | \value{
17 | A data.frame with columns `variants` and the requested summary statistic.
18 | }
19 | \description{
20 | Recursively searches a nested list to extract summary statistics (z, beta, or se) 
21 | using `variant_names` and `sumstats`. Computes `z` if needed from `betahat` and `sebetahat`.
22 | }
23 | \examples{
24 | \dontrun{
25 | result <- extract_data(nested_list_object, extract_inf = "z")
26 | }
27 | }
28 | 


--------------------------------------------------------------------------------
/man/extract_top_pip_info.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/univariate_rss_diagnostics.R
 3 | \name{extract_top_pip_info}
 4 | \alias{extract_top_pip_info}
 5 | \title{Extract Information for Top Variant from Finemapping Results}
 6 | \usage{
 7 | extract_top_pip_info(con_data)
 8 | }
 9 | \arguments{
10 | \item{con_data}{List. The method layer data from a finemapping RDS file.}
11 | }
12 | \value{
13 | A data frame with one row containing the following columns:
14 |   \item{cs_name}{NA (as no CS is identified)}
15 |   \item{variants_per_cs}{NA (as no CS is identified)}
16 |   \item{top_variant}{ID of the variant with the highest PIP}
17 |   \item{top_variant_index}{Index of the top variant in the original data}
18 |   \item{top_pip}{Highest Posterior Inclusion Probability (PIP)}
19 |   \item{top_z}{Z-score of the top variant}
20 |   \item{p_value}{P-value calculated from the top Z-score}
21 |   \item{cs_corr}{NA (as no CS correlation is available)}
22 | }
23 | \description{
24 | This function extracts information about the variant with the highest Posterior 
25 | Inclusion Probability (PIP) from finemapping results, typically used when no 
26 | Credible Sets (CS) are identified in the analysis.
27 | }
28 | \details{
29 | This function is designed to be used when no Credible Sets are identified in 
30 | the finemapping results, but information about the most significant variant 
31 | is still desired. It identifies the variant with the highest PIP and extracts 
32 | relevant statistical information.
33 | }
34 | \note{
35 | This function is particularly useful for capturing information about potentially 
36 | important variants that might be included in Credible Sets under different 
37 | analysis parameters or lower coverage. It maintains a structure similar to 
38 | the output of `extract_cs_info()` for consistency in downstream analyses.
39 | }
40 | \seealso{
41 | \code{\link{extract_cs_info}} for processing when Credible Sets are present.
42 | }
43 | 


--------------------------------------------------------------------------------
/man/filter_X_with_Y.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/misc.R
 3 | \name{filter_X_with_Y}
 4 | \alias{filter_X_with_Y}
 5 | \title{This function performing filters on X variants based on Y subjects for TWAS analysis. This function checks
 6 | whether the absence (NA) of certain subjects would lead to monomorphic in some variants in X after removing
 7 | of these subjects data from X.}
 8 | \usage{
 9 | filter_X_with_Y(
10 |   X,
11 |   Y,
12 |   missing_rate_thresh,
13 |   maf_thresh,
14 |   var_thresh = 0,
15 |   maf = NULL,
16 |   X_variance = NULL
17 | )
18 | }
19 | \arguments{
20 | \item{missing_rate_thresh}{Maximum individual missingness cutoff.}
21 | 
22 | \item{maf_thresh}{Minimum minor allele frequency (MAF) cutoff.}
23 | 
24 | \item{var_thresh}{Minimum variance cutoff for a variant. Default is 0.}
25 | 
26 | \item{X_variance}{A vector of variance for X variants.}
27 | }
28 | \description{
29 | This function performing filters on X variants based on Y subjects for TWAS analysis. This function checks
30 | whether the absence (NA) of certain subjects would lead to monomorphic in some variants in X after removing
31 | of these subjects data from X.
32 | }
33 | 


--------------------------------------------------------------------------------
/man/filter_molecular_events.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/misc.R
 3 | \name{filter_molecular_events}
 4 | \alias{filter_molecular_events}
 5 | \title{Filter events based on provided context name pattern}
 6 | \usage{
 7 | filter_molecular_events(
 8 |   events,
 9 |   filters,
10 |   condition = NULL,
11 |   remove_all_group = FALSE
12 | )
13 | }
14 | \arguments{
15 | \item{events}{A character vector of event names}
16 | 
17 | \item{filters}{A data frame with character column of type_pattern, valid_pattern, and exclude_pattern.}
18 | 
19 | \item{condition}{Optional label context name}
20 | 
21 | \item{remove_all_group}{Logical if \code{TRUE}, removes all events from the same group and character-defined context.}
22 | }
23 | \description{
24 | Filter events based on provided context name pattern
25 | }
26 | 


--------------------------------------------------------------------------------
/man/filter_variants_by_ld_reference.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/LD.R
 3 | \name{filter_variants_by_ld_reference}
 4 | \alias{filter_variants_by_ld_reference}
 5 | \title{Filter variants by LD Reference}
 6 | \usage{
 7 | filter_variants_by_ld_reference(
 8 |   variant_ids,
 9 |   ld_reference_meta_file,
10 |   keep_indel = TRUE
11 | )
12 | }
13 | \arguments{
14 | \item{variant_ids}{variant names in the format chr:pos_ref_alt or chr:pos:ref:alt.}
15 | 
16 | \item{ld_reference_meta_file}{A data frame similar to 'genomic_data' in get_regional_ld_meta function.}
17 | }
18 | \value{
19 | A subset of variants, filtered based on LD reference data.
20 | }
21 | \description{
22 | Filter variants by LD Reference
23 | }
24 | 


--------------------------------------------------------------------------------
/man/find_data.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/misc.R
 3 | \name{find_data}
 4 | \alias{find_data}
 5 | \title{Utility function to specify the path to access the target list item in a nested list, especially when some list layers
 6 | in between are dynamic or uncertain.}
 7 | \usage{
 8 | find_data(
 9 |   x,
10 |   depth_obj,
11 |   show_path = FALSE,
12 |   rm_null = TRUE,
13 |   rm_dup = FALSE,
14 |   docall = c,
15 |   last_obj = NULL
16 | )
17 | }
18 | \description{
19 | Utility function to specify the path to access the target list item in a nested list, especially when some list layers
20 | in between are dynamic or uncertain.
21 | }
22 | 


--------------------------------------------------------------------------------
/man/find_duplicate_variants.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/misc.R
 3 | \name{find_duplicate_variants}
 4 | \alias{find_duplicate_variants}
 5 | \title{Filter a vector based on a correlation matrix}
 6 | \usage{
 7 | find_duplicate_variants(z, LD, rThreshold)
 8 | }
 9 | \arguments{
10 | \item{z}{A numeric vector to be filtered.}
11 | 
12 | \item{LD}{A square correlation matrix with dimensions equal to the length of `z`.}
13 | 
14 | \item{rThreshold}{The correlation threshold for filtering.}
15 | }
16 | \value{
17 | A list containing the following elements:
18 |   \describe{
19 |     \item{filteredZ}{The filtered vector `z` based on the correlation threshold.}
20 |     \item{filteredLD}{The filtered matrix `LD` based on the correlation threshold.}
21 |     \item{dupBearer}{A vector indicating the duplicate status of each element in `z`.}
22 |     \item{corABS}{A vector storing the absolute correlation values of duplicates.}
23 |     \item{sign}{A vector storing the sign of the correlation values (-1 for negative, 1 for positive).}
24 |     \item{minValue}{The minimum absolute correlation value encountered.}
25 |   }
26 | }
27 | \description{
28 | This function filters a vector `z` based on a correlation matrix `LD` and a correlation threshold `rThreshold`.
29 | It keeps only one element among those having an absolute correlation value greater than the threshold.
30 | }
31 | \examples{
32 | z <- c(1, 2, 3, 4, 5)
33 | LD <- matrix(c(
34 |   1.0, 0.8, 0.2, 0.1, 0.3,
35 |   0.8, 1.0, 0.4, 0.2, 0.5,
36 |   0.2, 0.4, 1.0, 0.6, 0.1,
37 |   0.1, 0.2, 0.6, 1.0, 0.3,
38 |   0.3, 0.5, 0.1, 0.3, 1.0
39 | ), nrow = 5, ncol = 5)
40 | rThreshold <- 0.5
41 | 
42 | result <- find_duplicate_variants(z, LD, rThreshold)
43 | print(result)
44 | 
45 | }
46 | 


--------------------------------------------------------------------------------
/man/find_valid_file_path.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/misc.R
 3 | \name{find_valid_file_path}
 4 | \alias{find_valid_file_path}
 5 | \title{Find Valid File Path}
 6 | \usage{
 7 | find_valid_file_path(reference_file_path, target_file_path)
 8 | }
 9 | \description{
10 | Find Valid File Path
11 | }
12 | 


--------------------------------------------------------------------------------
/man/fsusie_get_cs.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/fsusie_wrapper.R
 3 | \name{fsusie_get_cs}
 4 | \alias{fsusie_get_cs}
 5 | \title{@title Create Sets Similar to SuSiE Output from fSuSiE Object}
 6 | \usage{
 7 | fsusie_get_cs(fSuSiE.obj, X, requested_coverage = 0.95)
 8 | }
 9 | \arguments{
10 | \item{fSuSiE.obj}{A fSuSiE object containing the results from a fSuSiE analysis.
11 | expected to at least have 'cs' and 'alpha' components.}
12 | 
13 | \item{requested_coverage}{A numeric value specifying the desired coverage level for the
14 | credible sets. This is purely for record purpose so should be
15 | manually ensured that it correctly reflect the actual coverage used. Defaults to 0.95.}
16 | }
17 | \value{
18 | A list containing named credible sets (cs), a dataframe of purity metrics
19 |         (min.abs.corr, mean.abs.corr, median.abs.corr), an index of credible sets (cs_index),
20 |         coverage values for each set, and the requested coverage level. Similar to the SuSiE set output
21 | }
22 | \description{
23 | This function constructs a list that mimics the structure of SuSiE output sets
24 | from a fSuSiE object. It includes credible sets (cs) with their names, a purity
25 | dataframe, coverage information, and the requested coverage level.
26 | }
27 | 


--------------------------------------------------------------------------------
/man/fsusie_wrapper.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/fsusie_wrapper.R
 3 | \name{fsusie_wrapper}
 4 | \alias{fsusie_wrapper}
 5 | \title{Wrapper for fsusie Function with Automatic Post-Processing}
 6 | \usage{
 7 | fsusie_wrapper(
 8 |   X,
 9 |   Y,
10 |   pos,
11 |   L,
12 |   prior,
13 |   max_SNP_EM,
14 |   cov_lev,
15 |   min_purity,
16 |   max_scale,
17 |   ...
18 | )
19 | }
20 | \arguments{
21 | \item{X}{Residual genotype matrix.}
22 | 
23 | \item{Y}{Response phenotype matrix.}
24 | 
25 | \item{pos}{Genomics position of phenotypes, used for specifying the wavelet model.}
26 | 
27 | \item{L}{The maximum number of the credible set.}
28 | 
29 | \item{prior}{method to generate the prior.}
30 | 
31 | \item{max_SNP_EM}{maximum number of SNP used for learning the prior.}
32 | 
33 | \item{cov_lev}{Coverage level for the credible sets.}
34 | 
35 | \item{min_purity}{Minimum purity threshold for credible sets to be retained.}
36 | 
37 | \item{max_scale}{numeric, define the maximum of wavelet coefficients used in the analysis (2^max_scale).
38 | Set 10 true by default.}
39 | 
40 | \item{...}{Additional arguments passed to the fsusie function.}
41 | }
42 | \value{
43 | A modified fsusie object with the susie sets list, correlations for cs, alpha as df like susie,
44 |         and without the dummy cs that do not meet the minimum purity requirement.
45 | }
46 | \description{
47 | This function serves as a wrapper for the fsusie function, facilitating
48 | automatic post-processing such as removing dummy credible sets (cs) that don't meet
49 | the minimum purity threshold and calculating correlations for the remaining cs.
50 | The function parameters are identical to those of the fSuSiE function.
51 | }
52 | 


--------------------------------------------------------------------------------
/man/get_cormat.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/file_utils.R
 3 | \name{get_cormat}
 4 | \alias{get_cormat}
 5 | \title{Compute genotype correlation}
 6 | \usage{
 7 | get_cormat(X, intercepte = TRUE)
 8 | }
 9 | \description{
10 | Compute genotype correlation
11 | }
12 | 


--------------------------------------------------------------------------------
/man/get_ctwas_meta_data.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/ctwas_wrapper.R
 3 | \name{get_ctwas_meta_data}
 4 | \alias{get_ctwas_meta_data}
 5 | \title{Utility function to format meta data dataframe for cTWAS analyses}
 6 | \usage{
 7 | get_ctwas_meta_data(ld_meta_data_file, subset_region_ids = NULL)
 8 | }
 9 | \description{
10 | Utility function to format meta data dataframe for cTWAS analyses
11 | }
12 | 


--------------------------------------------------------------------------------
/man/get_susie_result.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/univariate_rss_diagnostics.R
 3 | \name{get_susie_result}
 4 | \alias{get_susie_result}
 5 | \title{Extract SuSiE Results from Finemapping Data}
 6 | \usage{
 7 | get_susie_result(con_data)
 8 | }
 9 | \arguments{
10 | \item{con_data}{List. The method layer data from a finemapping RDS file.}
11 | }
12 | \value{
13 | The trimmed SuSiE results (`$susie_result_trimmed`) if available,
14 | otherwise NULL.
15 | }
16 | \description{
17 | This function extracts the trimmed SuSiE results from a finemapping data object,
18 | typically obtained from a finemapping RDS file. It's designed to work with
19 | the method layer of these files, often named as 'method_RAISS_imputed', 'method',
20 | or 'method_NO_QC'. This layer is right under the study layer.
21 | }
22 | \details{
23 | The function checks if the input data is empty or if the `$susie_result_trimmed`
24 | element is missing. It returns NULL in these cases. If `$susie_result_trimmed`
25 | exists and is not empty, it returns this element.
26 | }
27 | \note{
28 | This function is particularly useful when working with large datasets
29 | where not all method layers may contain valid SuSiE results or method layer.
30 | }
31 | 


--------------------------------------------------------------------------------
/man/harmonize_twas.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/twas.R
 3 | \name{harmonize_twas}
 4 | \alias{harmonize_twas}
 5 | \title{Function to perform allele flip QC and harmonization on the weights and GWAS against LD for a region.
 6 | FIXME: GWAS loading function from Haochen for both tabix & column-mapping yml application}
 7 | \usage{
 8 | harmonize_twas(
 9 |   twas_weights_data,
10 |   ld_meta_file_path,
11 |   gwas_meta_file,
12 |   column_file_path = NULL,
13 |   comment_string = "#"
14 | )
15 | }
16 | \arguments{
17 | \item{twas_weights_data}{List of list of twas weights output from from generate_twas_db function.}
18 | 
19 | \item{ld_meta_file_path}{A tab-delimited data frame with colname "#chrom", "start", "end", "path", where "path" column
20 | contains file paths for both LD matrix and bim file and is separated by ",". Bim file input would expect no headers, while the
21 | columns are aligned in the order of "chrom", "variants", "GD", "pos", "A1", "A2", "variance", "allele_freq", "n_nomiss".}
22 | 
23 | \item{gwas_meta_file}{A file path for a dataframe table with column of "study_id", "chrom" (integer), "file_path",
24 | "column_mapping_file". Each file in "file_path" column is tab-delimited dataframe of GWAS summary statistics with column name
25 | "chrom" (or #chrom" if tabix-indexed), "pos", "A2", "A1".}
26 | }
27 | \value{
28 | A list of list for harmonized weights and dataframe of gwas summary statistics that is add to the original input of
29 | twas_weights_data under each context.
30 | }
31 | \description{
32 | Function Conditions:
33 | - processes data in the format of either the output from load_twas_weights/generate_twas_db or
34 |   refined_twas_weights_data from twas pipeline.
35 | - For the first format, we expect there is only one gene/events's information, that can be accessed through `region_info_obj`
36 |   and refined_twas_weights_data contains per region multiple gene/event's refined weights data.
37 | }
38 | \details{
39 | Main Steps:
40 | 1. allele QC for TWAS weights against the LD meta
41 | 2. allele QC for GWA summary stats against the LD meta
42 | 3. adjust susie/mvsusie weights based on the overlap variants
43 | }
44 | 


--------------------------------------------------------------------------------
/man/lbf_to_alpha.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/susie_wrapper.R
 3 | \name{lbf_to_alpha}
 4 | \alias{lbf_to_alpha}
 5 | \title{Applies the 'lbf_to_alpha_vector' function row-wise to a matrix of log Bayes factors
 6 | to convert them to Single Effect PIP values.}
 7 | \usage{
 8 | lbf_to_alpha(lbf)
 9 | }
10 | \arguments{
11 | \item{lbf}{Matrix of log Bayes factors.}
12 | }
13 | \value{
14 | A matrix of alpha values with the same dimensions as the input LBF matrix.
15 | }
16 | \description{
17 | Applies the 'lbf_to_alpha_vector' function row-wise to a matrix of log Bayes factors
18 | to convert them to Single Effect PIP values.
19 | }
20 | \examples{
21 | lbf_matrix <- matrix(c(-0.5, 1.2, 0.3, 0.7, -1.1, 0.4), nrow = 2)
22 | alpha_matrix <- lbf_to_alpha(lbf_matrix)
23 | print(alpha_matrix)
24 | }
25 | 


--------------------------------------------------------------------------------
/man/load_LD_matrix.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/LD.R
 3 | \name{load_LD_matrix}
 4 | \alias{load_LD_matrix}
 5 | \title{Load and Process Linkage Disequilibrium (LD) Matrix}
 6 | \usage{
 7 | load_LD_matrix(LD_meta_file_path, region, extract_coordinates = NULL)
 8 | }
 9 | \arguments{
10 | \item{LD_meta_file_path}{path of LD_metadata, LD_metadata is a data frame specifying LD blocks with
11 | columns "chrom", "start", "end", and "path". "start" and "end" denote the positions of LD blocks.
12 | "path" is the path of each LD block, optionally including bim file paths.}
13 | 
14 | \item{region}{A data frame specifying region of interest with columns "chrom", "start", and "end".}
15 | 
16 | \item{extract_coordinates}{Optional data frame with columns "chrom" and "pos" for specific coordinates extraction.}
17 | }
18 | \value{
19 | A list of processed LD matrices and associated variant data frames for region of interest.
20 | Each element of the list contains:
21 | \describe{
22 | \item{combined_LD_variants}{A data frame merging selected variants within each LD block in bim file format with columns "chrom", "variants",
23 | "GD", "pos", "A1", and "A2".}
24 | \item{combined_LD_matrix}{The LD matrix for each region, with row and column names matching variant identifiers.}
25 | \item{block_indices}{A data frame tracking the indices of variants in the combined matrix by LD block,
26 | with columns "block_id", "start_idx", "end_idx", "chrom", "block_start", "block_end" to facilitate
27 | further partitioning if needed.}
28 | }
29 | }
30 | \description{
31 | Load and Process Linkage Disequilibrium (LD) Matrix
32 | }
33 | 


--------------------------------------------------------------------------------
/man/load_genotype_region.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/file_utils.R
 3 | \name{load_genotype_region}
 4 | \alias{load_genotype_region}
 5 | \title{Load genotype data for a specific region using vroom for efficiency}
 6 | \usage{
 7 | load_genotype_region(
 8 |   genotype,
 9 |   region = NULL,
10 |   keep_indel = TRUE,
11 |   keep_variants_path = NULL
12 | )
13 | }
14 | \arguments{
15 | \item{genotype}{Path to the genotype data file (without extension).}
16 | 
17 | \item{region}{The target region in the format "chr:start-end".}
18 | 
19 | \item{keep_indel}{Whether to keep indel SNPs.}
20 | }
21 | \value{
22 | A vector of SNP IDs in the specified region.
23 | }
24 | \description{
25 | By default, plink usage dosage of the *major* allele, since "effect allele" A1 is
26 | usually the minor allele and the code "1" refers to the "other allele" A2,
27 | so that "11" is A2/A2 or major/major. We always use effect allele dosage, to
28 | be more consistent with the minor allele based convention ie, plink --recodeA which used minor allele
29 | dosage by default.
30 | }
31 | 


--------------------------------------------------------------------------------
/man/load_multicontext_sumstats.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/mash_wrapper.R
 3 | \name{load_multicontext_sumstats}
 4 | \alias{load_multicontext_sumstats}
 5 | \title{Load and Align Summary Statistics for a Given Gene and Condition}
 6 | \usage{
 7 | load_multicontext_sumstats(
 8 |   dat_list,
 9 |   signal_df,
10 |   cond,
11 |   region,
12 |   extract_infs = "z",
13 |   tag_patterns = NULL,
14 |   result_list_format
15 | )
16 | }
17 | \arguments{
18 | \item{dat_list}{A named list of matrices or data.frames, each element corresponding to a summary statistics type (e.g., z, beta).}
19 | 
20 | \item{signal_df}{A data.frame containing signal information including `variant_ID`, `gene_ID`, and `event_ID`.}
21 | 
22 | \item{cond}{Character. Condition type: "strong", "null", or "random".}
23 | 
24 | \item{region}{Character. Target gene ID.}
25 | 
26 | \item{extract_infs}{Character vector. Names of summary statistics to extract (e.g., `"z"`, `"beta"`).}
27 | 
28 | \item{tag_patterns}{Optional named pattern list used to classify context.}
29 | 
30 | \item{result_list_format}{A nested list used as a running result container.}
31 | }
32 | \value{
33 | The updated `result_list_format` with processed results for the specified gene and condition.
34 | }
35 | \description{
36 | This function processes summary statistics matrices for a target gene across contexts, 
37 | optionally aligning with a reference panel and updating an existing result list.
38 | }
39 | 


--------------------------------------------------------------------------------
/man/load_quantile_twas_weights.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/quantile_twas.R
 3 | \name{load_quantile_twas_weights}
 4 | \alias{load_quantile_twas_weights}
 5 | \title{Load Quantile TWAS Weights}
 6 | \usage{
 7 | load_quantile_twas_weights(
 8 |   weight_db_files,
 9 |   tau_values = seq(0.01, 0.99, 0.01),
10 |   between_cluster = 0.8,
11 |   num_intervals = 3
12 | )
13 | }
14 | \arguments{
15 | \item{weight_db_files}{A character vector of file paths to the RDS files containing TWAS weights.}
16 | 
17 | \item{tau_values}{A numeric vector representing the tau values (quantiles) to use. Default is \code{seq(0.01, 0.99, 0.01)}.}
18 | 
19 | \item{between_cluster}{A numeric value specifying the correlation threshold for clustering. Default is 0.8.}
20 | 
21 | \item{num_intervals}{The number of fixed non-overlapping intervals to divide the tau values. Default is 3.}
22 | }
23 | \value{
24 | A list containing:
25 | \item{weights}{A list of integrated TWAS weights for each context.}
26 | \item{twas_cv_performance}{A list of TWAS cross-validation performance metrics for each context.}
27 | }
28 | \description{
29 | This function loads TWAS weights from RDS files and performs grouped integration using both correlation-based clustering and fixed tau intervals.
30 | }
31 | \examples{
32 | weight_db_files <- c("file1.rds", "file2.rds")
33 | tau_values <- seq(0.01, 0.99, by = 0.01)
34 | result <- load_quantile_twas_weights(weight_db_files, tau_values)
35 | weights <- result$weights
36 | performance <- result$twas_cv_performance
37 | 
38 | }
39 | 


--------------------------------------------------------------------------------
/man/load_regional_association_data.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/file_utils.R
 3 | \name{load_regional_association_data}
 4 | \alias{load_regional_association_data}
 5 | \title{Load regional association data}
 6 | \usage{
 7 | load_regional_association_data(
 8 |   genotype,
 9 |   phenotype,
10 |   covariate,
11 |   region,
12 |   conditions,
13 |   maf_cutoff = 0,
14 |   mac_cutoff = 0,
15 |   xvar_cutoff = 0,
16 |   imiss_cutoff = 0,
17 |   association_window = NULL,
18 |   extract_region_name = NULL,
19 |   region_name_col = NULL,
20 |   keep_indel = TRUE,
21 |   keep_samples = NULL,
22 |   keep_variants = NULL,
23 |   phenotype_header = 4,
24 |   scale_residuals = FALSE,
25 |   tabix_header = TRUE
26 | )
27 | }
28 | \arguments{
29 | \item{genotype}{PLINK bed file containing genotype data.}
30 | 
31 | \item{phenotype}{A vector of phenotype file names.}
32 | 
33 | \item{covariate}{A vector of covariate file names corresponding to the phenotype file vector.}
34 | 
35 | \item{region}{A string of chr:start-end for the phenotype region.}
36 | 
37 | \item{conditions}{A vector of strings representing different conditions or groups.}
38 | 
39 | \item{maf_cutoff}{Minimum minor allele frequency (MAF) cutoff. Default is 0.}
40 | 
41 | \item{mac_cutoff}{Minimum minor allele count (MAC) cutoff. Default is 0.}
42 | 
43 | \item{xvar_cutoff}{Minimum variance cutoff. Default is 0.}
44 | 
45 | \item{imiss_cutoff}{Maximum individual missingness cutoff. Default is 0.}
46 | 
47 | \item{association_window}{A string of chr:start-end for the association analysis window (cis or trans). If not provided, all genotype data will be loaded.}
48 | 
49 | \item{extract_region_name}{A list of vectors of strings (e.g., gene ID ENSG00000269699) to subset the information when there are multiple regions available. Default is NULL.}
50 | 
51 | \item{region_name_col}{Column name containing the region name. Default is NULL.}
52 | 
53 | \item{keep_indel}{Logical indicating whether to keep insertions/deletions (INDELs). Default is TRUE.}
54 | 
55 | \item{keep_samples}{A vector of sample names to keep. Default is NULL.}
56 | 
57 | \item{phenotype_header}{Number of rows to skip at the beginning of the transposed phenotype file (default is 4 for chr, start, end, and ID).}
58 | 
59 | \item{scale_residuals}{Logical indicating whether to scale residuals. Default is FALSE.}
60 | 
61 | \item{tabix_header}{Logical indicating whether the tabix file has a header. Default is TRUE.}
62 | }
63 | \value{
64 | A list containing the following components:
65 | \itemize{
66 |   \item residual_Y: A list of residualized phenotype values (either a vector or a matrix).
67 |   \item residual_X: A list of residualized genotype matrices for each condition.
68 |   \item residual_Y_scalar: Scaling factor for residualized phenotype values.
69 |   \item residual_X_scalar: Scaling factor for residualized genotype values.
70 |   \item dropped_sample: A list of dropped samples for X, Y, and covariates.
71 |   \item covar: Covariate data.
72 |   \item Y: Original phenotype data.
73 |   \item X_data: Original genotype data.
74 |   \item X: Filtered genotype matrix.
75 |   \item maf: Minor allele frequency (MAF) for each variant.
76 |   \item chrom: Chromosome of the region.
77 |   \item grange: Genomic range of the region (start and end positions).
78 |   \item Y_coordinates: Phenotype coordinates if a region is specified.
79 | }
80 | }
81 | \description{
82 | This function loads genotype, phenotype, and covariate data for a specific region and performs data preprocessing.
83 | }
84 | 


--------------------------------------------------------------------------------
/man/load_regional_functional_data.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/file_utils.R
 3 | \name{load_regional_functional_data}
 4 | \alias{load_regional_functional_data}
 5 | \title{Load Regional Functional Association Data}
 6 | \usage{
 7 | load_regional_functional_data(...)
 8 | }
 9 | \value{
10 | A list
11 | }
12 | \description{
13 | This function loads precomputed regional functional association data.
14 | }
15 | 


--------------------------------------------------------------------------------
/man/load_regional_multivariate_data.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/file_utils.R
 3 | \name{load_regional_multivariate_data}
 4 | \alias{load_regional_multivariate_data}
 5 | \title{Load and Preprocess Regional Multivariate Data}
 6 | \usage{
 7 | load_regional_multivariate_data(matrix_y_min_complete = NULL, ...)
 8 | }
 9 | \value{
10 | A list
11 | }
12 | \description{
13 | This function loads regional association data and processes it into a multivariate format.
14 | It optionally filters out samples based on missingness thresholds in the response matrix.
15 | }
16 | 


--------------------------------------------------------------------------------
/man/load_regional_regression_data.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/file_utils.R
 3 | \name{load_regional_regression_data}
 4 | \alias{load_regional_regression_data}
 5 | \title{Load Regional Data for Regression Modeling}
 6 | \usage{
 7 | load_regional_regression_data(...)
 8 | }
 9 | \value{
10 | A list
11 | }
12 | \description{
13 | This function loads regional association data formatted for regression modeling.
14 | It includes phenotype, genotype, and covariate matrices along with metadata.
15 | }
16 | 


--------------------------------------------------------------------------------
/man/load_regional_univariate_data.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/file_utils.R
 3 | \name{load_regional_univariate_data}
 4 | \alias{load_regional_univariate_data}
 5 | \title{Load Regional Univariate Association Data}
 6 | \usage{
 7 | load_regional_univariate_data(...)
 8 | }
 9 | \value{
10 | A list
11 | }
12 | \description{
13 | This function loads regional association data for univariate analysis.
14 | It includes residual matrices, original genotype data, and additional metadata.
15 | }
16 | 


--------------------------------------------------------------------------------
/man/load_rss_data.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/file_utils.R
 3 | \name{load_rss_data}
 4 | \alias{load_rss_data}
 5 | \title{Load summary statistic data}
 6 | \usage{
 7 | load_rss_data(
 8 |   sumstat_path,
 9 |   column_file_path,
10 |   n_sample = 0,
11 |   n_case = 0,
12 |   n_control = 0,
13 |   region = NULL,
14 |   extract_region_name = NULL,
15 |   region_name_col = NULL,
16 |   comment_string = "#"
17 | )
18 | }
19 | \arguments{
20 | \item{sumstat_path}{File path to the summary statistics.}
21 | 
22 | \item{column_file_path}{File path to the column file for mapping.}
23 | 
24 | \item{n_sample}{User-specified sample size. If unknown, set as 0 to retrieve from the sumstat file.}
25 | 
26 | \item{n_case}{User-specified number of cases.}
27 | 
28 | \item{n_control}{User-specified number of controls.}
29 | 
30 | \item{region}{The region where tabix use to subset the input dataset.}
31 | 
32 | \item{extract_region_name}{User-specified gene/phenotype name used to further subset the phenotype data.}
33 | 
34 | \item{region_name_col}{Filter this specific column for the extract_region_name.}
35 | 
36 | \item{comment_string}{Comment sign in the column_mapping file, default is #}
37 | }
38 | \value{
39 | A list of rss_input, including the column-name-formatted summary statistics,
40 | sample size (n), and var_y.
41 | }
42 | \description{
43 | This function formats the input summary statistics dataframe with uniform column names
44 | to fit into the SuSiE pipeline. The mapping is performed through the specified column file.
45 | Additionally, it extracts sample size, case number, control number, and variance of Y.
46 | Missing values in n_sample, n_case, and n_control are backfilled with median values.
47 | }
48 | 


--------------------------------------------------------------------------------
/man/load_tsv_region.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/file_utils.R
 3 | \name{load_tsv_region}
 4 | \alias{load_tsv_region}
 5 | \title{Load and filter tabular data with optional region subsetting}
 6 | \usage{
 7 | load_tsv_region(
 8 |   file_path,
 9 |   region = NULL,
10 |   extract_region_name = NULL,
11 |   region_name_col = NULL
12 | )
13 | }
14 | \arguments{
15 | \item{file_path}{Path to the summary statistics file.}
16 | 
17 | \item{region}{Genomic region for subsetting tabix-indexed files. Format: chr:start-end (e.g., "9:10000-50000").}
18 | 
19 | \item{extract_region_name}{Value to filter for in the specified filter column.}
20 | 
21 | \item{region_name_col}{Index of the column to apply the extract_region_name against.}
22 | }
23 | \value{
24 | A dataframe containing the filtered summary statistics.
25 | }
26 | \description{
27 | This function loads summary statistics data from tabular files (TSV, TXT).
28 | For compressed (.gz) and tabix-indexed files, it can subset data by genomic region.
29 | Additionally, it can filter results by a specified target value in a designated column.
30 | }
31 | 


--------------------------------------------------------------------------------
/man/load_twas_weights.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/file_utils.R
 3 | \name{load_twas_weights}
 4 | \alias{load_twas_weights}
 5 | \title{Load, Validate, and Consolidate TWAS Weights from Multiple RDS Files}
 6 | \usage{
 7 | load_twas_weights(
 8 |   weight_db_files,
 9 |   conditions = NULL,
10 |   variable_name_obj = c("preset_variants_result", "variant_names"),
11 |   susie_obj = c("preset_variants_result", "susie_result_trimmed"),
12 |   twas_weights_table = "twas_weights"
13 | )
14 | }
15 | \arguments{
16 | \item{variable_name_obj}{The name of the variable/object to fetch from each file, if not NULL.}
17 | 
18 | \item{weight_db_file}{weight_db_files Vector of file paths for RDS files containing TWAS weights..
19 | Each element organized as region/condition/weights}
20 | 
21 | \item{condition}{The specific condition to be checked and consolidated across all files.}
22 | }
23 | \value{
24 | A consolidated list of weights for the specified condition and a list of SuSiE results.
25 | }
26 | \description{
27 | This function loads TWAS weight data from multiple RDS files, checks for the presence
28 | of specified region and condition. If variable_name_obj is provided, it aligns and
29 | consolidates weight matrices based on the object's variant names, filling missing data
30 | with zeros. If variable_name_obj is NULL, it checks that all files have the same row
31 | numbers for the condition and consolidates weights accordingly.
32 | }
33 | \examples{
34 | # Example usage (replace with actual file paths, condition, region, and variable_name_obj):
35 | weight_db_files <- c("path/to/file1.rds", "path/to/file2.rds")
36 | condition <- "example_condition"
37 | region <- "example_region"
38 | variable_name_obj <- "example_variable" # or NULL for standard processing
39 | consolidated_weights <- load_twas_weights(weight_db_files, condition, region, variable_name_obj)
40 | print(consolidated_weights)
41 | }
42 | 


--------------------------------------------------------------------------------
/man/manhattan_plot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot.R
 3 | \name{manhattan_plot}
 4 | \alias{manhattan_plot}
 5 | \title{Manhattan plot}
 6 | \usage{
 7 | manhattan_plot(twas_results, gene_data)
 8 | }
 9 | \arguments{
10 | \item{twas_results}{a data frame of twas results with columns “gene_name", "gene_id","chr","susie_pval","lasso_pval","enet_pval" and "mrash_pval", where twas results are the output of the twas_scan function. "gene_name" is the ensemble ID and "gene_id" is the corresponding gene name,
11 | "susie_pval", "lasso_pval","enet_pval" and "mrash_pval" are the pvalues of susie and other three competing twas method.}
12 | 
13 | \item{gene_data}{a data frame with columns "chr", "start", "end", and "ID", "chr" is the chromosome of gene, "start" and "end" are the position, "ID" is the gene name.}
14 | }
15 | \value{
16 | plot object
17 | }
18 | \description{
19 | Manhattan plot
20 | }
21 | 


--------------------------------------------------------------------------------
/man/merge_sumstats_matrices.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/mash_wrapper.R
 3 | \name{merge_sumstats_matrices}
 4 | \alias{merge_sumstats_matrices}
 5 | \title{Merge a List of Matrices or Data Frames with Optional Allele Flipping}
 6 | \usage{
 7 | merge_sumstats_matrices(
 8 |   matrix_list,
 9 |   value_column,
10 |   ref_panel = NULL,
11 |   ld_meta_file = NULL,
12 |   id_column = "variants",
13 |   remove_any_missing = FALSE
14 | )
15 | }
16 | \arguments{
17 | \item{matrix_list}{A named or unnamed list of data frames or matrices.}
18 | 
19 | \item{value_column}{Character string. The name of the column containing values to extract (e.g., z-scores or betas).}
20 | 
21 | \item{ref_panel}{Optional data frame. A reference panel for allele QC (must be compatible with `allele_qc`).}
22 | 
23 | \item{id_column}{Character string. The name of the column identifying variant IDs. Default is `"variants"`.}
24 | 
25 | \item{remove_any_missing}{Logical. If `TRUE`, rows with any missing values will be removed after merging.}
26 | }
27 | \value{
28 | A data frame containing merged values, one column per dataset with suffix `_i`.
29 | }
30 | \description{
31 | This function merges a list of matrices or data frames by a shared identifier column,
32 | optionally aligning to a reference panel using allele QC procedures.
33 | }
34 | \examples{
35 | \dontrun{
36 | merged <- merge_matrices(list(df1, df2), value_column = "variants", ref_panel = ref_df)
37 | }
38 | }
39 | 


--------------------------------------------------------------------------------
/man/mr_analysis.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/mr.R
 3 | \name{mr_analysis}
 4 | \alias{mr_analysis}
 5 | \title{Mendelian Randomization (MR)}
 6 | \usage{
 7 | mr_analysis(mr_formatted_input, cpip_cutoff = 0.5)
 8 | }
 9 | \arguments{
10 | \item{mr_formatted_input}{the output of twas_mr_format_input function}
11 | 
12 | \item{cpip_cutoff}{the threshold of cumulative posterior inclusion probability, default is 0.5}
13 | }
14 | \value{
15 | A single data frame of output with columns "gene_name", "num_CS", "num_IV",
16 | "meta_eff", "se_meta_eff", "meta_pval", "Q", "Q_pval" and "I2". "gene_name" is ensemble ID. "num_CS" is the number of credible sets
17 | contained in each gene, "num_IV" is the number of variants contained in each gene. "meta_eff", "se_meta_eff" and "meta_pval" are the MR estimate, standard error and pvalue.
18 | "Q" is Cochran’s Q statistic, "I2" quantifies the heterogeneity, range from 0 to 1.
19 | }
20 | \description{
21 | Mendelian Randomization (MR)
22 | }
23 | 


--------------------------------------------------------------------------------
/man/mr_ash_rss.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/regularized_regression.R
  3 | \name{mr_ash_rss}
  4 | \alias{mr_ash_rss}
  5 | \title{Bayesian Multiple Regression with Mixture-of-Normals Prior}
  6 | \usage{
  7 | mr_ash_rss(
  8 |   bhat,
  9 |   shat,
 10 |   R,
 11 |   var_y,
 12 |   n,
 13 |   sigma2_e,
 14 |   s0,
 15 |   w0,
 16 |   mu1_init = numeric(0),
 17 |   tol = 1e-08,
 18 |   max_iter = 1e+05,
 19 |   z = numeric(0),
 20 |   update_w0 = TRUE,
 21 |   update_sigma = TRUE,
 22 |   compute_ELBO = TRUE,
 23 |   standardize = FALSE,
 24 |   ncpu = 1L
 25 | )
 26 | }
 27 | \arguments{
 28 | \item{bhat}{Numeric vector of observed effect sizes (standardized).}
 29 | 
 30 | \item{shat}{Numeric vector of standard errors of effect sizes.}
 31 | 
 32 | \item{R}{Numeric matrix of the correlation matrix.}
 33 | 
 34 | \item{var_y}{Numeric value of the variance of the outcome.}
 35 | 
 36 | \item{n}{Integer value of the sample size.}
 37 | 
 38 | \item{sigma2_e}{Numeric value of the error variance.}
 39 | 
 40 | \item{s0}{Numeric vector of prior variances for the mixture components.}
 41 | 
 42 | \item{w0}{Numeric vector of prior weights for the mixture components.}
 43 | 
 44 | \item{mu1_init}{Numeric vector of initial values for the posterior mean of the coefficients.}
 45 | 
 46 | \item{tol}{Numeric value of the convergence tolerance. Default is 1e-8.}
 47 | 
 48 | \item{max_iter}{Integer value of the maximum number of iterations. Default is 1e5.}
 49 | 
 50 | \item{z}{Numeric vector of Z-scores.}
 51 | 
 52 | \item{update_w0}{Logical value indicating whether to update the mixture weights. Default is TRUE.}
 53 | 
 54 | \item{update_sigma}{Logical value indicating whether to update the error variance. Default is TRUE.}
 55 | 
 56 | \item{compute_ELBO}{Logical value indicating whether to compute the Evidence Lower Bound (ELBO). Default is TRUE.}
 57 | 
 58 | \item{standardize}{Logical value indicating whether to standardize the input data. Default is FALSE.}
 59 | 
 60 | \item{ncpu}{An integer specifying the number of CPU cores to use for parallel computation. Default is 1.}
 61 | }
 62 | \value{
 63 | A list containing the following components:
 64 | \describe{
 65 |   \item{mu1}{Numeric vector of the posterior mean of the coefficients.}
 66 |   \item{sigma2_1}{Numeric vector of the posterior variance of the coefficients.}
 67 |   \item{w1}{Numeric matrix of the posterior assignment probabilities.}
 68 |   \item{sigma2_e}{Numeric value of the error variance.}
 69 |   \item{w0}{Numeric vector of the mixture weights.}
 70 |   \item{ELBO}{Numeric value of the Evidence Lower Bound (if `compute_ELBO = TRUE`).}
 71 | }
 72 | }
 73 | \description{
 74 | This function performs Bayesian multiple regression with a mixture-of-normals prior using the `rcpp_mr_ash_rss` function from the C++ implementation.
 75 | }
 76 | \examples{
 77 | # Generate example data
 78 | set.seed(985115)
 79 | n <- 350
 80 | p <- 16
 81 | sigmasq_error <- 0.5
 82 | zeroes <- rbinom(p, 1, 0.6)
 83 | beta.true <- rnorm(p, 1, sd = 4)
 84 | beta.true[zeroes] <- 0
 85 | 
 86 | X <- cbind(matrix(rnorm(n * p), nrow = n))
 87 | X <- scale(X, center = TRUE, scale = FALSE)
 88 | y <- X \%*\% matrix(beta.true, ncol = 1) + rnorm(n, 0, sqrt(sigmasq_error))
 89 | y <- scale(y, center = TRUE, scale = FALSE)
 90 | 
 91 | # Calculate sufficient statistics
 92 | XtX <- t(X) \%*\% X
 93 | Xty <- t(X) \%*\% y
 94 | yty <- t(y) \%*\% y
 95 | 
 96 | # Set the prior
 97 | K <- 9
 98 | sigma0 <- c(0.001, .1, .5, 1, 5, 10, 20, 30, .005)
 99 | omega0 <- rep(1 / K, K)
100 | 
101 | # Calculate summary statistics
102 | b.hat <- sapply(1:p, function(j) {
103 |   summary(lm(y ~ X[, j]))$coefficients[-1, 1]
104 | })
105 | s.hat <- sapply(1:p, function(j) {
106 |   summary(lm(y ~ X[, j]))$coefficients[-1, 2]
107 | })
108 | R.hat <- cor(X)
109 | var_y <- var(y)
110 | sigmasq_init <- 1.5
111 | 
112 | # Run mr_ash_rss
113 | out <- mr_ash_rss(b.hat, s.hat,
114 |   R = R.hat, var_y = var_y, n = n,
115 |   sigma2_e = sigmasq_init, s0 = sigma0, w0 = omega0,
116 |   mu1_init = rep(0, ncol(X)), tol = 1e-8, max_iter = 1e5,
117 |   update_w0 = TRUE, update_sigma = TRUE, compute_ELBO = TRUE,
118 |   standardize = FALSE
119 | )
120 | # In sample prediction correlations
121 | cor(X \%*\% out1$mu1, y) # 0.9984064
122 | }
123 | 


--------------------------------------------------------------------------------
/man/mr_ash_rss_weights.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/regularized_regression.R
 3 | \name{mr_ash_rss_weights}
 4 | \alias{mr_ash_rss_weights}
 5 | \title{Extract weights from mr_ash_rss function}
 6 | \usage{
 7 | mr_ash_rss_weights(stat, LD, var_y, sigma2_e, s0, w0, z = numeric(0), ...)
 8 | }
 9 | \value{
10 | A numeric vector of the posterior mean of the coefficients.
11 | }
12 | \description{
13 | Extract weights from mr_ash_rss function
14 | }
15 | 


--------------------------------------------------------------------------------
/man/mr_format.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/mr.R
 3 | \name{mr_format}
 4 | \alias{mr_format}
 5 | \title{MR Format Function}
 6 | \usage{
 7 | mr_format(
 8 |   susie_result,
 9 |   condition,
10 |   gwas_sumstats_db,
11 |   coverage = "cs_coverage_0.95",
12 |   allele_qc = TRUE,
13 |   molecular_name_obj = c("susie_results", condition, "region_info", "region_name"),
14 |   ld_meta_df
15 | )
16 | }
17 | \arguments{
18 | \item{susie_result}{A list containing the results of SuSiE analysis. This list should include nested elements such as 'susie_results', 'susie_result_trimmed', and 'top_loci', containing details about the statistical analysis of genetic variants.}
19 | 
20 | \item{condition}{A character string specifying the conditions. This is used to select the corresponding subset of results within 'susie_result'.}
21 | 
22 | \item{gwas_sumstats_db}{A data frame containing summary statistics from GWAS studies. It should include columns for variant id and their associated statistics such as beta coefficients and standard errors.}
23 | 
24 | \item{coverage}{A character string specifying the coverage threshold for credible sets, used when 'sets' is not "sets". Defaults to "coverage_0.95", indicating a 95% coverage credible set.}
25 | 
26 | \item{allele_qc}{Optional. A logical value indicating whether allele qc should be performed on the variants. When TRUE, allele qc are applied to the variants based on the GWAS summary statistics database ('gwas_sumstats_db').}
27 | 
28 | \item{sets}{A character string indicating the method used to define sets of genetic variants. Defaults to "sets". This parameter is used to specify the type of sets to extract from the 'susie_result' object.}
29 | }
30 | \value{
31 | A data frame formatted for MR analysis or NULL if cs_list is empty.
32 | }
33 | \description{
34 | Description of what the function does.
35 | }
36 | 


--------------------------------------------------------------------------------
/man/mrash_weights.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/regularized_regression.R
 3 | \name{mrash_weights}
 4 | \alias{mrash_weights}
 5 | \title{Compute Weights Using mr.ash Shrinkage}
 6 | \usage{
 7 | mrash_weights(X, y, init_prior_sd = TRUE, ...)
 8 | }
 9 | \description{
10 | This function fits the `mr.ash` model (adaptive shrinkage regression) to estimate weights
11 | for a given set of predictors and response. It uses optional prior standard deviation initialization
12 | and can accept custom initial beta values.
13 | }
14 | \examples{
15 | wgt.mr.ash <- mrash_weights(eqtl$X, eqtl$y_res, beta.init = lasso_weights(X, y))
16 | }
17 | 


--------------------------------------------------------------------------------
/man/multicontext_ld_clumping.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/quantile_twas_weight.R
 3 | \name{multicontext_ld_clumping}
 4 | \alias{multicontext_ld_clumping}
 5 | \title{Perform Clumping and Pruning}
 6 | \usage{
 7 | multicontext_ld_clumping(
 8 |   X,
 9 |   qr_results,
10 |   maf_list = NULL,
11 |   ld_clump_r2 = 0.2,
12 |   final_clump_r2 = 0.8
13 | )
14 | }
15 | \arguments{
16 | \item{X}{Matrix of genotypes}
17 | 
18 | \item{qr_results}{Results from QR_screen}
19 | 
20 | \item{maf_list}{List of minor allele frequencies (optional)}
21 | 
22 | \item{ld_clump_r2}{R-squared threshold for initial LD clumping based on pvalue}
23 | 
24 | \item{final_clump_r2}{R-squared threshold for final LD clumping based on MAF}
25 | }
26 | \value{
27 | A list containing final SNPs and clumped SNPs
28 | }
29 | \description{
30 | Perform Clumping and Pruning
31 | }
32 | 


--------------------------------------------------------------------------------
/man/multigene_udr.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/multigene_udr.R
 3 | \name{multigene_udr}
 4 | \alias{multigene_udr}
 5 | \title{Perform udr Analysis on Multigene Data}
 6 | \usage{
 7 | multigene_udr(
 8 |   combined_susie_list,
 9 |   coverage,
10 |   independent_variant_list,
11 |   n_random,
12 |   n_null,
13 |   seed,
14 |   exclude_condition = NULL
15 | )
16 | }
17 | \arguments{
18 | \item{combined_susie_list}{A list containing the combined SuSiE and summary statistics results.}
19 | 
20 | \item{coverage}{A numeric vector representing the coverage values.}
21 | 
22 | \item{independent_variant_list}{A list of independent variants to be used as a filter.}
23 | 
24 | \item{n_random}{An integer specifying the number of random samples to generate.}
25 | 
26 | \item{n_null}{An integer specifying the number of null samples to generate.}
27 | 
28 | \item{seed}{An integer specifying the seed for random number generation.}
29 | 
30 | \item{exclude_condition}{A character vector specifying conditions to be excluded from the analysis. Defaults to NULL.}
31 | }
32 | \value{
33 | A list containing the results of the prior, or NULL if conditions are not met.
34 | }
35 | \description{
36 | Perform udr Analysis on Multigene Data
37 | }
38 | 


--------------------------------------------------------------------------------
/man/parse_cs_corr.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/univariate_rss_diagnostics.R
 3 | \name{parse_cs_corr}
 4 | \alias{parse_cs_corr}
 5 | \title{Parse Credible Set Correlations from extract_cs_info() Output}
 6 | \usage{
 7 | parse_cs_corr(df)
 8 | }
 9 | \arguments{
10 | \item{df}{Data frame or data.table. The output from `extract_cs_info()` function,
11 | containing a `cs_corr` column with correlation information.}
12 | }
13 | \value{
14 | A data.table with the original columns from the input, plus:
15 |   \item{cs_corr_1, cs_corr_2, ...}{Individual correlation values, with column names
16 |         based on their position in the original string}
17 |   \item{cs_corr_max}{Maximum absolute correlation value (excluding 1)}
18 |   \item{cs_corr_min}{Minimum absolute correlation value}
19 | }
20 | \description{
21 | This function takes the output from `extract_cs_info()` and expands the `cs_corr` column
22 | into multiple columns, preserving the original order of correlations. It also
23 | calculates maximum and minimum correlation values for each Credible Set.
24 | }
25 | \details{
26 | The function splits the `cs_corr` column, which typically contains correlation
27 | values separated by '|', into individual columns. It preserves the order of
28 | these correlations, allowing for easy interpretation in a matrix-like format.
29 | }
30 | \note{
31 | - This function converts the input to a data.table if it isn't already one.
32 | - It handles cases where correlation values might be missing or not in the expected format.
33 | - The function assumes that correlation values of 1 represent self-correlations and excludes
34 |   these when calculating max and min correlations.
35 | }
36 | 


--------------------------------------------------------------------------------
/man/perform_qr_analysis.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/quantile_twas_weight.R
 3 | \name{perform_qr_analysis}
 4 | \alias{perform_qr_analysis}
 5 | \title{Perform Quantile Regression Analysis to get beta}
 6 | \usage{
 7 | perform_qr_analysis(X, Y, Z = NULL, tau_values = seq(0.05, 0.95, by = 0.05))
 8 | }
 9 | \arguments{
10 | \item{X}{Matrix of predictors}
11 | 
12 | \item{Y}{Matrix or vector of response variables}
13 | 
14 | \item{Z}{Matrix of covariates (optional)}
15 | 
16 | \item{tau_values}{Vector of quantiles to be analyzed}
17 | }
18 | \value{
19 | A data frame with QR coefficients for each quantile
20 | }
21 | \description{
22 | Perform Quantile Regression Analysis to get beta
23 | }
24 | 


--------------------------------------------------------------------------------
/man/prs_cs.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/regularized_regression.R
  3 | \name{prs_cs}
  4 | \alias{prs_cs}
  5 | \title{PRS-CS: a polygenic prediction method that infers posterior SNP effect sizes under continuous shrinkage (CS) priors}
  6 | \usage{
  7 | prs_cs(
  8 |   bhat,
  9 |   LD,
 10 |   n,
 11 |   a = 1,
 12 |   b = 0.5,
 13 |   phi = NULL,
 14 |   maf = NULL,
 15 |   n_iter = 1000,
 16 |   n_burnin = 500,
 17 |   thin = 5,
 18 |   verbose = FALSE,
 19 |   seed = NULL
 20 | )
 21 | }
 22 | \arguments{
 23 | \item{bhat}{A vector of marginal effect sizes.}
 24 | 
 25 | \item{LD}{A list of LD blocks, where each element is a matrix representing an LD block.}
 26 | 
 27 | \item{n}{Sample size of the GWAS.}
 28 | 
 29 | \item{a}{Shape parameter for the prior distribution of psi. Default is 1.}
 30 | 
 31 | \item{b}{Scale parameter for the prior distribution of psi. Default is 0.5.}
 32 | 
 33 | \item{phi}{Global shrinkage parameter. If NULL, it will be estimated automatically. Default is NULL.}
 34 | 
 35 | \item{maf}{A vector of minor allele frequencies, if available, will standardize the effect sizes by MAF. Default is NULL.}
 36 | 
 37 | \item{n_iter}{Number of MCMC iterations. Default is 1000.}
 38 | 
 39 | \item{n_burnin}{Number of burn-in iterations. Default is 500.}
 40 | 
 41 | \item{thin}{Thinning factor for MCMC. Default is 5.}
 42 | 
 43 | \item{verbose}{Whether to print verbose output. Default is FALSE.}
 44 | 
 45 | \item{seed}{Random seed for reproducibility. Default is NULL.}
 46 | }
 47 | \value{
 48 | A list containing the posterior estimates:
 49 |   - beta_est: Posterior estimates of SNP effect sizes.
 50 |   - psi_est: Posterior estimates of psi (shrinkage parameters).
 51 |   - sigma_est: Posterior estimate of the residual variance.
 52 |   - phi_est: Posterior estimate of the global shrinkage parameter.
 53 | }
 54 | \description{
 55 | This function is a wrapper for the PRS-CS method implemented in C++. It takes marginal effect size estimates from regression and an external LD reference panel
 56 | and infers posterior SNP effect sizes using Bayesian regression with continuous shrinkage priors.
 57 | }
 58 | \examples{
 59 | # Generate example data
 60 | set.seed(985115)
 61 | n <- 350
 62 | p <- 16
 63 | sigmasq_error <- 0.5
 64 | zeroes <- rbinom(p, 1, 0.6)
 65 | beta.true <- rnorm(p, 1, sd = 4)
 66 | beta.true[zeroes] <- 0
 67 | 
 68 | X <- cbind(matrix(rnorm(n * p), nrow = n))
 69 | X <- scale(X, center = TRUE, scale = FALSE)
 70 | y <- X \%*\% matrix(beta.true, ncol = 1) + rnorm(n, 0, sqrt(sigmasq_error))
 71 | y <- scale(y, center = TRUE, scale = FALSE)
 72 | 
 73 | # Calculate sufficient statistics
 74 | XtX <- t(X) \%*\% X
 75 | Xty <- t(X) \%*\% y
 76 | yty <- t(y) \%*\% y
 77 | 
 78 | # Set the prior
 79 | K <- 9
 80 | sigma0 <- c(0.001, .1, .5, 1, 5, 10, 20, 30, .005)
 81 | omega0 <- rep(1 / K, K)
 82 | 
 83 | # Calculate summary statistics
 84 | b.hat <- sapply(1:p, function(j) {
 85 |   summary(lm(y ~ X[, j]))$coefficients[-1, 1]
 86 | })
 87 | s.hat <- sapply(1:p, function(j) {
 88 |   summary(lm(y ~ X[, j]))$coefficients[-1, 2]
 89 | })
 90 | R.hat <- cor(X)
 91 | var_y <- var(y)
 92 | sigmasq_init <- 1.5
 93 | 
 94 | # Run PRS CS
 95 | maf <- rep(0.5, length(b.hat)) # fake MAF
 96 | LD <- list(blk1 = R.hat)
 97 | out <- prs_cs(b.hat, LD, n, maf = maf)
 98 | # In sample prediction correlations
 99 | cor(X \%*\% out$beta_est, y) # 0.9944553
100 | }
101 | 


--------------------------------------------------------------------------------
/man/prs_cs_weights.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/regularized_regression.R
 3 | \name{prs_cs_weights}
 4 | \alias{prs_cs_weights}
 5 | \title{Extract weights from prs_cs function}
 6 | \usage{
 7 | prs_cs_weights(stat, LD, ...)
 8 | }
 9 | \value{
10 | A numeric vector of the posterior SNP coefficients.
11 | }
12 | \description{
13 | Extract weights from prs_cs function
14 | }
15 | 


--------------------------------------------------------------------------------
/man/qr_screen.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/quantile_twas_weight.R
 3 | \name{qr_screen}
 4 | \alias{qr_screen}
 5 | \title{Quantile TWAS Weight Calculation and QTL Analysis}
 6 | \usage{
 7 | qr_screen(
 8 |   X,
 9 |   Y,
10 |   Z = NULL,
11 |   tau.list = seq(0.05, 0.95, by = 0.05),
12 |   screen_threshold = 0.05,
13 |   screen_method = "qvalue",
14 |   top_count = 10,
15 |   top_percent = 15
16 | )
17 | }
18 | \arguments{
19 | \item{X}{Matrix of predictors}
20 | 
21 | \item{Y}{Matrix or vector of response variables}
22 | 
23 | \item{Z}{Matrix of covariates (optional)}
24 | 
25 | \item{tau.list}{Vector of quantiles to be analyzed}
26 | 
27 | \item{screen_threshold}{Significance threshold for adjusted p-values}
28 | 
29 | \item{screen_method}{Method for p-value adjustment ('fdr' or 'qvalue')}
30 | 
31 | \item{top_count}{Number of top SNPs to select}
32 | 
33 | \item{top_percent}{Percentage of top SNPs to select}
34 | }
35 | \value{
36 | A list containing various results from the QR screen
37 | }
38 | \description{
39 | This file contains functions for performing Quantile Transcriptome-Wide
40 | Association Studies (TWAS) weight calculations and Quantile QTL analysis.
41 | It provides tools for screening quantile regression results, performing
42 | LD clumping and pruning, and calculating TWAS weights.
43 | }
44 | \details{
45 | The main function in this file is `quantile_twas_weight_pipeline`, which
46 | orchestrates the entire analysis process. Other functions are helper
47 | functions used within the main pipeline.
48 | 
49 | Qrank Score Test Screen
50 | }
51 | 


--------------------------------------------------------------------------------
/man/quantile_twas_weight_pipeline.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/quantile_twas_weight.R
 3 | \name{quantile_twas_weight_pipeline}
 4 | \alias{quantile_twas_weight_pipeline}
 5 | \title{Quantile TWAS Weight Pipeline}
 6 | \usage{
 7 | quantile_twas_weight_pipeline(
 8 |   X,
 9 |   Y,
10 |   Z = NULL,
11 |   maf = NULL,
12 |   region_id = "",
13 |   ld_reference_meta_file = NULL,
14 |   twas_maf_cutoff = 0.01,
15 |   ld_pruning = FALSE,
16 |   quantile_qtl_tau_list = seq(0.05, 0.95, by = 0.05),
17 |   quantile_twas_tau_list = seq(0.01, 0.99, by = 0.01),
18 |   screen_threshold = 0.05
19 | )
20 | }
21 | \arguments{
22 | \item{X}{Matrix of genotypes}
23 | 
24 | \item{Y}{Matrix or vector of phenotypes}
25 | 
26 | \item{Z}{Matrix of covariates (optional)}
27 | 
28 | \item{maf}{Vector of minor allele frequencies (optional)}
29 | 
30 | \item{region_id}{Name of the region being analyzed}
31 | 
32 | \item{quantile_qtl_tau_list}{Vector of quantiles for QTL analysis}
33 | 
34 | \item{quantile_twas_tau_list}{Vector of quantiles for TWAS analysis}
35 | }
36 | \value{
37 | A list containing various results from the TWAS weight pipeline:
38 | \itemize{
39 |   \item qr_screen_pvalue_df: Data frame with QR screening results: pavlue, qvalue and zscore.
40 |   \item message: Any informational or warning messages.
41 |   \item twas_variant_names: Names of variants used in TWAS weight calculation.
42 |   \item rq_coef_df: Data frame with quantile regression coefficients.
43 |   \item twas_weight: Matrix of TWAS weights.
44 |   \item pseudo_R2: Vector of pseudo R-squared values.
45 |   \item quantile_twas_prediction: Matrix of TWAS predictions.
46 | }
47 | }
48 | \description{
49 | Quantile TWAS Weight Pipeline
50 | }
51 | \details{
52 | The function performs the following steps:
53 | 1. QR screening to identify significant SNPs.
54 | 2. Filtering of highly correlated SNPs.
55 | 3. LD clumping and pruning(use filtered SNPs from step 1).
56 | 4. Calculation of QR coefficients for selected SNPs(use filtered SNPs from step 3).
57 | 5. Calculation of TWAS weights and pseudo R-squared values(use filtered SNPs from step 2).
58 | }
59 | \examples{
60 | # Example usage:
61 | # X <- matrix of genotypes
62 | # Y <- vector of phenotypes
63 | # Z <- matrix of covariates
64 | # results <- quantile_twas_weight_pipeline(X, Y, Z, region_id = "GeneA")
65 | 
66 | }
67 | 


--------------------------------------------------------------------------------
/man/raiss.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/raiss.R
 3 | \name{raiss}
 4 | \alias{raiss}
 5 | \title{Robust and accurate imputation from summary statistics}
 6 | \usage{
 7 | raiss(
 8 |   ref_panel,
 9 |   known_zscores,
10 |   LD_matrix,
11 |   lamb = 0.01,
12 |   rcond = 0.01,
13 |   R2_threshold = 0.6,
14 |   minimum_ld = 5,
15 |   verbose = TRUE
16 | )
17 | }
18 | \arguments{
19 | \item{ref_panel}{A data frame containing 'chrom', 'pos', 'variant_id', 'A1', and 'A2'.}
20 | 
21 | \item{known_zscores}{A data frame containing 'chrom', 'pos', 'variant_id', 'A1', 'A2', and 'z' values.}
22 | 
23 | \item{LD_matrix}{Either a square matrix or a list of matrices for LD blocks.}
24 | 
25 | \item{lamb}{Regularization term added to the diagonal of the LD_matrix.}
26 | 
27 | \item{rcond}{Threshold for filtering eigenvalues in the pseudo-inverse computation.}
28 | 
29 | \item{R2_threshold}{R square threshold below which SNPs are filtered from the output.}
30 | 
31 | \item{minimum_ld}{Minimum LD score threshold for SNP filtering.}
32 | 
33 | \item{verbose}{Logical indicating whether to print progress information.}
34 | }
35 | \value{
36 | A list containing filtered and unfiltered results, and filtered LD matrix.
37 | }
38 | \description{
39 | This function is a part of the statistical library for SNP imputation from:
40 | https://gitlab.pasteur.fr/statistical-genetics/raiss/-/blob/master/raiss/stat_models.py
41 | It is R implementation of the imputation model described in the paper by Bogdan Pasaniuc,
42 | Noah Zaitlen, et al., titled "Fast and accurate imputation of summary
43 | statistics enhances evidence of functional enrichment", published in
44 | Bioinformatics in 2014.
45 | }
46 | \details{
47 | This function can process either a single LD matrix or a list of LD matrices for different blocks.
48 | For a list of matrices, it processes each block separately and combines the results.
49 | }
50 | 


--------------------------------------------------------------------------------
/man/region_to_df.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/misc.R
 3 | \name{region_to_df}
 4 | \alias{region_to_df}
 5 | \title{Utility function to convert LD region_ids to `region of interest` dataframe}
 6 | \usage{
 7 | region_to_df(ld_region_id, colnames = c("chrom", "start", "end"))
 8 | }
 9 | \arguments{
10 | \item{ld_region_id}{A string of region in the format of chrom_start_end.}
11 | }
12 | \description{
13 | Utility function to convert LD region_ids to `region of interest` dataframe
14 | }
15 | 


--------------------------------------------------------------------------------
/man/rescale_cov_w0.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/mrmash_wrapper.R
 3 | \name{rescale_cov_w0}
 4 | \alias{rescale_cov_w0}
 5 | \title{Re-normalize mrmash weight w0 to have total weight sum to 1}
 6 | \usage{
 7 | rescale_cov_w0(w0)
 8 | }
 9 | \arguments{
10 | \item{w0}{is the weight of mr.mash prior matrices that was generated from mr.mash() function.}
11 | }
12 | \description{
13 | Re-normalize mrmash weight w0 to have total weight sum to 1
14 | }
15 | 


--------------------------------------------------------------------------------
/man/rss_analysis_pipeline.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/univariate_pipeline.R
 3 | \name{rss_analysis_pipeline}
 4 | \alias{rss_analysis_pipeline}
 5 | \title{RSS Analysis Pipeline}
 6 | \usage{
 7 | rss_analysis_pipeline(
 8 |   sumstat_path,
 9 |   column_file_path,
10 |   LD_data,
11 |   n_sample = 0,
12 |   n_case = 0,
13 |   n_control = 0,
14 |   region = NULL,
15 |   skip_region = NULL,
16 |   extract_region_name = NULL,
17 |   region_name_col = NULL,
18 |   qc_method = c("rss_qc", "dentist", "slalom"),
19 |   finemapping_method = c("susie_rss", "single_effect", "bayesian_conditional_regression"),
20 |   finemapping_opts = list(init_L = 5, max_L = 20, l_step = 5, coverage = c(0.95, 0.7,
21 |     0.5), signal_cutoff = 0.025),
22 |   impute = TRUE,
23 |   impute_opts = list(rcond = 0.01, R2_threshold = 0.6, minimum_ld = 5, lamb = 0.01),
24 |   pip_cutoff_to_skip = 0,
25 |   remove_indels = FALSE,
26 |   comment_string = "#",
27 |   diagnostics = FALSE
28 | )
29 | }
30 | \arguments{
31 | \item{sumstat_path}{File path to the summary statistics.}
32 | 
33 | \item{column_file_path}{File path to the column file for mapping.}
34 | 
35 | \item{LD_data}{A list containing combined LD variants data that is generated by load_LD_matrix.}
36 | 
37 | \item{n_sample}{User-specified sample size. If unknown, set as 0 to retrieve from the sumstat file.}
38 | 
39 | \item{n_case}{User-specified number of cases.}
40 | 
41 | \item{n_control}{User-specified number of controls.}
42 | 
43 | \item{region}{The region where tabix use to subset the input dataset.}
44 | 
45 | \item{skip_region}{A character vector specifying regions to be skipped in the analysis (optional).
46 | Each region should be in the format "chrom:start-end" (e.g., "1:1000000-2000000").}
47 | 
48 | \item{extract_region_name}{User-specified gene/phenotype name used to further subset the phenotype data.}
49 | 
50 | \item{region_name_col}{Filter this specific column for the extract_region_name.}
51 | 
52 | \item{qc_method}{Quality control method to use. Options are "rss_qc", "dentist", or "slalom" (default: "rss_qc").}
53 | 
54 | \item{impute}{Logical; if TRUE, performs imputation for outliers identified in the analysis (default: TRUE).}
55 | 
56 | \item{impute_opts}{A list of imputation options including rcond, R2_threshold, and minimum_ld (default: list(rcond = 0.01, R2_threshold = 0.6, minimum_ld = 5)).}
57 | 
58 | \item{pip_cutoff_to_skip}{PIP cutoff to skip imputation (default: 0).}
59 | 
60 | \item{L}{Initial number of causal configurations to consider in the analysis (default: 8).}
61 | 
62 | \item{max_L}{Maximum number of causal configurations to consider when dynamically adjusting L (default: 20).}
63 | 
64 | \item{l_step}{Step size for increasing L when the limit is reached during dynamic adjustment (default: 5).}
65 | 
66 | \item{analysis_method}{Analysis method to use. Options are "susie_rss", "single_effect", or "bayesian_conditional_regression" (default: "susie_rss").}
67 | 
68 | \item{coverage}{Coverage levels for SuSiE RSS analysis (default: c(0.95, 0.7, 0.5)).}
69 | 
70 | \item{signal_cutoff}{Signal cutoff for susie_post_processor (default: 0.025).}
71 | }
72 | \value{
73 | A list containing the final_result and input_rss_data.
74 |   - final_result: A list containing the results of various SuSiE RSS analyses.
75 |   - input_rss_data: A processed data frame containing summary statistics after preprocessing.
76 | }
77 | \description{
78 | This function performs an end-to-end RSS analysis pipeline, including data loading,
79 | preprocessing, quality control, imputation, and SuSiE RSS analysis. It provides flexibility
80 | in specifying various analysis options and parameters.
81 | }
82 | 


--------------------------------------------------------------------------------
/man/rss_basic_qc.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sumstats_qc.R
 3 | \name{rss_basic_qc}
 4 | \alias{rss_basic_qc}
 5 | \title{Preprocess input data for RSS analysis}
 6 | \usage{
 7 | rss_basic_qc(sumstats, LD_data, skip_region = NULL, remove_indels = FALSE)
 8 | }
 9 | \arguments{
10 | \item{sumstats}{A data frame containing summary statistics with columns "chrom", "pos", "A1", and "A2".}
11 | 
12 | \item{LD_data}{A list containing combined LD variants data that is generated by load_LD_matrix.}
13 | 
14 | \item{skip_region}{A character vector specifying regions to be skipped in the analysis (optional).
15 | Each region should be in the format "chrom:start-end" (e.g., "1:1000000-2000000").}
16 | }
17 | \value{
18 | A list containing the processed summary statistics and LD matrix.
19 |   - sumstats: A data frame containing the processed summary statistics.
20 |   - LD_mat: The processed LD matrix.
21 | }
22 | \description{
23 | This function preprocesses summary statistics and LD data for RSS analysis.
24 | It performs allele quality control, flipping alleles as necessary, and removes
25 | specified regions from the analysis.
26 | }
27 | 


--------------------------------------------------------------------------------
/man/sdpr.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/regularized_regression.R
  3 | \name{sdpr}
  4 | \alias{sdpr}
  5 | \title{SDPR (Summary-Statistics-Based Dirichelt Process Regression for Polygenic Risk Prediction)}
  6 | \usage{
  7 | sdpr(
  8 |   bhat,
  9 |   LD,
 10 |   n,
 11 |   per_variant_sample_size = NULL,
 12 |   array = NULL,
 13 |   a = 0.1,
 14 |   c = 1,
 15 |   M = 1000,
 16 |   a0k = 0.5,
 17 |   b0k = 0.5,
 18 |   iter = 1000,
 19 |   burn = 200,
 20 |   thin = 5,
 21 |   n_threads = 1,
 22 |   opt_llk = 1,
 23 |   verbose = TRUE
 24 | )
 25 | }
 26 | \arguments{
 27 | \item{bhat}{A vector of marginal beta values for each SNP.}
 28 | 
 29 | \item{LD}{A list of LD matrices, where each matrix corresponds to a subset of SNPs.}
 30 | 
 31 | \item{n}{The total sample size of the GWAS.}
 32 | 
 33 | \item{per_variant_sample_size}{(Optional) A vector of sample sizes for each SNP. If NULL (default), it will be initialized
 34 | to a vector of length equal to `bhat`, with all values set to `n`.}
 35 | 
 36 | \item{array}{(Optional) A vector of genotyping array information for each SNP. If NULL (default), it will be
 37 | initialized to a vector of 1's with length equal to `bhat`.}
 38 | 
 39 | \item{a}{Factor to shrink the reference LD matrix. Default is 0.1.}
 40 | 
 41 | \item{c}{Factor to correct for the deflation. Default is 1.}
 42 | 
 43 | \item{M}{Max number of variance components. Default is 1000.}
 44 | 
 45 | \item{a0k}{Hyperparameter for inverse gamma distribution. Default is 0.5.}
 46 | 
 47 | \item{b0k}{Hyperparameter for inverse gamma distribution. Default is 0.5.}
 48 | 
 49 | \item{iter}{Number of iterations for MCMC. Default is 1000.}
 50 | 
 51 | \item{burn}{Number of burn-in iterations for MCMC. Default is 200.}
 52 | 
 53 | \item{thin}{Thinning interval for MCMC. Default is 5.}
 54 | 
 55 | \item{n_threads}{Number of threads to use. Default is 1.}
 56 | 
 57 | \item{opt_llk}{Which likelihood to evaluate. 1 for equation 6 (slightly shrink the correlation of SNPs)
 58 | and 2 for equation 5 (SNPs genotyped on different arrays in a separate cohort).
 59 | Default is 1.}
 60 | 
 61 | \item{verbose}{Whether to print verbose output. Default is true.}
 62 | }
 63 | \value{
 64 | A list containing the estimated effect sizes (beta) and heritability (h2).
 65 | }
 66 | \description{
 67 | This function is a wrapper for the SDPR C++ implementation, which performs Markov Chain Monte Carlo (MCMC)
 68 | for estimating effect sizes and heritability based on summary statistics and reference LD matrices.
 69 | }
 70 | \note{
 71 | This function is a wrapper for the SDPR C++ implementation, which is a rewritten and adopted version
 72 |       of the SDPR package. The original SDPR documentation is available at
 73 |       https://htmlpreview.github.io/?https://github.com/eldronzhou/SDPR/blob/main/doc/Manual.html
 74 | }
 75 | \examples{
 76 | # Generate example data
 77 | set.seed(985115)
 78 | n <- 350
 79 | p <- 16
 80 | sigmasq_error <- 0.5
 81 | zeroes <- rbinom(p, 1, 0.6)
 82 | beta.true <- rnorm(p, 1, sd = 4)
 83 | beta.true[zeroes] <- 0
 84 | 
 85 | X <- cbind(matrix(rnorm(n * p), nrow = n))
 86 | X <- scale(X, center = TRUE, scale = FALSE)
 87 | y <- X \%*\% matrix(beta.true, ncol = 1) + rnorm(n, 0, sqrt(sigmasq_error))
 88 | y <- scale(y, center = TRUE, scale = FALSE)
 89 | 
 90 | # Calculate sufficient statistics
 91 | XtX <- t(X) \%*\% X
 92 | Xty <- t(X) \%*\% y
 93 | yty <- t(y) \%*\% y
 94 | 
 95 | # Set the prior
 96 | K <- 9
 97 | sigma0 <- c(0.001, .1, .5, 1, 5, 10, 20, 30, .005)
 98 | omega0 <- rep(1 / K, K)
 99 | 
100 | # Calculate summary statistics
101 | b.hat <- sapply(1:p, function(j) {
102 |   summary(lm(y ~ X[, j]))$coefficients[-1, 1]
103 | })
104 | s.hat <- sapply(1:p, function(j) {
105 |   summary(lm(y ~ X[, j]))$coefficients[-1, 2]
106 | })
107 | R.hat <- cor(X)
108 | var_y <- var(y)
109 | sigmasq_init <- 1.5
110 | 
111 | # Run SDPR
112 | LD <- list(blk1 = R.hat)
113 | out <- sdpr(b.hat, LD, n)
114 | # In sample prediction correlations
115 | cor(X \%*\% out$beta_est, y) #
116 | 
117 | }
118 | 


--------------------------------------------------------------------------------
/man/sdpr_weights.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/regularized_regression.R
 3 | \name{sdpr_weights}
 4 | \alias{sdpr_weights}
 5 | \title{Extract weights from sdpr function}
 6 | \usage{
 7 | sdpr_weights(stat, LD, ...)
 8 | }
 9 | \value{
10 | A numeric vector of the posterior SNP coefficients.
11 | }
12 | \description{
13 | Extract weights from sdpr function
14 | }
15 | 


--------------------------------------------------------------------------------
/man/slalom.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/slalom.R
 3 | \name{slalom}
 4 | \alias{slalom}
 5 | \title{Slalom Function for Summary Statistics QC for Fine-Mapping Analysis}
 6 | \usage{
 7 | slalom(
 8 |   zScore,
 9 |   LD_mat,
10 |   standard_error = rep(1, length(zScore)),
11 |   abf_prior_variance = 0.04,
12 |   nlog10p_dentist_s_threshold = 4,
13 |   r2_threshold = 0.6,
14 |   lead_variant_choice = "pvalue"
15 | )
16 | }
17 | \arguments{
18 | \item{zScore}{Numeric vector of z-scores corresponding to each variant.}
19 | 
20 | \item{LD_mat}{Square matrix representing linkage disequilibrium (LD) information
21 | between variants. Must have dimensions matching the length of `zScore`.}
22 | 
23 | \item{standard_error}{Optional numeric vector of standard errors corresponding
24 | to each z-score. If not provided, a default value of 1 is assumed for all variants.}
25 | 
26 | \item{abf_prior_variance}{Numeric, the prior effect size variance for ABF calculations.
27 | Default is 0.04.}
28 | 
29 | \item{nlog10p_dentist_s_threshold}{Numeric, the -log10 DENTIST-S P value threshold
30 | for identifying outlier variants for prediction. Default is 4.0.}
31 | 
32 | \item{r2_threshold}{Numeric, the r2 threshold for DENTIST-S outlier variants
33 | for prediction. Default is 0.6.}
34 | 
35 | \item{lead_variant_choice}{Character, method to choose the lead variant, either
36 | "pvalue" or "abf", with default "pvalue".}
37 | }
38 | \value{
39 | A list containing the annotated LD matrix with ABF results, credible sets,
40 |   lead variant, and DENTIST-S statistics; and a summary dataframe with aggregate statistics.
41 | }
42 | \description{
43 | Performs Approximate Bayesian Factor (ABF) analysis, identifies credible sets,
44 | and annotates lead variants based on fine-mapping results. It computes p-values
45 | from z-scores assuming a two-sided standard normal distribution.
46 | }
47 | \examples{
48 | # Assuming `zScore` is your vector of z-scores, `LD_mat` is your LD matrix,
49 | # and optionally `standard_error` is your vector of standard errors:
50 | results <- slalom(zScore, LD_mat, standard_error)
51 | }
52 | 


--------------------------------------------------------------------------------
/man/summary_stats_qc.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sumstats_qc.R
 3 | \name{summary_stats_qc}
 4 | \alias{summary_stats_qc}
 5 | \title{Perform Quality Control on Summary Statistics}
 6 | \usage{
 7 | summary_stats_qc(
 8 |   sumstats,
 9 |   LD_data,
10 |   n = NULL,
11 |   var_y = NULL,
12 |   method = c("rss_qc", "dentist", "slalom")
13 | )
14 | }
15 | \arguments{
16 | \item{sumstats}{A data frame containing the processed summary statistics.}
17 | 
18 | \item{LD_data}{A list containing the combined LD variants data generated by load_LD_matrix.}
19 | 
20 | \item{method}{The quality control method to use. Options are "rss_qc", "dentist", or "slalom" (default: "rss_qc").}
21 | }
22 | \value{
23 | A list containing the quality-controlled summary statistics and updated LD matrix.
24 |   - sumstats_qc: The quality-controlled summary statistics data frame.
25 |   - LD_mat_qc: The updated LD matrix after quality control.
26 | }
27 | \description{
28 | This function performs quality control on the processed summary statistics using the specified method.
29 | }
30 | \details{
31 | This function applies the specified quality control method to the processed summary statistics.
32 | 
33 |   The available quality control methods are:
34 |   - "rss_qc": Applies the RSS QC quality control procedure (Sun and Dong et al 2023+).
35 |   - "dentist": Applies the DENTIST quality control procedure (Chen et al 2021).
36 |   - "slalom": Applies the SLALOM quality control procedure.
37 | 
38 |   The function returns the quality-controlled summary statistics along with the updated LD matrix.
39 | }
40 | \examples{
41 | # Perform RSS quality control
42 | qc_results <- summary_stats_qc(sumstats, LD_data, method = "rss_qc")
43 | 
44 | }
45 | 


--------------------------------------------------------------------------------
/man/susie_post_processor.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/susie_wrapper.R
 3 | \name{susie_post_processor}
 4 | \alias{susie_post_processor}
 5 | \title{Post-process SuSiE Analysis Results}
 6 | \usage{
 7 | susie_post_processor(
 8 |   susie_output,
 9 |   data_x,
10 |   data_y,
11 |   X_scalar,
12 |   y_scalar,
13 |   maf = NULL,
14 |   secondary_coverage = c(0.5, 0.7),
15 |   signal_cutoff = 0.1,
16 |   other_quantities = NULL,
17 |   prior_eff_tol = 1e-09,
18 |   min_abs_corr = 0.8,
19 |   mode = c("susie", "susie_rss", "mvsusie")
20 | )
21 | }
22 | \arguments{
23 | \item{susie_output}{Output from running susieR::susie() or susieR::susie_rss() or mvsusieR::mvsusie()}
24 | 
25 | \item{data_x}{Genotype data matrix for 'susie' or Xcorr matrix for 'susie_rss'.}
26 | 
27 | \item{data_y}{Phenotype data vector for 'susie' or summary stats object for 'susie_rss' (a list contain attribute betahat and sebetahat AND/OR z). i.e. data_y = list(betahat = ..., sebetahat = ...), or NULL for mvsusie}
28 | 
29 | \item{X_scalar}{Scalar for the genotype data, used in residual scaling.}
30 | 
31 | \item{y_scalar}{Scalar for the phenotype data, used in residual scaling.}
32 | 
33 | \item{maf}{Minor Allele Frequencies vector.}
34 | 
35 | \item{secondary_coverage}{Vector of coverage thresholds for secondary conditional analysis.}
36 | 
37 | \item{signal_cutoff}{Cutoff value for signal identification in PIP values.}
38 | 
39 | \item{other_quantities}{A list of other quantities to be added to the final object.}
40 | 
41 | \item{prior_eff_tol}{Prior effective tolerance.}
42 | 
43 | \item{mode}{Specify the analysis mode: 'susie' or 'susie_rss'.}
44 | }
45 | \value{
46 | A list containing modified SuSiE object along with additional post-processing information.
47 | }
48 | \description{
49 | This function processes the results from SuSiE (Sum of Single Effects) genetic analysis.
50 | It extracts and processes various statistics and indices based on the provided SuSiE object and other parameters.
51 | The function can operate in 3 modes: 'susie', 'susie_rss', 'mvsusie', based on the method used for the SuSiE analysis.
52 | }
53 | \examples{
54 | # Example usage for SuSiE
55 | # result <- susie_post_processor(susie_output, X_data, y_data, maf, mode = "susie")
56 | # Example usage for SuSiE RSS
57 | # result <- susie_post_processor(susie_output, Xcorr, z, maf, mode = "susie_rss")
58 | }
59 | 


--------------------------------------------------------------------------------
/man/susie_rss_pipeline.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/susie_wrapper.R
 3 | \name{susie_rss_pipeline}
 4 | \alias{susie_rss_pipeline}
 5 | \title{Run the SuSiE RSS pipeline}
 6 | \usage{
 7 | susie_rss_pipeline(
 8 |   sumstats,
 9 |   LD_mat,
10 |   n = NULL,
11 |   var_y = NULL,
12 |   L = 5,
13 |   max_L = 30,
14 |   l_step = 5,
15 |   analysis_method = c("susie_rss", "single_effect", "bayesian_conditional_regression"),
16 |   coverage = 0.95,
17 |   secondary_coverage = c(0.7, 0.5),
18 |   signal_cutoff = 0.1
19 | )
20 | }
21 | \arguments{
22 | \item{sumstats}{A list or data frame containing summary statistics with 'z' or 'beta' and 'se' columns.}
23 | 
24 | \item{LD_mat}{The LD matrix.}
25 | 
26 | \item{n}{Sample size (default: NULL).}
27 | 
28 | \item{var_y}{Variance of Y (default: NULL).}
29 | 
30 | \item{L}{Initial number of causal configurations to consider in the analysis (default: 5).}
31 | 
32 | \item{max_L}{Maximum number of causal configurations to consider in the analysis (default: 30).}
33 | 
34 | \item{l_step}{Step size for increasing L when the limit is reached during dynamic adjustment (default: 5).}
35 | 
36 | \item{analysis_method}{The analysis method to use. Options are "susie_rss", "single_effect", or "bayesian_conditional_regression" (default: "susie_rss").}
37 | 
38 | \item{coverage}{Coverage level for susie_rss analysis (default: 0.95).}
39 | 
40 | \item{secondary_coverage}{Secondary coverage levels for susie_rss analysis (default: c(0.7, 0.5)).}
41 | 
42 | \item{signal_cutoff}{Signal cutoff for susie_post_processor (default: 0.1).}
43 | }
44 | \value{
45 | A list containing the results of the SuSiE RSS analysis based on the specified method.
46 | }
47 | \description{
48 | This function runs the SuSiE RSS pipeline, performing analysis based on the specified method.
49 | It processes the input summary statistics and LD data to provide results in a structured output.
50 | }
51 | \details{
52 | The `susie_rss_pipeline` function runs the SuSiE RSS pipeline based on the specified analysis method.
53 |   It takes the following main inputs:
54 |   - `sumstats`: A list or data frame containing summary statistics with 'z' or 'beta' and 'se' columns.
55 |   - `LD_mat`: The LD matrix.
56 |   - `n`: Sample size (optional).
57 |   - `var_y`: Variance of Y (optional).
58 |   - `L`: Initial number of causal configurations to consider in the analysis.
59 |   - `max_L`: Maximum number of causal configurations to consider in the analysis.
60 |   - `l_step`: Step size for increasing L when the limit is reached during dynamic adjustment.
61 |   - `analysis_method`: The analysis method to use. Options are "susie_rss", "single_effect", or "bayesian_conditional_regression".
62 |   The function first checks if the `sumstats` input contains 'z' or 'beta' and 'se' columns. If 'z' is present, it is used directly.
63 |   If 'beta' and 'se' are present, 'z' is calculated as 'beta' divided by 'se'.
64 | 
65 |   Based on the specified `analysis_method`, the function calls the `susie_rss_wrapper` with the appropriate parameters.
66 |   - For "single_effect" method, `L` is set to 1.
67 |   - For "susie_rss" and "bayesian_conditional_regression" methods, `L`, `max_L`, and `l_step` are used.
68 |   - For "bayesian_conditional_regression" method, `max_iter` is set to 1.
69 | 
70 |   The results are then post-processed using the `susie_post_processor` function with the specified `signal_cutoff` and `secondary_coverage` values.
71 | 
72 |   The function returns a list containing the results of the SuSiE RSS analysis based on the specified method.
73 | }
74 | 


--------------------------------------------------------------------------------
/man/susie_rss_qc.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sumstats_qc.R
 3 | \name{susie_rss_qc}
 4 | \alias{susie_rss_qc}
 5 | \title{Perform Quality Control based on SuSiE RSS}
 6 | \usage{
 7 | susie_rss_qc(sumstats, LD_mat, n = NULL, var_y = NULL, L = 10)
 8 | }
 9 | \arguments{
10 | \item{sumstats}{A data frame containing summary statistics with 'variant_id', 'z', and 'pos' columns.}
11 | 
12 | \item{LD_mat}{Numeric matrix representing the LD (linkage disequilibrium) matrix.}
13 | 
14 | \item{L}{Number of causal configurations to consider in the analysis.}
15 | }
16 | \value{
17 | A list containing the quality-controlled summary statistics and updated LD matrix.
18 |   - sumstats_qc: A data frame containing the quality-controlled summary statistics.
19 |   - LD_mat_qc: The updated LD matrix excluding outlier variants.
20 | }
21 | \description{
22 | This function performs quality control on summary statistics using SuSiE RSS.
23 | It identifies and removes outliers based on z-score and LD matrix discrepancy correction.
24 | }
25 | \details{
26 | This function performs quality control on summary statistics using SuSiE RSS.
27 |   It first extracts the z-scores from the `sumstats` data frame and performs SuSiE RSS analysis
28 |   with discrepancy correction using the `susie_rss` function.
29 | 
30 |   Next, it identifies outlier variants based on the results of the SuSiE RSS analysis.
31 |   It removes the outlier variants from the summary statistics and updates the LD matrix accordingly.
32 | 
33 |   Finally, it returns a list containing the quality-controlled summary statistics and the updated LD matrix.
34 | }
35 | 


--------------------------------------------------------------------------------
/man/susie_rss_wrapper.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/susie_wrapper.R
 3 | \name{susie_rss_wrapper}
 4 | \alias{susie_rss_wrapper}
 5 | \title{Wrapper Function for SuSiE RSS with Dynamic L Adjustment}
 6 | \usage{
 7 | susie_rss_wrapper(
 8 |   z,
 9 |   R,
10 |   n = NULL,
11 |   var_y = NULL,
12 |   L = 10,
13 |   max_L = 30,
14 |   l_step = 5,
15 |   zR_discrepancy_correction = FALSE,
16 |   coverage = 0.95,
17 |   ...
18 | )
19 | }
20 | \arguments{
21 | \item{z}{Z score vector.}
22 | 
23 | \item{R}{LD matrix.}
24 | 
25 | \item{n}{Sample size; if NULL, certain functionalities that require sample size will be skipped.}
26 | 
27 | \item{var_y}{Total phenotypic variance.}
28 | 
29 | \item{L}{Initial number of causal configurations to consider.}
30 | 
31 | \item{max_L}{Maximum number of causal configurations to consider.}
32 | 
33 | \item{l_step}{Step size for increasing L when the limit is reached.}
34 | 
35 | \item{zR_discrepancy_correction}{Logical indicating if z-score and R matrix discrepancy correction should be performed.}
36 | 
37 | \item{...}{Extra parameters to pass to the susie_rss function.}
38 | }
39 | \value{
40 | SuSiE RSS fit object after dynamic L adjustment
41 | }
42 | \description{
43 | This function performs SuSiE RSS analysis, dynamically adjusting the number of causal configurations (L)
44 | and applying quality control and imputation as necessary. It includes the total phenotypic variance `var_y`
45 | as one of its parameters to align with the `susie_rss` function's interface.
46 | }
47 | 


--------------------------------------------------------------------------------
/man/trim_ctwas_variants.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/ctwas_wrapper.R
 3 | \name{trim_ctwas_variants}
 4 | \alias{trim_ctwas_variants}
 5 | \title{Function to select variants for ctwas weights input}
 6 | \usage{
 7 | trim_ctwas_variants(
 8 |   region_data,
 9 |   twas_weight_cutoff = 1e-05,
10 |   cs_min_cor = 0.8,
11 |   min_pip_cutoff = 0.1,
12 |   max_num_variants = 1000
13 | )
14 | }
15 | \arguments{
16 | \item{region_data}{A list of list containing weights list and snp_info list data for multiple genes/events within a single LD block region.}
17 | 
18 | \item{export_twas_weight_db}{A list of list of fine-mapping result data formatted by generate_twas_db function.}
19 | 
20 | \item{region_block}{A string for region information for region_weights, consisted of chromosome number, star and end position of LD block conneced with "_".}
21 | }
22 | \description{
23 | Function to select variants for ctwas weights input
24 | }
25 | 


--------------------------------------------------------------------------------
/man/twas_analysis.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/twas.R
 3 | \name{twas_analysis}
 4 | \alias{twas_analysis}
 5 | \title{TWAS Analysis}
 6 | \usage{
 7 | twas_analysis(
 8 |   weights_matrix,
 9 |   gwas_sumstats_db,
10 |   LD_matrix,
11 |   extract_variants_objs
12 | )
13 | }
14 | \arguments{
15 | \item{weights_matrix}{A matrix containing weights for all methods.}
16 | 
17 | \item{gwas_sumstats_db}{A data frame containing the GWAS summary statistics.}
18 | 
19 | \item{LD_matrix}{A matrix representing linkage disequilibrium between variants.}
20 | 
21 | \item{extract_variants_objs}{A vector of variant identifiers to extract from the GWAS and LD matrix.}
22 | }
23 | \value{
24 | A list with TWAS z-scores and p-values across four methods for each gene.
25 | }
26 | \description{
27 | Performs TWAS analysis using the provided weights matrix, GWAS summary statistics database,
28 | and LD matrix. It extracts the necessary GWAS summary statistics and LD matrix based on the
29 | specified variants and computes the z-score and p-value for each gene.
30 | }
31 | 


--------------------------------------------------------------------------------
/man/twas_joint_z.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/twas.R
 3 | \name{twas_joint_z}
 4 | \alias{twas_joint_z}
 5 | \title{Multi-condition TWAS joint test}
 6 | \usage{
 7 | twas_joint_z(weights, z, R = NULL, X = NULL)
 8 | }
 9 | \arguments{
10 | \item{weights}{A matrix of weights, where each column corresponds to a different condition.}
11 | 
12 | \item{z}{A vector of GWAS z-scores.}
13 | 
14 | \item{R}{An optional correlation matrix. If not provided, it will be calculated from the genotype matrix X.}
15 | 
16 | \item{X}{An optional genotype matrix. If R is not provided, X must be supplied to calculate the correlation matrix.}
17 | }
18 | \value{
19 | A list containing the following elements:
20 | \itemize{
21 |   \item Z: A matrix of TWAS z-scores and p-values for each condition.
22 |   \item GBJ: The result of the GBJ test.
23 | }
24 | }
25 | \description{
26 | This function performs a multi-condition TWAS joint test using the GBJ method.
27 | It assumes that the input genotype matrix (X) is standardized.
28 | }
29 | 


--------------------------------------------------------------------------------
/man/twas_multivariate_weights_pipeline.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/twas_weights.R
 3 | \name{twas_multivariate_weights_pipeline}
 4 | \alias{twas_multivariate_weights_pipeline}
 5 | \title{TWAS Multivariate Weights Pipeline}
 6 | \usage{
 7 | twas_multivariate_weights_pipeline(
 8 |   X,
 9 |   Y,
10 |   mnm_fit,
11 |   cv_folds = 5,
12 |   sample_partition = NULL,
13 |   data_driven_prior_matrices = NULL,
14 |   data_driven_prior_matrices_cv = NULL,
15 |   canonical_prior_matrices = FALSE,
16 |   mvsusie_max_iter = 200,
17 |   mrmash_max_iter = 5000,
18 |   max_cv_variants = -1,
19 |   cv_threads = 1,
20 |   verbose = FALSE
21 | )
22 | }
23 | \arguments{
24 | \item{X}{A matrix of genotype data where rows represent samples and columns represent genetic variants.}
25 | 
26 | \item{Y}{A matrix of phenotype measurements, where rows represent samples and columns represent conditions.}
27 | 
28 | \item{mnm_fit}{An object containing the fitted multivariate models (e.g., mvSuSiE and mr.mash fits).}
29 | 
30 | \item{cv_folds}{The number of folds to use for cross-validation. Defaults to 5. Set to 0 to skip cross-validation.}
31 | 
32 | \item{sample_partition}{An optional vector specifying the partition of samples for cross-validation. If NULL, a random partition is generated.}
33 | 
34 | \item{data_driven_prior_matrices}{A list of data-driven covariance matrices for mr.mash weights. Defaults to NULL.}
35 | 
36 | \item{data_driven_prior_matrices_cv}{A list of data-driven covariance matrices for mr.mash weights in cross-validation. Defaults to NULL.}
37 | 
38 | \item{canonical_prior_matrices}{If TRUE, computes canonical covariance matrices for mr.mash. Defaults to FALSE.}
39 | 
40 | \item{mvsusie_max_iter}{The maximum number of iterations for mvSuSiE. Defaults to 200.}
41 | 
42 | \item{mrmash_max_iter}{The maximum number of iterations for mr.mash. Defaults to 5000.}
43 | 
44 | \item{max_cv_variants}{The maximum number of variants to be included in cross-validation. Defaults to -1 which means no limit.}
45 | 
46 | \item{cv_threads}{The number of threads to use for parallel computation in cross-validation. Defaults to 1.}
47 | 
48 | \item{verbose}{If TRUE, provides more detailed output during execution. Defaults to FALSE.}
49 | }
50 | \value{
51 | A list containing results from the TWAS pipeline, including TWAS weights, predictions, and optionally cross-validation results.
52 | }
53 | \description{
54 | This function performs weights computation for Transcriptome-Wide Association Study (TWAS)
55 | in a multivariate setting. It incorporates steps such as fitting models using mvSuSiE and mr.mash,
56 | calculating TWAS weights and predictions, and optionally performing cross-validation for TWAS weights.
57 | }
58 | \examples{
59 | # Example usage (assuming appropriate objects for X, Y, and mnm_fit are available):
60 | twas_results <- twas_multivariate_weights_pipeline(X, Y, mnm_fit)
61 | }
62 | 


--------------------------------------------------------------------------------
/man/twas_pipeline.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/twas.R
 3 | \name{twas_pipeline}
 4 | \alias{twas_pipeline}
 5 | \title{Function to perform TWAS analysis for across multiple contexts.
 6 | This function peforms TWAS analysis for multiple contexts for imputable genes within an LD region and summarize the twas results.}
 7 | \usage{
 8 | twas_pipeline(
 9 |   twas_weights_data,
10 |   ld_meta_file_path,
11 |   gwas_meta_file,
12 |   region_block,
13 |   rsq_cutoff = 0.01,
14 |   rsq_pval_cutoff = 0.05,
15 |   rsq_option = c("rsq", "adj_rsq"),
16 |   rsq_pval_option = c("pval", "adj_rsq_pval"),
17 |   mr_pval_cutoff = 0.05,
18 |   mr_coverage_column = "cs_coverage_0.95",
19 |   quantile_twas = FALSE,
20 |   output_twas_data = FALSE,
21 |   event_filters = NULL,
22 |   column_file_path = NULL,
23 |   comment_string = "#"
24 | )
25 | }
26 | \arguments{
27 | \item{twas_weights_data}{List of list of twas weights output from generate_twas_db function.}
28 | 
29 | \item{region_block}{A string with LD region informaiton of chromosome number, star and end position of LD block conneced with "_".}
30 | }
31 | \value{
32 | A list of list containing twas result table and formatted TWAS data compatible with ctwas_sumstats() function.
33 | \itemize{
34 |   \item{twas_table}{ A dataframe of twas results summary is generated for each gene-contexts-method pair of all methods for imputable genes.}
35 |   \item{twas_data}{ A list of list containing formatted TWAS data.}
36 | }
37 | }
38 | \description{
39 | Function to perform TWAS analysis for across multiple contexts.
40 | This function peforms TWAS analysis for multiple contexts for imputable genes within an LD region and summarize the twas results.
41 | }
42 | 


--------------------------------------------------------------------------------
/man/twas_predict.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/twas_weights.R
 3 | \name{twas_predict}
 4 | \alias{twas_predict}
 5 | \title{Predict outcomes using TWAS weights}
 6 | \usage{
 7 | twas_predict(X, weights_list)
 8 | }
 9 | \arguments{
10 | \item{X}{A matrix or data frame of predictors where each row is an observation and each
11 | column is a variable.}
12 | 
13 | \item{weights_list}{A list of numeric vectors representing the weights for each predictor.
14 | The names of the list elements should follow the pattern \code{[outcome]_weights}, where
15 | \code{[outcome]} is the name of the outcome variable that the weights are associated with.}
16 | }
17 | \value{
18 | A named list of numeric vectors, where each vector is the predicted outcome for the
19 | corresponding set of weights in \code{weights_list}. The names of the list elements are
20 | derived from the names in \code{weights_list} by replacing "_weights" with "_predicted".
21 | }
22 | \description{
23 | This function takes a matrix of predictors (\code{X}) and a list of TWAS (transcriptome-wide
24 | association studies) weights (\code{weights_list}), and calculates the predicted outcomes by
25 | multiplying \code{X} by each set of weights in \code{weights_list}. The names of the elements
26 | in the output list are derived from the names in \code{weights_list}, with "_weights" replaced
27 | by "_predicted".
28 | }
29 | \examples{
30 | # Assuming `X` is your matrix of predictors and `weights_list` is your list of weights:
31 | predicted_outcomes <- twas_predict(X, weights_list)
32 | print(predicted_outcomes)
33 | }
34 | 


--------------------------------------------------------------------------------
/man/twas_weights.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/twas_weights.R
 3 | \name{twas_weights}
 4 | \alias{twas_weights}
 5 | \title{Run multiple TWAS weight methods}
 6 | \usage{
 7 | twas_weights(X, Y, weight_methods, num_threads = 1)
 8 | }
 9 | \arguments{
10 | \item{X}{A matrix of samples by features, where each row represents a sample and each column a feature.}
11 | 
12 | \item{Y}{A matrix (or vector, which will be converted to a matrix) of samples by outcomes, where each row corresponds to a sample.}
13 | 
14 | \item{weight_methods}{A list of methods and their specific arguments, formatted as list(method1 = method1_args, method2 = method2_args), or alternatively a character vector of method names (eg, c("susie_weights", "enet_weights")) in which case default arguments will be used for all methods.
15 | methods in the list can be either univariate (applied to each column of Y) or multivariate (applied to the entire Y matrix).}
16 | 
17 | \item{num_threads}{The number of threads to use for parallel processing.
18 | If set to -1, the function uses all available cores.
19 | If set to 0 or 1, no parallel processing is performed.
20 | If set to 2 or more, parallel processing is enabled with that many threads.}
21 | }
22 | \value{
23 | A list where each element is named after a method and contains the weight matrix produced by that method.
24 | }
25 | \description{
26 | Applies specified weight methods to the datasets X and Y, returning weight matrices for each method.
27 | Handles both univariate and multivariate methods, and filters out columns in X with zero standard error.
28 | This function utilizes parallel processing to handle multiple methods.
29 | }
30 | 


--------------------------------------------------------------------------------
/man/twas_weights_cv.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/twas_weights.R
 3 | \name{twas_weights_cv}
 4 | \alias{twas_weights_cv}
 5 | \title{Cross-Validation for weights selection in Transcriptome-Wide Association Studies (TWAS)}
 6 | \usage{
 7 | twas_weights_cv(
 8 |   X,
 9 |   Y,
10 |   fold = NULL,
11 |   sample_partitions = NULL,
12 |   weight_methods = NULL,
13 |   max_num_variants = NULL,
14 |   variants_to_keep = NULL,
15 |   num_threads = 1,
16 |   ...
17 | )
18 | }
19 | \arguments{
20 | \item{X}{A matrix of samples by features, where each row represents a sample and each column a feature.}
21 | 
22 | \item{Y}{A matrix (or vector, which will be converted to a matrix) of samples by outcomes, where each row corresponds to a sample.}
23 | 
24 | \item{fold}{An optional integer specifying the number of folds for cross-validation.
25 | If NULL, 'sample_partitions' must be provided.}
26 | 
27 | \item{sample_partitions}{An optional dataframe with predefined sample partitions,
28 | containing columns 'Sample' (sample names) and 'Fold' (fold number). If NULL, 'fold' must be provided.}
29 | 
30 | \item{weight_methods}{A list of methods and their specific arguments, formatted as list(method1 = method1_args, method2 = method2_args), or alternatively a character vector of method names (eg, c("susie_weights", "enet_weights")) in which case default arguments will be used for all methods.
31 | methods in the list can be either univariate (applied to each column of Y) or multivariate (applied to the entire Y matrix).}
32 | 
33 | \item{max_num_variants}{An optional integer to set the randomly selected maximum number of variants to use for CV purpose, to save computing time.}
34 | 
35 | \item{variants_to_keep}{An optional integer to ensure that the listed variants are kept in the CV when there is a limit on the max_num_variants to use.}
36 | 
37 | \item{num_threads}{The number of threads to use for parallel processing.
38 | If set to -1, the function uses all available cores.
39 | If set to 0 or 1, no parallel processing is performed.
40 | If set to 2 or more, parallel processing is enabled with that many threads.}
41 | }
42 | \value{
43 | A list with the following components:
44 | \itemize{
45 |   \item `sample_partition`: A dataframe showing the sample partitioning used in the cross-validation.
46 |   \item `prediction`: A list of matrices with predicted Y values for each method and fold.
47 |   \item `metrics`: A matrix with rows representing methods and columns for various metrics:
48 |     \itemize{
49 |       \item `corr`: Pearson's correlation between predicated and observed values.
50 |       \item `adj_rsq`: Adjusted R-squared value (which indicates the proportion of variance explained by the model) that accounts for the number of predictors in the model.
51 |       \item `pval`: P-value assessing the significance of the model's predictions.
52 |       \item `RMSE`: Root Mean Squared Error, a measure of the model's prediction error.
53 |       \item `MAE`: Mean Absolute Error, a measure of the average magnitude of errors in a set of predictions.
54 |     }
55 |   \item `time_elapsed`: The time taken to complete the cross-validation process.
56 | }
57 | }
58 | \description{
59 | Performs cross-validation for TWAS, supporting both univariate and multivariate methods.
60 | It can either create folds for cross-validation or use pre-defined sample partitions.
61 | For multivariate methods, it applies the method to the entire Y matrix for each fold.
62 | }
63 | 


--------------------------------------------------------------------------------
/man/twas_weights_pipeline.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/twas_weights.R
 3 | \name{twas_weights_pipeline}
 4 | \alias{twas_weights_pipeline}
 5 | \title{TWAS Weights Pipeline}
 6 | \usage{
 7 | twas_weights_pipeline(
 8 |   X,
 9 |   y,
10 |   susie_fit = NULL,
11 |   cv_folds = 5,
12 |   sample_partition = NULL,
13 |   weight_methods = list(enet_weights = list(), lasso_weights = list(), bayes_r_weights =
14 |     list(), bayes_l_weights = list(), mrash_weights = list(init_prior_sd = TRUE, max.iter
15 |     = 100), susie_weights = list(refine = FALSE, init_L = 5, max_L = 20)),
16 |   max_cv_variants = -1,
17 |   cv_threads = 1,
18 |   cv_weight_methods = NULL
19 | )
20 | }
21 | \arguments{
22 | \item{X}{A matrix of genotype data where rows represent samples and columns represent genetic variants.}
23 | 
24 | \item{y}{A vector of phenotype measurements for each sample.}
25 | 
26 | \item{susie_fit}{An object returned by the SuSiE function, containing the SuSiE model fit.}
27 | 
28 | \item{cv_folds}{The number of folds to use for cross-validation. Set to 0 to skip cross-validation. Defaults to 5.}
29 | 
30 | \item{weight_methods}{List of methods to use to compute weights for TWAS; along with their parameters.}
31 | 
32 | \item{max_cv_variants}{The maximum number of variants to be included in cross-validation. Defaults to -1 which means no limit.}
33 | 
34 | \item{cv_threads}{The number of threads to use for parallel computation in cross-validation. Defaults to 1.}
35 | 
36 | \item{cv_weight_methods}{List of methods to use for cross-validation. If NULL, uses the same methods as weight_methods.}
37 | }
38 | \value{
39 | A list containing results from the TWAS pipeline, including TWAS weights, predictions, and optionally cross-validation results.
40 | }
41 | \description{
42 | This function performs weights computation for Transcriptome-Wide Association Study (TWAS)
43 | incorporating various steps such as filtering variants by linkage disequilibrium reference panel variants,
44 | fitting models using SuSiE and other methods, and calculating TWAS weights and predictions.
45 | Optionally, it can perform cross-validation for TWAS weights.
46 | }
47 | \examples{
48 | # Example usage (assuming appropriate objects for X, y, and susie_fit are available):
49 | twas_results <- twas_weights_pipeline(X, y, susie_fit)
50 | }
51 | 


--------------------------------------------------------------------------------
/man/twas_z.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/twas.R
 3 | \name{twas_z}
 4 | \alias{twas_z}
 5 | \title{Calculate TWAS z-score and p-value}
 6 | \usage{
 7 | twas_z(weights, z, R = NULL, X = NULL)
 8 | }
 9 | \arguments{
10 | \item{weights}{A numeric vector of weights.}
11 | 
12 | \item{z}{A numeric vector of z-scores.}
13 | 
14 | \item{R}{An optional correlation matrix. If not provided, it will be calculated from the genotype matrix X.}
15 | 
16 | \item{X}{An optional genotype matrix. If R is not provided, X must be supplied to calculate the correlation matrix.}
17 | }
18 | \value{
19 | A list containing the following elements:
20 | \itemize{
21 |   \item z: The TWAS z-score.
22 |   \item pval: The corresponding p-value.
23 | }
24 | }
25 | \description{
26 | This function calculates the TWAS z-score and p-value given the weights, z-scores,
27 | and optionally the correlation matrix (R) or the genotype matrix (X).
28 | }
29 | 


--------------------------------------------------------------------------------
/man/univariate_analysis_pipeline.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/univariate_pipeline.R
 3 | \name{univariate_analysis_pipeline}
 4 | \alias{univariate_analysis_pipeline}
 5 | \title{Univariate Analysis Pipeline}
 6 | \usage{
 7 | univariate_analysis_pipeline(
 8 |   X,
 9 |   Y,
10 |   maf,
11 |   X_scalar = 1,
12 |   Y_scalar = 1,
13 |   X_variance = NULL,
14 |   other_quantities = list(),
15 |   imiss_cutoff = 1,
16 |   maf_cutoff = NULL,
17 |   xvar_cutoff = 0,
18 |   ld_reference_meta_file = NULL,
19 |   pip_cutoff_to_skip = 0,
20 |   init_L = 5,
21 |   max_L = 20,
22 |   l_step = 5,
23 |   signal_cutoff = 0.025,
24 |   coverage = c(0.95, 0.7, 0.5),
25 |   finemapping_extra_opts = list(refine = TRUE),
26 |   twas_weights = TRUE,
27 |   sample_partition = NULL,
28 |   max_cv_variants = -1,
29 |   cv_folds = 5,
30 |   cv_threads = 1,
31 |   verbose = 0
32 | )
33 | }
34 | \arguments{
35 | \item{X}{A matrix of genotype data where rows represent samples and columns represent genetic variants.}
36 | 
37 | \item{Y}{A vector of phenotype measurements.}
38 | 
39 | \item{maf}{A vector of minor allele frequencies for each variant in X.}
40 | 
41 | \item{X_scalar}{A scalar or vector to rescale X to its original scale.}
42 | 
43 | \item{Y_scalar}{A scalar to rescale Y to its original scale.}
44 | 
45 | \item{X_variance}{Optional variance of X. Default is NULL.}
46 | 
47 | \item{other_quantities}{A list of other quantities to be passed to susie_post_processor. Default is an empty list.}
48 | 
49 | \item{imiss_cutoff}{Individual missingness cutoff. Default is 1.0.}
50 | 
51 | \item{maf_cutoff}{Minor allele frequency cutoff. Default is NULL.}
52 | 
53 | \item{xvar_cutoff}{Variance cutoff for X. Default is 0.05.}
54 | 
55 | \item{ld_reference_meta_file}{An optional path to a file containing linkage disequilibrium reference data. Default is NULL.}
56 | 
57 | \item{pip_cutoff_to_skip}{Cutoff value for skipping analysis based on PIP values. Default is 0.}
58 | 
59 | \item{init_L}{Initial number of components for SuSiE model optimization. Default is 5.}
60 | 
61 | \item{max_L}{The maximum number of components in SuSiE. Default is 20.}
62 | 
63 | \item{l_step}{Step size for increasing the number of components during SuSiE optimization. Default is 5.}
64 | 
65 | \item{signal_cutoff}{Cutoff value for signal identification in PIP values. Default is 0.025.}
66 | 
67 | \item{coverage}{A vector of coverage probabilities for credible sets. Default is c(0.95, 0.7, 0.5).}
68 | 
69 | \item{twas_weights}{Whether to compute TWAS weights. Default is TRUE.}
70 | 
71 | \item{sample_partition}{Sample partition for cross-validation. Default is NULL.}
72 | 
73 | \item{max_cv_variants}{The maximum number of variants to be included in cross-validation. Default is -1 (no limit).}
74 | 
75 | \item{cv_folds}{The number of folds to use for cross-validation. Default is 5.}
76 | 
77 | \item{cv_threads}{The number of threads to use for parallel computation in cross-validation. Default is 1.}
78 | 
79 | \item{verbose}{Verbosity level. Default is 0.}
80 | }
81 | \value{
82 | A list containing the univariate analysis results.
83 | }
84 | \description{
85 | This function performs univariate analysis for fine-mapping and Transcriptome-Wide Association Study (TWAS)
86 | with optional cross-validation.
87 | }
88 | 


--------------------------------------------------------------------------------
/man/venn.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot.R
 3 | \name{venn}
 4 | \alias{venn}
 5 | \title{Venn Diagram}
 6 | \usage{
 7 | venn(data)
 8 | }
 9 | \arguments{
10 | \item{data}{a list with the twas siginificant gene_id results of four method "SuSiE","Lasso","Enet" and "MR.ASH"}
11 | }
12 | \value{
13 | plot object
14 | }
15 | \description{
16 | Venn Diagram
17 | }
18 | 


--------------------------------------------------------------------------------
/man/xqtl_enrichment_wrapper.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/encoloc.R
 3 | \name{xqtl_enrichment_wrapper}
 4 | \alias{xqtl_enrichment_wrapper}
 5 | \title{xQTL GWAS Enrichment Analysis}
 6 | \usage{
 7 | xqtl_enrichment_wrapper(
 8 |   xqtl_files,
 9 |   gwas_files,
10 |   xqtl_finemapping_obj = NULL,
11 |   gwas_finemapping_obj = NULL,
12 |   xqtl_varname_obj = NULL,
13 |   gwas_varname_obj = NULL,
14 |   num_gwas = NULL,
15 |   pi_qtl = NULL,
16 |   lambda = 1,
17 |   ImpN = 25,
18 |   num_threads = 1
19 | )
20 | }
21 | \arguments{
22 | \item{xqtl_files}{Vector of xQTL RDS file paths.}
23 | 
24 | \item{gwas_files}{Vector of GWAS RDS file paths.}
25 | 
26 | \item{xqtl_finemapping_obj}{Optional table name in xQTL RDS files (default 'susie_fit').}
27 | 
28 | \item{gwas_finemapping_obj}{Optional table name in GWAS RDS files (default 'susie_fit').}
29 | 
30 | \item{xqtl_varname_obj}{Optional table name in xQTL RDS files (default 'susie_fit').}
31 | 
32 | \item{gwas_varname_obj}{Optional table name in GWAS RDS files (default 'susie_fit').}
33 | 
34 | \item{pi_qtl}{Optional parameter for xQTL enrichment estimation (see `compute_qtl_enrichment`).}
35 | 
36 | \item{lambda}{Shrinkage parameter for enrichment computation (see `compute_qtl_enrichment`).}
37 | 
38 | \item{ImpN}{Importance parameter for enrichment computation (see `compute_qtl_enrichment`).}
39 | 
40 | \item{num_threads}{Number of threads for parallel processing (see `compute_qtl_enrichment`).}
41 | 
42 | \item{pi_gwas}{Optional parameter for GWAS enrichment estimation (see `compute_qtl_enrichment`).}
43 | }
44 | \value{
45 | The output from the compute_qtl_enrichment function.
46 | }
47 | \description{
48 | This function processes GWAS and xQTL finemapped data files and then computes QTL enrichment.
49 | For details on the parameters `pi_gwas`, `pi_qtl`, `lambda`, `ImpN`, and `num_threads`,
50 | refer to the documentation of the `compute_qtl_enrichment` function.
51 | }
52 | \examples{
53 | gwas_files <- c("gwas_file1.rds", "gwas_file2.rds")
54 | xqtl_files <- c("xqtl_file1.rds", "xqtl_file2.rds")
55 | result <- xqtl_enrichment_wrapper(gwas_files, xqtl_files)
56 | }
57 | 


--------------------------------------------------------------------------------
/man/z_to_pvalue.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/misc.R
 3 | \name{z_to_pvalue}
 4 | \alias{z_to_pvalue}
 5 | \title{Convert Z-scores to P-values}
 6 | \usage{
 7 | z_to_pvalue(z)
 8 | }
 9 | \arguments{
10 | \item{z}{Numeric vector. The z-scores to be converted to p-values.}
11 | }
12 | \value{
13 | A numeric vector of p-values corresponding to the input z-scores.
14 | }
15 | \description{
16 | This function calculates p-values from given z-scores using a two-tailed normal distribution.
17 | It supports vector input to process multiple z-scores simultaneously.
18 | }
19 | \details{
20 | The function uses the following formula to calculate p-values:
21 | p-value = 2 * Φ(-|z|)
22 | Where Φ is the cumulative distribution function of the standard normal distribution.
23 | }
24 | \note{
25 | This function assumes that the input z-scores are from a two-tailed test and
26 | are normally distributed. It calculates two-sided p-values.
27 | For extremely large absolute z-scores, the resulting p-values may be computed as zero
28 | due to floating-point limitations in R. This occurs when the absolute z-score > 37.
29 | }
30 | \examples{
31 | z <- c(2.5, -1.8, 3.2, 0.7)
32 | pvalues <- z_to_pvalue(z)
33 | print(pvalues)
34 | 
35 | }
36 | 


--------------------------------------------------------------------------------
/src/Makevars.in:
--------------------------------------------------------------------------------
1 | CXX_STD      = CXX11
2 | GSL_LIBS     = @GSL_LIBS@
3 | GSL_CFLAGS   = @GSL_CFLAGS@
4 | PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) $(GSL_CFLAGS) -DARMA_64BIT_WORD=1 -DHAVE_WORKING_LOG1P -DSIMDE_ENABLE_NATIVE_ALIASES
5 | PKG_LIBS     = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) \
6 |                $(FLIBS) $(LDFLAGS) $(GSL_LIBS)
7 | 


--------------------------------------------------------------------------------
/src/function_pool.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "function_pool.h"
 3 | #include <iostream>
 4 | /* based on 
 5 | https://stackoverflow.com/a/51400041
 6 | 
 7 | https://stackoverflow.com/questions/23896421/efficiently-waiting-for-all-tasks-in-a-threadpool-to-finish
 8 | */
 9 | 
10 | Function_pool::Function_pool(unsigned int n) : m_function_queue(), m_lock(), m_data_condition(), busy(0), stop(false) {
11 |     for (size_t i=0; i<n; i++) {
12 | 	thread_pool.push_back(std::thread(&Function_pool::infinite_loop_func, this));
13 |     }
14 | }
15 | 
16 | Function_pool::~Function_pool() {
17 |     std::unique_lock<std::mutex> lock(m_lock);
18 |     stop = true;
19 |     m_data_condition.notify_all();
20 |     lock.unlock();
21 |     for (size_t i=0; i<thread_pool.size(); i++) {
22 | 	thread_pool[i].join();
23 |     }
24 | }
25 | 
26 | void Function_pool::push(std::function<void()> func) {
27 |     std::unique_lock<std::mutex> lock(m_lock);
28 |     m_function_queue.push(func);
29 |     lock.unlock();
30 |     m_data_condition.notify_one();
31 | }
32 | 
33 | void Function_pool::infinite_loop_func() {
34 |     std::function<void()> func;
35 |     while (true) {
36 | 	std::unique_lock<std::mutex> lock(m_lock);
37 | 	m_data_condition.wait(lock, [this]() {return \
38 | 		!m_function_queue.empty() || stop; });
39 | 	if (!m_function_queue.empty()) {
40 | 	    busy++;
41 | 	    func = m_function_queue.front();
42 | 	    m_function_queue.pop();
43 | 	    lock.unlock();
44 | 	    func();
45 | 	    lock.lock();
46 | 	    busy--;
47 | 	    lock.unlock();
48 | 	    finished.notify_one();
49 | 	}
50 | 	else if (stop) {
51 | 	    return;
52 | 	}
53 |     }
54 | }
55 | 
56 | void Function_pool::waitFinished() {
57 |     std::unique_lock<std::mutex> lock(m_lock);
58 |     finished.wait(lock, [this](){return m_function_queue.empty() && busy == 0; });
59 | }
60 | 
61 | 


--------------------------------------------------------------------------------
/src/function_pool.h:
--------------------------------------------------------------------------------
 1 | #include <queue>
 2 | #include <functional>
 3 | #include <mutex>
 4 | #include <condition_variable>
 5 | #include <atomic>
 6 | #include <cassert>
 7 | #include <thread>
 8 | 
 9 | class Function_pool {
10 |     private:
11 | 	std::vector<std::thread> thread_pool;
12 | 	std::queue<std::function<void()>> m_function_queue;
13 | 	std::mutex m_lock;
14 | 	std::condition_variable m_data_condition;
15 | 	std::condition_variable finished;
16 | 	std::atomic<bool> m_accept_functions;
17 | 	int busy;
18 | 	bool stop;
19 | 
20 |     public:
21 | 	Function_pool(unsigned int n);
22 | 	~Function_pool();
23 | 	void push(std::function<void()> func);
24 | 	void infinite_loop_func();
25 | 	void waitFinished();
26 | };
27 | 


--------------------------------------------------------------------------------
/src/mr_ash.cpp:
--------------------------------------------------------------------------------
 1 | #include <RcppArmadillo.h>
 2 | #include "mr_ash.h"
 3 | 
 4 | using namespace Rcpp;
 5 | using namespace arma;
 6 | using namespace std;
 7 | 
 8 | // [[Rcpp::export]]
 9 | List rcpp_mr_ash_rss(const NumericVector& bhat, const NumericVector& shat, const NumericVector& z,
10 |                      const NumericMatrix& R, double var_y, int n, double sigma2_e, const NumericVector& s0,
11 |                      const NumericVector& w0, const NumericVector& mu1_init, double tol = 1e-8,
12 |                      int max_iter = 1e5, bool update_w0 = true, bool update_sigma = true,
13 |                      bool compute_ELBO = true, bool standardize = false, int ncpus = 1) {
14 | 
15 | 	    // Convert input types
16 | 	vec bhat_vec = as<vec>(bhat);
17 | 	vec shat_vec = as<vec>(shat);
18 | 	vec z_vec = as<vec>(z);
19 | 	mat R_mat = as<mat>(R);
20 | 	vec s0_vec = as<vec>(s0);
21 | 	vec w0_vec = as<vec>(w0);
22 | 	vec mu1_init_vec = as<vec>(mu1_init);
23 | 
24 | 	    // Call the C++ function
25 | 	unordered_map<string, mat> result = mr_ash_rss(bhat_vec, shat_vec, z_vec, R_mat, var_y, n, sigma2_e, s0_vec, w0_vec,
26 | 	                                               mu1_init_vec, tol, max_iter, update_w0, update_sigma, compute_ELBO,
27 | 	                                               standardize, ncpus);
28 | 
29 | 	    // Convert the result to a list
30 | 	List ret;
31 | 	for (const auto& item : result) {
32 | 		ret[item.first] = wrap(item.second);
33 | 	}
34 | 
35 | 	return ret;
36 | }


--------------------------------------------------------------------------------
/src/prscs_mcmc.cpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * @file prs_cs_wrapper.cpp
 3 |  * @brief Rcpp wrapper for the prs_cs function.
 4 |  */
 5 | 
 6 | #include <RcppArmadillo.h>
 7 | #include "prscs_mcmc.h"
 8 | 
 9 | // [[Rcpp::depends(RcppArmadillo)]]
10 | /**
11 |  * @brief Rcpp wrapper for the prs_cs function.
12 |  *
13 |  * @param a Shape parameter for the prior distribution of psi.
14 |  * @param b Scale parameter for the prior distribution of psi.
15 |  * @param phi Global shrinkage parameter. If nullptr, it will be estimated automatically.
16 |  * @param bhat Vector of effect sizes.
17 |  * @param maf Vector of minor allele frequencies. If nullptr, it is assumed to be a vector of zeros.
18 |  * @param n Sample size.
19 |  * @param ld_blk List of LD blocks.
20 |  * @param n_iter Number of MCMC iterations.
21 |  * @param n_burnin Number of burn-in iterations.
22 |  * @param thin Thinning interval.
23 |  * @param verbose Whether to print verbose output.
24 |  * @param seed Random seed. If nullptr, no seed is set.
25 |  * @return A list containing the posterior estimates.
26 |  */
27 | // [[Rcpp::export]]
28 | Rcpp::List prs_cs_rcpp(double a, double b, Rcpp::Nullable<double> phi,
29 |                        Rcpp::NumericVector bhat, Rcpp::Nullable<Rcpp::NumericVector> maf,
30 |                        int n, Rcpp::List ld_blk,
31 |                        int n_iter, int n_burnin, int thin,
32 |                        bool verbose, Rcpp::Nullable<unsigned int> seed) {
33 | 	// Convert Rcpp types to C++ types
34 | 	std::vector<double> bhat_vec = Rcpp::as<std::vector<double> >(bhat);
35 | 	std::vector<double> maf_vec;
36 | 	if (maf.isNotNull()) {
37 | 		maf_vec = Rcpp::as<std::vector<double> >(maf.get());
38 | 	} else {
39 | 		maf_vec = std::vector<double>(bhat_vec.size(), 0.0); // Populate with zeros if maf is NULL
40 | 	}
41 | 
42 | 	std::vector<arma::mat> ld_blk_vec;
43 | 	for (int i = 0; i < ld_blk.size(); ++i) {
44 | 		ld_blk_vec.push_back(Rcpp::as<arma::mat>(ld_blk[i]));
45 | 	}
46 | 
47 | 	double* phi_ptr = nullptr;
48 | 	if (phi.isNotNull()) {
49 | 		phi_ptr = new double(Rcpp::as<double>(phi));
50 | 	}
51 | 
52 | 	unsigned int seed_val = 0;
53 | 	if (seed.isNotNull()) {
54 | 		seed_val = Rcpp::as<unsigned int>(seed);
55 | 	} else {
56 | 		seed_val = std::random_device{}();
57 | 	}
58 | 
59 | 	std::map<std::string, arma::vec> output = prs_cs_mcmc(a, b, phi_ptr, bhat_vec, maf_vec, n, ld_blk_vec,
60 | 	                                                      n_iter, n_burnin, thin, verbose, seed_val);
61 | 
62 | 	// Convert the output to an Rcpp::List
63 | 	Rcpp::List result;
64 | 	result["beta_est"] = output["beta_est"];
65 | 	result["psi_est"] = output["psi_est"];
66 | 	result["sigma_est"] = output["sigma_est"](0);
67 | 	result["phi_est"] = output["phi_est"](0);
68 | 
69 | 	// Clean up dynamically allocated memory
70 | 	delete phi_ptr;
71 | 	return result;
72 | }
73 | 


--------------------------------------------------------------------------------
/src/qtl_enrichment.cpp:
--------------------------------------------------------------------------------
 1 | #include "qtl_enrichment.hpp"
 2 | 
 3 | // [[Rcpp::export]]
 4 | Rcpp::List qtl_enrichment_rcpp(
 5 | 	SEXP r_gwas_pip, SEXP r_qtl_susie_fit,
 6 | 	double pi_gwas = 0, double pi_qtl = 0,
 7 | 	int ImpN = 25, double shrinkage_lambda = 1.0,
 8 | 	int num_threads = 1)
 9 | {
10 | 	// Convert r_gwas_pip to C++ type
11 | 	Rcpp::NumericVector gwas_pip_vec = Rcpp::as<Rcpp::NumericVector>(r_gwas_pip);
12 | 	std::vector<double> gwas_pip = Rcpp::as<std::vector<double> >(gwas_pip_vec);
13 | 	std::vector<std::string> gwas_pip_names = Rcpp::as<std::vector<std::string> >(gwas_pip_vec.names());
14 | 
15 | 	// Convert r_qtl_susie_fit to C++ type
16 | 	Rcpp::List susie_fit_list(r_qtl_susie_fit);
17 | 	std::vector<SuSiEFit> susie_fits;
18 | 
19 | 	for (int i = 0; i < susie_fit_list.size(); ++i) {
20 | 		SuSiEFit susie_fit(Rcpp::wrap(susie_fit_list[i]));
21 | 		susie_fits.push_back(susie_fit);
22 | 	}
23 | 
24 | 	std::map<std::string, double> output = qtl_enrichment_workhorse(susie_fits, gwas_pip, gwas_pip_names, pi_gwas, pi_qtl, ImpN, shrinkage_lambda, num_threads);
25 | 
26 | 	// Convert std::map to Rcpp::List
27 | 	Rcpp::List output_list;
28 | 	for (auto const& element : output) {
29 | 		output_list[element.first] = element.second;
30 | 	}
31 | 
32 | 	return output_list;
33 | }


--------------------------------------------------------------------------------
/src/sdpr.cpp:
--------------------------------------------------------------------------------
 1 | #include <RcppArmadillo.h>
 2 | #include <unordered_map>
 3 | #include "sdpr_mcmc.h"
 4 | 
 5 | // Rcpp interface function
 6 | // [[Rcpp::export]]
 7 | Rcpp::List sdpr_rcpp(
 8 | 	const std::vector<double>&          bhat,
 9 | 	const Rcpp::List&                   LD,
10 | 	int                                 n,
11 | 	Rcpp::Nullable<Rcpp::NumericVector> per_variant_sample_size = R_NilValue,
12 | 	Rcpp::Nullable<Rcpp::IntegerVector> array = R_NilValue,
13 | 	double                              a = 0.1,
14 | 	double                              c = 1.0,
15 | 	size_t                              M = 1000,
16 | 	double                              a0k = 0.5,
17 | 	double                              b0k = 0.5,
18 | 	int                                 iter = 1000,
19 | 	int                                 burn = 200,
20 | 	int                                 thin = 5,
21 | 	unsigned                            n_threads = 1,
22 | 	int                                 opt_llk = 1,
23 | 	bool                                verbose = true
24 | 	) {
25 | 	// Convert Rcpp::List to std::vector<arma::mat>
26 | 	std::vector<arma::mat> ref_ld_mat;
27 | 	for (int i = 0; i < LD.size(); i++) {
28 | 		ref_ld_mat.push_back(Rcpp::as<arma::mat>(LD[i]));
29 | 	}
30 | 
31 | 	// Initialize per_variant_sample_size and array if NULL
32 | 	std::vector<double> sz;
33 | 	std::vector<int> arr;
34 | 	if (per_variant_sample_size.isNotNull()) {
35 | 		sz = Rcpp::as<std::vector<double> >(per_variant_sample_size);
36 | 	} else {
37 | 		sz = std::vector<double>(bhat.size(), n);
38 | 	}
39 | 	if (array.isNotNull()) {
40 | 		arr = Rcpp::as<std::vector<int> >(array);
41 | 	} else {
42 | 		arr = std::vector<int>(bhat.size(), 1);
43 | 	}
44 | 
45 | 	// Create mcmc_data object
46 | 	mcmc_data data(bhat, ref_ld_mat, sz, arr);
47 | 
48 | 	// Call the mcmc function
49 | 	std::unordered_map<std::string, arma::vec> results = mcmc(
50 | 		data, n, a, c, M, a0k, b0k, iter, burn, thin, n_threads, opt_llk, verbose
51 | 		);
52 | 
53 | 	// Convert results to Rcpp::List
54 | 	Rcpp::List output = Rcpp::List::create(
55 | 		Rcpp::Named("beta_est") = results["beta"],
56 | 		Rcpp::Named("h2") = results["h2"]
57 | 		);
58 | 
59 | 	return output;
60 | }


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(pecotmr)
3 | test_check("pecotmr")
4 | 


--------------------------------------------------------------------------------
/tests/testthat/test_compute_qtl_enrichment.R:
--------------------------------------------------------------------------------
 1 | context("compute_qtl_enrichment")
 2 | 
 3 | generate_mock_data <- function(seed=1, num_pips = 1000, num_susie_fits = 2) {
 4 |   # Simulate fake data for gwas_pip
 5 |   n_gwas_pip <- num_pips
 6 |   gwas_pip <- runif(n_gwas_pip)
 7 |   names(gwas_pip) <- paste0("snp", 1:n_gwas_pip)
 8 |   gwas_fit <- list(pip=gwas_pip)
 9 | 
10 |   # Simulate fake data for a single SuSiEFit object
11 |   simulate_susiefit <- function(n, p) {
12 |     pip <- runif(n)
13 |     names(pip) <- paste0("snp", 1:n)
14 |     alpha <- t(matrix(runif(n * p), nrow = n))
15 |     alpha <- t(apply(alpha, 1, function(row) row / sum(row)))
16 |     list(
17 |       pip = pip,
18 |       alpha = alpha,
19 |       prior_variance = runif(p)
20 |     )
21 |   }
22 |   
23 |   # Simulate multiple SuSiEFit objects
24 |   n_susie_fits <- num_susie_fits
25 |   susie_fits <- replicate(n_susie_fits, simulate_susiefit(n_gwas_pip, 10), simplify = FALSE)
26 |   # Add these fits to a list, providing names to each element
27 |   names(susie_fits) <- paste0("fit", 1:length(susie_fits))
28 |   return(list(gwas_fit=gwas_fit, susie_fits=susie_fits))
29 | }
30 | 
31 | test_that("compute_qtl_enrichment dummy data single-threaded works",{
32 |   local_mocked_bindings(
33 |       qtl_enrichment_rcpp = function(...) TRUE)
34 |   input_data <- generate_mock_data(seed=1, num_pips=10)
35 |   expect_warning(
36 |     compute_qtl_enrichment(input_data$gwas_fit$pip, input_data$susie_fits, lambda = 1, ImpN = 10, num_threads = 1),
37 |     "num_gwas is not provided. Estimating pi_gwas from the data. Note that this estimate may be biased if the input gwas_pip does not contain genome-wide variants.")
38 |   expect_warning(
39 |     compute_qtl_enrichment(input_data$gwas_fit$pip, input_data$susie_fits, lambda = 1, ImpN = 10, num_threads = 1),
40 |     "pi_qtl is not provided. Estimating pi_qtl from the data. Note that this estimate may be biased if either 1) the input susie_qtl_regions does not have enough data, or 2) the single effects only include variables inside of credible sets or signal clusters.")
41 |   res <- expect_warning(compute_qtl_enrichment(input_data$gwas_fit$pip, input_data$susie_fits, num_gwas=5000, pi_qtl=0.49819, lambda = 1, ImpN = 10, num_threads = 1))
42 |   expect_true(length(res) > 0)
43 | })
44 | 
45 | test_that("compute_qtl_enrichment dummy data single thread and multi-threaded are equivalent",{
46 |   local_mocked_bindings(
47 |       qtl_enrichment_rcpp = function(...) TRUE)
48 |   input_data <- generate_mock_data(seed=1, num_pips=10)
49 |   res_single <- expect_warning(compute_qtl_enrichment(input_data$gwas_fit$pip, input_data$susie_fits, num_gwas=5000, pi_qtl=0.49819, lambda = 1, ImpN = 10, num_threads = 1))
50 |   res_multi <- expect_warning(compute_qtl_enrichment(input_data$gwas_fit$pip, input_data$susie_fits, num_gwas=5000, pi_qtl=0.49819, lambda = 1, ImpN = 10, num_threads = 2))
51 |   expect_equal(res_single, res_multi)
52 | })


--------------------------------------------------------------------------------
/tests/testthat/test_data/LD_block_1.chr1_1000_1200.float16.bim:
--------------------------------------------------------------------------------
1 | 1	chr1:1000_A_G	0	1000	A	G
2 | 1	chr1:1040_A_G	0	1040	A	G
3 | 1	chr1:1080_A_G	0	1080	A	G
4 | 1	chr1:1120_A_G	0	1120	A	G
5 | 1	chr1:1160_A_G	0	1160	A	G
6 | 


--------------------------------------------------------------------------------
/tests/testthat/test_data/LD_block_1.chr1_1000_1200.float16.txt.xz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StatFunGen/pecotmr/50e15f85e5b398c1831a42cee5b6e40308c880b4/tests/testthat/test_data/LD_block_1.chr1_1000_1200.float16.txt.xz


--------------------------------------------------------------------------------
/tests/testthat/test_data/LD_block_2.chr1_1200_1400.float16.bim:
--------------------------------------------------------------------------------
1 | 1	chr1:1200_A_G	0	1200	A	G
2 | 1	chr1:1240_A_G	0	1240	A	G
3 | 1	chr1:1280_A_G	0	1280	A	G
4 | 1	chr1:1320_A_G	0	1320	A	G
5 | 1	chr1:1360_A_G	0	1360	A	G
6 | 


--------------------------------------------------------------------------------
/tests/testthat/test_data/LD_block_2.chr1_1200_1400.float16.txt.xz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StatFunGen/pecotmr/50e15f85e5b398c1831a42cee5b6e40308c880b4/tests/testthat/test_data/LD_block_2.chr1_1200_1400.float16.txt.xz


--------------------------------------------------------------------------------
/tests/testthat/test_data/LD_block_3.chr1_1400_1600.float16.bim:
--------------------------------------------------------------------------------
1 | 1	chr1:1400_A_G	0	1400	A	G
2 | 1	chr1:1440_A_G	0	1440	A	G
3 | 1	chr1:1480_A_G	0	1480	A	G
4 | 1	chr1:1520_A_G	0	1520	A	G
5 | 1	chr1:1560_A_G	0	1560	A	G
6 | 


--------------------------------------------------------------------------------
/tests/testthat/test_data/LD_block_3.chr1_1400_1600.float16.txt.xz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StatFunGen/pecotmr/50e15f85e5b398c1831a42cee5b6e40308c880b4/tests/testthat/test_data/LD_block_3.chr1_1400_1600.float16.txt.xz


--------------------------------------------------------------------------------
/tests/testthat/test_data/LD_block_4.chr1_1600_1800.float16.bim:
--------------------------------------------------------------------------------
1 | 1	chr1:1600_A_G	0	1600	A	G
2 | 1	chr1:1640_A_G	0	1640	A	G
3 | 1	chr1:1680_A_G	0	1680	A	G
4 | 1	chr1:1720_A_G	0	1720	A	G
5 | 1	chr1:1760_A_G	0	1760	A	G
6 | 


--------------------------------------------------------------------------------
/tests/testthat/test_data/LD_block_4.chr1_1600_1800.float16.txt.xz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StatFunGen/pecotmr/50e15f85e5b398c1831a42cee5b6e40308c880b4/tests/testthat/test_data/LD_block_4.chr1_1600_1800.float16.txt.xz


--------------------------------------------------------------------------------
/tests/testthat/test_data/LD_block_5.chr1_1800_2000.float16.bim:
--------------------------------------------------------------------------------
1 | 1	chr1:1800_A_G	0	1800	A	G
2 | 1	chr1:1840_A_G	0	1840	A	G
3 | 1	chr1:1880_A_G	0	1880	A	G
4 | 1	chr1:1920_A_G	0	1920	A	G
5 | 1	chr1:1960_A_G	0	1960	A	G
6 | 


--------------------------------------------------------------------------------
/tests/testthat/test_data/LD_block_5.chr1_1800_2000.float16.txt.xz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StatFunGen/pecotmr/50e15f85e5b398c1831a42cee5b6e40308c880b4/tests/testthat/test_data/LD_block_5.chr1_1800_2000.float16.txt.xz


--------------------------------------------------------------------------------
/tests/testthat/test_data/dummy_data.pgen:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StatFunGen/pecotmr/50e15f85e5b398c1831a42cee5b6e40308c880b4/tests/testthat/test_data/dummy_data.pgen


--------------------------------------------------------------------------------
/tests/testthat/test_data/dummy_data.pvar:
--------------------------------------------------------------------------------
 1 | #CHROM	POS	ID	REF	ALT
 2 | 1	1	null_0	D	d
 3 | 1	2	null_1	d	D
 4 | 1	3	null_2	d	D
 5 | 1	4	null_3	d	D
 6 | 1	5	null_4	d	D
 7 | 1	6	null_5	d	D
 8 | 1	7	null_6	d	D
 9 | 1	8	null_7	d	D
10 | 1	9	null_8	d	D
11 | 1	10	null_9	d	D
12 | 1	11	disease_0	D	d
13 | 1	12	disease_1	D	d
14 | 1	13	disease_2	D	d
15 | 1	14	disease_3	D	d
16 | 1	15	disease_4	d	D
17 | 


--------------------------------------------------------------------------------
/tests/testthat/test_data/protocol_example.genotype.bed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StatFunGen/pecotmr/50e15f85e5b398c1831a42cee5b6e40308c880b4/tests/testthat/test_data/protocol_example.genotype.bed


--------------------------------------------------------------------------------
/tests/testthat/test_data/protocol_example.genotype.fam:
--------------------------------------------------------------------------------
  1 | sample_1	sample_1	0	0	0	-9
  2 | sample_41	sample_41	0	0	0	-9
  3 | sample_81	sample_81	0	0	0	-9
  4 | sample_122	sample_122	0	0	0	-9
  5 | sample_133	sample_133	0	0	0	-9
  6 | sample_140	sample_140	0	0	0	-9
  7 | sample_160	sample_160	0	0	0	-9
  8 | sample_174	sample_174	0	0	0	-9
  9 | sample_176	sample_176	0	0	0	-9
 10 | sample_214	sample_214	0	0	0	-9
 11 | sample_225	sample_225	0	0	0	-9
 12 | sample_228	sample_228	0	0	0	-9
 13 | sample_236	sample_236	0	0	0	-9
 14 | sample_237	sample_237	0	0	0	-9
 15 | sample_238	sample_238	0	0	0	-9
 16 | sample_246	sample_246	0	0	0	-9
 17 | sample_264	sample_264	0	0	0	-9
 18 | sample_274	sample_274	0	0	0	-9
 19 | sample_320	sample_320	0	0	0	-9
 20 | sample_332	sample_332	0	0	0	-9
 21 | sample_357	sample_357	0	0	0	-9
 22 | sample_380	sample_380	0	0	0	-9
 23 | sample_381	sample_381	0	0	0	-9
 24 | sample_408	sample_408	0	0	0	-9
 25 | sample_420	sample_420	0	0	0	-9
 26 | sample_426	sample_426	0	0	0	-9
 27 | sample_429	sample_429	0	0	0	-9
 28 | sample_440	sample_440	0	0	0	-9
 29 | sample_451	sample_451	0	0	0	-9
 30 | sample_484	sample_484	0	0	0	-9
 31 | sample_494	sample_494	0	0	0	-9
 32 | sample_499	sample_499	0	0	0	-9
 33 | sample_507	sample_507	0	0	0	-9
 34 | sample_520	sample_520	0	0	0	-9
 35 | sample_525	sample_525	0	0	0	-9
 36 | sample_545	sample_545	0	0	0	-9
 37 | sample_556	sample_556	0	0	0	-9
 38 | sample_576	sample_576	0	0	0	-9
 39 | sample_591	sample_591	0	0	0	-9
 40 | sample_594	sample_594	0	0	0	-9
 41 | sample_607	sample_607	0	0	0	-9
 42 | sample_611	sample_611	0	0	0	-9
 43 | sample_617	sample_617	0	0	0	-9
 44 | sample_618	sample_618	0	0	0	-9
 45 | sample_623	sample_623	0	0	0	-9
 46 | sample_634	sample_634	0	0	0	-9
 47 | sample_639	sample_639	0	0	0	-9
 48 | sample_682	sample_682	0	0	0	-9
 49 | sample_684	sample_684	0	0	0	-9
 50 | sample_689	sample_689	0	0	0	-9
 51 | sample_722	sample_722	0	0	0	-9
 52 | sample_730	sample_730	0	0	0	-9
 53 | sample_732	sample_732	0	0	0	-9
 54 | sample_739	sample_739	0	0	0	-9
 55 | sample_758	sample_758	0	0	0	-9
 56 | sample_764	sample_764	0	0	0	-9
 57 | sample_768	sample_768	0	0	0	-9
 58 | sample_775	sample_775	0	0	0	-9
 59 | sample_801	sample_801	0	0	0	-9
 60 | sample_840	sample_840	0	0	0	-9
 61 | sample_851	sample_851	0	0	0	-9
 62 | sample_863	sample_863	0	0	0	-9
 63 | sample_867	sample_867	0	0	0	-9
 64 | sample_868	sample_868	0	0	0	-9
 65 | sample_888	sample_888	0	0	0	-9
 66 | sample_891	sample_891	0	0	0	-9
 67 | sample_893	sample_893	0	0	0	-9
 68 | sample_927	sample_927	0	0	0	-9
 69 | sample_950	sample_950	0	0	0	-9
 70 | sample_954	sample_954	0	0	0	-9
 71 | sample_956	sample_956	0	0	0	-9
 72 | sample_983	sample_983	0	0	0	-9
 73 | sample_991	sample_991	0	0	0	-9
 74 | sample_1017	sample_1017	0	0	0	-9
 75 | sample_1109	sample_1109	0	0	0	-9
 76 | sample_1208	sample_1208	0	0	0	-9
 77 | sample_1228	sample_1228	0	0	0	-9
 78 | sample_1238	sample_1238	0	0	0	-9
 79 | sample_1257	sample_1257	0	0	0	-9
 80 | sample_1270	sample_1270	0	0	0	-9
 81 | sample_1287	sample_1287	0	0	0	-9
 82 | sample_1301	sample_1301	0	0	0	-9
 83 | sample_1307	sample_1307	0	0	0	-9
 84 | sample_1311	sample_1311	0	0	0	-9
 85 | sample_1323	sample_1323	0	0	0	-9
 86 | sample_1333	sample_1333	0	0	0	-9
 87 | sample_1344	sample_1344	0	0	0	-9
 88 | sample_1345	sample_1345	0	0	0	-9
 89 | sample_1388	sample_1388	0	0	0	-9
 90 | sample_1399	sample_1399	0	0	0	-9
 91 | sample_1416	sample_1416	0	0	0	-9
 92 | sample_1419	sample_1419	0	0	0	-9
 93 | sample_1423	sample_1423	0	0	0	-9
 94 | sample_1429	sample_1429	0	0	0	-9
 95 | sample_1448	sample_1448	0	0	0	-9
 96 | sample_1461	sample_1461	0	0	0	-9
 97 | sample_1478	sample_1478	0	0	0	-9
 98 | sample_1490	sample_1490	0	0	0	-9
 99 | sample_1493	sample_1493	0	0	0	-9
100 | sample_1503	sample_1503	0	0	0	-9
101 | 


--------------------------------------------------------------------------------
/tests/testthat/test_mrmash_wrapper.R:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StatFunGen/pecotmr/50e15f85e5b398c1831a42cee5b6e40308c880b4/tests/testthat/test_mrmash_wrapper.R


--------------------------------------------------------------------------------
/tests/testthat/test_slalom.R:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StatFunGen/pecotmr/50e15f85e5b398c1831a42cee5b6e40308c880b4/tests/testthat/test_slalom.R


--------------------------------------------------------------------------------
/tests/testthat/test_sumstats_qc.R:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StatFunGen/pecotmr/50e15f85e5b398c1831a42cee5b6e40308c880b4/tests/testthat/test_sumstats_qc.R


--------------------------------------------------------------------------------
/vignettes/qtl-gwas-resources.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Publicly available QTL and GWAS summary statistics resources"
 3 | author: "Gao Wang"
 4 | date: "`r Sys.Date()`"
 5 | output: rmarkdown::html_vignette
 6 | vignette: >
 7 |   %\VignetteIndexEntry{Publicly available QTL and GWAS summary statistics resources}
 8 |   %\VignetteEngine{knitr::rmarkdown}
 9 |   %\VignetteEncoding{UTF-8}
10 | ---
11 | 
12 | Here we list some publicly available QTL and GWAS summary statistics resources


--------------------------------------------------------------------------------
/vignettes/susie-rss-qc.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "SuSiE RSS with QC"
 3 | output: rmarkdown::html_vignette
 4 | vignette: >
 5 |   %\VignetteIndexEntry{SuSiE RSS with QC}
 6 |   %\VignetteEngine{knitr::rmarkdown}
 7 |   %\VignetteEncoding{UTF-8}
 8 | date: "2023-12-17"
 9 | ---
10 | 
11 | ```{r setup, include=FALSE}
12 | knitr::opts_chunk$set(echo = TRUE)
13 | ```
14 | 
15 | ## Overview
16 | 
17 | We demonstrates how we use SuSiE RSS wrapper implemented in `pecotmr` package to fine-map using summary statistics and reference LD panel data precomputed and available as resources.
18 | 
19 | To load the `pecotmr` package,
20 | 
21 | ```{r load-pkgs}
22 | library(pecotmr)
23 | ```
24 | 
25 | ## Load summary statistics and corresponding LD matrix
26 | 
27 | For given region of interest, we load the LD matrix and corresponding summary statistics data, where the summary statistics data should be a data frame with the columns "chrom" and "pos":
28 | 
29 | ```{r AD_GWAS sumstats}
30 | library(dplyr)
31 | library(vroom)
32 | #### Load LD_meta_file
33 | LD_meta_file = data.frame(chrom = c("chr8","chr8"), start = c(25007602,26225312), end = c(26225312, 27515963), 
34 | path = c(paste0("/home/aw3600/MR_KMT_analysis/test/chr8_25007602_26225312.cor.xz",",","/home/aw3600/MR_KMT_analysis/test/chr8_25007602_26225312.cor.xz.bim",sep=""),
35 |   paste0("/home/aw3600/MR_KMT_analysis/test/chr8_26225312_27515963.cor.xz",",","/home/aw3600/MR_KMT_analysis/test/chr8_26225312_27515963.cor.xz.bim",sep="")))
36 | #### Load region of interest
37 | region  = data.frame(chrom = 8,start = 26220000,end = 26225400)
38 | #### Load GWAS summary statistics
39 | AD_GWAS_path = "/mnt/vast/hpc/csg/xqtl_workflow_testing/ADGWAS/data_intergration/ADGWAS2022/ADGWAS_Bellenguez_2022.8/ADGWAS2022.chr8.sumstat.tsv"
40 | sumstats = vroom(AD_GWAS_path) %>%
41 |     rename("pos"="position") %>%
42 |     rename("chrom"="chromosome") %>%
43 |     mutate(z=beta/se) %>%
44 |     rename("A1"="ref","A2"="alt")
45 | ```
46 | Then extract the LD matrix based on the `LD_meta_file`, `region` and `sumstats`,
47 | ```{r load LD matrix}
48 | LD_meta_file_path = tempfile(fileext = ".csv")
49 | write.csv(LD_meta_file, LD_meta_file_path, row.names=FALSE)
50 | 
51 | LD_data = load_LD_matrix(LD_meta_file_path, region, sumstats)
52 | LD_data$combined_LD_matrix[1:5,1:5]
53 | ```
54 | 
55 | The output LD_data is a list contains the `LD` and `variants_df`, where the `variants_df` is a data frame with the columns "chr","variants","GD(genetic distance)"," pos","A1" and "A2" in a format of bim file. The row and column names of LD are identical to the elements of the `variants` in the data frame `variants_df`. The `variants_df` will be the input of `allele_qc` as the reference panel data.
56 | 
57 | ## Summary statistics QC
58 | 
59 | Match summary statistics data and `variants_df` by ("chr", "A1", "A2" and "pos"), accounting for possible strand flips and major/minor allele flips (opposite effects and zscores). We need to ensure that the format of summary statistics data with the columns "chr", "pos", "A1" and "A2". Because some end positions of one LD block are input positions of another LD block, we will keep these variants after allele flip by setting the parameters `remove_dups` as "FALSE".
60 | 
61 | ```{r allele_qc}
62 | allele_flip = allele_qc(sumstats$variant, LD_data$combined_LD_variants, sumstats, match_min_prop=0.2, remove_dups=FALSE, flip_strand=TRUE)
63 | head(allele_flip)
64 | ```
65 | 
66 | The output `allele_flip` will be the summary statistics after allele flip.
67 | 
68 | Because we match sumstats and `LD_data[[1]]$variants_df` by chrom and position after ``allele_qc` step, we need to use the variants of output to extract the `LD.data[[1]]$LD`.
69 | 
70 | ```{r qc LD}
71 | LD_extract = LD_data$combined_LD_matrix[allele_flip$target_data_qced$variant_id,allele_flip$target_data_qced$variant_id]
72 | ```
73 | ## run SuSiE RSS:
74 | ```{r susie rss}
75 | library(susieR)
76 | res = susie_rss(allele_flip$target_data_qced$z, LD_extract)
77 | ```
78 | 


--------------------------------------------------------------------------------
/vignettes/xqtl_enrichment.Rmd:
--------------------------------------------------------------------------------
 1 | 
 2 | ---
 3 | title: "Enrichment analysis of molecular QTL in genetic variants associated with complex traits"
 4 | author: "Ru Feng"
 5 | date: "`r Sys.Date()`"
 6 | output: rmarkdown::html_vignette
 7 | vignette: >
 8 |   %\VignetteIndexEntry{intro-to-pecotmr}
 9 |   %\VignetteEngine{knitr::rmarkdown}
10 |   %\VignetteEncoding{UTF-8}
11 | ---
12 | 
13 | This vignette demonstrates the enrichment analysis part of the `pecotmr` package, which largely follows from `fastENLOC` (https://github.com/xqwen/fastenloc) but uses `susieR` fitted objects as input to estimate priors for use with the `coloc` package (coloc v5, aka SuSiE-coloc). The main differences are:
14 | 1) now enrichment is based on all QTL variants whether or not they are inside signal clusters;
15 | 2) Causal QTL are sampled from SuSiE single effects, not signal clusters;
16 | 3) Allow a variant to be QTL for not only multiple conditions (eg cell types) but also multiple regions (eg genes).
17 | 
18 | 
19 | ## Set up environment and Load data
20 | 
21 | Load the `pecotmr` package for this enrichment analysis, abd load the susieR package for data checking and visualization
22 | ```{r load-pkgs}
23 | library(pecotmr)
24 | library(susieR)
25 | ```
26 | 
27 | Load fine mapping results. 
28 | ```{r}
29 | gwas_path <- "gwas_toy.rds"
30 | qtl_path <- "QTL_toy.rds"
31 | gwas <- readRDS(gwas_path)
32 | qtl <- readRDS(qtl_path)
33 | ```
34 | 
35 | ## Input data requirements
36 | 
37 | ### GWAS data requirements
38 | In the GWAS toy data, the file is stored as a list, with the fine mapping results already stored under the first layer.
39 | ```{r}
40 | names(gwas[[1]])
41 | ```
42 | Posterior inclusion probabilities of GWAS data:
43 | ```{r}
44 | susie_plot(gwas[[1]], y = "PIP")
45 | ```
46 | 
47 | ### QTL data requirements
48 | In the QTL toy data, the file is stored as a list and is more complicated. The first layer is named as the molecular trait object ID (gene), followed by different contexts as the second layer (e.g. Mic). The variant names and fine mapping results can be found under these contexts (`variant_names` and `susie_result_trimmed`). 
49 | ```{r}
50 | names(qtl)
51 | names(qtl[[1]])
52 | names(qtl[[1]][["Mic"]])
53 | names(qtl[[1]][["Mic"]][["susie_result_trimmed"]])
54 | ```
55 | Posterior inclusion probabilities of QTL data:
56 | ```{r}
57 | susie_plot(qtl[[1]][["Mic"]][["susie_result_trimmed"]], y = "PIP")
58 | ```
59 | 
60 | ## Perform enrichment analysis 
61 | The input for `xqtl_enrichment_wrapper` requires the paths for GWAS and QTL fine mapping data. You need to specify the list name of SuSiE results in each dataset as `xqtl_finemapping_obj` and the list name of variant names as `xqtl_varname_obj`. For example, if SuSiE results for QTL data are under `qtl[[1]]$Mic$susie_result_trimmed`, then `xqtl_finemapping_obj` would be `c("Mic", "susie_result_trimmed")`, and `xqtl_varname_obj` would be `c("Mic", "variant_names")` since the variant names are stored directly under `Mic`. 
62 | For GWAS data, the fine mapping results and variant names are stored under the first layer directly, so you do not need to specify `gwas_finemapping_obj` and `gwas_varname_obj`. However, if your data does not follow this structure, you may need to specify these parameters accordingly.
63 | 
64 | ```{r}
65 | enrich_res <- xqtl_enrichment_wrapper(
66 |   gwas_files = gwas_path, xqtl_files = qtl_path,
67 |   xqtl_finemapping_obj = c("Mic", "susie_result_trimmed"), xqtl_varname_obj = c("Mic", "variant_names")
68 | )
69 | print(enrich_res[[1]])
70 | print(head(enrich_res[["unused_xqtl_variants"]][[1]]))
71 | ```
72 | The enrichment analysis output is stored as a 2-layer list. The first layer contains the enrichment results, which will be used as priors in coloc analysis. The second layer stores the QTL variants that were not detected in the GWAS dataset.
73 | 
74 | ## Session information
75 | Here are some details about the computing environment, including the versions of R, and the R packages, used to generate these results.
76 | ```{R}
77 | sessionInfo()
78 | ```
79 | 


--------------------------------------------------------------------------------