├── .Rbuildignore ├── .github ├── .gitignore └── workflows │ └── R-CMD-check.yaml ├── .gitignore ├── .travis.yml ├── DESCRIPTION ├── LICENSE ├── LICENSE.md ├── NAMESPACE ├── NEWS.md ├── R ├── TwoSampleMR.r ├── clump.r ├── cojo.r ├── coloc.r ├── finemapr.r ├── gassocplot.r ├── harmonise.r ├── pwcoco.r ├── susieR.r └── utils-pipe.r ├── README.md ├── docs ├── 404.html ├── LICENSE-text.html ├── LICENSE.html ├── articles │ ├── cojo.html │ ├── cojo_files │ │ └── accessible-code-block-0.0.1 │ │ │ └── empty-anchor.js │ ├── colocalisation.html │ ├── colocalisation_files │ │ ├── accessible-code-block-0.0.1 │ │ │ └── empty-anchor.js │ │ └── figure-html │ │ │ ├── unnamed-chunk-10-1.png │ │ │ ├── unnamed-chunk-11-1.png │ │ │ └── unnamed-chunk-7-1.png │ ├── finemapping.html │ ├── finemapping_files │ │ └── accessible-code-block-0.0.1 │ │ │ └── empty-anchor.js │ ├── gwas2020.html │ ├── gwas2020_files │ │ └── accessible-code-block-0.0.1 │ │ │ └── empty-anchor.js │ ├── index.html │ ├── ld_ref.html │ ├── ld_ref_files │ │ └── accessible-code-block-0.0.1 │ │ │ └── empty-anchor.js │ ├── mr.html │ └── mr_files │ │ └── accessible-code-block-0.0.1 │ │ └── empty-anchor.js ├── authors.html ├── bootstrap-toc.css ├── bootstrap-toc.js ├── docsearch.css ├── docsearch.js ├── index.html ├── link.svg ├── news │ └── index.html ├── pkgdown.css ├── pkgdown.js ├── pkgdown.yml └── reference │ ├── clump_gwasvcf.html │ ├── cojo_cond.html │ ├── cojo_sumstat_file.html │ ├── coloc_to_gassocplot.html │ ├── gwasvcf_to_TwoSampleMR.html │ ├── gwasvcf_to_coloc.html │ ├── gwasvcf_to_finemapr.html │ ├── harmonise.html │ ├── harmonise_against_ref.html │ ├── ieugwasr_to_TwoSampleMR.html │ ├── ieugwasr_to_coloc.html │ ├── ieugwasr_to_finemapr.html │ ├── ieugwasr_to_gassocplot.html │ ├── index.html │ ├── is_forward_strand.html │ ├── make_TwoSampleMR_dat.html │ ├── map_variants_to_regions.html │ ├── organise_ids.html │ ├── pipe.html │ ├── read_gwas.html │ ├── read_reference.html │ ├── set_bc4_files.html │ ├── 
susieR_pipeline.html │ └── write_out.html ├── inst ├── extdata │ └── ldetect │ │ ├── AFR.bed │ │ ├── ASN.bed │ │ └── EUR.bed └── hapmap3 │ └── hapmap3_autosome.snplist.gz ├── man ├── clump_gwasvcf.Rd ├── cojo_cond.Rd ├── cojo_sumstat_file.Rd ├── coloc_to_gassocplot.Rd ├── gwasvcf_to_TwoSampleMR.Rd ├── gwasvcf_to_coloc.Rd ├── gwasvcf_to_finemapr.Rd ├── harmonise.Rd ├── harmonise_against_ref.Rd ├── ieugwasr_to_TwoSampleMR.Rd ├── ieugwasr_to_coloc.Rd ├── ieugwasr_to_finemapr.Rd ├── ieugwasr_to_gassocplot.Rd ├── is_forward_strand.Rd ├── make_TwoSampleMR_dat.Rd ├── map_variants_to_regions.Rd ├── organise_ids.Rd ├── pipe.Rd ├── read_gwas.Rd ├── read_reference.Rd ├── set_bc4_files.Rd ├── susieR_pipeline.Rd └── write_out.Rd ├── tests ├── testthat.R └── testthat │ ├── test_coloc.r │ ├── test_finemapr.r │ ├── test_gassocplot.r │ └── test_twosamplemr.r └── vignettes ├── cojo.Rmd ├── colocalisation.Rmd ├── finemapping.Rmd ├── finemapping_experiment.Rmd ├── gwas2020.Rmd ├── ld_ref.Rmd └── mr.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^LICENSE\.md$ 2 | ^\.travis\.yml$ 3 | ^doc$ 4 | ^docs$ 5 | ^Meta$ 6 | ^codecov\.yml$ 7 | ^\.github$ 8 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. 
2 | # https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - master 8 | pull_request: 9 | branches: 10 | - main 11 | - master 12 | 13 | name: R-CMD-check 14 | 15 | jobs: 16 | R-CMD-check: 17 | runs-on: macOS-latest 18 | env: 19 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 20 | steps: 21 | - uses: actions/checkout@v2 22 | - uses: r-lib/actions/setup-r@v1 23 | - name: Install dependencies 24 | run: | 25 | install.packages(c("remotes", "rcmdcheck")) 26 | remotes::install_deps(dependencies = TRUE) 27 | shell: Rscript {0} 28 | - name: Check 29 | run: | 30 | options(crayon.enabled = TRUE) 31 | rcmdcheck::rcmdcheck(args = "--no-manual", error_on = "error") 32 | shell: Rscript {0} 33 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rhistory 2 | .RData 3 | .Rproj.user 4 | inst/doc 5 | doc 6 | Meta 7 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: r 2 | r: 3 | - release 4 | - devel 5 | cache: packages 6 | 7 | warnings_are_errors: false 8 | 9 | r_packages: 10 | - covr 11 | 12 | after_success: 13 | - Rscript -e 'library(covr); codecov()' 14 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: gwasglue 2 | Title: GWAS summary data sources connected to analytical tools 3 | Version: 0.0.0.9000 4 | Authors@R: 5 | person(given = "Gibran", 6 | family = "Hemani", 7 | role = c("aut", "cre"), 8 | email = "g.hemani@bristol.ac.uk", 9 | comment = c(ORCID = "0000-0003-0920-1055")) 10 | Description: Many tools exist that use GWAS summary data for colocalisation, fine mapping, Mendelian randomization, 
visualisation, etc. This package is a conduit that connects R packages that can retrieve GWAS summary data to various tools for analysing those data. 11 | URL: https://github.com/mrcieu/gwasglue 12 | BugReports: https://github.com/mrcieu/gwasglue/issues 13 | Depends: 14 | R (>= 3.6.0), 15 | gwasvcf, 16 | ieugwasr 17 | Imports: 18 | dplyr, 19 | testthat, 20 | mr.raps, 21 | MendelianRandomization, 22 | MRPRESSO, 23 | RadialMR, 24 | MRMix, 25 | TwoSampleMR, 26 | magrittr, 27 | susieR 28 | Suggests: 29 | knitr, 30 | rmarkdown, 31 | finemapr, 32 | covr 33 | Remotes: 34 | mrcieu/gwasvcf, 35 | rondolab/MR-PRESSO, 36 | mrcieu/ieugwasr, 37 | mrcieu/TwoSampleMR, 38 | mrcieu/MRInstruments, 39 | WSpiller/RadialMR, 40 | gqi/MRMix, 41 | stephenslab/susieR, 42 | variani/finemapr 43 | License: MIT + file LICENSE 44 | Encoding: UTF-8 45 | LazyData: true 46 | RoxygenNote: 7.1.1 47 | VignetteBuilder: knitr 48 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2019 2 | COPYRIGHT HOLDER: Gibran Hemani 3 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2019 Gibran Hemani 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export("%>%") 4 | export(clump_gwasvcf) 5 | export(cojo_cond) 6 | export(cojo_sumstat_file) 7 | export(coloc_to_gassocplot) 8 | export(gwasvcf_to_TwoSampleMR) 9 | export(gwasvcf_to_coloc) 10 | export(gwasvcf_to_finemapr) 11 | export(harmonise) 12 | export(harmonise_against_ref) 13 | export(ieugwasr_to_TwoSampleMR) 14 | export(ieugwasr_to_coloc) 15 | export(ieugwasr_to_finemapr) 16 | export(ieugwasr_to_gassocplot) 17 | export(is_forward_strand) 18 | export(make_TwoSampleMR_dat) 19 | export(map_variants_to_regions) 20 | export(pwcoco) 21 | export(read_gwas) 22 | export(read_reference) 23 | export(set_bc4_files) 24 | export(susieR_pipeline) 25 | export(write_out) 26 | importFrom(magrittr,"%>%") 27 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # gwasglue 0.0.0.9000 2 | 3 | * Added a `NEWS.md` file to track changes to the package. 
--------------------------------------------------------------------------------
/R/TwoSampleMR.r:
--------------------------------------------------------------------------------
#' Convert ieugwasr query output to TwoSampleMR format
#'
#' Every column name is suffixed with `.<type>`, the columns that TwoSampleMR
#' expects are renamed, and a logical `mr_keep.<type>` column is added which is
#' TRUE when beta, se, effect allele and SNP are all non-missing.
#'
#' @param x Data frame returned by an ieugwasr query e.g. associations, tophits, phewas
#' @param type "exposure" (default) or "outcome"
#'
#' @export
#' @return data frame
ieugwasr_to_TwoSampleMR <- function(x, type="exposure")
{
	stopifnot(type %in% c("exposure", "outcome"))
	stopifnot(is.data.frame(x))

	suffixed <- function(stem) paste0(stem, ".", type)

	# suffixed ieugwasr column name -> TwoSampleMR column name
	lookup <- c("SNP", suffixed("effect_allele"), suffixed("other_allele"), suffixed("pval"), suffixed("samplesize"), type)
	names(lookup) <- c(suffixed("rsid"), suffixed("ea"), suffixed("nea"), suffixed("p"), suffixed("n"), suffixed("trait"))

	cols <- suffixed(names(x))
	hit <- cols %in% names(lookup)
	cols[hit] <- unname(lookup[cols[hit]])
	names(x) <- cols

	complete <- !is.na(x[[suffixed("beta")]]) &
		!is.na(x[[suffixed("se")]]) &
		!is.na(x[[suffixed("effect_allele")]]) &
		!is.na(x[["SNP"]])
	x[[suffixed("mr_keep")]] <- complete
	x
}


#' Create exposure or outcome data format for TwoSampleMR from vcf
#'
#' The VCF is converted to a table, any of the ES, SE, LP, SS, NC and id fields
#' that are absent are filled with NA, LP is back-transformed to a p-value
#' (10^-LP), the number of controls is derived as SS - NC, and the result is
#' passed to TwoSampleMR::format_data.
#'
#' @param vcf VCF object
#' @param type "exposure" (default) or "outcome"
#'
#' @export
#' @return data frame
gwasvcf_to_TwoSampleMR <- function(vcf, type="exposure")
{
	gr <- gwasvcf::vcf_to_granges(vcf)
	S4Vectors::mcols(gr)[["SNP"]] <- names(gr)
	tab <- dplyr::as_tibble(gr)

	# Guarantee that every field referenced below exists
	for(field in c("ES", "SE", "LP", "SS", "NC", "id"))
	{
		if(!field %in% names(tab)) tab[[field]] <- NA
	}

	tab[["LP"]] <- 10^-tab[["LP"]]
	tab[["NCONT"]] <- tab[["SS"]] - tab[["NC"]]

	TwoSampleMR::format_data(
		tab,
		type=type,
		snp_col="SNP",
		effect_allele_col="ALT",
		other_allele_col="REF",
		eaf_col="AF",
		chr_col="seqnames",
		pos_col="start",
		beta_col="ES",
		se_col="SE",
		pval_col="LP",
		samplesize_col="SS",
		ncase_col="NC",
		ncontrol_col="NCONT",
		phenotype_col="id"
	)
}



#' Create a harmonised dataset from lists of vcf files
#'
#' This mimics the TwoSampleMR::make_dat function, which automatically looks up exposure and outcome datasets and harmonises them, except this function uses GWAS-VCF datasets instead.
#' The supporting reference datasets can be accessed by UoB users on BC4 using set_bc4_files()
#'
#' @param id1 Exposure datasets. Either an array of vcf files, or array of IDs if vcfdir is set
#' @param id2 Outcome datasets. Either an array of vcf files, or array of IDs if vcfdir is set
#' @param proxies Lookup proxies? default=TRUE but requires either bfile or proxydb to be set
#' @param nthreads Parallelise default=1
#' @param vcfdir Location of vcf files if id1 and id2 are just IDs.
#'   Defaults to options()$gwasglue.vcfdir
#' @param proxydb Location of LD proxy database Default=options()$gwasglue.proxydb
#' @param rsidx Location of rsidx index database Default=options()$gwasglue.rsidx
#' @param bfile Location of LD reference panel Default=options()$gwasglue.bfile
#' @param action Passed on to TwoSampleMR::harmonise_data. Default=1
#' @param plink_bin Path to plink binary, used for local clumping. Default=genetics.binaRies::get_plink_binary()
#' @param pval p-value threshold used to extract candidate variants before clumping when an exposure has no clump.txt file. Default=5e-8
#'
#' @details `proxies` and `proxydb` are accepted but are not currently used by this function.
#'
#' @export
#' @return harmonised dataset
make_TwoSampleMR_dat <- function(id1, id2, proxies=TRUE, nthreads=1, vcfdir=options()$gwasglue.vcfdir, proxydb=options()$gwasglue.proxydb, rsidx=options()$gwasglue.rsidx, bfile=options()$gwasglue.bfile, action=1, plink_bin=genetics.binaRies::get_plink_binary(), pval=5e-8)
{
	id1 <- organise_ids(id1, vcfdir)
	id2 <- organise_ids(id2, vcfdir)

	# Format extracted tophits as exposure data and label them with trait / id
	label_exposure <- function(vcf, i)
	{
		vcf %>%
			gwasvcf_to_TwoSampleMR("exposure") %>%
			dplyr::mutate(exposure=id1$trait[i], id.exposure=id1$id[i])
	}

	exposure_dat <- parallel::mclapply(seq_len(nrow(id1)), function(i)
	{
		message("extracting tophits for ", id1$id[i])
		clumpfile <- file.path(dirname(id1$filename[i]), "clump.txt")

		# Pre-computed clumping results sit next to the vcf file
		if(file.exists(clumpfile))
		{
			tophits <- scan(clumpfile, character())
			if(length(tophits) == 0)
			{
				return(NULL)
			}
			return(label_exposure(gwasvcf::query_gwas(id1$filename[i], rsidx=rsidx, rsid=tophits), i))
		}

		message("Clumping ", id1$filename[i])
		# bfile is a plink prefix, so accept either the prefix itself or <prefix>.bed.
		# A NULL bfile must fail here rather than silently passing the check.
		if(is.null(bfile) || !any(file.exists(c(bfile, paste0(bfile, ".bed")))))
		{
			stop("bfile must point to a local LD reference panel because no clump.txt file was found for ", id1$filename[i])
		}
		o <- gwasvcf::query_gwas(id1$filename[i], pval=pval)
		p <- gwasvcf_to_TwoSampleMR(o, "exposure")
		# ieugwasr::ld_clump expects columns named rsid and pval
		names(p)[names(p) == "pval.exposure"] <- "pval"
		names(p)[names(p) == "SNP"] <- "rsid"
		tophits <- ieugwasr::ld_clump(p, bfile=bfile, plink_bin=plink_bin)$rsid
		if(length(tophits) == 0)
		{
			return(NULL)
		}
		label_exposure(gwasvcf::query_gwas(o, rsid=tophits), i)
	}, mc.cores=nthreads) %>%
		dplyr::bind_rows()

	outcome_dat <- parallel::mclapply(seq_len(nrow(id2)), function(i)
	{
		gwasvcf::query_gwas(id2$filename[i], rsidx=rsidx, rsid=exposure_dat$SNP) %>%
			gwasvcf_to_TwoSampleMR("outcome") %>%
			dplyr::mutate(id.outcome=id2$id[i], outcome=id2$trait[i])
	}, mc.cores=nthreads) %>%
		dplyr::bind_rows()

	# harmonise exposure and outcome effects onto the same allele
	dat <- TwoSampleMR::harmonise_data(exposure_dat, outcome_dat, action=action)
	return(dat)
}


#' Figure out specific files and IDs depending on what files exist and whether vcfdir is set
#'
#' Entries of `id` that are existing files are used as they are. The remaining
#' entries are treated as dataset IDs and resolved to
#' `<vcfdir>/<id>/<id>.vcf.gz`; for those, the trait name is read from the
#' `<id>.json` file in the same directory.
#'
#' @param id List of IDs within the vcfdir structure, or a list of GWAS VCF files, or a mixture
#' @param vcfdir Location of GWAS VCF files, or NULL if id is a list of actual files
#'
#' @return Tibble with columns id, trait and filename, one row per dataset
organise_ids <- function(id, vcfdir)
{
	dat <- dplyr::tibble(id=id, trait=id, filename=id)
	# TRUE where id is already a path to an existing file
	index <- file.exists(id)
	if(all(index))
	{
		return(dat)
	}

	if(is.null(vcfdir))
	{
		stop("vcfdir is not set, and the following files do not exist\n", paste(id[!index], collapse="\n"))
	}

	message("Constructing file names")
	dat$filename[!index] <- file.path(vcfdir, id[!index], paste0(id[!index], ".vcf.gz"))
	index2 <- file.exists(dat$filename)
	if(!all(index2))
	{
		# Report the paths that are actually missing, not every ID that needed constructing
		stop("can't find the following files: \n", paste(dat$filename[!index2], collapse="\n"))
	}

	# Datasets located through vcfdir carry their trait name in a json sidecar
	for(i in which(index2 & !index))
	{
		jf <- file.path(dirname(dat$filename[i]), paste0(dat$id[i], ".json"))
		dat$trait[i] <- jsonlite::read_json(jf)$trait
	}
	return(dat)
}

#' Determine locations of useful reference datasets on bluecrystal4
#'
#' This is a convenience function for members at the University of Bristol
#' to automatically set file locations for various reference datasets.
#' It relates
#' only to paths on bc4
#'
#' @export
#' @return Invisibly, the previous values of the options that were set (the value of `options()`)
set_bc4_files <- function()
{
	# Register the bc4 paths as session options
	options(
		gwasglue.vcfdir="/mnt/storage/private/mrcieu/data/IGD/data/public",
		gwasglue.proxydb="/mnt/storage/private/mrcieu/research/mr-eve/mr-eve/reference/data_maf0.01_rs_ref.sqlite",
		gwasglue.rsidx="/mnt/storage/private/mrcieu/research/mr-eve/vcf-reference-datasets/1000g_filtered/annotations.vcf.gz.rsidx",
		gwasglue.bfile="/mnt/storage/private/mrcieu/research/mr-eve/mr-eve/reference/data_maf0.01_rs_ref"
	)
}

--------------------------------------------------------------------------------
/R/clump.r:
--------------------------------------------------------------------------------
#' Perform LD clumping
#'
#'
#'
#' @param vcf VCF file or VCF object
#' @param clump_kb Clumping kb window. Default = 1000
#' @param clump_r2 Clumping r2 threshold. Default is very strict, 0.001
#' @param clump_p Clumping sig level for index variants, also used as the p-value threshold applied to the vcf. Default = 5e-8
#' @param pop Super-population to use as reference panel when clumping through the API. Default = NULL. Options are EUR, SAS, EAS, AFR, AMR. 'legacy' also available - which is a previously used version of the EUR panel with a slightly different set of markers
#' @param bfile Local LD reference panel. If this is NULL (default) then will use the API
#' @param plink_bin If null and bfile is not null then will detect packaged plink binary for specific OS. Otherwise specify path to plink binary. Default = NULL
#' @param access_token Google OAuth2 access token.
#'   Used to authenticate level of access to data
#'   when clumping through the API. If NULL the token from ieugwasr::check_access_token() is used
#'
#' @export
#' @return data frame of clumped results
clump_gwasvcf <- function(vcf, clump_kb=1000, clump_r2=0.001, clump_p=5e-8, pop=NULL, bfile=NULL, plink_bin=NULL, access_token=NULL)
{
	message("Applying threshold to vcf")
	sig <- gwasvcf::query_gwas(vcf, pval=clump_p)

	if(is.null(bfile))
	{
		message("Using API. Note that this could be slow, and to reduce server disruption it is recommended to use local LD reference files")
		message("See gwasglue vignette on how to do this")

		# Honour a user-supplied token; only look one up when none was given
		if(is.null(access_token))
		{
			access_token <- ieugwasr::check_access_token()
		}
		fn <- function(dat)
		{
			ieugwasr::ld_clump(dat, pop=pop, clump_kb=clump_kb, clump_r2=clump_r2, clump_p=clump_p, access_token=access_token)
		}
	} else {
		fn <- function(dat)
		{
			ieugwasr::ld_clump(dat, clump_kb=clump_kb, clump_r2=clump_r2, clump_p=clump_p, bfile=bfile, plink_bin=plink_bin)
		}
	}

	# Clump on rsid / p-value, then look the retained variants up in the vcf
	# again to report their chr:pos
	pvals <- sig %>%
		gwasvcf::vcf_to_tibble() %>%
		dplyr::mutate(pval=10^{-LP}) %>%
		dplyr::select(rsid, pval)
	keep <- fn(pvals)$rsid
	retained <- SummarizedExperiment::rowRanges(sig[names(sig) %in% keep])
	clumped <- dplyr::tibble(
		rsid = names(retained),
		chrpos = paste0(SummarizedExperiment::seqnames(retained), ":", SummarizedExperiment::ranges(retained)@start)
	)
	return(clumped)
}

--------------------------------------------------------------------------------
/R/cojo.r:
--------------------------------------------------------------------------------
#' Write vcf file to cojo sumstat file
#'
#' Reads the vcf, back-transforms LP to a p-value and writes the columns
#' SNP, A1, A2, freq, b, se, p, N to `outfile`.
#'
#' @param vcffile Path to vcf file
#' @param outfile Path to output file
#'
#' @export
#' @return vcf object
cojo_sumstat_file <- function(vcffile, outfile)
{
	vcf <- VariantAnnotation::readVcf(vcffile)
	tib <- gwasvcf::vcf_to_tibble(vcf)
	tib$LP <- 10^(-tib$LP)
	tib <- tib %>% dplyr::select(rsid, ALT, REF, AF, ES, SE, LP, SS)
	names(tib) <- c("SNP", "A1", "A2", "freq", "b", "se", "p", "N")
	utils::write.table(tib, file=outfile, row.names=FALSE, col.names=TRUE, quote=FALSE)
	return(vcf)
}

#' For a set of variants map to LD regions
#'
#' LD regions defined here https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4731402/
#'
#' @param chrpos Array of chr:pos
#' @param pop EUR, AFR or ASN
#'
#' @export
#' @return Tibble with one row per element of chrpos and columns `variant`
#'   (the chr:pos supplied) and `region` (chr:start-stop of the first region
#'   containing it; built from NA values when no region matches)
map_variants_to_regions <- function(chrpos, pop)
{
	regionfile <- system.file("extdata", "ldetect", paste0(pop, ".bed"), package="gwasglue")
	# system.file returns "" when the file is not shipped with the package
	if(!nzchar(regionfile))
	{
		stop("No LD region file available for pop '", pop, "'")
	}
	regions <- data.table::fread(regionfile, header=TRUE) %>%
		dplyr::mutate(
			chr=as.numeric(gsub("chr", "", chr)),
			start=as.numeric(start),
			stop=as.numeric(stop)
		) %>% dplyr::as_tibble()

	reg <- lapply(chrpos, function(i)
	{
		x <- strsplit(i, split=":")[[1]] %>% as.numeric()
		# [1,] keeps exactly one row per variant, even when nothing matches
		subset(regions, chr == x[1] & x[2] >= start & x[2] <= stop)[1,] %>%
			dplyr::mutate(
				variant=i,
				region=paste0(chr, ":", start, "-", stop)
			) %>%
			dplyr::select(variant, region)
	}) %>% dplyr::bind_rows()
	return(reg)
}


#' Perform conditional analysis using GCTA COJO
#'
#' For a list of fine-mapped rsids, will assign to regions and generate conditionally independent summary stats for each rsid
#'
#' @param vcffile Path to vcffile
#' @param bfile LD reference panel
#' @param snplist List of rsids
#' @param pop EUR, ASN or AFR
#' @param gcta Path to gcta binary. For convenience can use default=genetics.binaRies::get_gcta_binary()
#' @param workdir Location to store temporary files. Default=tempdir()
#' @param threads Number of parallel threads. Default=1
#'
#' @export
#' @return List of independent summary stats. Elements are named by region
#'   (chr:start-stop) and each holds a list of data frames named by variant (chr:pos)
cojo_cond <- function(vcffile, bfile, snplist, pop, gcta=genetics.binaRies::get_gcta_binary(), workdir=tempdir(), threads=1)
{
	message("Formatting sumstats")
	sumfile <- file.path(workdir, "sum.txt")
	vcf <- cojo_sumstat_file(vcffile, sumfile)

	ext <- vcf[names(vcf) %in% snplist] %>%
		SummarizedExperiment::rowRanges()
	chrpos <- paste0(SummarizedExperiment::seqnames(ext), ":", SummarizedExperiment::ranges(ext)@start)

	message("Organising regions")
	regions <- map_variants_to_regions(chrpos, pop)
	regions$rsid <- names(ext)[match(regions$variant, chrpos)]
	dup_reg <- unique(regions$region[duplicated(regions$region)])

	message(length(dup_reg), " out of ", length(unique(regions$region)), " regions have multiple variants")

	l <- parallel::mclapply(dup_reg, function(i)
	{
		message(i)
		x <- subset(regions, region == i)
		m <- list()
		y <- gwasvcf::query_gwas(vcf, chrompos=i)

		# Each region gets its own scratch files so that parallel workers
		# (threads > 1) cannot overwrite each other's GCTA input / output
		prefix <- tempfile(pattern="cojo_", tmpdir=workdir)
		extractfile <- paste0(prefix, ".extract.txt")
		condfile <- paste0(prefix, ".cond.txt")
		outprefix <- paste0(prefix, ".out")
		resfile <- paste0(outprefix, ".cma.cojo")
		on.exit(unlink(list.files(workdir, pattern=paste0("^", basename(prefix)), full.names=TRUE)), add=TRUE)

		utils::write.table(names(y), file=extractfile, row.names=FALSE, col.names=FALSE, quote=FALSE)
		for(j in x$variant)
		{
			message(j)
			# Condition on every other selected variant in the region
			condsnps <- subset(x, variant != j)$rsid
			utils::write.table(condsnps, file=condfile, row.names=FALSE, col.names=FALSE, quote=FALSE)
			# Quote every path so that spaces in workdir / bfile do not break the command
			cmd <- paste(
				shQuote(gcta),
				"--bfile", shQuote(bfile),
				"--extract", shQuote(extractfile),
				"--cojo-file", shQuote(sumfile),
				"--cojo-cond", shQuote(condfile),
				"--out", shQuote(outprefix)
			)
			# Remove the previous variant's result so a failed run cannot be mistaken for success
			unlink(resfile)
			system(cmd)
			if(!file.exists(resfile))
			{
				stop("GCTA COJO did not produce ", resfile, " for variant ", j, " in region ", i)
			}
			res <- data.table::fread(resfile)
			m[[j]] <- dplyr::select(res, rsid=SNP, chr=Chr, pos=bp, alt=refA, ES=bC, SE=bC_se, pval=pC, n=n)
		}
		return(m)
	}, mc.cores=threads)
	# Name by region so these entries are addressed the same way as the single-variant regions below
	names(l) <- dup_reg

	message("Adding in remaining regions in the same format")
	single_reg <- regions$region[!regions$region %in% dup_reg]

	for(i in single_reg)
	{
		message(i)
		x <- subset(regions, region == i)
		j <- x$variant
		# Only one selected variant in the region, so the marginal statistics are used unchanged
		y <- gwasvcf::query_gwas(vcf, chrompos=i) %>% gwasvcf::vcf_to_tibble() %>%
			dplyr::mutate(pval=10^{-LP})
		l[[i]][[j]] <- dplyr::select(y, rsid=rsid, chr=seqnames, pos=start, alt=ALT, ES=ES, SE=SE, pval=pval, n=SS)
	}

	return(l)
}

--------------------------------------------------------------------------------
/R/coloc.r:
--------------------------------------------------------------------------------
#' Generate coloc dataset from vcf files
#'
#' Variants are restricted to those present in both files within `chrompos`
#' and with identical REF, ALT, chromosome and position. Trait type is "quant"
#' when the vcf header StudyType is "Continuous" and "cc" otherwise; for "cc"
#' datasets the case fraction `s` is the mean of NC / SS in that dataset.
#' Missing allele frequencies are set to 0.5.
#'
#' @param vcf1 VCF object or path to vcf file
#' @param vcf2 VCF object or path to vcf file
#' @param chrompos Character of chr:pos1-pos2
#'
#' @export
#' @return List of datasets to feed into coloc, or NULL if either file has no variants in the overlap
gwasvcf_to_coloc <- function(vcf1, vcf2, chrompos)
{
	## TODO: binary or quantitative traits
	## TODO: multiallelic variants

	o <- gwasvcf::vcflist_overlaps(list(vcf1, vcf2), chrompos)
	vcf1 <- o[[1]]
	vcf2 <- o[[2]]

	if(length(vcf1) == 0)
	{
		message("No overlaps in vcf1")
		return(NULL)
	}
	if(length(vcf2) == 0)
	{
		message("No overlaps in vcf2")
		return(NULL)
	}

	stopifnot(length(vcf1) == length(vcf2))
	tab1 <- vcf1 %>% gwasvcf::vcf_to_granges() %>% dplyr::as_tibble()
	tab2 <- vcf2 %>% gwasvcf::vcf_to_granges() %>% dplyr::as_tibble()
	index <- as.character(tab1$REF) == as.character(tab2$REF) &
		as.character(tab1$ALT) == as.character(tab2$ALT) &
		as.character(tab1$seqnames) == as.character(tab2$seqnames) &
		tab1$start == tab2$start
	stopifnot(sum(index) > 0)

	type1 <- ifelse(VariantAnnotation::header(vcf1) %>%
		VariantAnnotation::meta() %>%
		{.[["SAMPLE"]][["StudyType"]]} == "Continuous", "quant", "cc")

	type2 <- ifelse(VariantAnnotation::header(vcf2) %>%
		VariantAnnotation::meta() %>%
		{.[["SAMPLE"]][["StudyType"]]} == "Continuous", "quant", "cc")

	tab1$AF[is.na(tab1$AF)] <- 0.5
	tab2$AF[is.na(tab2$AF)] <- 0.5

	# snp names are taken from vcf2 for both datasets so that they are guaranteed to agree
	out1 <- tab1[index,] %>% {list(pvalues = 10^-.$LP, N = .$SS, MAF = .$AF, beta = .$ES, varbeta = .$SE^2, type = type1, snp = names(vcf2)[index], z = .$ES / .$SE, chr = .$seqnames, pos = .$start, id = VariantAnnotation::samples(VariantAnnotation::header(vcf1))[1])}
	out2 <- tab2[index,] %>% {list(pvalues = 10^-.$LP, N = .$SS, MAF = .$AF, beta = .$ES, varbeta = .$SE^2, type = type2, snp = names(vcf2)[index], z = .$ES / .$SE, chr = .$seqnames, pos = .$start, id = VariantAnnotation::samples(VariantAnnotation::header(vcf2))[1])}

	# The two datasets are handled independently: each case-control dataset
	# gets its own case fraction, computed from its own table
	if(type1 == "cc")
	{
		out1$s <- mean(tab1$NC / tab1$SS, na.rm=TRUE)
	}

	if(type2 == "cc")
	{
		out2$s <- mean(tab2$NC / tab2$SS, na.rm=TRUE)
	}

	return(list(dataset1=out1, dataset2=out2))
}


#' Generate coloc dataset from the IEU GWAS database
#'
#' Associations for both traits are looked up in `chrompos`, restricted to
#' rsids present in both with matching alleles and position, allele
#' frequencies are folded to minor allele frequencies (missing values set to
#' 0.5 with a warning) and missing per-variant sample sizes are filled from
#' the study-level sample size.
#'
#' @param id1 ID for trait 1
#' @param id2 ID for trait 2
#' @param chrompos Character of chr:pos1-pos2
#' @param type1 Provide "cc" or "quant" to override automatic lookup of trait type for trait 1
#' @param type2 Provide "cc" or "quant" to override automatic lookup of trait type for trait 2
#'
#' @export
#' @return List of datasets to feed into coloc
ieugwasr_to_coloc <- function(id1, id2, chrompos, type1=NULL, type2=NULL)
{
	tab1 <- ieugwasr::associations(id=id1, variants=chrompos) %>% subset(., !duplicated(rsid))
	tab2 <- ieugwasr::associations(id=id2, variants=chrompos) %>% subset(., !duplicated(rsid))
	commonsnps <- tab1$rsid[tab1$rsid %in% tab2$rsid]
	tab1 <- tab1[tab1$rsid %in% commonsnps, ] %>% dplyr::arrange(rsid)
	tab2 <- tab2[tab2$rsid %in% commonsnps, ] %>% dplyr::arrange(rsid)
	stopifnot(all(tab1$rsid == tab2$rsid))

	index <- as.character(tab1$ea) == as.character(tab2$ea) &
		as.character(tab1$nea) == as.character(tab2$nea) &
		as.character(tab1$rsid) == as.character(tab2$rsid) &
		tab1$position == tab2$position
	stopifnot(sum(index) > 0)

	# Fold effect allele frequencies to minor allele frequencies
	tab1$eaf <- as.numeric(tab1$eaf)
	tab2$eaf <- as.numeric(tab2$eaf)
	tab1$eaf[which(tab1$eaf > 0.5)] <- 1 - tab1$eaf[which(tab1$eaf > 0.5)]
	tab2$eaf[which(tab2$eaf > 0.5)] <- 1 - tab2$eaf[which(tab2$eaf > 0.5)]
	s <- sum(is.na(tab1$eaf))
	if(s > 0)
	{
		warning(s, " out of ", nrow(tab1), " variants have missing allele frequencies in ", id1, ". Setting to 0.5")
		tab1$eaf[is.na(tab1$eaf)] <- 0.5
	}
	s <- sum(is.na(tab2$eaf))
	if(s > 0)
	{
		warning(s, " out of ", nrow(tab2), " variants have missing allele frequencies in ", id2, ". Setting to 0.5")
		tab2$eaf[is.na(tab2$eaf)] <- 0.5
	}

	info1 <- ieugwasr::gwasinfo(id1)
	type1 <- get_type(info1, type1)
	info2 <- ieugwasr::gwasinfo(id2)
	type2 <- get_type(info2, type2)

	# Fall back to the study-level sample size where the per-variant one is missing
	tab1$n[is.na(tab1$n)] <- info1$sample_size
	tab2$n[is.na(tab2$n)] <- info2$sample_size

	tab1 <- tab1[index,] %>% {list(pvalues = .$p, N = .$n, MAF = .$eaf, beta = .$beta, varbeta = .$se^2, type = type1, snp = .$rsid, z = .$beta / .$se, chr = .$chr, pos = .$position, id = id1)}
	tab2 <- tab2[index,] %>% {list(pvalues = .$p, N = .$n, MAF = .$eaf, beta = .$beta, varbeta = .$se^2, type = type2, snp = .$rsid, z = .$beta / .$se, chr = .$chr, pos = .$position, id = id2)}

	if(type1 == "cc")
	{
		tab1$s <- info1$ncase / info1$sample_size
	}

	if(type2 == "cc")
	{
		tab2$s <- info2$ncase / info2$sample_size
	}

	return(list(dataset1=tab1, dataset2=tab2))
}


# Decide whether a study is case-control ("cc") or quantitative ("quant").
#
# info  - study metadata (list / data frame) with element id and, optionally, unit and ncase
# typex - NULL, or "cc" / "quant" to override the lookup
#
# An explicit typex wins. Otherwise a unit of "logOR" or "log odds" means "cc"
# and any other unit means "quant". When the unit is absent or NA, a stated
# number of cases means "cc", else "quant" is assumed.
get_type <- function(info, typex)
{
	if(!is.null(typex))
	{
		stopifnot(typex %in% c("cc", "quant"))
		return(typex)
	}

	unit <- info[["unit"]]
	# A missing unit field is treated the same way as an NA unit
	if(is.null(unit) || is.na(unit))
	{
		ncase <- if("ncase" %in% names(info)) info[["ncase"]] else NA
		if(is.na(ncase))
		{
			message("Type information not available for ", info$id, ". Assuming 'quant' but override using 'type' arguments.")
			return("quant")
		}
		message("No units available but assuming cc due to number of cases being stated")
		return("cc")
	}

	return(ifelse(unit %in% c("logOR", "log odds"), "cc", "quant"))
}

--------------------------------------------------------------------------------
/R/finemapr.r:
--------------------------------------------------------------------------------
#' Generate data for analysis in various finemapping methods
#'
#' Uses the finemapr package https://github.com/variani/finemapr
#'
#' @param region Region of the genome to extract eg "1:109317192-110317192"
#' @param id Array of GWAS studies to query. See \code{gwasinfo} for available studies
#' @param bfile Local LD reference panel, passed to ieugwasr::ld_matrix. Default = NULL
#' @param plink_bin If null and bfile is not null then will detect packaged plink binary for specific OS. Otherwise specify path to plink binary.
#'   Default = NULL
#'
#' @export
#' @return Each id will be a list of z score data, ld matrix, and sample size
ieugwasr_to_finemapr <- function(region, id, bfile=NULL, plink_bin=NULL)
{
	id <- unique(id)
	message("Getting rsids in region")
	rsid <- ieugwasr::variants_to_rsid(region)
	message("Extracting rsids from data")
	assoc <- ieugwasr::associations(rsid, id, proxies=0)
	rsid_avail <- unique(assoc[["rsid"]])
	message("Calculating LD for ", length(rsid_avail), " variants")
	# bfile / plink_bin are passed by name so that they cannot be bound to a
	# different formal of ld_matrix; variants with non-finite LD are dropped
	ld <- suppressWarnings(ieugwasr::ld_matrix(rsid_avail, with_alleles=FALSE, bfile=bfile, plink_bin=plink_bin)) %>% greedy_remove()
	rsid_avail <- rownames(ld)
	message("Data available for ", length(rsid_avail), " variants")
	assoc <- assoc[assoc[["rsid"]] %in% rsid_avail, ]

	out <- list()
	for(i in seq_along(id))
	{
		dat <- list()
		x <- assoc[assoc[["rsid"]] %in% rsid_avail & assoc[["id"]] == id[i],]
		dat[["z"]] <- dplyr::tibble(snp = x[["rsid"]], zscore = x[["beta"]] / x[["se"]])
		# Reorder / subset the LD matrix to the variants available for this study
		index <- match(x[["rsid"]], rsid_avail)
		dat[["ld"]] <- ld[index, index]
		stopifnot(all(x[["rsid"]] == rownames(dat[["ld"]])))

		n <- x[["n"]]
		if(all(is.na(n)))
		{
			# No per-variant sample size, so use the study-level one
			g <- ieugwasr::gwasinfo(id[i])
			n <- g[["sample_size"]]
		}
		dat[["n"]] <- n
		out[[id[i]]] <- dat
	}
	class(out) <- "FinemaprList"
	return(out)
}

#' Print a FinemaprList
#'
#' Shows the structure of the list using utils::str
#'
#' @param x FinemaprList object
#' @param ... Ignored; present for compatibility with the print generic
#'
#' @export
#' @return x, invisibly
print.FinemaprList <- function(x, ...)
{
	utils::str(x)
	invisible(x)
}


#' Generate data for fine mapping analysis
#'
#' For a given region and VCF file, extracts the variants in the region along with LD matrix from a reference panel
#'
#' @param region Region of the genome to extract eg "1:109317192-110317192". Can be array
#' @param vcf Path to VCF file or VCF object
#' @param bfile LD reference panel
#' @param plink_bin Path to plink. Default = genetics.binaRies::get_plink_binary()
#' @param threads Number of threads to run in parallel.
#'   Default=1
#'
#' @export
#' @return List of datasets for finemapping
gwasvcf_to_finemapr <- function(region, vcf, bfile, plink_bin=genetics.binaRies::get_plink_binary(), threads=1)
{
  message("Extracting data from vcf")
  regions <- unique(region)
  # Query the full set of regions once, then slice per region inside the workers
  ext <- gwasvcf::query_gwas(vcf=vcf, chrompos=region)
  out <- parallel::mclapply(regions, function(i){
    message(i)
    m <- list()
    temp <- gwasvcf::query_gwas(vcf=ext, chrompos=i)
    m[["ld"]] <- ieugwasr::ld_matrix(names(temp), bfile=bfile, plink_bin=plink_bin, with_alleles=FALSE) %>%
      greedy_remove()
    tib <- gwasvcf::vcf_to_tibble(temp)
    # NOTE(review): z is filtered to the variants present in the LD matrix but is
    # not re-ordered to the LD row order -- confirm both are in the same order.
    m[["z"]] <- tib %>%
      subset(rsid %in% rownames(m[["ld"]])) %>%
      dplyr::mutate(z=ES/SE) %>%
      dplyr::select(snp=rsid, zscore=z)
    m[["n"]] <- tib[["SS"]]
    # Return this region's dataset. The previous code assigned into and returned
    # `out`, which does not exist inside the worker.
    return(m)
  }, mc.cores=threads)
  # Label each dataset with its region; positional access still works
  if(is.character(regions))
  {
    names(out) <- regions
  }
  class(out) <- "FinemaprList"
  return(out)
}



# Remove variants from an LD matrix until no non-finite (NA/NaN/Inf) entries remain.
#
# Row/column indices are visited from the most to the least frequently involved
# in non-finite cells. An index is dropped only if it still takes part in an
# unresolved non-finite cell. The same indices are removed from rows and columns.
#
# ld: square LD matrix
# Returns the reduced matrix (always a matrix, possibly 1x1 or 0x0).
greedy_remove <- function(ld)
{
  bad <- which(!is.finite(ld), arr.ind=TRUE)
  if(length(bad) == 0)
  {
    return(ld)
  }
  # How often each index occurs (as a row or as a column) among non-finite cells
  counts <- sort(table(bad), decreasing=TRUE)
  candidates <- as.numeric(names(counts))
  rem <- c()
  for(k in candidates)
  {
    # Already resolved by an earlier removal: keep this variant
    if(!any(bad == k)) next
    # drop=FALSE: `bad` must stay a matrix even when only one cell is left,
    # otherwise nrow() returns NULL and the column indexing fails
    bad <- bad[!(bad[,1] == k | bad[,2] == k), , drop=FALSE]
    rem <- c(rem, k)
    if(nrow(bad) == 0) break
  }
  ld <- ld[-rem, -rem, drop=FALSE]
  stopifnot(all(is.finite(ld)))
  return(ld)
}
--------------------------------------------------------------------------------
/R/gassocplot.r:
--------------------------------------------------------------------------------
#' Generate regional plot for ieugwasr
#'
#' Uses James Staley's gassocplot package https://github.com/jrs95/gassocplot
#'
#' @param chrpos A window range to plot e.g.
#'   16:3349655-3849655
#' @param id Vector of one or more IEU GWAS db study IDs
#' @param bfile If number of SNPs > 500 then need to provide your own LD reference panel. Provide plink dataset here.
#' @param plink_bin If number of SNPs > 500 then need to provide your own LD reference panel. Provide plink executable here
#'
#' @export
#' @return assoc_plot or stack_assoc_plot if multiple markers given
ieugwasr_to_gassocplot <- function(chrpos, id, bfile=NULL, plink_bin=NULL)
{
  stopifnot(length(chrpos) == 1)

  # Associations in the window, one row per variant/study pair
  assoc <- ieugwasr::associations(chrpos, id, proxies=0)
  is_dup <- duplicated(paste(assoc[["rsid"]], assoc[["id"]]))
  assoc <- assoc[!is_dup,]
  assoc[["z"]] <- assoc[["beta"]] / assoc[["se"]]

  # Reshape to one row per variant with one z-score column per study
  long <- subset(assoc, select=c("rsid", "id", "z", "chr", "position"))
  wide <- tidyr::spread(long, key="id", value="z")
  n_variants <- nrow(wide)
  message("Found ", n_variants, " variants")
  message("Extracting LD matrix for ", n_variants, " variants")
  ld <- suppressWarnings(suppressMessages(
    ieugwasr::ld_matrix(wide[["rsid"]], with_alleles=FALSE, bfile=bfile, plink_bin=plink_bin)
  ))
  message("Found ", nrow(ld), " variants in LD reference panel")

  # Keep only the variants present in the LD matrix, in the LD matrix order
  wide <- wide[match(rownames(ld), wide[["rsid"]]), ]
  stopifnot(all(wide[["rsid"]] == rownames(ld)))

  if(length(id) != 1)
  {
    # Several studies: markers and z-scores are returned separately
    return(list(
      markers = dplyr::tibble(marker=wide[["rsid"]], chr=wide[["chr"]], pos=wide[["position"]]),
      z = subset(wide, select=id),
      corr = ld,
      traits = id
    ))
  }

  # Single study: one table holding the markers and their z-scores
  return(list(
    data = dplyr::tibble(marker=wide[["rsid"]], chr=wide[["chr"]], pos=wide[["position"]], z=wide[[id]]),
    corr = ld
  ))
}


#' Convert coloc dataset to gassocplot dataset
#'
#' @param coloclist Output from *_to_coloc
#' @param bfile If number of SNPs > 500 then need to provide your own LD reference panel. Provide plink dataset here.
#' @param plink_bin If number of SNPs > 500 then need to provide your own LD reference panel.
#'   Provide plink executable here
#'
#' @export
#' @return List to feed into gassocplot
coloc_to_gassocplot <- function(coloclist, bfile=NULL, plink_bin=NULL)
{
  first <- coloclist$dataset1
  second <- coloclist$dataset2

  # Marker annotation is taken from the first dataset only
  markers <- dplyr::tibble(marker = first$snp, chr = first$chr, pos = first$pos)
  z <- dplyr::tibble(id1 = first$z, id2 = second$z)

  message("Extracting LD matrix for ", nrow(markers), " variants")
  ld <- ieugwasr::ld_matrix(markers[["marker"]], with_alleles=FALSE, bfile=bfile, plink_bin=plink_bin)
  message("Found ", nrow(ld), " variants in LD reference panel")

  # Restrict markers and z-scores to the LD matrix variants, in LD matrix order
  keep <- match(rownames(ld), markers[["marker"]])
  markers <- markers[keep, ]
  z <- z[keep, ]
  stopifnot(all(markers$marker == rownames(ld)))

  # The z-score columns are labelled with the two dataset ids
  traits <- c(coloclist[["dataset1"]][["id"]], coloclist[["dataset2"]][["id"]])
  names(z) <- traits

  return(list(markers = markers, z = z, corr = ld, traits = traits))
}

--------------------------------------------------------------------------------
/R/pwcoco.r:
--------------------------------------------------------------------------------
#' Write files for PWCoCo where data are read from two VCF objects or files.
#'
#' @param vcf1 VCF object or path to VCF file
#' @param vcf2 VCF object or path to VCF file
#' @param chrompos Character of the format chr:pos1-pos2
#' @param type1 How to treat vcf1 for coloc, either "quant" or "cc". If NULL (default) only the StudyType in the VCF header is used
#' @param type2 How to treat vcf2 for coloc, either "quant" or "cc". If NULL (default) only the StudyType in the VCF header is used
#' @param outfile Path to output files, without file ending
#'
#' @return 0 if success, 1 if there was a problem
gwasvcf_to_pwcoco <- function(vcf1, vcf2, chrompos, type1=NULL, type2=NULL, outfile)
{
  overlap <- gwasvcf::vcflist_overlap(list(vcf1, vcf2), chrompos)
  vcf1 <- overlap[[1]]
  vcf2 <- overlap[[2]]

  if (length(vcf1) == 0 || length(vcf2) == 0)
  {
    message("No overlaps for the given chrompos in ", ifelse(length(vcf1) == 0, "vcf1", "vcf2"), ".")
    return(1)
  }

  tib1 <- pwcoco_sumstats_from_vcf(vcf1, type1)
  tib2 <- pwcoco_sumstats_from_vcf(vcf2, type2)

  # One file per trait: <outfile>1.txt and <outfile>2.txt
  utils::write.table(tib1, file=paste0(outfile, "1.txt"), row.names=FALSE, col.names=TRUE, quote=FALSE)
  utils::write.table(tib2, file=paste0(outfile, "2.txt"), row.names=FALSE, col.names=TRUE, quote=FALSE)
  return(0)
}

# Convert one VCF object into the column layout written for PWCoCo
# (SNP, A1, A2, freq, b, se, p, N and, unless quantitative, N_case).
pwcoco_sumstats_from_vcf <- function(vcf, type=NULL)
{
  tib <- vcf %>% gwasvcf::vcf_to_granges() %>% dplyr::as_tibble() %>%
    dplyr::select(rsid, ALT, REF, AF, ES, SE, LP, SS, NC) %>%
    dplyr::rename(
      SNP = rsid,
      A1 = ALT,
      A2 = REF,
      freq = AF,
      b = ES,
      se = SE,
      p = LP,
      N = SS,
      N_case = NC
    )
  # LP is back-transformed to a p-value
  tib$p <- 10^(-tib$p)

  # Coloc type -- if study type is continuous then do not need the case column.
  # identical() and the length check keep this safe when `type` is NULL or the
  # header carries no StudyType.
  study_type <- VariantAnnotation::header(vcf) %>% VariantAnnotation::meta() %>% {.[["SAMPLE"]][["StudyType"]]}
  is_continuous <- length(study_type) > 0 && isTRUE(all(study_type == "Continuous"))
  if (identical(type, "quant") || is_continuous)
  {
    tib <- tib[c("SNP", "A1", "A2", "freq", "b", "se", "p", "N")]
  }
  return(tib)
}

#' Write files for PWCoCo where data are read from the OpenGWAS DB.
#'
#' @param id1 ID for trait 1
#' @param id2 ID for trait 2
#' @param chrompos Character of the format chr:pos1-pos2
#' @param type1 How to treat trait 1 for coloc, either "quant" or "cc". With "quant" no N_case column is written
#' @param type2 How to treat trait 2 for coloc, either "quant" or "cc". With "quant" no N_case column is written
#' @param outfile Path to output files, without file ending
#'
#' @return 0 if success, 1 if there was a problem
ieugwasr_to_pwcoco <- function(id1, id2, chrompos, type1=NULL, type2=NULL, outfile)
{
  assoc1 <- ieugwasr::associations(id=id1, variants=chrompos)
  assoc2 <- ieugwasr::associations(id=id2, variants=chrompos)

  # Check the number of rows: length() of a data frame counts columns, so an
  # empty result with columns would not be caught by it
  if (NROW(assoc1) < 1 || NROW(assoc2) < 1)
  {
    message("Data could not be read using the ieugwasr package for id1 = ", id1, " and id2 = ", id2, ".")
    return(1)
  }

  # Matching the files is quicker for PWCoCo, so best to off-load to that?
  # Save data -- PWCoCo handles the matching and cleaning mostly by itself
  tib1 <- pwcoco_sumstats_from_opengwas(assoc1, id1, type1)
  tib2 <- pwcoco_sumstats_from_opengwas(assoc2, id2, type2)

  utils::write.table(tib1, file=paste0(outfile, "1.txt"), row.names=FALSE, col.names=TRUE, quote=FALSE)
  utils::write.table(tib2, file=paste0(outfile, "2.txt"), row.names=FALSE, col.names=TRUE, quote=FALSE)
  return(0)
}

# Convert one OpenGWAS association table into the column layout written for
# PWCoCo. N_case is added from gwasinfo() when a case count is reported, unless
# the trait is explicitly declared "quant".
pwcoco_sumstats_from_opengwas <- function(assoc, id, type=NULL)
{
  tib <- assoc[!duplicated(assoc[["rsid"]]), ] %>%
    dplyr::select(rsid, ea, nea, eaf, beta, se, p, n) %>%
    dplyr::rename(
      SNP = rsid,
      A1 = ea,
      A2 = nea,
      freq = eaf,
      b = beta,
      N = n
    )

  # Need to determine whether there are cases
  if (!identical(type, "quant"))
  {
    info <- ieugwasr::gwasinfo(id)
    if ("ncase" %in% colnames(info) && !all(is.na(info$ncase)))
    {
      tib$N_case <- info$ncase
    }
  }
  return(tib)
}

#' Perform pair-wise conditional and colocalisation analysis using PWCoCo
#'
#' For a list of fine-mapped rsids, will assign to regions and generate colocalisation results for conditionally independent summary stats for each rsid
#'
#' @param id1 Path to vcffile or ID for trait1
#' @param id2 Path to vcffile2 or ID for trait2
#' @param bfile LD reference panel in Plink format (.bed, .bim, .fam)
#' @param chrompos Chromosome position (format: chr:pos1-pos2) region of interest
#' @param pwcoco Path to pwcoco binary
#' @param type1 How to treat vcffile1 for coloc, either "quant" or "cc"
#' @param type2 How to treat vcffile2 for coloc, either "quant" or "cc"
#' @param workdir Location to store files, default=tempdir()
#'
#' @export
#' @return List of colocalisation results
pwcoco <- function(id1, id2, bfile, chrompos, pwcoco, type1=NULL, type2=NULL, workdir=tempdir())
{
  sum_stats <- file.path(workdir, "sum_stats")
  is_file1 <- file.exists(id1)
  is_file2 <- file.exists(id2)

  if (is_file1 && is_file2)
  {
    message("Reading two VCF files for PWCoCo.")

    stopifnot(gwasvcf_to_pwcoco(id1, id2, chrompos, type1, type2, outfile=sum_stats) == 0)
  } else if (!is_file1 && !is_file2)
  {
    message("Reading two IDs from OpenGWAS.")

    stopifnot(ieugwasr_to_pwcoco(id1, id2, chrompos, type1, type2, outfile=sum_stats) == 0)
  } else {
    # Mixed input is not implemented. Fail here instead of running PWCoCo on
    # summary statistic files that were never written (or are left over).
    stop("id1 and id2 must either both be paths to existing VCF files or both be OpenGWAS IDs.")
  }

  # PWCoCo itself is multi-threaded; is it a good idea to multi-thread this function call too?
  chr <- suppressWarnings(as.integer(strsplit(chrompos, ":")[[1]][1]))
  if (is.na(chr))
  {
    stop("Could not read an integer chromosome from chrompos = ", chrompos)
  }

  out_prefix <- file.path(workdir, "out")
  result_file <- paste0(out_prefix, ".coloc")
  # Make sure a result from an earlier run is never read back by mistake
  unlink(result_file)

  # Every path is shell-quoted so that spaces or shell metacharacters in
  # user-supplied paths cannot break or alter the command
  cmd <- paste(
    shQuote(path.expand(pwcoco)),
    "--bfile", shQuote(path.expand(bfile)),
    "--sum_stats1", shQuote(paste0(sum_stats, "1.txt")),
    "--sum_stats2", shQuote(paste0(sum_stats, "2.txt")),
    "--out", shQuote(out_prefix),
    "--chr", chr
  )
  status <- system(cmd)
  if (status != 0 || !file.exists(result_file))
  {
    stop("PWCoCo did not produce ", result_file, " (exit status ", status, ").")
  }
  res <- data.table::fread(result_file)

  return(res)
}
--------------------------------------------------------------------------------
/R/susieR.r:
--------------------------------------------------------------------------------
#' Perform fine mapping pipeline using susieR
#'
#' Clumps data, then maps those to LD representative regions. Within each detected LD representative region, fine mapping is performed
#'
#' @param vcffile Path to vcf file
#' @param bfile Path to ld reference panel
#' @param plink_bin Path to plink
#' @param pop EUR, ASN or AFR
#' @param threads Number of cores used for the per-region steps. Default=1
#' @param clump_kb Clumping parameter. Default=1000. Currently not forwarded to the clumping step
#' @param clump_r2 Clumping parameter. Default=0.001. Currently not forwarded to the clumping step
#' @param clump_p Clumping parameter. Default=5e-8. Currently not forwarded to the clumping step
#' @param ... Optional arguments to be passed to susie_rss
#'
#' @export
#' @return List with elements clumped, regions and res (one dataset per region, each with the susieR fit attached as susieR)
susieR_pipeline <- function(vcffile, bfile, plink_bin, pop, threads=1, clump_kb=1000, clump_r2=0.001, clump_p=5e-8, ...)
{
  message("Performing clumping")
  # NOTE(review): clump_kb, clump_r2 and clump_p are accepted but not passed on.
  # Confirm the argument names of clump_gwasvcf() before forwarding them.
  clumped <- clump_gwasvcf(vcffile, plink=plink_bin, bfile=bfile)

  message("Map clumps to regions")
  regions <- map_variants_to_regions(clumped$chrpos, pop=pop)
  regions$rsid <- clumped$rsid[match(regions$variant, clumped$chrpos)]

  message("Obtain LD matrices for each region")
  # Use this function's own bfile / plink_bin arguments. The previous `ldref`
  # and `plink` are not defined anywhere in this function.
  m <- gwasvcf_to_finemapr(region=regions$region, vcf=vcffile, bfile=bfile, plink_bin=plink_bin, threads=threads)

  message("Perform susieR finemapping in each region")
  m2 <- parallel::mclapply(seq_along(m), function(i) {
    message(i)
    res <- susieR::susie_rss(
      m[[i]]$z$zscore,
      m[[i]]$ld,
      ...
    )
    # For each credible set keep the variant with the highest PIP. This must
    # read from `res`: the fit is only attached to m[[i]]$susieR further below.
    res$fmset <- sapply(res$sets$cs, function(x){
      m[[i]]$z$snp[x[which.max(res$pip[x])]]
    })
    return(res)
  }, mc.cores=threads)

  # Attach each region's fit to its dataset
  for(i in seq_along(m))
  {
    m[[i]]$susieR <- m2[[i]]
  }
  out <- list(clumped=clumped, regions=regions, res=m)
  return(out)
}
--------------------------------------------------------------------------------
/R/utils-pipe.r:
--------------------------------------------------------------------------------
#' Pipe operator
#'
#' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details.
4 | #' 5 | #' @name %>% 6 | #' @rdname pipe 7 | #' @keywords internal 8 | #' @export 9 | #' @importFrom magrittr %>% 10 | #' @usage lhs \%>\% rhs 11 | NULL 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # gwasglue 2 | 3 | 4 | [![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://www.tidyverse.org/lifecycle/#experimental) 5 | [![Codecov test coverage](https://codecov.io/gh/MRCIEU/gwasglue/branch/master/graph/badge.svg)](https://codecov.io/gh/MRCIEU/gwasglue?branch=master) 6 | [![R build status](https://github.com/MRCIEU/gwasglue/workflows/R-CMD-check/badge.svg)](https://github.com/MRCIEU/gwasglue/actions) 7 | 8 | 9 | **Under development** 10 | 11 | This R package serves as a conduit between packages that can read or query GWAS summary data, and packages that can analyse GWAS summary data. Here is where it lies in the general ecosystem of GWAS data and analysis: 12 | 13 | 14 | ![schematic](https://drive.google.com/uc?id=15w33jAaI6lAKINfLlNw343njUuPd4M6i) 15 | 16 | The figure above depicts a set of packages that we plan to connect to. 
Here is a list of what has been done and what is still to do: 17 | 18 | #### Data sources 19 | - [ieugwasr](https://github.com/mrcieu/ieugwasr) 20 | - [gwasvcf](https://github.com/mrcieu/gwasvcf) 21 | 22 | #### Finemapping 23 | - [finemapr](https://github.com/variani/finemapr) 24 | - [FINEMAP](http://www.christianbenner.com/) 25 | - [PAINTOR](https://github.com/gkichaev/PAINTOR_V3.0) 26 | - [CAVIAR](https://github.com/fhormoz/caviar) 27 | - [SuSIE](https://stephenslab.github.io/susie-paper/index.html) - TODO 28 | - [JAM](https://github.com/pjnewcombe/R2BGLiMS) - TODO 29 | 30 | #### Colocalisation 31 | - [coloc](https://cloud.r-project.org/web/packages/coloc/index.html) 32 | - [HEIDI](http://cnsgenomics.com/software/gsmr/) - TODO 33 | - [eCAVIAR](https://github.com/fhormoz/caviar) - TODO 34 | - [S-Predixcan](https://github.com/hakyimlab/MetaXcan) - TODO 35 | 36 | #### Mendelian randomization 37 | - [TwoSampleMR](https://github.com/mrcieu/TwoSampleMR) 38 | - [MendelianRandomization](https://cran.r-project.org/web/packages/MendelianRandomization/index.html) - port from TwoSampleMR 39 | - [RadialMR](https://github.com/WSpiller/RadialMR) - port from TwoSampleMR 40 | - [MRPRESSO](https://github.com/rondolab/MR-PRESSO) - port from TwoSampleMR 41 | - [GSMR](http://cnsgenomics.com/software/gsmr/) - TODO 42 | - [MRMix](https://github.com/gqi/MRMix) - TODO 43 | 44 | #### Visualisation 45 | - [gassocplot](https://github.com/jrs95/gassocplot) 46 | - Locus zoom plots e.g. [https://github.com/jrs95/gassocplot] - TODO 47 | 48 | 49 | ## Installation 50 | 51 | You can install the development version of gwasglue with: 52 | 53 | ``` r 54 | devtools::install_github("mrcieu/gwasglue") 55 | ``` 56 | 57 | 58 | ## Usage 59 | 60 | See vignettes etc here: [https://mrcieu.github.io/gwasglue](https://mrcieu.github.io/gwasglue). 
61 | 62 | ## Reference datasets 63 | 64 | Example GWAS VCF (GIANT 2010 BMI): 65 | 66 | - http://fileserve.mrcieu.ac.uk/vcf/IEU-a-2.vcf.gz 67 | - http://fileserve.mrcieu.ac.uk/vcf/IEU-a-2.vcf.gz.tbi 68 | 69 | Updated 1000 genomes LD reference panels (multiple populations): 70 | 71 | - http://fileserve.mrcieu.ac.uk/ld/1kg.v3.tgz 72 | 73 | 1kg European reference panel for LD (legacy): 74 | 75 | - http://fileserve.mrcieu.ac.uk/ld/data_maf0.01_rs_ref.tgz 76 | 77 | 1kg vcf harmonised against human genome reference: 78 | 79 | - http://fileserve.mrcieu.ac.uk/vcf/1kg_v3_nomult.vcf.gz 80 | - http://fileserve.mrcieu.ac.uk/vcf/1kg_v3_nomult.vcf.gz.tbi 81 | 82 | ## Contributing to the resource 83 | 84 | For any `<package>` package we create a new file called `R/<package>.r` which contains two functions: 85 | 86 | - `gwasvcf_to_<package>` 87 | - `ieugwasr_to_<package>` 88 | 89 | For an example, see the `R/TwoSampleMR.r` file, which contains the functions `gwasvcf_to_TwoSampleMR` and `ieugwasr_to_TwoSampleMR`. 90 | 91 | -------------------------------------------------------------------------------- /docs/404.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Page not found (404) • gwasglue 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 55 | 56 | 57 | 58 | 59 | 60 | 61 |
62 |
63 | 132 | 133 | 134 | 135 |
136 | 137 |
138 |
139 | 142 | 143 | Content not found. Please use links in the navbar. 144 | 145 |
146 | 147 | 152 | 153 |
154 | 155 | 156 | 157 |
158 | 161 | 162 |
163 |

Site built with pkgdown 1.6.1.

164 |
165 | 166 |
167 |
168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | -------------------------------------------------------------------------------- /docs/LICENSE-text.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | License • gwasglue 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 55 | 56 | 57 | 58 | 59 | 60 | 61 |
62 |
63 | 132 | 133 | 134 | 135 |
136 | 137 |
138 |
139 | 142 | 143 |
YEAR: 2019
144 | COPYRIGHT HOLDER: Gibran Hemani
145 | 
146 | 147 |
148 | 149 | 154 | 155 |
156 | 157 | 158 | 159 |
160 | 163 | 164 |
165 |

Site built with pkgdown 1.6.1.

166 |
167 | 168 |
169 |
170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | -------------------------------------------------------------------------------- /docs/LICENSE.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | MIT License • gwasglue 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 55 | 56 | 57 | 58 | 59 | 60 | 61 |
62 |
63 | 132 | 133 | 134 | 135 |
136 | 137 |
138 |
139 | 142 | 143 |
144 | 145 |

Copyright (c) 2019 Gibran Hemani

146 |

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

147 |

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

148 |

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

149 |
150 | 151 |
152 | 153 | 158 | 159 |
160 | 161 | 162 | 163 |
164 | 167 | 168 |
169 |

Site built with pkgdown 1.6.1.

170 |
171 | 172 |
173 |
174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | -------------------------------------------------------------------------------- /docs/articles/cojo_files/accessible-code-block-0.0.1/empty-anchor.js: -------------------------------------------------------------------------------- 1 | // Hide empty tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) --> 2 | // v0.0.1 3 | // Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020. 4 | 5 | document.addEventListener('DOMContentLoaded', function() { 6 | const codeList = document.getElementsByClassName("sourceCode"); 7 | for (var i = 0; i < codeList.length; i++) { 8 | var linkList = codeList[i].getElementsByTagName('a'); 9 | for (var j = 0; j < linkList.length; j++) { 10 | if (linkList[j].innerHTML === "") { 11 | linkList[j].setAttribute('aria-hidden', 'true'); 12 | } 13 | } 14 | } 15 | }); 16 | -------------------------------------------------------------------------------- /docs/articles/colocalisation_files/accessible-code-block-0.0.1/empty-anchor.js: -------------------------------------------------------------------------------- 1 | // Hide empty tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) --> 2 | // v0.0.1 3 | // Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020. 
4 | 5 | document.addEventListener('DOMContentLoaded', function() { 6 | const codeList = document.getElementsByClassName("sourceCode"); 7 | for (var i = 0; i < codeList.length; i++) { 8 | var linkList = codeList[i].getElementsByTagName('a'); 9 | for (var j = 0; j < linkList.length; j++) { 10 | if (linkList[j].innerHTML === "") { 11 | linkList[j].setAttribute('aria-hidden', 'true'); 12 | } 13 | } 14 | } 15 | }); 16 | -------------------------------------------------------------------------------- /docs/articles/colocalisation_files/figure-html/unnamed-chunk-10-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MRCIEU/gwasglue/c2d5660eed389e1a9b3e04406b88731d642243f1/docs/articles/colocalisation_files/figure-html/unnamed-chunk-10-1.png -------------------------------------------------------------------------------- /docs/articles/colocalisation_files/figure-html/unnamed-chunk-11-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MRCIEU/gwasglue/c2d5660eed389e1a9b3e04406b88731d642243f1/docs/articles/colocalisation_files/figure-html/unnamed-chunk-11-1.png -------------------------------------------------------------------------------- /docs/articles/colocalisation_files/figure-html/unnamed-chunk-7-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MRCIEU/gwasglue/c2d5660eed389e1a9b3e04406b88731d642243f1/docs/articles/colocalisation_files/figure-html/unnamed-chunk-7-1.png -------------------------------------------------------------------------------- /docs/articles/finemapping_files/accessible-code-block-0.0.1/empty-anchor.js: -------------------------------------------------------------------------------- 1 | // Hide empty tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) --> 
2 | // v0.0.1 3 | // Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020. 4 | 5 | document.addEventListener('DOMContentLoaded', function() { 6 | const codeList = document.getElementsByClassName("sourceCode"); 7 | for (var i = 0; i < codeList.length; i++) { 8 | var linkList = codeList[i].getElementsByTagName('a'); 9 | for (var j = 0; j < linkList.length; j++) { 10 | if (linkList[j].innerHTML === "") { 11 | linkList[j].setAttribute('aria-hidden', 'true'); 12 | } 13 | } 14 | } 15 | }); 16 | -------------------------------------------------------------------------------- /docs/articles/gwas2020_files/accessible-code-block-0.0.1/empty-anchor.js: -------------------------------------------------------------------------------- 1 | // Hide empty tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) --> 2 | // v0.0.1 3 | // Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020. 4 | 5 | document.addEventListener('DOMContentLoaded', function() { 6 | const codeList = document.getElementsByClassName("sourceCode"); 7 | for (var i = 0; i < codeList.length; i++) { 8 | var linkList = codeList[i].getElementsByTagName('a'); 9 | for (var j = 0; j < linkList.length; j++) { 10 | if (linkList[j].innerHTML === "") { 11 | linkList[j].setAttribute('aria-hidden', 'true'); 12 | } 13 | } 14 | } 15 | }); 16 | -------------------------------------------------------------------------------- /docs/articles/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Articles • gwasglue 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 55 | 56 | 57 | 58 | 59 | 60 | 61 |
62 |
63 | 132 | 133 | 134 | 135 |
136 | 137 |
138 |
139 | 142 | 143 |
144 |

All vignettes

145 |

146 | 147 |
148 |
Conditional analysis of VCF files
149 |
150 |
Genetic colocalisation
151 |
152 |
Clumping and finemapping
153 |
154 |
Major changes to the IEU GWAS resources for 2020
155 |
156 |
Generate LD matrices
157 |
158 |
Mendelian randomization
159 |
160 |
161 |
162 |
163 |
164 | 165 | 166 |
167 | 170 | 171 |
172 |

Site built with pkgdown 1.6.1.

173 |
174 | 175 |
176 |
177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | -------------------------------------------------------------------------------- /docs/articles/ld_ref_files/accessible-code-block-0.0.1/empty-anchor.js: -------------------------------------------------------------------------------- 1 | // Hide empty tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) --> 2 | // v0.0.1 3 | // Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020. 4 | 5 | document.addEventListener('DOMContentLoaded', function() { 6 | const codeList = document.getElementsByClassName("sourceCode"); 7 | for (var i = 0; i < codeList.length; i++) { 8 | var linkList = codeList[i].getElementsByTagName('a'); 9 | for (var j = 0; j < linkList.length; j++) { 10 | if (linkList[j].innerHTML === "") { 11 | linkList[j].setAttribute('aria-hidden', 'true'); 12 | } 13 | } 14 | } 15 | }); 16 | -------------------------------------------------------------------------------- /docs/articles/mr_files/accessible-code-block-0.0.1/empty-anchor.js: -------------------------------------------------------------------------------- 1 | // Hide empty tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) --> 2 | // v0.0.1 3 | // Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020. 
4 | 5 | document.addEventListener('DOMContentLoaded', function() { 6 | const codeList = document.getElementsByClassName("sourceCode"); 7 | for (var i = 0; i < codeList.length; i++) { 8 | var linkList = codeList[i].getElementsByTagName('a'); 9 | for (var j = 0; j < linkList.length; j++) { 10 | if (linkList[j].innerHTML === "") { 11 | linkList[j].setAttribute('aria-hidden', 'true'); 12 | } 13 | } 14 | } 15 | }); 16 | -------------------------------------------------------------------------------- /docs/authors.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Authors • gwasglue 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 55 | 56 | 57 | 58 | 59 | 60 | 61 |
62 |
63 | 132 | 133 | 134 | 135 |
136 | 137 |
138 |
139 | 142 | 143 |
    144 |
  • 145 |

    Gibran Hemani. Author, maintainer. 146 |

    147 |
  • 148 |
149 | 150 |
151 | 152 |
153 | 154 | 155 | 156 |
157 | 160 | 161 |
162 |

Site built with pkgdown 1.6.1.

163 |
164 | 165 |
166 |
167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | -------------------------------------------------------------------------------- /docs/bootstrap-toc.css: -------------------------------------------------------------------------------- 1 | /*! 2 | * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) 3 | * Copyright 2015 Aidan Feldman 4 | * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ 5 | 6 | /* modified from https://github.com/twbs/bootstrap/blob/94b4076dd2efba9af71f0b18d4ee4b163aa9e0dd/docs/assets/css/src/docs.css#L548-L601 */ 7 | 8 | /* All levels of nav */ 9 | nav[data-toggle='toc'] .nav > li > a { 10 | display: block; 11 | padding: 4px 20px; 12 | font-size: 13px; 13 | font-weight: 500; 14 | color: #767676; 15 | } 16 | nav[data-toggle='toc'] .nav > li > a:hover, 17 | nav[data-toggle='toc'] .nav > li > a:focus { 18 | padding-left: 19px; 19 | color: #563d7c; 20 | text-decoration: none; 21 | background-color: transparent; 22 | border-left: 1px solid #563d7c; 23 | } 24 | nav[data-toggle='toc'] .nav > .active > a, 25 | nav[data-toggle='toc'] .nav > .active:hover > a, 26 | nav[data-toggle='toc'] .nav > .active:focus > a { 27 | padding-left: 18px; 28 | font-weight: bold; 29 | color: #563d7c; 30 | background-color: transparent; 31 | border-left: 2px solid #563d7c; 32 | } 33 | 34 | /* Nav: second level (shown on .active) */ 35 | nav[data-toggle='toc'] .nav .nav { 36 | display: none; /* Hide by default, but at >768px, show it */ 37 | padding-bottom: 10px; 38 | } 39 | nav[data-toggle='toc'] .nav .nav > li > a { 40 | padding-top: 1px; 41 | padding-bottom: 1px; 42 | padding-left: 30px; 43 | font-size: 12px; 44 | font-weight: normal; 45 | } 46 | nav[data-toggle='toc'] .nav .nav > li > a:hover, 47 | nav[data-toggle='toc'] .nav .nav > li > a:focus { 48 | padding-left: 29px; 49 | } 50 | nav[data-toggle='toc'] .nav .nav > .active > a, 51 | nav[data-toggle='toc'] .nav .nav > .active:hover > a, 52 | 
nav[data-toggle='toc'] .nav .nav > .active:focus > a { 53 | padding-left: 28px; 54 | font-weight: 500; 55 | } 56 | 57 | /* from https://github.com/twbs/bootstrap/blob/e38f066d8c203c3e032da0ff23cd2d6098ee2dd6/docs/assets/css/src/docs.css#L631-L634 */ 58 | nav[data-toggle='toc'] .nav > .active > ul { 59 | display: block; 60 | } 61 | -------------------------------------------------------------------------------- /docs/bootstrap-toc.js: -------------------------------------------------------------------------------- 1 | /*! 2 | * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) 3 | * Copyright 2015 Aidan Feldman 4 | * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ 5 | (function() { 6 | 'use strict'; 7 | 8 | window.Toc = { 9 | helpers: { 10 | // return all matching elements in the set, or their descendants 11 | findOrFilter: function($el, selector) { 12 | // http://danielnouri.org/notes/2011/03/14/a-jquery-find-that-also-finds-the-root-element/ 13 | // http://stackoverflow.com/a/12731439/358804 14 | var $descendants = $el.find(selector); 15 | return $el.filter(selector).add($descendants).filter(':not([data-toc-skip])'); 16 | }, 17 | 18 | generateUniqueIdBase: function(el) { 19 | var text = $(el).text(); 20 | var anchor = text.trim().toLowerCase().replace(/[^A-Za-z0-9]+/g, '-'); 21 | return anchor || el.tagName.toLowerCase(); 22 | }, 23 | 24 | generateUniqueId: function(el) { 25 | var anchorBase = this.generateUniqueIdBase(el); 26 | for (var i = 0; ; i++) { 27 | var anchor = anchorBase; 28 | if (i > 0) { 29 | // add suffix 30 | anchor += '-' + i; 31 | } 32 | // check if ID already exists 33 | if (!document.getElementById(anchor)) { 34 | return anchor; 35 | } 36 | } 37 | }, 38 | 39 | generateAnchor: function(el) { 40 | if (el.id) { 41 | return el.id; 42 | } else { 43 | var anchor = this.generateUniqueId(el); 44 | el.id = anchor; 45 | return anchor; 46 | } 47 | }, 48 | 49 | createNavList: function() { 50 | 
return $(''); 51 | }, 52 | 53 | createChildNavList: function($parent) { 54 | var $childList = this.createNavList(); 55 | $parent.append($childList); 56 | return $childList; 57 | }, 58 | 59 | generateNavEl: function(anchor, text) { 60 | var $a = $(''); 61 | $a.attr('href', '#' + anchor); 62 | $a.text(text); 63 | var $li = $('
  • '); 64 | $li.append($a); 65 | return $li; 66 | }, 67 | 68 | generateNavItem: function(headingEl) { 69 | var anchor = this.generateAnchor(headingEl); 70 | var $heading = $(headingEl); 71 | var text = $heading.data('toc-text') || $heading.text(); 72 | return this.generateNavEl(anchor, text); 73 | }, 74 | 75 | // Find the first heading level (`

    `, then `

    `, etc.) that has more than one element. Defaults to 1 (for `

    `). 76 | getTopLevel: function($scope) { 77 | for (var i = 1; i <= 6; i++) { 78 | var $headings = this.findOrFilter($scope, 'h' + i); 79 | if ($headings.length > 1) { 80 | return i; 81 | } 82 | } 83 | 84 | return 1; 85 | }, 86 | 87 | // returns the elements for the top level, and the next below it 88 | getHeadings: function($scope, topLevel) { 89 | var topSelector = 'h' + topLevel; 90 | 91 | var secondaryLevel = topLevel + 1; 92 | var secondarySelector = 'h' + secondaryLevel; 93 | 94 | return this.findOrFilter($scope, topSelector + ',' + secondarySelector); 95 | }, 96 | 97 | getNavLevel: function(el) { 98 | return parseInt(el.tagName.charAt(1), 10); 99 | }, 100 | 101 | populateNav: function($topContext, topLevel, $headings) { 102 | var $context = $topContext; 103 | var $prevNav; 104 | 105 | var helpers = this; 106 | $headings.each(function(i, el) { 107 | var $newNav = helpers.generateNavItem(el); 108 | var navLevel = helpers.getNavLevel(el); 109 | 110 | // determine the proper $context 111 | if (navLevel === topLevel) { 112 | // use top level 113 | $context = $topContext; 114 | } else if ($prevNav && $context === $topContext) { 115 | // create a new level of the tree and switch to it 116 | $context = helpers.createChildNavList($prevNav); 117 | } // else use the current $context 118 | 119 | $context.append($newNav); 120 | 121 | $prevNav = $newNav; 122 | }); 123 | }, 124 | 125 | parseOps: function(arg) { 126 | var opts; 127 | if (arg.jquery) { 128 | opts = { 129 | $nav: arg 130 | }; 131 | } else { 132 | opts = arg; 133 | } 134 | opts.$scope = opts.$scope || $(document.body); 135 | return opts; 136 | } 137 | }, 138 | 139 | // accepts a jQuery object, or an options object 140 | init: function(opts) { 141 | opts = this.helpers.parseOps(opts); 142 | 143 | // ensure that the data attribute is in place for styling 144 | opts.$nav.attr('data-toggle', 'toc'); 145 | 146 | var $topContext = this.helpers.createChildNavList(opts.$nav); 147 | var topLevel = 
this.helpers.getTopLevel(opts.$scope); 148 | var $headings = this.helpers.getHeadings(opts.$scope, topLevel); 149 | this.helpers.populateNav($topContext, topLevel, $headings); 150 | } 151 | }; 152 | 153 | $(function() { 154 | $('nav[data-toggle="toc"]').each(function(i, el) { 155 | var $nav = $(el); 156 | Toc.init($nav); 157 | }); 158 | }); 159 | })(); 160 | -------------------------------------------------------------------------------- /docs/docsearch.js: -------------------------------------------------------------------------------- 1 | $(function() { 2 | 3 | // register a handler to move the focus to the search bar 4 | // upon pressing shift + "/" (i.e. "?") 5 | $(document).on('keydown', function(e) { 6 | if (e.shiftKey && e.keyCode == 191) { 7 | e.preventDefault(); 8 | $("#search-input").focus(); 9 | } 10 | }); 11 | 12 | $(document).ready(function() { 13 | // do keyword highlighting 14 | /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */ 15 | var mark = function() { 16 | 17 | var referrer = document.URL ; 18 | var paramKey = "q" ; 19 | 20 | if (referrer.indexOf("?") !== -1) { 21 | var qs = referrer.substr(referrer.indexOf('?') + 1); 22 | var qs_noanchor = qs.split('#')[0]; 23 | var qsa = qs_noanchor.split('&'); 24 | var keyword = ""; 25 | 26 | for (var i = 0; i < qsa.length; i++) { 27 | var currentParam = qsa[i].split('='); 28 | 29 | if (currentParam.length !== 2) { 30 | continue; 31 | } 32 | 33 | if (currentParam[0] == paramKey) { 34 | keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20")); 35 | } 36 | } 37 | 38 | if (keyword !== "") { 39 | $(".contents").unmark({ 40 | done: function() { 41 | $(".contents").mark(keyword); 42 | } 43 | }); 44 | } 45 | } 46 | }; 47 | 48 | mark(); 49 | }); 50 | }); 51 | 52 | /* Search term highlighting ------------------------------*/ 53 | 54 | function matchedWords(hit) { 55 | var words = []; 56 | 57 | var hierarchy = hit._highlightResult.hierarchy; 58 | // loop to fetch from lvl0, lvl1, etc. 
59 | for (var idx in hierarchy) { 60 | words = words.concat(hierarchy[idx].matchedWords); 61 | } 62 | 63 | var content = hit._highlightResult.content; 64 | if (content) { 65 | words = words.concat(content.matchedWords); 66 | } 67 | 68 | // return unique words 69 | var words_uniq = [...new Set(words)]; 70 | return words_uniq; 71 | } 72 | 73 | function updateHitURL(hit) { 74 | 75 | var words = matchedWords(hit); 76 | var url = ""; 77 | 78 | if (hit.anchor) { 79 | url = hit.url_without_anchor + '?q=' + escape(words.join(" ")) + '#' + hit.anchor; 80 | } else { 81 | url = hit.url + '?q=' + escape(words.join(" ")); 82 | } 83 | 84 | return url; 85 | } 86 | -------------------------------------------------------------------------------- /docs/link.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 8 | 12 | 13 | -------------------------------------------------------------------------------- /docs/news/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Changelog • gwasglue 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 55 | 56 | 57 | 58 | 59 | 60 | 61 |
    62 |
    63 | 132 | 133 | 134 | 135 |
    136 | 137 |
    138 |
    139 | 143 | 144 |
    145 |

    146 | gwasglue 0.0.0.9000

    147 |
      148 |
    • Added a NEWS.md file to track changes to the package.
    • 149 |
    150 |
    151 |
    152 | 153 | 158 | 159 |
    160 | 161 | 162 |
    163 | 166 | 167 |
    168 |

    Site built with pkgdown 1.6.1.

    169 |
    170 | 171 |
    172 |
    173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | -------------------------------------------------------------------------------- /docs/pkgdown.js: -------------------------------------------------------------------------------- 1 | /* http://gregfranko.com/blog/jquery-best-practices/ */ 2 | (function($) { 3 | $(function() { 4 | 5 | $('.navbar-fixed-top').headroom(); 6 | 7 | $('body').css('padding-top', $('.navbar').height() + 10); 8 | $(window).resize(function(){ 9 | $('body').css('padding-top', $('.navbar').height() + 10); 10 | }); 11 | 12 | $('[data-toggle="tooltip"]').tooltip(); 13 | 14 | var cur_path = paths(location.pathname); 15 | var links = $("#navbar ul li a"); 16 | var max_length = -1; 17 | var pos = -1; 18 | for (var i = 0; i < links.length; i++) { 19 | if (links[i].getAttribute("href") === "#") 20 | continue; 21 | // Ignore external links 22 | if (links[i].host !== location.host) 23 | continue; 24 | 25 | var nav_path = paths(links[i].pathname); 26 | 27 | var length = prefix_length(nav_path, cur_path); 28 | if (length > max_length) { 29 | max_length = length; 30 | pos = i; 31 | } 32 | } 33 | 34 | // Add class to parent
  • , and enclosing
  • if in dropdown 35 | if (pos >= 0) { 36 | var menu_anchor = $(links[pos]); 37 | menu_anchor.parent().addClass("active"); 38 | menu_anchor.closest("li.dropdown").addClass("active"); 39 | } 40 | }); 41 | 42 | function paths(pathname) { 43 | var pieces = pathname.split("/"); 44 | pieces.shift(); // always starts with / 45 | 46 | var end = pieces[pieces.length - 1]; 47 | if (end === "index.html" || end === "") 48 | pieces.pop(); 49 | return(pieces); 50 | } 51 | 52 | // Returns -1 if not found 53 | function prefix_length(needle, haystack) { 54 | if (needle.length > haystack.length) 55 | return(-1); 56 | 57 | // Special case for length-0 haystack, since for loop won't run 58 | if (haystack.length === 0) { 59 | return(needle.length === 0 ? 0 : -1); 60 | } 61 | 62 | for (var i = 0; i < haystack.length; i++) { 63 | if (needle[i] != haystack[i]) 64 | return(i); 65 | } 66 | 67 | return(haystack.length); 68 | } 69 | 70 | /* Clipboard --------------------------*/ 71 | 72 | function changeTooltipMessage(element, msg) { 73 | var tooltipOriginalTitle=element.getAttribute('data-original-title'); 74 | element.setAttribute('data-original-title', msg); 75 | $(element).tooltip('show'); 76 | element.setAttribute('data-original-title', tooltipOriginalTitle); 77 | } 78 | 79 | if(ClipboardJS.isSupported()) { 80 | $(document).ready(function() { 81 | var copyButton = ""; 82 | 83 | $(".examples, div.sourceCode").addClass("hasCopyButton"); 84 | 85 | // Insert copy buttons: 86 | $(copyButton).prependTo(".hasCopyButton"); 87 | 88 | // Initialize tooltips: 89 | $('.btn-copy-ex').tooltip({container: 'body'}); 90 | 91 | // Initialize clipboard: 92 | var clipboardBtnCopies = new ClipboardJS('[data-clipboard-copy]', { 93 | text: function(trigger) { 94 | return trigger.parentNode.textContent; 95 | } 96 | }); 97 | 98 | clipboardBtnCopies.on('success', function(e) { 99 | changeTooltipMessage(e.trigger, 'Copied!'); 100 | e.clearSelection(); 101 | }); 102 | 103 | clipboardBtnCopies.on('error', 
function() { 104 | changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy'); 105 | }); 106 | }); 107 | } 108 | })(window.jQuery || window.$) 109 | -------------------------------------------------------------------------------- /docs/pkgdown.yml: -------------------------------------------------------------------------------- 1 | pandoc: 2.7.3 2 | pkgdown: 1.6.1 3 | pkgdown_sha: ~ 4 | articles: 5 | cojo: cojo.html 6 | colocalisation: colocalisation.html 7 | finemapping: finemapping.html 8 | gwas2020: gwas2020.html 9 | ld_ref: ld_ref.html 10 | mr: mr.html 11 | last_built: 2021-02-24T14:36Z 12 | 13 | -------------------------------------------------------------------------------- /docs/reference/cojo_sumstat_file.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Write vcf file to cojo sumstat file — cojo_sumstat_file • gwasglue 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 56 | 57 | 58 | 59 | 60 | 61 | 62 |
    63 |
    64 | 133 | 134 | 135 | 136 |
    137 | 138 |
    139 |
    140 | 145 | 146 |
    147 |

    Write vcf file to cojo sumstat file

    148 |
    149 | 150 |
    cojo_sumstat_file(vcffile, outfile)
    151 | 152 |

    Arguments

    153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 |
    vcffile

    Path to vcf file

    outfile

    Path to output file

    164 | 165 |

    Value

    166 | 167 |

    vcf object

    168 | 169 |
    170 | 175 |
    176 | 177 | 178 |
    179 | 182 | 183 |
    184 |

    Site built with pkgdown 1.6.1.

    185 |
    186 | 187 |
    188 |
    189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | -------------------------------------------------------------------------------- /docs/reference/gwasvcf_to_TwoSampleMR.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Create exposure or outcome data format for TwoSampleMR from vcf — gwasvcf_to_TwoSampleMR • gwasglue 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 56 | 57 | 58 | 59 | 60 | 61 | 62 |
    63 |
    64 | 133 | 134 | 135 | 136 |
    137 | 138 |
    139 |
    140 | 145 | 146 |
    147 |

    Create exposure or outcome data format for TwoSampleMR from vcf

    148 |
    149 | 150 |
    gwasvcf_to_TwoSampleMR(vcf, type = "exposure")
    151 | 152 |

    Arguments

    153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 |
    vcf

    VCF object

    type

    ="exposure" or "outcome"

    164 | 165 |

    Value

    166 | 167 |

    data frame

    168 | 169 |
    170 | 175 |
    176 | 177 | 178 |
    179 | 182 | 183 |
    184 |

    Site built with pkgdown 1.6.1.

    185 |
    186 | 187 |
    188 |
    189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | -------------------------------------------------------------------------------- /docs/reference/gwasvcf_to_coloc.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Generate coloc dataset from vcf files — gwasvcf_to_coloc • gwasglue 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 56 | 57 | 58 | 59 | 60 | 61 | 62 |
    63 |
    64 | 133 | 134 | 135 | 136 |
    137 | 138 |
    139 |
    140 | 145 | 146 |
    147 |

    Generate coloc dataset from vcf files

    148 |
    149 | 150 |
    gwasvcf_to_coloc(vcf1, vcf2, chrompos)
    151 | 152 |

    Arguments

    153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 |
    vcf1

    VCF object or path to vcf file

    vcf2

    VCF object or path to vcf file

    chrompos

    Character of chr:pos1-pos2

    168 | 169 |

    Value

    170 | 171 |

    List of datasets to feed into coloc

    172 | 173 |
    174 | 179 |
    180 | 181 | 182 |
    183 | 186 | 187 |
    188 |

    Site built with pkgdown 1.6.1.

    189 |
    190 | 191 |
    192 |
    193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | -------------------------------------------------------------------------------- /docs/reference/harmonise_against_ref.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Harmonise gwas alleles to be same as reference — harmonise_against_ref • gwasglue 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 56 | 57 | 58 | 59 | 60 | 61 | 62 |
    63 |
    64 | 133 | 134 | 135 | 136 |
    137 | 138 |
    139 |
    140 | 145 | 146 |
    147 |

    Harmonise gwas alleles to be same as reference

    148 |
    149 | 150 |
    harmonise_against_ref(gwas, reference)
    151 | 152 |

    Arguments

    153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 |
    gwas

    GWAS dataset whose alleles are to be harmonised

    reference

    Reference dataset whose alleles the GWAS is aligned to

    164 | 165 |

    Value

    166 | 167 |

    data frame with attributes

    168 | 169 |
    170 | 175 |
    176 | 177 | 178 |
    179 | 182 | 183 |
    184 |

    Site built with pkgdown 1.6.1.

    185 |
    186 | 187 |
    188 |
    189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | -------------------------------------------------------------------------------- /docs/reference/ieugwasr_to_TwoSampleMR.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Convert output from query to TwoSampleMR format — ieugwasr_to_TwoSampleMR • gwasglue 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 56 | 57 | 58 | 59 | 60 | 61 | 62 |
    63 |
    64 | 133 | 134 | 135 | 136 |
    137 | 138 |
    139 |
    140 | 145 | 146 |
    147 |

    Convert output from query to TwoSampleMR format

    148 |
    149 | 150 |
    ieugwasr_to_TwoSampleMR(x, type = "exposure")
    151 | 152 |

    Arguments

    153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 |
    x

    Output from ieugwasr query e.g. associations, tophits, phewas

    type

    "exposure" (default) or "outcome"

    164 | 165 |

    Value

    166 | 167 |

    data frame

    168 | 169 |
    170 | 175 |
    176 | 177 | 178 |
    179 | 182 | 183 |
    184 |

    Site built with pkgdown 1.6.1.

    185 |
    186 | 187 |
    188 |
    189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | -------------------------------------------------------------------------------- /docs/reference/map_variants_to_regions.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | For a set of variants map to LD regions — map_variants_to_regions • gwasglue 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 56 | 57 | 58 | 59 | 60 | 61 | 62 |
    63 |
    64 | 133 | 134 | 135 | 136 |
    137 | 138 |
    139 |
    140 | 145 | 146 |
    147 |

    LD regions defined here https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4731402/

    148 |
    149 | 150 |
    map_variants_to_regions(chrpos, pop)
    151 | 152 |

    Arguments

    153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 |
    chrpos

    Array of chr:pos

    pop

    EUR, AFR or ASN

    164 | 165 |

    Value

    166 | 167 | 168 | 169 | 170 | 171 |
    172 | 177 |
    178 | 179 | 180 |
    181 | 184 | 185 |
    186 |

    Site built with pkgdown 1.6.1.

    187 |
    188 | 189 |
    190 |
    191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | -------------------------------------------------------------------------------- /docs/reference/pipe.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Pipe operator — %>% • gwasglue 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 56 | 57 | 58 | 59 | 60 | 61 | 62 |
    63 |
    64 | 133 | 134 | 135 | 136 |
    137 | 138 |
    139 |
    140 | 145 | 146 |
    147 |

    See magrittr::%>% for details.

    148 |
    149 | 150 |
    lhs %>% rhs
    151 | 152 | 153 | 154 |
    155 | 160 |
    161 | 162 | 163 |
    164 | 167 | 168 |
    169 |

    Site built with pkgdown 1.6.1.

    170 |
    171 | 172 |
    173 |
    174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | -------------------------------------------------------------------------------- /docs/reference/set_bc4_files.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Determine locations of useful reference datasets on bluecrystal4 — set_bc4_files • gwasglue 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 58 | 59 | 60 | 61 | 62 | 63 | 64 |
    65 |
    66 | 135 | 136 | 137 | 138 |
    139 | 140 |
    141 |
    142 | 147 | 148 |
    149 |

    This is a convenience function for members at the University of Bristol 150 | to automatically set file locations for various reference datasets. It relates 151 | only to paths on bc4

    152 |
    153 | 154 |
    set_bc4_files()
    155 | 156 | 157 | 158 |
    159 | 164 |
    165 | 166 | 167 |
    168 | 171 | 172 |
    173 |

    Site built with pkgdown 1.6.1.

    174 |
    175 | 176 |
    177 |
    178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | -------------------------------------------------------------------------------- /docs/reference/write_out.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Create format for HPC pipeline — write_out • gwasglue 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 56 | 57 | 58 | 59 | 60 | 61 | 62 |
    63 |
    64 | 133 | 134 | 135 | 136 |
    137 | 138 |
    139 |
    140 | 145 | 146 |
    147 |

    Takes raw files and aligns them to reference. Important if files don't have chr:pos already

    148 |
    149 | 150 |
    write_out(harmonised, path)
    151 | 152 |

    Arguments

    153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 |
    harmonised

    Output from harmonise_against_ref

    path

    Path to write out json file and txt file

    164 | 165 | 166 |
    167 | 172 |
    173 | 174 | 175 |
    176 | 179 | 180 |
    181 |

    Site built with pkgdown 1.6.1.

    182 |
    183 | 184 |
    185 |
    186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | -------------------------------------------------------------------------------- /inst/hapmap3/hapmap3_autosome.snplist.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MRCIEU/gwasglue/c2d5660eed389e1a9b3e04406b88731d642243f1/inst/hapmap3/hapmap3_autosome.snplist.gz -------------------------------------------------------------------------------- /man/clump_gwasvcf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/clump.r 3 | \name{clump_gwasvcf} 4 | \alias{clump_gwasvcf} 5 | \title{Perform LD clumping} 6 | \usage{ 7 | clump_gwasvcf( 8 | vcf, 9 | clump_kb = 1000, 10 | clump_r2 = 0.001, 11 | clump_p = 5e-08, 12 | pop = NULL, 13 | bfile = NULL, 14 | plink_bin = NULL, 15 | access_token = NULL 16 | ) 17 | } 18 | \arguments{ 19 | \item{vcf}{VCF file or VCF object} 20 | 21 | \item{clump_kb}{Clumping kb window. Default = 1000} 22 | 23 | \item{clump_r2}{Clumping r2 threshold. Default is very strict, 0.001} 24 | 25 | \item{clump_p}{Clumping sig level for index variants. Default = 5e-08} 26 | 27 | \item{pop}{Super-population to use as reference panel. Default = "EUR". Options are EUR, SAS, EAS, AFR, AMR. 'legacy' also available - which is a previously used version of the EUR panel with a slightly different set of markers} 28 | 29 | \item{bfile}{If this is provided then will use the API. Default = NULL} 30 | 31 | \item{plink_bin}{If null and bfile is not null then will detect packaged plink binary for specific OS. Otherwise specify path to plink binary. Default = NULL} 32 | 33 | \item{access_token}{Google OAuth2 access token. 
Used to authenticate level of access to data} 34 | } 35 | \value{ 36 | data frame of clumped results 37 | } 38 | \description{ 39 | 40 | } 41 | -------------------------------------------------------------------------------- /man/cojo_cond.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cojo.r 3 | \name{cojo_cond} 4 | \alias{cojo_cond} 5 | \title{Perform conditional analysis using GCTA COJO} 6 | \usage{ 7 | cojo_cond( 8 | vcffile, 9 | bfile, 10 | snplist, 11 | pop, 12 | gcta = genetics.binaRies::get_gcta_binary(), 13 | workdir = tempdir(), 14 | threads = 1 15 | ) 16 | } 17 | \arguments{ 18 | \item{vcffile}{Path to vcffile} 19 | 20 | \item{bfile}{LD reference panel} 21 | 22 | \item{snplist}{List of rsids} 23 | 24 | \item{pop}{EUR, ASN or AFR} 25 | 26 | \item{gcta}{Path to gcta binary. For convenience can use default=genetics.binaRies::get_gcta_binary()} 27 | 28 | \item{workdir}{Location to store temporary files. Default=tempdir()} 29 | 30 | \item{threads}{Number of parallel threads. 
Default=1} 31 | } 32 | \value{ 33 | List of independent summary stats 34 | } 35 | \description{ 36 | For a list of fine-mapped rsids, will assign to regions and generate conditionally independent summary stats for each rsid 37 | } 38 | -------------------------------------------------------------------------------- /man/cojo_sumstat_file.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cojo.r 3 | \name{cojo_sumstat_file} 4 | \alias{cojo_sumstat_file} 5 | \title{Write vcf file to cojo sumstat file} 6 | \usage{ 7 | cojo_sumstat_file(vcffile, outfile) 8 | } 9 | \arguments{ 10 | \item{vcffile}{Path to vcf file} 11 | 12 | \item{outfile}{Path to output file} 13 | } 14 | \value{ 15 | vcf object 16 | } 17 | \description{ 18 | Write vcf file to cojo sumstat file 19 | } 20 | -------------------------------------------------------------------------------- /man/coloc_to_gassocplot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gassocplot.r 3 | \name{coloc_to_gassocplot} 4 | \alias{coloc_to_gassocplot} 5 | \title{Convert coloc dataset to gassocplot dataset} 6 | \usage{ 7 | coloc_to_gassocplot(coloclist, bfile = NULL, plink_bin = NULL) 8 | } 9 | \arguments{ 10 | \item{coloclist}{Output from *_to_coloc} 11 | 12 | \item{bfile}{If number of SNPs > 500 then need to provide your own LD reference panel. Provide plink dataset here.} 13 | 14 | \item{plink_bin}{If number of SNPs > 500 then need to provide your own LD reference panel. 
Provide plink executable here} 15 | } 16 | \value{ 17 | List to feed into gassocplot 18 | } 19 | \description{ 20 | Convert coloc dataset to gassocplot dataset 21 | } 22 | -------------------------------------------------------------------------------- /man/gwasvcf_to_TwoSampleMR.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/TwoSampleMR.r 3 | \name{gwasvcf_to_TwoSampleMR} 4 | \alias{gwasvcf_to_TwoSampleMR} 5 | \title{Create exposure or outcome data format for TwoSampleMR from vcf} 6 | \usage{ 7 | gwasvcf_to_TwoSampleMR(vcf, type = "exposure") 8 | } 9 | \arguments{ 10 | \item{vcf}{VCF object} 11 | 12 | \item{type}{="exposure" or "outcome"} 13 | } 14 | \value{ 15 | data frame 16 | } 17 | \description{ 18 | Create exposure or outcome data format for TwoSampleMR from vcf 19 | } 20 | -------------------------------------------------------------------------------- /man/gwasvcf_to_coloc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/coloc.r 3 | \name{gwasvcf_to_coloc} 4 | \alias{gwasvcf_to_coloc} 5 | \title{Generate coloc dataset from vcf files} 6 | \usage{ 7 | gwasvcf_to_coloc(vcf1, vcf2, chrompos) 8 | } 9 | \arguments{ 10 | \item{vcf1}{VCF object or path to vcf file} 11 | 12 | \item{vcf2}{VCF object or path to vcf file} 13 | 14 | \item{chrompos}{Character of chr:pos1-pos2} 15 | } 16 | \value{ 17 | List of datasets to feed into coloc 18 | } 19 | \description{ 20 | Generate coloc dataset from vcf files 21 | } 22 | -------------------------------------------------------------------------------- /man/gwasvcf_to_finemapr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/finemapr.r 3 | \name{gwasvcf_to_finemapr} 
4 | \alias{gwasvcf_to_finemapr} 5 | \title{Generate data for fine mapping analysis} 6 | \usage{ 7 | gwasvcf_to_finemapr( 8 | region, 9 | vcf, 10 | bfile, 11 | plink_bin = genetics.binaRies::get_plink_binary(), 12 | threads = 1 13 | ) 14 | } 15 | \arguments{ 16 | \item{region}{Region of the genome to extract e.g. "1:109317192-110317192". Can be array} 17 | 18 | \item{vcf}{Path to VCF file or VCF object} 19 | 20 | \item{bfile}{LD reference panel} 21 | 22 | \item{plink_bin}{Path to plink. Default = genetics.binaRies::get_plink_binary()} 23 | 24 | \item{threads}{Number of threads to run in parallel. Default=1} 25 | } 26 | \value{ 27 | List of datasets for finemapping 28 | } 29 | \description{ 30 | For a given region and VCF file, extracts the variants in the region along with LD matrix from a reference panel 31 | } 32 | -------------------------------------------------------------------------------- /man/harmonise.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/harmonise.r 3 | \name{harmonise} 4 | \alias{harmonise} 5 | \title{Generic harmonisation function} 6 | \usage{ 7 | harmonise( 8 | chr1, 9 | pos1, 10 | ref1, 11 | alt1, 12 | chr2, 13 | pos2, 14 | ref2, 15 | alt2, 16 | rsid2 = NULL, 17 | indel_recode = FALSE, 18 | strand_flip = FALSE 19 | ) 20 | } 21 | \arguments{ 22 | \item{chr1}{Vector} 23 | 24 | \item{pos1}{Vector} 25 | 26 | \item{ref1}{Vector} 27 | 28 | \item{alt1}{Vector} 29 | 30 | \item{chr2}{Vector} 31 | 32 | \item{pos2}{Vector} 33 | 34 | \item{ref2}{Vector} 35 | 36 | \item{alt2}{Vector} 37 | 38 | \item{rsid2}{Optional vector} 39 | 40 | \item{indel_recode}{=FALSE. If TRUE then attempts to recode D/I} 41 | 42 | \item{strand_flip}{=FALSE. 
If TRUE then attempts to flip strand when alignment is not otherwise possible} 43 | } 44 | \value{ 45 | Dataframe of outcomes 46 | } 47 | \description{ 48 | Assumes ref and alt alleles available for target and reference datasets, and uses chr:pos for alignment 49 | } 50 | \details{ 51 | 0: stick 52 | 1: swap 53 | 2: rename indel 54 | 3: rename indel and swap 55 | 4: flip 56 | 5: flip and swap 57 | 6: drop (no match) 58 | 7: drop (no reference) 59 | } 60 | -------------------------------------------------------------------------------- /man/harmonise_against_ref.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/harmonise.r 3 | \name{harmonise_against_ref} 4 | \alias{harmonise_against_ref} 5 | \title{Harmonise gwas alleles to be same as reference} 6 | \usage{ 7 | harmonise_against_ref(gwas, reference) 8 | } 9 | \arguments{ 10 | \item{gwas}{GWAS dataset whose alleles are to be harmonised} 11 | 12 | \item{reference}{Reference dataset whose alleles the GWAS is aligned to} 13 | } 14 | \value{ 15 | data frame with attributes 16 | } 17 | \description{ 18 | Harmonise gwas alleles to be same as reference 19 | } 20 | -------------------------------------------------------------------------------- /man/ieugwasr_to_TwoSampleMR.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/TwoSampleMR.r 3 | \name{ieugwasr_to_TwoSampleMR} 4 | \alias{ieugwasr_to_TwoSampleMR} 5 | \title{Convert output from query to TwoSampleMR format} 6 | \usage{ 7 | ieugwasr_to_TwoSampleMR(x, type = "exposure") 8 | } 9 | \arguments{ 10 | \item{x}{Output from ieugwasr query e.g. 
associations, tophits, phewas} 11 | 12 | \item{type}{"exposure" (default) or "outcome"} 13 | } 14 | \value{ 15 | data frame 16 | } 17 | \description{ 18 | Convert output from query to TwoSampleMR format 19 | } 20 | -------------------------------------------------------------------------------- /man/ieugwasr_to_coloc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/coloc.r 3 | \name{ieugwasr_to_coloc} 4 | \alias{ieugwasr_to_coloc} 5 | \title{Generate coloc dataset from the IEU GWAS database} 6 | \usage{ 7 | ieugwasr_to_coloc(id1, id2, chrompos, type1 = NULL, type2 = NULL) 8 | } 9 | \arguments{ 10 | \item{id1}{ID for trait 1} 11 | 12 | \item{id2}{ID for trait 2} 13 | 14 | \item{chrompos}{Character of chr:pos1-pos2} 15 | 16 | \item{type1}{Provide "cc" or "quant" to override automatic lookup of trait type for trait 1} 17 | 18 | \item{type2}{Provide "cc" or "quant" to override automatic lookup of trait type for trait 2} 19 | } 20 | \value{ 21 | List of datasets to feed into coloc 22 | } 23 | \description{ 24 | Generate coloc dataset from the IEU GWAS database 25 | } 26 | -------------------------------------------------------------------------------- /man/ieugwasr_to_finemapr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/finemapr.r 3 | \name{ieugwasr_to_finemapr} 4 | \alias{ieugwasr_to_finemapr} 5 | \title{Generate data for analysis in various finemapping methods} 6 | \usage{ 7 | ieugwasr_to_finemapr(region, id, bfile = NULL, plink_bin = NULL) 8 | } 9 | \arguments{ 10 | \item{region}{Region of the genome to extract e.g. "1:109317192-110317192"} 11 | 12 | \item{id}{Array of GWAS studies to query. See \code{gwasinfo} for available studies} 13 | 14 | \item{bfile}{If this is provided then will use the API. 
Default = NULL} 15 | 16 | \item{plink_bin}{If null and bfile is not null then will detect packaged plink binary for specific OS. Otherwise specify path to plink binary. Default = NULL} 17 | } 18 | \value{ 19 | Each id will be a list of z score data, ld matrix, and sample size 20 | } 21 | \description{ 22 | Uses the finemapr package https://github.com/variani/finemapr 23 | } 24 | -------------------------------------------------------------------------------- /man/ieugwasr_to_gassocplot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gassocplot.r 3 | \name{ieugwasr_to_gassocplot} 4 | \alias{ieugwasr_to_gassocplot} 5 | \title{Generate regional plot for ieugwasr} 6 | \usage{ 7 | ieugwasr_to_gassocplot(chrpos, id, bfile = NULL, plink_bin = NULL) 8 | } 9 | \arguments{ 10 | \item{chrpos}{A window range to plot e.g. 16:3349655-3849655} 11 | 12 | \item{id}{Vector of one or more IEU GWAS db study IDs} 13 | 14 | \item{bfile}{If number of SNPs > 500 then need to provide your own LD reference panel. Provide plink dataset here.} 15 | 16 | \item{plink_bin}{If number of SNPs > 500 then need to provide your own LD reference panel. 
Provide plink executable here} 17 | } 18 | \value{ 19 | assoc_plot or stack_assoc_plot if multiple markers given 20 | } 21 | \description{ 22 | Uses James Staley's gassocplot package https://github.com/jrs95/gassocplot 23 | } 24 | -------------------------------------------------------------------------------- /man/is_forward_strand.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/harmonise.r 3 | \name{is_forward_strand} 4 | \alias{is_forward_strand} 5 | \title{Check a GWAS dataset against a reference known to be on the forward strand} 6 | \usage{ 7 | is_forward_strand( 8 | gwas_snp, 9 | gwas_a1, 10 | gwas_a2, 11 | ref_snp, 12 | ref_a1, 13 | ref_a2, 14 | threshold = 0.9 15 | ) 16 | } 17 | \arguments{ 18 | \item{gwas_snp}{Vector of SNP names for the dataset being checked} 19 | 20 | \item{gwas_a1}{Vector of alleles} 21 | 22 | \item{gwas_a2}{Vector of alleles} 23 | 24 | \item{ref_snp}{Vector of SNP names for the reference dataset} 25 | 26 | \item{ref_a1}{Vector of alleles} 27 | 28 | \item{ref_a2}{Vector of alleles} 29 | 30 | \item{threshold}{=0.9 If the proportion of allele strands match is above this threshold, then declare the dataset to be on the forward strand} 31 | } 32 | \value{ 33 | 1 = Forward strand; 2 = Not on forward strand 34 | } 35 | \description{ 36 | Assuming reference data is all on forward strand, check if 37 | the GWAS is also. 38 | Use some threshold e.g. 
if more than 90% of alleles don't 39 | need to be flipped then it's likely that the dataset is on 40 | the forward strand 41 | } 42 | \details{ 43 | This function can be used to evaluate how strict harmonisation should be. 44 | The trade off is that if you assume we are not on the forward strand then palindromic SNPs are dropped within a particular frequency range. 45 | But you could instead have some small probability of error for whether palindromic SNPs are on the forward strand, and avoid dropping too many variants. 46 | } 47 | -------------------------------------------------------------------------------- /man/make_TwoSampleMR_dat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/TwoSampleMR.r 3 | \name{make_TwoSampleMR_dat} 4 | \alias{make_TwoSampleMR_dat} 5 | \title{Create a harmonised dataset from lists of vcf files} 6 | \usage{ 7 | make_TwoSampleMR_dat( 8 | id1, 9 | id2, 10 | proxies = TRUE, 11 | nthreads = 1, 12 | vcfdir = options()$gwasglue.vcfdir, 13 | proxydb = options()$gwasglue.proxydb, 14 | rsidx = options()$gwasglue.rsidx, 15 | bfile = options()$gwasglue.bfile, 16 | action = 1, 17 | plink_bin = genetics.binaRies::get_plink_binary() 18 | ) 19 | } 20 | \arguments{ 21 | \item{id1}{Exposure datasets. Either an array of vcf files, or array of IDs if vcfdir is set} 22 | 23 | \item{id2}{Outcome datasets. Either an array of vcf files, or array of IDs if vcfdir is set} 24 | 25 | \item{proxies}{Lookup proxies? default=TRUE but requires either bfile or proxydb to be set} 26 | 27 | \item{nthreads}{Parallelise default=1} 28 | 29 | \item{vcfdir}{Location of vcf files if id1 and id2 are just IDs.
Defaults to options()$gwasglue.vcfdir} 30 | 31 | \item{proxydb}{Location of LD proxy database Default=options()$gwasglue.proxydb} 32 | 33 | \item{rsidx}{Location of rsidx index database Default=options()$gwasglue.rsidx} 34 | 35 | \item{bfile}{Location of LD reference panel Default=options()$gwasglue.bfile} 36 | } 37 | \value{ 38 | harmonised dataset 39 | } 40 | \description{ 41 | This mimics the TwoSampleMR::make_dat function, which automatically looks up exposure and outcome datasets and harmonises them, except this function uses GWAS-VCF datasets instead. 42 | The supporting reference datasets can be accessed by UoB users on BC4 using set_bc4_files() 43 | } 44 | -------------------------------------------------------------------------------- /man/map_variants_to_regions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cojo.r 3 | \name{map_variants_to_regions} 4 | \alias{map_variants_to_regions} 5 | \title{For a set of variants map to LD regions} 6 | \usage{ 7 | map_variants_to_regions(chrpos, pop) 8 | } 9 | \arguments{ 10 | \item{chrpos}{Array of chr:pos} 11 | 12 | \item{pop}{EUR, AFR or ASN} 13 | } 14 | \value{ 15 | 16 | } 17 | \description{ 18 | LD regions defined here https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4731402/ 19 | } 20 | -------------------------------------------------------------------------------- /man/organise_ids.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/TwoSampleMR.r 3 | \name{organise_ids} 4 | \alias{organise_ids} 5 | \title{Figure out specific files and IDs depending on what files exist and whether vcfdir is set} 6 | \usage{ 7 | organise_ids(id, vcfdir) 8 | } 9 | \arguments{ 10 | \item{id}{List of IDs within the vcfdir structure, or a list of GWAS VCF files, or a mixture} 11 | 12 | 
\item{vcfdir}{Location of GWAS VCF files, or NULL if id is a list of actual files} 13 | } 14 | \value{ 15 | File paths to all datasets 16 | } 17 | \description{ 18 | Figure out specific files and IDs depending on what files exist and whether vcfdir is set 19 | } 20 | -------------------------------------------------------------------------------- /man/pipe.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-pipe.r 3 | \name{\%>\%} 4 | \alias{\%>\%} 5 | \title{Pipe operator} 6 | \usage{ 7 | lhs \%>\% rhs 8 | } 9 | \description{ 10 | See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/read_gwas.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/harmonise.r 3 | \name{read_gwas} 4 | \alias{read_gwas} 5 | \title{Read in GWAS dataset} 6 | \usage{ 7 | read_gwas( 8 | filename, 9 | skip, 10 | delimiter, 11 | gzipped, 12 | snp, 13 | nea, 14 | ea, 15 | ea_af, 16 | effect, 17 | se, 18 | pval, 19 | n, 20 | info, 21 | z 22 | ) 23 | } 24 | \arguments{ 25 | \item{filename}{} 26 | 27 | \item{skip}{} 28 | 29 | \item{delimiter}{} 30 | 31 | \item{gzipped}{} 32 | 33 | \item{snp}{} 34 | 35 | \item{nea}{} 36 | 37 | \item{ea}{} 38 | 39 | \item{ea_af}{} 40 | 41 | \item{effect}{} 42 | 43 | \item{se}{} 44 | 45 | \item{pval}{} 46 | 47 | \item{n}{} 48 | 49 | \item{info}{} 50 | 51 | \item{z}{} 52 | } 53 | \value{ 54 | data frame with log attributes 55 | } 56 | \description{ 57 | Read in GWAS dataset 58 | } 59 | -------------------------------------------------------------------------------- /man/read_reference.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do
not edit by hand 2 | % Please edit documentation in R/harmonise.r 3 | \name{read_reference} 4 | \alias{read_reference} 5 | \title{Read in reference dataset} 6 | \usage{ 7 | read_reference( 8 | reference_file, 9 | rsid = NULL, 10 | chrompos = NULL, 11 | remove_dup_rsids = TRUE 12 | ) 13 | } 14 | \arguments{ 15 | \item{reference_file}{Reference vcf} 16 | 17 | \item{rsid}{List of variants to read} 18 | 19 | \item{chrompos}{List of chrompos to read} 20 | 21 | \item{remove_dup_rsids}{=TRUE Remove duplicates from output} 22 | } 23 | \value{ 24 | data frame 25 | } 26 | \description{ 27 | Read in reference dataset 28 | } 29 | -------------------------------------------------------------------------------- /man/set_bc4_files.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/TwoSampleMR.r 3 | \name{set_bc4_files} 4 | \alias{set_bc4_files} 5 | \title{Determine locations of useful reference datasets on bluecrystal4} 6 | \usage{ 7 | set_bc4_files() 8 | } 9 | \description{ 10 | This is a convenience function for members at the University of Bristol 11 | to automatically set file locations for various reference datasets. It relates 12 | only to paths on bc4 13 | } 14 | -------------------------------------------------------------------------------- /man/susieR_pipeline.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/susieR.r 3 | \name{susieR_pipeline} 4 | \alias{susieR_pipeline} 5 | \title{Perform fine mapping pipeline using susieR} 6 | \usage{ 7 | susieR_pipeline( 8 | vcffile, 9 | bfile, 10 | plink_bin, 11 | pop, 12 | threads = 1, 13 | clump_kb = 1000, 14 | clump_r2 = 0.001, 15 | clump_p = 5e-08, 16 | ... 
17 | ) 18 | } 19 | \arguments{ 20 | \item{vcffile}{Path to vcf file} 21 | 22 | \item{bfile}{Path to ld reference panel} 23 | 24 | \item{plink_bin}{Path to plink} 25 | 26 | \item{pop}{EUR, ASN or AFR} 27 | 28 | \item{clump_kb}{} 29 | 30 | \item{clump_r2}{} 31 | 32 | \item{clump_p}{} 33 | 34 | \item{...}{Optional arguments to be passed to susie_rss} 35 | } 36 | \value{ 37 | List 38 | } 39 | \description{ 40 | Clumps data, then maps those to LD representative regions. Within each detected LD representative region, fine mapping is performed 41 | } 42 | -------------------------------------------------------------------------------- /man/write_out.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/harmonise.r 3 | \name{write_out} 4 | \alias{write_out} 5 | \title{Create format for HPC pipeline} 6 | \usage{ 7 | write_out(harmonised, path) 8 | } 9 | \arguments{ 10 | \item{harmonised}{Output from \code{harmonise_against_ref}} 11 | 12 | \item{path}{Path to write out json file and txt file} 13 | } 14 | \description{ 15 | Takes raw files and aligns them to reference.
Important if files don't have chr:pos already 16 | } 17 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(gwasglue) 3 | 4 | test_check("gwasglue") 5 | -------------------------------------------------------------------------------- /tests/testthat/test_coloc.r: -------------------------------------------------------------------------------- 1 | context("coloc") 2 | library(gwasglue) 3 | 4 | fn <- system.file("extdata","data.vcf.gz", package="gwasvcf") 5 | vcf1 <- readVcf(fn) 6 | vcf2 <- readVcf(fn) 7 | 8 | test_that("coloc vcf", { 9 | a <- gwasvcf_to_coloc(vcf1, vcf2, "1:1-100000000") 10 | expect_true(is.list(a)) 11 | 12 | b <- expect_warning(coloc::coloc.abf(a$dataset1, a$dataset2)) 13 | expect_true(is.list(b)) 14 | }) 15 | 16 | 17 | test_that("coloc ieugwasr", { 18 | chrpos <- "1:109724880-109904880" 19 | out <- expect_warning(ieugwasr_to_coloc(id1='ieu-a-300', id2='ieu-a-7', chrompos=chrpos)) 20 | b <- expect_warning(coloc::coloc.abf(out$dataset1, out$dataset2)) 21 | expect_true(is.list(b)) 22 | }) 23 | 24 | 25 | test_that("arth bbj", { 26 | chrpos <- "1:38228579-38328579" 27 | out <- ieugwasr_to_coloc(id1='bbj-a-73', id2='bbj-a-73', chrompos=chrpos, type1 = "cc", type2 = "cc") 28 | res <- coloc::coloc.abf(out$dataset1, out$dataset2) 29 | expect_true(res$summary[6] > 0.8) 30 | }) 31 | 32 | 33 | test_that("coloc ieugwasr 2", { 34 | chrpos <- "1:47634677-47734677" 35 | out <- expect_warning(ieugwasr_to_coloc("ieu-a-2", "eqtl-a-ENSG00000162366", chrpos)) 36 | res <- expect_warning(coloc::coloc.abf(out$dataset1, out$dataset2)) 37 | }) 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /tests/testthat/test_finemapr.r: -------------------------------------------------------------------------------- 1 | context("finemapr") 2 | library(ieugwasr) 3 | 4 | 5 | 
test_that("ieugwasr_to_finemapr", { 6 | v <- ieugwasr::variants_rsid("rs7528419") 7 | r <- paste0(v[["chr"]], ":", v[["pos"]]-100000, "-", v[["pos"]]+100000) 8 | a <- ieugwasr_to_finemapr(r, c("ieu-a-7", "ieu-a-2")) 9 | expect_true(length(a) == 2) 10 | expect_true(class(a) == "FinemaprList") 11 | 12 | # options(finemapr_caviar = "/Users/gh13047/bin/caviar") 13 | # library(dplyr) 14 | # finemapr::run_caviar(a[["IEU-a-7"]]$z, a[["IEU-a-7"]]$ld, args = "-c 3") 15 | }) 16 | 17 | -------------------------------------------------------------------------------- /tests/testthat/test_gassocplot.r: -------------------------------------------------------------------------------- 1 | context("gassocplot") 2 | library(gwasglue) 3 | 4 | radius <- 70000 5 | a <- ieugwasr::tophits("ieu-a-2") 6 | b <- ieugwasr::variants_rsid(a$rsid) 7 | chrpos <- paste0(b$chr[1], ":", b$pos[1]-radius, "-", b$pos[1]+radius) 8 | 9 | test_that("ieugwasr1", { 10 | 11 | a <- ieugwasr_to_gassocplot(chrpos, "ieu-a-2") 12 | expect_true(class(a) == "list") 13 | expect_true(all(c("data", "corr") %in% names(a))) 14 | 15 | skip_if_not_installed("gassocplot") 16 | library(gassocplot) 17 | b <- do.call(assoc_plot, a) 18 | expect_true("gtable" %in% class(b)) 19 | }) 20 | 21 | 22 | 23 | test_that("ieugwasr2", { 24 | 25 | a <- ieugwasr_to_gassocplot(chrpos, c("ieu-a-2", "ieu-a-7")) 26 | expect_true(class(a) == "list") 27 | expect_true(all(c("markers", "z", "corr") %in% names(a))) 28 | 29 | skip_if_not_installed("gassocplot") 30 | library(gassocplot) 31 | b <- do.call(stack_assoc_plot, a) 32 | expect_true("gtable" %in% class(b)) 33 | 34 | }) 35 | 36 | -------------------------------------------------------------------------------- /tests/testthat/test_twosamplemr.r: -------------------------------------------------------------------------------- 1 | context("TwoSampleMR") 2 | library(gwasglue) 3 | 4 | 5 | test_that("gwasvcf_to_TwoSampleMR", { 6 | fn <- system.file("extdata","data.vcf.gz", package="gwasvcf") 7 | vcf1 
<- VariantAnnotation::readVcf(fn) 8 | exposure_dat <- gwasvcf_to_TwoSampleMR(vcf1) 9 | expect_true(nrow(exposure_dat) == nrow(vcf1)) 10 | }) 11 | 12 | 13 | test_that("ieugwasr_to_TwoSampleMR", { 14 | a <- ieugwasr::tophits("ieu-a-2") 15 | b <- ieugwasr::associations(a$rsid, "ieu-a-7") 16 | exposure_dat <- ieugwasr_to_TwoSampleMR(a) 17 | outcome_dat <- ieugwasr_to_TwoSampleMR(b, type="outcome") 18 | dat <- TwoSampleMR::harmonise_data(exposure_dat, outcome_dat) 19 | out <- TwoSampleMR::mr(dat) 20 | expect_true(nrow(exposure_dat) == nrow(dat)) 21 | expect_true(nrow(out) > 3) 22 | }) 23 | 24 | -------------------------------------------------------------------------------- /vignettes/cojo.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Conditional analysis of VCF files" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{Conditional analysis of VCF files} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteEncoding{UTF-8} 8 | --- 9 | 10 | ```{r, include = FALSE} 11 | knitr::opts_chunk$set( 12 | eval=FALSE, 13 | collapse = TRUE, 14 | comment = "#>" 15 | ) 16 | ``` 17 | 18 | ```{r setup} 19 | library(gwasglue) 20 | library(gwasvcf) 21 | ``` 22 | 23 | 24 | Conditional analysis of VCF files can be performed using GCTA's COJO routine. The procedure implemented here is as follows 25 | 26 | 1. Obtain clumped top-hits 27 | 2. Assign each top-hit to an LD region. The LD regions are demarkated using [this approach](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4731402/). 28 | 3. Perform finemapping within each LD region that has a top-hit, retaining a representative variant for every credible set 29 | 4. For each LD region that has multiple finemapped loci, perform conditional analysis. e.g. If there are three finemapped loci in a particular region, three conditional analyses will be performed. 
First, obtain the effects of variant 1 conditional on variants 2 and 3; then variant 2 conditional on variants 1 and 3; then variant 3 conditional on variants 1 and 2. 30 | 31 | Ultimately, a list of results will be returned where every fine-mapped variant has a regional set of summary data that is conditionally independent of all neighbouring fine-mapped variants. 32 | 33 | 34 | 35 | ## Finemapping pipeline 36 | 37 | 1. Clump dataset 38 | 2. Map clumps to LD regions 39 | 3. Perform fine mapping in each LD region 40 | 41 | Setup: 42 | 43 | ```{r} 44 | vcffile <- "ieu-a-300.vcf.gz" 45 | ldref <- "/Users/gh13047/repo/mr-base-api/app/ld_files/EUR" 46 | gwasvcf::set_bcftools() 47 | ``` 48 | 49 | Perform susieR pipeline: 50 | 51 | ```{r} 52 | out <- susieR_pipeline( 53 | vcffile=vcffile, 54 | bfile=ldref, 55 | plink_bin=genetics.binaRies::get_plink_binary(), 56 | pop="EUR", 57 | threads=1, 58 | L=10, 59 | estimate_residual_variance=TRUE, 60 | estimate_prior_variance=TRUE, 61 | check_R=FALSE, 62 | z_ld_weight=1/500 63 | ) 64 | ``` 65 | 66 | Each detected region now has a finemapped object stored against it. You can see them for example like this: 67 | 68 | ```{r} 69 | summary(out$res[[1]]$susieR) 70 | susieR::susie_plot(out$res[[1]]$susieR, y="PIP") 71 | ``` 72 | 73 | For each region we can extract the variants with the highest posterior inclusion probability per credible set, e.g.: 74 | 75 | ```{r} 76 | out$res[[1]]$susieR$fmset 77 | ``` 78 | 79 | ## Conditional analysis pipeline 80 | 81 | Now we can perform conditional analysis at each region using knowledge of the finemapped variants. The `cojo_cond` function does the following 82 | 83 | 1. Creates temporary directory to store files 84 | 2. Writes vcf file to summary stats file in COJO format 85 | 3. Determines regions that have multiple fine-mapped variants 86 | 4. 
For each fine-mapped variant, obtains summary stats conditional on other fine-mapped variants in the region 87 | 88 | The result is a list of regions, with a set of conditional summary stats for every fine-mapped variant in that region. 89 | 90 | ```{r} 91 | out2 <- cojo_cond( 92 | vcffile=vcffile, 93 | bfile=ldref, 94 | pop="EUR", 95 | snplist=unlist(sapply(out$res, function(x) x$susieR$fmset)) 96 | ) 97 | ``` 98 | 99 | 100 | TODO 101 | 102 | - Make sure finemapped variants are in reference panel 103 | - Improve speed of cojo by implementing within R so don't have to use GCTA 104 | - Determine how to combine cojo with coloc 105 | -------------------------------------------------------------------------------- /vignettes/colocalisation.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Genetic colocalisation" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{Genetic colocalisation} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteEncoding{UTF-8} 8 | --- 9 | 10 | ```{r, include = FALSE} 11 | knitr::opts_chunk$set( 12 | collapse = TRUE, 13 | comment = "#>" 14 | ) 15 | ``` 16 | 17 | Here we'll perform colocalisation analysis for a particular region, and plot the regions as well. We'll do the same analysis two ways: 18 | 19 | - querying the association data from the [IEU GWAS database](https://gwas.mrcieu.ac.uk/), and 20 | - downloading the [GWAS VCF](https://github.com/MRCIEU/gwas_vcf_spec) files and querying those. 21 | 22 | We'll use the example of LDL cholesterol [ieu-a-300](https://gwas.mrcieu.ac.uk/datasets/ieu-a-300/) and coronary heart disease [ieu-a-7](https://gwas.mrcieu.ac.uk/datasets/ieu-a-7/). 
23 | 24 | 25 | Load libraries: 26 | 27 | ```{r} 28 | suppressPackageStartupMessages(suppressWarnings({ 29 | library(gwasglue) 30 | library(dplyr) 31 | library(gassocplot) 32 | library(coloc) 33 | })) 34 | ``` 35 | 36 | ## ieugwasr 37 | 38 | First find a region that we know to be associated with LDL cholesterol. 39 | 40 | ```{r} 41 | top <- ieugwasr::tophits('ieu-a-300') %>% arrange(p) 42 | top 43 | ``` 44 | 45 | Choose the best signal and create a range 46 | 47 | ```{r} 48 | chrpos <- paste0(top$chr[1], ":", top$position[1] - 90000, "-", top$position[1] + 90000) 49 | chrpos 50 | ``` 51 | 52 | Extract, harmonise and format the data 53 | 54 | ```{r} 55 | out <- ieugwasr_to_coloc(id1='ieu-a-300', id2='ieu-a-7', chrompos=chrpos) 56 | ``` 57 | 58 | Run colocalisation analysis 59 | 60 | ```{r} 61 | res <- coloc::coloc.abf(out[[1]], out[[2]]) 62 | ``` 63 | 64 | Plot 65 | 66 | ```{r} 67 | temp <- coloc_to_gassocplot(out) 68 | gassocplot::stack_assoc_plot(temp$markers, temp$z, temp$corr, traits=temp$traits) 69 | ``` 70 | 71 | 72 | ## gwasvcf 73 | 74 | Let's do the same with the vcf files (and the indexes). 
Download from here: 75 | 76 | - [https://gwas.mrcieu.ac.uk/files/ieu-a-300/ieu-a-300.vcf.gz](https://gwas.mrcieu.ac.uk/files/ieu-a-300/ieu-a-300.vcf.gz) 77 | - [https://gwas.mrcieu.ac.uk/files/ieu-a-300/ieu-a-300.vcf.gz.tbi](https://gwas.mrcieu.ac.uk/files/ieu-a-300/ieu-a-300.vcf.gz.tbi) 78 | - [https://gwas.mrcieu.ac.uk/files/ieu-a-7/ieu-a-7.vcf.gz](https://gwas.mrcieu.ac.uk/files/ieu-a-7/ieu-a-7.vcf.gz) 79 | - [https://gwas.mrcieu.ac.uk/files/ieu-a-7/ieu-a-7.vcf.gz.tbi](https://gwas.mrcieu.ac.uk/files/ieu-a-7/ieu-a-7.vcf.gz.tbi) 80 | 81 | 82 | Set a region to plot: 83 | 84 | ```{r} 85 | chrpos <- "19:11112306-11292306" 86 | ``` 87 | 88 | Extract region from vcf files and convert to coloc object 89 | 90 | ```{r} 91 | vout <- gwasvcf_to_coloc("ieu-a-300.vcf.gz", "ieu-a-7.vcf.gz", chrpos) 92 | ``` 93 | 94 | Run colocalisation analysis 95 | 96 | ```{r} 97 | vres <- coloc::coloc.abf(vout[[1]], vout[[2]]) 98 | ``` 99 | 100 | Plot 101 | 102 | ```{r} 103 | library(gassocplot) 104 | temp <- coloc_to_gassocplot(vout) 105 | gassocplot::stack_assoc_plot(temp$markers, temp$z, temp$corr, traits=temp$traits) 106 | ``` 107 | 108 | -------------------------------------------------------------------------------- /vignettes/finemapping.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Clumping and finemapping" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{Clumping and finemapping} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteEncoding{UTF-8} 8 | --- 9 | 10 | ```{r, include = FALSE} 11 | knitr::opts_chunk$set( 12 | eval=FALSE, 13 | collapse = TRUE, 14 | comment = "#>" 15 | ) 16 | library(gwasglue) 17 | ``` 18 | 19 | ## Clumping 20 | 21 | Here, we use an LD reference panel to identify SNPs that are in LD with the top signals from a GWAS. 
The algorithm sequentially chooses the top SNP, removes all SNPs in LD above some threshold within some window, then goes on to the next top hit and repeats the pruning process, until no more SNPs are left above the specified p-value threshold. 22 | 23 | The data to be clumped can be retrieved either from the VCF files or data from the OpenGWAS database. Once the data has been retrieved, clumping can be performed either using the clumping routines in the cloud via the API, or locally using local LD reference data. The latter is recommended, it allows you to run things in parallel, to use larger LD reference panels, and avoids killing the servers! 24 | 25 | 26 | ### Data from OpenGWAS 27 | 28 | Extract top hits for LDL cholesterol (`ieu-a-300`) 29 | 30 | ```{r} 31 | dat <- ieugwasr::tophits("ieu-a-300") 32 | ``` 33 | 34 | This is very quick because it extracts the pre-clumped top hits for this dataset. If you specify a different threshold from the default it will be a bit slower e.g. 35 | 36 | ```{r} 37 | dat <- ieugwasr::tophits("ieu-a-300", pval=5e-7) 38 | ``` 39 | 40 | Performing clumping using local data is possible. For example, extract the data without clumping: 41 | 42 | ```{r} 43 | dat <- ieugwasr::tophits("ieu-a-300", clump=FALSE) 44 | ``` 45 | 46 | Obtain the `plink` binary for your operating system. For convenience you can use the [genetics.binaRies](https://github.com/explodecomputer/genetics.binaRies/) R package which has a few different widely used utilities bundled within it: 47 | 48 | ```{r} 49 | plink_bin <- genetics.binaRies::get_plink_binary() 50 | ``` 51 | 52 | Obtain some LD reference data. See the homepage of this site for options for downloading LD reference data. 
Here we'll be using the same LD reference data as that used by the API by default, which is Europeans from the 1000 genomes reference panel 53 | 54 | 55 | ```{r} 56 | ldref <- "/path/to/EUR" 57 | ``` 58 | 59 | Perform clumping 60 | 61 | ```{r} 62 | clumped <- ld_clump(dat, bfile=ldref, plink_bin=plink_bin) 63 | ``` 64 | 65 | 66 | ### Data from VCF 67 | 68 | There is a single function that can be used to perform clumping on the VCF files. It will either run locally or run using the API, depending on the arguments you supply. 69 | 70 | Running locally: 71 | 72 | ```{r} 73 | # Set path to file 74 | vcffile <- "/path/to/ieu-a-300.vcf.gz" 75 | 76 | # Set path to bcftools 77 | gwasvcf::set_bcftools() 78 | 79 | # Perform clumping 80 | clumped <- clump_gwasvcf(vcffile, bfile=ldref, plink_bin=plink_bin) 81 | ``` 82 | 83 | Running remotely: 84 | 85 | ```{r} 86 | clumped <- clump_gwasvcf(vcffile, pop="EUR") 87 | ``` 88 | 89 | Here, the `pop` argument is passed to the API specifying which super-population to use for the LD reference. 90 | 91 | 92 | ## Finemapping 93 | 94 | Here, the objective is to extract a slice of the data with relevant fields, and its corresponding LD matrix. Then the data can be applied to a few different packages quite easily 95 | 96 | - [`finemapr`](https://github.com/variani/finemapr) package - which simplifies analysis using FINEMAP, PAINTOR and CAVIAR 97 | - [`susieR`](https://stephenslab.github.io/susieR) package - new approach called "Sum of Single Effects" 98 | 99 | The required data format can be generated from the OpenGWAS database or from VCF files 100 | 101 | 102 | ### Data from OpenGWAS 103 | 104 | One of the tophits for LDL cholesterol is `rs10903129`, which is located at `1:25768937` on hg19. Determining the region to finemap around this variant is simplified by knowing the natural LD break points in the European population, which is where the LDL GWAS was performed (`ieu-a-300`).
[Berisa and Pickrell 2016](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4731402/) provide a useful dataset of natural breakpoints, which has been incorporated into this package. 105 | 106 | Identify LD region to perform finemapping: 107 | 108 | ```{r} 109 | region <- map_variants_to_regions(chrpos="1:25768937", pop="EUR") 110 | ``` 111 | 112 | Extract data from that region into a format for finemapping 113 | 114 | ```{r} 115 | dat <- ieugwasr_to_finemapr(region$region, "ieu-a-300") 116 | ``` 117 | 118 | This returns a nested list. The top level of results is an item for every GWAS dataset requested. Each item contains a dataframe of rsids and their z-scores within the region, and an LD matrix for those variants, and the sample size for each of the variants. 119 | 120 | Perform finemapping e.g. using CAVIAR (see [https://github.com/variani/finemapr](https://github.com/variani/finemapr) for more info): 121 | 122 | ```{r} 123 | options(finemapr_caviar = "/path/to/caviar") 124 | library(dplyr) 125 | o <- finemapr::run_caviar(dat[[1]]$z, dat[[1]]$ld, args = "-c 3") 126 | ``` 127 | 128 | 129 | Perform finemapping e.g. using susieR (see [https://stephenslab.github.io/susieR/](https://stephenslab.github.io/susieR/) for more info): 130 | 131 | ```{r} 132 | fitted_rss <- susieR::susie_rss( 133 | dat[[1]]$z$zscore, 134 | dat[[1]]$ld, L=10, 135 | estimate_residual_variance=TRUE, 136 | estimate_prior_variance=TRUE, 137 | check_R=FALSE, 138 | z_ld_weight=1/500 139 | ) 140 | summary(fitted_rss) 141 | susieR::susie_plot(fitted_rss, y="PIP") 142 | ``` 143 | 144 | ### Data from VCF 145 | 146 | Let's perform a similar analysis for the VCF files 147 | 148 | ```{r} 149 | # extract data from vcf 150 | dat <- gwasvcf_to_finemapr(region = region$region, vcf=vcffile, bfile=ldref, plink_bin=plink_bin) 151 | 152 | # Perform finemapping 153 | fitted_rss <- susieR::susie_rss( 154 | dat[[1]]$z$zscore, 155 | dat[[1]]$ld, L=10, 156 | estimate_residual_variance=TRUE, 157 | 
estimate_prior_variance=TRUE, 158 | check_R=FALSE, 159 | z_ld_weight=1/500 160 | ) 161 | summary(fitted_rss) 162 | susieR::susie_plot(fitted_rss, y="PIP") 163 | ``` 164 | 165 | 166 | ### Finemapping across the whole dataset 167 | 168 | 1. Perform clumping to get a set of regions to interrogate 169 | 2. Finemap within each region 170 | 171 | Output: A list of regions for the dataset which has 172 | 173 | 174 | 175 | ## Multi-population finemapping 176 | 177 | Need at least two datasets for the same trait but from different super-populations. Look up the region of interest in all traits, harmonise across traits to get the same set of variants, and then run [PAINTOR](https://github.com/gkichaev/PAINTOR_V3.0) or [MsCAVIAR](https://github.com/nlapier2/MsCAVIAR) (https://www.biorxiv.org/content/10.1101/2020.01.15.908517v1.full). 178 | 179 | Extract data: 180 | 181 | ```{r} 182 | args <- list() 183 | args$radius <- 50000 184 | args$ids <- c("bbj-a-52", "ukb-b-20175") 185 | args$bfiles <- c("/Users/gh13047/data/ld_files/ldmat/EAS", "/Users/gh13047/data/ld_files/ldmat/EUR") 186 | args$plink <- "plink" 187 | args$pops <- NULL 188 | args$chr <- 4 189 | args$position <- 54801228 190 | 191 | regiondata <- do.call(organise_region, args=args) 192 | str(regiondata) 193 | ``` 194 | 195 | Run PAINTOR: 196 | 197 | ```{r} 198 | pres <- run_paintor(regiondata) 199 | plot_paintor(pres) 200 | ``` 201 | 202 | Run MsCAVIAR: 203 | 204 | ```{r} 205 | n <- gwasinfo(args$ids)$sample_size 206 | run_MsCAVIAR(regiondata, n) 207 | ``` 208 | -------------------------------------------------------------------------------- /vignettes/finemapping_experiment.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Finemapping experiments" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{Finemapping experiments} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteEncoding{UTF-8} 8 | --- 9 | 10 | ```{r, include = FALSE} 11 | 
knitr::opts_chunk$set( 12 | eval=FALSE, 13 | collapse = TRUE, 14 | comment = "#>" 15 | ) 16 | library(gwasglue) 17 | ``` 18 | 19 | Trying other methods e.g. PAINTOR and MsCAVIAR 20 | 21 | - Seem really slow 22 | - They give an overall posterior probability, not clear how to extract number of distinct causal variants 23 | 24 | 25 | ```{r} 26 | library(tidyverse) 27 | library(ieugwasr) 28 | library(devtools) 29 | library(glue) 30 | load_all() 31 | 32 | args <- list() 33 | args$radius <- 50000 34 | args$ids <- c("bbj-a-52", "ukb-b-20175") 35 | args$bfiles <- c("/Users/gh13047/data/ld_files/ldmat/EAS", "/Users/gh13047/data/ld_files/ldmat/EUR") 36 | args$plink <- "plink" 37 | args$pops <- NULL 38 | args$chr <- 4 39 | args$position <- 54801228 40 | 41 | regiondata <- do.call(organise_region, args=args) 42 | 43 | pres <- run_paintor(regiondata) 44 | dev.new() 45 | plot_PAINTOR(pres) 46 | 47 | n <- gwasinfo(args$ids)$sample_size 48 | cres <- run_MsCAVIAR(regiondata, n) 49 | plot_MsCAVIAR(cres) 50 | 51 | 52 | 53 | alpha=0.05 54 | mt_method="fdr" 55 | ld_thresh=0.05 56 | prune_bfile=NULL 57 | prune_pop="EUR" 58 | 59 | 60 | inst <- readRDS("~/repo/mr.trans/data/sbp_eas_eur.rds") 61 | args <- list() 62 | args$radius <- 100000 63 | args$ids <- c("bbj-a-52", "ukb-b-20175") 64 | args$bfiles <- c("/Users/gh13047/data/ld_files/ldmat/EAS", "/Users/gh13047/data/ld_files/ldmat/EUR") 65 | args$plink <- "plink" 66 | 67 | 68 | args$pops <- NULL 69 | i <- 1 70 | 71 | paintor_res <- list() 72 | for(i in 1:nrow(inst$inst)) 73 | { 74 | args$chr <- inst$inst$chr.exposure[i] 75 | args$position <- as.numeric(inst$inst$pos.exposure[i]) 76 | regiondata <- do.call(extract_regional_data, args=args) 77 | paintor_res[[i]] <- run_PAINTOR(regiondata) 78 | } 79 | 80 | o <- run_susie(regiondata) 81 | plot_PAINTOR(paintor_res[[i]]) 82 | ``` 83 | -------------------------------------------------------------------------------- /vignettes/ld_ref.Rmd: 
-------------------------------------------------------------------------------- 1 | --- 2 | title: "Generate LD matrices" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{Generate LD matrices} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteEncoding{UTF-8} 8 | --- 9 | 10 | ```{r, include = FALSE} 11 | knitr::opts_chunk$set( 12 | collapse = TRUE, 13 | comment = "#>", 14 | eval=FALSE 15 | ) 16 | ``` 17 | 18 | 19 | ```{r setup} 20 | library(gwasglue) 21 | library(data.table) 22 | ``` 23 | 24 | For each region generate an LD matrix 25 | 26 | ```{r} 27 | pop <- "EUR" 28 | ldref <- paste0("/Users/gh13047/repo/mr-base-api/app/ld_files/", pop) 29 | bim <- data.table::fread(paste0(ldref, ".bim")) 30 | regionfile <- system.file("extdata", "ldetect", paste0(pop, ".bed"), package="gwasglue") 31 | regions <- data.table::fread(regionfile, header=TRUE) %>% 32 | dplyr::mutate( 33 | chr=as.numeric(gsub("chr", "", chr)), 34 | start=as.numeric(start), 35 | stop=as.numeric(stop) 36 | ) %>% dplyr::as_tibble() 37 | 38 | for(i in 1:nrow(regions)) 39 | { 40 | message(i) 41 | x <- subset(bim, V1 == regions$chr[i] & V4 >= regions$start[i] & V4 <= regions$stop[i])$V2 42 | y <- ieugwasr::ld_matrix(x, pop=pop, bfile=ldref, plink_bin=genetics.binaRies::get_plink_binary(), with_alleles=FALSE) 43 | save(y, file="temp.rdata") 44 | } 45 | 46 | 47 | 48 | ``` 49 | 50 | 51 | 52 | bin/emeraLD -i /mnt/storage/private/mrcieu/research/mr-eve/vcf-reference-datasets/1000g/ALL.chr6.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz --region 6:2458936-3573593 --out ~/mr-eve/temp.ld --no-phase 53 | 54 | bin/emeraLD -i /mnt/storage/private/mrcieu/research/mr-eve/vcf-reference-datasets/1000g/1kg_v3_nomult.vcf.gz --region 1:30923-100923 --out ~/mr-eve/temp.ld --no-phase 55 | 56 | 57 | bin/emeraLD -i /mnt/storage/private/mrcieu/research/mr-eve/vcf-reference-datasets/1000g/temp.vcf.gz --region 6:2458936-3573593 --out ~/mr-eve/temp.ld --no-phase 58 | 59 | 60 | 61 | 
betas <- hyprcoloc::test.betas 62 | ses <- hyprcoloc::test.ses 63 | trait.cor <- hyprcoloc::test.corr 64 | ld.matrix <- hyprcoloc::test.ld 65 | traits <- paste0("T", 1:dim(betas)[2]) 66 | rsid <- rownames(betas) 67 | res <- hyprcoloc(betas, ses, trait.names=traits, snp.id=rsid) 68 | res 69 | 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /vignettes/mr.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Mendelian randomization" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{Mendelian randomization} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteEncoding{UTF-8} 8 | --- 9 | 10 | ```{r, include = FALSE} 11 | knitr::opts_chunk$set( 12 | collapse = TRUE, 13 | comment = "#>" 14 | ) 15 | ``` 16 | 17 | To use the [IEU GWAS database](https://gwas.mrcieu.ac.uk/) for MR analysis, see the [TwoSampleMR](https://mrcieu.github.io/TwoSampleMR/) R package. 18 | 19 | Here we'll demonstrate how to achieve the same data extractions using the [GWAS VCF](https://github.com/MRCIEU/gwas_vcf_spec) files. 20 | 21 | We'll use the example of LDL cholesterol [ieu-a-300](https://gwas.mrcieu.ac.uk/datasets/ieu-a-300/) and coronary heart disease [ieu-a-7](https://gwas.mrcieu.ac.uk/datasets/ieu-a-7/). 
22 | 23 | Load libraries: 24 | 25 | ```{r} 26 | suppressPackageStartupMessages(suppressWarnings({ 27 | library(TwoSampleMR) 28 | library(gwasglue) 29 | library(gwasvcf) 30 | library(ieugwasr) 31 | library(dplyr) 32 | })) 33 | ``` 34 | 35 | ## Using TwoSampleMR 36 | 37 | This is a simple procedure for MR using the TwoSampleMR package: 38 | 39 | ```{r} 40 | # Extract the instruments for LDL 41 | expd1 <- TwoSampleMR::extract_instruments("ieu-a-300") 42 | 43 | # Extract those SNP effects for CHD 44 | outd1 <- TwoSampleMR::extract_outcome_data(expd1$SNP, "ieu-a-7", proxies=FALSE) 45 | 46 | # Harmonise the exposure and outcome data 47 | dat1 <- TwoSampleMR::harmonise_data(expd1, outd1) 48 | 49 | # Perform MR 50 | TwoSampleMR::mr(dat1) 51 | ``` 52 | 53 | Note that this extraction process can be simplified with: 54 | 55 | ```{r, eval=FALSE} 56 | dat1 <- make_dat("ieu-a-300", "ieu-a-7") 57 | ``` 58 | 59 | ## Using GWAS VCF files 60 | 61 | Let's do the same with the vcf files (and the indexes). Download from here: 62 | 63 | ```bash 64 | wget https://gwas.mrcieu.ac.uk/files/ieu-a-300/ieu-a-300.vcf.gz 65 | wget https://gwas.mrcieu.ac.uk/files/ieu-a-300/ieu-a-300.vcf.gz.tbi 66 | wget https://gwas.mrcieu.ac.uk/files/ieu-a-7/ieu-a-7.vcf.gz 67 | wget https://gwas.mrcieu.ac.uk/files/ieu-a-7/ieu-a-7.vcf.gz.tbi 68 | ``` 69 | 70 | First get the top hits for LDL cholesterol: 71 | 72 | ```{r} 73 | gwasvcf::set_bcftools() 74 | expd2 <- gwasvcf::query_gwas("ieu-a-300.vcf.gz", chrompos=paste0(expd1$chr.exposure, ":", expd1$pos.exposure)) 75 | ``` 76 | 77 | Convert to TwoSampleMR format: 78 | 79 | ```{r} 80 | expd2 <- gwasglue::gwasvcf_to_TwoSampleMR(expd2, type="exposure") 81 | ``` 82 | 83 | Extract those SNPs from the outcome vcf file and convert to TwoSampleMR format 84 | 85 | ```{r} 86 | outd2 <- gwasvcf::query_gwas("ieu-a-7.vcf.gz", chrompos = paste0(expd1$chr.exposure, ":", expd1$pos.exposure)) 87 | outd2 <- gwasglue::gwasvcf_to_TwoSampleMR(outd2, "outcome") 88 | ``` 89 | 90 | Proceed 
with harmonising and performing MR 91 | 92 | ```{r} 93 | dat2 <- TwoSampleMR::harmonise_data(expd2, outd2) 94 | TwoSampleMR::mr(dat2) 95 | ``` 96 | 97 | 98 | 99 | ## Other options 100 | 101 | ### Clumping vcf files 102 | 103 | If we want to extract top hits based on a threshold and clump locally, we first need to download the LD reference dataset: 104 | 105 | ```bash 106 | wget http://fileserve.mrcieu.ac.uk/ld/data_maf0.01_rs_ref.tgz 107 | tar xzvf data_maf0.01_rs_ref.tgz 108 | ``` 109 | 110 | Now extract the top hits based on a p-value threshold 111 | 112 | ```{r} 113 | gwasvcf::set_bcftools() 114 | expd3 <- gwasvcf::query_gwas("ieu-a-300.vcf.gz", pval=5e-8) 115 | expd3 116 | ``` 117 | 118 | Convert to TwoSampleMR format: 119 | 120 | ```{r} 121 | expd3 <- gwasglue::gwasvcf_to_TwoSampleMR(expd3, type="exposure") 122 | ``` 123 | 124 | Get a list of SNPs to retain after clumping and subset the data 125 | 126 | ```{r} 127 | retain_snps <- expd3 %>% dplyr::select(rsid=SNP, pval=pval.exposure) %>% 128 | ieugwasr::ld_clump(., plink_bin=genetics.binaRies::get_plink_binary(), bfile="data_maf0.01_rs_ref") %>% 129 | {.$rsid} 130 | expd3 <- subset(expd3, SNP %in% retain_snps) 131 | ``` 132 | 133 | ### Extracting outcome data with LD proxies 134 | 135 | This only works if you extract on rsids at the moment, and is quite slow. But here it is: 136 | 137 | ```{r} 138 | gwasvcf::set_plink() 139 | outd2 <- gwasvcf::query_gwas("ieu-a-7.vcf.gz", rsid = expd3$SNP, proxies="yes", bfile="data_maf0.01_rs_ref") 140 | outd2 <- gwasglue::gwasvcf_to_TwoSampleMR(outd2, "outcome") 141 | ``` 142 | 143 | 144 | ## Further MR methods 145 | 146 | A number of other MR methods are available. The current framework for using them is to: 147 | 148 | 1. Convert your data to TwoSampleMR format 149 | 2. 
Use the TwoSampleMR package to convert to other formats 150 | 151 | Examples of formats that you can convert to within TwoSampleMR: 152 | 153 | - [MR-PRESSO](https://github.com/rondolab/MR-PRESSO) 154 | - [RadialMR](https://github.com/WSpiller/RadialMR) 155 | - [MRMix](https://github.com/gqi/MRMix/) 156 | - [MendelianRandomization](https://cran.r-project.org/web/packages/MendelianRandomization/index.html) 157 | - [MR-RAPS](https://cran.r-project.org/web/packages/mr.raps/) 158 | 159 | 160 | 161 | ## Bluecrystal4 users 162 | 163 | All data in OpenGWAS is stored on bc4 in the form of GWAS VCF files. You can create harmonised datasets easily on bc4 with these files. 164 | 165 | Determine the locations of the GWAS VCF files and a number of other reference datasets and binaries: 166 | 167 | ```{r, eval=FALSE} 168 | set_bcftools() 169 | set_plink() 170 | set_bc4_files() 171 | ``` 172 | 173 | Now simply run: 174 | 175 | ```{r, eval=FALSE} 176 | dat <- make_TwoSampleMR_dat("ieu-a-300", "ieu-a-7") 177 | ``` 178 | 179 | This can be run in parallel for large combinations of exposures and outcomes, e.g.: 180 | 181 | ```{r, eval=FALSE} 182 | dat <- make_TwoSampleMR_dat( 183 | id1=c("ieu-a-300", "ieu-a-302", "ieu-a-299"), 184 | id2=c("ieu-a-7", "ieu-a-2"), 185 | nthreads=6 186 | ) 187 | ``` 188 | 189 | This will look up all instruments for the exposures (id1) in both the exposure and outcome datasets, and harmonise all exposure-outcome pairs, parallelised across 6 threads. 190 | 191 | Note: please make sure to run these analyses in batch mode by submitting to the slurm scheduler, not interactively on the head nodes! --------------------------------------------------------------------------------