├── .DS_Store
├── .gitattributes
├── .github
│   └── workflows
│       └── install-ci.yml
├── .readthedocs.yml
├── Dockerfile
├── LICENSE
├── README.md
├── TODO.md
├── docs
│   ├── Makefile
│   ├── _build
│   │   ├── doctrees
│   │   │   ├── environment.pickle
│   │   │   ├── imputation.doctree
│   │   │   ├── index.doctree
│   │   │   ├── install
│   │   │   │   ├── linux.doctree
│   │   │   │   └── macosx.doctree
│   │   │   ├── installation.doctree
│   │   │   ├── pca.doctree
│   │   │   ├── pca
│   │   │   │   ├── joint.doctree
│   │   │   │   ├── normal.doctree
│   │   │   │   └── project.doctree
│   │   │   ├── phasing.doctree
│   │   │   ├── preimp_qc.doctree
│   │   │   ├── qb.doctree
│   │   │   └── tutorial.doctree
│   │   └── html
│   │       ├── .buildinfo
│   │       ├── _images
│   │       │   └── qc_workflow.png
│   │       ├── _sources
│   │       │   ├── imputation.rst.txt
│   │       │   ├── index.rst.txt
│   │       │   ├── install
│   │       │   │   ├── linux.rst.txt
│   │       │   │   └── macosx.rst.txt
│   │       │   ├── installation.rst.txt
│   │       │   ├── pca.rst.txt
│   │       │   ├── pca
│   │       │   │   ├── joint.rst.txt
│   │       │   │   ├── normal.rst.txt
│   │       │   │   └── project.rst.txt
│   │       │   ├── phasing.rst.txt
│   │       │   ├── preimp_qc.rst.txt
│   │       │   ├── qb.rst.txt
│   │       │   └── tutorial.rst.txt
│   │       ├── _static
│   │       │   ├── _sphinx_javascript_frameworks_compat.js
│   │       │   ├── basic.css
│   │       │   ├── css
│   │       │   │   ├── badge_only.css
│   │       │   │   ├── fonts
│   │       │   │   │   ├── Roboto-Slab-Bold.woff
│   │       │   │   │   ├── Roboto-Slab-Bold.woff2
│   │       │   │   │   ├── Roboto-Slab-Regular.woff
│   │       │   │   │   ├── Roboto-Slab-Regular.woff2
│   │       │   │   │   ├── fontawesome-webfont.eot
│   │       │   │   │   ├── fontawesome-webfont.svg
│   │       │   │   │   ├── fontawesome-webfont.ttf
│   │       │   │   │   ├── fontawesome-webfont.woff
│   │       │   │   │   ├── fontawesome-webfont.woff2
│   │       │   │   │   ├── lato-bold-italic.woff
│   │       │   │   │   ├── lato-bold-italic.woff2
│   │       │   │   │   ├── lato-bold.woff
│   │       │   │   │   ├── lato-bold.woff2
│   │       │   │   │   ├── lato-normal-italic.woff
│   │       │   │   │   ├── lato-normal-italic.woff2
│   │       │   │   │   ├── lato-normal.woff
│   │       │   │   │   └── lato-normal.woff2
│   │       │   │   └── theme.css
│   │       │   ├── custom.css
│   │       │   ├── doctools.js
│   │       │   ├── documentation_options.js
│   │       │   ├── file.png
│   │       │   ├── fonts
│   │       │   │   ├── Lato
│   │       │   │   │   ├── lato-bold.eot
│   │       │   │   │   ├── lato-bold.ttf
│   │       │   │   │   ├── lato-bold.woff
│   │       │   │   │   ├── lato-bold.woff2
│   │       │   │   │   ├── lato-bolditalic.eot
│   │       │   │   │   ├── lato-bolditalic.ttf
│   │       │   │   │   ├── lato-bolditalic.woff
│   │       │   │   │   ├── lato-bolditalic.woff2
│   │       │   │   │   ├── lato-italic.eot
│   │       │   │   │   ├── lato-italic.ttf
│   │       │   │   │   ├── lato-italic.woff
│   │       │   │   │   ├── lato-italic.woff2
│   │       │   │   │   ├── lato-regular.eot
│   │       │   │   │   ├── lato-regular.ttf
│   │       │   │   │   ├── lato-regular.woff
│   │       │   │   │   └── lato-regular.woff2
│   │       │   │   └── RobotoSlab
│   │       │   │       ├── roboto-slab-v7-bold.eot
│   │       │   │       ├── roboto-slab-v7-bold.ttf
│   │       │   │       ├── roboto-slab-v7-bold.woff
│   │       │   │       ├── roboto-slab-v7-bold.woff2
│   │       │   │       ├── roboto-slab-v7-regular.eot
│   │       │   │       ├── roboto-slab-v7-regular.ttf
│   │       │   │       ├── roboto-slab-v7-regular.woff
│   │       │   │       └── roboto-slab-v7-regular.woff2
│   │       │   ├── jquery.js
│   │       │   ├── js
│   │       │   │   ├── badge_only.js
│   │       │   │   ├── theme.js
│   │       │   │   └── versions.js
│   │       │   ├── language_data.js
│   │       │   ├── minus.png
│   │       │   ├── plus.png
│   │       │   ├── pygments.css
│   │       │   ├── searchtools.js
│   │       │   └── sphinx_highlight.js
│   │       ├── genindex.html
│   │       ├── imputation.html
│   │       ├── index.html
│   │       ├── install
│   │       │   ├── linux.html
│   │       │   └── macosx.html
│   │       ├── installation.html
│   │       ├── objects.inv
│   │       ├── pca.html
│   │       ├── pca
│   │       │   ├── joint.html
│   │       │   ├── normal.html
│   │       │   └── project.html
│   │       ├── phasing.html
│   │       ├── preimp_qc.html
│   │       ├── qb.html
│   │       ├── search.html
│   │       ├── searchindex.js
│   │       └── tutorial.html
│   ├── _static
│   │   └── custom.css
│   ├── conf.py
│   ├── images
│   │   └── qc_workflow.png
│   ├── imputation.rst
│   ├── index.rst
│   ├── installation.rst
│   ├── make.bat
│   ├── pca.rst
│   ├── pca
│   │   ├── joint.rst
│   │   ├── normal.rst
│   │   └── project.rst
│   ├── phasing.rst
│   ├── preimp_qc.rst
│   ├── qb.rst
│   ├── requirements.txt
│   └── tutorial.rst
├── env-setup.sh
├── gwaspy
│   ├── .DS_Store
│   ├── __init__.py
│   ├── check_alleles
│   │   ├── __init__.py
│   │   ├── check_alleles.py
│   │   └── flips.py
│   ├── imputation
│   │   ├── __init__.py
│   │   ├── concat_vcfs.py
│   │   ├── glimpse2_impute.py
│   │   ├── imputation.py
│   │   ├── impute.py
│   │   ├── impute5_impute.py
│   │   ├── impute_vcf.py
│   │   └── sex_aut_imp.py
│   ├── pca
│   │   ├── __init__.py
│   │   ├── assign_pop_labels.py
│   │   ├── filter_ref_data.py
│   │   ├── pca.py
│   │   ├── pca_filter_snps.py
│   │   ├── pca_joint.py
│   │   ├── pca_normal.py
│   │   └── pca_project.py
│   ├── phasing
│   │   ├── __init__.py
│   │   ├── concat_vcfs.py
│   │   ├── get_filebase.py
│   │   ├── phase.py
│   │   ├── phase_vcf.py
│   │   ├── phasing.py
│   │   ├── scatter_vcf.py
│   │   └── shapeit5_phase.py
│   ├── preimp_qc
│   │   ├── __init__.py
│   │   ├── aggregators.py
│   │   ├── annotations.py
│   │   ├── plots.py
│   │   ├── preimp_qc.py
│   │   └── report.py
│   └── utils
│       ├── __init__.py
│       ├── export_file.py
│       ├── get_file_size.py
│       ├── natural_sort.py
│       ├── read_file.py
│       ├── reference_liftover.py
│       └── sample_annotations.py
├── nf
│   ├── main.nf
│   ├── modules
│   │   ├── imputation.nf
│   │   └── phasing.nf
│   ├── nextflow.config
│   └── params.json
├── requirements.txt
├── setup.py
└── split_maps.sh
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/.DS_Store
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
*.bim filter=lfs diff=lfs merge=lfs -text
*.fam filter=lfs diff=lfs merge=lfs -text
*.bed filter=lfs diff=lfs merge=lfs -text
data/1kg_annotated.mt/** filter=lfs diff=lfs merge=lfs -text
--------------------------------------------------------------------------------
/.github/workflows/install-ci.yml:
--------------------------------------------------------------------------------
# Run installation checks

name: install

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

jobs:
  install:

    runs-on: ubuntu-20.04
    strategy:
      matrix:
        # Versions must be quoted: unquoted 3.10 is parsed as the YAML float 3.1
        python-version: ["3.10", "3.11"]

    steps:
    - uses: actions/checkout@v2
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v2
      with:
        python-version: ${{ matrix.python-version }}
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        python -m pip install flake8 pytest pypandoc
        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
    - name: Install GWASpy
      run: |
        python setup.py sdist
        pip3 install dist/gwaspy*
    - name: Check modules
      run: |
        preimp_qc --help
        pca --help
        imputation --help
        phasing --help
--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
# https://docs.readthedocs.com/platform/stable/config-file/index.html

# Read the Docs configuration file for Sphinx projects
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details

# Required
version: 2

# Set the OS, Python version and other tools you might need
build:
  os: ubuntu-22.04
  tools:
    python: "3.12"
  # You can also specify other tool versions:
  # nodejs: "20"
  # rust: "1.70"
  # golang: "1.20"

# Build documentation in the "docs/" directory with Sphinx
sphinx:
  configuration: docs/conf.py
  # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs
  # builder: "dirhtml"
  # Fail on all warnings to avoid broken references
  # fail_on_warning: true

# Optionally build your docs in additional formats such as PDF and ePub
# formats:
#   - pdf
#   - epub

# Optional but recommended, declare the Python requirements required
# to build your documentation
# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
python:
  install:
    - requirements: docs/requirements.txt
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
FROM ubuntu:20.04
LABEL maintainer="Lindo Nkambule (lindonkambule116@gmail.com)"

ARG SAMTOOLS_VERSION=1.13

RUN apt-get update && apt-get install -y software-properties-common && \
    apt-get update && apt-get install -y \
        autoconf \
        automake \
        bzip2 \
        build-essential \
        ca-certificates \
        cmake \
        curl \
        g++ \
        gcc \
        git \
        gzip \
        libboost-all-dev \
        libbz2-dev \
        libcurl4-openssl-dev \
        liblzma-dev \
        libncurses5-dev \
        libssl-dev \
        make \
        python3 \
        python3-pip \
        r-mathlib \
        sudo \
        unzip \
        wget \
        zlib1g-dev \
        && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
    apt-get clean && \
    apt-get autoremove -y && \
    rm -rf /var/lib/{apt,dpkg,cache,log}/

# HTSLIB
RUN cd /opt && \
    wget --no-check-certificate https://github.com/samtools/htslib/releases/download/${SAMTOOLS_VERSION}/htslib-${SAMTOOLS_VERSION}.tar.bz2 && \
    tar xf htslib-${SAMTOOLS_VERSION}.tar.bz2 && rm htslib-${SAMTOOLS_VERSION}.tar.bz2 && cd htslib-${SAMTOOLS_VERSION} && \
    ./configure --enable-libcurl --enable-s3 --enable-gcs && \
    make && make install && make clean

COPY makefile_shapeit4 /opt

# SHAPEIT4
RUN git clone https://github.com/odelaneau/shapeit4.git && \
    cd shapeit4 && \
    mv makefile makefile.old && cp /opt/makefile_shapeit4 . && mv makefile_shapeit4 makefile && \
    make && \
    cd /shapeit4/maps && mkdir b37 b38 && gunzip *.gz && \
    tar -xf genetic_maps.b37.tar -C b37/ && \
    tar -xf genetic_maps.b38.tar -C b38/ && \
    rm *.tar

ENV PATH /shapeit4/bin/:${PATH}

# BCFTOOLS
RUN cd /opt && \
    wget --no-check-certificate https://github.com/samtools/bcftools/releases/download/${SAMTOOLS_VERSION}/bcftools-${SAMTOOLS_VERSION}.tar.bz2 && \
    tar -xf bcftools-${SAMTOOLS_VERSION}.tar.bz2 && rm bcftools-${SAMTOOLS_VERSION}.tar.bz2 && cd bcftools-${SAMTOOLS_VERSION} && \
    ./configure --with-htslib=/opt/htslib-${SAMTOOLS_VERSION} && make && make install && make clean

# EAGLE
RUN cd /opt && \
    wget https://data.broadinstitute.org/alkesgroup/Eagle/downloads/Eagle_v2.4.1.tar.gz && \
    gunzip Eagle_v2.4.1.tar.gz && \
    tar xvf Eagle_v2.4.1.tar && \
    cp /opt/Eagle_v2.4.1/tables/genetic_map_hg19_withX.txt.gz /opt && \
    cp /opt/Eagle_v2.4.1/tables/genetic_map_hg38_withX.txt.gz /opt && \
    mv Eagle_v2.4.1/eagle /usr/local/bin/ && \
    rm -rf Eagle_v2.4.1*

# IMPUTE5
COPY impute5_v1.1.5.zip /opt
RUN cd /opt && \
    unzip impute5_v1.1.5.zip && cd impute5_v1.1.5 && \
    mv *_static /usr/local/bin/ && cd /opt && rm -rf impute5_v1.1.5*

# makeScaffold for building haplotype scaffolds for phasing
RUN pip3 install Cython --install-option="--no-cython-compile"

RUN git clone https://github.com/sinanshi/makeScaffold.git && \
    cd makeScaffold && rm makefile && \
    cmake . && \
    make && \
    mv src/scaffold /usr/local/bin/
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2022 Broad Institute of MIT and Harvard

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# GWASpy

![install status](https://github.com/atgu/GWASpy/actions/workflows/install-ci.yml/badge.svg)
![PyPI version](https://badge.fury.io/py/gwaspy.svg)

Genome-wide association studies pypeline (GWASpy): a Python package for performing GWAS QC, PCA, haplotype phasing, and
genotype imputation.

## Installation
GWASpy is available through [PyPI](https://pypi.org/project/gwaspy/). To install, run the command:
```bash
pip3 install gwaspy
```

## Usage
For usage instructions, please visit the [GWASpy documentation](https://gwaspy.readthedocs.io/)

## Copyright and License
GWASpy is freely distributed under the [MIT License](https://github.com/atgu/GWASpy/blob/main/LICENSE)
--------------------------------------------------------------------------------
/TODO.md:
--------------------------------------------------------------------------------
Todo list for GWASpy
====

## Todo

- [x] Update the report and preimp_qc.py sections to cater for different data types, e.g. case-only/control-only and
  case-control data
- [ ] Add filter functions for handling trio datasets (Mendel errors for IDs+SNPs and HWE p-value for SNPs) and
  update the report and preimp_qc.py sections
- [x] Add support for VCF files and include appropriate filter functions (also
  check https://blog.hail.is/whole-exome-and-whole-genome-sequencing-recommendations/) -> VCF from arrays differs from that from sequencing, so we don't need them here.
- [x] Currently, we're saving intermediate files in /tmp/. Work out a way to store these files temporarily
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS    ?=
SPHINXBUILD   ?= sphinx-build
SOURCEDIR     = .
BUILDDIR      = _build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--------------------------------------------------------------------------------
/docs/_build/doctrees/environment.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/doctrees/environment.pickle
--------------------------------------------------------------------------------
/docs/_build/doctrees/imputation.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/doctrees/imputation.doctree
--------------------------------------------------------------------------------
/docs/_build/doctrees/index.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/doctrees/index.doctree
--------------------------------------------------------------------------------
/docs/_build/doctrees/install/linux.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/doctrees/install/linux.doctree
--------------------------------------------------------------------------------
/docs/_build/doctrees/install/macosx.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/doctrees/install/macosx.doctree
--------------------------------------------------------------------------------
/docs/_build/doctrees/installation.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/doctrees/installation.doctree
--------------------------------------------------------------------------------
/docs/_build/doctrees/pca.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/doctrees/pca.doctree
--------------------------------------------------------------------------------
/docs/_build/doctrees/pca/joint.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/doctrees/pca/joint.doctree
--------------------------------------------------------------------------------
/docs/_build/doctrees/pca/normal.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/doctrees/pca/normal.doctree
--------------------------------------------------------------------------------
/docs/_build/doctrees/pca/project.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/doctrees/pca/project.doctree
--------------------------------------------------------------------------------
/docs/_build/doctrees/phasing.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/doctrees/phasing.doctree
--------------------------------------------------------------------------------
/docs/_build/doctrees/preimp_qc.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/doctrees/preimp_qc.doctree
--------------------------------------------------------------------------------
/docs/_build/doctrees/qb.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/doctrees/qb.doctree
--------------------------------------------------------------------------------
/docs/_build/doctrees/tutorial.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/doctrees/tutorial.doctree
--------------------------------------------------------------------------------
/docs/_build/html/.buildinfo:
--------------------------------------------------------------------------------
# Sphinx build info version 1
# This file records the configuration used when building these files. When it is not found, a full rebuild will be done.
config: 364152833ab1d15953fd0b10d3e8242a
tags: 645f666f9bcd5a90fca523b33c5a78b7
--------------------------------------------------------------------------------
/docs/_build/html/_images/qc_workflow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/html/_images/qc_workflow.png
--------------------------------------------------------------------------------
/docs/_build/html/_sources/imputation.rst.txt:
--------------------------------------------------------------------------------
.. _sec-imputation:

===================
Genotype Imputation
===================

Genotype imputation is the process of estimating missing genotypes from a haplotype or genotype reference panel. It
allows you to accurately evaluate the evidence for association at genetic markers that are not directly genotyped.
GWASpy has a module, :code:`imputation`, for running imputation using IMPUTE5. Because imputation can be a computationally
intensive task, we run it on multiple chunks in parallel, then merge the imputed chunks together at the end. Below are
examples of how to run imputation using either the HGDP+1kGP or your own reference panel.

Examples
########

**1. HGDP+1kGP reference panel**

.. code-block:: sh

    imputation --input-file gs://path/to/file.vcf.bgz --vcf-ref hgdp1kgp --output-filename my_outfilename --out-dir gs://path/to/output/dir --n-samples 1989 --n-ref-samples 4091 --billing-project my-billing-project

**2. Own reference panel**

.. code-block:: sh

    imputation --input-file gs://path/to/file.vcf.bgz --vcf-ref gs://path/to/ref_panel/ALL.chrCNUMBER.vcf --output-filename my_outfilename --out-dir gs://path/to/output/dir --n-samples 1989 --n-ref-samples 4091 --billing-project my-billing-project

.. warning::
    When using your own reference panel, make sure that you use the CNUMBER placeholder in the filename passed to :code:`--vcf-ref`

Arguments and options
#####################

.. list-table::
   :widths: 15 50
   :header-rows: 1

   * - Argument
     - Description
   * - :code:`--input-file`
     - Path to the target VCF, or to a TSV listing the target VCF/BAM files
   * - :code:`--vcf-ref`
     - Reference panel file to use for imputation
   * - :code:`--chromosomes`
     - Chromosome(s) to run imputation for. Default is :code:`all`
   * - :code:`--local`
     - Type of service. Default is the Service backend, where jobs are executed on a multi-tenant compute cluster in Google Cloud
   * - :code:`--billing-project`
     - Billing project to be used for the jobs
   * - :code:`--n-samples`
     - Number of target samples to be imputed. We use this to estimate resources for some of the jobs
   * - :code:`--n-ref-samples`
     - Number of reference samples. We use this to estimate resources for some of the jobs
   * - :code:`--software`
     - Software to use for imputation. Options: [:code:`beagle5`, :code:`impute5`]. Default is :code:`impute5`
   * - :code:`--output-filename`
     - Output filename without file extension
   * - :code:`--out-dir`
     - Path to where output files will be saved

Output
######
The resulting output is a VCF file per chromosome with imputed genotypes.
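
As a quick sanity check, an imputed chromosome can be loaded back into Hail. Below is a minimal sketch; the
per-chromosome output path is hypothetical, so adjust it to the filenames your run actually writes:

.. code-block:: python

    import hail as hl

    # Hypothetical path: one imputed, bgzip-compressed VCF per chromosome
    mt = hl.import_vcf("gs://path/to/output/dir/my_outfilename.chr22.vcf.bgz",
                       reference_genome="GRCh38", force_bgz=True)
    n_variants, n_samples = mt.count()
    print(f"chr22: {n_variants} variants across {n_samples} samples")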
--------------------------------------------------------------------------------
/docs/_build/html/_sources/index.rst.txt:
--------------------------------------------------------------------------------
==========
GWASpy 0.1
==========

GWASpy is an open-source Python package for scalable: (1) Pre-imputation QC; (2) Principal Component Analysis; (3) Haplotype phasing; and (4) Genotype
Imputation. See the `installation page <installation.html>`_ to get started
using GWASpy.

========
Contents
========

.. toctree::
    :maxdepth: 2

    Installation <installation>
    Hail Query and Batch <qb>
    Pre-Imputation QC <preimp_qc>
    Principal Component Analysis <pca>
    Haplotype Phasing <phasing>
    Genotype Imputation <imputation>
    Tutorial <tutorial>
--------------------------------------------------------------------------------
/docs/_build/html/_sources/install/linux.rst.txt:
--------------------------------------------------------------------------------
===========================
Install GWASpy on GNU/Linux
===========================

- Install Java 8.
- Install Python 3.6+.
- Install a recent version of the C and C++ standard libraries. GCC 5.0, LLVM
  version 3.4, or any later version will suffice.
- Install BLAS and LAPACK.
- Install TeX Live.
- Install GWASpy using pip.

On a recent Debian-like system, the following should suffice:

.. code-block:: sh

    apt-get install -y \
        openjdk-8-jre-headless \
        g++ \
        python3.6 python3-pip \
        libopenblas-base liblapack3 \
        texlive-pictures texlive-science texlive-latex-extra latexmk
    python3.6 -m pip install gwaspy
--------------------------------------------------------------------------------
/docs/_build/html/_sources/install/macosx.rst.txt:
--------------------------------------------------------------------------------
==========================
Install GWASpy on Mac OS X
==========================

- Install `Java 8 `__.
- Install Python 3.6+.
- Install MacTeX.
- Open Terminal.app and execute ``pip3 install gwaspy``.
--------------------------------------------------------------------------------
/docs/_build/html/_sources/installation.rst.txt:
--------------------------------------------------------------------------------
.. _sec-installation:

=================
Installing GWASpy
=================

GWASpy leverages Hail to enable efficient processing of data directly from Google Cloud. As such, the first step is to
install Hail as per the instructions `here `_. After you have installed Hail, GWASpy can be installed using

.. code-block:: sh

    pip install gwaspy

It is important to note that the command above installs GWASpy locally (or wherever you ran the command). For the
:code:`phasing` and :code:`imputation` modules, which use Hail Batch, this is enough. For the :code:`preimp_qc` and
:code:`pca` modules, which use Hail Query, you also have to ensure that the Dataproc cluster has GWASpy installed; there are
examples showing how to do this in the :ref:`preimp_qc` and :ref:`pca` sections.
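
To confirm the installation succeeded, you can query the installed package metadata. Below is a minimal sketch
using only the Python standard library (the package name on PyPI is ``gwaspy``):

.. code-block:: python

    from importlib.metadata import version

    # Prints the installed GWASpy version string
    print(version("gwaspy"))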
--------------------------------------------------------------------------------
/docs/_build/html/_sources/pca.rst.txt:
--------------------------------------------------------------------------------
.. _sec-pca:
.. _pca:

============================
Principal Component Analysis
============================

Principal component analysis (PCA) can be used to detect and quantify the genetic structure of populations.
In GWASpy, the :code:`pca` module can be run in 3 different ways: (1) normal PCA, without a reference panel; (2) joint PCA; or (3) projection PCA.

.. toctree::
    :maxdepth: 1

    Normal PCA <pca/normal>
    Joint PCA <pca/joint>
    Projection PCA <pca/project>

Arguments and options
#####################

.. list-table::
   :widths: 15 50
   :header-rows: 1

   * - Argument
     - Description
   * - :code:`--ref-dirname`
     - Path to where the reference data is
   * - :code:`--ref-basename`
     - Reference basename
   * - :code:`--ref-info`
     - Path to reference information. Tab-delimited file with sample IDs and their SuperPop labels
   * - :code:`--reference`
     - Genome reference build. Default is GRCh38. Options: [:code:`GRCh37`, :code:`GRCh38`]
   * - :code:`--pca-type`
     - Type of PCA to run. Default is normal. Options: [:code:`normal`, :code:`project`, :code:`joint`]
   * - :code:`--data-dirname`
     - Path to where the data is
   * - :code:`--data-basename`
     - Data basename
   * - :code:`--input-type`
     - Data input type. Options: [:code:`hail`, :code:`plink`, :code:`vcf`]
   * - :code:`--maf`
     - Include only SNPs with MAF >= NUM in the PCA. Default is 0.05
   * - :code:`--hwe`
     - Include only SNPs with HWE p-value >= NUM in the PCA. Default is 1e-03
   * - :code:`--geno`
     - Include only SNPs with call rate > NUM. Default is 0.98
   * - :code:`--ld-cor`
     - Squared correlation threshold (exclusive upper bound). Must be in the range [0.0, 1.0]. Default is 0.2
   * - :code:`--ld-window`
     - Window size in base pairs (inclusive upper bound). Default is 250000
   * - :code:`--npcs`
     - Number of PCs to use. Default is 20
   * - :code:`--relatedness-method`
     - Method to use for the inference of relatedness. Default is pc_relate. Options: [:code:`pc_relate`, :code:`ibd`, :code:`king`]
   * - :code:`--relatedness-thresh`
     - Threshold value to use in relatedness checks. Default is 0.98
   * - :code:`--prob`
     - Minimum probability of belonging to a given population for the population to be set. Default is 0.8
   * - :code:`--out-dir`
     - Path to where output files will be saved

Output
######
A tab-delimited file with the first 20 principal components (PCs) computed and
graphical visualizations of the PCs are generated.
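
The tab-delimited scores file can be loaded straight back into Hail for inspection. Below is a minimal sketch;
the scores filename is hypothetical, so adjust it to what the :code:`pca` module actually writes to :code:`--out-dir`:

.. code-block:: python

    import hail as hl

    # Hypothetical path: the PC scores file written by the pca module
    scores = hl.import_table("gs://my-gcs/bucket/test_data/my_data_basename.pca.scores.tsv",
                             impute=True)
    scores.describe()  # one column per PC is expected (20 by default)
    scores.show(5)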
--------------------------------------------------------------------------------
/docs/_build/html/_sources/pca/joint.rst.txt:
--------------------------------------------------------------------------------
================================
Joint PCA (with a reference)
================================

The joint PCA method works by first merging (joining) the input dataset with the reference dataset, by locus and allele(s).
This is followed by "normal" PCA on the merged dataset.

Below is an example of how you can run joint PCA via the command line or inside a Python script.

#. Python (inside a Python script)

    .. code-block:: python

        import gwaspy.pca as pca
        pca.pca.pca(data_dirname="gs://my-gcs/bucket/test_data/", data_basename="my_data_basename",
                    out_dir="gs://my-gcs/bucket/test_data/", input_type="my_input_type", reference="GRCh37",
                    pca_type="joint")

#. Command line

    .. code-block:: sh

        pca --data-dirname gs://my-gcs/bucket/test_data/ --data-basename my_data_basename --out-dir gs://my-gcs/bucket/test_data/ --input-type my_input_type --reference grch37 --pca-type joint
--------------------------------------------------------------------------------
/docs/_build/html/_sources/pca/normal.rst.txt:
--------------------------------------------------------------------------------
================================
Normal PCA (without a reference)
================================

GWASpy allows you to run normal PCA without any reference panel.

Below is an example of how you can run normal PCA without a reference, via the command line or inside a Python script.

#. Python (inside a Python script)

    .. code-block:: python

        import gwaspy.pca as pca
        pca.pca.pca(data_dirname="gs://my-gcs/bucket/test_data/", data_basename="my_data_basename",
                    out_dir="gs://my-gcs/bucket/test_data/", input_type="my_input_type", reference="GRCh37",
                    pca_type="normal")

#. Command line

    .. code-block:: sh

        pca --data-dirname gs://my-gcs/bucket/test_data/ --data-basename my_data_basename --out-dir gs://my-gcs/bucket/test_data/ --input-type my_input_type --reference grch37 --pca-type normal
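
If you want to plot the resulting PCs yourself, a minimal sketch with pandas and matplotlib is shown below; the
local filename and the ``PC1``/``PC2`` column names are assumptions, so adjust them to the scores file your run
produces:

.. code-block:: python

    import pandas as pd
    import matplotlib.pyplot as plt

    # Hypothetical local copy of the scores file written by the pca module
    scores = pd.read_table("my_data_basename.pca.scores.tsv")
    plt.scatter(scores["PC1"], scores["PC2"], s=5)
    plt.xlabel("PC1")
    plt.ylabel("PC2")
    plt.savefig("pca_normal_pc1_vs_pc2.png", dpi=150)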
--------------------------------------------------------------------------------
/docs/_build/html/_sources/pca/project.rst.txt:
--------------------------------------------------------------------------------
================================
Project PCA (with a reference)
================================

You can leverage reference panel information to see how samples in your data cluster on a "global" scale.
PCs are computed using the 1KG+HGDP dataset as a reference panel, and samples in the input dataset are then projected onto the 1KG+HGDP PC space.
A random forest classifier model, adopted from gnomAD, is then used to assign population ancestries in the input dataset.

Below is an example of how you can run projection PCA via the command line or inside a Python script.

#. Python (inside a Python script)

    .. code-block:: python

        import gwaspy.pca as pca
        pca.pca.pca(data_dirname="gs://my-gcs/bucket/test_data/", data_basename="my_data_basename",
                    out_dir="gs://my-gcs/bucket/test_data/", input_type="my_input_type", reference="GRCh37",
                    pca_type="project")

#. Command line

    .. code-block:: sh

        pca --data-dirname gs://my-gcs/bucket/test_data/ --data-basename my_data_basename --out-dir gs://my-gcs/bucket/test_data/ --input-type my_input_type --reference grch37 --pca-type project
--------------------------------------------------------------------------------
/docs/_build/html/_sources/phasing.rst.txt:
--------------------------------------------------------------------------------
.. _sec-phasing:

=================
Haplotype Phasing
=================

Knowing the phase of a haplotype allows us to impute low-frequency variants, which makes haplotype phasing an
important step before genotype imputation. GWASpy has a module, :code:`phasing`, for performing phasing. Phasing can
be run with or without a reference panel using SHAPEIT5.

GWASpy can handle both array and WGS data. For array data, the user can pass a VCF/BCF file with all the chromosomes,
and GWASpy will use SHAPEIT5 to phase the chromosomes in parallel. Since WGS data has more variants, phasing is parallelized across
multiple chunks in each chromosome. It's also important to note that phasing of WGS data includes phasing common
variants first, followed by phasing rare variants.

Another important aspect of phasing is the use of a reference panel. In many cases (small sample sizes), including a reference panel when
phasing improves accuracy. By default, GWASpy runs phasing without a reference panel, but there is an option to use a
reference panel as shown below.

Examples
########

**1. Without a reference panel**

.. code-block:: sh

    phasing --input-vcf gs://path/to/file.vcf.bgz --output-filename outfilename.phased --out-dir gs://path/to/output/dir --genome-build GRCh38 --billing-project my-billing-project

**2. HGDP+1KG reference panel**

Set :code:`--vcf-ref` to :code:`hgdp1kgp`

.. code-block:: sh

    phasing --input-vcf gs://path/to/file.vcf.bgz --output-filename my_outfilename --out-dir gs://path/to/output/dir --genome-build GRCh38 --billing-project my-billing-project --vcf-ref hgdp1kgp

**3. Own reference panel**

.. note::
    1. If you're using your own reference panel, make sure the files are bgzip-compressed.
    2. The chromosome X reference file must be named X and not 23

Say you have your reference panel files for each chromosome stored in gs://ref_panel/ALL.chr{1..22,X}.vcf,
you would pass the path to :code:`--vcf-ref` as gs://ref_panel/ALL.chr\ **CNUMBER**\ .vcf.
GWASpy uses **CNUMBER** as a placeholder for the chromosomes. Then you can run phasing as:

.. code-block:: sh

    phasing --input-vcf gs://path/to/file.vcf.bgz --output-filename outfilename.phased --out-dir gs://path/to/output/dir --genome-build GRCh38 --billing-project my-billing-project --vcf-ref gs://ref_panel/ALL.chrCNUMBER.vcf
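
To see what the placeholder expands to, the sketch below substitutes each chromosome name for **CNUMBER**
(illustration only; the actual substitution happens inside the :code:`phasing` module):

.. code-block:: python

    ref_template = "gs://ref_panel/ALL.chrCNUMBER.vcf"
    # Chromosome X is named "X", not "23"
    chromosomes = [str(c) for c in range(1, 23)] + ["X"]
    ref_files = [ref_template.replace("CNUMBER", c) for c in chromosomes]
    print(ref_files[0])   # gs://ref_panel/ALL.chr1.vcf
    print(ref_files[-1])  # gs://ref_panel/ALL.chrX.vcf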

.. note::
    For nextflow users, the idea is the same. The only difference is that you have to update the params.json file. Examples
    are provided in the tutorial section of the documentation.

Arguments and options
#####################

.. list-table::
   :widths: 15 50
   :header-rows: 1

   * - Argument
     - Description
   * - :code:`--input-vcf`
     - Path to where the VCF file to be phased is
   * - :code:`--vcf-ref`
     - VCF file for reference haplotypes if phasing with a reference panel
   * - :code:`--pedigree`
     - Pedigree (PLINK FAM) file
   * - :code:`--local`
     - Type of service. Default is the Service backend, where jobs are executed on a multi-tenant compute cluster in Google Cloud
   * - :code:`--billing-project`
     - Billing project to be used for the job(s)
   * - :code:`--genome-build`
     - Genome reference build. Default is GRCh38. Options: [:code:`GRCh37`, :code:`GRCh38`]
   * - :code:`--data-type`
     - Array or WGS data. Default is array. Options: [:code:`array`, :code:`wgs`]
   * - :code:`--fill-tags`
     - Whether or not to add the AC tag required by SHAPEIT5. Including :code:`--fill-tags` in your command will enable this step
   * - :code:`--software`
     - Software to use for phasing. Options: [:code:`beagle`, :code:`shapeit`]. Default is :code:`shapeit`
   * - :code:`--output-filename`
     - Output filename without file extension
   * - :code:`--out-dir`
     - Path to where output files will be saved

Output
######
The resulting output is a VCF file per chromosome with phased haplotypes.
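
A quick way to confirm phasing worked is to check the fraction of phased genotype calls in an output file.
Below is a minimal sketch in Hail; the per-chromosome output path is hypothetical:

.. code-block:: python

    import hail as hl

    # Hypothetical path: one phased VCF per chromosome
    mt = hl.import_vcf("gs://path/to/output/dir/outfilename.phased.chr1.vcf.bgz",
                       reference_genome="GRCh38", force_bgz=True)
    # After phasing, (nearly) all genotype calls should be phased
    print(mt.aggregate_entries(hl.agg.fraction(mt.GT.phased)))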
--------------------------------------------------------------------------------
/docs/_build/html/_sources/preimp_qc.rst.txt:
--------------------------------------------------------------------------------
.. _sec-pre_imputation_qc:
.. _preimp_qc:

===================================
Pre-Imputation Quality Control (QC)
===================================

Detecting and correcting issues such as genotyping errors, sample handling errors, and population stratification
is important in GWAS. The :code:`preimp_qc` module addresses these issues and cleans (QCs) your data. Below is a flow diagram
of the filters applied when QC'ing input data:

.. image:: images/qc_workflow.png
    :width: 1000px
    :height: 1900px
    :scale: 50 %
    :align: center


Arguments and options
#####################

.. list-table::
   :widths: 15 50
   :header-rows: 1

   * - Argument
     - Description
   * - :code:`--dirname`
     - Path to where the data is
   * - :code:`--basename`
     - Data basename
   * - :code:`--input-type`
     - Input type. Options: [:code:`hail`, :code:`plink`, :code:`vcf`]
   * - :code:`--export-type`
     - Export type. Options: [:code:`hail`, :code:`plink`, :code:`vcf`]
   * - :code:`--out-dir`
     - Directory path to where output files are going to be saved
   * - :code:`--annotations`
     - Annotations file to be used for annotating samples with information such as Sex and Phenotype
   * - :code:`--reference`
     - Reference genome build. Default is GRCh38. Options: [:code:`GRCh37`, :code:`GRCh38`]
   * - :code:`--report`
     - Whether or not to generate a QC PDF report. Default is True
   * - :code:`--liftover`
     - Whether or not to liftover input data to GRCh38. Default is False. Running :code:`preimp_qc` with :code:`--liftover` will activate liftover
   * - :code:`--pre-geno`
     - Include only SNPs with missing rate < NUM (before the ID filter); important for post-merge of multiple platforms
   * - :code:`--mind`
     - Include only IDs with missing rate < NUM
   * - :code:`--fhet-aut`
     - Include only IDs within NUM < FHET < NUM
   * - :code:`--fstat-y`
     - Include only female IDs with fhet < NUM
   * - :code:`--fstat-x`
     - Include only male IDs with fhet > NUM
   * - :code:`--geno`
     - Include only SNPs with missing rate < NUM
   * - :code:`--midi`
     - Include only SNPs with missing-rate difference (case/control) < NUM
   * - :code:`--withpna`
     - Include monomorphic (invariant) SNPs
   * - :code:`--maf`
     - Include only SNPs with MAF >= NUM
   * - :code:`--hwe-th-con`
     - HWE_controls < NUM
   * - :code:`--hwe-th-cas`
     - HWE_cases < NUM

Output(s)
##########
* QC'ed file(s), i.e. files with all the variants and/or samples that fail QC filters removed
* A detailed PDF QC report including pre- and post-QC variant/sample counts, and figures such as Manhattan and QQ plots


Examples
########

All the code below assumes the user already has a Dataproc cluster running as described in the `previous section `_

You can run pre-imputation QC using the :code:`preimp_qc` module (1) inside a Python script; or (2) via the command line

1. Python script - submitting a Python script to a cluster from a local machine (highly recommended)

    - First create a Python script on your local machine as below

    .. code-block:: python

        import gwaspy.preimp_qc as qc
        qc.preimp_qc.preimp_qc(dirname="gs://my-gcs/bucket/test_data/", basename="my_data_basename",
                               input_type="my_input_type")

    - Then run the following command to submit the script to the Dataproc cluster named `my-cluster-name`

    .. code-block:: sh

        hailctl dataproc submit my-cluster-name qc_script.py

2. Command line - requires the user to be SSH'ed into a cluster

    Users may encounter `this error `_ when trying to run things from the command line

    - This requires the user to be inside (`gcloud compute ssh`) the Dataproc cluster with GWASpy already installed

    .. code-block:: sh

        gcloud compute ssh "my-cluster-name-m"
        preimp_qc --dirname gs://my-gcs/bucket/test_data/ --basename my_data_basename --input-type my_input_type
--------------------------------------------------------------------------------
/docs/_build/html/_sources/qb.rst.txt:
--------------------------------------------------------------------------------
.. _sec-qb:

====================
Hail Query and Batch
====================

The four GWASpy modules use two different backends: :code:`preimp_qc` and :code:`pca` use Hail Query, while the
:code:`phasing` and :code:`imputation` modules use Batch (Hail Batch for Broad users and nextflow for non-Broad users).
Hail Query is well-suited for manipulating large genomics data in highly parallelised environments such as Dataproc.
`Batch `_, on the other hand, is good for batch processing (scheduling,
queueing, and executing) workloads on Google Cloud resources.

All the instructions below assume the user has a Google account and an active (Google) Cloud billing account

Query
#####

For running the :code:`preimp_qc` and :code:`pca` modules, you need to start a Dataproc cluster. Hail has a command-line
tool, `hailctl `_, for doing this, and it is installed automatically when
you install Hail. We highly recommend setting a maximum age for the cluster (:code:`--max-age`); this will ensure the cluster is
automatically deleted after the specified time.

Below is how you can start a cluster with GWASpy pre-installed:

.. code-block:: sh

    hailctl dataproc start my-cluster-name --region=us-central1 --packages gwaspy --max-age 4h

To shut down the cluster, you can run:

.. code-block:: sh

    hailctl dataproc stop my-cluster-name --region=us-central1

Batch
#####

The :code:`phasing` and :code:`imputation` modules use Batch as the backend. For Broad users with a Hail Batch account,
there is no setup needed; you can proceed to running the modules. For non-Broad users, we have a nextflow implementation
of the modules that requires a nextflow setup first. Follow the steps here to: `(1) install nextflow `_; and
`(2) setup Google Cloud Batch for nextflow `_
--------------------------------------------------------------------------------
/docs/_build/html/_static/_sphinx_javascript_frameworks_compat.js:
--------------------------------------------------------------------------------
1 | /* Compatability shim for jQuery and underscores.js. 2 | * 3 | * Copyright Sphinx contributors 4 | * Released under the two clause BSD licence 5 | */ 6 | 7 | /** 8 | * small helper function to urldecode strings 9 | * 10 | * See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/decodeURIComponent#Decoding_query_parameters_from_a_URL 11 | */ 12 | jQuery.urldecode = function(x) { 13 | if (!x) { 14 | return x 15 | } 16 | return decodeURIComponent(x.replace(/\+/g, ' ')); 17 | }; 18 | 19 | /** 20 | * small helper function to urlencode strings 21 | */ 22 | jQuery.urlencode = encodeURIComponent; 23 | 24 | /** 25 | * This function returns the parsed url parameters of the 26 | * current request. Multiple values per key are supported, 27 | * it will always return arrays of strings for the value parts. 28 | */ 29 | jQuery.getQueryParameters = function(s) { 30 | if (typeof s === 'undefined') 31 | s = document.location.search; 32 | var parts = s.substr(s.indexOf('?') + 1).split('&'); 33 | var result = {}; 34 | for (var i = 0; i < parts.length; i++) { 35 | var tmp = parts[i].split('=', 2); 36 | var key = jQuery.urldecode(tmp[0]); 37 | var value = jQuery.urldecode(tmp[1]); 38 | if (key in result) 39 | result[key].push(value); 40 | else 41 | result[key] = [value]; 42 | } 43 | return result; 44 | }; 45 | 46 | /** 47 | * highlight a given string on a jquery object by wrapping it in 48 | * span elements with the given class name.
49 | */ 50 | jQuery.fn.highlightText = function(text, className) { 51 | function highlight(node, addItems) { 52 | if (node.nodeType === 3) { 53 | var val = node.nodeValue; 54 | var pos = val.toLowerCase().indexOf(text); 55 | if (pos >= 0 && 56 | !jQuery(node.parentNode).hasClass(className) && 57 | !jQuery(node.parentNode).hasClass("nohighlight")) { 58 | var span; 59 | var isInSVG = jQuery(node).closest("body, svg, foreignObject").is("svg"); 60 | if (isInSVG) { 61 | span = document.createElementNS("http://www.w3.org/2000/svg", "tspan"); 62 | } else { 63 | span = document.createElement("span"); 64 | span.className = className; 65 | } 66 | span.appendChild(document.createTextNode(val.substr(pos, text.length))); 67 | node.parentNode.insertBefore(span, node.parentNode.insertBefore( 68 | document.createTextNode(val.substr(pos + text.length)), 69 | node.nextSibling)); 70 | node.nodeValue = val.substr(0, pos); 71 | if (isInSVG) { 72 | var rect = document.createElementNS("http://www.w3.org/2000/svg", "rect"); 73 | var bbox = node.parentElement.getBBox(); 74 | rect.x.baseVal.value = bbox.x; 75 | rect.y.baseVal.value = bbox.y; 76 | rect.width.baseVal.value = bbox.width; 77 | rect.height.baseVal.value = bbox.height; 78 | rect.setAttribute('class', className); 79 | addItems.push({ 80 | "parent": node.parentNode, 81 | "target": rect}); 82 | } 83 | } 84 | } 85 | else if (!jQuery(node).is("button, select, textarea")) { 86 | jQuery.each(node.childNodes, function() { 87 | highlight(this, addItems); 88 | }); 89 | } 90 | } 91 | var addItems = []; 92 | var result = this.each(function() { 93 | highlight(this, addItems); 94 | }); 95 | for (var i = 0; i < addItems.length; ++i) { 96 | jQuery(addItems[i].parent).before(addItems[i].target); 97 | } 98 | return result; 99 | }; 100 | 101 | /* 102 | * backward compatibility for jQuery.browser 103 | * This will be supported until firefox bug is fixed. 104 | */ 105 | if (!jQuery.browser) { 106 | jQuery.uaMatch = function(ua) { 107 | ua = ua.toLowerCase(); 108 | 109 | var match = /(chrome)[ \/]([\w.]+)/.exec(ua) || 110 | /(webkit)[ \/]([\w.]+)/.exec(ua) || 111 | /(opera)(?:.*version|)[ \/]([\w.]+)/.exec(ua) || 112 | /(msie) ([\w.]+)/.exec(ua) || 113 | ua.indexOf("compatible") < 0 && /(mozilla)(?:.*? 
rv:([\w.]+)|)/.exec(ua) || 114 | []; 115 | 116 | return { 117 | browser: match[ 1 ] || "", 118 | version: match[ 2 ] || "0" 119 | }; 120 | }; 121 | jQuery.browser = {}; 122 | jQuery.browser[jQuery.uaMatch(navigator.userAgent).browser] = true; 123 | } 124 | -------------------------------------------------------------------------------- /docs/_build/html/_static/css/badge_only.css: -------------------------------------------------------------------------------- 1 | .clearfix{*zoom:1}.clearfix:after,.clearfix:before{display:table;content:""}.clearfix:after{clear:both}@font-face{font-family:FontAwesome;font-style:normal;font-weight:400;src:url(fonts/fontawesome-webfont.eot?674f50d287a8c48dc19ba404d20fe713?#iefix) format("embedded-opentype"),url(fonts/fontawesome-webfont.woff2?af7ae505a9eed503f8b8e6982036873e) format("woff2"),url(fonts/fontawesome-webfont.woff?fee66e712a8a08eef5805a46892932ad) format("woff"),url(fonts/fontawesome-webfont.ttf?b06871f281fee6b241d60582ae9369b9) format("truetype"),url(fonts/fontawesome-webfont.svg?912ec66d7572ff821749319396470bde#FontAwesome) format("svg")}.fa:before{font-family:FontAwesome;font-style:normal;font-weight:400;line-height:1}.fa:before,a .fa{text-decoration:inherit}.fa:before,a .fa,li .fa{display:inline-block}li .fa-large:before{width:1.875em}ul.fas{list-style-type:none;margin-left:2em;text-indent:-.8em}ul.fas li .fa{width:.8em}ul.fas li .fa-large:before{vertical-align:baseline}.fa-book:before,.icon-book:before{content:"\f02d"}.fa-caret-down:before,.icon-caret-down:before{content:"\f0d7"}.fa-caret-up:before,.icon-caret-up:before{content:"\f0d8"}.fa-caret-left:before,.icon-caret-left:before{content:"\f0d9"}.fa-caret-right:before,.icon-caret-right:before{content:"\f0da"}.rst-versions{position:fixed;bottom:0;left:0;width:300px;color:#fcfcfc;background:#1f1d1d;font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif;z-index:400}.rst-versions a{color:#2980b9;text-decoration:none}.rst-versions .rst-badge-small{display:none}.rst-versions .rst-current-version{padding:12px;background-color:#272525;display:block;text-align:right;font-size:90%;cursor:pointer;color:#27ae60}.rst-versions .rst-current-version:after{clear:both;content:"";display:block}.rst-versions .rst-current-version .fa{color:#fcfcfc}.rst-versions .rst-current-version .fa-book,.rst-versions .rst-current-version .icon-book{float:left}.rst-versions .rst-current-version.rst-out-of-date{background-color:#e74c3c;color:#fff}.rst-versions .rst-current-version.rst-active-old-version{background-color:#f1c40f;color:#000}.rst-versions.shift-up{height:auto;max-height:100%;overflow-y:scroll}.rst-versions.shift-up .rst-other-versions{display:block}.rst-versions .rst-other-versions{font-size:90%;padding:12px;color:grey;display:none}.rst-versions .rst-other-versions hr{display:block;height:1px;border:0;margin:20px 0;padding:0;border-top:1px solid #413d3d}.rst-versions .rst-other-versions dd{display:inline-block;margin:0}.rst-versions .rst-other-versions dd a{display:inline-block;padding:6px;color:#fcfcfc}.rst-versions .rst-other-versions .rtd-current-item{font-weight:700}.rst-versions.rst-badge{width:auto;bottom:20px;right:20px;left:auto;border:none;max-width:300px;max-height:90%}.rst-versions.rst-badge .fa-book,.rst-versions.rst-badge .icon-book{float:none;line-height:30px}.rst-versions.rst-badge.shift-up .rst-current-version{text-align:right}.rst-versions.rst-badge.shift-up .rst-current-version .fa-book,.rst-versions.rst-badge.shift-up .rst-current-version 
.icon-book{float:left}.rst-versions.rst-badge>.rst-current-version{width:auto;height:30px;line-height:30px;padding:0 6px;display:block;text-align:center}@media screen and (max-width:768px){.rst-versions{width:85%;display:none}.rst-versions.shift{display:block}}#flyout-search-form{padding:6px} -------------------------------------------------------------------------------- /docs/_build/html/_static/css/fonts/Roboto-Slab-Bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/html/_static/css/fonts/Roboto-Slab-Bold.woff -------------------------------------------------------------------------------- /docs/_build/html/_static/css/fonts/Roboto-Slab-Bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/html/_static/css/fonts/Roboto-Slab-Bold.woff2 -------------------------------------------------------------------------------- /docs/_build/html/_static/css/fonts/Roboto-Slab-Regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/html/_static/css/fonts/Roboto-Slab-Regular.woff -------------------------------------------------------------------------------- /docs/_build/html/_static/css/fonts/Roboto-Slab-Regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/html/_static/css/fonts/Roboto-Slab-Regular.woff2 -------------------------------------------------------------------------------- /docs/_build/html/_static/css/fonts/fontawesome-webfont.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/html/_static/css/fonts/fontawesome-webfont.eot -------------------------------------------------------------------------------- /docs/_build/html/_static/css/fonts/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/html/_static/css/fonts/fontawesome-webfont.ttf -------------------------------------------------------------------------------- /docs/_build/html/_static/css/fonts/fontawesome-webfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/html/_static/css/fonts/fontawesome-webfont.woff -------------------------------------------------------------------------------- /docs/_build/html/_static/css/fonts/fontawesome-webfont.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/html/_static/css/fonts/fontawesome-webfont.woff2 -------------------------------------------------------------------------------- /docs/_build/html/_static/css/fonts/lato-bold-italic.woff: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/html/_static/css/fonts/lato-bold-italic.woff -------------------------------------------------------------------------------- /docs/_build/html/_static/css/fonts/lato-bold-italic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/html/_static/css/fonts/lato-bold-italic.woff2 -------------------------------------------------------------------------------- /docs/_build/html/_static/css/fonts/lato-bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/html/_static/css/fonts/lato-bold.woff -------------------------------------------------------------------------------- /docs/_build/html/_static/css/fonts/lato-bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/html/_static/css/fonts/lato-bold.woff2 -------------------------------------------------------------------------------- /docs/_build/html/_static/css/fonts/lato-normal-italic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/html/_static/css/fonts/lato-normal-italic.woff -------------------------------------------------------------------------------- /docs/_build/html/_static/css/fonts/lato-normal-italic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/html/_static/css/fonts/lato-normal-italic.woff2 -------------------------------------------------------------------------------- /docs/_build/html/_static/css/fonts/lato-normal.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/html/_static/css/fonts/lato-normal.woff -------------------------------------------------------------------------------- /docs/_build/html/_static/css/fonts/lato-normal.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/html/_static/css/fonts/lato-normal.woff2 -------------------------------------------------------------------------------- /docs/_build/html/_static/custom.css: -------------------------------------------------------------------------------- 1 | .wy-nav-content { 2 | max-width: none; 3 | } 4 | -------------------------------------------------------------------------------- /docs/_build/html/_static/doctools.js: -------------------------------------------------------------------------------- 1 | /* 2 | * Base JavaScript utilities for all Sphinx HTML documentation. 
3 | */ 4 | "use strict"; 5 | 6 | const BLACKLISTED_KEY_CONTROL_ELEMENTS = new Set([ 7 | "TEXTAREA", 8 | "INPUT", 9 | "SELECT", 10 | "BUTTON", 11 | ]); 12 | 13 | const _ready = (callback) => { 14 | if (document.readyState !== "loading") { 15 | callback(); 16 | } else { 17 | document.addEventListener("DOMContentLoaded", callback); 18 | } 19 | }; 20 | 21 | /** 22 | * Small JavaScript module for the documentation. 23 | */ 24 | const Documentation = { 25 | init: () => { 26 | Documentation.initDomainIndexTable(); 27 | Documentation.initOnKeyListeners(); 28 | }, 29 | 30 | /** 31 | * i18n support 32 | */ 33 | TRANSLATIONS: {}, 34 | PLURAL_EXPR: (n) => (n === 1 ? 0 : 1), 35 | LOCALE: "unknown", 36 | 37 | // gettext and ngettext don't access this so that the functions 38 | // can safely bound to a different name (_ = Documentation.gettext) 39 | gettext: (string) => { 40 | const translated = Documentation.TRANSLATIONS[string]; 41 | switch (typeof translated) { 42 | case "undefined": 43 | return string; // no translation 44 | case "string": 45 | return translated; // translation exists 46 | default: 47 | return translated[0]; // (singular, plural) translation tuple exists 48 | } 49 | }, 50 | 51 | ngettext: (singular, plural, n) => { 52 | const translated = Documentation.TRANSLATIONS[singular]; 53 | if (typeof translated !== "undefined") 54 | return translated[Documentation.PLURAL_EXPR(n)]; 55 | return n === 1 ? singular : plural; 56 | }, 57 | 58 | addTranslations: (catalog) => { 59 | Object.assign(Documentation.TRANSLATIONS, catalog.messages); 60 | Documentation.PLURAL_EXPR = new Function( 61 | "n", 62 | `return (${catalog.plural_expr})` 63 | ); 64 | Documentation.LOCALE = catalog.locale; 65 | }, 66 | 67 | /** 68 | * helper function to focus on search bar 69 | */ 70 | focusSearchBar: () => { 71 | document.querySelectorAll("input[name=q]")[0]?.focus(); 72 | }, 73 | 74 | /** 75 | * Initialise the domain index toggle buttons 76 | */ 77 | initDomainIndexTable: () => { 78 | const toggler = (el) => { 79 | const idNumber = el.id.substr(7); 80 | const toggledRows = document.querySelectorAll(`tr.cg-${idNumber}`); 81 | if (el.src.substr(-9) === "minus.png") { 82 | el.src = `${el.src.substr(0, el.src.length - 9)}plus.png`; 83 | toggledRows.forEach((el) => (el.style.display = "none")); 84 | } else { 85 | el.src = `${el.src.substr(0, el.src.length - 8)}minus.png`; 86 | toggledRows.forEach((el) => (el.style.display = "")); 87 | } 88 | }; 89 | 90 | const togglerElements = document.querySelectorAll("img.toggler"); 91 | togglerElements.forEach((el) => 92 | el.addEventListener("click", (event) => toggler(event.currentTarget)) 93 | ); 94 | togglerElements.forEach((el) => (el.style.display = "")); 95 | if (DOCUMENTATION_OPTIONS.COLLAPSE_INDEX) togglerElements.forEach(toggler); 96 | }, 97 | 98 | initOnKeyListeners: () => { 99 | // only install a listener if it is really needed 100 | if ( 101 | !DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS && 102 | !DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS 103 | ) 104 | return; 105 | 106 | document.addEventListener("keydown", (event) => { 107 | // bail for input elements 108 | if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return; 109 | // bail with special keys 110 | if (event.altKey || event.ctrlKey || event.metaKey) return; 111 | 112 | if (!event.shiftKey) { 113 | switch (event.key) { 114 | case "ArrowLeft": 115 | if (!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break; 116 | 117 | const prevLink = document.querySelector('link[rel="prev"]'); 118 | if 
(prevLink && prevLink.href) { 119 | window.location.href = prevLink.href; 120 | event.preventDefault(); 121 | } 122 | break; 123 | case "ArrowRight": 124 | if (!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break; 125 | 126 | const nextLink = document.querySelector('link[rel="next"]'); 127 | if (nextLink && nextLink.href) { 128 | window.location.href = nextLink.href; 129 | event.preventDefault(); 130 | } 131 | break; 132 | } 133 | } 134 | 135 | // some keyboard layouts may need Shift to get / 136 | switch (event.key) { 137 | case "/": 138 | if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) break; 139 | Documentation.focusSearchBar(); 140 | event.preventDefault(); 141 | } 142 | }); 143 | }, 144 | }; 145 | 146 | // quick alias for translations 147 | const _ = Documentation.gettext; 148 | 149 | _ready(Documentation.init); 150 | -------------------------------------------------------------------------------- /docs/_build/html/_static/documentation_options.js: -------------------------------------------------------------------------------- 1 | const DOCUMENTATION_OPTIONS = { 2 | VERSION: '0.1.0', 3 | LANGUAGE: 'en', 4 | COLLAPSE_INDEX: false, 5 | BUILDER: 'html', 6 | FILE_SUFFIX: '.html', 7 | LINK_SUFFIX: '.html', 8 | HAS_SOURCE: true, 9 | SOURCELINK_SUFFIX: '.txt', 10 | NAVIGATION_WITH_KEYS: false, 11 | SHOW_SEARCH_SUMMARY: true, 12 | ENABLE_SEARCH_SHORTCUTS: true, 13 | }; -------------------------------------------------------------------------------- /docs/_build/html/_static/file.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/html/_static/file.png -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/Lato/lato-bold.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/html/_static/fonts/Lato/lato-bold.eot -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/Lato/lato-bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/html/_static/fonts/Lato/lato-bold.ttf -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/Lato/lato-bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/html/_static/fonts/Lato/lato-bold.woff -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/Lato/lato-bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/html/_static/fonts/Lato/lato-bold.woff2 -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/Lato/lato-bolditalic.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/html/_static/fonts/Lato/lato-bolditalic.eot 
-------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/Lato/lato-bolditalic.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/html/_static/fonts/Lato/lato-bolditalic.ttf -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/Lato/lato-bolditalic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/html/_static/fonts/Lato/lato-bolditalic.woff -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/Lato/lato-bolditalic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/html/_static/fonts/Lato/lato-bolditalic.woff2 -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/Lato/lato-italic.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/html/_static/fonts/Lato/lato-italic.eot -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/Lato/lato-italic.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/html/_static/fonts/Lato/lato-italic.ttf -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/Lato/lato-italic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/html/_static/fonts/Lato/lato-italic.woff -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/Lato/lato-italic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/html/_static/fonts/Lato/lato-italic.woff2 -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/Lato/lato-regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/html/_static/fonts/Lato/lato-regular.eot -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/Lato/lato-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/html/_static/fonts/Lato/lato-regular.ttf -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/Lato/lato-regular.woff: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/html/_static/fonts/Lato/lato-regular.woff -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/Lato/lato-regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/html/_static/fonts/Lato/lato-regular.woff2 -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.eot -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.ttf -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff2 -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.eot -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.ttf -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff -------------------------------------------------------------------------------- /docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff2: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff2 -------------------------------------------------------------------------------- /docs/_build/html/_static/js/badge_only.js: -------------------------------------------------------------------------------- 1 | !function(e){var t={};function r(n){if(t[n])return t[n].exports;var o=t[n]={i:n,l:!1,exports:{}};return e[n].call(o.exports,o,o.exports,r),o.l=!0,o.exports}r.m=e,r.c=t,r.d=function(e,t,n){r.o(e,t)||Object.defineProperty(e,t,{enumerable:!0,get:n})},r.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},r.t=function(e,t){if(1&t&&(e=r(e)),8&t)return e;if(4&t&&"object"==typeof e&&e&&e.__esModule)return e;var n=Object.create(null);if(r.r(n),Object.defineProperty(n,"default",{enumerable:!0,value:e}),2&t&&"string"!=typeof e)for(var o in e)r.d(n,o,function(t){return e[t]}.bind(null,o));return n},r.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return r.d(t,"a",t),t},r.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r.p="",r(r.s=4)}({4:function(e,t,r){}}); -------------------------------------------------------------------------------- /docs/_build/html/_static/js/theme.js: -------------------------------------------------------------------------------- 1 | !function(n){var e={};function t(i){if(e[i])return e[i].exports;var o=e[i]={i:i,l:!1,exports:{}};return n[i].call(o.exports,o,o.exports,t),o.l=!0,o.exports}t.m=n,t.c=e,t.d=function(n,e,i){t.o(n,e)||Object.defineProperty(n,e,{enumerable:!0,get:i})},t.r=function(n){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(n,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(n,"__esModule",{value:!0})},t.t=function(n,e){if(1&e&&(n=t(n)),8&e)return n;if(4&e&&"object"==typeof n&&n&&n.__esModule)return n;var i=Object.create(null);if(t.r(i),Object.defineProperty(i,"default",{enumerable:!0,value:n}),2&e&&"string"!=typeof n)for(var o in n)t.d(i,o,function(e){return n[e]}.bind(null,o));return i},t.n=function(n){var e=n&&n.__esModule?function(){return n.default}:function(){return n};return t.d(e,"a",e),e},t.o=function(n,e){return Object.prototype.hasOwnProperty.call(n,e)},t.p="",t(t.s=0)}([function(n,e,t){t(1),n.exports=t(3)},function(n,e,t){(function(){var e="undefined"!=typeof window?window.jQuery:t(2);n.exports.ThemeNav={navBar:null,win:null,winScroll:!1,winResize:!1,linkScroll:!1,winPosition:0,winHeight:null,docHeight:null,isRunning:!1,enable:function(n){var t=this;void 0===n&&(n=!0),t.isRunning||(t.isRunning=!0,e((function(e){t.init(e),t.reset(),t.win.on("hashchange",t.reset),n&&t.win.on("scroll",(function(){t.linkScroll||t.winScroll||(t.winScroll=!0,requestAnimationFrame((function(){t.onScroll()})))})),t.win.on("resize",(function(){t.winResize||(t.winResize=!0,requestAnimationFrame((function(){t.onResize()})))})),t.onResize()})))},enableSticky:function(){this.enable(!0)},init:function(n){n(document);var e=this;this.navBar=n("div.wy-side-scroll:first"),this.win=n(window),n(document).on("click","[data-toggle='wy-nav-top']",(function(){n("[data-toggle='wy-nav-shift']").toggleClass("shift"),n("[data-toggle='rst-versions']").toggleClass("shift")})).on("click",".wy-menu-vertical .current ul li a",(function(){var 
t=n(this);n("[data-toggle='wy-nav-shift']").removeClass("shift"),n("[data-toggle='rst-versions']").toggleClass("shift"),e.toggleCurrent(t),e.hashChange()})).on("click","[data-toggle='rst-current-version']",(function(){n("[data-toggle='rst-versions']").toggleClass("shift-up")})),n("table.docutils:not(.field-list,.footnote,.citation)").wrap("
"),n("table.docutils.footnote").wrap("
"),n("table.docutils.citation").wrap("
"),n(".wy-menu-vertical ul").not(".simple").siblings("a").each((function(){var t=n(this);expand=n(''),expand.on("click",(function(n){return e.toggleCurrent(t),n.stopPropagation(),!1})),t.prepend(expand)}))},reset:function(){var n=encodeURI(window.location.hash)||"#";try{var e=$(".wy-menu-vertical"),t=e.find('[href="'+n+'"]');if(0===t.length){var i=$('.document [id="'+n.substring(1)+'"]').closest("div.section");0===(t=e.find('[href="#'+i.attr("id")+'"]')).length&&(t=e.find('[href="#"]'))}if(t.length>0){$(".wy-menu-vertical .current").removeClass("current").attr("aria-expanded","false"),t.addClass("current").attr("aria-expanded","true"),t.closest("li.toctree-l1").parent().addClass("current").attr("aria-expanded","true");for(let n=1;n<=10;n++)t.closest("li.toctree-l"+n).addClass("current").attr("aria-expanded","true");t[0].scrollIntoView()}}catch(n){console.log("Error expanding nav for anchor",n)}},onScroll:function(){this.winScroll=!1;var n=this.win.scrollTop(),e=n+this.winHeight,t=this.navBar.scrollTop()+(n-this.winPosition);n<0||e>this.docHeight||(this.navBar.scrollTop(t),this.winPosition=n)},onResize:function(){this.winResize=!1,this.winHeight=this.win.height(),this.docHeight=$(document).height()},hashChange:function(){this.linkScroll=!0,this.win.one("hashchange",(function(){this.linkScroll=!1}))},toggleCurrent:function(n){var e=n.closest("li");e.siblings("li.current").removeClass("current").attr("aria-expanded","false"),e.siblings().find("li.current").removeClass("current").attr("aria-expanded","false");var t=e.find("> ul li");t.length&&(t.removeClass("current").attr("aria-expanded","false"),e.toggleClass("current").attr("aria-expanded",(function(n,e){return"true"==e?"false":"true"})))}},"undefined"!=typeof window&&(window.SphinxRtdTheme={Navigation:n.exports.ThemeNav,StickyNav:n.exports.ThemeNav}),function(){for(var n=0,e=["ms","moz","webkit","o"],t=0;t0 56 | var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$"; // [C]VC[V] is m=1 57 | var mgr1 = "^(" + C + ")?" + V + C + V + C; // [C]VCVC... is m>1 58 | var s_v = "^(" + C + ")?" 
+ v; // vowel in stem 59 | 60 | this.stemWord = function (w) { 61 | var stem; 62 | var suffix; 63 | var firstch; 64 | var origword = w; 65 | 66 | if (w.length < 3) 67 | return w; 68 | 69 | var re; 70 | var re2; 71 | var re3; 72 | var re4; 73 | 74 | firstch = w.substr(0,1); 75 | if (firstch == "y") 76 | w = firstch.toUpperCase() + w.substr(1); 77 | 78 | // Step 1a 79 | re = /^(.+?)(ss|i)es$/; 80 | re2 = /^(.+?)([^s])s$/; 81 | 82 | if (re.test(w)) 83 | w = w.replace(re,"$1$2"); 84 | else if (re2.test(w)) 85 | w = w.replace(re2,"$1$2"); 86 | 87 | // Step 1b 88 | re = /^(.+?)eed$/; 89 | re2 = /^(.+?)(ed|ing)$/; 90 | if (re.test(w)) { 91 | var fp = re.exec(w); 92 | re = new RegExp(mgr0); 93 | if (re.test(fp[1])) { 94 | re = /.$/; 95 | w = w.replace(re,""); 96 | } 97 | } 98 | else if (re2.test(w)) { 99 | var fp = re2.exec(w); 100 | stem = fp[1]; 101 | re2 = new RegExp(s_v); 102 | if (re2.test(stem)) { 103 | w = stem; 104 | re2 = /(at|bl|iz)$/; 105 | re3 = new RegExp("([^aeiouylsz])\\1$"); 106 | re4 = new RegExp("^" + C + v + "[^aeiouwxy]$"); 107 | if (re2.test(w)) 108 | w = w + "e"; 109 | else if (re3.test(w)) { 110 | re = /.$/; 111 | w = w.replace(re,""); 112 | } 113 | else if (re4.test(w)) 114 | w = w + "e"; 115 | } 116 | } 117 | 118 | // Step 1c 119 | re = /^(.+?)y$/; 120 | if (re.test(w)) { 121 | var fp = re.exec(w); 122 | stem = fp[1]; 123 | re = new RegExp(s_v); 124 | if (re.test(stem)) 125 | w = stem + "i"; 126 | } 127 | 128 | // Step 2 129 | re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/; 130 | if (re.test(w)) { 131 | var fp = re.exec(w); 132 | stem = fp[1]; 133 | suffix = fp[2]; 134 | re = new RegExp(mgr0); 135 | if (re.test(stem)) 136 | w = stem + step2list[suffix]; 137 | } 138 | 139 | // Step 3 140 | re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/; 141 | if (re.test(w)) { 142 | var fp = re.exec(w); 143 | stem = fp[1]; 144 | suffix = fp[2]; 145 | re = new RegExp(mgr0); 146 | if (re.test(stem)) 147 | w = stem + step3list[suffix]; 148 | } 149 | 150 | // Step 4 151 | re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/; 152 | re2 = /^(.+?)(s|t)(ion)$/; 153 | if (re.test(w)) { 154 | var fp = re.exec(w); 155 | stem = fp[1]; 156 | re = new RegExp(mgr1); 157 | if (re.test(stem)) 158 | w = stem; 159 | } 160 | else if (re2.test(w)) { 161 | var fp = re2.exec(w); 162 | stem = fp[1] + fp[2]; 163 | re2 = new RegExp(mgr1); 164 | if (re2.test(stem)) 165 | w = stem; 166 | } 167 | 168 | // Step 5 169 | re = /^(.+?)e$/; 170 | if (re.test(w)) { 171 | var fp = re.exec(w); 172 | stem = fp[1]; 173 | re = new RegExp(mgr1); 174 | re2 = new RegExp(meq1); 175 | re3 = new RegExp("^" + C + v + "[^aeiouwxy]$"); 176 | if (re.test(stem) || (re2.test(stem) && !(re3.test(stem)))) 177 | w = stem; 178 | } 179 | re = /ll$/; 180 | re2 = new RegExp(mgr1); 181 | if (re.test(w) && re2.test(w)) { 182 | re = /.$/; 183 | w = w.replace(re,""); 184 | } 185 | 186 | // and turn initial Y back to y 187 | if (firstch == "y") 188 | w = firstch.toLowerCase() + w.substr(1); 189 | return w; 190 | } 191 | } 192 | 193 | -------------------------------------------------------------------------------- /docs/_build/html/_static/minus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/html/_static/minus.png 
-------------------------------------------------------------------------------- /docs/_build/html/_static/plus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/html/_static/plus.png -------------------------------------------------------------------------------- /docs/_build/html/_static/pygments.css: -------------------------------------------------------------------------------- 1 | pre { line-height: 125%; } 2 | td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } 3 | span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } 4 | td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } 5 | span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } 6 | .highlight .hll { background-color: #ffffcc } 7 | .highlight { background: #f8f8f8; } 8 | .highlight .c { color: #3D7B7B; font-style: italic } /* Comment */ 9 | .highlight .err { border: 1px solid #FF0000 } /* Error */ 10 | .highlight .k { color: #008000; font-weight: bold } /* Keyword */ 11 | .highlight .o { color: #666666 } /* Operator */ 12 | .highlight .ch { color: #3D7B7B; font-style: italic } /* Comment.Hashbang */ 13 | .highlight .cm { color: #3D7B7B; font-style: italic } /* Comment.Multiline */ 14 | .highlight .cp { color: #9C6500 } /* Comment.Preproc */ 15 | .highlight .cpf { color: #3D7B7B; font-style: italic } /* Comment.PreprocFile */ 16 | .highlight .c1 { color: #3D7B7B; font-style: italic } /* Comment.Single */ 17 | .highlight .cs { color: #3D7B7B; font-style: italic } /* Comment.Special */ 18 | .highlight .gd { color: #A00000 } /* Generic.Deleted */ 19 | .highlight .ge { font-style: italic } /* Generic.Emph */ 20 | .highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */ 21 | .highlight .gr { color: #E40000 } /* Generic.Error */ 22 | .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ 23 | .highlight .gi { color: #008400 } /* Generic.Inserted */ 24 | .highlight .go { color: #717171 } /* Generic.Output */ 25 | .highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ 26 | .highlight .gs { font-weight: bold } /* Generic.Strong */ 27 | .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ 28 | .highlight .gt { color: #0044DD } /* Generic.Traceback */ 29 | .highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */ 30 | .highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */ 31 | .highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */ 32 | .highlight .kp { color: #008000 } /* Keyword.Pseudo */ 33 | .highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */ 34 | .highlight .kt { color: #B00040 } /* Keyword.Type */ 35 | .highlight .m { color: #666666 } /* Literal.Number */ 36 | .highlight .s { color: #BA2121 } /* Literal.String */ 37 | .highlight .na { color: #687822 } /* Name.Attribute */ 38 | .highlight .nb { color: #008000 } /* Name.Builtin */ 39 | .highlight .nc { color: #0000FF; font-weight: bold } /* Name.Class */ 40 | .highlight .no { color: #880000 } /* Name.Constant */ 41 | .highlight .nd { color: #AA22FF } /* Name.Decorator */ 42 | .highlight .ni { color: #717171; font-weight: bold } /* Name.Entity */ 43 | .highlight .ne { color: 
#CB3F38; font-weight: bold } /* Name.Exception */ 44 | .highlight .nf { color: #0000FF } /* Name.Function */ 45 | .highlight .nl { color: #767600 } /* Name.Label */ 46 | .highlight .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */ 47 | .highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */ 48 | .highlight .nv { color: #19177C } /* Name.Variable */ 49 | .highlight .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */ 50 | .highlight .w { color: #bbbbbb } /* Text.Whitespace */ 51 | .highlight .mb { color: #666666 } /* Literal.Number.Bin */ 52 | .highlight .mf { color: #666666 } /* Literal.Number.Float */ 53 | .highlight .mh { color: #666666 } /* Literal.Number.Hex */ 54 | .highlight .mi { color: #666666 } /* Literal.Number.Integer */ 55 | .highlight .mo { color: #666666 } /* Literal.Number.Oct */ 56 | .highlight .sa { color: #BA2121 } /* Literal.String.Affix */ 57 | .highlight .sb { color: #BA2121 } /* Literal.String.Backtick */ 58 | .highlight .sc { color: #BA2121 } /* Literal.String.Char */ 59 | .highlight .dl { color: #BA2121 } /* Literal.String.Delimiter */ 60 | .highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */ 61 | .highlight .s2 { color: #BA2121 } /* Literal.String.Double */ 62 | .highlight .se { color: #AA5D1F; font-weight: bold } /* Literal.String.Escape */ 63 | .highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */ 64 | .highlight .si { color: #A45A77; font-weight: bold } /* Literal.String.Interpol */ 65 | .highlight .sx { color: #008000 } /* Literal.String.Other */ 66 | .highlight .sr { color: #A45A77 } /* Literal.String.Regex */ 67 | .highlight .s1 { color: #BA2121 } /* Literal.String.Single */ 68 | .highlight .ss { color: #19177C } /* Literal.String.Symbol */ 69 | .highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */ 70 | .highlight .fm { color: #0000FF } /* Name.Function.Magic */ 71 | .highlight .vc { color: #19177C } /* Name.Variable.Class */ 72 | .highlight .vg { color: #19177C } /* Name.Variable.Global */ 73 | .highlight .vi { color: #19177C } /* Name.Variable.Instance */ 74 | .highlight .vm { color: #19177C } /* Name.Variable.Magic */ 75 | .highlight .il { color: #666666 } /* Literal.Number.Integer.Long */ -------------------------------------------------------------------------------- /docs/_build/html/_static/sphinx_highlight.js: -------------------------------------------------------------------------------- 1 | /* Highlighting utilities for Sphinx HTML documentation. */ 2 | "use strict"; 3 | 4 | const SPHINX_HIGHLIGHT_ENABLED = true 5 | 6 | /** 7 | * highlight a given string on a node by wrapping it in 8 | * span elements with the given class name. 
9 | */ 10 | const _highlight = (node, addItems, text, className) => { 11 | if (node.nodeType === Node.TEXT_NODE) { 12 | const val = node.nodeValue; 13 | const parent = node.parentNode; 14 | const pos = val.toLowerCase().indexOf(text); 15 | if ( 16 | pos >= 0 && 17 | !parent.classList.contains(className) && 18 | !parent.classList.contains("nohighlight") 19 | ) { 20 | let span; 21 | 22 | const closestNode = parent.closest("body, svg, foreignObject"); 23 | const isInSVG = closestNode && closestNode.matches("svg"); 24 | if (isInSVG) { 25 | span = document.createElementNS("http://www.w3.org/2000/svg", "tspan"); 26 | } else { 27 | span = document.createElement("span"); 28 | span.classList.add(className); 29 | } 30 | 31 | span.appendChild(document.createTextNode(val.substr(pos, text.length))); 32 | const rest = document.createTextNode(val.substr(pos + text.length)); 33 | parent.insertBefore( 34 | span, 35 | parent.insertBefore( 36 | rest, 37 | node.nextSibling 38 | ) 39 | ); 40 | node.nodeValue = val.substr(0, pos); 41 | /* There may be more occurrences of search term in this node. So call this 42 | * function recursively on the remaining fragment. 43 | */ 44 | _highlight(rest, addItems, text, className); 45 | 46 | if (isInSVG) { 47 | const rect = document.createElementNS( 48 | "http://www.w3.org/2000/svg", 49 | "rect" 50 | ); 51 | const bbox = parent.getBBox(); 52 | rect.x.baseVal.value = bbox.x; 53 | rect.y.baseVal.value = bbox.y; 54 | rect.width.baseVal.value = bbox.width; 55 | rect.height.baseVal.value = bbox.height; 56 | rect.setAttribute("class", className); 57 | addItems.push({ parent: parent, target: rect }); 58 | } 59 | } 60 | } else if (node.matches && !node.matches("button, select, textarea")) { 61 | node.childNodes.forEach((el) => _highlight(el, addItems, text, className)); 62 | } 63 | }; 64 | const _highlightText = (thisNode, text, className) => { 65 | let addItems = []; 66 | _highlight(thisNode, addItems, text, className); 67 | addItems.forEach((obj) => 68 | obj.parent.insertAdjacentElement("beforebegin", obj.target) 69 | ); 70 | }; 71 | 72 | /** 73 | * Small JavaScript module for the documentation. 74 | */ 75 | const SphinxHighlight = { 76 | 77 | /** 78 | * highlight the search words provided in localstorage in the text 79 | */ 80 | highlightSearchWords: () => { 81 | if (!SPHINX_HIGHLIGHT_ENABLED) return; // bail if no highlight 82 | 83 | // get and clear terms from localstorage 84 | const url = new URL(window.location); 85 | const highlight = 86 | localStorage.getItem("sphinx_highlight_terms") 87 | || url.searchParams.get("highlight") 88 | || ""; 89 | localStorage.removeItem("sphinx_highlight_terms") 90 | url.searchParams.delete("highlight"); 91 | window.history.replaceState({}, "", url); 92 | 93 | // get individual terms from highlight string 94 | const terms = highlight.toLowerCase().split(/\s+/).filter(x => x); 95 | if (terms.length === 0) return; // nothing to do 96 | 97 | // There should never be more than one element matching "div.body" 98 | const divBody = document.querySelectorAll("div.body"); 99 | const body = divBody.length ? 
divBody[0] : document.querySelector("body"); 100 | window.setTimeout(() => { 101 | terms.forEach((term) => _highlightText(body, term, "highlighted")); 102 | }, 10); 103 | 104 | const searchBox = document.getElementById("searchbox"); 105 | if (searchBox === null) return; 106 | searchBox.appendChild( 107 | document 108 | .createRange() 109 | .createContextualFragment( 110 | '" 114 | ) 115 | ); 116 | }, 117 | 118 | /** 119 | * helper function to hide the search marks again 120 | */ 121 | hideSearchWords: () => { 122 | document 123 | .querySelectorAll("#searchbox .highlight-link") 124 | .forEach((el) => el.remove()); 125 | document 126 | .querySelectorAll("span.highlighted") 127 | .forEach((el) => el.classList.remove("highlighted")); 128 | localStorage.removeItem("sphinx_highlight_terms") 129 | }, 130 | 131 | initEscapeListener: () => { 132 | // only install a listener if it is really needed 133 | if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) return; 134 | 135 | document.addEventListener("keydown", (event) => { 136 | // bail for input elements 137 | if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return; 138 | // bail with special keys 139 | if (event.shiftKey || event.altKey || event.ctrlKey || event.metaKey) return; 140 | if (DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS && (event.key === "Escape")) { 141 | SphinxHighlight.hideSearchWords(); 142 | event.preventDefault(); 143 | } 144 | }); 145 | }, 146 | }; 147 | 148 | _ready(() => { 149 | /* Do not call highlightSearchWords() when we are on the search page. 150 | * It will highlight words from the *previous* search query. 151 | */ 152 | if (typeof Search === "undefined") SphinxHighlight.highlightSearchWords(); 153 | SphinxHighlight.initEscapeListener(); 154 | }); 155 | -------------------------------------------------------------------------------- /docs/_build/html/genindex.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Index — GWASpy 0.1.0 documentation 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 |
[Sphinx-generated HTML index page: navigation and theme markup omitted. Footer: © Copyright 2024, Martin Lab, Broad Institute. Built with Sphinx using a theme provided by Read the Docs.]
-------------------------------------------------------------------------------- /docs/_build/html/install/linux.html: --------------------------------------------------------------------------------
[Sphinx-generated HTML page, markup omitted. Page title: "Install GWASpy on GNU/Linux — GWASpy 0.1.0 documentation". Recoverable body text:

Install GWASpy on GNU/Linux

- Install Java 8.
- Install Python 3.6+.
- Install a recent version of the C and C++ standard libraries. GCC 5.0, LLVM version 3.4, or any later versions suffice.
- Install BLAS and LAPACK.
- Install TeX Live.
- Install GWASpy using pip.

On a recent Debian-like system, the following should suffice:

    apt-get install -y \
        openjdk-8-jre-headless \
        g++ \
        python3.6 python3-pip \
        libopenblas-base liblapack3 \
        texlive-pictures texlive-science texlive-latex-extra latexmk
    python3.6 -m pip install gwaspy

Footer: © Copyright 2024, Martin Lab, Broad Institute. Built with Sphinx using a theme provided by Read the Docs.]
-------------------------------------------------------------------------------- /docs/_build/html/install/macosx.html: --------------------------------------------------------------------------------
[Sphinx-generated HTML page, markup omitted. Page title: "Install GWASpy on Mac OS X — GWASpy 0.1.0 documentation". Recoverable body text:

Install GWASpy on Mac OS X

- Install Java 8.
- Install Python 3.6+.
- Install MacTeX.
- Open Terminal.app and execute pip3 install gwaspy.

Footer: © Copyright 2024, Martin Lab, Broad Institute. Built with Sphinx using a theme provided by Read the Docs.]
-------------------------------------------------------------------------------- /docs/_build/html/installation.html: --------------------------------------------------------------------------------
[Sphinx-generated HTML page, markup omitted. Page title: "Installing GWASpy — GWASpy 0.1.0 documentation". Body text mirrors docs/installation.rst below. Footer: © Copyright 2024, Martin Lab, Broad Institute. Built with Sphinx using a theme provided by Read the Docs.]
-------------------------------------------------------------------------------- /docs/_build/html/objects.inv: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/_build/html/objects.inv
-------------------------------------------------------------------------------- /docs/_build/html/search.html: --------------------------------------------------------------------------------
[Sphinx-generated HTML search page: navigation, search form, and theme markup omitted. Page title: "Search — GWASpy 0.1.0 documentation". Footer: © Copyright 2024, Martin Lab, Broad Institute. Built with Sphinx using a theme provided by Read the Docs.]
-------------------------------------------------------------------------------- /docs/_static/custom.css: --------------------------------------------------------------------------------
.wy-nav-content {
    max-width: none;
}
-------------------------------------------------------------------------------- /docs/conf.py: --------------------------------------------------------------------------------
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html

# -- Path setup --------------------------------------------------------------

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
# import os
# import sys
# sys.path.insert(0, os.path.abspath('.'))


# -- Project information -----------------------------------------------------

project = 'GWASpy'
copyright = '2024, Martin Lab, Broad Institute'
author = 'Lindokuhle Nkambule'

# The full version, including alpha/beta/rc tags
release = '0.1.0'


# -- General configuration ---------------------------------------------------

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
]

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']


# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_rtd_theme'

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

html_css_files = [
    'custom.css',
]
-------------------------------------------------------------------------------- /docs/images/qc_workflow.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/docs/images/qc_workflow.png
-------------------------------------------------------------------------------- /docs/imputation.rst: --------------------------------------------------------------------------------
.. _sec-imputation:

===================
Genotype Imputation
===================

Genotype imputation is the process of estimating missing genotypes using a haplotype or genotype reference panel.
It allows you to accurately evaluate the evidence for association at genetic markers that are not directly genotyped.
GWASpy has a module, :code:`imputation`, for running imputation using IMPUTE5. Because imputation can be a computationally
intensive task, we run it on multiple chunks in parallel, then merge the imputed chunks together at the end. Below are
examples of how to run imputation using either the HGDP+1kGP reference panel or your own reference panel.

Examples
########

**1. HGDP+1kGP reference panel**

.. code-block:: sh

    imputation --input-file gs://path/to/file.vcf.bgz --vcf-ref hgdp1kgp --output-filename my_outfilename --out-dir gs://path/to/output/dir --n-samples 1989 --n-ref-samples 4091 --billing-project my-billing-project

**2. Own reference panel**

.. code-block:: sh

    imputation --input-file gs://path/to/file.vcf.bgz --vcf-ref gs://path/to/ref_panel/ALL.chrCNUMBER.vcf --output-filename my_outfilename --out-dir gs://path/to/output/dir --n-samples 1989 --n-ref-samples 4091 --billing-project my-billing-project

.. warning::
    When using your own reference panel, make sure you use the CNUMBER placeholder in the filename passed to :code:`--vcf-ref`

Arguments and options
#####################

.. list-table::
   :widths: 15 50
   :header-rows: 1

   * - Argument
     - Description
   * - :code:`--input-file`
     - Path to the input VCF, or to a TSV listing the target VCF/BAM files
   * - :code:`--vcf-ref`
     - Reference panel file to use for imputation
   * - :code:`--chromosomes`
     - Chromosome(s) to run imputation for. Default is :code:`all`
   * - :code:`--local`
     - Run jobs locally. The default is the Service backend, where jobs are executed on a multi-tenant compute cluster in Google Cloud
   * - :code:`--billing-project`
     - Billing project to be used for the jobs
   * - :code:`--n-samples`
     - Number of target samples to be imputed. We use this to estimate resources for some of the jobs
   * - :code:`--n-ref-samples`
     - Number of reference samples. We use this to estimate resources for some of the jobs
   * - :code:`--software`
     - Software to use for imputation. Options: [:code:`beagle5`, :code:`impute5`]. Default is :code:`impute5`
   * - :code:`--output-filename`
     - Output filename without the file extension
   * - :code:`--out-dir`
     - Path to where output files will be saved

Output
######
The resulting output is a VCF file per chromosome with imputed genotypes.
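To sanity-check a per-chromosome output file, you can load it back into Hail. The sketch below is a minimal example; the
output path is a hypothetical placeholder, not a filename produced by GWASpy itself:

.. code-block:: python

    import hail as hl

    hl.init()

    # hypothetical path to one imputed per-chromosome VCF written by the imputation module
    mt = hl.import_vcf('gs://path/to/output/dir/my_outfilename_chr22.vcf.bgz',
                       force_bgz=True, reference_genome='GRCh38')

    # basic sanity checks: print the schema, then variant and sample counts
    mt.describe()
    print(mt.count())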
-------------------------------------------------------------------------------- /docs/index.rst: --------------------------------------------------------------------------------
==========
GWASpy 0.1
==========

GWASpy is an open-source Python package for scalable genetic analyses: (1) pre-imputation QC; (2) principal component
analysis; (3) haplotype phasing; and (4) genotype imputation. See the `installation page `_ to get started
using GWASpy.

========
Contents
========

.. toctree::
    :maxdepth: 2

    Installation
    Hail Query and Batch
    Pre-Imputation QC
    Principal Component Analysis
    Haplotype Phasing
    Genotype Imputation
    Tutorial
-------------------------------------------------------------------------------- /docs/installation.rst: --------------------------------------------------------------------------------
.. _sec-installation:

=================
Installing GWASpy
=================

GWASpy leverages Hail to enable efficient processing of data directly from Google Cloud. As such, the first step is to
install Hail as per the instructions `here `_. After you have installed Hail, GWASpy can be installed using

.. code-block:: sh

    pip install gwaspy

It is important to note that the command above installs GWASpy locally (or wherever you ran the command). For the
:code:`phasing` and :code:`imputation` modules, which use Hail Batch, this is enough. For the :code:`preimp_qc` and
:code:`pca` modules, which use Hail Query, you have to ensure that the Dataproc cluster has GWASpy installed; there are
examples showing how to do this in the :ref:`preimp_qc` and :ref:`pca` sections.
-------------------------------------------------------------------------------- /docs/make.bat: --------------------------------------------------------------------------------
@ECHO OFF

pushd %~dp0

REM Command file for Sphinx documentation

if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=.
set BUILDDIR=_build

if "%1" == "" goto help

%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.http://sphinx-doc.org/
	exit /b 1
)

%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%

:end
popd
-------------------------------------------------------------------------------- /docs/pca.rst: --------------------------------------------------------------------------------
.. _sec-pca:
.. _pca:

============================
Principal Component Analysis
============================

Principal component analysis (PCA) can be used to detect and quantify the genetic structure of populations.
In GWASpy, the :code:`pca` module can be run in 3 different ways: (1) normal PCA without a reference panel; (2) joint PCA with a reference panel; or (3) projection PCA onto a reference panel.

.. toctree::
    :maxdepth: 1

    Normal PCA
    Joint PCA
    Projection PCA
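At their core, all three modes rely on Hail's PCA machinery. The sketch below shows roughly what that computation looks
like in plain Hail; the input path is hypothetical, and GWASpy's actual implementation additionally applies the SNP
filtering controlled by the options described below:

.. code-block:: python

    import hail as hl

    # hypothetical input matrix table
    mt = hl.read_matrix_table('gs://my-gcs/bucket/test_data/my_data.mt')

    # PCA on HWE-normalized genotype calls; k controls the number of PCs
    eigenvalues, scores, loadings = hl.hwe_normalized_pca(mt.GT, k=20, compute_loadings=True)

    # per-sample PC scores as a Hail Table
    scores.show(5)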
Arguments and options
#####################

.. list-table::
   :widths: 15 50
   :header-rows: 1

   * - Argument
     - Description
   * - :code:`--ref-dirname`
     - Path to where the reference data is
   * - :code:`--ref-basename`
     - Reference basename
   * - :code:`--ref-info`
     - Path to reference information. Tab-delimited file with sample IDs and their SuperPop labels
   * - :code:`--reference`
     - Genome reference build. Default is GRCh38. Options: [:code:`GRCh37`, :code:`GRCh38`]
   * - :code:`--pca-type`
     - Type of PCA to run. Default is normal. Options: [:code:`normal`, :code:`project`, :code:`joint`]
   * - :code:`--data-dirname`
     - Path to where the data is
   * - :code:`--data-basename`
     - Data basename
   * - :code:`--input-type`
     - Data input type. Options: [:code:`hail`, :code:`plink`, :code:`vcf`]
   * - :code:`--maf`
     - Include only SNPs with MAF >= NUM in PCA. Default is 0.05
   * - :code:`--hwe`
     - Include only SNPs with HWE >= NUM in PCA. Default is 1e-03
   * - :code:`--geno`
     - Include only SNPs with call-rate > NUM. Default is 0.98
   * - :code:`--ld-cor`
     - Squared correlation threshold (exclusive upper bound). Must be in the range [0.0, 1.0]. Default is 0.2
   * - :code:`--ld-window`
     - Window size in base pairs (inclusive upper bound). Default is 250000
   * - :code:`--npcs`
     - Number of PCs to use. Default is 20
   * - :code:`--relatedness-method`
     - Method to use for the inference of relatedness. Default is pc_relate. Options: [:code:`pc_relate`, :code:`ibd`, :code:`king`]
   * - :code:`--relatedness-thresh`
     - Threshold value to use in relatedness checks. Default is 0.98
   * - :code:`--prob`
     - Minimum probability of belonging to a given population for the population to be set. Default is 0.8
   * - :code:`--out-dir`
     - Path to where output files will be saved

Output
######
A tab-delimited file with the first 20 principal components (PCs) and
graphical visualizations of the PCs are generated.
-------------------------------------------------------------------------------- /docs/pca/joint.rst: --------------------------------------------------------------------------------
================================
Joint PCA (with a reference)
================================

The joint PCA method works by first merging (joining) the input dataset with the reference dataset, matching on locus and allele(s).
This is followed by "normal" PCA on the merged dataset.

Below is code showing how you can run joint PCA from the command line or inside a Python script.

#. Python (inside a Python script)

    .. code-block:: python

        import gwaspy.pca as pca
        pca.pca.pca(data_dirname="gs://my-gcs/bucket/test_data/", data_basename="my_data_basename",
                    out_dir="gs://my-gcs/bucket/test_data/", input_type="my_input_type", reference="GRCh37",
                    pca_type="joint")

#. Command line

    .. code-block:: sh

        pca --data-dirname gs://my-gcs/bucket/test_data/ --data-basename my_data_basename --out-dir gs://my-gcs/bucket/test_data/ --input-type my_input_type --reference grch37 --pca-type joint
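The merge step itself can be pictured in plain Hail: keep only the loci/alleles present in both datasets, then join the
two sample sets column-wise. A rough sketch with hypothetical table paths, not GWASpy's exact code:

.. code-block:: python

    import hail as hl

    data = hl.read_matrix_table('gs://my-gcs/bucket/test_data/data.mt')  # hypothetical input data
    ref = hl.read_matrix_table('gs://my-gcs/bucket/ref/ref.mt')          # hypothetical reference data

    # keep the intersection of variants, matching on the (locus, alleles) row key,
    # and reduce both datasets to a shared schema
    data = data.semi_join_rows(ref.rows()).select_entries('GT').select_rows()
    ref = ref.semi_join_rows(data.rows()).select_entries('GT').select_rows()

    # stack the two sample sets over the shared variants, then run PCA as usual
    merged = data.union_cols(ref)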
-------------------------------------------------------------------------------- /docs/pca/normal.rst: --------------------------------------------------------------------------------
================================
Normal PCA (without a reference)
================================

GWASpy allows you to run normal PCA without any reference panel.

Below is code showing how you can run normal PCA without a reference from the command line or inside a Python script.

#. Python (inside a Python script)

    .. code-block:: python

        import gwaspy.pca as pca
        pca.pca.pca(data_dirname="gs://my-gcs/bucket/test_data/", data_basename="my_data_basename",
                    out_dir="gs://my-gcs/bucket/test_data/", input_type="my_input_type", reference="GRCh37",
                    pca_type="normal")

#. Command line

    .. code-block:: sh

        pca --data-dirname gs://my-gcs/bucket/test_data/ --data-basename my_data_basename --out-dir gs://my-gcs/bucket/test_data/ --input-type my_input_type --reference grch37 --pca-type normal
-------------------------------------------------------------------------------- /docs/pca/project.rst: --------------------------------------------------------------------------------
================================
Project PCA (with a reference)
================================

You can leverage reference panel information to see how samples in your data cluster on a "global" scale.
PCs are computed using the 1KG+HGDP dataset as a reference panel, and samples in the input dataset are then projected onto the 1KG+HGDP PC space.
A random forest classifier model, adopted from gnomAD, is then used to assign population ancestries in the input dataset.

Below is code showing how you can run projection PCA from the command line or inside a Python script.

#. Python (inside a Python script)

    .. code-block:: python

        import gwaspy.pca as pca
        pca.pca.pca(data_dirname="gs://my-gcs/bucket/test_data/", data_basename="my_data_basename",
                    out_dir="gs://my-gcs/bucket/test_data/", input_type="my_input_type", reference="GRCh37",
                    pca_type="project")

#. Command line

    .. code-block:: sh

        pca --data-dirname gs://my-gcs/bucket/test_data/ --data-basename my_data_basename --out-dir gs://my-gcs/bucket/test_data/ --input-type my_input_type --reference grch37 --pca-type project
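Conceptually, the projection uses per-variant loadings and allele frequencies from the reference PCA to place new samples
in the existing PC space. Hail ships an experimental helper for this; the sketch below uses hypothetical inputs (including
the assumed :code:`pca_af` field name) and is not GWASpy's exact code:

.. code-block:: python

    import hail as hl

    mt = hl.read_matrix_table('gs://my-gcs/bucket/test_data/data.mt')    # hypothetical target data
    loadings = hl.read_table('gs://my-gcs/bucket/ref/ref_loadings.ht')   # hypothetical reference loadings

    # project target genotypes onto the reference PCs using the reference
    # loadings and reference allele frequencies (assumed stored as 'pca_af')
    scores = hl.experimental.pc_project(mt.GT, loadings.loadings, loadings.pca_af)
    scores.show(5)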
Own reference panel** 38 | 39 | .. note:: 40 | 1. If you're using your own reference panel, make sure the files are bgzip compressed. 41 | 2. The chromosome X reference file must be named X, not 23. 42 | 43 | Say you have your reference panel files for each chromosome stored in gs://ref_panel/ALL.chr{1..22,X}.vcf, 44 | you would pass the path to :code:`--vcf-ref` as gs://ref_panel/ALL.chr\ **CNUMBER**\ .vcf. 45 | GWASpy uses **CNUMBER** as a placeholder for the chromosomes. Then you can run phasing as: 46 | 47 | .. code-block:: sh 48 | 49 | phasing --input-vcf gs://path/to/file.vcf.bgz --output-filename outfilename.phased --out-dir gs://path/to/output/dir --genome-build GRCh38 --billing-project my-billing-project --vcf-ref gs://ref_panel/ALL.chrCNUMBER.vcf 50 | 51 | .. note:: 52 | For nextflow users, the idea is the same. The only difference is you have to update the params.json file. Examples 53 | are provided in the tutorial section of the documentation. 54 | 55 | Arguments and options 56 | ##################### 57 | 58 | .. list-table:: 59 | :widths: 15 50 60 | :header-rows: 1 61 | 62 | * - Argument 63 | - Description 64 | * - :code:`--input-vcf` 65 | - Path to the VCF file to be phased 66 | * - :code:`--vcf-ref` 67 | - VCF file for reference haplotypes if phasing with a reference panel 68 | * - :code:`--pedigree` 69 | - Pedigree (PLINK FAM) file 70 | * - :code:`--local` 71 | - Run the job(s) locally instead of on the Service backend. By default, jobs are executed on a multi-tenant compute cluster in Google Cloud 72 | * - :code:`--billing-project` 73 | - Billing project to be used for the job(s) 74 | * - :code:`--genome-build` 75 | - Genome reference build. Default is GRCh38. Options: [:code:`GRCh37`, :code:`GRCh38`] 76 | * - :code:`--data-type` 77 | - Array or WGS data. Default is array. Options: [:code:`array`, :code:`wgs`]. 78 | * - :code:`--fill-tags` 79 | - Whether or not to add the AC tag required by SHAPEIT5. Including :code:`--fill-tags` in your command will enable this step 80 | * - :code:`--software` 81 | - Software to use for phasing. Options: [:code:`beagle`, :code:`shapeit`]. Default is :code:`shapeit` 82 | * - :code:`--output-filename` 83 | - Output filename without file extension 84 | * - :code:`--out-dir` 85 | - Path to where output files will be saved 86 | 87 | Output 88 | ###### 89 | The resulting output is a VCF file per chromosome with phased haplotypes. 90 | -------------------------------------------------------------------------------- /docs/preimp_qc.rst: -------------------------------------------------------------------------------- 1 | .. _sec-pre_imputation_qc: 2 | .. _preimp_qc: 3 | 4 | =================================== 5 | Pre-Imputation Quality Control (QC) 6 | =================================== 7 | 8 | Detecting and correcting issues such as genotyping errors, sample handling errors, population stratification, etc., 9 | is important in GWAS. The :code:`preimp_qc` module addresses these issues and cleans (QC) your data. Below is a flow diagram 10 | of the filters applied when QC'ing input data: 11 | 12 | .. image:: images/qc_workflow.png 13 | :width: 1000px 14 | :height: 1900px 15 | :scale: 50 % 16 | :align: center 17 | 18 | 19 | Arguments and options 20 | ##################### 21 | 22 | .. list-table:: 23 | :widths: 15 50 24 | :header-rows: 1 25 | 26 | * - Argument 27 | - Description 28 | * - :code:`--dirname` 29 | - Path to where the data is 30 | * - :code:`--basename` 31 | - Data basename 32 | * - :code:`--input-type` 33 | - Input type.
Options: [:code:`hail`, :code:`plink`, :code:`vcf`] 34 | * - :code:`--export-type` 35 | - Export type. Options: [:code:`hail`, :code:`plink`, :code:`vcf`] 36 | * - :code:`--out-dir` 37 | - Directory path to where output files are going to be saved 38 | * - :code:`--annotations` 39 | - Annotations file to be used for annotating samples with information such as Sex and Phenotype 40 | * - :code:`--reference` 41 | - Reference genome build. Default is GRCh38. Options: [:code:`GRCh37`, :code:`GRCh38`] 42 | * - :code:`--report` 43 | - Generate a QC PDF report or not. Default is True 44 | * - :code:`--liftover` 45 | - Liftover input data to GRCh38 or not; default is False. Running :code:`preimp_qc` with :code:`--liftover` will activate liftover 46 | * - :code:`--pre-geno` 47 | - include only SNPs with missing-rate < NUM (before ID filter), important for post merge of multiple platforms 48 | * - :code:`--mind` 49 | - include only IDs with missing-rate < NUM 50 | * - :code:`--fhet-aut` 51 | - include only IDs within NUM < FHET < NUM 52 | * - :code:`--fstat-y` 53 | - include only female IDs with fhet < NUM 54 | * - :code:`--fstat-x` 55 | - include only male IDs with fhet > NUM 56 | * - :code:`--geno` 57 | - include only SNPs with missing-rate < NUM 58 | * - :code:`--midi` 59 | - include only SNPs with missing-rate-difference (case/control) < NUM 60 | * - :code:`--withpna` 61 | - include monomorphic (invariant) SNPs 62 | * - :code:`--maf` 63 | - include only SNPs with MAF >= NUM 64 | * - :code:`--hwe-th-con` 65 | - HWE_controls < NUM 66 | * - :code:`--hwe-th-cas` 67 | - HWE_cases < NUM 68 | 69 | Output(s) 70 | ########## 71 | * QC'ed file(s), i.e. file(s) with all the variants and/or samples that fail QC filters removed 72 | * A detailed PDF QC report including pre- and post-QC variant/sample counts, figures such as Manhattan and QQ plots, etc. 73 | 74 | 75 | Examples 76 | ######## 77 | 78 | All the code below assumes the user already has a Dataproc cluster running as described in the `previous section `_ 79 | 80 | You can run pre-imputation QC using the :code:`preimp_qc` module (1) inside a Python script; or (2) via the command line 81 | 82 | 1. Python script - submitting a Python script to a cluster from a local machine (Highly recommended) 83 | 84 | - First create a Python script (e.g. :code:`qc_script.py`) on your local machine as below 85 | 86 | .. code-block:: python 87 | 88 | import gwaspy.preimp_qc as qc 89 | qc.preimp_qc.preimp_qc(dirname="gs://my-gcs/bucket/test_data/", basename="my_data_basename", 90 | input_type="my_input_type") 91 | 92 | - Then run the following command to submit the script to the Dataproc cluster named `my-cluster-name` 93 | 94 | .. code-block:: sh 95 | 96 | hailctl dataproc submit my-cluster-name qc_script.py 97 | 98 | 2. Command line - requires the user to be SSH'ed into a cluster 99 | 100 | Users may encounter `this error `_ when trying to run things from the command line 101 | 102 | - This requires the user to be inside (`gcloud compute ssh`) the Dataproc cluster with GWASpy already installed 103 | 104 | .. code-block:: sh 105 | 106 | gcloud compute ssh "my-cluster-name-m" 107 | preimp_qc --dirname gs://my-gcs/bucket/test_data/ --basename my_data_basename --input-type my_input_type 108 | -------------------------------------------------------------------------------- /docs/qb.rst: -------------------------------------------------------------------------------- 1 | ..
_sec-qb: 2 | 3 | ==================== 4 | Hail Query and Batch 5 | ==================== 6 | 7 | The four GWASpy modules use two different backends: :code:`preimp_qc` and :code:`pca` use Hail Query, while 8 | the :code:`phasing` and :code:`imputation` modules use Batch (Hail Batch for Broad users and nextflow for non-Broad users). 9 | Hail Query is well-suited for manipulating large genomics data in highly parallelised environments such as Dataproc. 10 | `Batch `_, on the other hand, is good for batch processing (scheduling, 11 | queueing, and executing) workloads on Google Cloud resources. 12 | 13 | All the instructions below assume the user has a Google account and an active (Google) Cloud billing account. 14 | 15 | Query 16 | ##### 17 | 18 | For running the :code:`preimp_qc` and :code:`pca` modules, you need to start a Dataproc cluster. Hail has a command-line 19 | tool, `hailctl `_, for doing this, and it is installed automatically when 20 | you install Hail. We highly recommend setting a maximum age for the cluster (:code:`--max-age`); this ensures the cluster is 21 | automatically deleted after the specified time. 22 | 23 | Below is how you can start a cluster with GWASpy pre-installed: 24 | 25 | .. code-block:: sh 26 | 27 | hailctl dataproc start my-cluster-name --region=us-central1 --packages gwaspy --max-age 4h 28 | 29 | To shut down the cluster, you can run: 30 | 31 | .. code-block:: sh 32 | 33 | hailctl dataproc stop my-cluster-name --region=us-central1 34 | 35 | Batch 36 | ##### 37 | 38 | The :code:`phasing` and :code:`imputation` modules use Batch as the backend. For Broad users with a Hail Batch account, 39 | no setup is needed; you can proceed to running the modules. For non-Broad users, we have a nextflow implementation 40 | of the modules that requires nextflow setup first. Follow the steps here to: `(1) install nextflow `_; and 41 | `(2) setup Google Cloud Batch for nextflow `_ 42 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx-rtd-theme 2 | -------------------------------------------------------------------------------- /env-setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -ex 4 | 5 | PLATFORM="${OSTYPE}" 6 | 7 | # install pylatex dependencies 8 | case "$PLATFORM" in 9 | darwin*) 10 | install-pylatex-dependencies() { 11 | brew install --cask mactex 12 | eval "$(/usr/libexec/path_helper)" 13 | } 14 | ;; 15 | linux*) 16 | install-pylatex-dependencies() { 17 | yes Y | apt-get install texlive-pictures texlive-science texlive-latex-extra latexmk 18 | } 19 | ;; 20 | *) 21 | echo "unsupported platform $PLATFORM."
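exit 1  # added safeguard: abort here rather than falling through to call a function that was never defined on this platform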
22 | ;; 23 | esac 24 | 25 | install-pylatex-dependencies 26 | -------------------------------------------------------------------------------- /gwaspy/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/gwaspy/.DS_Store -------------------------------------------------------------------------------- /gwaspy/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atgu/GWASpy/f8b9f5eccbb67fa5ab462007634db3862198aa6e/gwaspy/__init__.py -------------------------------------------------------------------------------- /gwaspy/check_alleles/__init__.py: -------------------------------------------------------------------------------- 1 | from gwaspy.check_alleles import flips 2 | __all__ = ['flips'] 3 | -------------------------------------------------------------------------------- /gwaspy/check_alleles/check_alleles.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Lindo Nkambule' 2 | 3 | import hailtop.batch as hb 4 | import hailtop.fs as hfs 5 | 6 | from hailtop.batch.job import Job 7 | 8 | 9 | def size(file: str): 10 | """ 11 | Convert the size from bytes to GiB 12 | :param file: path to file, str 13 | :return: file size in GiB 14 | """ 15 | file_info = hfs.stat(file)  # returns a named tuple 16 | size_gigs = file_info.size / (1024 * 1024 * 1024) 17 | 18 | return size_gigs 19 | 20 | 21 | def check_alleles_workflow( 22 | batch: hb.Batch = None, 23 | input_path: str = None, 24 | reference_path: str = None, 25 | output_filename: str = None, 26 | step: str = "check", 27 | fix_mode: str = "top", 28 | output_path: str = None): 29 | 30 | def get_stats( 31 | b: hb.batch.Batch, 32 | job_name: str = None, 33 | vcf: hb.ResourceGroup = None, 34 | ref_fasta: hb.ResourceGroup = None, 35 | output_name: str = None, 36 | out_dir: str = None, 37 | ncpu: int = 8, 38 | memory: str = 'standard', 39 | storage: int = None, 40 | img: str = 'docker.io/lindonkambule/gwaspy_phase_impute:latest', 41 | ) -> Job: 42 | j = b.new_job(name=f'Check alleles: {job_name}') 43 | 44 | j.image(img) 45 | j.memory(memory) 46 | j.cpu(ncpu) 47 | j.storage(f'{storage}Gi') 48 | 49 | j.command( 50 | f""" 51 | bcftools +fixref {vcf['vcf']} -- -f {ref_fasta['ref_fasta']} > stats.txt 52 | mv stats.txt {j.ofile} 53 | """ 54 | ) 55 | 56 | b.write_output(j.ofile, 57 | f'{out_dir}/check_alleles/{output_name}.stats.txt') 58 | 59 | return j 60 | 61 | def fix_alleles( 62 | b: hb.batch.Batch, 63 | job_name: str = None, 64 | vcf: hb.ResourceGroup = None, 65 | ref_fasta: hb.ResourceGroup = None, 66 | allele_mode: str = "top", 67 | output_name: str = None, 68 | out_dir: str = None, 69 | ncpu: int = 8, 70 | memory: str = 'standard', 71 | storage: int = None, 72 | img: str = 'docker.io/lindonkambule/gwaspy_phase_impute:latest', 73 | ) -> Job: 74 | j = b.new_job(name=f'Fix alleles: {job_name}') 75 | 76 | j.image(img) 77 | j.memory(memory) 78 | j.cpu(ncpu) 79 | j.storage(f'{storage}Gi') 80 | 81 | j.declare_resource_group( 82 | fixed_file={ 83 | 'bcf': '{root}.bcf', 84 | 'bcf.csi': '{root}.bcf.csi' 85 | } 86 | ) 87 | 88 | j.command( 89 | f""" 90 | bcftools +fixref {vcf['vcf']} -Ob -o {j.fixed_file['bcf']} -- -f {ref_fasta['ref_fasta']} -m {allele_mode} 91 | bcftools index --force {j.fixed_file['bcf']} --output {j.fixed_file['bcf.csi']} --threads {ncpu} 92 | """ 93 | ) 94 | 95 | b.write_output(j.fixed_file, 
f'{out_dir}/check_alleles/{output_name}.alleles.fixed') 97 | 98 | return j 99 | 100 | ref_fasta_in = batch.read_input_group(**{'ref_fasta': reference_path, 101 | 'ref_fasta_index': f'{reference_path}.fai'}) 102 | ref_size = round(size(reference_path)) 103 | 104 | if "CNUMBER" in input_path: # input VCF is already split by chromosome 105 | for i in range(1, 23): 106 | vcf_path = input_path.replace('CNUMBER', str(i)) 107 | input_idx = f'{vcf_path}.tbi' if hfs.exists(f'{vcf_path}.tbi') else f'{vcf_path}.csi' 108 | 109 | if not hfs.exists(input_idx): 110 | raise SystemExit('Input file needs to be indexed (.tbi or .csi). Found none, exiting') 111 | 112 | chrom_vcf = batch.read_input_group(**{'vcf': vcf_path, 113 | 'index': input_idx}) 114 | vcf_size = round(size(vcf_path)) 115 | disk_size = int(round(5.0 + vcf_size + ref_size)) 116 | 117 | if step == "check": 118 | get_stats( 119 | b=batch, 120 | job_name=vcf_path, 121 | vcf=chrom_vcf, 122 | ref_fasta=ref_fasta_in, 123 | output_name=f'{output_filename}_chr{i}', 124 | out_dir=output_path, 125 | storage=disk_size 126 | ) 127 | else: 128 | fix_alleles( 129 | b=batch, 130 | job_name=vcf_path, 131 | vcf=chrom_vcf, 132 | ref_fasta=ref_fasta_in, 133 | allele_mode=fix_mode, 134 | output_name=f'{output_filename}_chr{i}', 135 | out_dir=output_path, 136 | storage=disk_size 137 | ) 138 | 139 | else: # one input file with all the chromosomes 140 | vcf_path = input_path 141 | input_idx = f'{vcf_path}.tbi' if hfs.exists(f'{vcf_path}.tbi') else f'{vcf_path}.csi' 142 | 143 | if not hfs.exists(input_idx): 144 | raise SystemExit('Input file needs to be indexed (.tbi or .csi). Found none, exiting') 145 | 146 | chrom_vcf = batch.read_input_group(**{'vcf': input_path, 147 | 'index': input_idx}) 148 | 149 | vcf_size = round(size(vcf_path)) 150 | disk_size = int(round(5.0 + vcf_size + ref_size)) 151 | 152 | if step == "check": 153 | get_stats( 154 | b=batch, 155 | job_name=vcf_path, 156 | vcf=chrom_vcf, 157 | ref_fasta=ref_fasta_in, 158 | output_name=output_filename, 159 | out_dir=output_path, 160 | storage=disk_size 161 | ) 162 | else: 163 | fix_alleles( 164 | b=batch, 165 | job_name=vcf_path, 166 | vcf=chrom_vcf, 167 | ref_fasta=ref_fasta_in, 168 | allele_mode=fix_mode, 169 | output_name=output_filename, 170 | out_dir=output_path, 171 | storage=disk_size 172 | ) 173 | 174 | batch.run() 175 | -------------------------------------------------------------------------------- /gwaspy/check_alleles/flips.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Lindo Nkambule' 2 | 3 | import argparse 4 | import hailtop.batch as hb 5 | from gwaspy.check_alleles.check_alleles import check_alleles_workflow 6 | from typing import Union 7 | 8 | 9 | def run_checks_fix( 10 | backend: Union[hb.ServiceBackend, hb.LocalBackend] = None, 11 | input_vcf: str = None, 12 | ref_path: str = None, 13 | step: str = "check", 14 | fix_mode: str = "top", 15 | output_filename: str = None, 16 | out_dir: str = None 17 | ): 18 | b = hb.Batch(backend=backend, 19 | name=f'GWASpy-{step.upper()}-Alleles') 20 | 21 | check_alleles_workflow( 22 | batch=b, 23 | input_path=input_vcf, 24 | reference_path=ref_path, 25 | output_filename=output_filename, 26 | step=step, 27 | fix_mode=fix_mode, 28 | output_path=out_dir 29 | ) 30 | 31 | 32 | def main(): 33 | parser = argparse.ArgumentParser() 34 | parser.add_argument('--input-vcf', type=str, required=True) 35 | parser.add_argument('--ref-fasta', type=str, 
default='gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta') 36 | parser.add_argument('--local', action='store_true') 37 | parser.add_argument('--billing-project', required=True) 38 | parser.add_argument('--step', type=str, default='check', choices=['check', 'fix']) 39 | parser.add_argument('--mode', type=str, default='top', choices=['flip', 'flip-all', 'id', 'ref-alt', 'stats', 'swap', 'top']) 40 | parser.add_argument('--output-filename', type=str, required=True) 41 | parser.add_argument('--out-dir', type=str, required=True) 42 | 43 | args = parser.parse_args() 44 | 45 | if args.local: 46 | backend = hb.LocalBackend() 47 | else: 48 | backend = hb.ServiceBackend(billing_project=args.billing_project, 49 | remote_tmpdir=f'{args.out_dir}/tmp/') 50 | 51 | run_checks_fix( 52 | backend=backend, 53 | input_vcf=args.input_vcf, 54 | ref_path=args.ref_fasta, 55 | step=args.step, 56 | fix_mode=args.mode, 57 | output_filename=args.output_filename, 58 | out_dir=args.out_dir) 59 | 60 | backend.close() 61 | -------------------------------------------------------------------------------- /gwaspy/imputation/__init__.py: -------------------------------------------------------------------------------- 1 | from gwaspy.imputation import impute 2 | __all__ = ['impute'] -------------------------------------------------------------------------------- /gwaspy/imputation/concat_vcfs.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Lindo Nkambule' 2 | 3 | import hailtop.batch as hb 4 | import hail as hl 5 | import pandas as pd 6 | from gwaspy.utils.get_file_size import bytes_to_gb 7 | from gwaspy.phasing.get_filebase import get_vcf_filebase 8 | from typing import List 9 | from typing import Union 10 | 11 | 12 | def concat_vcfs(b: hb.batch.Batch, 13 | vcf_basename: str = None, 14 | vcfs_to_merge: List = None, 15 | output_type: str = 'vcf', 16 | chrom: str = None, 17 | cpu: int = 16, 18 | memory: str = 'standard', 19 | docker_img: str = 'docker.io/lindonkambule/gwaspy:v1', 20 | out_dir: str = None): 21 | 22 | global index_cmd 23 | 24 | out_type = 'b' if output_type == 'bcf' else 'z' 25 | vcfs_sizes_sum = 0 26 | merge_vcf_i = '' 27 | 28 | out_filename = f'{vcf_basename}.{chrom}.merged.bcf' if output_type == 'bcf' else \ 29 | f'{vcf_basename}.{chrom}.merged.vcf.gz' 30 | out_index_name = f'{vcf_basename}.{chrom}.merged.bcf.csi' if output_type == 'bcf' else \ 31 | f'{vcf_basename}.{chrom}.merged.vcf.gz.csi' 32 | 33 | for line in vcfs_to_merge: 34 | vcfs_sizes_sum += 2 + bytes_to_gb(line) 35 | 36 | disk_size = int(round(10 + (2 * vcfs_sizes_sum))) 37 | threads = cpu - 1 38 | 39 | concat = b.new_job(name=f'concat-{vcf_basename}') 40 | concat.memory(memory) 41 | concat.storage(f'{disk_size}Gi') 42 | concat.image(docker_img) 43 | concat.cpu(cpu) 44 | 45 | for line in vcfs_to_merge: 46 | input_vcf = b.read_input_group(vcf=line, 47 | ind=f'{line}.csi') 48 | merge_vcf_i += f'{input_vcf.vcf} ' 49 | 50 | cmd = f''' 51 | bcftools concat \ 52 | --no-version \ 53 | --output-type {out_type} \ 54 | --output {out_filename} \ 55 | --threads {threads} \ 56 | {merge_vcf_i} 57 | ''' 58 | 59 | concat.command(cmd) 60 | # index the merged output 61 | concat.command(f'bcftools index --force {out_filename}') 62 | 63 | concat.command(f'mv {out_filename} {concat.ofile}') 64 | concat.command(f'mv {out_index_name} {concat.idx}') 65 | b.write_output(concat.ofile, f'{out_dir}/GWASpy/{vcf_basename}/Imputation/imputed_merged/{out_filename}') 66 | b.write_output(concat.idx, 
f'{out_dir}/GWASpy/{vcf_basename}/Imputation/imputed_merged/{out_index_name}') 67 | 68 | 69 | def run_concat(backend: Union[hb.ServiceBackend, hb.LocalBackend] = None, 70 | input_vcf: str = None, 71 | output_type: str = 'vcf', 72 | exclude_chrx: bool = False, 73 | cpu: int = 16, 74 | memory: str = 'standard', 75 | out_dir: str = None): 76 | 77 | print(f'\n2. CONCAT {input_vcf}\n') 78 | vcf_filebase = get_vcf_filebase(input_vcf) 79 | concat_b = hb.Batch(backend=backend, name=f'concat-imputed-chunks-{vcf_filebase}') 80 | 81 | # get the regions so we can map each file to its specific region 82 | regions = pd.read_csv(f'{out_dir}/GWASpy/{vcf_filebase}/Imputation/imputation.regions', sep='\t', names=['reg', 'ind']) 83 | regions_dict = pd.Series(regions.reg.values, index=regions.ind).to_dict() 84 | 85 | imputed_vcfs_chunks = hl.utils.hadoop_ls(f'{out_dir}/GWASpy/{vcf_filebase}/Imputation/imputed_chunks/*.bcf') 86 | 87 | max_chrom = 23 if exclude_chrx else 24 # 1-22 if autosomes only, else 1-23 88 | 89 | for i in range(1, max_chrom): 90 | if i == 23: 91 | chrom = 'chrX' 92 | else: 93 | chrom = f'chr{i}' 94 | 95 | chrom_phased_files_to_concat = [] 96 | 97 | for file in imputed_vcfs_chunks: 98 | f = file['path'] 99 | vcf_basename = get_vcf_filebase(f) 100 | file_index = int(vcf_basename.split('.')[-4]) 101 | file_region = regions_dict[file_index] 102 | map_chrom = file_region.split(':')[0] 103 | if map_chrom == chrom: 104 | chrom_phased_files_to_concat.append(f) 105 | 106 | # naturally sort the list of files to merge (e.g. 'chunk2' sorts before 'chunk10') 107 | from gwaspy.utils.natural_sort import natural_keys 108 | chrom_phased_files_to_concat.sort(key=natural_keys) 109 | 110 | concat_vcfs(b=concat_b, vcfs_to_merge=chrom_phased_files_to_concat, vcf_basename=vcf_filebase, 111 | output_type=output_type, chrom=chrom, cpu=cpu, memory=memory, out_dir=out_dir) 112 | 113 | concat_b.run() 114 | -------------------------------------------------------------------------------- /gwaspy/imputation/imputation.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Lindo Nkambule' 2 | 3 | import hailtop.batch as hb 4 | import argparse 5 | 6 | 7 | # ONCE YOU ADD FUNCTIONALITY FOR USING DIFFERENT REF PANELS, CHANGE n_panel* parameters and include cmd args 8 | def genotype_imputation(input_vcf: str = None, 9 | females_file: str = None, 10 | n_samples: int = None, 11 | n_panel_samples: int = 4099, 12 | buffer_region: int = 250, 13 | phasing_software: str = None, 14 | local: bool = False, 15 | exclude_chrx: bool = False, 16 | billing_project: str = None, 17 | memory: str = 'highmem', 18 | cpu: int = 16, 19 | stages: str = 'impute,concat', 20 | output_type: str = 'bcf', 21 | out_dir: str = None): 22 | # Error handling 23 | if not out_dir: 24 | raise SystemExit('Output directory not specified. Specify using --out-dir if running from the command line or' 25 | ' the out_dir argument if running inside a Python script') 26 | 27 | steps_list = stages.split(',') 28 | steps = [x.lower() for x in steps_list] 29 | unknown_steps = [i for i in steps if i not in ['impute', 'concat']] 30 | 31 | if len(unknown_steps) > 0: 32 | raise SystemExit(f'Incorrect process(es) {unknown_steps} selected. Options are [impute, concat]') 33 | 34 | if output_type.lower() not in ['bcf', 'vcf']: 35 | raise SystemExit(f'Incorrect output type {output_type} selected. Options are [bcf, vcf]') 36 | 37 | if memory.lower() not in ['lowmem', 'standard', 'highmem']: 38 | raise SystemExit(f'Incorrect memory type {memory} selected. 
Options are [lowmem, standard, highmem]') 39 | 40 | if not n_samples: 41 | raise SystemExit('Number of samples in input data not detected. Specify how many samples (integer) are in' 42 | ' the input data using --n-samples if running from the command line or' 43 | ' the n_samples argument if running inside a Python script') 44 | 45 | if local: 46 | backend = hb.LocalBackend() 47 | else: 48 | backend = hb.ServiceBackend(billing_project=billing_project, 49 | remote_tmpdir=f'{out_dir}/tmp/') 50 | 51 | # impute genotypes 52 | if 'impute' in steps: 53 | from gwaspy.imputation.sex_aut_imp import run_impute 54 | run_impute(backend=backend, input_vcf=input_vcf, females_file=females_file, n_samples=n_samples, 55 | n_panel_samples=n_panel_samples, phasing_software=phasing_software, exclude_chrx=exclude_chrx, 56 | memory=memory, buffer_region=buffer_region, out_dir=out_dir) 57 | 58 | # Concatenate imputed chunks 59 | if 'concat' in steps: 60 | from gwaspy.imputation.concat_vcfs import run_concat 61 | run_concat(backend=backend, input_vcf=input_vcf, output_type=output_type, exclude_chrx=exclude_chrx, cpu=cpu, 62 | memory=memory, out_dir=out_dir) 63 | 64 | 65 | def main(): 66 | parser = argparse.ArgumentParser() 67 | parser.add_argument('--input-vcf', type=str, required=True) 68 | parser.add_argument('--samples-file', type=str, required=True) 69 | parser.add_argument('--local', action='store_true') 70 | parser.add_argument('--exclude-chrx', action='store_true') 71 | parser.add_argument('--billing-project', required=True) 72 | parser.add_argument('--phasing-software', type=str, default='shapeit', choices=['eagle', 'shapeit']) 73 | parser.add_argument('--memory', type=str, default='highmem', choices=['lowmem', 'standard', 'highmem']) 74 | parser.add_argument('--cpu-concat', type=int, default=16) 75 | parser.add_argument('--n-samples', type=int, required=True) 76 | parser.add_argument('--buffer-region', type=int, default=250) 77 | parser.add_argument('--stages', type=str, default='impute,concat') 78 | parser.add_argument('--out-type', type=str, default='bcf', choices=['bcf', 'vcf']) 79 | parser.add_argument('--out-dir', required=True) 80 | 81 | args = parser.parse_args() 82 | 83 | genotype_imputation(input_vcf=args.input_vcf, females_file=args.samples_file, n_samples=args.n_samples, 84 | buffer_region=args.buffer_region, phasing_software=args.phasing_software, local=args.local, 85 | exclude_chrx=args.exclude_chrx, billing_project=args.billing_project, memory=args.memory, 86 | cpu=args.cpu_concat, stages=args.stages, output_type=args.out_type, out_dir=args.out_dir) 87 | 88 | 89 | if __name__ == '__main__': 90 | main() 91 | -------------------------------------------------------------------------------- /gwaspy/imputation/impute.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Lindo Nkambule' 2 | 3 | import argparse 4 | import hailtop.batch as hb 5 | from gwaspy.imputation.impute5_impute import impute5_imputation 6 | from gwaspy.imputation.glimpse2_impute import glimpse_phase_impute 7 | from typing import Union 8 | 9 | 10 | def run_impute(backend: Union[hb.ServiceBackend, hb.LocalBackend] = None, 11 | input_file: str = None, 12 | vcf_ref: str = None, 13 | chromosomes: str = "all", 14 | software: str = 'impute5', 15 | output_filename: str = None, 16 | n_samples: int = None, 17 | n_panel_samples: int = 4091, 18 | out_dir: str = None 19 | ): 20 | 21 | if software.lower() not in ['beagle5', 'glimpse2', 'impute5']: 22 | raise SystemExit(f'Incorrect software 
{software} selected. Options are [beagle5, glimpse2, impute5]') 23 | 24 | b = hb.Batch(backend=backend, 25 | name=f'GWASpy-Imputation-{software.upper()}') 26 | 27 | if vcf_ref == 'hgdp1kgp': 28 | print('\nIMPUTING GENOTYPES WITH HGDP+1KGP PANEL\n') 29 | ref_path = 'gs://gcp-public-data--gnomad/resources/hgdp_1kg/phased_haplotypes_v2/hgdp1kgp_chrCNUMBER.filtered.SNV_INDEL.phased.shapeit5.bcf' 30 | else: 31 | print('\nIMPUTING GENOTYPES WITH USER-DEFINED REFERENCE PANEL\n') 32 | ref_path = vcf_ref 33 | 34 | if software == 'impute5': 35 | print('\nIMPUTING GENOTYPES USING IMPUTE5\n') 36 | impute5_imputation( 37 | batch=b, 38 | input_path=input_file, 39 | reference_path=ref_path, 40 | chromosomes=chromosomes, 41 | output_filename=output_filename, 42 | n_samples=n_samples, 43 | n_panel_samples=n_panel_samples, 44 | output_path=out_dir 45 | ) 46 | elif software == 'glimpse2': 47 | glimpse_phase_impute( 48 | batch=b, 49 | bam_files=input_file, 50 | reference_path=ref_path, 51 | chromosomes=chromosomes, 52 | output_filename=output_filename, 53 | output_path=out_dir 54 | ) 55 | # else: TODO: add BEAGLE 56 | 57 | 58 | def main(): 59 | parser = argparse.ArgumentParser() 60 | parser.add_argument('--input-file', type=str, required=True) 61 | parser.add_argument('--vcf-ref', type=str, default='hgdp1kgp') 62 | parser.add_argument('--chromosomes', type=str, default='all') 63 | parser.add_argument('--local', action='store_true') 64 | parser.add_argument('--billing-project', required=True) 65 | parser.add_argument('--n-samples', type=int, required=True) 66 | parser.add_argument('--n-ref-samples', type=int, default=4091) 67 | parser.add_argument('--software', type=str, default='impute5', choices=['beagle5', 'glimpse2', 'impute5']) 68 | parser.add_argument('--output-filename', type=str, required=True) 69 | parser.add_argument('--out-dir', type=str, required=True) 70 | 71 | args = parser.parse_args() 72 | 73 | if args.local: 74 | backend = hb.LocalBackend() 75 | else: 76 | backend = hb.ServiceBackend(billing_project=args.billing_project, 77 | remote_tmpdir=f'{args.out_dir}/tmp/') 78 | 79 | run_impute(backend=backend, 80 | input_file=args.input_file, 81 | vcf_ref=args.vcf_ref, 82 | chromosomes=args.chromosomes, 83 | software=args.software, 84 | output_filename=args.output_filename, 85 | n_samples=args.n_samples, 86 | n_panel_samples=args.n_ref_samples, 87 | out_dir=args.out_dir) 88 | 89 | backend.close() 90 | -------------------------------------------------------------------------------- /gwaspy/imputation/impute_vcf.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Lindo Nkambule' 2 | 3 | import hailtop.batch as hb 4 | import hail as hl 5 | import pandas as pd 6 | from typing import Union 7 | from gwaspy.phasing.get_filebase import get_vcf_filebase 8 | from gwaspy.utils.get_file_size import bytes_to_gb 9 | 10 | 11 | def imputation(b: hb.batch.Batch, 12 | vcf: str = None, 13 | vcf_filename_no_ext: str = None, 14 | ref: hb.ResourceGroup = None, 15 | ref_size: Union[int, float] = None, 16 | region: str = None, 17 | chromosome: str = None, 18 | cpu: int = 8, 19 | memory: str = 'highmem', 20 | img: str = 'docker.io/lindonkambule/gwaspy:v1', 21 | threads: int = 7, 22 | out_dir: str = None): 23 | 24 | # in_vcf = b.read_input(vcf) 25 | in_vcf = b.read_input_group(**{'bcf': vcf, 26 | 'bcf.csi': f'{vcf}.csi'}) 27 | vcf_size = bytes_to_gb(vcf) 28 | 29 | output_file_name = vcf_filename_no_ext + '.imputed.bcf' 30 | file_dir = vcf_filename_no_ext.split('.')[0] 31 |
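# disk sizing heuristic: reserve room for the reference panel plus ~4x the
# input chunk size (input, imputed output, and index files)
32 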
| disk_size = ref_size + (vcf_size * 4) 33 | 34 | map_file = f'/shapeit4/maps/b38/{chromosome}.b38.gmap.gz' 35 | 36 | impute = b.new_job(name=output_file_name) 37 | impute.cpu(cpu) 38 | impute.memory(memory) 39 | impute.storage(f'{disk_size}Gi') 40 | impute.image(img) 41 | 42 | cmd = f''' 43 | impute5_1.1.5_static \ 44 | --h {ref.bcf} \ 45 | --m {map_file} \ 46 | --g {in_vcf.bcf} \ 47 | --r {region} \ 48 | --out-gp-field \ 49 | --o {output_file_name} \ 50 | --threads {threads} 51 | ''' 52 | 53 | impute.command(cmd) 54 | # index file to use when merging 55 | impute.command(f'bcftools index {output_file_name}') 56 | 57 | impute.command(f'mv {output_file_name} {impute.ofile}') 58 | impute.command(f'mv {output_file_name}.csi {impute.ind}') 59 | b.write_output(impute.ofile, f'{out_dir}/GWASpy/Imputation/{file_dir}/imputed_chunks/{output_file_name}') 60 | b.write_output(impute.ind, f'{out_dir}/GWASpy/Imputation/{file_dir}/imputed_chunks/{output_file_name}.csi') 61 | 62 | 63 | def run_impute(backend: Union[hb.ServiceBackend, hb.LocalBackend] = None, 64 | input_vcfs: str = None, 65 | phasing_software: str = None, 66 | memory: str = 'highmem', 67 | cpu: int = 8, 68 | threads: int = 7, 69 | out_dir: str = None): 70 | 71 | print(f'RUNNING IMPUTATION ON FILES PHASED WITH {phasing_software.upper()}') 72 | impute_b = hb.Batch(backend=backend, name=f'impute-phased-chunks') 73 | 74 | vcf_paths = pd.read_csv(input_vcfs, sep='\t', header=None) 75 | 76 | # get the regions so we can map each file to its specific region 77 | regions = pd.read_csv(f'{out_dir}/GWASpy/Phasing/regions.lines', sep='\t', names=['reg', 'ind']) 78 | regions_dict = pd.Series(regions.reg.values, index=regions.ind).to_dict() 79 | 80 | for index, row in vcf_paths.iterrows(): 81 | vcf = row[0] 82 | vcf_filebase = get_vcf_filebase(vcf) 83 | 84 | if phasing_software == 'shapeit': 85 | phased_vcfs_chunks = hl.utils.hadoop_ls(f'{out_dir}/GWASpy/Phasing/{vcf_filebase}/phased_scatter/*.shapeit.bcf') 86 | else: 87 | phased_vcfs_chunks = hl.utils.hadoop_ls(f'{out_dir}/GWASpy/Phasing/{vcf_filebase}/phased_scatter/*.eagle.bcf') 88 | 89 | for i in range(1, 24): 90 | if i == 23: 91 | chrom = 'chrX' 92 | else: 93 | chrom = f'chr{i}' 94 | 95 | ref_bcf = f'gs://gcp-public-data--gnomad/resources/hgdp_1kg/phased_haplotypes/hgdp.tgp.gwaspy.merged.{chrom}.merged.bcf' 96 | ref_ind = f'gs://gcp-public-data--gnomad/resources/hgdp_1kg/phased_haplotypes/hgdp.tgp.gwaspy.merged.{chrom}.merged.bcf.csi' 97 | ref_size = bytes_to_gb(ref_bcf) 98 | ref = impute_b.read_input_group(**{'bcf': ref_bcf, 99 | 'bcf.csi': ref_ind}) 100 | 101 | for file in phased_vcfs_chunks: 102 | f = file['path'] 103 | vcf_basename = get_vcf_filebase(f) 104 | file_index = int(vcf_basename.split('.')[-3]) 105 | file_region = regions_dict[file_index] 106 | map_chrom = file_region.split(':')[0] 107 | 108 | imp_out_filename = f'{vcf_basename}.imputed.bcf' 109 | file_dir = vcf_basename.split('.')[0] 110 | output_filepath_name = f'{out_dir}/GWASpy/Imputation/{file_dir}/imputed_chunks/{imp_out_filename}' 111 | 112 | if map_chrom == chrom: 113 | # check if imputed file already exists 114 | if hl.hadoop_exists(output_filepath_name): 115 | continue 116 | else: 117 | imputation(b=impute_b, vcf=f, vcf_filename_no_ext=vcf_basename, ref=ref, ref_size=ref_size, 118 | region=file_region, chromosome=chrom, cpu=cpu, memory=memory, 119 | threads=threads, out_dir=out_dir) 120 | 121 | impute_b.run() 122 | -------------------------------------------------------------------------------- /gwaspy/pca/__init__.py: 
-------------------------------------------------------------------------------- 1 | from gwaspy.pca import pca 2 | __all__ = ['pca'] -------------------------------------------------------------------------------- /gwaspy/pca/assign_pop_labels.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sklearn.ensemble import RandomForestClassifier 3 | from typing import Tuple 4 | import random 5 | 6 | 7 | def assign_population_pcs( 8 | pop_pc_pd: pd.DataFrame, 9 | num_pcs: int, 10 | known_col: str = 'SuperPop', 11 | fit: RandomForestClassifier = None, 12 | seed: int = 42, 13 | prop_train: float = 0.8, 14 | n_estimators: int = 100, 15 | min_prob: float = 0.9, 16 | output_col: str = 'pop', 17 | missing_label: str = 'oth' 18 | ) -> Tuple[pd.DataFrame, RandomForestClassifier]: 19 | """ 20 | This function uses a random forest model to assign population labels based on the results of PCA. 21 | Default values for model and assignment parameters are those used in gnomAD. 22 | :param pd.DataFrame pop_pc_pd: Pandas dataframe containing population PCs as well as a column with population labels 23 | :param str known_col: Column storing the known population labels 24 | :param RandomForestClassifier fit: fit from a previously trained random forest model (i.e., the output from a previous RandomForestClassifier() call) 25 | :param int num_pcs: number of population PCs on which to train the model 26 | :param int seed: Random seed 27 | :param float prop_train: Proportion of known data used for training 28 | :param int n_estimators: Number of trees to use in the RF model 29 | :param float min_prob: Minimum probability of belonging to a given population for the population to be set (otherwise set to `None`) 30 | :param str output_col: Output column storing the assigned population 31 | :param str missing_label: Label for samples for which the assignment probability is smaller than `min_prob` 32 | :return: Dataframe containing sample IDs and imputed population labels, trained random forest model 33 | :rtype: DataFrame, RandomForestClassifier 34 | """ 35 | 36 | print(f'{num_pcs} PCs to be used in population assignment') 37 | # Expand PC column 38 | pc_cols = ['PC{}'.format(i + 1) for i in range(num_pcs)] 39 | train_data = pop_pc_pd.loc[~pop_pc_pd[known_col].isnull()] 40 | 41 | N = len(train_data) 42 | 43 | # Split training data into subsamples for fitting and evaluating 44 | if not fit: 45 | random.seed(seed) 46 | train_subsample_ridx = random.sample(list(range(0, N)), int(N * prop_train)) 47 | train_fit = train_data.iloc[train_subsample_ridx] 48 | fit_samples = [x for x in train_fit['s']] 49 | evaluate_fit = train_data.loc[~train_data['s'].isin(fit_samples)] 50 | 51 | # Train RF 52 | training_set_known_labels = train_fit[known_col].values 53 | training_set_pcs = train_fit[pc_cols].values 54 | evaluation_set_pcs = evaluate_fit[pc_cols].values 55 | 56 | pop_clf = RandomForestClassifier(n_estimators=n_estimators, random_state=seed) 57 | pop_clf.fit(training_set_pcs, training_set_known_labels) 58 | print('Random forest feature importances are as follows: {}'.format(pop_clf.feature_importances_)) 59 | 60 | # Evaluate RF 61 | predictions = pop_clf.predict(evaluation_set_pcs) 62 | error_rate = 1 - sum(evaluate_fit[known_col] == predictions) / float(len(predictions)) 63 | print('Estimated error rate for RF model is {}'.format(error_rate)) 64 | else: 65 | pop_clf = fit 66 | 67 | # Classify data 68 | print('Classifying data')
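# label each sample with its most likely population, then attach the per-class
# probabilities; samples whose top probability falls below `min_prob` are
# relabeled as `missing_label` further down
69 | pop_pc_pd[output_col] = 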
pop_clf.predict(pop_pc_pd[pc_cols].values) 70 | probs = pop_clf.predict_proba(pop_pc_pd[pc_cols].values) 71 | probs = pd.DataFrame(probs, columns=[f'prob_{p}' for p in pop_clf.classes_]) 72 | 73 | pop_pc_pd = pd.concat([pop_pc_pd.reset_index(drop=True), probs.reset_index(drop=True)], axis=1) 74 | 75 | probs['max'] = probs.max(axis=1) 76 | pop_pc_pd.loc[probs['max'] < min_prob, output_col] = missing_label 77 | 78 | return pop_pc_pd, pop_clf -------------------------------------------------------------------------------- /gwaspy/pca/filter_ref_data.py: -------------------------------------------------------------------------------- 1 | # this is the script used to filter the reference 1KG+HGDP data used in PCA 2 | 3 | import hail as hl 4 | 5 | hl.init(default_reference='GRCh38') 6 | 7 | ref_mt = hl.read_matrix_table('gs://african-seq-data/hgdp_tgp/hgdp_tgp_postQC.mt') 8 | 9 | print("\nInitial number of SNPs before filtering: {}".format(ref_mt.count_rows())) 10 | filtered_ref = hl.variant_qc(ref_mt) 11 | 12 | print("filtering") 13 | 14 | filtered_ref = filtered_ref.filter_rows((filtered_ref.variant_qc.AF[0] > 0.05) & (filtered_ref.variant_qc.AF[0] < 0.95)) 15 | print("\nNumber of SNPs after MAF filtering: {}".format(filtered_ref.count_rows())) 16 | 17 | filtered_ref = filtered_ref.filter_rows(filtered_ref.variant_qc.call_rate > 0.999) 18 | print("\nNumber of SNPs after Call Rate filtering: {}".format(filtered_ref.count_rows())) 19 | 20 | # print("repartitioning") 21 | # filtered_ref = filtered_ref.repartition(n_partitions=100, shuffle=True) 22 | 23 | print("writing filtered mt") 24 | filtered_ref.write('gs://african-seq-data/hgdp_tgp/gwaspy_pca_ref/hgdp_1kg_filtered_maf_5_GRCh38.mt', overwrite=True) 25 | print("Done filtering") 26 | 27 | print("getting sample information") 28 | 29 | mt = hl.read_matrix_table('gs://african-seq-data/hgdp_tgp/gwaspy_pca_ref/hgdp_1kg_filtered_maf_5_GRCh38.mt') 30 | 31 | cols_ht = mt.cols() 32 | 33 | pops = cols_ht.select(cols_ht.hgdp_tgp_meta.Study.region) 34 | 35 | df = pops.to_pandas() 36 | 37 | df.columns = ['Sample', 'SuperPop'] 38 | 39 | old_pops_labs = ['Africa', 'America', 'Central_South_Asia', 'East_Asia', 'Europe', 'Middle_East', 'Oceania', 'SAS'] 40 | new_pops_labs = ['AFR', 'AMR', 'CSA', 'EAS', 'EUR', 'MID', 'OCE', 'CSA'] 41 | df['SuperPop'] = df['SuperPop'].replace(old_pops_labs, new_pops_labs) 42 | 43 | print(df['SuperPop'].value_counts()) 44 | 45 | print("exporting sample metadata") 46 | df.to_csv('gs://african-seq-data/hgdp_tgp/gwaspy_pca_ref/hgdp_1kg_sample_info.tsv', sep='\t', index=False) 47 | 48 | -------------------------------------------------------------------------------- /gwaspy/pca/pca.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Lindo Nkambule' 2 | 3 | import argparse 4 | import hail as hl 5 | 6 | 7 | def pca( 8 | ref_dirname: str = 'gs://gcp-public-data--gnomad/release/3.1/secondary_analyses/hgdp_1kg_v2/pca_results/', 9 | ref_basename: str = 'unrelateds_without_outliers', 10 | ref_info: str = 'gs://hgdp-1kg/hgdp_tgp/gwaspy_pca_ref/hgdp_1kg_sample_info.unrelateds.pca_outliers_removed.with_project.tsv', 11 | reference: str = 'GRCh38', pca_type: str = None, 12 | data_dirname: str = None, data_basename: str = None, input_type: str = None, 13 | maf: float = 0.05, hwe: float = 1e-3, call_rate: float = 0.98, 14 | ld_cor: float = 0.2, ld_window: int = 250000, n_pcs: int = 20, run_relatedness_check: bool = True, 15 | include_kinself: bool = False, relatedness_method: str = 'pc_relate', 
16 | relatedness_thresh: float = 0.1, prob_threshold: float = 0.8, out_dir: str = None): 17 | 18 | if not out_dir: 19 | raise Exception('\nOutput directory where files will be saved is not specified') 20 | 21 | hl.default_reference(new_default_reference=reference) 22 | 23 | if pca_type == 'project': 24 | print('\nRunning PCA using projection method') 25 | 26 | from gwaspy.pca.pca_project import run_pca_project 27 | run_pca_project(ref_dirname=ref_dirname, ref_basename=ref_basename, ref_info=ref_info, 28 | data_dirname=data_dirname, data_basename=data_basename, out_dir=out_dir, input_type=input_type, 29 | reference=reference, npcs=n_pcs, maf=maf, hwe=hwe, call_rate=call_rate, 30 | relatedness_method=relatedness_method, run_relatedness_check=run_relatedness_check, 31 | ld_cor=ld_cor, ld_window=ld_window, include_kinself=include_kinself, 32 | prob_threshold=prob_threshold) 33 | 34 | elif pca_type == 'joint': 35 | print('\nRunning PCA using joint method') 36 | from gwaspy.pca.pca_joint import run_pca_joint 37 | run_pca_joint(ref_dirname=ref_dirname, ref_basename=ref_basename, ref_info=ref_info, data_dirname=data_dirname, 38 | data_basename=data_basename, out_dir=out_dir, input_type=input_type, reference=reference, 39 | npcs=n_pcs, maf=maf, hwe=hwe, call_rate=call_rate, ld_cor=ld_cor, ld_window=ld_window, 40 | relatedness_method=relatedness_method, relatedness_thresh=relatedness_thresh, 41 | prob_threshold=prob_threshold) 42 | 43 | else: 44 | print('\nRunning PCA without a reference') 45 | from gwaspy.pca.pca_normal import run_pca_normal 46 | run_pca_normal(dirname=data_dirname, basename=data_basename, input_type=input_type, out_dir=out_dir, 47 | reference=reference, maf=maf, hwe=hwe, call_rate=call_rate, ld_cor=ld_cor, ld_window=ld_window, 48 | n_pcs=n_pcs, run_relatedness_check=run_relatedness_check, relatedness_method=relatedness_method, 49 | relatedness_thresh=relatedness_thresh, include_kinself=include_kinself) 50 | 51 | 52 | def main(): 53 | parser = argparse.ArgumentParser() 54 | # reference args 55 | parser.add_argument('--ref-dirname', default='gs://gcp-public-data--gnomad/release/3.1/secondary_analyses/hgdp_1kg_v2/pca_results/') 56 | parser.add_argument('--ref-basename', default='unrelateds_without_outliers') 57 | parser.add_argument('--ref-info', default='gs://hgdp-1kg/hgdp_tgp/gwaspy_pca_ref/hgdp_1kg_sample_info.unrelateds.pca_outliers_removed.with_project.tsv') 58 | parser.add_argument('--reference', type=str, default='GRCh38') 59 | parser.add_argument('--pca-type', type=str, default='normal', choices=['normal', 'project', 'joint']) 60 | 61 | # data args 62 | parser.add_argument('--data-dirname', type=str, required=True) 63 | parser.add_argument('--data-basename', type=str, required=True) 64 | parser.add_argument('--input-type', type=str, required=True, choices=['vcf', 'plink', 'hail']) 65 | 66 | # filter args 67 | parser.add_argument('--maf', type=float, default=0.05, help='include only SNPs with MAF >= NUM in PCA') 68 | parser.add_argument('--hwe', type=float, default=1e-3, help='include only SNPs with HWE >= NUM in PCA') 69 | parser.add_argument('--geno', type=float, default=0.98, help='include only SNPs with call-rate > NUM') 70 | parser.add_argument('--ld-cor', type=float, default=0.2, metavar="[0.0-1.0]",  # NOTE: argparse choices cannot express a float interval, so the valid range is documented in the help text 71 | help='Squared correlation threshold (exclusive upper bound). 
Must be in the range [0.0, 1.0]') 72 | parser.add_argument('--ld-window', type=int, default=250000, 73 | help='Window size in base pairs (inclusive upper bound)') 74 | parser.add_argument('--npcs', type=int, default=20, help='Number of PCs to use') 75 | parser.add_argument('--no-relatedness', action='store_false') 76 | parser.add_argument('--include-kinself', action='store_true') 77 | parser.add_argument('--relatedness-method', type=str, default='pc_relate', 78 | choices=['pc_relate', 'ibd', 'king'], help='Method to use for the inference of relatedness') 79 | parser.add_argument('--relatedness-thresh', type=float, default=0.1, 80 | help='Threshold value to use in relatedness checks') 81 | parser.add_argument('--prob', type=float, default=0.8, 82 | help='Minimum probability of belonging to a given population for the population to be set') 83 | parser.add_argument('--out-dir', type=str, required=True) 84 | 85 | args = parser.parse_args() 86 | 87 | # args.prob always carries a value (argparse default is 0.8), so no extra 88 | # missing-value check is needed before passing it on 89 | 90 | pca(ref_dirname=args.ref_dirname, ref_basename=args.ref_basename, ref_info=args.ref_info, reference=args.reference, 91 | pca_type=args.pca_type, input_type=args.input_type, data_dirname=args.data_dirname, 92 | data_basename=args.data_basename, maf=args.maf, hwe=args.hwe, call_rate=args.geno, ld_cor=args.ld_cor, 93 | ld_window=args.ld_window, n_pcs=args.npcs, run_relatedness_check=args.no_relatedness, 94 | include_kinself=args.include_kinself, relatedness_method=args.relatedness_method, 95 | relatedness_thresh=args.relatedness_thresh, prob_threshold=args.prob, out_dir=args.out_dir) 96 | 97 | print('\nDone running PCA') 98 | 99 | 100 | if __name__ == '__main__': 101 | main() 102 | 103 | -------------------------------------------------------------------------------- /gwaspy/pca/pca_filter_snps.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Lindo Nkambule' 2 | 3 | import hail as hl 4 | import pandas as pd 5 | 6 | 7 | def pca_filter_mt( 8 | in_mt: hl.MatrixTable, 9 | maf: float = 0.05, 10 | hwe: float = 1e-3, 11 | call_rate: float = 0.98, 12 | ld_cor: float = 0.2, 13 | ld_window: int = 250000): 14 | 15 | print("\nInitial number of SNPs before filtering: {}".format(in_mt.count_rows())) 16 | mt = hl.variant_qc(in_mt) 17 | print(f'\nFiltering out variants with MAF < {maf}') 18 | mt_filt = mt.annotate_rows(maf=hl.min(mt.variant_qc.AF)) 19 | mt_filt = mt_filt.filter_rows(mt_filt.maf > maf) 20 | 21 | print(f'\nFiltering out variants with HWE < {hwe:1e}') 22 | mt_filt = mt_filt.filter_rows(mt_filt.variant_qc.p_value_hwe > hwe) 23 | 24 | print(f'\nFiltering out variants with Call Rate < {call_rate}') 25 | mt_filt = mt_filt.filter_rows(mt_filt.variant_qc.call_rate >= call_rate) 26 | 27 | # no strand ambiguity 28 | print('\nFiltering out strand ambiguous variants') 29 | mt_filt = mt_filt.filter_rows(~hl.is_strand_ambiguous(mt_filt.alleles[0], mt_filt.alleles[1])) 30 | 31 | # MHC chr6:25-35Mb 32 | # chr8.inversion chr8:7-13Mb 33 | print('\nFiltering out variants in MHC [chr6:25M-35M] and chromosome 8 inversions [chr8:7M-13M]') 34 | intervals = ['chr6:25M-35M', 'chr8:7M-13M'] 35 | mt_filt = hl.filter_intervals(mt_filt, [hl.parse_locus_interval(x, reference_genome='GRCh38') for x in intervals], 36 | keep=False) 37 | 38 | # This step is expensive (on local machine) 39 | print(f'\nLD pruning using correlation threshold of {ld_cor} and window size of {ld_window}')
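# hl.ld_prune returns the subset of variants to KEEP (pairwise r^2 below the
# threshold within the window); filter_rows below restricts the matrix to them
40 | mt_ld_prune = hl.ld_prune(mt_filt.GT, r2=ld_cor, 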
bp_window_size=ld_window) 41 | mt_ld_pruned = mt_filt.filter_rows(hl.is_defined(mt_ld_prune[mt_filt.row_key])) 42 | print("\nNumber of SNPs after filtering: {}".format(mt_ld_pruned.count_rows())) 43 | 44 | return mt_ld_pruned 45 | 46 | 47 | def relatedness_check( 48 | in_mt: hl.MatrixTable = None, 49 | method: str = 'pc_relate', 50 | outdir: str = None, 51 | kin_estimate: float = 0.1, 52 | include_kinself: bool = False): 53 | 54 | if method == 'pc_relate': 55 | print("\nUsing PC-Relate for relatedness checks") 56 | # compute kinship statistic for every sample-pair 57 | if include_kinself: 58 | print("\nkinself will be included in exported tsv file") 59 | relatedness_ht = hl.pc_relate(in_mt.GT, 0.01, k=10, statistics='kin', include_self_kinship=include_kinself) 60 | 61 | print('exporting relatedness statistics to a tsv file') 62 | ht_export = relatedness_ht.key_by() 63 | ht_export = ht_export.select(ht_export.kin, i=ht_export.i.s, j=ht_export.j.s) 64 | ht_export.export(f'{outdir}relatedness_checks_pc_relate.tsv.bgz') 65 | 66 | print('getting related samples to be removed using maximal independent set') 67 | # only run maximal independent set step on sample-pairs with kinship above specified threshold 68 | 69 | # when include_kinself is True, the self-kinship pairs must be dropped first; otherwise every sample would fail 70 | # the relatedness check, since a sample's kinship with itself is ~0.5 in most cases (excluding inbreeding) 71 | if include_kinself: 72 | relatedness_ht = relatedness_ht.filter(relatedness_ht.i == relatedness_ht.j, keep=False) 73 | else: 74 | relatedness_ht = relatedness_ht 75 | pairs = relatedness_ht.filter(relatedness_ht['kin'] > kin_estimate) 76 | samples_to_remove = hl.maximal_independent_set(pairs.i, pairs.j, False) 77 | samples = samples_to_remove.node.s.collect() 78 | 79 | elif method == 'ibd': 80 | print("\nUsing PLINK-style IBD for relatedness checks") 81 | in_mt = hl.variant_qc(in_mt) 82 | in_mt = in_mt.annotate_rows(maf=hl.min(in_mt.variant_qc.AF)) 83 | relatedness_ht = hl.identity_by_descent(in_mt, maf=in_mt['maf']) 84 | 85 | print('exporting relatedness statistics to a tsv file') 86 | relatedness_ht.export(f'{outdir}relatedness_checks_ibd.tsv.bgz') 87 | 88 | print('getting related samples to be removed using maximal independent set') 89 | # only run maximal independent set step on sample-pairs with kinship above specified threshold 90 | pairs = relatedness_ht.filter(relatedness_ht['ibd.PI_HAT'] > kin_estimate) 91 | samples_to_remove = hl.maximal_independent_set(pairs.i, pairs.j, False) 92 | samples = samples_to_remove.node.collect() 93 | 94 | else: 95 | print("\nUsing KING for relatedness checks") 96 | if kin_estimate > 0.5: 97 | raise Exception("\nThe maximum kinship coefficient in KING is 0.5") 98 | relatedness_mt = hl.king(in_mt.GT) 99 | relatedness_ht = relatedness_mt.filter_entries((relatedness_mt.s_1 != relatedness_mt.s) & 100 | (relatedness_mt.phi >= kin_estimate)).entries() 101 | 102 | print('exporting relatedness statistics to a tsv file') 103 | relatedness_ht.export(f'{outdir}relatedness_checks_king.tsv.bgz') 104 | 105 | print('getting related samples to be removed using maximal independent set') 106 | samples_to_remove = hl.maximal_independent_set(relatedness_ht.s_1, relatedness_ht.s, False) 107 | samples = samples_to_remove.node.collect() 108 | 109 | if len(samples) > 0: 110 | # Do not remove samples that fail relatedness check 111 | # in_mt = in_mt.filter_cols(hl.literal(samples).contains(in_mt['s']), keep=False)
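# instead of dropping them from the MatrixTable, the failing sample IDs are
# exported below so the user can decide how to handle them
112 | print(f"\nNumber of 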
samples that fail relatedness checks: {len(samples)}") 113 | 114 | df = pd.DataFrame(samples, columns=['Sample']) 115 | ht = hl.Table.from_pandas(df) 116 | ht.export(f'{outdir}samples_failing_relatedness_checks.tsv') 117 | 118 | else: 119 | print("\nNo samples failed the relatedness check") 120 | 121 | return in_mt, samples 122 | -------------------------------------------------------------------------------- /gwaspy/phasing/__init__.py: -------------------------------------------------------------------------------- 1 | from gwaspy.phasing import phase 2 | __all__ = ['phase'] 3 | -------------------------------------------------------------------------------- /gwaspy/phasing/concat_vcfs.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Michael Wilson & Lindo Nkambule' 2 | 3 | import hailtop.batch as hb 4 | import hail as hl 5 | import pandas as pd 6 | from gwaspy.utils.get_file_size import bytes_to_gb 7 | from gwaspy.phasing.get_filebase import get_vcf_filebase 8 | from typing import List 9 | from typing import Union 10 | 11 | 12 | def concat_vcfs(b: hb.batch.Batch, 13 | vcf_basename: str = None, 14 | vcfs_to_merge: List = None, 15 | output_type: str = 'bcf', 16 | software: str = None, 17 | chrom: str = None, 18 | docker_img: str = 'docker.io/lindonkambule/gwaspy:v1', 19 | cpu: int = 8, 20 | out_dir: str = None): 21 | 22 | global index_cmd 23 | 24 | out_type = 'b' if output_type == 'bcf' else 'z' 25 | threads = cpu - 1 26 | vcfs_sizes_sum = 0 27 | merge_vcf_i = '' 28 | 29 | out_filename = f'{vcf_basename}.{chrom}.phased.{software}.bcf' if output_type == 'bcf' else \ 30 | f'{vcf_basename}.{chrom}.phased.{software}.vcf.gz' 31 | out_index_name = f'{vcf_basename}.{chrom}.phased.{software}.bcf.csi' if output_type == 'bcf' else \ 32 | f'{vcf_basename}.{chrom}.phased.{software}.vcf.gz.csi' 33 | 34 | for line in vcfs_to_merge: 35 | vcfs_sizes_sum += 1 + bytes_to_gb(line) 36 | 37 | mem = 'highmem' if vcfs_sizes_sum > 2 else 'standard' 38 | disk_size = 10 + vcfs_sizes_sum 39 | 40 | concat = b.new_job(name=f'concat-{vcf_basename}') 41 | concat.memory(mem) 42 | concat.storage(f'{disk_size}Gi') 43 | concat.image(docker_img) 44 | concat.cpu(cpu) 45 | 46 | for line in vcfs_to_merge: 47 | input_vcf = b.read_input_group(vcf=line, 48 | ind=f'{line}.csi') 49 | merge_vcf_i += f'{input_vcf.vcf} ' 50 | 51 | cmd = f''' 52 | bcftools concat \ 53 | --no-version \ 54 | --output-type {out_type} \ 55 | --output {out_filename} \ 56 | --threads {threads} \ 57 | --ligate \ 58 | {merge_vcf_i} 59 | ''' 60 | 61 | concat.command(cmd) 62 | # index the merged output 63 | concat.command(f'bcftools index {out_filename}') 64 | 65 | concat.command(f'mv {out_filename} {concat.ofile}') 66 | concat.command(f'mv {out_index_name} {concat.idx}') 67 | b.write_output(concat.ofile, f'{out_dir}/GWASpy/{vcf_basename}/Phasing/phased_merged/{out_filename}') 68 | b.write_output(concat.idx, f'{out_dir}/GWASpy/{vcf_basename}/Phasing/phased_merged/{out_index_name}') 69 | 70 | 71 | def run_concat(backend: Union[hb.ServiceBackend, hb.LocalBackend] = None, 72 | input_vcf: str = None, 73 | output_type: str = 'bcf', 74 | reference: str = 'GRCh38', 75 | software: str = None, 76 | out_dir: str = None): 77 | 78 | print(f'\n3. 
CONCAT {input_vcf}\n') 79 | vcf_filebase = get_vcf_filebase(input_vcf) 80 | 81 | # get the regions so we can map each file to its specific region 82 | regions = pd.read_csv(f'{out_dir}/GWASpy/{vcf_filebase}/Phasing/regions.lines', sep='\t', names=['reg', 'ind']) 83 | regions_dict = pd.Series(regions.reg.values, index=regions.ind).to_dict() 84 | 85 | concat_b = hb.Batch(backend=backend, name=f'concat-phased-chunks-{vcf_filebase}') 86 | 87 | if software == 'shapeit': 88 | phased_vcf_chunks = hl.utils.hadoop_ls(f'{out_dir}/GWASpy/{vcf_filebase}/Phasing/phased_scatter/*.shapeit.bcf') 89 | else: 90 | phased_vcf_chunks = hl.utils.hadoop_ls(f'{out_dir}/GWASpy/{vcf_filebase}/Phasing/phased_scatter/*.eagle.bcf') 91 | 92 | for i in range(1, 24): 93 | if reference == 'GRCh38': 94 | if i == 23: 95 | chrom = 'chrX' 96 | else: 97 | chrom = f'chr{i}' 98 | 99 | out_chrom_name = chrom 100 | else: 101 | chrom = str(i) 102 | out_chrom_name = f'chr{chrom}' 103 | 104 | chrom_phased_files_to_concat = [] 105 | 106 | for file in phased_vcf_chunks: 107 | f = file['path'] 108 | vcf_basename = get_vcf_filebase(f) 109 | file_index = int(vcf_basename.split('.')[-3]) 110 | file_region = regions_dict[file_index] 111 | map_chrom = file_region.split(':')[0] 112 | if map_chrom == chrom: 113 | chrom_phased_files_to_concat.append(f) 114 | 115 | # naturally sort the list of files to merge (e.g. 'chunk2' sorts before 'chunk10') 116 | from gwaspy.utils.natural_sort import natural_keys 117 | chrom_phased_files_to_concat.sort(key=natural_keys) 118 | 119 | # checkpoint to see if file already exists to avoid redoing things 120 | chrom_out_filename = f'{vcf_filebase}.{out_chrom_name}.phased.{software}.bcf' if output_type == 'bcf' else \ 121 | f'{vcf_filebase}.{out_chrom_name}.phased.{software}.vcf.gz' 122 | chrom_out_filename_path = f'{out_dir}/GWASpy/{vcf_filebase}/Phasing/phased_merged/{chrom_out_filename}' 123 | 124 | if hl.hadoop_exists(chrom_out_filename_path): 125 | continue 126 | else: 127 | concat_vcfs(b=concat_b, vcfs_to_merge=chrom_phased_files_to_concat, vcf_basename=vcf_filebase, 128 | output_type=output_type, software=software, chrom=out_chrom_name, out_dir=out_dir) 129 | 130 | concat_b.run() 131 | -------------------------------------------------------------------------------- /gwaspy/phasing/get_filebase.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Lindo Nkambule' 2 | 3 | import ntpath 4 | 5 | 6 | def get_vcf_filebase(file: str = None): 7 | 8 | vcf_name = ntpath.basename(file) 9 | if vcf_name.endswith('.gz'): 10 | file_no_ext = vcf_name[:-7]  # strip '.vcf.gz' 11 | elif vcf_name.endswith('.bgz'): 12 | file_no_ext = vcf_name[:-8]  # strip '.vcf.bgz' 13 | else: 14 | file_no_ext = vcf_name[:-4]  # strip '.vcf' or '.bcf' 15 | 16 | return file_no_ext 17 | -------------------------------------------------------------------------------- /gwaspy/phasing/phase.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Lindo Nkambule' 2 | 3 | import argparse 4 | import hailtop.batch as hb 5 | from gwaspy.phasing.shapeit5_phase import shapeit_phasing 6 | from typing import Union 7 | 8 | 9 | def run_phase(backend: Union[hb.ServiceBackend, hb.LocalBackend] = None, 10 | input_vcf: str = None, 11 | vcf_ref: str = None, 12 | fam_file: str = None, 13 | data_type: str = 'array', 14 | software: str = 'shapeit', 15 | genome_build: str = 'GRCh38', 16 | fill_tags: bool = False, 17 | output_filename: str = None, 18 | out_dir: str = None): 19 | 20 | if data_type.lower() not in ['array', 'wgs']: 21 | raise SystemExit(f'Incorrect data type 
--------------------------------------------------------------------------------
/gwaspy/phasing/phase.py:
--------------------------------------------------------------------------------
1 | __author__ = 'Lindo Nkambule'
2 | 
3 | import argparse
4 | import hailtop.batch as hb
5 | from gwaspy.phasing.shapeit5_phase import shapeit_phasing
6 | from typing import Union
7 | 
8 | 
9 | def run_phase(backend: Union[hb.ServiceBackend, hb.LocalBackend] = None,
10 |               input_vcf: str = None,
11 |               vcf_ref: str = None,
12 |               fam_file: str = None,
13 |               data_type: str = 'array',
14 |               software: str = 'shapeit',
15 |               genome_build: str = 'GRCh38',
16 |               fill_tags: bool = False,
17 |               output_filename: str = None,
18 |               out_dir: str = None):
19 | 
20 |     if data_type.lower() not in ['array', 'wgs']:
21 |         raise SystemExit(f'Incorrect data type {data_type} selected. Options are [array, wgs]')
22 | 
23 |     if software.lower() not in ['beagle', 'shapeit']:
24 |         raise SystemExit(f'Incorrect software {software} selected. Options are [beagle, shapeit]')
25 | 
26 |     b = hb.Batch(backend=backend,
27 |                  name=f'GWASpy-Phasing-{software.upper()}')
28 | 
29 |     if vcf_ref:
30 |         if vcf_ref == 'hgdp1kgp':
31 |             print(f'\nPHASING {input_vcf} WITH HGDP+1KGP PANEL\n')
32 |             ref_path = 'gs://gcp-public-data--gnomad/resources/hgdp_1kg/phased_haplotypes_v2/hgdp1kgp_chrCNUMBER.filtered.SNV_INDEL.phased.shapeit5.bcf'
33 |         else:
34 |             print(f'\nPHASING {input_vcf} WITH USER-DEFINED REFERENCE PANEL\n')
35 |             ref_path = vcf_ref
36 |     else:
37 |         ref_path = None
38 |         print(f'\nPHASING {input_vcf} WITHOUT A REFERENCE PANEL\n')
39 | 
40 |     pedigree = b.read_input(fam_file) if fam_file else None
41 | 
42 |     if software == 'shapeit':
43 |         shapeit_phasing(
44 |             batch=b,
45 |             input_path=input_vcf,
46 |             reference_path=ref_path,
47 |             genome_build=genome_build,
48 |             fam_file=pedigree,
49 |             data_type=data_type,
50 |             fill_tags=fill_tags,
51 |             output_filename=output_filename,
52 |             output_path=out_dir)
53 |     # else: BEAGLE support is still to be added
54 | 
55 | 
56 | def main():
57 |     parser = argparse.ArgumentParser()
58 |     parser.add_argument('--input-vcf', type=str, required=True)
59 |     parser.add_argument('--vcf-ref', type=str, default=None)
60 |     parser.add_argument('--pedigree', type=str, default=None)
61 |     parser.add_argument('--local', action='store_true')
62 |     parser.add_argument('--billing-project', required=True)
63 |     parser.add_argument('--genome-build', type=str, default='GRCh38', choices=['GRCh37', 'GRCh38'])
64 |     parser.add_argument('--data-type', type=str, default='array', choices=['array', 'wgs'])
65 |     parser.add_argument('--fill-tags', action='store_true')
66 |     parser.add_argument('--software', type=str, default='shapeit', choices=['beagle', 'shapeit'])
67 |     parser.add_argument('--output-filename', type=str, required=True)
68 |     parser.add_argument('--out-dir', type=str, required=True)
69 | 
70 |     args = parser.parse_args()
71 | 
72 |     if args.local:
73 |         backend = hb.LocalBackend()
74 |     else:
75 |         backend = hb.ServiceBackend(billing_project=args.billing_project,
76 |                                     remote_tmpdir=f'{args.out_dir}/tmp/')
77 | 
78 |     run_phase(backend=backend,
79 |               input_vcf=args.input_vcf,
80 |               vcf_ref=args.vcf_ref,
81 |               fam_file=args.pedigree,
82 |               data_type=args.data_type,
83 |               software=args.software,
84 |               genome_build=args.genome_build,
85 |               fill_tags=args.fill_tags,
86 |               output_filename=args.output_filename,
87 |               out_dir=args.out_dir)
88 | 
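An illustrative invocation of the `phasing` console script that main() above backs (bucket paths and names hypothetical; --vcf-ref may be omitted to phase without a panel):

    phasing --input-vcf gs://my-bucket/data.vcf.bgz --vcf-ref hgdp1kgp \
        --billing-project my-project --output-filename data.phased --out-dir gs://my-bucket/out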
--------------------------------------------------------------------------------
/gwaspy/phasing/phasing.py:
--------------------------------------------------------------------------------
1 | __author__ = 'Michael Wilson & Lindo Nkambule'
2 | 
3 | import hailtop.batch as hb
4 | import argparse
5 | 
6 | 
7 | def haplotype_phasing(input_vcf: str = None,
8 |                       vcf_ref: str = None,
9 |                       family_pedigree: str = None,
10 |                       local: bool = False,
11 |                       billing_project: str = None,
12 |                       software: str = 'shapeit',
13 |                       reference: str = 'GRCh38',
14 |                       max_win_size_cm: float = 10.0,
15 |                       overlap_size_cm: float = 2.0,
16 |                       scatter_memory: int = 26,
17 |                       cpu: int = 8,
18 |                       threads: int = 7,
19 |                       stages: str = 'scatter,phase,concat',
20 |                       output_type: str = 'bcf',
21 |                       out_dir: str = None):
22 |     # Error handling
23 |     if not out_dir:
24 |         raise SystemExit('Output directory not specified. Specify using --out-dir if running from the command line or '
25 |                          'the out_dir argument if running inside a Python script')
26 | 
27 |     steps_list = stages.split(',')
28 |     steps = [x.lower() for x in steps_list]
29 |     unknown_steps = [i for i in steps if i not in ['scatter', 'phase', 'concat']]
30 | 
31 |     if len(unknown_steps) > 0:
32 |         raise SystemExit(f'Incorrect process(es) {unknown_steps} selected. Options are [scatter, phase, concat]')
33 | 
34 |     if output_type.lower() not in ['bcf', 'vcf']:
35 |         raise SystemExit(f'Incorrect output type {output_type} selected. Options are [bcf, vcf]')
36 | 
37 |     if local:
38 |         backend = hb.LocalBackend()
39 |     else:
40 |         backend = hb.ServiceBackend(billing_project=billing_project,
41 |                                     remote_tmpdir=f'{out_dir}/tmp/')
42 | 
43 |     # Scatter VCF/BCF file(s)
44 |     if 'scatter' in steps:
45 |         from gwaspy.phasing.scatter_vcf import run_scatter
46 |         run_scatter(backend=backend, input_vcf=input_vcf, reference=reference, max_win_size_cm=max_win_size_cm,
47 |                     overlap_size_cm=overlap_size_cm, scatter_memory=scatter_memory, out_dir=out_dir)
48 | 
49 |     # Phase scattered chunks
50 |     if 'phase' in steps:
51 |         from gwaspy.phasing.phase_vcf import run_phase
52 |         run_phase(backend=backend, input_vcf=input_vcf, vcf_ref_path=vcf_ref, family_pedigree=family_pedigree,
53 |                   software=software, reference=reference, cpu=cpu, threads=threads, out_dir=out_dir)
54 | 
55 |     # Concatenate phased chunks
56 |     if 'concat' in steps:
57 |         from gwaspy.phasing.concat_vcfs import run_concat
58 |         run_concat(backend=backend, input_vcf=input_vcf, output_type=output_type, reference=reference,
59 |                    software=software, out_dir=out_dir)
60 | 
61 | 
62 | def main():
63 |     parser = argparse.ArgumentParser()
64 |     parser.add_argument('--input-vcf', type=str, required=True)
65 |     parser.add_argument('--vcf-ref', type=str, default=None)
66 |     parser.add_argument('--family-pedigree', type=str, default=None)
67 |     parser.add_argument('--local', action='store_true')
68 |     parser.add_argument('--billing-project', required=True)
69 |     parser.add_argument('--software', type=str, default='shapeit', choices=['eagle', 'shapeit'])
70 |     parser.add_argument('--reference', type=str, default='GRCh38', choices=['GRCh37', 'GRCh38'])
71 |     parser.add_argument('--max-win-size-cm', type=float, default=10.0)
72 |     parser.add_argument('--overlap-size-cm', type=float, default=2.0)
73 |     parser.add_argument('--cpu', type=int, default=8)
74 |     parser.add_argument('--scatter-mem', type=int, default=26)
75 |     parser.add_argument('--threads', type=int, default=7)
76 |     parser.add_argument('--stages', type=str, default='scatter,phase,concat')
77 |     parser.add_argument('--out-type', type=str, default='bcf', choices=['bcf', 'vcf'])
78 |     parser.add_argument('--out-dir', required=True)
79 | 
80 |     args = parser.parse_args()
81 | 
82 |     haplotype_phasing(input_vcf=args.input_vcf, vcf_ref=args.vcf_ref, family_pedigree=args.family_pedigree,
83 |                       local=args.local, billing_project=args.billing_project, software=args.software,
84 |                       reference=args.reference, max_win_size_cm=args.max_win_size_cm,
85 |                       overlap_size_cm=args.overlap_size_cm, scatter_memory=args.scatter_mem, cpu=args.cpu,
86 |                       threads=args.threads, stages=args.stages, output_type=args.out_type, out_dir=args.out_dir)
87 | 
88 | 
89 | if __name__ == '__main__':
90 |     main()
91 | 
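The stages argument above is a comma-separated subset of scatter/phase/concat, so a run can be resumed mid-pipeline; illustrative values:

    stages='scatter,phase,concat'   # full run (the default)
    stages='phase,concat'           # chunks already scattered; phase and merge only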
--------------------------------------------------------------------------------
/gwaspy/preimp_qc/__init__.py:
--------------------------------------------------------------------------------
1 | from gwaspy.preimp_qc import preimp_qc
2 | __all__ = ['preimp_qc']
3 | 
--------------------------------------------------------------------------------
/gwaspy/preimp_qc/aggregators.py:
--------------------------------------------------------------------------------
1 | __author__ = 'Dan King'
2 | 
3 | import hail as hl
4 | 
5 | 
6 | def variant_qc_aggregator(mt) -> hl.expr.StructExpression:
7 |     """:func:`.variant_qc` as an aggregator."""
8 |     bound_exprs = {}
9 |     gq_dp_exprs = {}
10 | 
11 |     def has_field_of_type(name, dtype):
12 |         return name in mt.entry and mt[name].dtype == dtype
13 | 
14 |     if has_field_of_type('DP', hl.tint32):
15 |         gq_dp_exprs['dp_stats'] = hl.agg.stats(mt.DP).select('mean', 'stdev', 'min', 'max')
16 |     if has_field_of_type('GQ', hl.tint32):
17 |         gq_dp_exprs['gq_stats'] = hl.agg.stats(mt.GQ).select('mean', 'stdev', 'min', 'max')
18 |     if not has_field_of_type('GT', hl.tcall):
19 |         raise ValueError("'variant_qc': expect an entry field 'GT' of type 'call'")
20 |     bound_exprs['n_called'] = hl.agg.count_where(hl.is_defined(mt['GT']))
21 |     bound_exprs['n_not_called'] = hl.agg.count_where(hl.is_missing(mt['GT']))
22 |     n_cols = hl.agg.count()
23 |     bound_exprs['n_filtered'] = hl.int64(n_cols) - hl.agg.count()
24 |     bound_exprs['call_stats'] = hl.agg.call_stats(mt.GT, mt.alleles)
25 |     return hl.rbind(hl.struct(**bound_exprs),
26 |                     lambda e1: hl.rbind(
27 |                         hl.case().when(hl.len(mt.alleles) == 2,
28 |                                        hl.hardy_weinberg_test(e1.call_stats.homozygote_count[0],
29 |                                                               e1.call_stats.AC[1] - 2
30 |                                                               * e1.call_stats.homozygote_count[1],
31 |                                                               e1.call_stats.homozygote_count[1])
32 |                                        ).or_missing(),
33 |                         lambda hwe: hl.struct(**{
34 |                             **gq_dp_exprs,
35 |                             **e1.call_stats,
36 |                             'call_rate': hl.float(e1.n_called) / (e1.n_called + e1.n_not_called + e1.n_filtered),
37 |                             'n_called': e1.n_called,
38 |                             'n_not_called': e1.n_not_called,
39 |                             'n_filtered': e1.n_filtered,
40 |                             'n_het': e1.n_called - hl.sum(e1.call_stats.homozygote_count),
41 |                             'n_non_ref': e1.n_called - e1.call_stats.homozygote_count[0],
42 |                             'het_freq_hwe': hwe.het_freq_hwe,
43 |                             'p_value_hwe': hwe.p_value})))
44 | 
45 | 
46 | def agg_call_rate(mt: hl.MatrixTable):
47 |     # DOES NOT HANDLE filter_entries CORRECTLY!
48 |     n_called = hl.agg.count_where(hl.is_defined(mt['GT']))
49 | 
50 |     return hl.agg.filter(
51 |         ~(mt.exclude_row | mt.exclude_col),
52 |         n_called / hl.agg.count())
53 | 
54 | 
55 | def impute_sex_aggregator(call,
56 |                           aaf,
57 |                           aaf_threshold=0.0,
58 |                           include_par=False,
59 |                           female_threshold=0.4,
60 |                           male_threshold=0.8) -> hl.expr.StructExpression:
61 |     """:func:`.impute_sex` as an aggregator."""
62 |     mt = call._indices.source
63 |     rg = mt.locus.dtype.reference_genome
64 |     x_contigs = hl.literal(
65 |         hl.eval(
66 |             hl.map(lambda x_contig: hl.parse_locus_interval(x_contig, rg), rg.x_contigs)))
67 |     inbreeding = hl.agg.inbreeding(call, aaf)
68 |     is_female = hl.if_else(inbreeding.f_stat < female_threshold,
69 |                            True,
70 |                            hl.if_else(inbreeding.f_stat > male_threshold,
71 |                                       False,
72 |                                       hl.missing(hl.tbool)))
73 |     expression = hl.struct(is_female=is_female, **inbreeding)
74 |     if not include_par:
75 |         interval_type = hl.tarray(hl.tinterval(hl.tlocus(rg)))
76 |         par_intervals = hl.literal(rg.par, interval_type)
77 |         expression = hl.agg.filter(
78 |             ~par_intervals.any(lambda par_interval: par_interval.contains(mt.locus)),
79 |             expression)
80 |     expression = hl.agg.filter((aaf > aaf_threshold) & (aaf < (1 - aaf_threshold)), expression)
81 |     expression = hl.agg.filter(
82 |         x_contigs.any(lambda contig: contig.contains(mt.locus)),
83 |         expression)
84 | 
85 |     return expression
86 | 
87 | 
88 | def allele_types(mt):
89 |     from hail.expr.functions import _num_allele_type, _allele_types
90 |     allele_types = _allele_types[:]
91 |     allele_types.extend(['Transition', 'Transversion'])
92 |     allele_enum = {i: v for i, v in enumerate(allele_types)}
93 |     allele_ints = {v: k for k, v in allele_enum.items()}
94 | 
95 |     def allele_type(ref, alt):
96 |         return hl.bind(lambda at: hl.if_else(at == allele_ints['SNP'],
97 |                                              hl.if_else(hl.is_transition(ref, alt),
98 |                                                         allele_ints['Transition'],
99 |                                                         allele_ints['Transversion']),
100 |                                              at),
101 |                        _num_allele_type(ref, alt))
102 | 
103 |     return mt.alleles[1:].map(lambda alt: allele_type(mt.alleles[0], alt))
104 | 
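An illustrative use of the aggregators above (hypothetical MatrixTable `mt`; agg_call_rate additionally assumes boolean exclude_row/exclude_col annotations have already been defined, as its body requires):

    mt = mt.annotate_rows(variant_qc=variant_qc_aggregator(mt))
    mt = mt.annotate_rows(call_rate=agg_call_rate(mt))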
--------------------------------------------------------------------------------
/gwaspy/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from gwaspy.utils import read_file
2 | __all__ = ['read_file']
--------------------------------------------------------------------------------
/gwaspy/utils/export_file.py:
--------------------------------------------------------------------------------
1 | import hail as hl
2 | 
3 | 
4 | def export_qced_file(mt: hl.MatrixTable, out_dir: str, basename: str, export_type='hail'):
5 |     outname = basename + '_qced'
6 | 
7 |     if export_type == 'hail':
8 |         mt.write('{}GWASpy/Preimp_QC/{}.mt'.format(out_dir, outname), overwrite=True)
9 | 
10 |     elif export_type == 'plink':
11 |         hl.export_plink(dataset=mt, output='{}GWASpy/Preimp_QC/{}'.format(out_dir, outname), fam_id=mt.fam_id,
12 |                         ind_id=mt.s, pat_id=mt.pat_id, mat_id=mt.mat_id, is_female=mt.is_female, pheno=mt.is_case,
13 |                         varid=mt.rsid)
14 | 
15 |     else:
16 |         hl.export_vcf(mt, '{}GWASpy/Preimp_QC/{}.vcf.bgz'.format(out_dir, outname), tabix=True)
17 | 
--------------------------------------------------------------------------------
/gwaspy/utils/get_file_size.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | 
3 | __author__ = 'Lindo Nkambule'
4 | 
5 | import hail as hl
6 | 
7 | 
8 | def bytes_to_gb(in_file: str):
9 |     """
10 |     Convert the size from bytes to GiB
11 |     :param in_file: path to file, str
12 |     :return: file size in GiB
13 |     """
14 | 
15 |     file_info = hl.utils.hadoop_stat(in_file)
16 |     size_bytes = file_info['size_bytes']
17 |     size_gigs = size_bytes / (1024 * 1024 * 1024)
18 | 
19 |     return size_gigs
20 | 
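A quick illustrative call of bytes_to_gb (hypothetical path; hadoop_stat resolves both local paths and gs:// URLs):

    bytes_to_gb('gs://my-bucket/data.vcf.bgz')  # a 3221225472-byte file -> 3.0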
--------------------------------------------------------------------------------
/gwaspy/utils/natural_sort.py:
--------------------------------------------------------------------------------
1 | import re
2 | 
3 | 
4 | def atoi(text):
5 |     return int(text) if text.isdigit() else text
6 | 
7 | 
8 | def natural_keys(text):
9 |     """
10 |     alist.sort(key=natural_keys) sorts in human order
11 |     http://nedbatchelder.com/blog/200712/human_sorting.html
12 |     (See Toothy's implementation in the comments)
13 |     """
14 |     return [atoi(c) for c in re.split(r'(\d+)', text)]
15 | 
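An illustrative sort (hypothetical chunk names) showing why run_concat uses natural ordering; a plain lexicographic sort would place chunk 10 before chunk 2:

    sorted(['data.10.shapeit.bcf', 'data.2.shapeit.bcf'], key=natural_keys)
    # -> ['data.2.shapeit.bcf', 'data.10.shapeit.bcf']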
--------------------------------------------------------------------------------
/gwaspy/utils/read_file.py:
--------------------------------------------------------------------------------
1 | import hail as hl
2 | import hailtop.fs as hfs
3 | from gwaspy.utils.sample_annotations import add_sample_annotations
4 | 
5 | 
6 | def read_plink(dirname: str, basename: str) -> hl.MatrixTable:
7 | 
8 |     in_mt: hl.MatrixTable = hl.import_plink(bed=dirname + basename + '.bed',
9 |                                             bim=dirname + basename + '.bim',
10 |                                             fam=dirname + basename + '.fam',
11 |                                             block_size=16)
12 | 
13 |     return in_mt
14 | 
15 | 
16 | def read_vcf(dirname: str, basename: str) -> hl.MatrixTable:
17 |     hl._set_flags(no_whole_stage_codegen='1')
18 | 
19 |     if hfs.exists(f"{dirname}{basename}.vcf.bgz"):
20 |         vcf_file = f"{dirname}{basename}.vcf.bgz"
21 |     elif hfs.exists(f"{dirname}{basename}.vcf.gz"):
22 |         vcf_file = f"{dirname}{basename}.vcf.gz"
23 |     else:
24 |         vcf_file = f"{dirname}{basename}.vcf"
25 | 
26 |     hl.import_vcf(vcf_file, force_bgz=True, block_size=16).write('{}GWASpy.preimpQC.mt'.format(dirname), overwrite=True)
27 | 
28 |     # unset flag to avoid locus_windows: 'locus_expr' global position must be in ascending order when LD pruning
29 |     # https://hail.zulipchat.com/#narrow/channel/123010-Hail-Query-0.2E2-support/topic/locus_windows.20Error/near/272143278
30 |     hl._set_flags(no_whole_stage_codegen=None)
31 |     in_mt = hl.read_matrix_table('{}GWASpy.preimpQC.mt'.format(dirname))
32 | 
33 |     # Unlike array data, a VCF might have multi-allelic sites
34 |     # split multi-allelic sites into bi-allelic
35 |     print("Checking for multi-allelic sites")
36 |     pre_split_multi_n = in_mt.count_rows()
37 |     bi = in_mt.filter_rows(hl.len(in_mt.alleles) == 2)
38 |     bi = bi.annotate_rows(a_index=hl.missing(hl.tint))
39 |     bi = bi.annotate_rows(was_split=False)
40 | 
41 |     multi = in_mt.filter_rows(hl.len(in_mt.alleles) > 2)
42 |     split = hl.split_multi_hts(multi)
43 | 
44 |     in_mt = split.union_rows(bi)
45 |     post_split_multi_n = in_mt.count_rows()
46 |     print("Number of additional bi-allelic rows created by splitting multi-allelic sites: {}".format(post_split_multi_n - pre_split_multi_n))
47 | 
48 |     return in_mt
49 | 
50 | 
51 | def read_mt(dirname: str, basename: str) -> hl.MatrixTable:
52 |     print(dirname + basename + ".mt")
53 |     in_mt: hl.MatrixTable = hl.read_matrix_table(dirname + basename + ".mt")
54 | 
55 |     return in_mt
56 | 
57 | 
58 | def read_infile(
59 |         input_type: str = None,
60 |         dirname: str = None, basename: str = None,
61 |         **kwargs):
62 | 
63 | 
64 | 
65 |     # vcf = kwargs.get('vcf')
66 |     annotations = kwargs.get('annotations')
67 | 
68 |     if input_type == 'plink':
69 |         mt = read_plink(dirname, basename)
70 | 
71 |     elif input_type == 'vcf':
72 |         mt = read_vcf(dirname, basename)
73 | 
74 |     else:
75 |         mt = read_mt(dirname, basename)
76 | 
77 |     if annotations:
78 |         mt = add_sample_annotations(mt, annotations)
79 | 
80 |     return mt
81 | 
--------------------------------------------------------------------------------
/gwaspy/utils/reference_liftover.py:
--------------------------------------------------------------------------------
1 | import hail as hl
2 | from gwaspy.utils.read_file import read_infile
3 | from gwaspy.utils.sample_annotations import add_sample_annotations
4 | 
5 | 
6 | def liftover_to_grch38(
7 |         input_type: str = None,
8 |         dirname: str = None,
9 |         basename: str = None,
10 |         **kwargs):
11 | 
12 |     lifted_over = f'{dirname}{basename}.liftover.grch38.mt'
13 |     print('\nLifting over to GRCh38')
14 |     mt = read_infile(input_type=input_type, dirname=dirname, basename=basename)
15 | 
16 |     annotations = kwargs.get('annotations')
17 |     if annotations:
18 |         mt = add_sample_annotations(mt, annotations)
19 | 
20 |     rg37 = hl.get_reference('GRCh37')
21 |     rg38 = hl.get_reference('GRCh38')
22 |     rg37.add_liftover('gs://hail-common/references/grch37_to_grch38.over.chain.gz', rg38)
23 | 
24 |     mt = mt.annotate_rows(new_locus=hl.liftover(mt.locus, 'GRCh38', include_strand=True), old_locus=mt.locus)
25 |     mt = mt.filter_rows(hl.is_defined(mt.new_locus) & ~mt.new_locus.is_negative_strand)
26 | 
27 |     mt = mt.key_rows_by(locus=mt.new_locus.result, alleles=mt.alleles)
28 | 
29 |     print(f'\nWriting out data lifted-over to GRCh38 to: {lifted_over}')
30 |     mt.write(lifted_over)
31 | 
32 |     return hl.read_matrix_table(lifted_over)
33 | 
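An illustrative call of liftover_to_grch38 above (hypothetical paths; the function writes the lifted-over MatrixTable next to the input and returns it re-read):

    mt = liftover_to_grch38(input_type='vcf', dirname='gs://my-bucket/', basename='mydata')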
--------------------------------------------------------------------------------
/gwaspy/utils/sample_annotations.py:
--------------------------------------------------------------------------------
1 | __author__ = 'Lindo Nkambule'
2 | 
3 | import hail as hl
4 | import sys
5 | 
6 | 
7 | def add_sample_annotations(mt: hl.MatrixTable, annotations: str) -> hl.MatrixTable:
8 |     # use annotations file to annotate VCF
9 |     ann = hl.import_table(annotations, impute=False,
10 |                           types={'Sample': hl.tstr, 'Sex': hl.tstr, 'Pheno': hl.tstr}).key_by('Sample')
11 |     ann_cols = dict(ann.row)
12 | 
13 |     mt = mt.annotate_cols(annotations=ann[mt.s])
14 | 
15 |     if 'is_female' not in mt.col:
16 |         if 'Sex' in ann_cols:
17 |             mt = mt.annotate_cols(is_female=hl.if_else(((mt.annotations.Sex == 'F') |
18 |                                                         (mt.annotations.Sex == str(2)) |
19 |                                                         (mt.annotations.Sex == 'True') |
20 |                                                         (mt.annotations.Sex == 'Female')),
21 |                                                        True, False))
22 |         else:
23 |             print('Sex column is missing from annotations file. Please add it and run GWASpy again')
24 |             sys.exit(2)
25 | 
26 |     if 'is_case' not in mt.col:
27 |         if 'Pheno' in ann_cols:
28 |             mt = mt.annotate_cols(is_case=hl.if_else(((mt.annotations.Pheno == str(2)) |
29 |                                                       (mt.annotations.Pheno == 'True') |
30 |                                                       (mt.annotations.Pheno == 'Case')),
31 |                                                      True, False))
32 | 
33 |     return mt
34 | 
35 | 
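A minimal sketch of the annotations table add_sample_annotations expects (tab-separated with a header; sample rows hypothetical, using encodings the conditions above accept):

    Sample	Sex	Pheno
    SAMPLE001	F	Case
    SAMPLE002	M	2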
--------------------------------------------------------------------------------
/nf/modules/imputation.nf:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env nextflow
2 | 
3 | nextflow.enable.dsl=2
4 | 
5 | 
6 | process IMPUTE5 {
7 |     cpus 8
8 |     memory { 16.GB * task.attempt }
9 |     container 'docker.io/lindonkambule/gwaspy_phase_impute:latest'
10 |     tag "impute: ${irg}"
11 |     publishDir "${out_directory}", overwrite: true, mode:'copy', pattern: '*.bcf*'
12 | 
13 |     input:
14 |         tuple val(chrom), file(input), file(input_idx), file(ref), file(ref_idx), file(ref_bin), file(ref_fam), file(map_file), val(srg), val(irg), val(chk), val(out_directory)
15 | 
16 |     output:
17 |         tuple val(chrom), val(chk), path("${chk}imputed.chr${chrom}.bcf"), path("${chk}imputed.chr${chrom}.bcf.csi")
18 | 
19 |     // IMPUTE5 automatically indexes output file
20 |     script:
21 |         """
22 |         impute5_v1.2.0_static \
23 |             --h ${ref} \
24 |             --g ${input} \
25 |             --m ${map_file} \
26 |             --r ${irg} \
27 |             --buffer-region ${srg} \
28 |             --o ${chk}imputed.chr${chrom}.bcf
29 |         """
30 | }
31 | 
--------------------------------------------------------------------------------
/nf/nextflow.config:
--------------------------------------------------------------------------------
1 | // work directory where intermediate files will be stored
2 | workDir = 'gs://path/to/my/workdir'
3 | 
4 | process {
5 |     executor = 'google-batch'
6 |     errorStrategy = { task.exitStatus==null ? 'retry' : 'terminate' }
7 |     maxRetries = 3
8 | }
9 | 
10 | profiles {
11 |     gbatch {
12 |         google.project = 'my-billing-project'
13 |         google.location = 'us-central1'
14 |         batch.spot = true
15 |     }
16 | }
17 | 
--------------------------------------------------------------------------------
/nf/params.json:
--------------------------------------------------------------------------------
1 | {
2 |     "input_vcf": "gs://my-gcs/bucket/my_input_file.vcf",
3 |     "output_filename": "my_output_filename_prefix",
4 |     "out_dir": "gs://my-gcs/bucket/nf_phase_impute",
5 |     "impute": true,
6 |     "fill_tags": false,
7 |     "input_split_by_chrom": false,
8 |     "vcf_ref": "gs://gcp-public-data--gnomad/resources/hgdp_1kg/phased_haplotypes_v2/hgdp1kgp_chrCNUMBER.filtered.SNV_INDEL.phased.shapeit5",
9 |     "ref_format": "vcf",
10 |     "data_type": "array",
11 |     "maf": 0.001,
12 |     "common_chunks": "gs://my-gcs/bucket/chunks/b38/20cM/chunks_chrCNUMBER.txt",
13 |     "rare_chunks": "gs://my-gcs/bucket/chunks/b38/4cM/chunks_chrCNUMBER.txt",
14 |     "genetic_maps": "gs://my-gcs/bucket/resources/maps/chrCNUMBER.b38.gmap.gz"
15 | }
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | hail
2 | matplotlib>=3.3.3
3 | plotly>=5.7.0
4 | pandas>=0.25.3
5 | pylatex>=1.4.1
6 | numpy>=1.18.4
7 | scikit-learn~=0.21.3
8 | setuptools~=41.6.0
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import os
2 | from setuptools import setup, find_packages
3 | 
4 | with open("README.md", "r") as fh:
5 |     long_description = fh.read()
6 | 
7 | classifiers = [
8 |     'Development Status :: 4 - Beta',
9 |     'Environment :: Console',
10 |     'Intended Audience :: Science/Research',
11 |     'License :: OSI Approved :: MIT License',
12 |     'Programming Language :: Python :: 3',
13 |     'Programming Language :: Python :: 3.2',
14 |     'Programming Language :: Python :: 3.3',
15 |     'Programming Language :: Python :: 3.4',
16 |     'Programming Language :: Python :: 3.5',
17 |     'Programming Language :: Python :: 3.6',
18 |     'Programming Language :: Python :: 3.7',
19 |     'Programming Language :: Python :: 3.8',
20 |     'Topic :: Scientific/Engineering :: Bio-Informatics',
21 |     'Operating System :: POSIX',
22 |     'Operating System :: Unix',
23 |     'Operating System :: MacOS'
24 | ]
25 | 
26 | setup(name='gwaspy',
27 |       version='0.1.2',
28 |       author='Lindokuhle Nkambule',
29 |       author_email='lnkambul@broadinstitute.org',
30 |       url='https://gwaspy.readthedocs.io/',
31 |       project_urls={"GitHub": "https://github.com/atgu/GWASpy"},
32 |       description='GWASpy: A Python package for performing GWAS QC, PCA, phasing, and genotype imputation.',
33 |       long_description=long_description,
34 |       long_description_content_type="text/markdown",
35 |       license='MIT',
36 |       packages=find_packages(),
37 |       entry_points={
38 |           'console_scripts': [
39 |               'preimp_qc = gwaspy.preimp_qc.preimp_qc:main',
40 |               'pca = gwaspy.pca.pca:main',
41 |               'imputation = gwaspy.imputation.impute:main',
42 |               # 'imputation = gwaspy.imputation.imputation:main',
43 |               'phasing = gwaspy.phasing.phase:main',
44 |               # 'phasing = gwaspy.phasing.phasing:main'
45 |               'checkalleleflips = gwaspy.check_alleles.flips:main'
46 |           ]
47 |       },
48 |       classifiers=classifiers,
49 |       keywords='',
50 |       # install_requires=required,
51 |       install_requires=['hail', 'matplotlib', 'numpy', 'pandas', 'pylatex', 'plotly', 'distinctipy'],
52 |       zip_safe=False
53 |       )
54 | 
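Once installed, the console scripts declared above can be smoke-tested directly (illustrative; each is a standard argparse CLI):

    preimp_qc --help
    pca --help
    phasing --help
    imputation --help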
--------------------------------------------------------------------------------
/split_maps.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | for ref in {17,19,38}
4 | do
5 |   # Eagle's bundled genetic map tables (installed under /opt)
6 |   map_file=/opt/Eagle_v2.4.1/tables/genetic_map_hg${ref}_withX.txt.gz
7 |   for chrom in {1..23}
8 |   do
9 |     echo -e 'chr position COMBINED_rate(cM/Mb) Genetic_Map(cM)' > /opt/genetic_maps_eagle/hg${ref}/genetic_map_hg${ref}_chr${chrom}_withX.txt
10 |     cat $map_file | gunzip | awk '{if ( ($1==CHROM) ) print $0}' CHROM=${chrom} >> /opt/genetic_maps_eagle/hg${ref}/genetic_map_hg${ref}_chr${chrom}_withX.txt
11 |     gzip /opt/genetic_maps_eagle/hg${ref}/genetic_map_hg${ref}_chr${chrom}_withX.txt
12 | 
13 |     # echo -e 'pos\tchr\tcM' > /opt/genetic_maps_shapeit/hg${ref}/genetic_map_hg${ref}_chr${chrom}_withX.txt
14 |     cat /opt/genetic_maps_eagle/hg${ref}/genetic_map_hg${ref}_chr${chrom}_withX.txt.gz | gunzip | awk '{print $2,"\t",$1,"\t",$4}' > /opt/genetic_maps_shapeit/hg${ref}/genetic_map_hg${ref}_chr${chrom}_withX.txt
15 |     sed 's/position/~~/g; s/Genetic_Map(cM)/cM/g; s/~~/pos/g' /opt/genetic_maps_shapeit/hg${ref}/genetic_map_hg${ref}_chr${chrom}_withX.txt | gzip > /opt/genetic_maps_shapeit/hg${ref}/genetic_map_hg${ref}_chr${chrom}_withX.txt.gz  # rename header columns to pos/chr/cM (via a '~~' placeholder)
16 |     rm /opt/genetic_maps_shapeit/hg${ref}/genetic_map_hg${ref}_chr${chrom}_withX.txt
17 |   done
18 | done
19 | 
20 | 
--------------------------------------------------------------------------------
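For reference, a sketch of the two per-chromosome map layouts the loop above emits (column order taken from the echo/awk/sed commands; the numeric rows are hypothetical):

    Eagle:    chr position COMBINED_rate(cM/Mb) Genetic_Map(cM)
              1   55550    2.981822             0.000000
    SHAPEIT:  pos     chr     cM
              55550   1       0.000000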