├── .github ├── ISSUE_TEMPLATE │ └── bug_report.md └── workflows │ └── pythonapp.yml ├── .gitignore ├── .travis.yml ├── CHANGELOG.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── etc ├── annotate_vep │ ├── make_vcf.py │ ├── merge.py │ └── run.sh ├── demo.md ├── detailed-apache2-instructions │ ├── README.md │ └── pheweb.conf ├── detailed-development-instructions.md ├── detailed-install-instructions.md ├── detailed-internal-dataflow.md ├── detailed-loading-instructions.md ├── detailed-webserver-instructions.md ├── images │ ├── screen-homepage-search.png │ ├── screen-homepage.png │ ├── screen-lz-tooltip.png │ ├── screen-lz.png │ ├── screen-manhattan.png │ ├── screen-phewas.png │ └── screen-qq.png ├── make-input-file.py ├── phecode_icd9 │ ├── make_phecode_icd9.py │ └── phecodes_icd9.json ├── pheweb.service └── pre-commit ├── pheweb ├── __init__.py ├── command_line.py ├── conf.py ├── file_utils.py ├── load │ ├── __init__.py │ ├── add_genes.py │ ├── add_rsids.py │ ├── augment_phenos.py │ ├── best_of_pheno.py │ ├── cffi │ │ ├── ffibuilder.py │ │ └── x.cpp │ ├── cluster.py │ ├── detect_ref.py │ ├── download_genes.py │ ├── download_genes_from_scratch.py │ ├── download_rsids.py │ ├── download_rsids_from_scratch.py │ ├── gather_pvalues_for_each_gene.py │ ├── load_utils.py │ ├── make_cpras_rsids_sqlite3.py │ ├── make_gene_aliases_sqlite3.py │ ├── manhattan.py │ ├── matrix.py │ ├── parse_input_files.py │ ├── pheno_correlation.py │ ├── phenolist │ │ └── __init__.py │ ├── phenotypes.py │ ├── process_assoc_files.py │ ├── qq.py │ ├── read_input_file.py │ ├── sites.py │ ├── top_hits.py │ ├── top_loci.py │ └── wsgi.py ├── parse_utils.py ├── serve │ ├── __init__.py │ ├── auth.py │ ├── autocomplete.py │ ├── run.py │ ├── server.py │ ├── server_utils.py │ ├── static │ │ ├── common.css │ │ ├── common.js │ │ ├── fonts │ │ │ ├── glyphicons-halflings-regular.svg │ │ │ ├── glyphicons-halflings-regular.ttf │ │ │ ├── glyphicons-halflings-regular.woff │ │ │ └── glyphicons-halflings-regular.woff2 
│ │ ├── gene.js │ │ ├── pheno-filter.js │ │ ├── pheno.js │ │ ├── phenotypes.js │ │ ├── region.css │ │ ├── region.js │ │ ├── top_hits.js │ │ ├── variant.js │ │ └── vendor │ │ │ └── stream_table-1.1.1.min.js │ └── templates │ │ ├── about.html │ │ ├── about │ │ └── content.html │ │ ├── error.html │ │ ├── gene.html │ │ ├── index.html │ │ ├── index │ │ ├── below-h1.html │ │ ├── below-query.html │ │ └── h1.html │ │ ├── layout.html │ │ ├── pheno-filter.html │ │ ├── pheno.html │ │ ├── pheno │ │ ├── h1.html │ │ └── info.html │ │ ├── phenotypes.html │ │ ├── region.html │ │ ├── region │ │ └── h1.html │ │ ├── title.html │ │ ├── top_hits.html │ │ └── variant.html ├── utils.py ├── version.py └── weetabix.py ├── setup.cfg ├── setup.py └── tests ├── in_venv.sh ├── input_files ├── assoc-files │ ├── *&\+. !`(%@).epacts.gz │ ├── EAR-LENGTH.epacts.gz │ ├── has-fields-.txt │ ├── has-fields-ac-af-maf-ns.txt │ ├── has-fields-ac-af-maf.txt │ ├── has-fields-ac-af-ns.txt │ ├── has-fields-ac-af.txt │ ├── has-fields-ac-maf-ns.txt │ ├── has-fields-ac-maf.txt │ ├── has-fields-ac-ns.txt │ ├── has-fields-ac.txt │ ├── has-fields-af-maf-ns.txt │ ├── has-fields-af-maf.txt │ ├── has-fields-af-ns.txt │ ├── has-fields-af.txt │ ├── has-fields-maf-ns.txt │ ├── has-fields-maf.txt │ ├── has-fields-ns.txt │ ├── pheno.0 │ ├── pheno.1.tsv │ ├── pheno.3.1.epacts.gz │ ├── pheno2.chr1.tsv │ ├── pheno2.chr19.tsv │ └── snowstorm.txt ├── categories.csv ├── config.py ├── correlations │ ├── pheno-correlations.txt │ ├── pheno-list.json │ └── rg-pipeline-output.txt ├── custom_templates │ └── index │ │ └── below-h1.html ├── fake-cache │ ├── gene_aliases-v36.sqlite3 │ ├── gene_aliases-v37.sqlite3 │ ├── genes-v36-hg19.bed │ ├── genes-v37-hg19.bed │ ├── rsids-v150-hg19.tsv.gz │ ├── rsids-v150-hg38.tsv.gz │ ├── rsids-v154-hg19.tsv.gz │ └── rsids-v154-hg38.tsv.gz └── phenolist │ ├── phenolist-example-broken.csv │ └── phenolist-example1.csv ├── run-all.sh ├── run-gunicorn.sh ├── test_all.py ├── test_detectref.py ├── 
test_pheno_correlation.py └── test_weetabix.py /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Report a problem with pheweb 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | Please include: 11 | 12 | - the version of pheweb you're using, gotten from `pheweb -h`. If you're not on the latest version, consider upgrading with `pip3 install --upgrade` and trying again. 13 | 14 | - the command you were running and its output/error. 15 | 16 | - snippets of relevant files, especially files mentioned in the error. 17 | 18 | - your `config.py`. 19 | -------------------------------------------------------------------------------- /.github/workflows/pythonapp.yml: -------------------------------------------------------------------------------- 1 | name: Python application 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | build: 7 | 8 | runs-on: ubuntu-latest 9 | 10 | steps: 11 | - uses: actions/checkout@v3 12 | - uses: actions/setup-python@v4 13 | with: 14 | python-version: '3.10' 15 | - name: Lint with flake8 and mypy 16 | run: | 17 | python3 -m pip install flake8 mypy 18 | # stop the build if there are Python syntax errors or undefined names 19 | ./etc/pre-commit 20 | - name: Test wheel with pytest 21 | run: | 22 | python3 -m pip install wheel # for bdist_wheel 23 | python3 -m pip install pytest 24 | python3 -m pip freeze 25 | python3 setup.py sdist bdist_wheel 26 | sh -c "cd /tmp && python3 -m pip install $PWD/dist/*.whl" # must be in a new dir to avoid finding build artifacts 27 | python3 -m pip freeze 28 | sh -c "cd /tmp && pytest $PWD/tests" # must be in a new dir to avoid finding source code 29 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .pytest_cache/ 3 | PheWeb.egg-info 4 | build 5 | dist 6 | 
pheweb/load/cffi/_x* 7 | .eggs 8 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.4" 4 | install: 5 | - pip3 install . 6 | - pip3 install pytest 7 | - pip3 install flake8 8 | script: 9 | - pytest 10 | - python3 -m flake8 11 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | *This file only includes changes that are relevant to people running a pheweb site.* 2 | 3 | ## 1.3.15 4 | - Fixes `pheweb cluster`. 5 | 6 | ## 1.3.14 7 | - Fixes uppercase `field_aliases` in `config.py`. Column names are case-insensitive now. 8 | 9 | ## 1.3.13 10 | - Speeds up autocomplete 11 | 12 | ## 1.3.12 13 | - Adds beta/sebeta columns to the tables on /pheno/ and /variant/ 14 | - Shows AF range or MAF range better on /variant/ 15 | - Shows pvalue=0 as p<1e-320 in most places. 16 | - Improves error-handling on /pheno-filter/ 17 | - Upgrades to LocusZoom.js 0.13, including new PNG downloads 18 | - Fixes bugs in OAuth and WSGI 19 | - Uses relative redirects, so that http vs https and hostname don't matter, except in OAuth code. 
20 | 21 | ## 1.3.9 22 | - Improves hovering on the filtered manhattan plots 23 | - Includes code for annotating with VEP 24 | - Shows category on /top_hits 25 | 26 | **Changes needed to data:** 27 | 28 | - Run `rm generated-by-pheweb/top_hits.json; pheweb top-hits` 29 | 30 | ## 1.3.7 31 | - Uses gencode v37 (released 2021-Feb) 32 | - Shows GClambda and num_samples/num_cases/num_controls and num_loci<5e8 on /phenotypes 33 | - Supports custom_templates/ again 34 | 35 | **Changes needed to data:** 36 | 37 | - Run `rm generated-by-pheweb/sites/sites.tsv && pheweb process` (because gene names must agree between autocompletion and the pre-processed data) 38 | 39 | ## 1.3.6 40 | - Speeds up `pheweb gather-pvalues-for-each-gene` ~2x by avoiding reading any variant twice. (Thanks to finngen for this suggestion.) 41 | - Allows live-filtering a manhattan plot by MAF or snp/indel, with instructions in README. 42 | - Verifies that `num_cases + num_controls == num_samples` in `pheweb phenolist verify` (which is included in `pheweb process`). 43 | 44 | ## 1.3.5 45 | - Removes dependence on `pandas` (because it wouldn't install on my laptop) 46 | 47 | ## 1.3.4 48 | - Allows setting `loading_nice = True`. 49 | - Allows setting `field_aliases` again. 50 | - Reduces memory usage by `pheweb qq` by ~10x by switching to `numpy` and `pandas`. 51 | - Fixes the bug where `pheweb matrix` breaks when `matrix.tsv.gz` is up-to-date. 52 | 53 | ## 1.3.0 54 | - Rewrites configuration management, losing the ability to customize `extra_per_*_fields` and `null_values` and `field_aliases`. 55 | - Fixes bug where config wasn't passed to child processes when using `PHEWEB_DATADIR` or `pheweb conf key=value `. 56 | 57 | Bugs: 58 | 59 | - `pheweb matrix` breaks when `matrix.tsv.gz` is already up-to-date. 60 | 61 | ## 1.2.5 62 | - Makes sure that `pheno_gz/.gz.tbi` gets created, and re-runs traits that don't have it. 63 | 64 | ## 1.2.3 65 | - Uses dbSNP v154 (the latest!) with way more rsids. 
To use them, run `rm generated-by-pheweb/sites/sites-rsids.tsv && pheweb process`. 66 | 67 | ## 1.2.1 68 | - Allows hg38 via `hg_build_number=38` 69 | - Downloads resources from instead of processing raw data from EBI, dbSNP, etc. 70 | - Replaces marisa-trie with sqlite3 to remove a flaky dependency and improve the order of autocomplete suggestions. 71 | - Replaces more json files with sqlite3 to handle large datasets better. 72 | - Compresses all internal files with `gzip -2` to save storage and IO. 73 | - Gets rid of `generated-by-pheweb/pheno/`, relying on `generated-by-pheweb/pheno_gz/` instead. 74 | - Allows `chr1`-`chr25` in input files. 75 | 76 | **Changes needed to data:** 77 | 78 | - Run `pheweb download-genes` 79 | - Run `pheweb make-gene-aliases-sqlite3` 80 | - Run `rm generated-by-pheweb/phenotypes.json; pheweb phenotypes` 81 | - Run `pheweb gather-pvalues-for-each-gene` 82 | 83 | ## 1.2.0 (broken) 84 | Bugs: 85 | 86 | - `pheweb matrix` fails to match filenames to columns. 87 | 88 | ## 1.1.28 89 | - Allows selecting which phenotypes to run in most steps via `pheweb --phenos=5-10`. 90 | - Adds `pheweb cluster --step=`. 91 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2023 Regents of the University of Michigan 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
6 | 7 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include pheweb/serve/static * 2 | recursive-include pheweb/serve/templates * 3 | recursive-include pheweb *.py 4 | include pheweb/load/cffi/*.cpp 5 | -------------------------------------------------------------------------------- /etc/annotate_vep/make_vcf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from pathlib import Path 4 | import gzip, sys 5 | 6 | in_filepath = Path(sys.argv[1]) 7 | out_filepath = Path(sys.argv[2]) 8 | 9 | with gzip.open(in_filepath, 'rt') as in_f, gzip.open(out_filepath,'wt') as out_f: 10 | def write(line:str): out_f.write(line); out_f.write('\n') 11 | 12 | write('##fileformat=VCFv4.1') 13 | write('##reference=http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/GRCh38_reference_genome/GRCh38_full_analysis_set_plus_decoy_hla.fa') 14 | write('\t'.join('#CHROM POS ID REF ALT INFO'.split())) 15 | 16 | header = next(in_f).rstrip('\n') 17 | assert header.split('\t') == ['chrom', 'pos', 'ref', 'alt', 'rsids', 'nearest_genes'] 18 | 19 | for idx,line in enumerate(in_f): 20 | chrom,pos,ref,alt,rsids,nearest_genes = line.rstrip('\n').split('\t') 21 | variant_id = f'{chrom}:{pos}:{ref}:{alt}' 22 | write('\t'.join([chrom, pos, variant_id, ref, alt, f'nearest_genes={nearest_genes}'])) 23 | 
-------------------------------------------------------------------------------- /etc/annotate_vep/merge.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from pathlib import Path 4 | import gzip, itertools, csv, sys 5 | 6 | import pheweb 7 | from pheweb.file_utils import VariantFileReader, read_maybe_gzip 8 | 9 | 10 | sites_filepath = Path(sys.argv[1]) 11 | vep_filepath = Path(sys.argv[2]) 12 | out_filepath = Path(sys.argv[3]) 13 | 14 | def sites_reader(): 15 | with VariantFileReader(sites_filepath) as vfr: 16 | variants = iter(vfr) 17 | first_variant = next(variants) 18 | assert sorted(first_variant.keys()) == sorted(['chrom', 'pos', 'ref', 'alt', 'rsids', 'nearest_genes']), first_variant 19 | yield from itertools.chain([first_variant], variants) 20 | 21 | def vep_reader(): 22 | with read_maybe_gzip(vep_filepath) as sites_f: 23 | reader = csv.DictReader((line.lstrip('#') for line in sites_f if not line.startswith('##')), delimiter='\t') 24 | first_row = next(reader) 25 | required_cols = {'Uploaded_variation', 'Consequence'} 26 | missing_cols = required_cols - first_row.keys() 27 | if missing_cols: 28 | raise Exception(f'missing_cols={missing_cols} first_row={first_row}') 29 | for row in itertools.chain([first_row], reader): 30 | chrom, pos, ref, alt = row['Uploaded_variation'].split(':') 31 | pos = int(pos) 32 | yield {'chrom':chrom, 'pos':pos, 'ref':ref, 'alt':alt, 'consequence':row['Consequence']} 33 | 34 | 35 | with gzip.open(out_filepath,'wt') as out_f: 36 | writer = csv.DictWriter(out_f, 'chrom pos ref alt rsids nearest_genes consequence'.split(), delimiter="\t") 37 | writer.writeheader() 38 | 39 | for site_v, vep_v in itertools.zip_longest(sites_reader(), vep_reader(), fillvalue={}): 40 | # sites_filepath and vep_filepath must have a perfect one-to-one match! 
41 | assert all(site_v[k] == vep_v[k] for k in 'chrom pos ref alt'.split()), (site_v, vep_v) 42 | writer.writerow({**site_v, **vep_v}) 43 | -------------------------------------------------------------------------------- /etc/annotate_vep/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -euo pipefail 3 | readlinkf() { perl -MCwd -le 'print Cwd::abs_path shift' "$1"; } 4 | SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 5 | 6 | set -x 7 | 8 | ## This script should get run from the directory that contains `generated-by-pheweb`. 9 | ## It needs `generated-by-pheweb/sites/sites.tsv`, so it should get run after `pheweb add-genes` and its preceeding steps. 10 | ## You can see the list of steps with `pheweb process -h`. 11 | ## Then you should be able to continue with the rest of the steps. I think `pheweb process` should pick up at the right spot. 12 | ## To use these VEP consequences to filter the filterable manhattan plot, set `show_manhattan_filter_consequence = True` in `config.py`. 13 | 14 | ## Uncomment your build: 15 | #build="GRCh38" 16 | build="GRCh37" 17 | 18 | ## Setting parallel="yes" splits the input into chunks of 3 million variants and annotates them in parallel. 19 | ## None of this is super robust, and parallel is even less. 20 | parallel="no" 21 | 22 | # This script needs a version of python that has pheweb installed. 23 | python_exe="/data/pheweb/pheweb-installs/pheweb1.3/venv/bin/python3" 24 | #python_exe="python3" 25 | 26 | 27 | mkdir -p vep_data/input 28 | chmod a+rwx vep_data 29 | if ! [[ -e input.vcf.gz ]]; then 30 | "$python_exe" "$SCRIPTDIR/make_vcf.py" generated-by-pheweb/sites/sites.tsv input.vcf.gz 31 | fi 32 | 33 | if ! 
[[ $parallel = "yes" ]]; then 34 | cp input.vcf.gz vep_data/input/ 35 | else 36 | zcat input.vcf.gz | grep -v '^#' | split --lines=$((3*1000*1000)) - split_ # drop every header line (not just `##`), because the full header is re-added to each chunk below 37 | for file in split_*; do 38 | zcat input.vcf.gz | grep '^#' > "vep_data/input/$file" # grep reads to EOF, unlike `head`, so zcat never dies of SIGPIPE (which is fatal under `set -o pipefail`) 39 | cat "$file" >> "vep_data/input/$file" 40 | rm "$file" 41 | done 42 | fi 43 | 44 | sudo docker pull ensemblorg/ensembl-vep 45 | sudo docker run -v "$PWD/vep_data":/opt/vep/.vep ensemblorg/ensembl-vep perl INSTALL.pl -a cfp -s homo_sapiens -y "$build" -g all # Do we really need `-g all`? 46 | 47 | if ! [[ $parallel = "yes" ]]; then 48 | sudo docker run -v "$PWD/vep_data":/opt/vep/.vep ensemblorg/ensembl-vep ./vep --input_file=/opt/vep/.vep/input/input.vcf.gz --output_file=/opt/vep/.vep/output.tsv --force_overwrite --compress_output=gzip --cache --offline --assembly="$build" --regulatory --most_severe --check_existing 49 | mv vep_data/output.tsv out-raw-vep.tsv 50 | 51 | else 52 | for f in vep_data/input/split_*; do 53 | name=$(basename "$f") 54 | sudo docker run -v "$PWD/vep_data":/opt/vep/.vep ensemblorg/ensembl-vep ./vep --input_file=/opt/vep/.vep/input/$name --output_file=/opt/vep/.vep/output-$name.tsv --force_overwrite --compress_output=gzip --cache --offline --assembly="$build" --regulatory --most_severe --check_existing & 55 | done 56 | wait # Wait for child processes to exit (a bare `wait` does not report their failures) 57 | zcat vep_data/output-split_aa.tsv | grep '^#' | gzip > out-raw-vep.tsv 58 | for f in vep_data/output-split_*.tsv; do # the glob expands in sorted order and, unlike `split_a*`, also covers chunks past `split_az` 59 | zcat "$f" | grep -v '^#' | gzip >> out-raw-vep.tsv 60 | done 61 | fi 62 | 63 | "$python_exe" "$SCRIPTDIR/merge.py" generated-by-pheweb/sites/sites.tsv out-raw-vep.tsv sites-vep.tsv 64 | 65 | 66 | echo "Now check that sites-vep.tsv looks good." 67 | echo 'It should have the same variants as `generated-by-pheweb/sites/sites.tsv`.' 68 | echo "It should have the same columns, plus 'consequence'." 69 | echo 'Then run `mv sites-vep.tsv generated-by-pheweb/sites/sites.tsv`.' 
70 | -------------------------------------------------------------------------------- /etc/demo.md: -------------------------------------------------------------------------------- 1 | ## Demo Navigating PheWeb 2 | 3 | On the homepage use the **search bar** to look up particular (1) genes (e.g. _APOB_, _FTO_, _TCF7L2_), (2) variants (by either rsID or chromosome:position on the appropriate genome build), or phenotypes/traits. 4 | Note: View a list of traits on the PheWeb on the About page. 5 | In any view, clicking on the PheWeb icon on the top left corner will allow you to return to the homepage. 6 | 7 | If you are feeling adventurous, hit the **Random** icon in the top panel to view a randomly selected view from the PheWeb. 8 | Selecting **Top Hits** in this panel will present a list of the most significant associations in this PheWeb in table format. 9 | To learn more about the data behind the PheWeb select **About**. 10 | 11 | PheWeb shows 3 types of views: `Manhattan` + `quantile-quantile (QQ)` plots, `LocusZoom` plots, and `PheWAS` plots. 12 | 13 | Below I am looking up _TCF7L2_ in the search bar: 14 | 15 | ![](/etc/images/screen-homepage-search.png?raw=true) 16 | 17 | Searching by gene will show you the most significant associations in that gene (table format) and a `LocusZoom` regional view showing the linkage disequilibrium among the variants in the region around the gene (below). 18 | Selecting a different row in the table will change the `LocusZoom` plot accordingly. 19 | 20 | In my _TCF7L2_ search, this page appears, in which the `LocusZoom` plot below is displaying the row in the table that is selected (“Type 1 diabetes”): 21 | 22 | ![](/etc/images/screen-lz.png?raw=true) 23 | 24 | All plots are interactive. 
You can hover your mouse above variants to learn more information about them, for example in the `LocusZoom` plot: 25 | 26 | ![](/etc/images/screen-lz-tooltip.png?raw=true) 27 | 28 | Clicking on a variant in the `LocusZoom plot` will display a `PheWAS` view showing the association p-value for the variant across all the phenotypes in the PheWeb. 29 | In the `PheWAS` view an upwards facing triangle implies a positive effect of that variant on the phenotype, whereas a downwards facing triangle implies a negative effect. 30 | Circles are used for variants in which the estimate of the beta is not precise (e.g. standard error encompassing zero). The variants are colored according to a user-specified biological grouping. 31 | 32 | I decided to select a _TCF7L2_ variant from the previous screenshot, and here is the `PheWAS` view followed by a table summary: 33 | 34 | ![](/etc/images/screen-phewas.png?raw=true) 35 | 36 | Selecting a trait in the `PheWAS` plot will navigate you to the Manhattan plot view. Below the `Manhattan` is a table showing the most significant associations, and below that is the `quantile-quantile (QQ)` plot stratified by minor allele frequency bin and the genomic control lambda calculated from various percentiles of variants. 37 | 38 | Below I selected “Stricture of Artery” from the `PheWAS` view, and am hovering my mouse over a variant in the `Manhattan` plot. 39 | If I select this variant I will be brought to its `LocusZoom` regional plot. 40 | 41 | ![](/etc/images/screen-manhattan.png?raw=true) 42 | 43 | Scrolling down on the same page I see the `QQ` plot below the table of top associations: 44 | 45 | ![](/etc/images/screen-qq.png?raw=true) 46 | 47 | -------------------------------------------------------------------------------- /etc/detailed-apache2-instructions/README.md: -------------------------------------------------------------------------------- 1 | ### Running PheWeb with Apache2 2 | 3 | 1. Install apache2. 4 | 5 | 2. 
Run `tmux` or `screen` to get a shell session that won't exit when you close your terminal. 6 | 7 | 3. Run `pheweb serve --host 127.0.0.1 --port 9974 --num-workers 4 --no-reloader`. 8 | 9 | - This command is equivalent to `gunicorn -b 127.0.0.1:9974 --access-logfile=- -w4 pheweb.serve.server:app` 10 | - Use whatever port you want and whatever number of workers you want. 11 | 12 | 4. Run `sudo a2enmod proxy proxy_http`. 13 | 14 | 5. Copy `pheweb.conf` from this directory into `/etc/apache2/sites-available/`. 15 | 16 | - If you need name-based virtual hosts, uncomment `ServerName foo.example.com` and use your domain instead. 17 | 18 | 6. Run `sudo a2ensite pheweb`, which should make a symlink in `/etc/apache2/sites-enabled/` 19 | 20 | 7. Run `sudo service apache2 restart`. 21 | 22 | 8. Any time the computer crashes, apache2 should start on its own but you'll need to start tmux and pheweb/gunicorn. 23 | -------------------------------------------------------------------------------- /etc/detailed-apache2-instructions/pheweb.conf: -------------------------------------------------------------------------------- 1 | 2 | # This will hopefully prevent people from being able to browse the python source code if something goes wrong. 
3 | Options -Indexes 4 | 5 | 6 | # requires `a2enmod proxy proxy_http` 7 | 8 | ## Use this if you want to use name-based virtualhosts for multiple (sub)domains on one IP 9 | # ServerName foo.example.com 10 | 11 | ProxyPreserveHost On 12 | ProxyPass / http://127.0.0.1:9974/ 13 | ProxyPassReverse / http://127.0.0.1:9974/ 14 | 15 | LogLevel warn 16 | ErrorLog ${APACHE_LOG_DIR}/pheweb_error.log 17 | CustomLog ${APACHE_LOG_DIR}/pheweb_access.log combined 18 | 19 | -------------------------------------------------------------------------------- /etc/detailed-development-instructions.md: -------------------------------------------------------------------------------- 1 | ## Detailed development instructions 2 | 3 | This document contains information useful for those looking to modify and develop the PheWeb source code. 4 | It requires some familiarity with Python and the terminal. 5 | 6 | ### Installing PheWeb 7 | In order to reflect code changes as you work, PheWeb should be installed in "editable" mode. 8 | 9 | 1. Clone the repository to a new folder. 10 | 2. Create and activate a new virtual environment. For example, in the checked-out PheWeb directory: `python3 -m venv .venv && source .venv/bin/activate` (if you prefer to manage your virtualenv some other way, that is ok) 11 | 3. With the virtualenv activated, install the package in "editable" mode: `pip3 install -e .` 12 | 4. When complete, verify that PheWeb is installed and working correctly: `pheweb -h` 13 | 14 | ### Running static analysis 15 | 16 | You can do simple static analysis by running `./etc/pre-commit`. It requires `pip3 install flake8 mypy`. If it is broken, it might not be a problem, but it can be a good way to catch bugs. 17 | 18 | ### Running the unit tests 19 | The tests take a minute or two. PheWeb loads a sample dataset, runs a local server, and then queries some pages on that server. It doesn't test everything in PheWeb, but it gets most of it. 
20 | 21 | `pytest` 22 | 23 | 24 | ### Running a local server with sample data 25 | Run `./tests/run-all.sh`, and then open to view your site. 26 | 27 | This uses the same data as the unit tests to serve a website you can browse. 28 | 29 | The homepage links to some good pages. Most of the other pages aren't very useful because the data is so sparse. 30 | 31 | If you are only modifying the server code, you can quickly re-run just `pheweb serve` without re-running all the loading steps. Use the line like `+ pheweb conf ... serve` that is printed to your console. 32 | 33 | -------------------------------------------------------------------------------- /etc/detailed-install-instructions.md: -------------------------------------------------------------------------------- 1 | ## Detailed install instructions 2 | 3 | First, try: 4 | 5 | ```bash 6 | python3 -m pip install -U cython wheel pip setuptools 7 | python3 -m pip install pheweb 8 | pheweb 9 | ``` 10 | 11 | *(Note: In most cases this is equivalent to `pip3 install pheweb`, but if you have a bad version of `pip3` on your `$PATH`, using `python3 -m pip` will avoid it.)* 12 | 13 | - If you get the error `Segmentation fault (core dumped)`, try running `python3 -m pip install --no-binary=cffi,cryptography,pyopenssl pheweb` instead. ([more info](https://github.com/pypa/pip/issues/5366)) 14 | 15 | - If you get an error related to pysam, run `python3 -m pip install -U cython; python3 -m pip install https://github.com/pysam-developers/pysam/archive/master.zip` and try again. 16 | 17 | - If installation was successful but running `pheweb` results in "command not found", you need to add `pheweb` to your PATH. You should be able to just add the line `PATH="$HOME/.local/bin:$PATH"` to the end of `~/.bashrc`, start a new terminal, and run `pheweb` again. If you're on macOS, you might need to add the line `source "$HOME/.bashrc"` to `~/.bash_profile`. 
18 | 19 | - If that command fails in a different way, then use one of the approaches below. 20 | 21 | 22 | ### Installing on Linux with `sudo`: 23 | 24 | *(Note: If you're not sure whether you have permissions for `sudo`, just try it. If you don't have root access, it will say something like `you are not in the sudoers file.`*) 25 | 26 | Install prerequisites: 27 | 28 | - If you are running Ubuntu (or another `apt-get`-based distribution), run: 29 | 30 | ```bash 31 | sudo apt-get update 32 | sudo apt-get install python3-pip python3-dev libz-dev libffi-dev 33 | ``` 34 | 35 | - If you are running Fedora, RedHat, or CentOS (or another `yum`-based distribution), run: 36 | 37 | ```bash 38 | sudo yum install python3-devel gcc-c++ zlib-devel 39 | ``` 40 | 41 | Then run: 42 | 43 | ```bash 44 | sudo python3 -m pip install wheel cython 45 | sudo python3 -m pip install pheweb 46 | sudo pheweb 47 | ``` 48 | 49 | If this doesn't work, try the miniconda3 approach instead. 50 | 51 | 52 | ### Installing on Linux or Mac with Miniconda3: 53 | 54 | If you are on macOS, install XCode Developer Tools with `xcode-select --install`. 55 | 56 | To install miniconda3, follow the instructions [here](https://docs.conda.io/projects/conda/en/latest/user-guide/install/). 57 | 58 | When you're installing miniconda3, you can close the terms & conditions with "q". 59 | You should install into the default directory of `~/miniconda3`. 60 | You should let miniconda modify `$PATH` in your `~/.bash_profile` or `~/.bashrc`, so that you'll be able to run just `pheweb` instead of needing to type `~/miniconda3/bin/pheweb` on the command line. 61 | 62 | Next, close and re-open your terminal, to make the new `$PATH` take effect. 63 | You can check that you have the miniconda3 python set up by running `which python3`, which should reply something like `/home/peter/miniconda3/bin/python3`. 
64 | Then run: 65 | 66 | ```bash 67 | python3 -m pip install pheweb 68 | ``` 69 | 70 | If none of these work, open a Github issue. 71 | -------------------------------------------------------------------------------- /etc/detailed-internal-dataflow.md: -------------------------------------------------------------------------------- 1 | # Internal Data-Handling 2 | ``` 3 | input-association-files 4 | │ │ 5 | │ [phenolist] 6 | │ │ 7 | │ v 8 | │ pheno-list.json 9 | │ │ │ 10 | [parse] │ 11 | │ │ │ 12 | v v │ 13 | parsed/* │ 14 | │ └──────┐ │ 15 | [sites] │ │ 16 | rsids.tsv.gz--[add-rsids] │ │ 17 | genes.bed--[add-genes] │ │ 18 | │ │ │ 19 | v │ │ 20 | sites.tsv │ │ 21 | │ │ └──[augment-phenos] 22 | [make-...] │ │ 23 | │ │ v 24 | v │ pheno_gz/* 25 | cpras-rsids-sqlite3 └─[matrix]─┘ │ │ └─[best-of-pheno]─> best_of_pheno/* 26 | │ │ └─[qq]-> qq/* 27 | v └─[manhattan]-> manhattan/* 28 | matrix.tsv.gz │ │ 29 | │ [top-hits] [phenotypes] 30 | [gather-pvalues-for-each-gene] │ │ 31 | │ v v 32 | v top_hits.json phenotypes.json 33 | best-phenos-by-gene.sqlite3 34 | ``` 35 | 36 | Square brackets show `pheweb ` subcommands. 37 | Filenames are in `generated-by-pheweb/` or its subdirectories (except `pheno-list.json` which is its sibling). 38 | 39 | Reference this diagram against the filepaths listed in `file_utils.py` and the steps in `pheweb process -h`. 40 | You can see all of the per-variant fields, per-association fields, and per-phenotype fields in `parse_utils.py`. 41 | 42 | - `parsed/*` files have the per-variant and per-association fields from the input files. 43 | - `sites.tsv` has every variant in the dataset, with the per-variant fields from the `parsed/*` plus `rsids` and `nearest_genes` and (optionally) `consequence`. 44 | - `pheno_gz/*` files are like `parsed/*` plus `rsids` and `nearest_genes` and (optionally) `consequence`. 45 | - Every line in these files must begin with a line from `sites.tsv` in order for `pheweb matrix` to work. 
ie, they've got to have the same per-variant fields. 46 | - `matrix.tsv.gz` contains all the per-variant fields (ie, an exact copy of `sites.tsv` in its left few columns), and all per-assoc fields (with header format `<per-assoc-field>@<phenocode>`, eg `maf@a1c`). 47 | -------------------------------------------------------------------------------- /etc/detailed-loading-instructions.md: -------------------------------------------------------------------------------- 1 | ## Configuration options 2 | 3 | - `assoc_min_maf` (float): an association (between a phenotype and variant) will only be included if its MAF is greater than or equal to this value. (default: `0`) 4 | 5 | - `cache` (string): a directory where files shared by all datasets can be cached. If you're loading multiple phewebs, setting `cache = "~/.pheweb/cache/"` will avoid downloading files multiple times. (default: None) 6 | 7 | - `num_procs` (int): the number of processes to use for parallel loading steps. (default: 2/3 of the number of cores on your machine) 8 | 9 | - `loading_nice = True`: sets nice=19 (reducing cpu priority) and sets ionice to class "Idle" (reducing IO when anything else is using disk) 10 | 11 | - `debugging_limit_num_variants` (int): only parses this many variants from each input association file and from the rsids file. This is convenient for quickly loading part of a dataset to check that it works as expected. 12 | 13 | - `download_pheno_sumstats`: explained in [README](../README.md) 14 | 15 | - `show_correlations`: explained in [README](../README.md) 16 | 17 | 18 | ## Making pheno-list.json 19 | 20 | 21 | There are four ways to make a `pheno-list.json`: 22 | 23 | 1. If you have a csv (or tsv, optionally gzipped) with a header that has exactly the right column names, just import it by running `pheweb phenolist import-phenolist "/path/to/my/pheno-list.csv"`. 24 | 25 | If you have multiple association files for each phenotype, you may put them all into a single column with `|` between them. 
For example, your file `pheno-list.csv` might look like this: 26 | 27 | ``` 28 | phenocode,assoc_files 29 | a1c,/home/peter/data/a1c.autosomal.gz|/home/peter/data/a1c.X.gz 30 | ear-length,/home/peter/data/ear-length.gz 31 | ``` 32 | 33 | 2. If you have one association file per phenotype, you can use a shell-glob to get assoc-files. Suppose that your association files are at paths like: 34 | 35 | - `/home/peter/data/a1c.autosomal.gz` 36 | - `/home/peter/data/ear-length.gz` 37 | 38 | Then you could run `pheweb phenolist glob "/home/peter/data/*.gz"` to get `assoc-files`. 39 | 40 | To get `phenocodes`, you can use this command which will take the text after the last `/` and before the next `.`: 41 | 42 | ``` 43 | pheweb phenolist extract-phenocode-from-filepath --simple 44 | ``` 45 | 46 | If that doesn't work, see `pheweb phenolist extract-phenocode-from-filepath -h` for how to use a regex capture group. 47 | 48 | 3. If you have multiple association files for some phenotypes, you can follow the directions in 2 and then run `pheweb phenolist unique-phenocode`. 49 | 50 | For example, if your association files are at: 51 | 52 | - `/home/peter/data/ear-length.gz` 53 | - `/home/peter/data/a1c.autosomal.gz` 54 | - `/home/peter/data/a1c.X.gz` 55 | 56 | then you can run: 57 | 58 | ``` 59 | pheweb phenolist glob "/home/peter/data/*.gz" 60 | pheweb phenolist extract-phenocode-from-filepath --simple 61 | pheweb phenolist unique-phenocode 62 | ``` 63 | 64 | 4. If you want to do more advanced things, like merging in more information from another file, check out the tools in `pheweb phenolist --help`. 65 | 66 | 67 | 68 | 69 | 70 | ## Distributing jobs across a cluster 71 | 72 | `pheweb process` runs a bunch of steps, which you can see by running `pheweb process -h`. 73 | Some of those steps can instead be run distributed across a cluster. 74 | You can see which steps by running `pheweb cluster -h`. 75 | 76 | The schedulers SLURM and SGE are natively supported. 
77 | Use `--engine=slurm` or `--engine=sge` when you run `pheweb cluster`. 78 | For other schedulers, you'll have to modify the output of `pheweb cluster`. 79 | 80 | For example, on SLURM you could run: 81 | 82 | ``` 83 | pheweb phenolist verify 84 | pheweb cluster --engine=slurm --step=parse 85 | pheweb sites && pheweb make-gene-aliases-sqlite3 && pheweb add-rsids && pheweb add-genes && pheweb make-cpras-rsids-sqlite3 86 | pheweb cluster --engine=slurm --step=augment-phenos 87 | pheweb cluster --engine=slurm --step=manhattan 88 | pheweb cluster --engine=slurm --step=qq 89 | pheweb process # This won't re-create any files that are already up-to-date. 90 | ``` 91 | 92 | 93 | ## Annotating with VEP 94 | 95 | Run the code in `etc/annotate_vep/run.sh`. It requires docker (and thus sudo) and only works on hg38. 96 | Read the comments at the top of that script. 97 | 98 | 99 |











100 | -------------------------------------------------------------------------------- /etc/images/screen-homepage-search.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/pheweb/b646214eae1ba524e239d2f5b20ca011756df770/etc/images/screen-homepage-search.png -------------------------------------------------------------------------------- /etc/images/screen-homepage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/pheweb/b646214eae1ba524e239d2f5b20ca011756df770/etc/images/screen-homepage.png -------------------------------------------------------------------------------- /etc/images/screen-lz-tooltip.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/pheweb/b646214eae1ba524e239d2f5b20ca011756df770/etc/images/screen-lz-tooltip.png -------------------------------------------------------------------------------- /etc/images/screen-lz.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/pheweb/b646214eae1ba524e239d2f5b20ca011756df770/etc/images/screen-lz.png -------------------------------------------------------------------------------- /etc/images/screen-manhattan.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/pheweb/b646214eae1ba524e239d2f5b20ca011756df770/etc/images/screen-manhattan.png -------------------------------------------------------------------------------- /etc/images/screen-phewas.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/pheweb/b646214eae1ba524e239d2f5b20ca011756df770/etc/images/screen-phewas.png -------------------------------------------------------------------------------- 
def format_float(x):
    '''
    Render a number as a short string for the generated test files.

    Values of at least 0.01 are formatted with three significant digits.
    Smaller values are formatted with a single significant digit in exponent
    notation, with the exponent's leading zero removed (`5e-03` -> `5e-3`) and
    an `e+00` exponent dropped entirely (`0e+00` -> `0`).  A one-digit mantissa
    with exponent -1 or -2 is rewritten as a plain decimal (`1e-2` -> `.01`).
    '''
    if x >= 0.01:
        return '{:.3}'.format(x)

    compact = '{:.0e}'.format(x)
    compact = compact.replace('e-0', 'e-')
    compact = compact.replace('e+00', '')

    # (pattern, decimal prefix) pairs, tried in order; the mantissa digit is appended to the prefix.
    decimal_forms = (
        (r'^[0-9]e-1$', '.'),
        (r'^[0-9]e-2$', '.0'),
    )
    for pattern, prefix in decimal_forms:
        if re.match(pattern, compact):
            return '{}{}'.format(prefix, compact[0])
    return compact
# Write one phenotype file for every on/off combination of the four optional
# columns.  The file name lists the enabled columns in alphabetical order,
# eg `has-fields-ac-maf`.
optional_fields = 'maf af ac ns'.split()
for x in itertools.product((True, False), repeat=4):
    args = {field: enabled for field, enabled in zip(optional_fields, x)}
    enabled_fields = sorted(field for field in args if args[field])
    name = 'has-fields-' + '-'.join(enabled_fields)
    make_pheno(name, *x)
def cmp(*xs):
    '''
    Print overlap counts for every non-empty combination of the given collections.

    Collections are labelled `a`, `b`, `c`, ... by position.
    The first printed line shows, for each combination, the size of the
    intersection of its members (eg `ab:12`).
    The second line shows the size of that intersection after removing every
    item found in the collections left out of the combination
    (eg `a-b:3` counts items that are in `a` but not in `b`).
    '''
    labels = string.ascii_letters
    positions = range(len(xs))
    combos = [
        combo
        for size in range(1, len(xs) + 1)
        for combo in itertools.combinations(positions, size)
    ]

    # Line 1: plain intersection sizes.
    for combo in combos:
        print(''.join(labels[i] for i in combo), end=':')
        shared = set.intersection(*(set(xs[i]) for i in combo))
        print(len(shared), end=' ')
    print('')

    # Line 2: intersection sizes, excluding anything present in the left-out collections.
    for combo in combos:
        print(''.join(labels[i] for i in combo), end='')
        left_out = [i for i in positions if i not in combo]
        if left_out:
            print('-' + ''.join(labels[i] for i in left_out), end='')
        print(':', end='')
        shared = set.intersection(*(set(xs[i]) for i in combo))
        exclusive = shared.difference(*(xs[i] for i in left_out))
        print(len(exclusive), end=' ')
    print('')
def run(argv:List[str]) -> None:
    '''
    Command-line entry point for the augment-phenos step.

    Parses the optional `--phenos` selection from `argv`, resolves it to a list
    of phenotypes (the whole phenolist when it is not given), and hands the
    per-phenotype work to `parallelize_per_pheno`.
    '''
    arg_parser = argparse.ArgumentParser(description="annotate each phenotype by pulling in information from the combined sites file")
    arg_parser.add_argument('--phenos', help="Can be like '4,5,6,12' or '4-6,12' to run on only the phenos at those positions (0-indexed) in pheno-list.json (and only if they need to run)")
    options = arg_parser.parse_args(argv)

    if options.phenos:
        selected_phenos = get_phenos_subset(options.phenos)
    else:
        selected_phenos = get_phenolist()

    parallelize_per_pheno(
        get_input_filepaths=get_input_filepaths,
        get_output_filepaths=get_output_filepaths,
        convert=convert,
        cmd='augment-pheno',
        phenos=selected_phenos,
    )
51 | pheno_variant.update(sites_variant) 52 | del pheno_variant['chrom_idx'] 53 | writer.write(pheno_variant) 54 | 55 | try: pheno_variant = next(pheno_variants) 56 | except StopIteration: raise PheWebError("It appears that the phenotype {!r} has no variants.".format(pheno['phenocode'])) 57 | try: sites_variant = next(sites_variants) 58 | except StopIteration: raise PheWebError("It appears that your sites file ({!r}) has no variants.".format(sites_filepath)) 59 | while True: 60 | cmp = _which_variant_is_bigger(pheno_variant, sites_variant) 61 | if cmp == 1: # pheno variant is ahead, so advance sites variant 62 | try: sites_variant = next(sites_variants) 63 | except StopIteration: raise PheWebError("The sites file ({}) ran out of variants while {} still had {}".format(sites_filepath, parsed_filepath, pheno_variant)) 64 | elif cmp == 2: # sites variant is ahead, so something went wrong. 65 | raise PheWebError("The sites file ({}) is missing a variant that's present in {}: {}.".format(sites_filepath, parsed_filepath, pheno_variant)) 66 | else: # they're equal, so write out the match and then advance both. (pheno first and sites second) 67 | write_variant(sites_variant, pheno_variant) 68 | try: pheno_variant = next(pheno_variants) 69 | except StopIteration: break # done. 70 | try: sites_variant = next(sites_variants) 71 | except StopIteration: raise PheWebError("The sites file ({}) ran out of variants while {} still had {}".format(sites_filepath, parsed_filepath, pheno_variant)) 72 | 73 | convert_VariantFile_to_IndexedVariantFile(out_unzipped_filepath, out_filepath) 74 | os.unlink(out_unzipped_filepath) 75 | 76 | 77 | def _which_variant_is_bigger(v1:Dict[str,Any], v2:Dict[str,Any]) -> int: 78 | '''1 means v1 is bigger. 2 means v2 is bigger. 
0 means tie.''' 79 | if v1['chrom_idx'] == v2['chrom_idx']: 80 | if v1['pos'] == v2['pos']: 81 | if v1['ref'] == v2['ref']: 82 | if v1['alt'] == v2['alt']: 83 | return 0 84 | return 1 if v1['alt'] > v2['alt'] else 2 85 | return 1 if v1['ref'] > v2['ref'] else 2 86 | return 1 if v1['pos'] > v2['pos'] else 2 87 | return 1 if v1['chrom_idx'] > v2['chrom_idx'] else 2 88 | -------------------------------------------------------------------------------- /pheweb/load/best_of_pheno.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This script creates generated-by-pheweb/best-of-pheno/ which contains the strongest 100k associations for the phenotype. 3 | ''' 4 | 5 | from ..file_utils import VariantFileReader, VariantFileWriter, get_pheno_filepath 6 | from ..utils import chrom_order 7 | from .load_utils import MaxPriorityQueue, parallelize_per_pheno, get_phenos_subset, get_phenolist 8 | 9 | import argparse 10 | from typing import List,Dict,Any 11 | 12 | 13 | NUM_VARIANTS = 100_000 14 | 15 | def run(argv:List[str]) -> None: 16 | parser = argparse.ArgumentParser(description="Make a file .") 17 | parser.add_argument('--phenos', help="Can be like '4,5,6,12' or '4-6,12' to run on only the phenos at those positions (0-indexed) in pheno-list.json (and only if they need to run)") 18 | args = parser.parse_args(argv) 19 | 20 | phenos = get_phenos_subset(args.phenos) if args.phenos else get_phenolist() 21 | 22 | parallelize_per_pheno( 23 | get_input_filepaths = lambda pheno: get_pheno_filepath('pheno_gz', pheno['phenocode']), 24 | get_output_filepaths = lambda pheno: get_pheno_filepath('best_of_pheno', pheno['phenocode'], must_exist=False), 25 | convert = make_bestof_file, 26 | cmd = 'best_of_pheno', 27 | phenos = phenos, 28 | ) 29 | 30 | 31 | def make_bestof_file(pheno:Dict[str,Any]) -> None: 32 | make_bestof_file_explicit(get_pheno_filepath('pheno_gz', pheno['phenocode']), 33 | get_pheno_filepath('best_of_pheno', pheno['phenocode'], 
def make_bestof_file_explicit(in_filepath:str, out_filepath:str) -> None:
    '''
    Write the retained subset of variants from `in_filepath` to `out_filepath`.

    Every variant read from `in_filepath` is offered to a MaxPriorityQueue via
    `add_and_keep_size(variant, variant['pval'], NUM_VARIANTS)`.  Whatever the
    queue still holds afterwards is sorted by (chromosome order, position) and
    written out.
    '''
    best = MaxPriorityQueue()
    with VariantFileReader(in_filepath) as reader:
        for variant in reader:
            best.add_and_keep_size(variant, variant['pval'], NUM_VARIANTS)

    def genomic_position(variant):
        return (chrom_order[variant['chrom']], variant['pos'])

    # The queue yields variants in its own order; restore genomic order before writing.
    ordered = sorted(best.pop_all(), key=genomic_position)
    with VariantFileWriter(out_filepath) as writer:
        writer.write_all(ordered)
def run(argv:List[str]) -> None:
    '''
    Write a job-array batch script that runs one per-phenotype step on a cluster.

    Parses `--engine`, `--step` and `--N_per_job` from `argv`, collects the
    phenolist indexes of the phenotypes for which
    `PerPhenoParallelizer().should_process_pheno()` is true, splits them into
    chunks of `N_per_job`, and writes a shell script in which each array task
    runs `<sys.argv[0]> conf num_procs=1 <step> --phenos=<chunk>`.
    Finally prints the commands to submit and monitor the job.

    If no phenotype needs processing, prints a message and exits with status 0.
    An `--N_per_job` below 1 is rejected with an argparse usage error.
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument('--engine', choices=['slurm', 'sge', 'uge'], required=True)
    parser.add_argument('--step', choices=['parse', 'augment-phenos', 'manhattan', 'qq'], required=True)
    # type=int is required: without it a value given on the command line stays a str
    # and cannot be used as a chunk size below.
    parser.add_argument('--N_per_job', type=int, default=5)
    args = parser.parse_args(argv)
    if args.N_per_job < 1:
        parser.error('--N_per_job must be at least 1')

    def should_process(pheno:Dict[str,Any]) -> bool:
        # Each step's module supplies the input/output path functions used for the up-to-date check.
        if args.step == "parse":
            from . import parse_input_files
            get_input_filepaths = parse_input_files.get_input_filepaths
            get_output_filepaths = parse_input_files.get_output_filepaths
        elif args.step == "augment-phenos":
            from . import augment_phenos
            get_input_filepaths = augment_phenos.get_input_filepaths
            get_output_filepaths = augment_phenos.get_output_filepaths
        elif args.step == "manhattan":
            from . import manhattan
            get_input_filepaths = manhattan.get_input_filepaths
            get_output_filepaths = manhattan.get_output_filepaths
        elif args.step == "qq":
            from . import qq
            get_input_filepaths = qq.get_input_filepaths
            get_output_filepaths = qq.get_output_filepaths
        else:
            raise Exception("No implementation for step {}".format(args.step))
        return PerPhenoParallelizer().should_process_pheno(
            pheno,
            get_input_filepaths = get_input_filepaths,
            get_output_filepaths = get_output_filepaths,
        )
    idxs = [i for i,pheno in enumerate(get_phenolist()) if should_process(pheno)]
    if not idxs:
        print('All phenos are up-to-date!')
        # sys.exit rather than the bare `exit`, which is only injected by the `site` module.
        sys.exit(0)

    jobs = chunked(idxs, args.N_per_job)
    batch_filepath = get_dated_tmp_path('{}-{}'.format(args.engine, args.step)) + '.sh'
    tmp_path = get_tmp_path(args.step)
    mkdir_p(tmp_path)
    with open(batch_filepath, 'w') as f:
        f.write(header_template[args.engine].format(n_jobs_m1 = len(jobs)-1, n_jobs = len(jobs), tmp_path=tmp_path))
        # Bash array with one comma-separated list of pheno indexes per array task.
        f.write('\n\njobs=(\n')
        for job in jobs:
            f.write(','.join(map(str,job)) + '\n')
        f.write(')\n\n')
        f.write('export PHEWEB_DATADIR={!r}\n'.format(conf.get_data_dir()))
        # Each task picks its own chunk out of the array using the scheduler's task-id variable.
        f.write(sys.argv[0] + ' conf num_procs=1 ' + args.step +' --phenos=${jobs[$' + array_id_variable[args.engine] + ']}\n')
    print('Run:\n{} {}\n'.format(submit_command[args.engine], batch_filepath))
    print('Monitor with `{} `\n'.format(monitor_command[args.engine]))
    print('output will be in {}'.format(tmp_path))
def get_genes_for_build(hg_build_number: int) -> None:
    '''
    Make sure the genes file for the given genome build exists locally.

    Sources are tried in order: the destination itself (nothing to do if it
    already exists), the configured cache directory, and finally a download
    from https://resources.pheweb.org/.  After a download, the file is also
    copied into the cache directory on a best-effort basis.

    Raises PheWebError if downloading is disallowed and the cache has no copy,
    or if the download fails.
    '''
    target = Path(get_filepath('genes-hg{}'.format(hg_build_number), must_exist=False))
    if target.exists():
        return

    # Prefer a copy from the shared cache, if one is configured and populated.
    cache_dir = conf.get_cache_dir()
    if cache_dir:
        cache_filepath = Path(cache_dir) / target.name
        if cache_filepath.exists():
            print('Copying {} to {}'.format(cache_filepath, target))
            shutil.copy(cache_filepath, target)
            return

    if not conf.is_allowed_to_download():
        message = "PheWeb is set to disallow downloading files, but couldn't pull {!r} from cache_dir {!r}".format(
            target, conf.get_cache_dir())
        raise PheWebError(message)

    # Download to a temporary path first and only then move it into place.
    url = 'https://resources.pheweb.org/{}'.format(target.name)
    print('Downloading {} from {}'.format(target, url))
    partial_download = Path(get_tmp_path(target))
    try:
        wget.download(url=url, out=str(partial_download))
        print()
    except Exception as exc:
        raise PheWebError('Failed to download genes from {}. Try `pheweb download-genes-from-scratch` instead.'.format(url)) from exc
    os.rename(partial_download, target)

    if cache_dir and Path(cache_dir).exists():
        print('Cacheing {} at {}'.format(target, cache_filepath))
        # Populating the cache is optional, so a failure here is deliberately ignored.
        try:
            shutil.copy(target, cache_filepath)
        except Exception:
            pass
def get_rsids_for_build(hg_build_number: int) -> None:
    """Make sure the rsids file for genome build `hg_build_number` exists locally.

    The file is looked for in this order:
      1. the destination path itself (if it already exists, nothing is done),
      2. the configured cache directory (the file is copied from there),
      3. https://resources.pheweb.org/ (only if downloading is allowed).

    After a successful download, a copy is stored in the cache directory on a
    best-effort basis: a failure to cache is reported but never fatal.

    Raises:
        PheWebError: if the file is not in the cache and downloading is
            disallowed, or if the download itself fails.
    """
    dest_filepath = Path(get_filepath('rsids-hg{}'.format(hg_build_number), must_exist=False))
    if dest_filepath.exists(): return

    # Check cache_dir
    cache_dir = conf.get_cache_dir()
    # `cache_filepath` is None exactly when no cache directory is configured.
    cache_filepath = Path(cache_dir) / dest_filepath.name if cache_dir else None
    if cache_filepath is not None and cache_filepath.exists():
        print('Copying {} to {}'.format(cache_filepath, dest_filepath))
        shutil.copy(cache_filepath, dest_filepath)
        return

    if not conf.is_allowed_to_download():
        raise PheWebError("PheWeb is set to disallow downloading files, but couldn't pull {!r} from cache_dir {!r}".format(
            dest_filepath, conf.get_cache_dir()))

    # Download from https://resources.pheweb.org/
    url = 'https://resources.pheweb.org/{}'.format(dest_filepath.name)
    print('Downloading {} from {}'.format(dest_filepath, url))
    # Download to a temporary path and rename, so a partial download never
    # appears at the final destination.
    dest_tmp_filepath = Path(get_tmp_path(dest_filepath))
    try:
        wget.download(url=url, out=str(dest_tmp_filepath)); print()
    except Exception as exc:
        raise PheWebError('Failed to download rsids from {}. Try `pheweb download-rsids-from-scratch` instead.'.format(url)) from exc
    os.rename(dest_tmp_filepath, dest_filepath)

    if cache_filepath is not None and Path(cache_dir).exists():
        print('Cacheing {} at {}'.format(dest_filepath, cache_filepath))
        # It's okay if this doesn't work, but tell the user instead of hiding it.
        try:
            shutil.copy(dest_filepath, cache_filepath)
        except Exception as exc:
            print('Warning: failed to cache {} at {}: {}'.format(dest_filepath, cache_filepath, exc))
def run(argv:List[str]) -> None:
    """Build the sqlite3 database for converting between chr-pos-ref-alt and rsid.

    The database contains a single table `cpras_rsids (cpra TEXT, rsid TEXT)`
    with one row per (variant, rsid) pair read from the `sites` file (the rsid
    is NULL for variants that have none), plus an index on `rsid`.

    Nothing is rebuilt when the existing database is at least as new as the
    `sites` file.  Otherwise the database is written to a temporary path and
    renamed into place once it is complete.
    """
    if '-h' in argv or '--help' in argv:
        print('Make sqlite3 db for converting between chr-pos-ref-alt and rsid')
        exit(1)

    sites_filepath = Path(get_filepath('sites'))
    cpras_rsids_filepath = Path(get_filepath('cpras-rsids-sqlite3', must_exist=False))

    if cpras_rsids_filepath.exists() and cpras_rsids_filepath.stat().st_mtime >= sites_filepath.stat().st_mtime:
        print('cpras-rsids-sqlite3 is up-to-date!')
        return

    def get_cpra_rsid_pairs() -> Iterator[Tuple[str,Optional[str]]]:
        # Yields one (cpra, rsid) pair per rsid of each variant, or (cpra, None)
        # when the variant has no rsid.
        with VariantFileReader(sites_filepath) as reader:
            for v in reader:
                cpra = '{chrom}-{pos}-{ref}-{alt}'.format(**v)
                if v['rsids']:
                    for rsid in v['rsids'].split(','):
                        yield (cpra, rsid)
                else:
                    yield (cpra, None)

    if cpras_rsids_filepath.exists(): cpras_rsids_filepath.unlink()
    cpras_rsids_tmp_filepath = Path(get_tmp_path(cpras_rsids_filepath))
    if cpras_rsids_tmp_filepath.exists(): cpras_rsids_tmp_filepath.unlink()

    # `with db_conn:` only commits/rolls back the transaction; it does NOT close
    # the connection.  `closing()` makes sure the file handle is released before
    # the database file is renamed.
    from contextlib import closing
    with closing(sqlite3.connect(str(cpras_rsids_tmp_filepath))) as db_conn:
        with db_conn:
            db_conn.execute('CREATE TABLE cpras_rsids (cpra TEXT, rsid TEXT)')
            db_conn.executemany('INSERT INTO cpras_rsids (cpra, rsid) VALUES (?,?)', get_cpra_rsid_pairs())
            db_conn.execute('CREATE INDEX rsid_idx ON cpras_rsids (rsid)')

    cpras_rsids_tmp_filepath.rename(cpras_rsids_filepath)
    print('Done making cpras-rsids sqlite3 at {}'.format(str(cpras_rsids_filepath)))
def clear_out_junk() -> None:
    """Delete `*.gz` files in the pheno_gz directory that match no current phenotype.

    A file is kept only when its basename, minus the trailing `.gz`, is a
    phenocode in the current phenolist.  Leftover files would otherwise be
    picked up by the glob used when building the matrix.
    """
    known_phenocodes = {entry['phenocode'] for entry in get_phenolist()}
    gz_pattern = get_filepath('pheno_gz') + '/*.gz'
    for gz_path in glob.glob(gz_pattern):
        # Strip the 3-character '.gz' suffix to recover the phenocode.
        phenocode = os.path.basename(gz_path)[:-3]
        if phenocode in known_phenocodes:
            continue
        print("Removing {} to help matrix glob".format(gz_path))
        os.remove(gz_path)
def run(argv:List[str]) -> None:
    """Build the matrix file (if `should_run()` says so) and its tabix index.

    The matrix itself is produced by the cffi function `cffi_make_matrix`,
    written to a temporary path, and renamed into place on success.  The
    `.tbi` index is (re)built whenever it is missing or older than the matrix.

    Raises:
        PheWebError: if the cffi function returns anything other than b'ok'.
    """
    if any(flag in argv for flag in ('-h', '--help')):
        print('Make a single large tabixed file of all phenotypes data')
        exit(1)

    matrix_path = get_filepath('matrix', must_exist=False)

    if not should_run():
        print('matrix is up-to-date!')
    else:
        clear_out_junk()

        sites_path = get_filepath('sites')
        pheno_glob = get_filepath('pheno_gz') + '/*.gz'
        tmp_matrix_path = get_tmp_path(matrix_path)

        # we don't need `ffi.new('char[]', ...)` because args are `const`
        c_args = [text.encode('utf8') for text in (sites_path, pheno_glob, tmp_matrix_path)]
        status = ffi.string(lib.cffi_make_matrix(*c_args), maxlen=1000)
        if status != b'ok':
            raise PheWebError('The portion of `pheweb matrix` written in c++/cffi failed with the message ' + repr(status))
        os.rename(tmp_matrix_path, matrix_path)

    tbi_path = matrix_path + '.tbi'
    tbi_is_fresh = os.path.exists(tbi_path) and mtime(tbi_path) >= mtime(matrix_path)
    if tbi_is_fresh:
        print('matrix.tbi is up-to-date!')
    else:
        print('tabixing matrix')
        # note: column indexes start at 0, whereas `/usr/bin/tabix` starts at 1
        pysam.tabix_index(
            filename=matrix_path, force=True,
            seq_col=0, start_col=1, end_col=1
        )
def write_failures(filepath:str, failed_results:Dict[str,Any]):
    """Write a plain-text report of per-phenotype failures to `filepath`.

    `failed_results` maps each phenocode to a dict that has an 'exception_tb'
    entry; one section per phenocode is written, in the mapping's own order.
    The file is overwritten if it already exists.
    """
    section_template = '=== Error for phenocode {} ===\n{}\n\n'
    with open(filepath, 'w') as report:
        report.writelines(
            section_template.format(phenocode, result['exception_tb'])
            for phenocode, result in failed_results.items()
        )
64 | try: 65 | with VariantFileWriter(get_pheno_filepath('parsed', pheno['phenocode'], must_exist=False)) as writer: 66 | pheno_reader = PhenoReader(pheno, minimum_maf=conf.get_assoc_min_maf()) 67 | variants = pheno_reader.get_variants() 68 | debugging_limit_num_variants = conf.get_debugging_limit_num_variants() 69 | if debugging_limit_num_variants: variants = itertools.islice(variants, 0, debugging_limit_num_variants) 70 | writer.write_all(variants) 71 | except Exception as exc: 72 | import traceback 73 | yield { 74 | 'type': 'warning', # TODO: make PerPhenoParallelizer print this. 75 | 'warning_str': 76 | 'Exception:\n' + indent(str(exc)) + 77 | '\nTraceback:\n' + indent(traceback.format_exc()) + 78 | '\nFiles:\n' + indent('\n'.join(pheno['assoc_files'])) 79 | } 80 | yield {"succeeded": False, "exception_str": str(exc), "exception_tb": traceback.format_exc()} 81 | else: 82 | yield {"succeeded": True} 83 | -------------------------------------------------------------------------------- /pheweb/load/pheno_correlation.py: -------------------------------------------------------------------------------- 1 | """ 2 | Display (and eventually generate) a set of correlated phenotypes as generated from an external pipeline 3 | 4 | This information will be shown on phenotype summary pages. This is an OPTIONAL feature- 5 | if information is not available, it will usually skip this step without failure. 6 | """ 7 | import logging 8 | import os 9 | from boltons.fileutils import AtomicSaver 10 | from typing import List,Optional 11 | 12 | from .. import conf 13 | from ..file_utils import get_filepath, get_tmp_path 14 | from ..utils import get_phenolist, PheWebError 15 | from .. 
def make_symmetric(in_filepath:str, out_filepath:str) -> None:
    '''
    The output of pheweb-rg-pipeline includes the line
    traitA traitB 0.4 0.1 2 1e-3 ldsc
    but it omits the line
    traitB traitA 0.4 0.1 2 1e-3 ldsc
    so this function adds that second line for the symmetric position in the correlation matrix.
    If the file already has both directions for some or all pairs of traits, that's okay.

    The result (header + all rows, sorted) is written atomically to `out_filepath`.
    Blank lines in the input are ignored, and every output line is newline-terminated
    even if the input's last line was not.

    Raises:
        PheWebError: if the input is empty or its header doesn't have the expected columns.
    '''
    expected_colnames = ['Trait1','Trait2','rg','SE','Z','P-value','Method']
    with open(in_filepath) as in_f:
        header = next(in_f, None)
        if header is None:
            raise PheWebError('The correlations file {!r} is empty.'.format(in_filepath))
        if header.rstrip().split('\t') != expected_colnames:
            raise PheWebError('The correlations file {!r} has header {!r} but the expected columns are {!r}.'.format(
                in_filepath, header.rstrip(), expected_colnames))
        correlations = []
        for line in in_f:
            # Drop the line terminator so that rows can be re-ordered safely;
            # it is added back uniformly when writing.
            line = line.rstrip('\r\n')
            if not line: continue
            trait1, trait2, rest_of_line = line.split('\t', maxsplit=2)
            correlations.append((trait1, trait2, rest_of_line))

    trait_pairs_seen = {(trait1, trait2) for trait1, trait2, _ in correlations}
    # Build the mirrored rows in a separate list rather than appending to
    # `correlations` while iterating over it.
    mirrored = [(trait2, trait1, rest_of_line)
                for trait1, trait2, rest_of_line in correlations
                if (trait2, trait1) not in trait_pairs_seen]
    correlations.extend(mirrored)
    correlations.sort()

    with AtomicSaver(out_filepath, text_mode=True, part_file=get_tmp_path(out_filepath), overwrite_part=True) as out_f:
        out_f.write(header.rstrip('\r\n') + '\n')
        for trait1, trait2, rest_of_line in correlations:
            out_f.write(trait1 + '\t' + trait2 + '\t' + rest_of_line + '\n')
def get_phenotypes_including_top_variants() -> Iterator[Dict[str,Any]]:
    """Yield one summary dict per phenotype in the phenolist.

    For each phenotype this reads its `qq` JSON (for the GC lambda at 0.01) and
    its `manhattan` JSON (for the unbinned variants), then reports the variant
    with the smallest p-value, the number of peak variants with pval <= 5e-8,
    and a few optional descriptive fields copied from the phenolist entry.
    """
    optional_pheno_keys = ['num_samples', 'num_controls', 'num_cases', 'category', 'phenostring']
    for pheno in get_phenolist():
        with open(get_pheno_filepath('qq', pheno['phenocode'])) as qq_file:
            qq_data = json.load(qq_file)
        # GC lambda 0.01 isn't set if it was infinite or otherwise broken.
        gc_lambda_hundred = qq_data['overall']['gc_lambda'].get('0.01', None)

        with open(get_pheno_filepath('manhattan', pheno['phenocode'])) as manhattan_file:
            manhattan_data = json.load(manhattan_file)
        variants = manhattan_data['unbinned_variants']

        best = min(variants, key=lambda v: v['pval'])
        num_peaks = sum(v.get('peak',False) and v['pval']<=5e-8 for v in variants)

        summary = {
            'phenocode': pheno['phenocode'],
            'pval': best['pval'],
            'nearest_genes': best['nearest_genes'],
            'chrom': best['chrom'],
            'pos': best['pos'],
            'ref': best['ref'],
            'alt': best['alt'],
            'rsids': best['rsids'],
            'num_peaks': num_peaks,
            'gc_lambda_hundred': gc_lambda_hundred, # numbers in keys break streamtable
        }
        summary.update((key, pheno[key]) for key in optional_pheno_keys if key in pheno)

        genes = summary['nearest_genes']
        if isinstance(genes, list):
            summary['nearest_genes'] = ','.join(genes)
        yield summary
def run(argv:List[str]) -> None:
    """Write the per-phenotype summary as JSON and as an uncompressed TSV.

    Phenotypes are sorted by the p-value of their top variant.  Nothing is
    written when `should_run()` reports that the outputs are up-to-date.
    """
    if any(flag in argv for flag in ('-h', '--help')):
        print('Make a file summarizing information about each phenotype (for use in the phenotypes table)')
        exit(1)

    if should_run():
        summaries = list(get_phenotypes_including_top_variants())
        summaries.sort(key=lambda summary: summary['pval'])

        json_path = get_filepath('phenotypes_summary', must_exist=False)
        write_json(filepath=json_path, data=summaries)
        print("wrote {} phenotypes to {}".format(len(summaries), json_path))

        tsv_path = get_filepath('phenotypes_summary_tsv', must_exist=False)
        write_heterogenous_variantfile(tsv_path, summaries, use_gzip=False)
        print("wrote {} phenotypes to {}".format(len(summaries), tsv_path))
    else:
        print('Already up-to-date!')
def run(argv:List[str]) -> None:
    """Run every step in the module-level `scripts` list, in order.

    Each entry of `scripts` is "<module> [args...]"; the module is imported
    relative to this package and its `run(args)` is called.  Timing is printed
    for each step, and the first exception stops the pipeline and propagates.
    Passing exactly `['--no-parse']` skips the `parse_input_files` step.
    """
    if '-h' in argv or '--help' in argv:
        command_lines = [' pheweb {}'.format(name.replace('_', '-')) for name in scripts]
        print('Run all the steps to go from a prepared phenolist to a ready-to-serve pheweb.')
        print('This is equivalent to running:\n')
        print(' &&\n'.join(command_lines))
        print('')
        print("Passing `--no-parse` will skip `pheweb parse-input-files` (so it won't error if input filepaths are missing)")
        exit(1)

    skip_parse = (argv == ['--no-parse'])
    myscripts = [s for s in scripts if s != 'parse_input_files'] if skip_parse else scripts

    for script in myscripts:
        print('==> Starting `pheweb {}`'.format(script.replace('_', '-')))
        started_at = time.time()
        words = script.split()
        step_module = importlib.import_module('.{}'.format(words[0]), __package__)
        step_run = getattr(step_module, 'run', None) # appeases mypy
        if not callable(step_run):
            raise Exception("module.run ({!r}) isn't callable for module {!r} for script {!r}".format(step_run, step_module, script))
        try:
            step_run(words[1:])
        except Exception:
            print('==> failed after {}'.format(fmt_seconds(time.time() - started_at)))
            raise
        print('==> Completed in {}'.format(fmt_seconds(time.time() - started_at)), end='\n\n')
def stringify_assocs(assocs:List[Dict[str,Any]]) -> None:
    """Turn each association's `nearest_genes` list into a comma-joined string, in place.

    Associations whose `nearest_genes` is missing or is not a list are left
    untouched.  Nothing is returned; the dicts in `assocs` are mutated.
    """
    for assoc in assocs:
        genes = assoc.get('nearest_genes')
        if not isinstance(genes, list):
            continue
        assoc['nearest_genes'] = ','.join(genes)
must_exist=False) 47 | out_filepath_1k_json = get_filepath('top-hits-1k', must_exist=False) 48 | out_filepath_tsv = get_filepath('top-hits-tsv', must_exist=False) 49 | 50 | if argv and argv[0] == '-h': 51 | print(''' 52 | Make lists of top hits for this PheWeb in {} and {}. 53 | 54 | To count as a top hit, a variant must: 55 | - have a p-value < {} 56 | - be among the top {:,} associations in its phenotype 57 | - have the smallest p-value within {:,} bases within its phenotype (well, not exactly, but pretty much) 58 | 59 | Some loci may have hits for multiple phenotypes. If you want a list of loci with 60 | just the top phenotype for each, use `pheweb top-loci`. 61 | '''.format(out_filepath_json, 62 | out_filepath_tsv, 63 | '{:0.0e}'.format(min(conf.get_top_hits_pval_cutoff(), conf.get_manhattan_peak_pval_threshold())).replace('e-0', 'e-'), 64 | conf.get_manhattan_num_unbinned(), 65 | conf.get_within_pheno_mask_around_peak(), 66 | )) 67 | exit(1) 68 | 69 | if not should_run(): 70 | print('Already up-to-date!') 71 | return 72 | 73 | hits = get_all_hits() 74 | 75 | write_json(filepath=out_filepath_json, data=hits, sort_keys=True) 76 | print("wrote {} hits to {}".format(len(hits), out_filepath_json)) 77 | 78 | write_json(filepath=out_filepath_1k_json, data=hits[:1000], sort_keys=True) 79 | print("wrote {} hits to {}".format(len(hits[:1000]), out_filepath_1k_json)) 80 | 81 | if hits: # If there are no hits, we can't write a proper tsv 82 | stringify_assocs(hits) 83 | write_heterogenous_variantfile(out_filepath_tsv, hits, use_gzip=False) 84 | print("wrote {} hits to {}".format(len(hits), out_filepath_tsv)) 85 | -------------------------------------------------------------------------------- /pheweb/load/top_loci.py: -------------------------------------------------------------------------------- 1 | 2 | from .. 
def get_loci(hits=None, mask_distance=None):
    '''
    Yield one association per locus: the smallest-p-value hit, across all phenotypes.

    Within each chromosome the hit with the smallest p-value is yielded, every hit
    within `mask_distance` bases of it (inclusive) is discarded, and the process
    repeats on what remains.

    :param hits: associations to reduce (dicts with `chrom`, `pos` and `pval`).
                 Defaults to `get_all_hits()`.
    :param mask_distance: masking radius in bases.
                 Defaults to `conf.get_between_pheno_mask_around_peak()`.
    '''
    if hits is None:
        hits = get_all_hits()
    if mask_distance is None:
        # Looked up once; it does not change while loci are being selected.
        mask_distance = conf.get_between_pheno_mask_around_peak()

    hits_by_chrom = dict()
    for hit in hits:
        hits_by_chrom.setdefault(hit['chrom'], []).append(hit)

    for chrom_hits in hits_by_chrom.values():
        while chrom_hits:
            best_assoc = min(chrom_hits, key=lambda assoc: assoc['pval'])
            yield best_assoc
            # Drop the chosen peak and everything it masks (distance <= mask_distance).
            chrom_hits = [h for h in chrom_hits if abs(h['pos'] - best_assoc['pos']) > mask_distance]


def run(argv):
    '''
    Command-line entry point: write the top-loci JSON and tsv files.

    `-h` as the first argument prints usage and exits with status 1.
    '''
    out_filepath_json = get_filepath('top-loci', must_exist=False)
    out_filepath_tsv = get_filepath('top-loci-tsv', must_exist=False)

    if argv and argv[0] == '-h':
        print('''
Make lists of top loci for this PheWeb in {} and {}.

To count as a top loci, a variant must:
- have a p-value < {}
- be among the top {:,} associations in its phenotype
- have the smallest p-value within {:,} bases within its phenotype (well, not exactly, but pretty much)
- have the smallest p-value within {:,} bases (well, not exactly, but pretty much)

Each loci will include the phenotype that has the smallest p-value at that location.
Even if this loci also contains significant hits for other phenotypes, they won't be
shown. If you want all hits, use `pheweb top-hits`.
'''.format(out_filepath_json,
           out_filepath_tsv,
           '{:0.0e}'.format(conf.get_top_hits_pval_cutoff()).replace('e-0', 'e-'),
           conf.get_manhattan_num_unbinned(),
           conf.get_within_pheno_mask_around_peak(),
           conf.get_between_pheno_mask_around_peak(),
           ))
        # Same exit status as before, without relying on the `site`-provided `exit` helper.
        raise SystemExit(1)

    loci = sorted(get_loci(), key=lambda d: d['pval'])
    write_json(filepath=out_filepath_json, data=loci, sort_keys=True)
    print("wrote {} loci to {}".format(len(loci), out_filepath_json))

    if loci: # As in `pheweb top-hits`: with no rows we can't write a proper tsv
        stringify_assocs(loci)
        write_heterogenous_variantfile(out_filepath_tsv, loci)
        print("wrote {} loci to {}".format(len(loci), out_filepath_tsv))
def run(argv):
    '''
    Write `wsgi.py` into the current directory.

    The file is assembled from the module-level templates; the virtualenv-activation
    section (`template2`) is included only when the `VIRTUAL_ENV` environment
    variable is set to a non-empty value.  `-h` as the first argument prints a
    one-line description and returns without writing anything.
    '''
    wants_help = bool(argv) and argv[0] == '-h'
    if wants_help:
        print('Make wsgi.py, which can be used with gunicorn or other WSGI-compatible webservers.')
        return

    venv_dir = os.environ.get('VIRTUAL_ENV', '')
    sections = [template1, template2, template3] if venv_dir else [template1, template3]

    # The directory that contains the `pheweb` package, i.e. two levels above `pheweb/utils.py`.
    package_dir = os.path.dirname(utils.__file__)
    pheweb_dir = os.path.dirname(package_dir)

    contents = ''.join(sections).format(pheweb_dir=pheweb_dir, venv_dir=venv_dir)
    with open('wsgi.py', 'w') as f:
        f.write(contents)
class GoogleSignIn(object):
    '''
    Google OAuth2 sign-in helper.

    On construction it downloads Google's OpenID discovery document and builds an
    `OAuth2Service` from the endpoints listed there plus the client id and secret
    returned by `conf.get_login_google_id_and_secret()`.
    '''

    # Seconds to wait for Google's discovery document.  Without a timeout `requests`
    # waits indefinitely, which would hang whatever constructs this object.
    _DISCOVERY_TIMEOUT_SECONDS = 10

    def __init__(self, current_app):
        # `current_app` is accepted for interface compatibility; it is not used here.
        google_params = self._get_google_info()
        id_and_secret = conf.get_login_google_id_and_secret()
        self.service = OAuth2Service(
            name='google',
            client_id=id_and_secret[0],
            client_secret=id_and_secret[1],
            authorize_url=google_params.get('authorization_endpoint'),
            base_url=google_params.get('userinfo_endpoint'),
            access_token_url=google_params.get('token_endpoint')
        )

    def _get_google_info(self):
        '''Return Google's OpenID configuration as a dict; raises on HTTP errors or timeout.'''
        # Previously I used: return json.loads(urllib2.urlopen('https://accounts.google.com/.well-known/openid-configuration'))
        r = requests.get('https://accounts.google.com/.well-known/openid-configuration',
                         timeout=self._DISCOVERY_TIMEOUT_SECONDS)
        r.raise_for_status()
        return r.json()

    def authorize(self):
        '''Return a redirect response that sends the user to Google's authorization page.'''
        return redirect(self.service.get_authorize_url(
            scope='email',
            response_type='code',
            prompt='select_account',
            redirect_uri=self.get_callback_url())
        )

    def get_callback_url(self):
        '''Return the absolute https URL of the `.oauth_callback_google` endpoint.'''
        return url_for('.oauth_callback_google',
                       _external=True,
                       _scheme='https') # Google only allows HTTPS callbacks, so assume https. I don't know why flask didn't see X-SCHEME header or whatever.

    def callback(self):
        '''
        Finish the OAuth2 flow for the current request.

        Returns `(display_name, email)`, or `(None, None)` when the request has no
        `code` argument.  `display_name` falls back to the email address when the
        account has no `name`.
        '''
        if 'code' not in request.args:
            return (None, None)
        # The following two commands pass **kwargs to requests.
        oauth_session = self.service.get_auth_session(
            data={'code': request.args['code'],
                  'grant_type': 'authorization_code',
                  'redirect_uri': self.get_callback_url()
                  },
            decoder = lambda x: json.loads(x.decode('utf-8'))
        )
        me = oauth_session.get('').json()
        return (me['name'] if 'name' in me else me['email'], # SAML emails (like @umich.edu) don't have 'name'
                me['email'])
class _Get_Pheno_Region:
    '''Namespace for the helpers behind `get_pheno_region`.'''

    @staticmethod
    def _rename(d:dict, oldkey, newkey):
        '''Move the value stored under `oldkey` to `newkey`, in place.'''
        value = d[oldkey]
        d[newkey] = value
        del d[oldkey]

    @staticmethod
    def _dataframify(list_of_dicts:List[Dict[Any,Any]]) -> Dict[Any,list]:
        '''converts [{a:1,b:2}, {a:11,b:12}] -> {a:[1,11], b:[2,12]}'''
        all_keys = set(itertools.chain.from_iterable(list_of_dicts))
        columns: Dict[Any,list] = {key: [] for key in all_keys}
        for row in list_of_dicts:
            for key in row:
                columns[key].append(row[key])
        return columns

    @staticmethod
    def get_pheno_region(phenocode:str, chrom:str, pos_start:int, pos_end:int) -> dict:
        '''
        Return the variants of `phenocode` in a region as a column-oriented dict.

        The region is read with `reader.get_region(chrom, pos_start, pos_end+1)`.
        Every variant gets an `id` (`chrom:pos_ref/alt`) and an `end` equal to its
        position, and has four keys renamed before the rows are turned into columns.
        The result is `{'data': <columns>, 'lastpage': None}`.
        '''
        # TODO: change JS to make these unnecessary
        renames = (('chrom', 'chr'), ('pos', 'position'), ('rsids', 'rsid'), ('pval', 'pvalue'))

        rows = []
        with IndexedVariantFileReader(phenocode) as reader:
            for variant in reader.get_region(chrom, pos_start, pos_end+1):
                variant['id'] = '{chrom}:{pos}_{ref}/{alt}'.format(**variant)
                variant['end'] = variant['pos']
                for oldkey, newkey in renames:
                    _Get_Pheno_Region._rename(variant, oldkey, newkey)
                rows.append(variant)

        return {
            'data': _Get_Pheno_Region._dataframify(rows),
            'lastpage': None,
        }
get_pheno_region = _Get_Pheno_Region.get_pheno_region


class _ParseVariant:
    '''Parses free-text variant queries such as `chr1:123`, `1-123-A-T` or `X`.'''
    chrom_regex = re.compile(r'(?:[cC][hH][rR])?([0-9XYMT]+)')
    chrom_pos_regex = re.compile(chrom_regex.pattern + r'[-_:/ ]([0-9]+)')
    chrom_pos_ref_alt_regex = re.compile(chrom_pos_regex.pattern + r'[-_:/ ]([-AaTtCcGg\.]+)[-_:/ ]([-AaTtCcGg\.]+)')

    def parse_variant(self, query, default_chrom_pos=True):
        '''
        Return a 4-tuple `(chrom, pos, ref, alt)` parsed from the start of `query`.

        The most specific pattern that matches wins; parts that were not found are
        `None`.  With `default_chrom_pos` (the default) a missing chromosome becomes
        `'1'` and a missing position becomes `0`.  `pos` is an int, alleles are
        upper-cased.
        '''
        parts = ()
        # Most specific pattern first.
        for regex in (self.chrom_pos_ref_alt_regex, self.chrom_pos_regex, self.chrom_regex):
            found = regex.match(query)
            if found:
                parts = found.groups()
                break

        if default_chrom_pos:
            if len(parts) == 0:
                parts = parts + ('1',)
            if len(parts) == 1:
                parts = parts + (0,)

        if len(parts) >= 2:
            chrom, pos, *alleles = parts
            parts = (chrom, int(pos)) + tuple(bases.upper() for bases in alleles)

        # Pad with None up to (chrom, pos, ref, alt).
        return parts + (None,) * (4 - len(parts))
parse_variant = _ParseVariant().parse_variant


class _GetVariant:
    '''Looks variants up in the matrix, creating the `MatrixReader` on first use.'''

    def get_variant(self, query:str) -> Optional[Dict[str,Any]]:
        '''
        Return the matrix record for the variant named by `query`, or None if absent.

        `query` must contain chrom, pos, ref and alt.  In the returned dict `phenos`
        is a list (the values of the reader's mapping) and `variant_name` is a
        display string.
        '''
        chrom, pos, ref, alt = parse_variant(query)
        assert None not in [chrom, pos, ref, alt]

        try:
            matrix_reader = self._matrix_reader
        except AttributeError:
            matrix_reader = self._matrix_reader = MatrixReader()

        with matrix_reader.context() as mr:
            v = mr.get_variant(chrom, pos, ref, alt)
        if v is None:
            return None

        v['phenos'] = list(v['phenos'].values())
        v['variant_name'] = '{} : {:,} {} / {}'.format(chrom, pos, ref, alt)
        return v
get_variant = _GetVariant().get_variant


def get_random_page() -> Optional[str]:
    '''
    Return the URL of a random phenotype, variant or region page, or None without hits.

    The page is built from a random entry of the `top-hits-1k` file: one with
    p-value below 5e-8 when at least ten such hits exist, otherwise one of the
    first ten hits.
    '''
    with open(get_filepath('top-hits-1k')) as f:
        hits = json.load(f)
    if not hits:
        return None

    candidates = [hit for hit in hits if hit['pval'] < 5e-8]
    if len(candidates) < 10:
        candidates = hits[:10]
    hit = random.choice(candidates)

    roll = random.random()
    if roll < 0.4:
        return url_for('.pheno_page', phenocode=hit['phenocode'])
    if roll < 0.8:
        return url_for('.variant_page', query='{chrom}-{pos}-{ref}-{alt}'.format(**hit))

    offset = int(50e3)
    region = '{}:{}-{}'.format(hit['chrom'], hit['pos']-offset, hit['pos']+offset)
    return url_for('.region_page', phenocode=hit['phenocode'], region=region)
    # TODO: check if this hit is inside a gene. if so, include that page.
class RelativeResponse(Response):
    '''Response class used by `relative_redirect`, with `autocorrect_location_header` switched off.'''
    autocorrect_location_header = False


def relative_redirect(url:str) -> Response:
    '''Return a redirect to `url` built with `RelativeResponse`.'''
    # `flask.redirect(url)` turns relative URLs into absolute.
    # But modern browsers allow relative location header.
    # And I want relative to avoid thinking about http/https and hostname.
    # Only a few places in pheweb need absolute URLs (eg, auth), and everywhere else can just use relative.
    response = redirect(url, Response=RelativeResponse)
    return response
21 | background-image: linear-gradient(to bottom, #0088cc, #0077b3); 22 | } 23 | /* ---- */ 24 | 25 | /* StreamTable */ 26 | .pagination { 27 | margin: 0; 28 | } 29 | .stream_table_row { 30 | margin-top: 10px; 31 | } 32 | #stream_table { 33 | margin-bottom: 10px; 34 | } 35 | /* ---- */ 36 | 37 | 38 | -------------------------------------------------------------------------------- /pheweb/serve/static/common.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | window.debug = window.debug || {}; 4 | 5 | // deal with IE11 problems 6 | if (!Math.log10) { Math.log10 = function(x) { return Math.log(x) / Math.LN10; }; } 7 | if (!!window.MSInputMethodContext && !!document.documentMode) { /*ie11*/ $('').appendTo($('head')); } 8 | if (!String.prototype.includes) { 9 | String.prototype.includes = function(search, start) { 10 | 'use strict'; 11 | if (typeof start !== 'number') { 12 | start = 0; 13 | } 14 | if (start + search.length > this.length) { 15 | return false; 16 | } else { 17 | return this.indexOf(search, start) !== -1; 18 | } 19 | }; 20 | } 21 | 22 | 23 | (function() { 24 | // It's unfortunate that these are hard-coded, but it works pretty great, so I won't change it now. 25 | var autocomplete_bloodhound = new Bloodhound({ 26 | datumTokenizer: Bloodhound.tokenizers.obj.whitespace('display'), 27 | queryTokenizer: Bloodhound.tokenizers.whitespace, 28 | identify: function(sugg) { return sugg.display; }, // maybe allows Bloodhound to `.get()` objects 29 | remote: { 30 | url: window.model.urlprefix + '/api/autocomplete?query=%QUERY', 31 | wildcard: '%QUERY', 32 | rateLimitBy: 'throttle', 33 | rateLimitWait: 100, 34 | }, 35 | }); 36 | 37 | $(function() { 38 | $('.typeahead').typeahead({ 39 | hint: false, 40 | highlight: true, 41 | minLength: 1, 42 | }, { 43 | name: 'autocomplete', 44 | source: autocomplete_bloodhound, 45 | display: 'value', 46 | limit: 100, 47 | templates: { 48 | suggestion: _.template("
<%= display %>
"), 49 | empty: "
No matches found.
" 50 | } 51 | }); 52 | 53 | $('.typeahead').bind('typeahead:select', function(ev, suggestion) { 54 | window.location.href = suggestion.url; 55 | }); 56 | }); 57 | })(); 58 | 59 | 60 | // convenience functions 61 | function fmt(format) { 62 | var args = Array.prototype.slice.call(arguments, 1); 63 | return format.replace(/{(\d+)}/g, function(match, number) { 64 | return (typeof args[number] != 'undefined') ? args[number] : match; 65 | }); 66 | } 67 | 68 | function two_digit_format(x) { return (x>=.1)? x.toFixed(2) : (x>=.01)? x.toFixed(3) : x.toExponential(1); } 69 | -------------------------------------------------------------------------------- /pheweb/serve/static/fonts/glyphicons-halflings-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/pheweb/b646214eae1ba524e239d2f5b20ca011756df770/pheweb/serve/static/fonts/glyphicons-halflings-regular.ttf -------------------------------------------------------------------------------- /pheweb/serve/static/fonts/glyphicons-halflings-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/pheweb/b646214eae1ba524e239d2f5b20ca011756df770/pheweb/serve/static/fonts/glyphicons-halflings-regular.woff -------------------------------------------------------------------------------- /pheweb/serve/static/fonts/glyphicons-halflings-regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/pheweb/b646214eae1ba524e239d2f5b20ca011756df770/pheweb/serve/static/fonts/glyphicons-halflings-regular.woff2 -------------------------------------------------------------------------------- /pheweb/serve/static/gene.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | function populate_streamtable(data) { 4 | $(function() { 5 | // data = _.sortBy(data, 
_.property('pval')); 6 | var template = _.template($('#streamtable-template').html()); 7 | var view = function(p) { 8 | return template({p: p}); 9 | }; 10 | 11 | var options = { 12 | view: view, 13 | search_box: false, 14 | pagination: { 15 | span: 5, 16 | next_text: 'Next ', 17 | prev_text: ' Previous', 18 | per_page_select: false, 19 | per_page: 10 20 | } 21 | } 22 | 23 | $("").appendTo("head"); 24 | 25 | if (data.length <= 10) { 26 | $("").appendTo("head"); 27 | options.pagination.next_text = ""; 28 | options.pagination.prev_text = ""; 29 | } 30 | 31 | $('#stream_table').stream_table(options, data); 32 | }); 33 | } 34 | populate_streamtable(window.significant_phenos); 35 | -------------------------------------------------------------------------------- /pheweb/serve/static/phenotypes.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | function populate_streamtable(phenotypes) { 4 | $(function() { 5 | // This is mostly copied from . 
6 | var data = phenotypes; 7 | // data = _.sortBy(data, _.property('pval')); 8 | var template = _.template($('#streamtable-template').html()); 9 | var view = function(pheno) { 10 | return template({h: pheno}); 11 | }; 12 | var $found = $('#streamtable-found'); 13 | $found.text(data.length + " phenotypes"); 14 | 15 | var callbacks = { 16 | pagination: function(summary){ 17 | if ($.trim($('#search').val()).length > 0){ 18 | $found.text(summary.total + " matching phenotypes"); 19 | } else { 20 | $found.text(data.length + " phenotypes"); 21 | } 22 | } 23 | } 24 | 25 | var options = { 26 | view: view, 27 | search_box: '#search', 28 | callbacks: callbacks, 29 | pagination: { 30 | span: 5, 31 | next_text: 'Next ', 32 | prev_text: ' Previous', 33 | per_page_select: false, 34 | per_page_opts: [100], // this is the best way I've found to control the number of rows 35 | } 36 | } 37 | 38 | $('#stream_table').stream_table(options, data); 39 | 40 | }); 41 | } 42 | -------------------------------------------------------------------------------- /pheweb/serve/static/region.css: -------------------------------------------------------------------------------- 1 | .lz-toolbar.lz-plot-toolbar { 2 | text-align: center; 3 | } 4 | .lz-toolbar-left, .lz-toolbar-right { 5 | display: inline-block; 6 | } 7 | .lz-toolbar-group-start, .lz-toolbar-group-middle, .lz-toolbar-group-end { 8 | float: none !important; 9 | } 10 | /* 11 | TODO: merge toolbar-component-centering into LZ. (should it be the default?) 12 | - if not, make it explicit. it's bad that I'm just centering anything with `.lz-toolbar-group-*`. 
13 | TODO: stop text-align:center from cascading downward through descendants 14 | */ 15 | -------------------------------------------------------------------------------- /pheweb/serve/static/top_hits.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | function populate_streamtable(hits) { 4 | $(function() { 5 | // This is mostly copied from . 6 | var data = hits; 7 | // data = _.sortBy(data, _.property('pval')); 8 | var template = _.template($('#streamtable-template').html()); 9 | var view = function(hit) { 10 | return template({h: hit}); 11 | }; 12 | var $found = $('#streamtable-found'); 13 | $found.text(data.length + " hits"); 14 | 15 | var callbacks = { 16 | pagination: function(summary){ 17 | if ($.trim($('#search').val()).length > 0){ 18 | $found.text(summary.total + " matching hits"); 19 | } else { 20 | $found.text(data.length + " hits"); 21 | } 22 | } 23 | } 24 | 25 | var options = { 26 | view: view, 27 | search_box: '#search', 28 | callbacks: callbacks, 29 | pagination: { 30 | span: 5, 31 | next_text: 'Next ', 32 | prev_text: ' Previous', 33 | per_page_select: false, 34 | per_page_opts: [100], // this is the best way I've found to control the number of rows 35 | } 36 | } 37 | 38 | $('#stream_table').stream_table(options, data); 39 | 40 | }); 41 | } 42 | -------------------------------------------------------------------------------- /pheweb/serve/templates/about.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | 3 | {% block contained %} 4 |
5 |
6 | {% include 'about/content.html' %} 7 |
8 |
9 | {% endblock %} 10 | -------------------------------------------------------------------------------- /pheweb/serve/templates/about/content.html: -------------------------------------------------------------------------------- 1 |

About This PheWeb

2 |
3 |

This site was built with PheWeb version {{ config.PHEWEB_VERSION }}.

4 |
5 |

All positions are on GRCh{{config.GRCH_BUILD_NUMBER}}.

6 | -------------------------------------------------------------------------------- /pheweb/serve/templates/error.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | 3 | {% block contained %} 4 |
5 |
6 |

7 |
8 |

Error

9 | {% if message %} 10 |

{{ message }}

11 | {% else %} 12 |

Either this page doesn't exist, or you just found a bug in our server.


13 | {% endif %} 14 |

15 | If this is something that we should know about, we'd appreciate if you'd let us know.
16 |
17 | 18 |
19 |
20 | {% endblock %} 21 | -------------------------------------------------------------------------------- /pheweb/serve/templates/gene.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | 3 | 4 | {% block in_head %} 5 | 11 | {# Includes for LocusZoom.js (from CDN) #} 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | {% endblock %} 22 | 23 | 24 | {% block contained %} 25 |
26 |
27 |

{{ gene_symbol }}

28 |
29 |
30 | {% if significant_phenos %} 31 |
32 |
33 |

Phenotypes with the most-significant associations for this locus:

34 | 35 |
36 |
37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 |
Top p-value in genePhenotype
47 |
48 |
49 | 50 |
51 |
52 |
53 | {% endif %} 54 | {% for key in ['num_cases', 'num_controls', 'num_samples'] %} 55 | {% if key in pheno %} 56 | {% if '<' in pheno[key]|string %} 57 | 58 | {% elif 0 < pheno[key]|int < 200 %} 59 | 60 | {% endif %} 61 | {% endif %} 62 | {% endfor %} 63 |
64 |
65 | {% if 'num_cases' in pheno %} 66 |

{{ pheno.num_cases }} cases, {{ pheno.num_controls }} controls.

67 | {% elif 'num_samples' in pheno %} 68 |

{{ pheno.num_samples }} samples

69 | {% endif %} 70 | {% if pheno.category %} 71 |

Category: {{ pheno.category}}

72 | {% endif %} 73 |
74 |
75 |
76 | 77 |
78 |
79 |
80 |
81 |
82 | 83 | 98 | 99 | {% endblock %} 100 | -------------------------------------------------------------------------------- /pheweb/serve/templates/index.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | 3 | {% block navbar_left %}{% endblock %} 4 | 5 | {% block in_head %} 6 | 11 | 14 | {% endblock %} 15 | 16 | {% block contained %} 17 | 18 |
19 | {% if current_user and not current_user.is_authenticated %} 20 |
21 |
22 |
23 | {% if not config['USE_WHITELIST'] %} 24 |

Please register to view this data.

25 |
26 |

Click here to register with a Google account

27 | {% else %} 28 |

Please log in to view this data.

29 |
30 |

Click here to log in with a Google account

31 |
32 |

33 | If your email address is on the list of allowed email addresses but is not connected to a Google account, please 34 | create a Google account for it. 35 |

36 | {% endif %} 37 |
38 |
39 |
40 | {% else %} 41 |
42 | {% endif %} 43 | 44 |
45 |
46 |

{% include 'index/h1.html' %}

47 | {% include 'index/below-h1.html' ignore missing %} 48 |
49 |
50 |
51 |
52 | 62 |
63 |
64 | {% endblock %} 65 | -------------------------------------------------------------------------------- /pheweb/serve/templates/index/below-h1.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/pheweb/b646214eae1ba524e239d2f5b20ca011756df770/pheweb/serve/templates/index/below-h1.html -------------------------------------------------------------------------------- /pheweb/serve/templates/index/below-query.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/pheweb/b646214eae1ba524e239d2f5b20ca011756df770/pheweb/serve/templates/index/below-query.html -------------------------------------------------------------------------------- /pheweb/serve/templates/index/h1.html: -------------------------------------------------------------------------------- 1 | PheWeb -------------------------------------------------------------------------------- /pheweb/serve/templates/layout.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | {% include 'title.html' %} 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | 22 | 23 | {% if config.SENTRY_DSN %} 24 | 25 | 30 | {% endif %} 31 | 32 | {% if config['GOOGLE_ANALYTICS_TRACKING_ID'] %} 33 | 34 | 6 | 7 | 14 | 25 | {% endblock %} 26 | 27 | 28 | {% block contained %} 29 |
30 |
31 |

All Phenotypes

32 |
33 |
34 | {% if config.DOWNLOAD_PHENOTYPES_BUTTON %} 35 | Download 36 | {% endif %} 37 |
38 |
39 |
40 | 41 |
42 |
43 | 44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 |
CategoryPhenotype#SamplesGCλ0.01#Loci<5e-8Top variant in phenoP-valueNearest Gene(s)
67 |
68 |
69 | 70 | 97 | 98 | {% endblock %} 99 | -------------------------------------------------------------------------------- /pheweb/serve/templates/region.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | 3 | 4 | {% block in_head %} 5 | 9 | {# Includes for LocusZoom.js (from CDN) #} 10 | 11 | 12 | 13 | 14 | 15 | 16 | {% endblock %} 17 | 18 | 19 | {% block contained %} 20 | {% for key in ['num_cases', 'num_controls', 'num_samples'] %} 21 | {% if key in pheno %} 22 | {% if '<' in pheno[key]|string %} 23 | 24 | {% elif 0 < pheno[key]|int < 200 %} 25 | 26 | {% endif %} 27 | {% endif %} 28 | {% endfor %} 29 |
30 |
31 |

{% include "region/h1.html" %}

32 |
33 |
34 |
35 |
36 | {% if 'num_cases' in pheno %} 37 |

{{ pheno.num_cases }} cases, {{ pheno.num_controls }} controls.

38 | {% elif 'num_samples' in pheno %} 39 |

{{ pheno.num_samples }} samples

40 | {% endif %} 41 | {% if pheno.category %} 42 |

Category: {{ pheno.category}}

43 | {% endif %} 44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 | {% endblock %} 53 | -------------------------------------------------------------------------------- /pheweb/serve/templates/region/h1.html: -------------------------------------------------------------------------------- 1 | {{ pheno['phenocode'] }}{% if pheno.phenostring %}: {{ pheno.phenostring }}{% endif %} 2 | -------------------------------------------------------------------------------- /pheweb/serve/templates/title.html: -------------------------------------------------------------------------------- 1 | PheWeb -------------------------------------------------------------------------------- /pheweb/serve/templates/top_hits.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | 3 | 4 | {% block in_head %} 5 | 6 | 7 | 14 | 25 | {% endblock %} 26 | 27 | 28 | {% block contained %} 29 |
30 |
31 |

Top Hits

32 |
33 |
34 | {% if config.DOWNLOAD_TOP_HITS_BUTTON %} 35 | Download 36 | {% endif %} 37 |
38 |
39 |
40 |
41 |

This page shows only the peaks with a p-value < 10⁻⁶. 42 | Variants are hidden if any variant within 500kb in the same phenotype has a smaller p-value. 43 | Only the top 2000 variants of each phenotype are considered. 44 | This table is limited to the top 1000 hits, but the download button includes all hits.

45 |
46 |
47 |
48 | 49 |
50 |
51 | 52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 |
CategoryPhenotypeTop variant in locusP-valueMAFNearest Gene(s)
73 |
74 |
75 | 76 | 96 | 97 | {% endblock %} 98 | -------------------------------------------------------------------------------- /pheweb/serve/templates/variant.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | 3 | 4 | {% block in_head %} 5 | 9 | 10 | 11 | {# Includes for LocusZoom.js (from CDN) #} 12 | 13 | 14 | 15 | 16 | 17 | 25 | {% endblock %} 26 | 27 | 28 | {% block contained %} 29 |
30 |
31 |

32 | {{ variant.variant_name }} 33 | {% if variant.rsids %}({{ variant.rsids }}){% endif %} 34 |

35 |

Nearest gene: {{ variant.nearest_genes }}

36 | 37 |

View on 38 | UCSC 39 | {% with rsids = (variant.rsids.split(',') if variant.rsids else []) %} 40 | {% if rsids|length == 1 %} 41 | , GWAS Catalog 42 | {% else %} 43 | {% for rsid in rsids %} 44 | , GWAS Catalog for {{ rsid }} 45 | {% endfor %} 46 | {% endif %} 47 | {% if rsids|length == 1 %} 48 | , dbSNP 49 | {% else %} 50 | {% for rsid in rsids %} 51 | , dbSNP for {{ rsid }} 52 | {% endfor %} 53 | {% endif %} 54 | {% endwith %} 55 | 56 | 57 |

58 |
59 |
60 |
61 |
62 | 63 |
64 | 65 |
66 |
67 | 68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 |
CategoryPhenotypeP-valueEffect Size (se)Number of samples
88 |
89 |
def _index_name(filename:str) -> str:
    '''Return the default path of the index file that belongs to `filename`.'''
    # TODO: Replace pickle with another storage mechanism
    return '{}.pickle'.format(filename)


def make_byte_index(filename: str, key_col: int,
                    skip_lines: int = 1, delimiter: str = '\t',
                    index_fn: Optional[str] = None) -> str:
    """
    Generate a crude index specifying byte ranges of lines where each value can be found

    Rows that share a key are expected to be adjacent.  The data file is read in
    binary mode so the recorded offsets are true byte offsets (text-mode `tell()`
    values are opaque and must not be used for arithmetic).  The file is assumed to
    be UTF-8 (or ASCII).

    :param filename: The file to index
    :param key_col: The column to use as index values (starts at 1)
    :param skip_lines: Number of headers/other lines to skip
    :param delimiter: The character used to separate fields
    :param index_fn: (optional) path to the index file
    :return: The path of the index file that was written
    """
    byte_index:Dict[str,List[int]] = {}
    delimiter_bytes = delimiter.encode('utf-8')

    with open(filename, 'rb') as f:
        for _ in range(skip_lines):
            f.readline()

        span_start = last_line_end = f.tell()
        last_key: Optional[str] = None
        for line in iter(f.readline, b''):
            # Strip the line terminator first so that a key in the last column
            # does not carry a trailing newline.
            fields = line.rstrip(b'\r\n').split(delimiter_bytes)
            key = fields[key_col - 1].decode('utf-8')

            if last_key is not None and key != last_key:
                byte_index[last_key] = [span_start, last_line_end]
                span_start = last_line_end

            last_key = key
            last_line_end = f.tell()

        # The loop only records a span when the key changes, so the final run of
        # rows is recorded here.  `last_key is None` means there were no data rows.
        if last_key is not None and last_key not in byte_index:
            byte_index[last_key] = [span_start, last_line_end]

    index_fn = index_fn or _index_name(filename)
    with open(index_fn, 'wb') as pickle_f:
        pickle.dump(byte_index, pickle_f)

    return index_fn


def get_indexed_rows(filename: str, key: str,
                     strict: bool = False, index_fn: Optional[str] = None) -> List[str]:
    """
    Fetch all lines that reference the specified key, from a previously indexed file

    :param filename: The filename to search
    :param key: The value to be read.
    :param strict: Whether to require that the value is present in the file.  When True, a key
        that is missing from the index raises KeyError; when False (default) it yields `[]`.
    :param index_fn: (optional) path to the index file
    :return: An array of strings, one per line of file
    :raises FileNotFoundError: if the index file does not exist
    """
    index_fn = index_fn or _index_name(filename)
    if not os.path.isfile(index_fn):
        raise FileNotFoundError('Index file not found: {}'.format(index_fn))

    # NOTE: unpickling can execute arbitrary code; only load index files that were
    # written by `make_byte_index`, never ones from an untrusted source.
    with open(index_fn, 'rb') as pickle_f:
        byte_index = pickle.load(pickle_f)

    if key not in byte_index and not strict:
        # Sometimes the file may not have any information about the user's query, and that is usually ok
        return []

    start, end = byte_index[key]

    # Binary mode: `end - start` is a number of bytes, and text-mode `read(n)` counts
    # characters, which over-reads as soon as the span contains non-ASCII text.
    with open(filename, 'rb') as f:
        # TODO: Improve this to support for big file ranges
        f.seek(start, 0)
        return f.read(end - start).decode('utf-8').splitlines()
line at the top of my scripts with the current interpreter. 3 | # I'm in a virtualenv, so it becomes a non-standard path. 4 | # Then, when I install the package later, the path doesn't get fixed, so the interpreter isn't found. 5 | # I'm sure there's a correct solution to this problem, but here's mine. 6 | # from 7 | [build] 8 | executable = /usr/bin/env python3 9 | 10 | # Not needed but lets user run `python3 setup.py test` instead of `... pytest` 11 | [aliases] 12 | test=pytest 13 | 14 | # --fulltrace shows STDOUT when a test fails 15 | [tool:pytest] 16 | addopts = --fulltrace 17 | 18 | # To run flake8, just run `flake8` in this directory. 19 | # Then iteratively fix problems and add ignores until flake8 output looks acceptable. 20 | [flake8] 21 | show_source = True 22 | ignore = E501,E302,E251,E701,E226,E305,E225,E261,E231,E301,E306,E402,E704,E265,E201,E202,E303,E124,E241,E127,E266,E221,E126,E129,F811,E222,E401,W503,W504 23 | exclude = build,__pycache__,build,dist,.eggs,.git 24 | filename = *.py 25 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # to install: `pip3 install -e .` 3 | # to install latest from pypi: `pip3 install --upgrade --upgrade-strategy eager --no-cache-dir pheweb` 4 | # to upload to pypi: `./setup.py publish` 5 | # to update deps: `kpa pip-find-updates`, edit, `pip3 install -U --upgrade-strategy=eager .`, test 6 | # to test: `./setup.py test` or `pytest` 7 | 8 | from setuptools import setup 9 | import importlib 10 | import sys 11 | 12 | 13 | if sys.platform.startswith('win'): 14 | raise Exception("PheWeb doesn't support Windows, because pysam doesn't support windows.") 15 | if sys.version_info.major <= 2: 16 | print("PheWeb requires Python 3. 
def load_module_by_path(module_name, filepath):
    """Execute the Python source file at `filepath` and return it as a module.

    This lets setup.py read `pheweb/version.py` without importing the `pheweb` package.

    :param module_name: Name to give the loaded module (becomes its `__name__`)
    :param filepath: Path to the Python source file to execute
    :return: The loaded module object
    :raises Exception: if no import spec (or no loader) can be built for `filepath`
    """
    # `import importlib` alone does not guarantee that the `importlib.util` submodule is loaded,
    # so import it explicitly here instead of relying on another package having done so.
    import importlib.util

    spec = importlib.util.spec_from_file_location(module_name, filepath)
    if spec is None or spec.loader is None:
        raise Exception(module_name, filepath, spec)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module
#!/bin/bash
set -euo pipefail
readlinkf() { perl -MCwd -le 'print Cwd::abs_path shift' "$1"; }
SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
cd "$SCRIPTDIR/.."
pwd

# This script installs pheweb in a venv and runs tests there.
# It's helpful for checking that there aren't any un-checked-in files I guess.
# Or for testing a version of pheweb that isn't installed globally.

# Run the pre-commit checks, build a per-user venv under /tmp, install pheweb into it
# in editable mode, and run pytest from that venv.
f() {
    # 0. Run pre-commit checks
    ./etc/pre-commit

    py="/usr/bin/python3"

    # 1. Make venv
    # $USER may be unset (e.g. in containers or CI); with `set -u` that would abort the script,
    # so fall back to the effective user name reported by `id`.
    venv_dir="/tmp/test-pheweb-venv-${USER:-$(id -un)}" # $(mktemp -d)
    echo "venv_dir = $venv_dir"
    "$py" -m venv "$venv_dir"
    "$venv_dir/bin/pip3" install wheel pip pytest
    "$venv_dir/bin/pip3" install -e .

    # 2. pytest
    "$venv_dir/bin/pytest"
}; f
!`(%@).epacts.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/pheweb/b646214eae1ba524e239d2f5b20ca011756df770/tests/input_files/assoc-files/*&\+. !`(%@).epacts.gz -------------------------------------------------------------------------------- /tests/input_files/assoc-files/EAR-LENGTH.epacts.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/pheweb/b646214eae1ba524e239d2f5b20ca011756df770/tests/input_files/assoc-files/EAR-LENGTH.epacts.gz -------------------------------------------------------------------------------- /tests/input_files/assoc-files/has-fields-.txt: -------------------------------------------------------------------------------- 1 | chrom pos ref alt pval 2 | 1 869334 G A 0.637 3 | 1 30085939 C T 0.0794 4 | 1 63504147 G C 0.712 5 | 1 187074167 G A 2.3e-83 6 | 1 241730054 C T 0.278 7 | 1 241736177 G A 0.253 8 | 1 247340409 A G 0.104 9 | 2 5726953 G A 0.31 10 | 2 5742612 G A 0.025 11 | 2 5744655 A T 0.436 12 | 2 11233323 T C 0.565 13 | 2 62311086 A G 0.857 14 | 2 106298098 G A 0.108 15 | 2 106301755 A C 0.543 16 | 2 106302511 G A 0.18 17 | 2 124024054 A G 0.194 18 | 2 228871641 C T 0.648 19 | 3 139686208 G A 0.933 20 | 3 182222754 A G 0.469 21 | 4 14839619 C G 0.949 22 | 4 17854055 T G 0.288 23 | 4 102651455 G A 0.38 24 | 4 102658075 G A 0.906 25 | 4 102689501 T C 0.476 26 | 4 102692475 A G 0.456 27 | 5 56994198 A G 0.889 28 | 5 84012571 G C 0.79 29 | 5 180359447 C T 0.335 30 | 6 5497103 G A 0.978 31 | 6 23975798 A G 0.749 32 | 6 25494796 G A 0.833 33 | 7 9934599 T C 0.765 34 | 7 9941833 T C 0.274 35 | 7 97293103 C G 0.321 36 | 7 102692783 A C 0.369 37 | 7 138946363 C T 0.541 38 | 7 139002565 T C 0.273 39 | 7 139006798 C A 0.234 40 | 8 84011970 G A 0.245 41 | 8 124011505 A G 0.0684 42 | 8 124014948 A C 0.523 43 | 8 124020903 C T 0.407 44 | 8 124023785 G A 0.0953 45 | 8 124036956 C T 0.9 46 | 8 
126191572 C T 0.168 47 | 8 126213089 T C 0.789 48 | 8 133476430 A T 0.336 49 | 8 142476082 C T 0.912 50 | 8 142477218 C T 0.985 51 | 9 7808987 A G 0.18 52 | 9 34740316 G T 0.0313 53 | 9 112625187 A T 0.716 54 | 10 3691029 T C 0.945 55 | 11 134465118 A G 0.75 56 | 12 18649057 C T 0.0151 57 | 12 53588352 C G 0.478 58 | 12 89799953 A T 0.063 59 | 12 96582630 G A 0.904 60 | 13 43932798 C T 0.267 61 | 13 67690056 G A 0.501 62 | 13 101758117 C T 0.377 63 | 14 21214280 T C 0.0721 64 | 14 21216755 G A 0.597 65 | 14 34859425 C T 0.401 66 | 14 65493112 G A 0.0663 67 | 16 13624325 T G 0.469 68 | 18 14582075 G A 0.167 69 | 18 22098198 C T 0.981 70 | 18 71065831 A T 0.596 71 | 18 71116301 G A 0.0161 72 | 18 74058754 A G 0.862 73 | 19 53490509 T C 0.44 74 | 19 53493158 G A 0.402 75 | 20 4781015 C T 0.918 76 | 21 29336724 G C 0.318 77 | X 2609717 G T 0.596 78 | X 2772660 G A 0.565 79 | X 2776153 T C 0.744 80 | X 12344972 T G 0.833 81 | X 28809582 T C 0.241 82 | X 28823240 A G 0.373 83 | X 47735235 G A 0.234 84 | X 69336499 G A 0.263 85 | X 115438600 C G 0.931 86 | X 119080119 A C 0.168 87 | X 119081421 G C 0.554 88 | X 119089143 G A 0.613 89 | X 123778719 C T 0.758 90 | X 128980172 A T 0.721 91 | -------------------------------------------------------------------------------- /tests/input_files/assoc-files/has-fields-ac-af-maf-ns.txt: -------------------------------------------------------------------------------- 1 | chrom pos ref alt pval maf af ac ns 2 | 1 869334 G A 0.326 0.0524 0.0524 417 3978 3 | 1 30259743 G A 0.235 0.498 0.498 3959 3978 4 | 1 30351973 C T 0.766 0.259 0.259 2058 3978 5 | 1 30365235 C T 0.103 0.409 0.409 3253 3978 6 | 1 30443066 C T 0.633 0.278 0.722 5747 3978 7 | 1 34569074 C T 0.156 0.389 0.611 4863 3978 8 | 1 247303263 G A 0.609 0.452 0.548 4357 3978 9 | 2 5744058 C T 0.949 0.383 0.383 3045 3978 10 | 2 6641731 G A 0.232 0.22 0.78 6202 3978 11 | 2 106312232 G A 0.811 0.0333 0.0333 265 3978 12 | 2 175572002 T G 0.688 0.231 0.769 6122 3978 13 | 2 175726227 
T C 0.552 0.386 0.614 4886 3978 14 | 2 187740302 G C 0.265 0.0388 0.0388 309 3978 15 | 3 43360244 G A 0.186 0.419 0.581 4626 3978 16 | 3 119872586 A G 0.281 0.119 0.119 948 3978 17 | 3 139692648 A T 0.692 0.0683 0.932 7413 3978 18 | 3 149871159 G A 0.991 0.0319 0.968 7702 3978 19 | 3 157594640 A C 0.233 0.21 0.79 6282 3978 20 | 3 157608030 C T 0.43 0.155 0.845 6719 3978 21 | 3 182225247 T A 0.436 0.0332 0.0332 264 3978 22 | 4 2170324 G A 0.94 0.403 0.597 4750 3978 23 | 4 70830711 T A 0.0638 0.329 0.671 5339 3978 24 | 4 156714386 G A 0.311 0.0187 0.0187 149 3978 25 | 5 12873429 C T 0.795 0.441 0.559 4448 3978 26 | 5 21816517 C T 0.0162 0.212 0.788 6271 3978 27 | 5 32474006 T G 0.319 0.152 0.848 6746 3978 28 | 5 57060614 C T 0.19 0.374 0.626 4979 3978 29 | 5 84030347 G A 0.782 0.404 0.404 3217 3978 30 | 5 84035299 G A 0.221 0.186 0.186 1483 3978 31 | 5 129697384 C T 0.096 0.281 0.719 5723 3978 32 | 5 180343925 T C 0.945 0.388 0.612 4871 3978 33 | 6 12635487 C T 0.602 0.0842 0.0842 670 3978 34 | 6 50030176 A G 0.195 0.127 0.127 1007 3978 35 | 6 131469461 G T 0.0943 0.141 0.141 1125 3978 36 | 6 136351386 T A 0.485 0.206 0.794 6315 3978 37 | 6 157841718 G C 0.915 0.375 0.625 4971 3978 38 | 7 9915441 G A 0.309 0.101 0.101 800 3978 39 | 7 9918193 A G 0.0151 0.49 0.49 3896 3978 40 | 7 9922299 A T 0.357 0.0606 0.939 7474 3978 41 | 7 9929047 T C 0.881 0.316 0.316 2512 3978 42 | 7 102893499 A G 0.881 0.335 0.335 2668 3978 43 | 7 107462149 A G 0.126 0.183 0.183 1453 3978 44 | 8 5070787 G C 0.573 0.215 0.215 1711 3978 45 | 8 124000354 A G 0.57 0.213 0.213 1695 3978 46 | 8 126133108 G A 0.914 0.426 0.426 3392 3978 47 | 8 126164300 G C 0.0787 0.139 0.861 6853 3978 48 | 8 126173545 A T 0.422 0.387 0.387 3077 3978 49 | 8 126185276 T A 0.0849 0.0254 0.0254 202 3978 50 | 8 142475888 G A 0.993 0.363 0.637 5067 3978 51 | 8 142476424 G A 0.112 0.488 0.512 4077 3978 52 | 8 142480023 A T 0.973 0.121 0.121 966 3978 53 | 9 7790940 C T 0.58 0.422 0.422 3355 3978 54 | 9 12270232 T C 0.772 
0.364 0.636 5061 3978 55 | 9 12271997 G A 0.553 0.0954 0.905 7197 3978 56 | 9 73249033 A G 0.126 0.136 0.136 1081 3978 57 | 10 72025336 G A 0.585 0.145 0.855 6805 3978 58 | 10 130771802 G A 0.737 0.0985 0.901 7172 3978 59 | 12 75658782 C T 0.106 0.0245 0.0245 195 3978 60 | 13 27699820 A G 0.605 0.0938 0.0938 746 3978 61 | 13 67687719 G T 0.948 0.34 0.66 5247 3978 62 | 14 21214438 C T 0.385 0.0616 0.0616 490 3978 63 | 14 21215310 C T 0.557 0.0584 0.0584 465 3978 64 | 16 4327397 A G 0.917 0.18 0.18 1432 3978 65 | 16 7018981 G * 0.388 0.496 0.504 4011 3978 66 | 16 7019554 A T 0.306 0.5 0.5 3975 3978 67 | 16 57080403 G A 0.888 0.278 0.278 2209 3978 68 | 16 83615888 T A 0.444 0.0194 0.0194 154 3978 69 | 18 4270328 A T 0.981 0.0642 0.0642 511 3978 70 | 18 14583728 T C 0.562 .01 0.99 7877 3978 71 | 18 14585729 A C 0.991 0.129 0.129 1030 3978 72 | 18 38522014 C T 0.583 0.264 0.264 2098 3978 73 | 19 10157743 C T 0.758 0.183 0.817 6503 3978 74 | 19 12254854 T C 0.921 0.395 0.605 4814 3978 75 | 19 53490509 T C 0.37 0.316 0.316 2514 3978 76 | 19 53493249 A G 0.148 0.366 0.366 2914 3978 77 | 20 4781496 T C 0.0382 0.184 0.816 6489 3978 78 | 20 52657590 G A 0.19 0.262 0.738 5869 3978 79 | X 2060097 A G 0.541 0.476 0.476 3790 3978 80 | X 2693624 G A 0.983 0.204 0.796 6331 3978 81 | X 2780319 A C 0.865 0.109 0.109 866 3978 82 | X 17293827 T C 0.561 0.221 0.779 6194 3978 83 | X 17443238 C T 0.117 0.166 0.166 1321 3978 84 | X 28865574 T C 0.493 0.317 0.683 5433 3978 85 | X 30426501 G A 0.632 0.29 0.71 5651 3978 86 | X 32530777 A T 0.387 0.482 0.482 3833 3978 87 | X 36460463 G A 0.484 0.351 0.649 5167 3978 88 | X 44811118 T C 0.228 0.388 0.612 4867 3978 89 | X 69325601 C A 0.512 0.0589 0.941 7487 3978 90 | X 69331723 A G 0.923 .01 0.99 7879 3978 91 | X 69336499 G A 0.703 0.132 0.868 6905 3978 92 | X 87483357 C T 0.908 1e-3 1e-3 11 3978 93 | X 87483684 A G 0.41 8e-3 0.992 7894 3978 94 | X 112546195 G T 0.883 0.0372 0.963 7660 3978 95 | X 119064252 C T 0.602 0.204 0.796 6333 3978 96 | X 
119064446 G C 0.991 0.0128 0.987 7854 3978 97 | X 123746341 C A 0.251 0.0452 0.0452 360 3978 98 | X 123786762 G A .01 0.391 0.391 3110 3978 99 | -------------------------------------------------------------------------------- /tests/input_files/assoc-files/has-fields-ac-af-maf.txt: -------------------------------------------------------------------------------- 1 | chrom pos ref alt pval maf af ac 2 | 10 3686300 C T 0.366 0.239 0.761 2592 3 | 10 3686327 C T 0.262 0.175 0.825 2808 4 | 10 3695498 A G 0.0394 0.366 0.366 1246 5 | 10 30951327 C T 0.159 0.198 0.802 2729 6 | 10 67382457 T C 0.165 0.112 0.888 3024 7 | 10 130258262 T C 0.433 0.382 0.618 2102 8 | 10 130457159 C T 0.599 0.261 0.261 889 9 | -------------------------------------------------------------------------------- /tests/input_files/assoc-files/has-fields-ac-af-ns.txt: -------------------------------------------------------------------------------- 1 | chrom pos ref alt pval af ac ns 2 | 1 869334 G A 0.581 0.499 1853 1857 3 | 1 203189657 A G 0.307 0.281 1043 1857 4 | 1 209231433 T A 0.788 0.554 2056 1857 5 | 1 231667599 A G 0.738 0.244 908 1857 6 | 1 241730054 C T 0.796 8e-3 30 1857 7 | 1 247307061 G A 0.677 0.556 2065 1857 8 | 2 5742612 G A 0.893 0.677 2515 1857 9 | 2 5742868 C T 0.396 0.977 3630 1857 10 | 2 33603349 C G 0.147 0.897 3331 1857 11 | 2 121437921 C G 0.709 0.137 509 1857 12 | 2 124024054 A G 0.429 0.377 1399 1857 13 | 2 160380907 G A 0.947 0.0334 124 1857 14 | 2 175726227 T C 0.388 0.232 860 1857 15 | 2 187742154 T A 0.513 0.96 3566 1857 16 | 3 3103110 A C 0.613 0.543 2017 1857 17 | 3 139691236 G C 0.043 0.492 1829 1857 18 | 3 157598113 C A 0.459 0.277 1028 1857 19 | 3 157603106 C T 0.0864 0.0964 358 1857 20 | 3 195312938 A G 0.591 0.595 2210 1857 21 | 4 17854031 T C 0.668 0.772 2869 1857 22 | 4 45742830 T A 0.167 0.286 1062 1857 23 | 4 173321931 T A 0.642 0.557 2069 1857 24 | 5 9952864 C A 0.873 0.829 3079 1857 25 | 5 34275660 A G 0.702 0.954 3545 1857 26 | 5 57013402 C T 0.926 0.0285 106 
1857 27 | 5 57015785 C T 0.761 0.066 245 1857 28 | 5 57045323 G A 0.0976 0.571 2120 1857 29 | 5 57051721 T G 0.404 0.0377 140 1857 30 | 5 57066493 C T 0.126 0.697 2588 1857 31 | 5 79653658 G C 0.562 0.843 3132 1857 32 | 5 84051826 G A 0.446 0.299 1112 1857 33 | 5 148301458 C A 0.602 0.0832 309 1857 34 | 5 148304233 T A 0.81 0.802 2977 1857 35 | 6 23972019 A G 0.637 0.447 1659 1857 36 | 6 23972731 G A 0.859 0.088 327 1857 37 | 6 25494796 G A 0.566 0.307 1142 1857 38 | 6 47089588 C T 0.414 0.921 3419 1857 39 | 6 146015211 T C 0.544 0.855 3176 1857 40 | 7 97292042 A T 0.192 0.826 3067 1857 41 | 7 102656496 T A 0.97 0.414 1538 1857 42 | 7 102977725 T A 0.251 0.141 523 1857 43 | 7 131003753 A G 0.967 0.267 992 1857 44 | 7 139005460 A G 0.578 0.435 1616 1857 45 | 8 5077135 A G 0.562 0.858 3187 1857 46 | 8 13716942 A C 0.125 0.409 1518 1857 47 | 8 23063558 C G 0.875 0.4 1486 1857 48 | 8 126134996 A G 0.439 0.404 1500 1857 49 | 8 126156120 T C 0.625 0.746 2769 1857 50 | 8 126169016 A G 0.367 0.807 2997 1857 51 | 8 126190444 C A 0.609 0.587 2180 1857 52 | 8 126229570 T A 0.561 0.616 2288 1857 53 | 8 126230096 C T 0.57 0.854 3170 1857 54 | 8 126230876 A G 0.792 0.539 2000 1857 55 | 8 142418216 G A 0.794 0.81 3010 1857 56 | 8 142476426 G A 0.937 0.514 1908 1857 57 | 8 142478942 A G 0.35 0.567 2105 1857 58 | 9 8144973 G A 0.167 0.854 3172 1857 59 | 9 12271997 G A 0.374 0.0175 65 1857 60 | 9 24759031 G A 0.595 0.253 941 1857 61 | 10 67416433 C T 0.58 0.333 1237 1857 62 | 10 130258262 T C 0.256 0.508 1886 1857 63 | 11 37002936 A C 0.39 0.845 3137 1857 64 | 11 89927176 T C 0.433 0.157 582 1857 65 | 12 7826120 A T 0.68 0.425 1578 1857 66 | 12 18648812 G A 0.222 0.0864 321 1857 67 | 12 18650325 C A 0.398 0.655 2434 1857 68 | 12 20928993 A G 0.836 0.774 2873 1857 69 | 12 89816762 C T 0.376 0.156 578 1857 70 | 13 43932798 C T 0.593 0.183 681 1857 71 | 13 52113111 C T 0.991 0.462 1715 1857 72 | 13 67690056 G A 0.184 0.618 2296 1857 73 | 14 21214280 T C 0.206 0.288 1070 1857 74 | 14 
34856053 C T 0.212 0.643 2387 1857 75 | 14 95675563 T A 0.769 0.489 1818 1857 76 | 14 95678459 C T 0.953 0.073 271 1857 77 | 15 62919312 G A 0.596 0.918 3408 1857 78 | 17 38619024 G C 0.449 0.884 3285 1857 79 | 18 1827364 T C 0.845 0.747 2776 1857 80 | 18 14588752 A G 0.633 0.418 1553 1857 81 | 18 14590752 T C 0.297 0.418 1554 1857 82 | 18 38522014 C T 0.173 0.707 2627 1857 83 | 19 10157743 C T 0.165 0.0512 190 1857 84 | 19 12251752 G T 0.037 0.739 2746 1857 85 | 21 28775234 G A 0.137 0.367 1363 1857 86 | 21 29368271 A T 0.0281 0.963 3575 1857 87 | 21 29384348 A T 0.819 0.231 857 1857 88 | 22 44920969 T G 0.407 0.869 3226 1857 89 | X 2778526 A G 0.457 0.848 3149 1857 90 | X 2779211 G A 0.353 0.842 3128 1857 91 | X 2780265 G A 0.444 0.742 2756 1857 92 | X 7776379 A G 0.775 0.884 3285 1857 93 | X 12344972 T G 0.805 0.359 1335 1857 94 | X 17302168 C T 0.907 0.422 1566 1857 95 | X 17451008 G A 0.247 4e-3 13 1857 96 | X 104484725 C T 0.588 0.475 1764 1857 97 | X 119080119 A C 0.219 0.24 893 1857 98 | X 119087721 A G 0.252 0.0213 79 1857 99 | X 119094532 T G 0.429 0.887 3293 1857 100 | X 119098573 G A 0.649 0.253 939 1857 101 | X 121643568 A C 0.904 0.588 2185 1857 102 | X 123785115 C T 0.786 0.689 2559 1857 103 | X 128969994 T C 0.251 0.15 557 1857 104 | X 150878657 G T 0.125 0.245 910 1857 105 | X 150879383 A C 0.472 0.261 969 1857 106 | -------------------------------------------------------------------------------- /tests/input_files/assoc-files/has-fields-ac-af.txt: -------------------------------------------------------------------------------- 1 | chrom pos ref alt pval af ac 2 | 1 869334 G A 0.56 0.536 5334 3 | 1 152553365 C T 0.623 0.489 4866 4 | 1 187082272 A G 0.38 0.247 2459 5 | 1 231393456 G A 9e-3 0.106 1058 6 | 1 231667599 A G 0.687 0.91 9051 7 | 1 241730054 C T 0.538 0.906 9013 8 | 1 241731095 G A 0.334 0.863 8583 9 | 1 241740770 C G 0.216 0.283 2811 10 | 2 5748610 C T 0.482 0.0245 244 11 | 2 44738719 C T 0.42 0.127 1266 12 | 2 228871641 C T 0.149 0.386 
3841 13 | 3 30442250 C T 0.87 0.83 8249 14 | 3 119896657 A G 0.539 0.676 6720 15 | 3 157596257 T C 0.71 0.588 5846 16 | 4 14837656 C T 0.672 0.263 2615 17 | 4 14837753 C T 0.398 0.778 7739 18 | 4 17812615 A C 0.692 0.373 3708 19 | 4 102662479 G A 0.695 0.819 8144 20 | 4 102677625 A G 0.131 0.534 5307 21 | 4 102680913 G A 0.431 0.96 9550 22 | 4 114819025 T A 0.102 0.0434 432 23 | 5 56988033 A G 0.977 0.39 3882 24 | 5 56990370 G A 0.706 0.112 1116 25 | 5 57038413 A G 0.976 0.877 8717 26 | 5 57052343 C A 0.0131 0.918 9127 27 | 5 57060614 C T 0.967 0.86 8551 28 | 5 79643412 T C 0.0913 0.861 8563 29 | 5 79643689 A G 0.506 0.931 9260 30 | 5 79663095 T C 0.615 0.582 5785 31 | 5 148305032 C T 0.442 0.576 5728 32 | 6 23974452 G A 0.749 0.191 1904 33 | 6 33843898 T C 0.798 0.843 8380 34 | 6 33856903 C T 0.414 0.664 6601 35 | 7 9915441 G A 0.659 0.25 2485 36 | 7 9922299 A T 0.272 0.216 2144 37 | 7 9926168 G C 0.226 0.718 7142 38 | 7 97292042 A T 0.995 0.454 4515 39 | 7 102846516 G A 0.854 0.477 4745 40 | 7 138946363 C T 0.986 0.0651 647 41 | 7 139006133 G A 0.737 0.269 2671 42 | 7 143889324 A G 0.936 0.105 1046 43 | 8 23058188 T C 0.259 0.612 6090 44 | 8 31287450 C A 0.336 0.461 4582 45 | 8 124009879 T C 0.444 0.403 4008 46 | 8 124014948 A C 0.782 0.491 4883 47 | 8 124020903 C T 0.82 0.207 2057 48 | 8 126229570 T A 0.451 0.3 2979 49 | 8 142477614 C T 0.0598 0.766 7615 50 | 9 12272803 G A 0.261 0.0525 522 51 | 9 12274220 C T 0.987 0.664 6605 52 | 9 34731228 T C 0.189 0.144 1429 53 | 9 34743681 G A 0.0937 6e-3 64 54 | 10 3698452 C T 0.656 0.0291 289 55 | 10 130468301 C T 0.844 0.174 1727 56 | 12 13769725 G C 0.694 0.443 4404 57 | 13 60313713 T C 0.964 0.544 5412 58 | 13 67687719 G T 0.91 0.886 8805 59 | 13 74534179 G T 0.192 0.37 3679 60 | 13 104310518 C G 0.112 0.4 3977 61 | 13 104312690 A G 0.66 0.421 4185 62 | 13 107202871 T C 0.522 0.885 8801 63 | 14 19467188 G T 0.869 0.925 9200 64 | 14 95677235 C T 0.111 0.642 6379 65 | 15 71840306 A G 0.311 0.602 5984 66 | 15 100191909 C 
T 0.146 0.126 1248 67 | 16 798229 G A 0.914 0.931 9255 68 | 16 7020211 C G 0.938 0.754 7498 69 | 16 57077179 A G 0.0364 0.936 9311 70 | 16 75311729 G C 0.184 0.753 7486 71 | 16 85318475 T C 0.942 0.687 6834 72 | 18 71065831 A T 0.461 0.528 5250 73 | 18 71117825 G A 0.293 0.261 2593 74 | 18 71122821 A G 0.391 0.168 1675 75 | 18 74061749 C T 0.999 0.769 7649 76 | 19 53493158 G A 0.221 0.565 5616 77 | 21 32027930 G C 0.493 0.608 6047 78 | 21 32189912 C A 0.995 0.951 9455 79 | X 2692597 T C 0.882 0.526 5229 80 | X 2778433 C G 0.343 0.445 4421 81 | X 2778796 A T 0.95 0.0251 250 82 | X 2781220 G A 0.532 0.5 4976 83 | X 6646102 C G 0.975 0.423 4208 84 | X 16673278 G C 0.832 0.126 1254 85 | X 17291581 A G 0.361 0.561 5580 86 | X 17300831 A C 0.691 0.817 8121 87 | X 28823240 A G 0.658 0.703 6993 88 | X 28885588 C T 0.143 0.193 1923 89 | X 44751853 G A 0.608 0.798 7932 90 | X 69328182 G T 0.557 0.774 7695 91 | X 111252438 C G 0.469 0.167 1661 92 | X 111985225 C T 0.407 0.839 8338 93 | X 119091364 G A 0.738 0.378 3759 94 | X 119094045 G A 0.0378 0.415 4129 95 | X 123778606 C T 0.133 0.226 2246 96 | X 123780385 T G 0.827 0.588 5851 97 | X 123785308 T C 0.607 0.482 4790 98 | X 150879645 G A 0.33 0.0261 260 99 | -------------------------------------------------------------------------------- /tests/input_files/assoc-files/has-fields-ac-maf-ns.txt: -------------------------------------------------------------------------------- 1 | chrom pos ref alt pval maf ac ns 2 | 1 869334 G A 0.94 0.321 267 416 3 | 1 30256975 C T 0.174 0.386 511 416 4 | 1 152553365 C T 0.548 0.115 96 416 5 | 1 185456978 T A 0.822 0.37 524 416 6 | 1 247301870 C T 0.369 0.0577 48 416 7 | 2 5731314 G A 0.0112 0.419 483 416 8 | 3 43289838 T C 0.912 0.0769 768 416 9 | 3 172101787 G C 0.134 0.0553 46 416 10 | 3 182224946 C G 0.392 0.255 620 416 11 | 4 14837288 C A 0.257 0.0349 29 416 12 | 4 14837311 G A 0.392 0.424 479 416 13 | 4 14839570 T C 0.22 0.0962 752 416 14 | 4 14847751 T C 0.837 0.215 179 416 15 | 4 
37471169 A G 0.996 5e-3 4 416 16 | 4 41300380 T C 0.0476 0.412 489 416 17 | 4 102654896 G A 0.38 0.374 311 416 18 | 4 102680913 G A 0.806 0.101 748 416 19 | 4 156714386 G A 0.217 0.0192 816 416 20 | 4 173380352 T C 0.1 0.101 84 416 21 | 4 183802989 A G 0.282 0.48 433 416 22 | 4 186174523 A C 0.445 0.4 333 416 23 | 5 12873429 C T 0.608 0.334 554 416 24 | 5 32474315 G C 0.471 0.368 306 416 25 | 5 57025481 A C 0.177 0.298 584 416 26 | 5 57042549 C T 0.131 0.304 253 416 27 | 5 57052343 C A 0.784 0.298 584 416 28 | 5 84013839 C T 0.64 0.471 440 416 29 | 5 119145275 C T 0.142 0.216 652 416 30 | 5 119146631 A G 0.28 0.216 652 416 31 | 5 119149636 C T 0.345 5e-3 4 416 32 | 5 148304121 A G 0.765 0.18 682 416 33 | 6 23981765 T C 0.852 0.0841 762 416 34 | 6 136351726 C T 0.375 0.25 208 416 35 | 6 150770037 G A 0.106 0.0781 767 416 36 | 7 9946134 C T 0.253 0.165 695 416 37 | 7 97291092 G A 0.435 0.133 721 416 38 | 7 138961816 A G 0.818 7e-3 826 416 39 | 8 5074965 A G 0.249 0.0288 808 416 40 | 8 23081645 C G 0.22 0.123 102 416 41 | 8 126118159 T G 0.441 0.32 566 416 42 | 8 126180331 A T 0.536 0.302 581 416 43 | 8 126202988 G A 0.242 0.255 620 416 44 | 8 126219876 C T 0.468 0.344 546 416 45 | 8 142476751 A G 0.381 0.0938 754 416 46 | 8 142477563 C T 0.101 0.23 191 416 47 | 8 142480108 C T 0.253 0.154 128 416 48 | 9 12273191 A G 0.993 0.231 640 416 49 | 9 135901408 A G 0.616 0.284 236 416 50 | 10 3691429 A G 0.534 0.281 234 416 51 | 10 58932779 G A 0.539 0.0565 785 416 52 | 10 67416433 C T 0.102 0.28 599 416 53 | 10 74066567 G A 0.463 0.0469 793 416 54 | 11 86289747 C T 0.603 0.466 444 416 55 | 12 7824725 T C 0.908 0.37 524 416 56 | 12 96582630 G A 0.118 0.244 629 416 57 | 14 21213861 A G 0.475 0.115 736 416 58 | 14 21215692 C T 0.172 0.254 621 416 59 | 14 34871040 G A 0.963 0.339 282 416 60 | 15 71840306 A G 0.136 0.327 560 416 61 | 15 100191909 C T 0.406 0.416 346 416 62 | 16 7014219 C G 0.195 0.405 337 416 63 | 16 19460571 G A 0.676 0.198 165 416 64 | 16 57077179 A G 0.768 
0.287 239 416 65 | 18 14588752 A G 0.75 0.0433 796 416 66 | 18 14598138 C T 0.277 0.0649 778 416 67 | 18 38359739 A G 0.0923 0.129 725 416 68 | 18 74063040 A G 0.455 1e-3 831 416 69 | 19 18107913 C T 0.786 7e-3 826 416 70 | 19 53486165 G A 0.357 0.12 732 416 71 | 20 52657590 G A 0.0216 0.364 529 416 72 | 20 59996240 G A 0.695 0.153 705 416 73 | X 364083 G C 0.829 0.131 723 416 74 | X 2778832 A G 0.69 0.293 588 416 75 | X 2779211 G A 0.332 0.148 709 416 76 | X 2780826 C T 0.766 0.147 710 416 77 | X 6040193 G A 0.0101 0.477 397 416 78 | X 21036882 G A 0.473 0.442 368 416 79 | X 79875814 C T 0.701 0.107 89 416 80 | X 98928635 A G 0.776 0.0144 820 416 81 | X 104477976 T G 0.193 5e-3 4 416 82 | X 112685156 G A 0.792 0.412 489 416 83 | X 119054001 C T 0.47 0.03 807 416 84 | X 119064965 C T 0.51 0.394 504 416 85 | X 119098050 A G 0.779 0.281 234 416 86 | X 121654787 A C 0.395 0.244 203 416 87 | X 123785501 T C 0.692 0.499 415 416 88 | -------------------------------------------------------------------------------- /tests/input_files/assoc-files/has-fields-ac-maf.txt: -------------------------------------------------------------------------------- 1 | chrom pos ref alt pval maf ac 2 | 1 869334 G A 0.444 0.145 1482 3 | 1 26512233 C G 0.601 0.362 1105 4 | 1 30315354 G A 0.615 0.34 590 5 | 1 152542229 T C 0.306 0.304 526 6 | 1 185451824 A G 0.415 0.31 1195 7 | 1 229371438 G A 0.687 0.365 1101 8 | 1 241731095 G A 0.593 0.254 1292 9 | 2 5732122 G T 0.357 0.415 720 10 | 2 5732154 C T 0.902 0.359 1111 11 | 2 5735926 T C 0.178 0.429 744 12 | 2 53820822 G C 0.83 0.209 362 13 | 2 62333053 T A 0.24 0.229 1337 14 | 3 43360244 G A 0.592 0.308 534 15 | 3 149856631 T C 0.333 0.364 631 16 | 3 157584520 G A 0.502 0.041 71 17 | 3 157592750 A T 0.308 0.0415 1661 18 | 3 157596540 T C 0.933 0.0231 1693 19 | 3 157598628 C T 0.921 0.384 1068 20 | 4 11271642 A G 0.237 0.125 1517 21 | 4 41299492 A G 0.0319 0.32 554 22 | 4 42174012 T G 0.648 0.126 218 23 | 4 70830711 T A 0.65 0.291 1229 24 | 4 
114819025 T A 0.426 0.211 366 25 | 4 147739002 T A 0.0195 0.462 801 26 | 4 173380352 T C 0.974 0.297 515 27 | 5 9972668 A G 0.599 0.29 503 28 | 5 32473659 C T 0.3 0.496 873 29 | 5 32474308 T G 0.41 0.075 1603 30 | 5 57066493 C T 0.304 0.0335 1675 31 | 5 79659105 A G 0.648 0.189 328 32 | 5 82621725 C T 0.604 0.271 1264 33 | 5 84038626 G A 0.676 0.0231 1693 34 | 5 84041116 G T 0.957 8e-3 1720 35 | 5 119140366 G C 0.128 0.175 1429 36 | 5 148297823 A T 0.2 0.182 316 37 | 5 180343925 T C 0.372 0.269 1267 38 | 6 33856903 C T 0.276 0.0144 25 39 | 6 143020804 C T 0.982 0.122 1522 40 | 6 145898283 A C 0.845 0.398 689 41 | 6 150770037 G A 0.651 0.0404 70 42 | 6 168452711 G A 0.274 0.37 1092 43 | 7 9922676 G A 0.433 0.348 1130 44 | 7 9944469 T C 0.601 0.161 279 45 | 7 9961509 T A 0.988 0.297 514 46 | 7 97303273 G A 0.579 0.34 1143 47 | 7 97310556 A G 0.299 0.0837 1588 48 | 7 102828393 A G 0.13 0.39 1057 49 | 7 107462149 A G 0.725 0.123 214 50 | 7 138913595 C A 0.567 0.233 403 51 | 8 5073770 G T 0.138 0.115 199 52 | 8 5076309 C T 0.703 0.439 761 53 | 8 13752182 C G 0.195 0.347 1132 54 | 8 23063558 C G 0.896 0.342 1141 55 | 8 38511992 G A 0.196 0.0981 170 56 | 8 124001505 A G 0.572 0.0317 55 57 | 8 124015289 C T 0.663 0.015 1707 58 | 8 126230876 A G 0.634 0.405 702 59 | 8 142476426 G A 0.0812 0.388 673 60 | 9 34743551 G A 0.366 0.215 372 61 | 9 34743681 G A 0.52 0.178 1424 62 | 10 2081052 C T 0.315 0.296 1220 63 | 10 3686435 G T 0.366 0.23 1334 64 | 10 58932779 G A 0.364 0.321 1177 65 | 10 74129178 C T 0.384 0.336 583 66 | 11 26114721 C T 0.932 0.342 1141 67 | 11 125770618 G A 0.272 0.0265 46 68 | 11 125777056 G A 0.886 0.171 297 69 | 12 13770397 G A 0.713 0.451 782 70 | 12 53593931 T G 0.778 0.327 567 71 | 12 104126668 C A 0.273 0.489 848 72 | 12 132101875 A G 0.885 0.122 212 73 | 13 74534179 G T 0.506 0.394 1050 74 | 14 21216967 G T 0.214 0.127 1513 75 | 14 26154936 C T 0.855 0.0906 157 76 | 15 100266779 G A 0.448 0.13 1507 77 | 16 4318455 A G 0.571 0.421 1003 78 | 16 4345448 
T C 0.964 0.462 800 79 | 16 7014284 T A 0.111 0.305 528 80 | 16 19460571 G A 0.49 0.26 451 81 | 17 38616734 G C 0.232 0.435 979 82 | 18 21119532 A C 0.909 0.0565 1635 83 | 18 71067126 C T 0.0903 0.164 284 84 | 18 74061749 C T 0.582 0.41 711 85 | 19 12241476 C G 0.0817 0.159 1458 86 | 19 18107913 C T 0.952 0.0173 30 87 | 19 49263721 C G 0.528 0.0837 145 88 | 19 53491454 T A 0.775 0.0808 140 89 | 20 4789911 A G 0.332 0.226 1342 90 | 20 46799262 G A 0.176 0.358 1113 91 | 21 28083372 T C 0.829 0.0831 1589 92 | 21 29317580 T A 0.962 0.0877 152 93 | 21 32189912 C A 0.654 0.496 874 94 | X 2060097 A G 0.253 0.338 1147 95 | X 2686165 A G 0.445 0.122 211 96 | X 17300276 G C 0.763 0.187 1409 97 | X 20872576 A G 0.387 0.0433 75 98 | X 28865574 T C 0.672 0.372 645 99 | X 32531441 G T 0.628 0.168 291 100 | X 102116420 T C 0.154 0.323 560 101 | X 119064446 G C 0.303 0.271 1264 102 | X 119087721 A G 0.0688 0.234 1328 103 | X 123786762 G A 0.024 0.086 1584 104 | X 150878657 G T 0.811 0.0242 42 105 | -------------------------------------------------------------------------------- /tests/input_files/assoc-files/has-fields-ac-ns.txt: -------------------------------------------------------------------------------- 1 | chrom pos ref alt pval ac ns 2 | 1 869334 G A 0.949 1979 1871 3 | 1 30113195 A G 0.238 554 1871 4 | 1 30295253 C T 0.482 2089 1871 5 | 1 185457917 A T 0.819 383 1871 6 | 1 187132799 C T 0.0676 1131 1871 7 | 1 247305873 C T 0.574 1623 1871 8 | 2 5712789 A T 0.303 2600 1871 9 | 2 5744058 C T 0.648 2545 1871 10 | 2 18382672 A G 0.827 3540 1871 11 | 2 33587857 G A 0.843 1130 1871 12 | 2 106285756 G A 0.902 1807 1871 13 | 2 106315235 C T 0.0964 1680 1871 14 | 2 121437921 C G 0.281 3462 1871 15 | 2 125280649 C T 0.705 3651 1871 16 | 2 187742154 T A 0.502 563 1871 17 | 2 207717783 T C 0.199 2219 1871 18 | 3 182224946 C G 0.685 785 1871 19 | 3 195315479 C T 0.0722 230 1871 20 | 4 12800811 T G 0.0342 511 1871 21 | 4 102677625 A G 0.366 2752 1871 22 | 4 109972826 A G 0.509 2796 
1871 23 | 4 111894848 T G 0.474 1469 1871 24 | 4 118445668 T C 0.471 1090 1871 25 | 4 173215414 T A 0.962 3232 1871 26 | 4 179441957 C T 0.455 2794 1871 27 | 5 10034219 C T 0.499 1404 1871 28 | 5 32474006 T G 0.0699 3274 1871 29 | 5 56990370 G A 0.728 593 1871 30 | 5 57052343 C A 0.378 588 1871 31 | 5 79661091 A G 0.867 1449 1871 32 | 5 148300239 C T 0.812 1185 1871 33 | 5 180364420 A G 0.187 1883 1871 34 | 6 23972731 G A 0.0728 2221 1871 35 | 6 23974452 G A 0.555 3140 1871 36 | 6 23978264 C T 0.629 554 1871 37 | 6 25499895 C T 0.946 2453 1871 38 | 6 33856903 C T 0.985 1976 1871 39 | 6 47094562 A G 0.264 3443 1871 40 | 6 143018973 C T 0.677 2298 1871 41 | 6 143020752 G A 0.256 2053 1871 42 | 6 146070578 C A 0.399 3523 1871 43 | 6 148774277 G A 0.917 699 1871 44 | 6 157906379 A G 0.654 746 1871 45 | 7 9922676 G A 0.0339 2752 1871 46 | 7 97293499 T C 0.529 2764 1871 47 | 7 97311949 C T 0.813 936 1871 48 | 7 97312295 G A 0.203 3300 1871 49 | 7 102720727 A C 0.468 973 1871 50 | 7 138967235 G A 0.652 1880 1871 51 | 8 5069452 G A 0.892 3003 1871 52 | 8 124001505 A G 0.912 1654 1871 53 | 8 124006004 T A 0.109 3275 1871 54 | 8 124016350 G A 0.146 2700 1871 55 | 8 124028479 G A 0.393 3015 1871 56 | 8 126013134 A G 0.0793 1511 1871 57 | 8 126081480 T C 0.326 3661 1871 58 | 8 126142476 G C 3e-218 406 1871 59 | 8 126156120 T C 0.206 1357 1871 60 | 8 126170231 A G 0.575 282 1871 61 | 8 126202988 G A 4e-3 906 1871 62 | 8 126203574 A G 0.549 1715 1871 63 | 8 142480046 C T 0.586 2865 1871 64 | 9 24759031 G A 0.826 2524 1871 65 | 9 34725116 G A 0.0271 2669 1871 66 | 9 34739096 A T 0.533 9 1871 67 | 9 109168963 G T 0.761 1197 1871 68 | 10 3686327 C T 0.141 3489 1871 69 | 10 130457091 G A 0.252 2305 1871 70 | 10 130466074 G A 0.9 1295 1871 71 | 11 89927176 T C 0.311 816 1871 72 | 11 131946420 T C 0.713 2444 1871 73 | 12 18645261 C A 0.309 747 1871 74 | 12 18649309 G A 0.248 3234 1871 75 | 12 18650325 C A 0.176 1953 1871 76 | 12 53588352 C G 0.859 2726 1871 77 | 12 68035018 C T 0.581 
1133 1871 78 | 12 89799953 A T 0.506 2891 1871 79 | 12 108702560 G A 0.81 1909 1871 80 | 13 67687684 A T 0.256 1525 1871 81 | 13 98537284 G A 0.515 1185 1871 82 | 14 21216499 C T 0.417 1567 1871 83 | 15 37446302 C T 0.998 379 1871 84 | 16 7020961 A T 0.189 1219 1871 85 | 17 38616358 A C 0.674 1368 1871 86 | 18 5585327 T C 0.673 2907 1871 87 | 18 7006307 G T 0.275 1739 1871 88 | 18 14604029 A C 0.937 3121 1871 89 | 18 22099355 G C 0.413 436 1871 90 | 18 71065831 A T 0.442 1098 1871 91 | 19 12256912 C T 0.291 3603 1871 92 | 19 18107913 C T 0.976 1942 1871 93 | 20 46795039 T C 0.89 1747 1871 94 | 20 46801443 T C 0.995 2997 1871 95 | 20 46853012 C G 0.14 2586 1871 96 | 21 29317433 T C 0.655 827 1871 97 | X 2778832 A G 0.116 2806 1871 98 | X 2779570 A G 0.0753 2216 1871 99 | X 4447697 T C 0.621 529 1871 100 | X 17241943 G A 0.507 2830 1871 101 | X 17277367 C G 0.247 839 1871 102 | X 17296623 T C 0.228 2112 1871 103 | X 17299084 C G 0.32 1689 1871 104 | X 17349464 A T 0.609 92 1871 105 | X 17429398 A C 0.64 1386 1871 106 | X 21370368 G A 0.505 131 1871 107 | X 28806030 G A 0.173 222 1871 108 | X 44811118 T C 0.252 3024 1871 109 | X 69333850 C T 0.726 714 1871 110 | X 88114540 G C 0.82 1475 1871 111 | X 104476175 A T 0.0881 2314 1871 112 | X 111244438 A G 0.86 1893 1871 113 | X 112541211 A G 0.464 3062 1871 114 | X 112548275 C T 0.28 2895 1871 115 | X 119061147 C G 0.954 2419 1871 116 | X 123779658 A G 0.737 2237 1871 117 | X 123785034 T C 0.129 22 1871 118 | X 123785308 T C 0.503 7 1871 119 | -------------------------------------------------------------------------------- /tests/input_files/assoc-files/has-fields-ac.txt: -------------------------------------------------------------------------------- 1 | chrom pos ref alt pval ac 2 | 1 869334 G A 0.801 1368 3 | 1 247301870 C T 0.809 166 4 | 2 5738770 C T 0.0901 1217 5 | 2 5742612 G A 0.846 1298 6 | 2 6641731 G A 0.864 700 7 | 2 53829765 C T 0.652 510 8 | 2 160380907 G A 0.798 1522 9 | 2 187747487 T C 0.458 253 10 | 2 
207692740 G A 0.198 856 11 | 2 228871641 C T 0.819 448 12 | 2 235979282 G C 0.193 492 13 | 3 30431830 C T 0.21 1220 14 | 3 30442007 T C 0.384 1283 15 | 3 30474577 A G 0.486 286 16 | 3 30482310 C T 0.0347 1463 17 | 3 149871159 G A 0.361 741 18 | 3 172114474 G C 0.159 74 19 | 3 182196593 T G 0.711 773 20 | 3 195312443 G A 0.142 753 21 | 4 12799647 G C 0.917 24 22 | 4 41308647 C G 0.869 565 23 | 4 102651455 G A 0.809 840 24 | 4 102671993 A G 0.985 22 25 | 4 124720631 C G 0.231 1224 26 | 4 156688046 A G 0.765 257 27 | 4 183802989 A G 0.174 1305 28 | 5 57015785 C T 0.62 167 29 | 5 57029433 G A 0.351 1218 30 | 5 57048529 C G 0.678 743 31 | 5 57066659 A G 0.844 285 32 | 5 79650252 C G 0.818 809 33 | 5 148301645 A G 0.942 135 34 | 5 148301658 T C 0.234 1450 35 | 6 33843759 G A 0.282 1405 36 | 6 33856903 C T 0.118 1048 37 | 6 131469461 G T 0.0241 1467 38 | 6 143015234 G A 0.128 505 39 | 6 162571461 C T 0.662 33 40 | 7 138895633 G A 0.574 811 41 | 8 5129244 G A 0.715 37 42 | 8 23061842 A T 0.261 831 43 | 8 23066008 T C 0.234 1298 44 | 8 23069574 T C 0.241 773 45 | 8 28994639 A G 0.296 1242 46 | 8 124006004 T A 0.419 271 47 | 8 124010926 T C 0.142 649 48 | 8 124023785 G A 0.0836 1471 49 | 8 125989698 C T 0.12 1404 50 | 8 126085022 C T 0.279 1032 51 | 8 126147516 T C 0.227 564 52 | 8 126203574 A G 0.772 640 53 | 8 126230876 A G 0.974 601 54 | 8 126250796 G A 0.865 1474 55 | 8 142477517 G A 0.303 1290 56 | 8 142480046 C T 0.789 66 57 | 8 143207311 T C 0.481 534 58 | 9 7815081 G T 0.277 180 59 | 9 34740316 G T 0.547 1422 60 | 9 73249033 A G 0.314 282 61 | 10 2078771 G A 0.655 1124 62 | 10 3691429 A G 0.185 573 63 | 10 3695498 A G 0.49 1281 64 | 10 3698452 C T 0.71 1396 65 | 10 3706028 C G 0.795 23 66 | 10 57544329 C T 0.969 1367 67 | 11 22793906 C T 0.0897 1084 68 | 11 26165548 A G 0.508 214 69 | 12 13774267 G C 0.459 1209 70 | 12 108702560 G A 0.584 1269 71 | 13 98537284 G A 0.991 685 72 | 13 107202871 T C 0.0833 364 73 | 14 21214438 C T 0.0239 1207 74 | 14 34856053 C T 0.656 
314 75 | 14 51525174 C A 0.409 168 76 | 14 51525197 A G 0.25 1378 77 | 16 7019124 G A 0.719 1402 78 | 16 60434463 G T 0.126 750 79 | 16 83614472 T C 6e-3 841 80 | 17 14941256 A C 0.309 16 81 | 18 4271064 C T 0.208 1519 82 | 18 5466960 G A 0.921 1121 83 | 18 14600952 T C 0.428 140 84 | 19 10169208 G A 0.846 1134 85 | 19 12247333 T C 0.622 160 86 | 19 49263721 C G 0.0293 449 87 | 20 59996240 G A 0.344 525 88 | 21 29318660 T C 0.0736 821 89 | 21 31892741 G A 0.313 72 90 | 21 41166390 G A 0.297 783 91 | X 2772660 G A 0.24 698 92 | X 2779211 G A 0.507 1143 93 | X 2780826 C T 0.61 1089 94 | X 9895993 G A 0.964 65 95 | X 17326138 A G 0.757 900 96 | X 32541309 T C 0.153 394 97 | X 40618664 C T 0.51 262 98 | X 69331718 G T 0.342 1516 99 | X 79758260 G C 0.104 1525 100 | X 87486191 A G 0.817 482 101 | X 87488354 T C 0.0309 309 102 | X 102125413 C T 0.129 882 103 | X 104476547 A C 0.731 193 104 | X 111947879 A G 0.683 1042 105 | X 112549697 C T 0.166 112 106 | X 119094045 G A 0.439 228 107 | X 123777079 C T 0.0867 961 108 | X 123782617 G A 0.825 908 109 | X 150878212 T C 0.212 1123 110 | X 150879645 G A 0.524 1063 111 | X 152712460 T G 0.335 321 112 | -------------------------------------------------------------------------------- /tests/input_files/assoc-files/has-fields-af-maf-ns.txt: -------------------------------------------------------------------------------- 1 | chrom pos ref alt pval maf af ns 2 | 1 869334 G A 0.505 0.0846 0.915 2696 3 | 1 167321325 G A 0.636 0.156 0.844 2696 4 | 1 190815209 G A 0.289 0.255 0.255 2696 5 | 1 208239885 A C 0.0566 0.491 0.509 2696 6 | 1 231393456 G A 0.895 0.206 0.206 2696 7 | 1 247302446 C T 0.579 0.181 0.181 2696 8 | 2 53823838 C G 0.215 0.295 0.705 2696 9 | 2 124024054 A G 0.261 0.0386 0.0386 2696 10 | 2 153727821 G C 0.14 0.188 0.188 2696 11 | 3 43289838 T C 0.894 0.0903 0.0903 2696 12 | 3 119876676 G A 0.679 0.0269 0.973 2696 13 | 3 139686984 A C 0.778 0.431 0.569 2696 14 | 3 139692648 A T 0.389 0.427 0.427 2696 15 | 3 149871159 G 
A 0.377 0.111 0.111 2696 16 | 3 157588501 C T 0.0553 0.353 0.353 2696 17 | 3 182210857 A G 0.638 0.0912 0.0912 2696 18 | 3 195312938 A G 0.988 0.253 0.253 2696 19 | 3 195315328 T C 9e-3 0.399 0.601 2696 20 | 3 195315329 G A 0.567 0.436 0.436 2696 21 | 4 14837311 G A 0.43 0.0592 0.0592 2696 22 | 4 14839282 G T 0.523 0.245 0.245 2696 23 | 4 41308647 C G 0.767 0.198 0.198 2696 24 | 4 45716712 G A 0.681 0.306 0.306 2696 25 | 4 102671598 T C 0.685 0.0135 0.0135 2696 26 | 4 173329875 T G 0.422 0.332 0.668 2696 27 | 5 79664613 C T 0.422 0.275 0.725 2696 28 | 5 84013839 C T 0.754 0.3 0.7 2696 29 | 5 84050232 C T 0.0855 0.485 0.485 2696 30 | 5 148300239 C T 0.654 0.228 0.772 2696 31 | 6 3073711 G C 0.626 0.175 0.175 2696 32 | 6 5497103 G A 0.835 0.0616 0.938 2696 33 | 6 23978264 C T 0.75 0.116 0.116 2696 34 | 6 120788990 C T 0.831 0.0473 0.953 2696 35 | 6 133856757 T C 0.385 0.294 0.294 2696 36 | 6 157841718 G C 0.767 0.0427 0.957 2696 37 | 7 9917356 C A 0.76 0.22 0.78 2696 38 | 7 9918193 A G 0.621 0.283 0.717 2696 39 | 7 97312295 G A 0.155 0.28 0.28 2696 40 | 7 102656496 T A 0.623 0.071 0.071 2696 41 | 7 131006474 G T 0.505 0.44 0.44 2696 42 | 7 138961816 A G 0.546 0.234 0.234 2696 43 | 8 5074225 A C 0.273 0.431 0.569 2696 44 | 8 23059593 C T 0.642 0.386 0.614 2696 45 | 8 126013134 A G 0.68 0.287 0.713 2696 46 | 8 126185276 T A 0.901 0.22 0.78 2696 47 | 8 126240333 A G 0.21 0.176 0.176 2696 48 | 8 133476430 A T 0.0116 0.206 0.206 2696 49 | 8 142476082 C T 0.946 0.0312 0.969 2696 50 | 9 7812537 T C 0.544 0.343 0.657 2696 51 | 9 24749401 A T 0.565 0.0733 0.0733 2696 52 | 9 34726524 G A 5e-4 0.0111 0.0111 2696 53 | 9 38179517 T C 0.88 0.294 0.294 2696 54 | 9 81723982 G C 0.664 0.404 0.596 2696 55 | 9 81725351 T C 0.647 0.227 0.227 2696 56 | 10 3695498 A G 0.856 0.348 0.652 2696 57 | 10 3701981 T G 0.0633 0.466 0.466 2696 58 | 10 67396794 A G 0.875 0.336 0.664 2696 59 | 11 512255 A G 0.781 0.0606 0.939 2696 60 | 11 15028145 A G 0.455 0.283 0.283 2696 61 | 11 26050585 A C 
0.0848 0.0343 0.0343 2696 62 | 11 37001072 T C 0.71 0.0543 0.0543 2696 63 | 12 75658782 C T 0.0745 0.0521 0.0521 2696 64 | 12 108702560 G A 0.373 0.365 0.365 2696 65 | 12 108702981 G A 0.853 0.076 0.076 2696 66 | 14 21214280 T C 0.66 0.217 0.217 2696 67 | 14 95678459 C T 0.0531 0.155 0.845 2696 68 | 14 100227884 T A 0.061 0.177 0.177 2696 69 | 15 36249420 G A 0.793 0.412 0.412 2696 70 | 15 62917910 G T .01 0.0575 0.943 2696 71 | 16 7015903 G A 0.933 0.334 0.666 2696 72 | 16 10625011 G A 0.056 0.0944 0.0944 2696 73 | 16 13529765 G A 0.267 0.0779 0.922 2696 74 | 17 12591317 C G 0.202 0.165 0.835 2696 75 | 18 4271064 C T 0.0795 0.0304 0.97 2696 76 | 18 14600952 T C 0.474 0.141 0.859 2696 77 | 18 38383882 C A 0.99 0.421 0.421 2696 78 | 18 38419866 C T 0.277 0.165 0.165 2696 79 | 18 71065831 A T 0.714 0.197 0.803 2696 80 | 19 12254854 T C 0.463 0.398 0.602 2696 81 | 19 36881643 A G 0.843 1e-3 1e-3 2696 82 | 21 26556200 T C 0.628 0.38 0.62 2696 83 | 21 29368271 A T 0.06 0.434 0.566 2696 84 | 21 29390487 G A 0.315 0.116 0.884 2696 85 | 21 29391417 C A 0.0823 0.431 0.431 2696 86 | 21 31926228 C T 0.292 0.31 0.69 2696 87 | X 2778982 G T 0.456 0.126 0.874 2696 88 | X 2780533 C T 0.219 0.0725 0.927 2696 89 | X 17243246 G C 0.859 0.226 0.774 2696 90 | X 17263345 C T 0.644 0.345 0.345 2696 91 | X 21071635 G A 0.563 0.371 0.371 2696 92 | X 30426641 T G 0.38 0.398 0.398 2696 93 | X 69330998 T A 0.92 0.0812 0.919 2696 94 | X 69345628 T C 0.69 0.298 0.298 2696 95 | X 79933523 G C 0.698 0.343 0.657 2696 96 | X 104488680 A C 0.125 0.474 0.474 2696 97 | X 119061147 C G 0.706 0.443 0.557 2696 98 | X 119090609 A T 0.744 0.0293 0.0293 2696 99 | X 119098050 A G 0.537 0.0347 0.965 2696 100 | X 121637557 T C 0.348 0.127 0.127 2696 101 | X 123778606 C T 0.895 0.329 0.671 2696 102 | X 128981910 C T 0.189 0.285 0.715 2696 103 | -------------------------------------------------------------------------------- /tests/input_files/assoc-files/has-fields-af-maf.txt: 
-------------------------------------------------------------------------------- 1 | chrom pos ref alt pval maf af 2 | 1 869334 G A 0.0495 0.0564 0.0564 3 | 1 30256975 C T 0.5 0.0685 0.0685 4 | 1 185457313 C T 0.304 0.408 0.408 5 | 1 187091709 G A 0.0645 0.0856 0.914 6 | 1 190815209 G A 0.275 0.306 0.694 7 | 1 194370540 T A 0.42 0.429 0.429 8 | 1 247303263 G A 0.429 0.247 0.247 9 | 1 247303895 G C 0.825 0.0906 0.909 10 | 2 25657402 G T 0.883 0.142 0.858 11 | 2 25657910 G A 0.245 0.0282 0.0282 12 | 2 57575512 G A 0.821 0.305 0.695 13 | 2 106301755 A C 0.422 0.444 0.444 14 | 2 106302511 G A 0.248 0.408 0.592 15 | 2 153727821 G C 0.0658 0.204 0.204 16 | 2 174821412 G A 0.263 0.107 0.107 17 | 2 175572002 T G 0.158 0.34 0.34 18 | 2 229186159 A G 0.211 0.0191 0.981 19 | 2 235043481 A C 0.16 0.4 0.4 20 | 3 30427079 G A 0.0829 0.18 0.82 21 | 3 30483367 A G 0.712 0.188 0.812 22 | 3 157587098 G A 0.12 0.499 0.499 23 | 4 2170324 G A 0.323 0.444 0.444 24 | 4 12799647 G C 0.558 0.0191 0.981 25 | 4 12800811 T G 0.515 0.257 0.257 26 | 4 14841195 A G 0.165 0.123 0.123 27 | 4 17856801 C T 0.638 0.224 0.224 28 | 4 102643397 C T 0.0218 0.43 0.57 29 | 4 102662428 G T 0.776 0.119 0.881 30 | 4 102677625 A G 0.465 0.334 0.666 31 | 4 109961574 A C 0.658 0.303 0.303 32 | 4 147804608 C T 0.162 0.178 0.822 33 | 4 173774137 G T 0.261 0.473 0.473 34 | 4 183758066 C A 0.688 0.261 0.739 35 | 4 183802989 A G 0.52 0.419 0.581 36 | 5 9972668 A G 0.518 0.401 0.599 37 | 5 32474472 T C 0.0971 0.475 0.525 38 | 5 57029433 G A 0.559 0.134 0.866 39 | 5 57038252 G A 0.383 0.0866 0.913 40 | 5 57066659 A G 0.233 0.0403 0.96 41 | 5 84038315 G A 0.461 0.45 0.55 42 | 5 84041164 A T 0.492 0.454 0.546 43 | 5 84050232 C T 0.669 0.127 0.873 44 | 5 84054807 C A 0.815 0.381 0.381 45 | 5 119140366 G C 0.11 0.4 0.6 46 | 5 119144763 C T 0.325 0.352 0.352 47 | 5 119145275 C T 0.485 0.189 0.811 48 | 5 128331546 G T 0.981 0.287 0.287 49 | 6 50302981 A G 0.136 2e-3 2e-3 50 | 6 136351386 T A 0.571 0.27 0.27 51 | 7 9926168 G 
C 0.0913 0.282 0.718 52 | 7 9929047 T C 0.0602 0.183 0.183 53 | 7 97291092 G A 0.732 0.392 0.608 54 | 7 97293499 T C 0.378 0.42 0.42 55 | 7 97311949 C T 0.962 0.483 0.483 56 | 7 102977725 T A 0.0173 0.18 0.82 57 | 7 139006263 G A 0.745 0.285 0.285 58 | 7 139116033 T C 0.439 0.125 0.125 59 | 8 5058220 G C 0.693 0.269 0.269 60 | 8 5068341 G A 0.107 0.345 0.655 61 | 8 5070350 A G 0.0187 0.122 0.122 62 | 8 5129244 G A 0.409 4e-3 0.996 63 | 8 23061842 A T 0.254 0.272 0.272 64 | 8 126118159 T G 0.445 0.318 0.318 65 | 8 126129348 C T 0.787 0.21 0.21 66 | 8 126180331 A T 0.502 0.342 0.658 67 | 8 126246328 G A 0.582 0.228 0.228 68 | 10 2081052 C T 0.387 0.0906 0.0906 69 | 10 130457091 G A 0.609 0.263 0.263 70 | 11 18741084 A T 0.0776 0.206 0.794 71 | 11 125770605 G C 0.401 0.0655 0.935 72 | 12 18650991 T A 0.652 0.217 0.217 73 | 12 89816762 C T 0.643 0.358 0.358 74 | 12 108694757 A G 0.3 0.0352 0.965 75 | 14 34858130 C T 0.557 0.361 0.361 76 | 14 51525686 C T 0.438 0.415 0.415 77 | 15 36249148 T C 0.922 0.0211 0.0211 78 | 15 75859306 C T 0.966 0.208 0.792 79 | 16 799108 C T 0.942 0.0957 0.904 80 | 16 83616592 T C 0.0444 0.476 0.524 81 | 17 14941276 G C 0.845 0.275 0.275 82 | 17 33635640 A T 0.365 0.461 0.461 83 | 18 4270328 A T 0.444 0.0614 0.939 84 | 18 38509352 T C 0.463 0.11 0.89 85 | 18 74061749 C T 0.0147 0.111 0.111 86 | 19 12240965 C T 0.884 0.129 0.129 87 | 19 12254686 T C 0.586 0.137 0.137 88 | 19 12254989 G A 0.227 0.455 0.455 89 | 19 12260858 C T 0.91 0.196 0.196 90 | 19 12261472 C T 0.0433 0.243 0.243 91 | 20 46801443 T C 0.992 0.0383 0.962 92 | 20 46804271 G C 0.445 0.271 0.271 93 | 21 29319903 C T 0.547 0.285 0.285 94 | 21 29338791 T C 0.201 0.0715 0.928 95 | 21 29357172 G A 0.284 0.396 0.604 96 | 21 32189911 C T 0.823 0.148 0.852 97 | X 2534840 T A 0.0835 0.399 0.399 98 | X 2777107 T G 0.316 0.2 0.8 99 | X 17293827 T C 0.0165 0.324 0.676 100 | X 17302168 C T 0.588 0.44 0.56 101 | X 17443238 C T 0.623 0.184 0.184 102 | X 28805708 A T 0.396 0.371 0.629 103 | X 
28832216 C T 0.59 0.384 0.384 104 | X 44754987 A G 0.95 0.233 0.233 105 | X 47735235 G A 0.333 0.253 0.747 106 | X 69324676 T C 0.0671 0.131 0.869 107 | X 69330104 G A 0.846 0.326 0.674 108 | X 79719652 C A 0.971 0.168 0.168 109 | X 79933523 G C 0.945 0.251 0.749 110 | X 87486191 A G 0.62 0.143 0.143 111 | X 97693048 G A 0.417 0.294 0.706 112 | X 102085485 A G 0.0636 0.437 0.563 113 | X 104484725 C T 0.0414 0.139 0.139 114 | X 114303460 A T 0.0836 0.0121 0.0121 115 | X 119050501 C A 0.749 0.265 0.735 116 | X 119101693 G T 0.593 0.0232 0.977 117 | X 123786762 G A 0.345 0.419 0.419 118 | X 133712650 C T 0.143 0.16 0.84 119 | -------------------------------------------------------------------------------- /tests/input_files/assoc-files/has-fields-af-ns.txt: -------------------------------------------------------------------------------- 1 | chrom pos ref alt pval af ns 2 | 1 869334 G A 0.919 0.941 3824 3 | 1 152542229 T C 0.22 0.39 3824 4 | 1 229371438 G A 0.206 0.295 3824 5 | 1 247300760 A G 0.969 0.214 3824 6 | 1 247303895 G C 0.861 0.332 3824 7 | 2 5710151 G A 0.675 0.823 3824 8 | 2 6633687 G A 0.945 0.368 3824 9 | 2 53823838 C G 0.215 0.0651 3824 10 | 2 53830109 C G 0.709 0.259 3824 11 | 2 214883912 G A 0.209 0.585 3824 12 | 3 30442007 T C 0.351 0.32 3824 13 | 3 30442250 C T 0.888 0.531 3824 14 | 3 119901774 C A 0.438 0.752 3824 15 | 3 139688415 G A 0.577 0.069 3824 16 | 3 157594640 A C 0.766 0.937 3824 17 | 3 157595264 C T 0.81 0.0817 3824 18 | 3 157598628 C T 0.537 0.504 3824 19 | 3 172114474 G C 0.496 0.277 3824 20 | 4 14839241 A G 0.634 0.475 3824 21 | 4 14840094 T C 0.933 0.518 3824 22 | 4 14848086 T G 0.85 0.145 3824 23 | 4 42161066 T C 0.461 0.083 3824 24 | 4 45716712 G A 0.281 0.234 3824 25 | 4 45742830 T A 0.464 0.144 3824 26 | 4 102677625 A G 0.864 0.283 3824 27 | 4 111894848 T G 0.08 0.848 3824 28 | 4 114819025 T A 0.119 0.742 3824 29 | 5 9972668 A G 0.935 0.164 3824 30 | 5 22060117 C T 0.645 0.849 3824 31 | 5 57010108 C T 0.589 0.544 3824 32 | 5 
119145219 T C 0.842 0.769 3824 33 | 5 148300239 C T 0.211 0.0838 3824 34 | 6 13612492 T C 0.0875 0.917 3824 35 | 6 50030176 A G 0.902 0.217 3824 36 | 6 52813936 A T 0.0309 0.636 3824 37 | 6 143020804 C T 0.73 0.339 3824 38 | 7 6434688 T C 0.994 0.834 3824 39 | 7 97308201 T A 0.116 0.272 3824 40 | 7 103005511 C T 0.202 0.213 3824 41 | 7 138913595 C A 0.423 0.779 3824 42 | 8 1339798 C G 0.908 0.161 3824 43 | 8 23070848 G T 0.147 0.263 3824 44 | 8 124000354 A G 0.656 0.271 3824 45 | 8 126050879 C T 0.713 0.0416 3824 46 | 8 126133108 G A 0.444 0.448 3824 47 | 8 126250796 G A 0.206 0.825 3824 48 | 8 142473703 G C 0.925 0.62 3824 49 | 8 142476555 A G 0.824 0.654 3824 50 | 8 142477218 C T 0.718 0.399 3824 51 | 8 142477592 G A 0.648 0.428 3824 52 | 9 7814048 A G 0.97 0.292 3824 53 | 9 12272803 G A 0.0794 0.339 3824 54 | 9 16462422 G T 0.462 0.949 3824 55 | 9 34731228 T C 0.927 0.69 3824 56 | 11 134465118 A G 0.0888 0.215 3824 57 | 12 13774267 G C 0.0496 0.209 3824 58 | 12 18621454 C T 0.678 0.537 3824 59 | 14 21213419 G C 0.121 0.918 3824 60 | 14 21217625 G A 0.648 0.645 3824 61 | 14 65452526 G A 0.115 0.882 3824 62 | 16 833727 C A 0.54 0.97 3824 63 | 16 10618910 G C 0.74 0.189 3824 64 | 16 57273525 G A 0.175 0.0885 3824 65 | 16 85605305 G A 0.3 3e-3 3824 66 | 18 4974555 C T 0.596 0.57 3824 67 | 18 4995925 A T 0.69 0.728 3824 68 | 18 14632790 C T 0.0524 0.421 3824 69 | 18 38419866 C T 0.889 0.667 3824 70 | 18 71088642 T C 0.843 0.0356 3824 71 | 18 74050745 T G 0.0978 0.786 3824 72 | 18 74058905 G A 0.956 0.439 3824 73 | 19 53490509 T C 0.665 0.76 3824 74 | 20 46800269 T C 0.687 0.76 3824 75 | 21 29356183 G A 0.876 0.428 3824 76 | 21 29371979 T G 0.501 0.68 3824 77 | 21 29378524 G T 0.0223 0.707 3824 78 | 21 29390547 T C 0.341 0.413 3824 79 | 21 29391417 C A 0.813 0.949 3824 80 | 22 44923076 C A 0.773 0.748 3824 81 | X 2778546 C A 0.451 0.355 3824 82 | X 6646102 C G 0.639 0.33 3824 83 | X 7754331 T C 0.0378 0.313 3824 84 | X 7776379 A G 0.834 0.639 3824 85 | X 17296260 C T 
0.114 0.204 3824 86 | X 17326138 A G 0.023 0.743 3824 87 | X 28888279 T A 0.328 0.873 3824 88 | X 30426641 T G 0.624 0.639 3824 89 | X 46095748 C A 0.707 0.181 3824 90 | X 79858525 A C 0.374 0.488 3824 91 | X 85953304 G C 0.177 0.178 3824 92 | X 104501200 T C 0.899 0.0204 3824 93 | X 106691551 G A 0.362 0.464 3824 94 | X 112682435 A G 0.086 0.913 3824 95 | X 119081421 G C 0.63 0.0675 3824 96 | X 123780385 T G 0.612 0.992 3824 97 | X 123780869 C A 0.648 0.929 3824 98 | X 150114064 A T 0.649 0.0656 3824 99 | -------------------------------------------------------------------------------- /tests/input_files/assoc-files/has-fields-af.txt: -------------------------------------------------------------------------------- 1 | chrom pos ref alt pval af 2 | 1 869334 G A 0.114 0.108 3 | 1 30297711 T C 0.432 0.432 4 | 1 187083354 C T 0.132 0.364 5 | 1 203189657 A G 0.198 0.784 6 | 2 5710151 G A 0.92 0.162 7 | 2 5712789 A T 0.979 0.3 8 | 2 5713006 G A 0.293 0.406 9 | 2 5732122 G T 0.712 0.793 10 | 2 6641731 G A 0.499 0.901 11 | 2 25657910 G A 2e-3 0.846 12 | 2 60171565 G A 0.697 0.223 13 | 2 106277036 A G 0.252 0.274 14 | 2 106291633 C T 0.823 0.238 15 | 2 106301755 A C 0.127 0.67 16 | 2 106315235 C T 0.304 0.121 17 | 2 160380907 G A 0.469 0.876 18 | 2 207692740 G A 0.252 0.425 19 | 2 229186159 A G 0.535 0.805 20 | 2 235976263 A G 0.247 0.323 21 | 3 139692648 A T 0.647 0.596 22 | 3 139693702 C A 0.318 0.731 23 | 3 156306722 C T 0.783 0.594 24 | 3 195314940 C T 0.772 0.953 25 | 4 2170309 C A 0.281 0.224 26 | 4 14834849 C G 0.727 0.0211 27 | 4 14837656 C T 0.991 0.656 28 | 4 14839976 A G 0.506 0.746 29 | 4 14840107 T C 0.695 0.147 30 | 4 45716712 G A 0.895 0.111 31 | 4 173215414 T A 0.487 0.267 32 | 5 57031972 T C 0.0885 0.911 33 | 5 84051826 G A 0.719 0.0444 34 | 5 180338649 T C 0.55 0.788 35 | 5 180354343 C G 0.731 0.382 36 | 6 136351386 T A 0.434 0.612 37 | 7 6400452 G A 0.501 0.254 38 | 7 9932005 G C 0.258 0.158 39 | 7 9961509 T A 0.304 0.0103 40 | 7 97302167 T C 0.539 0.342 
41 | 7 102971097 C T 0.79 0.0961 42 | 7 131004883 A C 0.255 0.883 43 | 7 131006474 G T 0.905 0.0668 44 | 7 139003256 T G 0.247 0.885 45 | 8 1339798 C G 0.548 0.865 46 | 8 23061842 A T 0.917 0.787 47 | 8 23070180 G A 0.307 0.942 48 | 8 124001759 A T 0.846 0.0567 49 | 8 124011505 A G 0.567 0.688 50 | 8 143207311 T C 0.132 0.451 51 | 9 16462422 G T 0.87 0.4 52 | 9 34742364 T C 0.223 0.675 53 | 10 3706028 C G 0.356 0.724 54 | 10 58932779 G A 0.24 0.0793 55 | 10 67406735 G A 0.837 0.742 56 | 10 67470572 G C 0.199 0.0345 57 | 10 67473417 C T 0.332 8e-3 58 | 11 22793439 C G 0.77 0.236 59 | 12 13770640 C T 0.9 0.407 60 | 12 13782127 G A 0.373 0.0792 61 | 12 18655952 C G 0.42 0.893 62 | 14 34856208 T C 0.778 0.438 63 | 15 36249420 G A 0.659 0.936 64 | 15 100191909 C T 0.978 0.834 65 | 15 100266779 G A 0.309 0.452 66 | 16 10622895 C T 0.972 0.498 67 | 16 10625245 A G 0.731 0.0826 68 | 16 13529765 G A 0.573 0.399 69 | 16 85318475 T C 0.073 0.91 70 | 18 5585327 T C 0.54 0.734 71 | 19 12249129 G A 0.791 0.771 72 | 20 15983833 G A 0.187 0.837 73 | 20 46799027 T C 0.836 0.111 74 | 21 29295011 T G 0.934 0.7 75 | 21 29374694 G A 0.588 0.346 76 | 21 29389882 A C 0.657 0.86 77 | 22 28039672 T C 0.128 0.479 78 | X 2060097 A G 0.209 0.603 79 | X 2686165 A G 0.32 0.955 80 | X 2777107 T G 0.638 0.311 81 | X 2780533 C T 0.642 0.151 82 | X 17277367 C G 0.917 0.877 83 | X 17294363 A C 0.88 0.5 84 | X 32541309 T C 0.401 0.311 85 | X 44882615 T C 0.9 0.234 86 | X 69329659 T G 0.748 0.944 87 | X 88114540 G C 0.565 0.841 88 | X 111612676 C G 0.209 0.57 89 | X 111985225 C T 0.936 0.787 90 | X 112549715 T C 0.24 0.492 91 | X 119064965 C T 0.352 0.535 92 | X 119088213 A G 0.815 0.511 93 | X 123783744 G C 0.944 0.356 94 | X 150877953 C T 0.0235 0.0542 95 | -------------------------------------------------------------------------------- /tests/input_files/assoc-files/has-fields-maf-ns.txt: -------------------------------------------------------------------------------- 1 | chrom pos ref alt pval maf 
ns 2 | 1 869334 G A 0.016 0.461 9616 3 | 1 30158750 C G 0.124 0.377 9616 4 | 1 30365235 C T 0.542 0.458 9616 5 | 1 30443066 C T 0.133 0.115 9616 6 | 1 185453884 G A 0.385 0.142 9616 7 | 1 185456978 T A 0.599 0.0783 9616 8 | 1 203189657 A G 0.457 0.416 9616 9 | 1 208239885 A C 0.256 0.346 9616 10 | 1 220508129 G A 0.238 0.206 9616 11 | 2 5726953 G A 0.85 0.493 9616 12 | 2 25657910 G A 0.736 0.443 9616 13 | 2 53814354 A T 0.475 0.0143 9616 14 | 2 76914899 A T 0.593 0.294 9616 15 | 2 160376357 T A 0.339 0.291 9616 16 | 2 187740302 G C 0.682 0.371 9616 17 | 2 207693815 A G 0.44 0.0517 9616 18 | 2 207701151 T G 0.255 0.395 9616 19 | 2 207710407 A G 0.475 0.197 9616 20 | 3 30439098 A C 0.561 0.456 9616 21 | 3 30483367 A G 0.616 0.151 9616 22 | 3 43289838 T C 0.967 0.38 9616 23 | 3 44100173 C T 0.76 0.432 9616 24 | 3 119908292 C T 0.254 0.142 9616 25 | 3 157592712 A T 0.331 0.29 9616 26 | 3 157593470 C T 0.83 0.442 9616 27 | 3 182203482 C T 0.166 0.0933 9616 28 | 4 14840107 T C 0.732 0.0944 9616 29 | 4 17854031 T C 0.667 0.485 9616 30 | 4 109961574 A C 0.802 3e-3 9616 31 | 4 124720631 C G 0.535 0.462 9616 32 | 5 21816517 C T 0.912 0.327 9616 33 | 5 56988033 A G 0.878 0.41 9616 34 | 5 57044232 G C 0.906 0.116 9616 35 | 5 57072560 C T 0.101 0.395 9616 36 | 5 79653658 G C 0.865 0.336 9616 37 | 5 79664000 T C 0.258 0.244 9616 38 | 5 84012571 G C 0.316 0.201 9616 39 | 5 119140366 G C 0.354 0.299 9616 40 | 5 119173421 G C 0.858 0.296 9616 41 | 5 129697384 C T 0.752 0.131 9616 42 | 6 25499895 C T 0.364 0.446 9616 43 | 6 145957770 C A 0.448 0.0312 9616 44 | 6 157906379 A G 0.621 0.443 9616 45 | 7 9922676 G A 0.315 0.38 9616 46 | 7 131006474 G T 0.178 0.178 9616 47 | 7 138946363 C T 0.725 0.0719 9616 48 | 7 139005460 A G 0.184 0.434 9616 49 | 8 5076309 C T 0.111 0.338 9616 50 | 8 5077012 C A 0.53 0.205 9616 51 | 8 23068286 A G 0.845 0.0623 9616 52 | 8 23073802 C G 0.0774 0.104 9616 53 | 8 124001094 T C 0.25 0.0246 9616 54 | 8 124013901 C A 2.7e-237 0.041 9616 55 | 8 126133108 G A 
0.265 0.294 9616 56 | 8 126246328 G A 0.52 0.0868 9616 57 | 8 127731037 G A 0.406 0.0491 9616 58 | 8 142473703 G C 0.962 0.213 9616 59 | 8 142477771 C G 0.786 0.446 9616 60 | 9 7808987 A G 0.588 0.0956 9616 61 | 9 9496882 T C 0.488 0.205 9616 62 | 9 12273191 A G 0.643 0.0909 9616 63 | 9 34747944 A G 0.972 0.497 9616 64 | 10 72025336 G A 0.31 0.276 9616 65 | 11 89927176 T C 0.57 0.341 9616 66 | 12 13753750 T G 0.458 0.0335 9616 67 | 12 13755347 T A 0.94 0.463 9616 68 | 12 18651702 G A 0.0802 0.483 9616 69 | 13 37330948 G A 0.141 0.0597 9616 70 | 14 19461500 T C 0.705 0.375 9616 71 | 14 19470346 T A 0.895 0.188 9616 72 | 14 21214023 A G 0.672 0.391 9616 73 | 15 62923874 T G 0.877 0.421 9616 74 | 16 81192256 T A 0.727 0.328 9616 75 | 18 71104038 T C 0.386 0.493 9616 76 | 18 74061749 C T 0.445 0.286 9616 77 | 19 12264074 A G 0.43 0.0668 9616 78 | 19 53487563 G T 0.954 0.434 9616 79 | 19 53489867 T C 0.947 0.306 9616 80 | 21 29374941 A T 0.959 0.0948 9616 81 | X 2775601 A G 0.923 0.0139 9616 82 | X 17296623 T C 0.923 0.239 9616 83 | X 17302413 C G 0.79 0.203 9616 84 | X 28806414 A G 0.913 0.275 9616 85 | X 32530777 A T 0.594 0.0813 9616 86 | X 32531441 G T 0.715 0.472 9616 87 | X 69324676 T C 0.904 0.203 9616 88 | X 69332777 A C 0.302 0.142 9616 89 | X 69341480 T G 0.779 0.38 9616 90 | X 79478567 C T 0.152 0.302 9616 91 | X 102085485 A G 9e-3 0.0683 9616 92 | X 106694505 G A 0.746 0.0873 9616 93 | X 111989963 G C 0.671 0.212 9616 94 | X 115484732 T G 0.399 0.219 9616 95 | X 119098663 A G 0.753 0.361 9616 96 | X 128996953 A T 0.534 0.401 9616 97 | -------------------------------------------------------------------------------- /tests/input_files/assoc-files/has-fields-maf.txt: -------------------------------------------------------------------------------- 1 | chrom pos ref alt pval maf 2 | 1 869334 G A 0.016 0.0841 3 | 1 187083354 C T 0.394 0.291 4 | 1 208228435 G C 4e-3 0.0125 5 | 1 247316632 T C 0.206 0.306 6 | 2 5742868 C T 0.574 0.34 7 | 2 5743311 C A 0.051 0.0445 8 
| 2 11235179 T A 0.734 0.494 9 | 2 76829084 C G 0.654 0.341 10 | 2 207719839 C T 0.775 0.195 11 | 2 235974398 G A 0.607 0.0546 12 | 2 235979282 G C 0.247 0.263 13 | 3 3103110 A C 0.0376 0.24 14 | 3 30483367 A G 0.462 0.127 15 | 3 119901774 C A 0.869 0.293 16 | 3 182224946 C G 0.0265 0.309 17 | 4 14839359 C T 0.856 0.309 18 | 4 14842391 C T 0.84 0.393 19 | 4 37469074 A G 0.48 9e-3 20 | 4 45745659 A T 0.143 0.157 21 | 4 45817505 C T 0.926 0.447 22 | 4 102692475 A G 0.608 0.497 23 | 4 102692665 G A 0.123 0.0107 24 | 4 147777465 C T 0.575 0.0113 25 | 4 173362977 C T 0.825 0.215 26 | 4 183802989 A G 0.213 0.226 27 | 5 34275660 A G 0.869 0.25 28 | 5 57025476 A G 0.784 0.0151 29 | 5 57044232 G C 0.304 0.268 30 | 5 57066659 A G 0.699 0.267 31 | 5 84041164 A T 0.345 9e-3 32 | 5 90515524 C G 0.73 0.102 33 | 5 129703802 T C 0.562 0.189 34 | 6 23973089 G A 0.245 0.154 35 | 6 23978124 A T 0.125 0.172 36 | 7 9922676 G A 0.338 0.465 37 | 7 9932005 G C 0.698 0.0703 38 | 7 9946134 C T 0.204 0.264 39 | 7 9961509 T A 0.899 0.401 40 | 7 97290158 T C 0.521 0.413 41 | 7 97310556 A G 0.805 0.156 42 | 7 97312295 G A 0.993 0.261 43 | 7 138965428 A G 0.648 0.409 44 | 7 139006133 G A 0.0546 0.109 45 | 7 139006798 C A 0.461 0.332 46 | 7 155018927 G A 0.912 0.0991 47 | 8 5068341 G A 0.788 0.324 48 | 8 23059593 C T 0.721 0.131 49 | 8 124010926 T C 0.645 0.305 50 | 8 126085022 C T 0.758 0.497 51 | 8 126105802 G A 0.446 0.286 52 | 8 126118159 T G 0.29 0.432 53 | 8 126211868 G A 0.861 0.267 54 | 8 142477563 C T 0.697 0.236 55 | 9 16462422 G T 0.891 0.0169 56 | 9 34731228 T C 0.232 0.307 57 | 9 34741548 G A 0.992 0.431 58 | 9 81725351 T C 0.575 0.447 59 | 9 109254115 T C 0.451 0.0602 60 | 10 3686300 C T 0.252 0.317 61 | 10 3694459 T C 0.952 0.0427 62 | 10 3698452 C T 0.485 0.276 63 | 10 12703258 A G 0.515 0.454 64 | 10 30951327 C T 0.579 0.168 65 | 10 130464378 T C 0.454 0.106 66 | 10 130469852 T C 0.676 0.157 67 | 11 935794 C T 0.646 0.28 68 | 11 134465118 A G 0.0705 0.338 69 | 12 13747033 T C 
0.959 0.117 70 | 12 18648812 G A 0.0646 0.0182 71 | 12 108702981 G A 0.541 0.338 72 | 13 104310518 C G 0.508 0.322 73 | 14 21214023 A G 0.522 0.317 74 | 14 21226262 T C 0.808 0.483 75 | 14 26154936 C T 0.0371 0.284 76 | 14 95673980 A G 0.344 0.0753 77 | 15 62919312 G A 0.136 0.299 78 | 15 62921020 C T 0.451 0.156 79 | 18 4271005 A G 0.883 0.124 80 | 18 74050174 A G 0.24 0.301 81 | 18 74058754 A G 0.127 0.0533 82 | 19 53486060 G A 0.0316 0.174 83 | 19 53493158 G A 0.0287 0.121 84 | 20 15983833 G A 0.598 0.413 85 | 21 26554398 C G 0.223 0.287 86 | 21 29317433 T C 0.609 0.0477 87 | 21 29325234 C A 0.655 0.345 88 | 21 29374694 G A 0.795 0.269 89 | 21 29390487 G A 0.982 0.302 90 | 22 28034554 G A 0.413 0.276 91 | X 2780533 C T 0.571 0.0282 92 | X 9895993 G A 0.466 0.319 93 | X 17300035 A G 0.668 0.228 94 | X 44811118 T C 0.457 0.466 95 | X 69325595 C A 0.59 0.143 96 | X 79875814 C T 0.657 0.474 97 | X 102085485 A G 0.108 0.245 98 | X 111983675 G A 0.512 0.0602 99 | X 111989963 G C 0.0456 0.392 100 | X 112549697 C T 0.444 0.142 101 | X 114303460 A T 0.218 0.306 102 | X 119094045 G A 0.595 0.215 103 | X 121643568 A C 0.232 0.0107 104 | X 123778606 C T 0.627 0.212 105 | -------------------------------------------------------------------------------- /tests/input_files/assoc-files/has-fields-ns.txt: -------------------------------------------------------------------------------- 1 | chrom pos ref alt pval ns 2 | 1 869334 G A 0.208 1798 3 | 1 30085939 C T 0.0861 1798 4 | 1 187092156 G T 0.0617 1798 5 | 1 203186666 C G 0.884 1798 6 | 1 229475097 A C 0.301 1798 7 | 1 237381793 A C 0.416 1798 8 | 2 5713168 G A 0.402 1798 9 | 2 5725322 A G 0.289 1798 10 | 2 5746515 G A 0.267 1798 11 | 2 36899499 G A 0.498 1798 12 | 2 44838484 A T 0.507 1798 13 | 2 53831866 G A 0.137 1798 14 | 2 62199938 T A 0.568 1798 15 | 2 106279652 T C 0.342 1798 16 | 2 106298098 G A 0.635 1798 17 | 2 187769409 G A 0.729 1798 18 | 3 3113831 C G 0.0283 1798 19 | 3 182222574 C G 0.445 1798 20 | 3 182223833 A C 
0.429 1798 21 | 3 195312938 A G 0.163 1798 22 | 4 2170324 G A 0.987 1798 23 | 4 14836025 T A 0.57 1798 24 | 4 14839619 C G 0.492 1798 25 | 4 18322840 A G 0.463 1798 26 | 4 102671598 T C 0.405 1798 27 | 4 102677836 G T 0.374 1798 28 | 4 124720631 C G 0.934 1798 29 | 4 147739002 T A 0.313 1798 30 | 5 57042549 C T 0.744 1798 31 | 5 57065622 G T 0.738 1798 32 | 5 79664000 T C 0.0887 1798 33 | 5 113780887 T C 0.355 1798 34 | 5 119140366 G C 0.556 1798 35 | 6 33843898 T C 0.472 1798 36 | 6 120801349 A G 0.737 1798 37 | 7 9918051 C T 0.518 1798 38 | 7 9930147 G A 0.899 1798 39 | 7 9931853 T C 0.266 1798 40 | 7 9943230 G C 0.251 1798 41 | 7 9951887 T C 0.624 1798 42 | 7 139002565 T C 0.78 1798 43 | 8 5069452 G A 0.412 1798 44 | 8 23060726 G A 0.594 1798 45 | 8 84011970 G A 0.822 1798 46 | 8 124004186 C A 0.674 1798 47 | 8 124008339 G A 0.552 1798 48 | 8 124017303 C T 0.341 1798 49 | 8 124028479 G A 0.604 1798 50 | 8 126213089 T C 0.664 1798 51 | 8 126240333 A G 0.771 1798 52 | 8 126266550 G C 0.282 1798 53 | 8 142476820 T C 0.891 1798 54 | 8 142477139 C T 0.344 1798 55 | 8 142477563 C T 0.831 1798 56 | 9 9496882 T C 0.317 1798 57 | 9 34743551 G A 0.7 1798 58 | 9 34747944 A G 0.226 1798 59 | 9 81725351 T C 0.807 1798 60 | 10 37793880 C T 0.12 1798 61 | 10 58955460 G A 0.965 1798 62 | 10 73472882 C T 0.938 1798 63 | 10 130769505 G C 0.361 1798 64 | 11 22793439 C G 0.169 1798 65 | 11 22795022 C G 0.356 1798 66 | 11 36994878 A C 0.455 1798 67 | 11 104274753 A T 0.853 1798 68 | 11 125774051 C T 0.977 1798 69 | 12 13753750 T G 0.906 1798 70 | 12 18621454 C T 0.526 1798 71 | 12 89799953 A T 0.0927 1798 72 | 14 19467188 G T 0.0616 1798 73 | 14 93091592 T C 0.884 1798 74 | 14 95677711 G A 0.204 1798 75 | 16 4339625 C A 0.928 1798 76 | 17 33105512 C T 0.618 1798 77 | 18 7001138 G T 0.892 1798 78 | 18 14582075 G A 0.922 1798 79 | 18 22097899 C T 0.201 1798 80 | 18 38522014 C T 0.269 1798 81 | 18 74053416 A G 0.316 1798 82 | 21 29319600 T C 0.441 1798 83 | 21 29336724 G C 0.39 1798 84 
| X 2382381 G A 0.188 1798 85 | X 2693624 G A 0.474 1798 86 | X 2777107 T G 0.964 1798 87 | X 17252611 C G 0.316 1798 88 | X 17431649 A G 0.314 1798 89 | X 17473837 C T 0.673 1798 90 | X 28792663 G C 0.787 1798 91 | X 28885588 C T 0.406 1798 92 | X 44760231 T A 0.723 1798 93 | X 69334184 G A 0.408 1798 94 | X 79858525 A C 0.532 1798 95 | X 87486191 A G 0.453 1798 96 | X 87488354 T C 0.304 1798 97 | X 111989963 G C 0.436 1798 98 | X 112689006 T C 0.693 1798 99 | X 119088213 A G 0.503 1798 100 | X 121654787 A C 0.245 1798 101 | X 123785034 T C 0.209 1798 102 | X 123787207 G T 0.246 1798 103 | -------------------------------------------------------------------------------- /tests/input_files/assoc-files/pheno.3.1.epacts.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/pheweb/b646214eae1ba524e239d2f5b20ca011756df770/tests/input_files/assoc-files/pheno.3.1.epacts.gz -------------------------------------------------------------------------------- /tests/input_files/assoc-files/pheno2.chr19.tsv: -------------------------------------------------------------------------------- 1 | #CHROM BEG END ref alt MAF PVALUE 2 | 19 12203349 12203349 C T 0.31124 0.2271e-12 3 | 19 12225372 12225372 T G 0.31124 0.171e-9 4 | -------------------------------------------------------------------------------- /tests/input_files/categories.csv: -------------------------------------------------------------------------------- 1 | phenocode,category 2 | 0,category1 3 | 1,category2 4 | 2,category1 5 | 3.1, 6 | EAR-LENGTH,category1 7 | KIMCHI-PER-DAY,category2 8 | snowstorm,category3 9 | "*&\+. 
!`(%@)", category3 10 | has-fields-,other 11 | has-fields-ac-af-maf-ns,other 12 | has-fields-ac-af-maf,other 13 | has-fields-ac-af-ns,other 14 | has-fields-ac-af,other 15 | has-fields-ac-maf-ns,other 16 | has-fields-ac-maf,other 17 | has-fields-ac-ns,other 18 | has-fields-ac,other 19 | has-fields-af-maf-ns,other 20 | has-fields-af-maf,other 21 | has-fields-af-ns,other 22 | has-fields-af,other 23 | has-fields-maf-ns,other 24 | has-fields-maf,other 25 | has-fields-ns,other 26 | -------------------------------------------------------------------------------- /tests/input_files/config.py: -------------------------------------------------------------------------------- 1 | # this file will be interpreted as python3 2 | 3 | urlprefix = '/test' 4 | 5 | 6 | # Minor allele frequency (MAF) filters: 7 | # Note: 8 | # "Association" means an association between a variant and a phenotype. 9 | # Every association has a p-value. It may also have other attributes. 10 | # MAF-filters will apply to allele frequency (AF) and allele count (AC) (if PheWeb knows num_samples for the phenotype) 11 | # First, PheWeb drops any association with a MAF < assoc_min_maf. 12 | # Next, PheWeb drops any variant where every association has MAF < variant_inclusion_maf. 13 | # In a dataset where all associations to a given variant all have the same MAF, the two filters do the same thing. 14 | # - in that case, use `assoc_min_maf` to save disk space and parse time. 15 | # If variant_inclusion_maf <= assoc_min_maf, it won't have any effect. 16 | # Using assoc_min_maf will save disk space, even if you're already using variant_inclusion_maf. 17 | assoc_min_maf = 0.005 18 | variant_inclusion_maf = 0.01 19 | 20 | 21 | # num_procs = 1 # for debugging convenience. 
22 | 23 | 24 | # directory for caching large (~1GB) common files like dbsnp 25 | cache_dir = './fake-cache' 26 | disallow_downloads = True 27 | -------------------------------------------------------------------------------- /tests/input_files/correlations/pheno-correlations.txt: -------------------------------------------------------------------------------- 1 | Trait1 Trait2 rg SE Z P-value Method 2 | snowstorm has-fields- -0.5524 1.5359 -0.3597 0.0191 ldsc 3 | snowstorm has-fields-ac-af -0.3197 0.9499 -0.3366 0.0364 ldsc 4 | snowstorm 3.1 0.4059 1.3969 0.2905 0.0714 ldsc 5 | snowstorm EAR-LENGTH 0.5652 0.5601 1.0091 0.0129 ldsc 6 | snowstorm has-fields-ns -0.1826 1.4921 -0.1224 0.0026 ldsc 7 | snowstorm has-fields-ac-maf 0.9318 0.6361 1.4649 0.0429 ldsc 8 | snowstorm has-fields-ac-af-maf-ns 0.3099 1.0718 0.2892 0.0725 ldsc 9 | has-fields-ac-af 3.1 -0.0051 1.5509 -0.0033 0.0974 ldsc 10 | has-fields-ac-af snowstorm -0.4169 0.8406 -0.496 0.0199 ldsc 11 | has-fields-ac-af has-fields-ac-af-maf-ns 0.5079 1.1877 0.0276 0.6689 ldsc 12 | has-fields-ac-af-maf-ns snowstorm 0.763 0.7785 0.9801 0.0271 ldsc 13 | has-fields-ac-af-maf-ns has-fields-ac 0.0728 0.5065 0.1437 0.0858 ldsc 14 | has-fields-ac-af-maf-ns has-fields-af-ns 0.4483 0.4013 1.1173 0.0639 ldsc 15 | -------------------------------------------------------------------------------- /tests/input_files/correlations/pheno-list.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "category": "infectious diseases", 4 | "phenocode": "008.5", 5 | "phenostring": "Bacterial enteritis" 6 | }, 7 | { 8 | "category": "infectious diseases", 9 | "phenocode": "038", 10 | "phenostring": "Septicemia" 11 | }, 12 | { 13 | "category": "infectious diseases", 14 | "phenocode": "041.4" 15 | }, 16 | { 17 | "category": "digestive", 18 | "phenocode": "559", 19 | "phenostring": "Ileostomy status" 20 | }, 21 | { 22 | "category": "digestive", 23 | "phenocode": "562.1", 24 | "phenostring": 
"Diverticulosis" 25 | } 26 | ] 27 | -------------------------------------------------------------------------------- /tests/input_files/correlations/rg-pipeline-output.txt: -------------------------------------------------------------------------------- 1 | Trait1 Trait2 rg SE Z P-value Method 2 | 559 038 -0.5524 1.5359 -0.3597 0.7191 ldsc 3 | 008.5 031 -0.1234 1.4459 -0.3579 0.05 ldsc 4 | 008.5 041.4 0.5652 0.5601 1.0091 0.3129 ldsc 5 | 038 559 0.7181 2.1768 0.3299 0.7415 ldsc 6 | 038 562.1 0.5957 0.9638 0.6181 0.5365 ldsc 7 | 038 008.5 0.5882 0.9517 0.6181 0.5365 ldsc 8 | -------------------------------------------------------------------------------- /tests/input_files/custom_templates/index/below-h1.html: -------------------------------------------------------------------------------- 1 |
2 | Good test pages:
3 | SAMD11
4 | 1:869334-G-A
5 | Snowstorm
6 | region
7 | autocomplete: TP53
8 | autocomplete: rs3010527
9 |

10 | 11 | -------------------------------------------------------------------------------- /tests/input_files/fake-cache/gene_aliases-v36.sqlite3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/pheweb/b646214eae1ba524e239d2f5b20ca011756df770/tests/input_files/fake-cache/gene_aliases-v36.sqlite3 -------------------------------------------------------------------------------- /tests/input_files/fake-cache/gene_aliases-v37.sqlite3: -------------------------------------------------------------------------------- 1 | gene_aliases-v36.sqlite3 -------------------------------------------------------------------------------- /tests/input_files/fake-cache/genes-v37-hg19.bed: -------------------------------------------------------------------------------- 1 | genes-v36-hg19.bed -------------------------------------------------------------------------------- /tests/input_files/fake-cache/rsids-v150-hg19.tsv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/pheweb/b646214eae1ba524e239d2f5b20ca011756df770/tests/input_files/fake-cache/rsids-v150-hg19.tsv.gz -------------------------------------------------------------------------------- /tests/input_files/fake-cache/rsids-v150-hg38.tsv.gz: -------------------------------------------------------------------------------- 1 | rsids-v150-hg19.tsv.gz -------------------------------------------------------------------------------- /tests/input_files/fake-cache/rsids-v154-hg19.tsv.gz: -------------------------------------------------------------------------------- 1 | rsids-v150-hg19.tsv.gz -------------------------------------------------------------------------------- /tests/input_files/fake-cache/rsids-v154-hg38.tsv.gz: -------------------------------------------------------------------------------- 1 | rsids-v150-hg19.tsv.gz 
-------------------------------------------------------------------------------- /tests/input_files/phenolist/phenolist-example-broken.csv: -------------------------------------------------------------------------------- 1 | assoc_files,phenocode,info 2 | /Users/peter/PROJECTS/pheweb-data/vb-input-from-peter/oo/phe008.tsv,008,"json:[1,2]" 3 | /Users/peter/PROJECTS/pheweb-data/vb-input-from-peter/phe009.bar.tsv|/Users/peter/PROJECTS/pheweb-data/vb-input-from-peter/phe009.tsv,009,foo 4 | /Users/peter/PROJECTS/pheweb-data/vb--from-peter/phe009.bar.tsv|/Users/peter/PROJECTS/pheweb-data/vb-input-from-peter/phe009.foo.tsv,009,bar 5 | -------------------------------------------------------------------------------- /tests/input_files/phenolist/phenolist-example1.csv: -------------------------------------------------------------------------------- 1 | assoc_files;phenocode;info 2 | /Users/peter/PROJECTS/pheweb-data/vb-input-from-peter/phe008.tsv;008;"json:[1,2]" 3 | /Users/peter/PROJECTS/pheweb-data/vb-input-from-peter/phe009.bar.tsv|/Users/peter/PROJECTS/pheweb-data/vb-input-from-peter/phe009.foo.tsv;009;foo 4 | /Users/peter/PROJECTS/pheweb-data/vb--from-peter/phe009.bar.tsv|/Users/peter/PROJECTS/pheweb-data/vb-input-from-peter/phe009.foo.tsv;009;bar 5 | -------------------------------------------------------------------------------- /tests/run-all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -euo pipefail 3 | readlinkf() { perl -MCwd -le 'print Cwd::abs_path shift' "$1"; } 4 | SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 5 | 6 | # This script loads data and runs a server using the globally installed `pheweb`. 7 | # It's helpful when you're modifying the code and want to quickly see the results. 
8 | 9 | f() { 10 | set -x 11 | data_dir=$(mktemp -d) 12 | indir="$SCRIPTDIR/input_files" 13 | cache_dir="$indir/fake-cache" 14 | echo "data_dir = $data_dir" 15 | 16 | cp "$indir/correlations/pheno-correlations.txt" "$data_dir/pheno-correlations.txt" 17 | pheweb conf data_dir="$data_dir" cache="$cache_dir" disallow_downloads=true -h 18 | pheweb conf data_dir="$data_dir" cache="$cache_dir" disallow_downloads=true conf 19 | pheweb conf data_dir="$data_dir" cache="$cache_dir" disallow_downloads=true phenolist glob --simple-phenocode "$indir/assoc-files/*" 20 | pheweb conf data_dir="$data_dir" cache="$cache_dir" disallow_downloads=true phenolist unique-phenocode 21 | pheweb conf data_dir="$data_dir" cache="$cache_dir" disallow_downloads=true phenolist read-info-from-association-files 22 | pheweb conf data_dir="$data_dir" cache="$cache_dir" disallow_downloads=true phenolist filter-phenotypes --minimum-num-cases=20 --minimum-num-controls=20 --minimum-num-samples=20 23 | pheweb conf data_dir="$data_dir" cache="$cache_dir" disallow_downloads=true phenolist hide-small-numbers-of-samples --minimum-visible-number=50 24 | pheweb conf data_dir="$data_dir" cache="$cache_dir" disallow_downloads=true phenolist import-phenolist -f "$data_dir/pheno-list-categories.json" "$indir/categories.csv" 25 | pheweb conf data_dir="$data_dir" cache="$cache_dir" disallow_downloads=true phenolist merge-in-info "$data_dir/pheno-list-categories.json" 26 | pheweb conf data_dir="$data_dir" cache="$cache_dir" disallow_downloads=true phenolist verify --required-columns=category 27 | pheweb conf data_dir="$data_dir" cache="$cache_dir" disallow_downloads=true process 28 | pheweb conf data_dir="$data_dir" cache="$cache_dir" disallow_downloads=true top-loci 29 | pheweb conf data_dir="$data_dir" cache="$cache_dir" disallow_downloads=true best-of-pheno 30 | 31 | echo "Try http://localhost:5000/variant/1:869334-G-A" 32 | echo "Try http://localhost:5000/pheno/snowstorm" 33 | echo "Try 
http://localhost:5000/gene/SAMD11" 34 | 35 | pheweb conf data_dir="$data_dir" cache="$cache_dir" disallow_downloads=true custom_templates="$indir/custom_templates" show_correlations=true show_manhattan_filter_button=true serve 36 | }; f 37 | -------------------------------------------------------------------------------- /tests/run-gunicorn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -euo pipefail 3 | _readlinkf() { perl -MCwd -le 'print Cwd::abs_path shift' "$1"; } 4 | script_dir="$(cd "$(dirname "$(_readlinkf "${BASH_SOURCE[0]}")")" && echo "$PWD")" 5 | 6 | # This script runs a pheweb server. 7 | # It uses the data loaded when you ran `pytest` (or `./setup.py test`) last. 8 | # It's useful for modifying the server code and immediately seeing the results. 9 | # It should use the local files instead of the globally installed pheweb, but who knows. 10 | 11 | data_dir="$TMPDIR/pytest-of-$USER/pytest-$USER/" 12 | if ! [[ -d "$data_dir" ]]; then 13 | data_dir="$TMPDIR/pytest-of-$USER/pytest-current/test_all0/" # I've seen this format but I'm not sure where 14 | fi 15 | ln -s -f "$script_dir/input_files/config.py" "$data_dir/" 16 | ln -s -f "$script_dir/input_files/fake-cache" "$data_dir/" 17 | ln -s -f "$script_dir/input_files/custom_templates" "$data_dir/" 18 | if ! [[ -f "$data_dir/pheno-list.json" ]]; then ln -s -f "$script_dir/pheno-list.json" "$data_dir/"; fi 19 | for f in "$script_dir"/generated-by-pheweb/*; do ln -s -f "$f" "$data_dir/generated-by-pheweb/"; done; 20 | 21 | echo "http://localhost:5000/" 22 | 23 | cd "$data_dir" 24 | python3 "$(which gunicorn)" --bind="localhost:5000" --error-logfile=- --access-logfile=- --access-logformat="%(s)s | %(L)ss | %(f)s | %(m)s %(U)s %(q)s" -w4 --reload --pythonpath "$script_dir/.." 
pheweb.serve.server:app 25 | 26 | #pheweb conf data_dir="$data_dir" serve --host='localhost' --port=5000 --num-workers=2 27 | -------------------------------------------------------------------------------- /tests/test_all.py: -------------------------------------------------------------------------------- 1 | 2 | #TODO: split into multiple tests that share tmpdir and run in order 3 | 4 | import os 5 | 6 | def test_all(tmpdir, capsys): 7 | data_dir = str(tmpdir.realpath()) 8 | input_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), 'input_files/')) 9 | cache_dir = os.path.join(input_dir, 'fake-cache') 10 | conf = ['conf', 'data_dir="{}"'.format(data_dir), 'cache="{}"'.format(cache_dir), 'disallow_downloads=true'] 11 | 12 | from pheweb.command_line import run as cl_run 13 | 14 | with capsys.disabled(): 15 | print('\n') 16 | print('conf =', repr(conf), '\n') 17 | cl_run(conf+['conf']) 18 | print() 19 | 20 | cl_run(conf+['-h']) 21 | cl_run(conf+['conf']) 22 | cl_run(conf+['phenolist', 'glob', '--simple-phenocode', '{}/assoc-files/*'.format(input_dir)]) 23 | cl_run(conf+['phenolist', 'unique-phenocode']) 24 | cl_run(conf+['phenolist', 'read-info-from-association-files']) 25 | cl_run(conf+['phenolist', 'filter-phenotypes', '--minimum-num-cases', '20', '--minimum-num-controls', '20', '--minimum-num-samples', '20']) 26 | cl_run(conf+['phenolist', 'hide-small-numbers-of-samples', '--minimum-visible-number', '50']) 27 | cl_run(conf+['phenolist', 'hide-small-numbers-of-samples', '--minimum-visible-number', '50']) 28 | cl_run(conf+['phenolist', 'import-phenolist', '-f', '{}/pheno-list-categories.json'.format(data_dir), '{}/categories.csv'.format(input_dir)]) 29 | cl_run(conf+['phenolist', 'merge-in-info', '{}/pheno-list-categories.json'.format(data_dir)]) 30 | cl_run(conf+['phenolist', 'verify', '--required-columns', 'category']) 31 | # TODO: verify that `dbsnp-{latest}.tsv` exists (so that it won't be downloaded/parsed) 32 | # TODO: replace `process` with each 
sub-step. 33 | cl_run(conf+['process']) 34 | # TODO: check some properties of our files, such as manh.json 35 | cl_run(conf+['top-loci']) 36 | cl_run(conf+['wsgi']) 37 | # with capsys.disabled(): print(2) 38 | 39 | from pheweb.serve.server import app # TODO: this relies on data_dir being set earlier, but shouldn't. 40 | app.testing = True # makes application exceptions propagate up to `client` 41 | with app.test_client() as client: 42 | assert client.get('/').status_code == 200 43 | assert client.get('/variant/1-869334-G-A').status_code == 200 44 | assert client.get('/static/variant.js').status_code == 200 45 | assert client.get('/pheno/snowstorm').status_code == 200 46 | assert client.get('/api/manhattan/pheno/snowstorm.json').status_code == 200 47 | assert client.get('/api/qq/pheno/snowstorm.json').status_code == 200 48 | assert client.get('/region/snowstorm/8-926279-1326279').status_code == 200 49 | assert client.get('/api/region/snowstorm/lz-results/?filter=chromosome%20in%20%20%278%27%20and%20position%20ge%20976279%20and%20position%20le%201276279').status_code == 200 50 | assert client.get('/region/snowstorm/gene/DNAH14?include=1-225494097').status_code == 200 51 | assert client.get('/api/autocomplete?query=%20DAP-2').status_code == 200 52 | assert b'EAR-LENGTH' in client.get('/region/1/gene/SAMD11').data 53 | assert b'\t' in client.get('/download/top_hits.tsv').data 54 | -------------------------------------------------------------------------------- /tests/test_detectref.py: -------------------------------------------------------------------------------- 1 | 2 | def test_detect_ref(): 3 | import pheweb.load.detect_ref, os 4 | default_builds = pheweb.load.detect_ref.get_default_builds() 5 | filepath = os.path.join(os.path.dirname(__file__), 'input_files/', 'assoc-files', 'has-fields-ac-af-maf.txt') # has only chr10 6 | variant_iterator = pheweb.load.detect_ref.make_variant_iterator(filepath, num_header_lines=1) 7 | build_scores = 
pheweb.load.detect_ref.get_build_scores(variant_iterator) 8 | for build, score in build_scores.items(): 9 | assert build.hg_name.startswith('hg') 10 | assert build.grch_name.startswith('GRCh') 11 | assert sorted(score.keys()) == ['a1','a2','either'] 12 | for a,frac in score.items(): assert 0 <= frac <= 1 13 | assert score['a1'] + score['a2'] >= score['either'] 14 | matching_build, matching_allele_col = pheweb.load.detect_ref.detect_build(build_scores) 15 | assert isinstance(matching_build, pheweb.load.detect_ref.Build) 16 | assert matching_build in default_builds 17 | assert matching_build.hg_name == 'hg19' 18 | assert matching_build.grch_name == 'GRCh37' 19 | assert matching_allele_col == 'a1' 20 | -------------------------------------------------------------------------------- /tests/test_pheno_correlation.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests for the Table of Correlated Phenotypes feature 3 | 4 | Verifies that the sample file is correctly combined with phenotype information 5 | """ 6 | import os 7 | import shutil 8 | 9 | 10 | import pytest 11 | 12 | from pheweb.load import pheno_correlation 13 | from pheweb import weetabix 14 | 15 | 16 | # Simplified files for testing purposes 17 | CORREL_FILE = os.path.join(os.path.dirname(__file__), 'input_files/correlations/rg-pipeline-output.txt') 18 | PHENOLIST = os.path.join(os.path.dirname(__file__), 'input_files/correlations/pheno-list.json') 19 | 20 | 21 | @pytest.fixture(scope='module') 22 | def sample_data(tmpdir_factory): 23 | """Index a test file""" 24 | fn = tmpdir_factory.getbasetemp() / 'sample.txt' 25 | shutil.copy(CORREL_FILE, fn) 26 | return fn 27 | 28 | 29 | @pytest.fixture(scope='module') 30 | def annotated_sample(sample_data): 31 | output_fn = str(sample_data) + '.out' 32 | pheno_correlation.main(sample_data, output_fn, phenolist_path=PHENOLIST) 33 | return output_fn 34 | 35 | 36 | def test_phenos_are_annotated(annotated_sample): 37 | 
raw_file_cols = 7 38 | with open(annotated_sample, 'r') as f: 39 | num_cols = len(f.readline().strip().split('\t')) 40 | assert num_cols == (raw_file_cols + 1), 'Processed file has one extra column' 41 | 42 | 43 | def test_new_column_contains_trait2_descriptions(annotated_sample): 44 | with open(annotated_sample, 'r') as f: 45 | next(f) 46 | labels = [line.strip().split('\t')[-1] for line in f] 47 | 48 | expected = ['Septicemia', '041.4', 'Bacterial enteritis', 'Bacterial enteritis', 'Ileostomy status', 'Diverticulosis', 'Bacterial enteritis', 'Septicemia', 'Septicemia'] 49 | assert expected == labels, "Labels correspond to trait 2: phenostring if possible, else phenocode" 50 | 51 | 52 | def test_phenos_are_indexed(annotated_sample): 53 | expected_fn = weetabix._index_name(annotated_sample) 54 | assert os.path.isfile(expected_fn) 55 | 56 | 57 | def test_unknown_phenocodes_get_dropped_from_annotated_file(sample_data, annotated_sample): 58 | # 1 unknown pheno, so annotated file should have one less line 59 | with open(sample_data, 'r') as raw, open(annotated_sample, 'r') as proc: 60 | c1 = raw.readlines() 61 | c2 = proc.readlines() 62 | 63 | c1_trait2_column = [line.split('\t')[1] for line in c1 if not line.startswith('Trait1')] 64 | assert '031' in c1_trait2_column, 'Input file contains phenocode 031' 65 | 66 | c2_trait2_column = [line.split('\t')[1] for line in c2 if not line.startswith('Trait1')] 67 | assert '031' not in c2_trait2_column, 'Annotated file omits phenocode 031 which is missing in pheno-list.json' 68 | -------------------------------------------------------------------------------- /tests/test_weetabix.py: -------------------------------------------------------------------------------- 1 | """Test simple key-based indexing of files""" 2 | 3 | import os 4 | import pickle 5 | import shutil 6 | 7 | import pytest 8 | 9 | from pheweb import weetabix 10 | 11 | 12 | FIXTURE = os.path.join(os.path.dirname(__file__), 
'input_files/correlations/rg-pipeline-output.txt') 13 | 14 | # TODO: Add unit tests for the various indexing options (eg column number etc) 15 | 16 | 17 | @pytest.fixture(scope='module') 18 | def sample_data(tmpdir_factory): 19 | """Index a test file""" 20 | fn = tmpdir_factory.getbasetemp() / 'sample.txt' 21 | shutil.copy(FIXTURE, fn) 22 | 23 | weetabix.make_byte_index(fn, 1, skip_lines=1) 24 | return fn 25 | 26 | 27 | def test_generates_index_in_default_location(sample_data): 28 | expected_fn = weetabix._index_name(sample_data) 29 | assert os.path.isfile(expected_fn), "Index file was created" 30 | 31 | 32 | def test_index_has_all_column_values(sample_data): 33 | index_fn = weetabix._index_name(sample_data) 34 | 35 | with open(index_fn, 'rb') as f: 36 | contents = pickle.load(f) 37 | 38 | keys = contents.keys() 39 | assert len(keys) == 3, 'has expected number of keys' 40 | assert set(keys) == {'559', '008.5', '038'}, 'has correct set of unique keys' 41 | 42 | 43 | def test_gets_correct_number_of_lines_for_each_key(sample_data): 44 | expected = ( 45 | ('559', 1), 46 | ('008.5', 2), 47 | ('038', 3) 48 | ) 49 | 50 | for k, count in expected: 51 | rows = weetabix.get_indexed_rows(sample_data, k) 52 | assert len(rows) == count, 'found expected number of rows for key {}'.format(k) 53 | 54 | 55 | def test_fetches_line_content_for_key(sample_data): 56 | expected = ['559 038 -0.5524 1.5359 -0.3597 0.7191 ldsc'] 57 | rows = weetabix.get_indexed_rows(sample_data, '559') 58 | assert rows == expected, 'returned expected row content' 59 | 60 | 61 | def test_strict_mode_fails_if_key_not_in_index(sample_data): 62 | with pytest.raises(KeyError): 63 | weetabix.get_indexed_rows(sample_data, 'not_a_key', strict=True) 64 | --------------------------------------------------------------------------------