├── .dockerignore ├── .github └── workflows │ └── docker.yml ├── .gitignore ├── CMakeLists.txt ├── Dockerfile ├── Dockerfile-CLion ├── LICENSE ├── README.md ├── bin ├── docker_app.sh ├── docker_build_compose.sh ├── docker_build_image.sh ├── docker_remove.sh ├── docker_run.sh └── test_gunicorn.py ├── core ├── CMakeLists.txt ├── arrow.cmake ├── benchmark │ ├── CMakeLists.txt │ └── benchmark.cpp ├── boost.cmake ├── hiredis.cmake ├── openblas.cmake ├── redis.cmake ├── requirements.txt ├── src │ ├── CMakeLists.txt │ ├── Cell.cpp │ ├── Cell.h │ ├── LDServer.cpp │ ├── LDServer.h │ ├── Mask.cpp │ ├── Mask.h │ ├── MetastaarSummaryStatisticsLoader.cpp │ ├── MetastaarSummaryStatisticsLoader.h │ ├── Morton.cpp │ ├── Morton.h │ ├── Phenotypes.cpp │ ├── Phenotypes.h │ ├── RaremetalSummaryStatisticsLoader.cpp │ ├── RaremetalSummaryStatisticsLoader.h │ ├── Raw.cpp │ ├── Raw.h │ ├── Sandbox.cpp │ ├── ScoreCovarianceRunner.cpp │ ├── ScoreCovarianceRunner.h │ ├── ScoreSegment.cpp │ ├── ScoreSegment.h │ ├── ScoreServer.cpp │ ├── ScoreServer.h │ ├── Segment.cpp │ ├── Segment.h │ ├── SummaryStatisticsLoader.cpp │ ├── SummaryStatisticsLoader.h │ ├── Types.h │ ├── VariantCollator.cpp │ ├── VariantCollator.h │ └── pywrapper.cpp ├── tabixpp.cmake └── tests │ ├── CMakeLists.txt │ ├── LDServerTest.cpp │ ├── Main_TestAll.cpp │ ├── RareMetal.cpp │ ├── RareMetal.h │ ├── RvTest.cpp │ └── RvTest.h ├── data ├── AFR.samples.txt ├── ALL.samples.txt ├── EUR.samples.txt ├── chr21.test.RAND_QT.singlevar.cov.txt ├── chr21.test.RAND_QT.singlevar.cov.txt.gz ├── chr21.test.RAND_QT.singlevar.cov.txt.gz.tbi ├── chr21.test.RAND_QT.singlevar.score.txt ├── chr21.test.RAND_QT.singlevar.score.txt.gz ├── chr21.test.RAND_QT.singlevar.score.txt.gz.tbi ├── chr21.test.bcf ├── chr21.test.bcf.csi ├── chr21.test.dat ├── chr21.test.frq ├── chr21.test.missing_genotypes_and_phenotypes.RAND_QT.singlevar.cov.txt ├── chr21.test.missing_genotypes_and_phenotypes.RAND_QT.singlevar.score.txt ├── 
chr21.test.missing_pheno.RAND_QT.singlevar.cov.txt ├── chr21.test.missing_pheno.RAND_QT.singlevar.score.txt ├── chr21.test.missing_values.ped ├── chr21.test.missing_values.tab ├── chr21.test.missing_values.vcf.gz ├── chr21.test.missing_values.vcf.gz.tbi ├── chr21.test.ped ├── chr21.test.sav ├── chr21.test.sav.s1r ├── chr21.test.tab ├── chr21.test.vcf.gz ├── chr21.test.vcf.gz.tbi ├── chr22.monomorphic_test.vcf.gz ├── chr22.monomorphic_test.vcf.gz.tbi ├── chr22.more_phenotypes.test.dat ├── chr22.more_phenotypes.test.ped ├── chr22.test.bad_float.tab ├── chr22.test.bcf ├── chr22.test.bcf.csi ├── chr22.test.dat ├── chr22.test.frq ├── chr22.test.missing_values.tab ├── chr22.test.ped ├── chr22.test.sav ├── chr22.test.sav.s1r ├── chr22.test.tab ├── chr22.test.vcf.gz ├── chr22.test.vcf.gz.tbi ├── chrX.test.sav ├── chrX.test.sav.s1r ├── gene.WVAY7.cov.assoc.gz ├── gene.WVAY7.cov.assoc.gz.tbi ├── gene.WVAY7.scores.assoc.gz ├── gene.WVAY7.scores.assoc.gz.tbi ├── make_rmw.sh ├── make_test_ped.py ├── mask.epacts.chr22.gencode-exons-AF01.tab.gz ├── mask.epacts.chr22.gencode-exons-AF01.tab.gz.tbi ├── mask.epacts.chr22.gencode-exons-AF05.tab.gz ├── mask.epacts.chr22.gencode-exons-AF05.tab.gz.tbi ├── metastaar_empty.cov.parquet ├── metastaar_empty.score.parquet ├── metastaar_invalid_metadata.parquet ├── region_ld_22_50544251_50549251.hap.ld ├── region_ld_22_51241101_51241385.AFR.hap.ld ├── region_ld_22_51241101_51241385.hap.ld ├── region_ld_X_60100_60150.hap.ld ├── rvtest_cov_fail_base.gz ├── rvtest_cov_fail_base.gz.tbi ├── rvtest_score_fail_ustat.gz ├── rvtest_score_fail_ustat.gz.tbi ├── test.afmissing.MetaScore.assoc.gz ├── test.afmissing.MetaScore.assoc.gz.tbi ├── test.qt.segment1.metastaar.cov.parquet ├── test.qt.segment1.metastaar.sumstat.parquet ├── test.qt.segment2.metastaar.cov.parquet ├── test.qt.segment2.metastaar.sumstat.parquet ├── test.smallchunk.MetaCov.assoc.gz ├── test.smallchunk.MetaCov.assoc.gz.tbi ├── test.smallchunk.MetaScore.assoc.gz ├── 
test.smallchunk.MetaScore.assoc.gz.tbi ├── test.smallchunk.mask.epacts.tab.gz ├── test.smallchunk.mask.epacts.tab.gz.tbi ├── test.smallchunk.noheader.MetaCov.assoc.gz ├── test.smallchunk.noheader.MetaCov.assoc.gz.tbi ├── test.smallchunk.noheader.MetaScore.assoc.gz ├── test.smallchunk.noheader.MetaScore.assoc.gz.tbi ├── test.twochroms.chr1.MetaCov.assoc.gz ├── test.twochroms.chr1.MetaCov.assoc.gz.tbi ├── test.twochroms.chr1.MetaScore.assoc.gz ├── test.twochroms.chr1.MetaScore.assoc.gz.tbi ├── test.twochroms.chr9.MetaCov.assoc.gz ├── test.twochroms.chr9.MetaCov.assoc.gz.tbi ├── test.twochroms.chr9.MetaScore.assoc.gz ├── test.twochroms.chr9.MetaScore.assoc.gz.tbi ├── test.twochroms.mask.tab.gz ├── test.twochroms.mask.tab.gz.tbi ├── test.yaml ├── test_metastaar_corrupt.yaml ├── test_metastaar_empty.yaml ├── test_no_sav_index.sav ├── test_no_sav_index.yaml ├── test_no_tabix.vcf.gz ├── test_no_tabix.yaml ├── test_no_testable_variants.mask.tab.gz ├── test_no_testable_variants.mask.tab.gz.tbi ├── test_no_testable_variants.tab ├── test_no_testable_variants.vcf.gz ├── test_no_testable_variants.vcf.gz.tbi ├── test_not_float.dat ├── test_not_float.ped ├── test_ped_incorrect_float.yaml ├── test_sumstat_loader_rm.cov.assoc.gz ├── test_sumstat_loader_rm.cov.assoc.gz.tbi ├── test_sumstat_loader_rm.scores.assoc.gz ├── test_sumstat_loader_rm.scores.assoc.gz.tbi ├── test_tab_incorrect_float.yaml ├── variant_ld_22_50546666_vs_50544251_50549251.hap.ld ├── variant_ld_22_51241101_vs_51241101_51241385.hap.ld ├── variant_ld_22_51241309_vs_51241101_51244237.hap.ld └── variant_ld_22_51241386_vs_51241101_51241385.hap.ld ├── docker-compose.yml ├── docs ├── ldserver-api.md └── raremetal-api.md ├── rest ├── build.txt ├── config │ ├── __init__.py │ └── default.py ├── core │ └── __init__.py ├── ldserver │ ├── __init__.py │ ├── api.py │ └── model.py ├── playground │ ├── __init__.py │ ├── templates │ │ ├── base.html │ │ └── home.html │ └── web.py ├── raremetal │ ├── __init__.py │ ├── api.py │ ├── 
errors.py │ ├── model.py │ └── sentry.py ├── requirements.txt ├── runtime_tests │ └── test_runtime.py └── tests │ ├── ldserver │ ├── conftest.py │ ├── datasets.json │ └── test_api.py │ └── raremetal │ ├── conftest.py │ ├── test_cli.py │ └── test_raremetal.py ├── setup.cfg ├── tasks.py └── tox.ini /.dockerignore: -------------------------------------------------------------------------------- 1 | cget* 2 | venv* 3 | cache 4 | .idea 5 | .git 6 | .tox 7 | **/*.pyc 8 | private 9 | build-release 10 | build-debug 11 | cmake-build-debug 12 | cmake-build-debug-remote 13 | cmake-build-release 14 | cmake-build-release-remote 15 | cmake-install 16 | tmp 17 | docker-compose.override.yml 18 | config.py 19 | config.yaml 20 | **/*.db 21 | **/*.so 22 | -------------------------------------------------------------------------------- /.github/workflows/docker.yml: -------------------------------------------------------------------------------- 1 | name: Build and test docker image 2 | 3 | on: [push, pull_request] 4 | 5 | env: 6 | REGISTRY: ghcr.io 7 | 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - 13 | name: Generate image name 14 | run: | 15 | echo "IMAGE_NAME=${GITHUB_REPOSITORY,,}" >> ${GITHUB_ENV} 16 | - 17 | name: Set up Docker Buildx 18 | uses: docker/setup-buildx-action@v1 19 | - 20 | name: Docker meta 21 | id: docker_meta 22 | uses: docker/metadata-action@v3 23 | with: 24 | images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} 25 | flavor: | 26 | latest=auto 27 | tags: | 28 | type=ref,event=branch 29 | type=ref,event=pr 30 | type=sha,format=long 31 | type=semver,pattern={{version}} 32 | - 33 | name: Login to GHCR 34 | if: github.event_name != 'pull_request' 35 | uses: docker/login-action@v1 36 | with: 37 | registry: ${{ env.REGISTRY }} 38 | username: ${{ github.repository_owner }} 39 | password: ${{ secrets.GITHUB_TOKEN }} 40 | - 41 | name: Build image 42 | id: docker_build 43 | uses: docker/build-push-action@v2 44 | with: 45 | push: ${{ github.event_name != 
'pull_request' }} 46 | tags: ${{ steps.docker_meta.outputs.tags }} 47 | labels: ${{ steps.docker_meta.outputs.labels }} 48 | cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache 49 | cache-to: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache,mode=max 50 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | private 2 | .idea 3 | cget 4 | cmake-build-debug 5 | *.pyc 6 | *.so 7 | *.o 8 | venv 9 | .pytest_cache 10 | __pycache__ 11 | *.db 12 | docker-compose.override.yml 13 | config.py 14 | config.yaml 15 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.14) 2 | project(LDServer) 3 | 4 | set(CMAKE_CXX_STANDARD 14) 5 | 6 | add_subdirectory(core) 7 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:20.04 as base 2 | 3 | LABEL org.label-schema.name="LDServer" 4 | LABEL org.label-schema.description="LDServer for calculating linkage disequilibrium of genetic variants" 5 | LABEL org.label-schema.vendor="University of Michigan, Center for Statistical Genetics" 6 | LABEL org.label-schema.url="https://github.com/statgen/LDServer" 7 | LABEL org.label-schema.usage="https://github.com/statgen/LDServer#docker" 8 | LABEL org.label-schema.vcs-url="https://github.com/statgen/LDServer" 9 | LABEL org.label-schema.schema-version="1.0" 10 | 11 | # Install required packages for LDServer to install. 
12 | ENV DEBIAN_FRONTEND="noninteractive" 13 | RUN apt-get update && apt-get install -y --no-install-recommends \ 14 | build-essential \ 15 | curl \ 16 | cmake \ 17 | python3 \ 18 | python3-dev \ 19 | python3-distutils \ 20 | python3-setuptools \ 21 | python3-pip \ 22 | python3-wheel \ 23 | zlib1g-dev \ 24 | liblzma-dev \ 25 | redis \ 26 | locales \ 27 | git \ 28 | pkg-config \ 29 | sqlite3 \ 30 | && rm -rf /var/lib/apt/lists/* \ 31 | && locale-gen en_US.UTF-8 32 | 33 | ENV LC_ALL en_US.UTF-8 34 | ENV LANG en_US.UTF-8 35 | 36 | # Set a default # of threads, otherwise OMP decides to use every core it possibly can, 37 | # which can cause some strange runtime issues. These variables can be overridden by using 38 | # a .env file with docker compose, for example. 39 | ENV OMP_NUM_THREADS=1 OPENBLAS_NUM_THREADS=1 40 | 41 | # Upgrade pip 42 | RUN pip3 install --upgrade pip 43 | 44 | # Install required python packages for building later packages 45 | COPY rest/build.txt / 46 | RUN pip3 install -r build.txt 47 | 48 | # Create a group and user to execute as, then drop root 49 | ARG UID 50 | ARG GID 51 | RUN \ 52 | if [ -n "$GID" ]; then \ 53 | addgroup --gid $GID ldserver; \ 54 | else \ 55 | addgroup ldserver; \ 56 | fi && \ 57 | if [ -n "$UID" ]; then \ 58 | adduser --gecos "User for running LDServer as non-root" --shell /bin/bash --disabled-password --uid $UID --ingroup ldserver ldserver; \ 59 | else \ 60 | adduser --gecos "User for running LDServer as non-root" --shell /bin/bash --disabled-password --ingroup ldserver ldserver; \ 61 | fi 62 | 63 | WORKDIR /home/ldserver 64 | USER ldserver 65 | 66 | # Install cpp dependencies 67 | COPY --chown=ldserver:ldserver core/requirements.txt /home/ldserver/core/requirements.txt 68 | COPY --chown=ldserver:ldserver core/*.cmake /home/ldserver/core/ 69 | ARG CMAKE_BUILD_PARALLEL_LEVEL 70 | ARG MAKEFLAGS 71 | RUN cget install -f core/requirements.txt 72 | 73 | # Install required python packages 74 | ENV 
PATH="/home/ldserver/.local/bin:${PATH}" 75 | COPY --chown=ldserver:ldserver rest/requirements.txt rest/requirements.txt 76 | RUN pip3 install -r rest/requirements.txt 77 | 78 | # Next stage: compiled server/binaries 79 | FROM base as compile 80 | 81 | # Copy required test data 82 | # This must happen before compile unfortunately, as cmake install will try to symlink test files 83 | # into its own test directory 84 | COPY --chown=ldserver:ldserver ./data /home/ldserver/data 85 | 86 | # Copy source 87 | COPY --chown=ldserver:ldserver CMakeLists.txt /home/ldserver/CMakeLists.txt 88 | COPY --chown=ldserver:ldserver ./core /home/ldserver/core 89 | RUN mkdir -p rest 90 | 91 | # Compile ldserver cpp 92 | ENV CGET_PREFIX="/home/ldserver/cget" 93 | ENV INSTALL_PREFIX="/home/ldserver/cget" 94 | RUN \ 95 | mkdir build \ 96 | && cd build \ 97 | && cmake .. \ 98 | -DCMAKE_TOOLCHAIN_FILE=${CGET_PREFIX}/cget/cget.cmake \ 99 | -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \ 100 | -DCMAKE_BUILD_TYPE=Release \ 101 | && cmake --build . --target install 102 | 103 | # Copy python/flask, other scripts 104 | COPY --chown=ldserver:ldserver ./rest /home/ldserver/rest 105 | COPY --chown=ldserver:ldserver ./bin /home/ldserver/bin 106 | 107 | # Run test cases 108 | FROM compile as test 109 | COPY --chown=ldserver:ldserver tox.ini /home/ldserver/tox.ini 110 | RUN tox && python3 bin/test_gunicorn.py && rm -f ./rest/ldserver/sql.db 111 | 112 | # Frequently changing metadata here to avoid cache misses 113 | ARG BUILD_DATE 114 | ARG GIT_SHA 115 | ARG LDSERVER_VERSION 116 | 117 | LABEL org.label-schema.version=$LDSERVER_VERSION \ 118 | org.label-schema.vcs-ref=$GIT_SHA \ 119 | org.label-schema.build-date=$BUILD_DATE 120 | 121 | # Set the default stage to be the base files + compiled binaries + test cases. 
122 | FROM test 123 | -------------------------------------------------------------------------------- /Dockerfile-CLion: -------------------------------------------------------------------------------- 1 | # Setup development environment for CLion 2 | FROM ldserver:base as dev-clion 3 | 4 | USER root 5 | RUN apt-get update && apt-get install -y \ 6 | ssh \ 7 | rsync \ 8 | gdb \ 9 | && apt-get clean 10 | 11 | RUN ( \ 12 | echo 'LogLevel DEBUG2'; \ 13 | echo 'PermitRootLogin yes'; \ 14 | echo 'PasswordAuthentication yes'; \ 15 | echo 'Subsystem sftp /usr/lib/openssh/sftp-server'; \ 16 | ) > /etc/ssh/sshd_config_clion \ 17 | && mkdir /run/sshd 18 | 19 | ARG LDSERVER_SSH_PASSWORD 20 | RUN \ 21 | if [ -z "$LDSERVER_SSH_PASSWORD" ]; then \ 22 | echo "Must provide '--build-arg LDSERVER_SSH_PASSWORD=' when building" && exit 1; \ 23 | else \ 24 | yes "$LDSERVER_SSH_PASSWORD" | passwd ldserver; \ 25 | fi 26 | 27 | CMD ["/usr/sbin/sshd", "-D", "-e", "-f", "/etc/ssh/sshd_config_clion"] -------------------------------------------------------------------------------- /bin/docker_app.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | docker-compose pull --ignore-pull-failures && docker-compose up -d 3 | -------------------------------------------------------------------------------- /bin/docker_build_compose.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export LDSERVER_VERSION=`git describe --tags --abbrev=11 | sed 's/^v//' | sed 's/-g/-/'` 3 | export GIT_SHA=`git rev-parse HEAD` 4 | export BUILD_DATE=`date -u +'%Y-%m-%dT%H:%M:%SZ'` 5 | 6 | docker-compose build --pull "$@" 7 | -------------------------------------------------------------------------------- /bin/docker_build_image.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | LDSERVER_VERSION=`git describe --tags --abbrev=11 | sed 's/^v//' | sed 's/-g/-/'` 3 | 
GIT_SHA=`git rev-parse HEAD` 4 | BUILD_DATE=`date -u +'%Y-%m-%dT%H:%M:%SZ'` 5 | 6 | # Build the base ldserver image. 7 | docker build --pull -t ldserver:base \ 8 | --build-arg MAKEFLAGS="-j 4" \ 9 | --build-arg CMAKE_BUILD_PARALLEL_LEVEL=4 \ 10 | --build-arg BUILD_DATE=${BUILD_DATE} \ 11 | --build-arg GIT_SHA=${GIT_SHA} \ 12 | --build-arg LDSERVER_VERSION=${LDSERVER_VERSION} \ 13 | --target base \ 14 | "$@" . 15 | 16 | # Create the final compiled ldserver image. 17 | docker build --pull -t ldserver:${LDSERVER_VERSION} \ 18 | --build-arg MAKEFLAGS="-j 4" \ 19 | --build-arg CMAKE_BUILD_PARALLEL_LEVEL=4 \ 20 | --build-arg BUILD_DATE=${BUILD_DATE} \ 21 | --build-arg GIT_SHA=${GIT_SHA} \ 22 | --build-arg LDSERVER_VERSION=${LDSERVER_VERSION} \ 23 | --target compile \ 24 | "$@" . 25 | 26 | # Tag final ldserver image as latest. 27 | docker tag ldserver:${LDSERVER_VERSION} ldserver:latest 28 | 29 | # Create development image for CLion. 30 | # The - < is important, it pipes the Dockerfile-CLion contents into 31 | # the docker build engine, which means it will not attempt to copy the local 32 | # context. CLion will rsync source files into the container so we don't need the 33 | # entire context in the image. 34 | # A password should be set for CLion to SSH into the container (see below for example.) 
35 | docker build \ 36 | --build-arg LDSERVER_SSH_PASSWORD=ldserver \ 37 | -t ldserver:dev-clion \ 38 | - < Dockerfile-CLion -------------------------------------------------------------------------------- /bin/docker_remove.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | docker system prune -f 4 | docker image rm -f `docker image ls --filter "label=org.label-schema.name=LDServer" -q | sort -u` 5 | -------------------------------------------------------------------------------- /bin/docker_run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | LDSERVER_VERSION=`git describe --tags --abbrev=11` 3 | RAREMETAL_CONFIG_DATA="/home/ldserver/var/test.yaml" 4 | RAREMETAL_WORKERS=6 5 | 6 | # Run the raremetal flask app. 7 | # This is an example of how to run the container for the raremetal flask app directly, 8 | # though you most likely want to use docker-compose in production. 9 | docker run -it \ 10 | -v /mnt/data:/home/ldserver/var \ 11 | -v /mnt/data/config.py:/home/ldserver/rest/instance/config.py \ 12 | -p 5000:5000 \ 13 | -e FLASK_APP="rest/raremetal" \ 14 | ldserver:${LDSERVER_VERSION} \ 15 | /bin/bash -c "flask add-yaml ${RAREMETAL_CONFIG_DATA} && gunicorn -b 0.0.0.0:5000 -w ${RAREMETAL_WORKERS} -k gevent --pythonpath rest 'raremetal:create_app()'" 16 | -------------------------------------------------------------------------------- /bin/test_gunicorn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import time 3 | import requests 4 | import psutil as ps 5 | import os 6 | from subprocess import Popen, check_call 7 | from pathlib import Path 8 | 9 | # Set number of workers 10 | N_WORKERS = 1 11 | 12 | # Set database location 13 | # DB_NAME = "test_gunicorn.db" 14 | # os.environ["SQLALCHEMY_DATABASE_URI"] = f"sqlite:///{DB_NAME}" 15 | 16 | def value_ok(v): 17 | if hasattr(v, "__len__"): 
18 | if len(v) > 0: 19 | return v 20 | elif v is not None: 21 | return v 22 | 23 | def retry(func, *args, **kwargs): 24 | init_wait = 1 25 | multiply = 2 26 | tries = 5 27 | 28 | wait = init_wait 29 | for _ in range(tries): 30 | try: 31 | v = func(*args, **kwargs) 32 | if value_ok(v): 33 | return v 34 | else: 35 | time.sleep(wait) 36 | wait *= multiply 37 | except: 38 | time.sleep(wait) 39 | wait *= multiply 40 | 41 | raise Exception(f"func {str(func)} failed after {tries} attempts") 42 | 43 | def has_n_children(pid, n): 44 | print(f"Expecting {n} kids for {pid}") 45 | p = ps.Process(pid) 46 | kids = p.children() 47 | return len(kids) == n 48 | 49 | def find_guni_pid(pid): 50 | return ps.Process(pid).children()[0].pid 51 | 52 | # Need to call some flask command first due to an issue w/ sqlalchemy & create_all() 53 | #check_call("flask show-references", shell=True) 54 | 55 | # Start gunicorn 56 | print("Starting gunicorn smoke test...") 57 | proc = Popen(f"gunicorn --access-logfile - --error-logfile - -k gthread -w {N_WORKERS} --pythonpath rest 'ldserver:create_app()'", shell=True) 58 | 59 | # Find gunicorn master PID 60 | print(f"Main PID: {proc.pid}") 61 | guni_master_pid = retry(find_guni_pid, proc.pid) 62 | print(f"Master gunicorn PID: {guni_master_pid}") 63 | 64 | # Wait and check for workers to start 65 | print("Waiting for workers...") 66 | found_workers = retry(has_n_children, guni_master_pid, N_WORKERS) 67 | 68 | if not found_workers: 69 | raise Exception("... 
failed to find workers") 70 | 71 | # Test endpoint 72 | print("Testing gunicorn endpoint...") 73 | resp = retry(requests.get, "http://127.0.0.1:8000/correlations", timeout=(3, 3)) 74 | 75 | if not resp.ok: 76 | raise Exception("During gunicorn smoke test: correlations endpoint did not return OK") 77 | 78 | # Check for a simple key 79 | if not 'data' in resp.json(): 80 | raise Exception("During gunicorn smoke test: correlation endpoint did not return expected key 'data'") 81 | 82 | # Terminate gunicorn 83 | proc.kill() 84 | 85 | # Remove test database 86 | # Path("rest/ldserver/").joinpath(DB_NAME).unlink() 87 | 88 | print("... endpoint tested successfully") 89 | -------------------------------------------------------------------------------- /core/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.14) 2 | project(LDServer VERSION 0.0.1) 3 | include(GNUInstallDirs) 4 | add_compile_options(-Wall -Wpedantic -march=native) 5 | 6 | set(CMAKE_VERBOSE_MAKEFILE ON) 7 | set(CMAKE_CXX_STANDARD 14) 8 | 9 | add_subdirectory(src) 10 | add_subdirectory(tests) 11 | add_subdirectory(benchmark) 12 | -------------------------------------------------------------------------------- /core/arrow.cmake: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.9) 2 | set(CMAKE_VERBOSE_MAKEFILE ON) 3 | 4 | include(ExternalProject) 5 | 6 | if(CGET_PREFIX) 7 | include_directories(${CGET_PREFIX}/include) 8 | endif() 9 | 10 | if(CGET_PREFIX) 11 | link_directories(${CGET_PREFIX}/lib) 12 | endif() 13 | 14 | message(STATUS "Current build directory: " ${CMAKE_CURRENT_BINARY_DIR}) 15 | message(STATUS "Current source directory: " ${CMAKE_CURRENT_SOURCE_DIR}) 16 | message(STATUS "CMake install libdir: " ${CMAKE_INSTALL_LIBDIR}) 17 | message(STATUS "CMake install prefix: " ${CMAKE_INSTALL_PREFIX}) 18 | message(STATUS "cget install prefix: " 
${CGET_INSTALL_PREFIX}) 19 | message(STATUS "cget prefix: " ${CGET_PREFIX}) 20 | message(STATUS "Current build output root directory: " ${BUILD_OUTPUT_ROOT_DIRECTORY}) 21 | message(STATUS "CMake build type: " ${CMAKE_BUILD_TYPE}) 22 | message(STATUS "CMake install include dir: " ${CMAKE_INSTALL_INCLUDEDIR}) 23 | 24 | set(ARROW_PREFIX "${BUILD_OUTPUT_ROOT_DIRECTORY}") 25 | set(ARROW_INCLUDE_DIR "${ARROW_PREFIX}/include") 26 | set(ARROW_LIB_DIR "${ARROW_PREFIX}") 27 | set(ARROW_SHARED_LIB "${ARROW_LIB_DIR}/libarrow${CMAKE_SHARED_LIBRARY_SUFFIX}") 28 | set(ARROW_STATIC_LIB "${ARROW_LIB_DIR}/libarrow.a") 29 | 30 | set(ARROW_CMAKE_ARGS 31 | # Build settings 32 | #-DARROW_BUILD_STATIC=ON 33 | #-DARROW_BUILD_SHARED=OFF 34 | #-DARROW_BOOST_USE_SHARED=ON 35 | -DARROW_BUILD_TESTS=OFF 36 | -DARROW_OPTIONAL_INSTALL=ON 37 | #-DARROW_TEST_MEMCHECK=OFF 38 | #-DARROW_BUILD_BENCHMARKS=OFF 39 | 40 | # Arrow modules/dependencies 41 | #-DARROW_WITH_LZ4=ON 42 | -DARROW_WITH_ZSTD=ON 43 | #-DARROW_WITH_BROTLI=ON 44 | #-DARROW_WITH_SNAPPY=ON 45 | #-DARROW_WITH_ZLIB=ON 46 | #-DARROW_FLIGHT=ON 47 | #-DARROW_HIVESERVER2=ON 48 | #-DARROW_ORC=ON 49 | #-DARROW_GANDIVA=ON 50 | #-DARROW_GANDIVA_JAVA=ON 51 | -DARROW_PARQUET=ON 52 | -DARROW_FILESYSTEM=ON 53 | #-DARROW_HDFS=ON 54 | #-DARROW_IPC=ON 55 | ##-DARROW_COMPUTE=OFF 56 | #-DARROW_CUDA=OFF 57 | #-DARROW_GPU=OFF 58 | ##-DARROW_JEMALLOC=OFF 59 | ##-DARROW_BOOST_VENDORED=OFF 60 | #-DARROW_PYTHON=ON 61 | ) 62 | 63 | add_custom_target(arrow 64 | ALL 65 | COMMAND cd cpp && mkdir -p ${CMAKE_BUILD_TYPE} && cd ${CMAKE_BUILD_TYPE} && ${CMAKE_COMMAND} -DCMAKE_TOOLCHAIN_FILE=${CGET_PREFIX}/cget/cget.cmake -DCMAKE_INSTALL_PREFIX=${CGET_PREFIX} ${ARROW_CMAKE_ARGS} .. && ${CMAKE_COMMAND} --build . 
66 | VERBATIM 67 | WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} 68 | ) 69 | 70 | add_custom_target(arrow_install 71 | COMMAND cd cpp/${CMAKE_BUILD_TYPE} && ${CMAKE_MAKE_PROGRAM} install 72 | VERBATIM 73 | WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} 74 | ) 75 | 76 | add_dependencies(arrow_install arrow) 77 | 78 | install(CODE " 79 | execute_process( 80 | COMMAND ${CMAKE_COMMAND} --build . --target arrow_install 81 | WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} 82 | ) 83 | ") 84 | -------------------------------------------------------------------------------- /core/benchmark/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | find_package(benchmark REQUIRED) 2 | find_library(BOOST_SYSTEM_LIB boost_system HINT ${CGET_PREFIX}/lib) 3 | find_library(BOOST_IOSTREAMS_LIB boost_iostreams HINT ${CGET_PREFIX}/lib) 4 | find_library(MKL_RT_LIB mkl_rt) 5 | find_package(Threads REQUIRED) 6 | 7 | find_package(Python3 3.8 EXACT COMPONENTS Development REQUIRED) 8 | 9 | find_library(OPENBLAS_LIB openblas HINT ${CGET_PREFIX}/lib) 10 | message(STATUS "OpenBLAS = ${OPENBLAS_LIB}") 11 | 12 | if(CGET_PREFIX) 13 | include_directories(${CGET_PREFIX}/include) 14 | endif() 15 | 16 | if(NOT MKL_RT_LIB) 17 | set(MKL_RT_LIB "") 18 | endif() 19 | 20 | set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) 21 | 22 | add_executable(benchmark-ldserver 23 | benchmark.cpp) 24 | 25 | target_link_libraries(benchmark-ldserver 26 | LDServer 27 | benchmark::benchmark 28 | ${Python3_LIBRARIES} 29 | ${BOOST_SYSTEM_LIB} 30 | ${BOOST_IOSTREAMS_LIB} 31 | ${OPENBLAS_LIB} 32 | ${MKL_RT_LIB} 33 | ${CMAKE_THREAD_LIBS_INIT}) 34 | 35 | install(TARGETS benchmark-ldserver COMPONENT cli RUNTIME DESTINATION test OPTIONAL) 36 | -------------------------------------------------------------------------------- /core/benchmark/benchmark.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "../src/LDServer.h" 3 | #include 
"../src/ScoreCovarianceRunner.h" 4 | #include "../src/SummaryStatisticsLoader.h" 5 | #include "../src/RaremetalSummaryStatisticsLoader.h" 6 | #include "../src/MetastaarSummaryStatisticsLoader.h" 7 | #include 8 | #include 9 | 10 | static void BM_LDSERVER_REGIONLD_RSQUARE(benchmark::State& state) { 11 | LDServer server(100); 12 | LDQueryResult result(100000); 13 | server.set_file("chr22.test.sav"); 14 | 15 | for (auto _ : state) { 16 | // Compute 17 | server.compute_region_ld("22", 50244251, 51244237, correlation::LD_RSQUARE, result); 18 | } 19 | } 20 | 21 | BENCHMARK(BM_LDSERVER_REGIONLD_RSQUARE)->Unit(benchmark::kMillisecond)->Iterations(3); 22 | 23 | static void BM_LDSERVER_VARIANTLD_RSQUARE(benchmark::State& state) { 24 | LDServer server(100); 25 | SingleVariantLDQueryResult result(100000); 26 | server.set_file("chr22.test.sav"); 27 | 28 | for (auto _ : state) { 29 | // Compute 30 | server.compute_variant_ld("22:50244298_C/T", "22", 50244251, 51244237, correlation::LD_RSQUARE, result); 31 | } 32 | } 33 | 34 | BENCHMARK(BM_LDSERVER_VARIANTLD_RSQUARE)->UseRealTime()->Unit(benchmark::kMillisecond)->Iterations(3); 35 | 36 | static void BM_LDSERVER_JSON_CLASSIC(benchmark::State& state) { 37 | LDServer server(100); 38 | LDQueryResult result(100000); 39 | server.set_file("chr22.test.sav"); 40 | 41 | // Compute 42 | server.compute_region_ld("22", 50244251, 51244237, correlation::LD_RSQUARE, result); 43 | 44 | for (auto _ : state) { 45 | // Get JSON 46 | auto json = result.get_json_classic("blah"); 47 | 48 | // Parse back out JSON 49 | rapidjson::Document doc; 50 | doc.Parse(json.c_str()); 51 | } 52 | } 53 | 54 | BENCHMARK(BM_LDSERVER_JSON_CLASSIC)->Unit(benchmark::kMillisecond)->Iterations(3); 55 | 56 | static void BM_LDSERVER_JSON_COMPACT(benchmark::State& state) { 57 | LDServer server(100); 58 | LDQueryResult result(100000); 59 | server.set_file("chr22.test.sav"); 60 | 61 | // Compute 62 | server.compute_region_ld("22", 50244251, 51244237, correlation::LD_RSQUARE, 
result); 63 | 64 | for (auto _ : state) { 65 | // Get JSON 66 | auto json = result.get_json_compact("blah"); 67 | 68 | // Parse back out JSON 69 | rapidjson::Document doc; 70 | doc.Parse(json.c_str()); 71 | } 72 | } 73 | 74 | BENCHMARK(BM_LDSERVER_JSON_COMPACT)->Unit(benchmark::kMillisecond)->Iterations(3); 75 | 76 | static void BM_METASTAAR_LOADER(benchmark::State& state) { 77 | for (auto _ : state) { 78 | MetastaarSummaryStatisticsLoader loader( 79 | { 80 | "test.qt.segment1.metastaar.sumstat.parquet", 81 | "test.qt.segment2.metastaar.sumstat.parquet" 82 | }, 83 | { 84 | "test.qt.segment1.metastaar.cov.parquet", 85 | "test.qt.segment2.metastaar.cov.parquet" 86 | } 87 | ); 88 | 89 | loader.load_region("1", 4957, 5143); 90 | } 91 | } 92 | 93 | BENCHMARK(BM_METASTAAR_LOADER)->Unit(benchmark::kMillisecond)->Iterations(3); 94 | 95 | static void BM_RAREMETAL_LOADER(benchmark::State& state) { 96 | for (auto _ : state) { 97 | RaremetalSummaryStatisticsLoader loader({"test.smallchunk.MetaScore.assoc.gz"}, {"test.smallchunk.MetaCov.assoc.gz"}); 98 | loader.load_region("1", 2, 307); 99 | } 100 | } 101 | 102 | BENCHMARK(BM_RAREMETAL_LOADER)->Unit(benchmark::kMillisecond)->Iterations(3); 103 | 104 | int main(int argc, char** argv) { 105 | cout << "OpenMP max # threads: " << omp_get_max_threads() << endl; 106 | 107 | ::benchmark::Initialize(&argc, argv); 108 | if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return 1; 109 | ::benchmark::RunSpecifiedBenchmarks(); 110 | ::benchmark::Shutdown(); 111 | return 0; 112 | } -------------------------------------------------------------------------------- /core/hiredis.cmake: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.5) 2 | project(hiredis VERSION 0.13.2) 3 | 4 | string(SUBSTRING ${CMAKE_SHARED_LIBRARY_SUFFIX} 1 -1 SHARED_LIBRARY_SUFFIX) 5 | string(SUBSTRING ${CMAKE_STATIC_LIBRARY_SUFFIX} 1 -1 STATIC_LIBRARY_SUFFIX) 6 | 7 | add_custom_command(OUTPUT 
"${CMAKE_CURRENT_SOURCE_DIR}/libhiredis${CMAKE_SHARED_LIBRARY_SUFFIX}" "${CMAKE_CURRENT_SOURCE_DIR}/libhiredis${CMAKE_STATIC_LIBRARY_SUFFIX}" 8 | COMMAND $(MAKE) DYLIBSUFFIX=${SHARED_LIBRARY_SUFFIX} STLIBSUFFIX=${STATIC_LIBRARY_SUFFIX} 9 | WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}") 10 | 11 | add_custom_target(hiredis ALL DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/libhiredis${CMAKE_SHARED_LIBRARY_SUFFIX}" "${CMAKE_CURRENT_SOURCE_DIR}/libhiredis${CMAKE_STATIC_LIBRARY_SUFFIX}") 12 | 13 | install(FILES libhiredis${CMAKE_STATIC_LIBRARY_SUFFIX} DESTINATION lib) 14 | install(DIRECTORY . DESTINATION include/hiredis FILES_MATCHING PATTERN "*.h") 15 | -------------------------------------------------------------------------------- /core/redis.cmake: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.5) 2 | project(redis VERSION 5.0.4) 3 | 4 | execute_process(COMMAND make WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) 5 | install(PROGRAMS src/redis-server src/redis-cli DESTINATION bin) 6 | -------------------------------------------------------------------------------- /core/requirements.txt: -------------------------------------------------------------------------------- 1 | jonathonl/shrinkwrap@395361020c8 -DCMAKE_C_FLAGS=-fPIC -DCMAKE_CXX_FLAGS=-fPIC 2 | statgen/savvy@5cf11170e5d -DCMAKE_C_FLAGS=-fPIC -DCMAKE_CXX_FLAGS=-fPIC 3 | openblas,https://github.com/xianyi/OpenBLAS/archive/v0.3.17.tar.gz -DUSE_OPENMP=1 --cmake openblas.cmake 4 | armadillo,http://sourceforge.net/projects/arma/files/armadillo-10.6.0.tar.xz 5 | boost,http://downloads.sourceforge.net/project/boost/boost/1.67.0/boost_1_67_0.tar.bz2 --cmake boost.cmake -DBOOST_WITH_CHRONO=1 -DBOOST_WITH_IOSTREAMS=1 -DBOOST_WITH_PYTHON=1 -DBOOST_WITH_SYSTEM=1 -DCMAKE_C_FLAGS=-fPIC -DCMAKE_CXX_FLAGS=-fPIC 6 | gtest,https://github.com/google/googletest/archive/release-1.8.0.tar.gz 7 | 
benchmark,https://github.com/google/benchmark/archive/refs/tags/v1.5.6.tar.gz --ignore-requirements -DBENCHMARK_ENABLE_GTEST_TESTS=OFF -DCMAKE_BUILD_TYPE=Release 8 | redis,https://github.com/redis/redis/archive/refs/tags/5.0.14.tar.gz --cmake redis.cmake 9 | hiredis,https://github.com/redis/hiredis/archive/v0.13.3.tar.gz --cmake hiredis.cmake 10 | cereal,https://github.com/USCiLab/cereal/archive/v1.2.2.tar.gz --cmake header 11 | ekg/intervaltree@v0.1 --cmake header 12 | welchr/tabixpp@36a15c88922ddab4ae12a3555181c1b1cd56a8ed --cmake tabixpp.cmake 13 | msgpack/msgpack-c@be4d971c62798eb59f8455dc77a4529748bcd08f 14 | apache/arrow@apache-arrow-4.0.1 --cmake arrow.cmake 15 | -------------------------------------------------------------------------------- /core/src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | find_library(SAVVY_LIB savvy HINT ${CGET_PREFIX}/lib) 2 | find_library(HTS_LIB hts HINT ${CGET_PREFIX}/lib) 3 | find_library(TABIXPP_LIB tabixpp HINT ${CGET_PREFIX}/lib) 4 | find_library(Z_LIB NAMES libz.a z HINT ${CGET_PREFIX}/lib) 5 | find_library(ZSTD_LIB zstd HINT ${CGET_PREFIX}/lib) 6 | find_library(HIREDIS_LIB hiredis HINT ${CGET_PREFIX}/lib) 7 | find_library(ARMADILLO_LIB armadillo HINT ${CGET_PREFIX}/lib) 8 | find_library(MKL_RT_LIB mkl_rt) 9 | find_library(ARROW_LIB arrow HINT ${CGET_PREFIX}/lib) 10 | find_library(PARQUET_LIB parquet HINT ${CGET_PREFIX}/lib) 11 | 12 | if(APPLE) 13 | set(Python3_USE_STATIC_LIBS FALSE) 14 | endif() 15 | 16 | find_package(Python3 3.8 EXACT COMPONENTS Development REQUIRED) 17 | 18 | set(BOOST_ROOT "" CACHE PATH ${CGET_PREFIX}) 19 | find_package(Boost REQUIRED COMPONENTS python38 iostreams chrono) 20 | find_package(Threads REQUIRED) 21 | 22 | find_library(OPENBLAS_LIB openblas HINT ${CGET_PREFIX}/lib) 23 | message(STATUS "OpenBLAS = ${OPENBLAS_LIB}") 24 | 25 | find_package(OpenMP REQUIRED) 26 | 27 | if(CGET_PREFIX) 28 | include_directories(${CGET_PREFIX}/include) 29 | 
endif() 30 | 31 | if(NOT MKL_RT_LIB) 32 | set(MKL_RT_LIB "") 33 | endif() 34 | 35 | set(DEBUG_OPTIONS -g -O0 -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer) 36 | 37 | set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) 38 | set(SOURCE_FILES 39 | LDServer.cpp 40 | LDServer.h 41 | ScoreServer.cpp 42 | ScoreServer.h 43 | Raw.h 44 | Raw.cpp 45 | Segment.cpp 46 | Segment.h 47 | ScoreSegment.cpp 48 | ScoreSegment.h 49 | Cell.cpp 50 | Cell.h 51 | Types.h 52 | Morton.cpp 53 | Morton.h 54 | Phenotypes.h 55 | Phenotypes.cpp 56 | Mask.cpp 57 | Mask.h 58 | SummaryStatisticsLoader.h 59 | SummaryStatisticsLoader.cpp 60 | RaremetalSummaryStatisticsLoader.h 61 | RaremetalSummaryStatisticsLoader.cpp 62 | MetastaarSummaryStatisticsLoader.h 63 | MetastaarSummaryStatisticsLoader.cpp 64 | ScoreCovarianceRunner.cpp 65 | ScoreCovarianceRunner.h 66 | VariantCollator.h 67 | VariantCollator.cpp) 68 | 69 | add_library(LDServer ${SOURCE_FILES}) 70 | target_include_directories(LDServer PUBLIC ${Python3_INCLUDE_DIRS}) 71 | target_compile_options(LDServer PUBLIC $<$:${DEBUG_OPTIONS}>) 72 | target_link_libraries(LDServer 73 | OpenMP::OpenMP_CXX 74 | ${PARQUET_LIB} 75 | ${ARROW_LIB} 76 | ${SAVVY_LIB} 77 | ${HTS_LIB} 78 | ${HIREDIS_LIB} 79 | ${Z_LIB} 80 | ${ZSTD_LIB} 81 | ${ARMADILLO_LIB} 82 | ${OPENBLAS_LIB} 83 | ${Boost_LIBRARIES} 84 | ${TABIXPP_LIB}) 85 | 86 | add_library(pywrapper SHARED 87 | pywrapper.cpp 88 | LDServer.cpp 89 | ScoreServer.cpp 90 | Raw.cpp 91 | Segment.cpp 92 | Segment.h 93 | ScoreSegment.cpp 94 | ScoreSegment.h 95 | Cell.cpp 96 | Cell.h 97 | Types.h 98 | Morton.cpp 99 | Morton.h 100 | Phenotypes.h 101 | Phenotypes.cpp 102 | Mask.cpp 103 | Mask.h 104 | SummaryStatisticsLoader.h 105 | SummaryStatisticsLoader.cpp 106 | RaremetalSummaryStatisticsLoader.h 107 | RaremetalSummaryStatisticsLoader.cpp 108 | MetastaarSummaryStatisticsLoader.h 109 | MetastaarSummaryStatisticsLoader.cpp 110 | ScoreCovarianceRunner.cpp 111 | ScoreCovarianceRunner.h 112 | VariantCollator.h 113 | 
VariantCollator.cpp) 114 | 115 | set_target_properties(pywrapper PROPERTIES SUFFIX ".so") 116 | set_target_properties(pywrapper PROPERTIES PREFIX "") 117 | target_include_directories(pywrapper PUBLIC ${Python3_INCLUDE_DIRS}) 118 | target_link_libraries(pywrapper 119 | OpenMP::OpenMP_CXX 120 | ${PARQUET_LIB} 121 | ${ARROW_LIB} 122 | ${SAVVY_LIB} 123 | ${HTS_LIB} 124 | ${HIREDIS_LIB} 125 | ${Z_LIB} 126 | ${ZSTD_LIB} 127 | ${ARMADILLO_LIB} 128 | ${OPENBLAS_LIB} 129 | ${MKL_RT_LIB} 130 | ${TABIXPP_LIB} 131 | ${Boost_LIBRARIES} 132 | ${Python3_LIBRARIES}) 133 | 134 | add_executable(sandbox Sandbox.cpp ${SOURCE_FILES}) 135 | target_include_directories(sandbox PUBLIC ${Python3_INCLUDE_DIRS}) 136 | target_compile_options(sandbox PUBLIC $<$:${DEBUG_OPTIONS}>) 137 | target_link_libraries(sandbox 138 | OpenMP::OpenMP_CXX 139 | ${PARQUET_LIB} 140 | ${ARROW_LIB} 141 | ${SAVVY_LIB} 142 | ${HTS_LIB} 143 | ${HIREDIS_LIB} 144 | ${Z_LIB} 145 | ${ZSTD_LIB} 146 | ${ARMADILLO_LIB} 147 | ${OPENBLAS_LIB} 148 | ${MKL_RT_LIB} 149 | ${TABIXPP_LIB} 150 | ${Boost_LIBRARIES} 151 | ${Python3_LIBRARIES}) 152 | 153 | install(TARGETS pywrapper DESTINATION ${PROJECT_SOURCE_DIR}/../rest/core) 154 | -------------------------------------------------------------------------------- /core/src/Cell.h: -------------------------------------------------------------------------------- 1 | #ifndef LDSERVER_CELL_H 2 | #define LDSERVER_CELL_H 3 | 4 | #define ARMA_DONT_USE_WRAPPER 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include "Raw.h" 19 | #include "Morton.h" 20 | #include "Segment.h" 21 | #include "Types.h" 22 | #include 23 | 24 | using namespace std; 25 | 26 | /** 27 | * Class to represent a "cell" of a matrix of segments, where each segment is a fixed width chunk of the genome. 28 | * The indexes i and j correspond to the position in that matrix. 
Often these indexes are linearized into a single 29 | * index z by using morton codes. 30 | * https://en.wikipedia.org/wiki/Z-order_curve 31 | */ 32 | class Cell { 33 | protected: 34 | bool cached; 35 | uint64_t i; 36 | uint64_t j; 37 | 38 | /** 39 | * Matrix of computed correlation values (r, r^2, or covariance, etc.) 40 | * If segment i has N rows and M columns, and segment j has O rows and P columns, then 41 | * raw_fmat will be a matrix of dimensions M x P. 42 | */ 43 | unique_ptr raw_fmat; 44 | 45 | public: 46 | shared_ptr segment_i; 47 | shared_ptr segment_j; 48 | 49 | Cell(uint64_t i, uint64_t j); 50 | virtual ~Cell(); 51 | 52 | uint64_t get_i() const; 53 | uint64_t get_j() const; 54 | bool is_diagonal() const; 55 | 56 | /** 57 | * Functions to load/save to redis cache. 58 | * 59 | * The cache key is typically created from a combination of: reference panel or genotype dataset ID, 60 | * name of sample subset, correlation type, chromosome, and morton code. 61 | * 62 | * Only the raw_fmat (matrix of computed correlation values) is stored in the cache. 
63 | * 64 | * @param redis_cache 65 | * @param key 66 | */ 67 | void load(redisContext* redis_cache, const string& key); 68 | void save(redisContext* redis_cache, const string& key); 69 | 70 | bool is_cached() const; 71 | 72 | virtual void compute() = 0; 73 | 74 | void extract(std::uint64_t region_start_bp, std::uint64_t region_stop_bp, struct LDQueryResult& result, bool diagonal = false); 75 | void extract(const std::string& index_variant, std::uint64_t index_bp, std::uint64_t region_start_bp, std::uint64_t region_stop_bp, struct SingleVariantLDQueryResult& result); 76 | 77 | }; 78 | 79 | class CellR : public Cell { 80 | public: 81 | using Cell::Cell; 82 | virtual ~CellR(); 83 | void compute() override; 84 | }; 85 | 86 | class CellRsquare : public Cell { 87 | public: 88 | using Cell::Cell; 89 | virtual ~CellRsquare(); 90 | void compute() override; 91 | }; 92 | 93 | class CellCov : public Cell { 94 | public: 95 | using Cell::Cell; 96 | virtual ~CellCov(); 97 | void compute() override; 98 | }; 99 | 100 | class CellRsquareApprox : public Cell { 101 | public: 102 | using Cell::Cell; 103 | virtual ~CellRsquareApprox(); 104 | void compute() override; 105 | }; 106 | 107 | class CellFactory { 108 | public: 109 | static shared_ptr create(correlation correlation_type, uint64_t i, uint64_t j); 110 | }; 111 | #endif 112 | -------------------------------------------------------------------------------- /core/src/LDServer.h: -------------------------------------------------------------------------------- 1 | #ifndef LDSERVER_LDSERVER_H 2 | #define LDSERVER_LDSERVER_H 3 | 4 | #define ARMA_DONT_USE_WRAPPER 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include "Raw.h" 20 | #include "Morton.h" 21 | #include "Segment.h" 22 | #include "Cell.h" 23 | #include "Types.h" 24 | #include 25 | 26 | using namespace std; 27 | 28 | class LDServer { 29 | 
private: 30 | unordered_map> samples; 31 | unordered_map> raw; 32 | 33 | uint32_t segment_size; 34 | set allowed_segments; 35 | 36 | bool cache_enabled; 37 | uint32_t cache_key; 38 | string cache_hostname; 39 | int cache_port; 40 | redisContext* cache_context; 41 | 42 | static void parse_variant(const string& variant, string& chromosome, uint64_t& position, string& ref_allele, string& alt_allele); 43 | shared_ptr load_segment(const shared_ptr& raw, genotypes_store store, const string& samples_name, bool only_variants, const std::string& chromosome, uint64_t i, std::map>& segments) const; 44 | 45 | public: 46 | static const string ALL_SAMPLES_KEY; 47 | 48 | LDServer(uint32_t segment_size = 1000); 49 | virtual ~LDServer(); 50 | 51 | /** 52 | * Specify variant positions directly to the server, so that only segments containing these positions 53 | * are loaded (this is useful for sparse queries with a large range, but few variants.) 54 | * @param pos Starting position of the variant 55 | */ 56 | void add_overlap_position(const uint64_t& pos); 57 | void set_file(const string& file); 58 | void set_samples(const string& name, const vector& samples); 59 | void force_samples(const std::string &name, const std::vector &samples); 60 | void enable_cache(uint32_t cache_key, const string& hostname, int port); 61 | void disable_cache(); 62 | 63 | static string make_cell_cache_key(uint32_t cache_key, const string& samples_name, correlation correlation_type, const string& chromosome, uint64_t morton_code); 64 | static string make_segment_cache_key(uint32_t cache_key, const string& samples_name, const string& chromosome, uint64_t start_bp, uint64_t stop_bp); 65 | 66 | vector get_chromosomes(); 67 | uint32_t get_segment_size() const; 68 | 69 | /** 70 | * Compute LD between all variants in a region. 
71 | * @param region_chromosome 72 | * @param region_start_bp 73 | * @param region_stop_bp 74 | * @param correlation_type 75 | * @param result 76 | * @param samples_name 77 | * @param diagonal Should we compute the diagonal elements? (variance of each variant) 78 | * @param segments_out Shared vector of segments that can be passed on to the ScoreServer for extra computations. 79 | * @return 80 | */ 81 | bool compute_region_ld(const string& region_chromosome, uint64_t region_start_bp, uint64_t region_stop_bp, correlation correlation_type, struct LDQueryResult& result, const string& samples_name = ALL_SAMPLES_KEY, bool diagonal = false, SharedSegmentVector segments_out = nullptr) const; 82 | bool compute_variant_ld(const string& index_variant, const string& region_chromosome, uint64_t region_start_bp, uint64_t region_stop_bp, correlation correlation_type, struct SingleVariantLDQueryResult& result, const string& samples_name = ALL_SAMPLES_KEY) const; 83 | }; 84 | 85 | #endif -------------------------------------------------------------------------------- /core/src/Mask.cpp: -------------------------------------------------------------------------------- 1 | #include "Mask.h" 2 | using namespace std; 3 | 4 | shared_ptr> VariantGroup::get_variants() const { 5 | auto vs = make_shared>(); 6 | transform( 7 | variants.begin(), 8 | variants.end(), 9 | inserter(*vs, vs->begin()), 10 | [](const VariantMeta& vm) { 11 | return vm.variant; 12 | } 13 | ); 14 | return vs; 15 | } 16 | 17 | shared_ptr> VariantGroup::get_positions() const { 18 | auto pos = make_shared>(); 19 | transform( 20 | variants.begin(), 21 | variants.end(), 22 | inserter(*pos, pos->begin()), 23 | [](const VariantMeta& vm) { 24 | return vm.position; 25 | } 26 | ); 27 | return pos; 28 | } 29 | 30 | void VariantGroup::add_variant(const std::string& variant) { 31 | VariantMeta vm(variant); 32 | variants.emplace(vm); 33 | this->chrom = vm.chromosome; 34 | this->start = this->start == -1 ? 
vm.position : std::min(this->start, vm.position); 35 | this->stop = this->stop == -1 ? vm.position : std::max(this->stop, vm.position); 36 | } 37 | 38 | void Mask::load_file(const string &filepath, const string &chrom, uint64_t start, uint64_t stop) { 39 | if (start <= 0) { throw std::invalid_argument("Mask starting position was < 0"); } 40 | if (stop <= 0) { throw std::invalid_argument("Mask stop position was < 0"); } 41 | 42 | Tabix tbfile(const_cast(filepath)); 43 | string region = chrom + ":" + to_string(start) + "-" + to_string(stop); 44 | 45 | bool has_chrom = find(tbfile.chroms.begin(), tbfile.chroms.end(), chrom) != tbfile.chroms.end(); 46 | if (!has_chrom) { 47 | throw LDServerGenericException("Chromosome " + chrom + " not found within mask file"); 48 | } 49 | 50 | string line; 51 | vector tokens; 52 | 53 | if ((!chrom.empty()) && (start != 0) && (stop != 0)) { 54 | tbfile.setRegion(region); 55 | } 56 | 57 | uint64_t groups_added = 0; 58 | while (tbfile.getNextLine(line)) { 59 | auto separator = regex("[ \t]"); 60 | copy(sregex_token_iterator(line.begin(), line.end(), separator, -1), sregex_token_iterator(), back_inserter(tokens)); 61 | 62 | // Create group object 63 | VariantGroup group; 64 | group.name = tokens[0]; 65 | group.chrom = tokens[1]; 66 | group.start = stoull(tokens[2]); 67 | group.stop = stoull(tokens[3]); 68 | 69 | // Extract variants 70 | SortedVariantSet vset; 71 | transform( 72 | tokens.begin() + 4, 73 | tokens.end(), 74 | inserter(vset, vset.begin()), 75 | [](const string &str) { 76 | return VariantMeta(str); 77 | } 78 | ); 79 | group.variants = vset; 80 | 81 | // Store group to map 82 | groups.emplace(make_pair(tokens[0], group)); 83 | groups_added++; 84 | 85 | tokens.clear(); 86 | } 87 | 88 | if (groups_added == 0) { 89 | throw LDServerGenericException( 90 | boost::str(boost::format("No groups loaded within genomic region %s:%i-%i for mask %s") % chrom % start % stop % id) 91 | ); 92 | } 93 | } 94 | 95 | Mask::Mask(const uint64_t id, 
VariantGroupType group_type, GroupIdentifierType ident_type, const std::vector& groups) { 96 | this->id = id; 97 | this->group_type = group_type; 98 | this->identifier_type = ident_type; 99 | for (auto&& g : groups) { 100 | this->groups.emplace(make_pair(g.name, g)); 101 | } 102 | } 103 | 104 | Mask::Mask(const string& filepath, const uint64_t id, VariantGroupType group_type, GroupIdentifierType ident_type) { 105 | this->id = id; 106 | this->group_type = group_type; 107 | this->identifier_type = ident_type; 108 | load_file(filepath); 109 | } 110 | 111 | Mask::Mask(const string &filepath, const uint64_t id, VariantGroupType group_type, GroupIdentifierType ident_type, const string &chrom, uint64_t start, uint64_t stop) { 112 | this->id = id; 113 | this->group_type = group_type; 114 | this->identifier_type = ident_type; 115 | load_file(filepath, chrom, start, stop); 116 | } 117 | 118 | void Mask::print_groups(const uint64_t& group_limit, const uint64_t& variant_limit) const { 119 | uint64_t g = 0; 120 | for (auto&& kv : groups) { 121 | cout << kv.second.name << endl; 122 | cout << "Chrom: " << kv.second.chrom << endl; 123 | cout << "Start: " << kv.second.start << endl; 124 | cout << "Stop: " << kv.second.stop << endl; 125 | cout << "Variants: " << endl; 126 | uint64_t v = 0; 127 | for (auto&& vmeta : kv.second.variants) { 128 | cout << " " + vmeta.variant << endl; 129 | if (v > variant_limit) { 130 | break; 131 | } 132 | v++; 133 | } 134 | 135 | if (g > group_limit) { 136 | break; 137 | } 138 | g++; 139 | } 140 | } 141 | 142 | shared_ptr> Mask::get_variant_set(const string &group) const { 143 | auto iter = groups.find(group); 144 | if (iter == groups.end()) { 145 | throw out_of_range("Group " + group + "not found in mask file"); 146 | } 147 | 148 | auto ptr = make_shared>(); 149 | transform( 150 | iter->second.variants.begin(), 151 | iter->second.variants.end(), 152 | inserter(*ptr, (*ptr).begin()), 153 | [](const VariantMeta& vm) { 154 | return vm.variant; 155 | } 
156 | ); 157 | 158 | return ptr; 159 | } 160 | 161 | shared_ptr> Mask::get_group_names() const { 162 | auto group_names = make_shared>(groups.size()); 163 | for (auto&& kv : groups) { 164 | group_names->emplace_back(kv.first); 165 | } 166 | return group_names; 167 | } 168 | 169 | shared_ptr Mask::get_group(const string& group) const { 170 | auto iter = groups.find(group); 171 | if (iter == groups.end()) { 172 | throw out_of_range("Group " + group + "not found in mask file"); 173 | } 174 | 175 | auto ptr = make_shared(iter->second); 176 | return ptr; 177 | } 178 | 179 | Mask::group_iterator Mask::begin() const { 180 | return groups.begin(); 181 | } 182 | 183 | Mask::group_iterator Mask::end() const { 184 | return groups.end(); 185 | } 186 | 187 | bool Mask::operator==(const Mask& other) const { 188 | return id == other.id; 189 | } -------------------------------------------------------------------------------- /core/src/Mask.h: -------------------------------------------------------------------------------- 1 | #ifndef LDSERVER_MASK_H 2 | #define LDSERVER_MASK_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include "Types.h" 16 | 17 | // TODO: move to Types.h 18 | template 19 | struct VariantSort { 20 | bool operator()(const T& lhs, const T& rhs) const { 21 | return lhs.position < rhs.position; 22 | } 23 | }; 24 | 25 | // TODO: could we just refer to these types by string, instead of having to define each one as a hardcoded enum? 26 | 27 | // Enum for type of variant group (gene, region, other?) 
28 | enum VariantGroupType : uint8_t { 29 | GENE, 30 | REGION 31 | }; 32 | 33 | // Enum for type of identifier for each group 34 | enum GroupIdentifierType : uint8_t { 35 | ENSEMBL, 36 | COORDINATES 37 | }; 38 | 39 | typedef std::set> SortedVariantSet; 40 | 41 | struct VariantGroup { 42 | std::string name; 43 | std::string chrom; 44 | uint64_t start; 45 | uint64_t stop; 46 | SortedVariantSet variants; 47 | std::vector filters; 48 | 49 | VariantGroup() : name(""), chrom(""), start(-1), stop(-1) {}; 50 | std::shared_ptr> get_variants() const; 51 | std::shared_ptr> get_positions() const; 52 | void add_variant(const std::string& variant); 53 | bool operator==(const VariantGroup& other) { return (name == other.name) && (variants == other.variants); } 54 | }; 55 | 56 | class Mask { 57 | using group_iterator = std::map::const_iterator; 58 | 59 | public: 60 | /** 61 | * Default constructor that loads the entire mask. 62 | * @param filepath 63 | */ 64 | Mask(const std::string &filepath, const uint64_t id, VariantGroupType group_type, GroupIdentifierType ident_type); 65 | 66 | /** 67 | * Constructor to load only a subset of the mask, using only regions of variants that overlap the given start/stop. 68 | * @param filepath 69 | * @param chrom 70 | * @param start 71 | * @param stop 72 | */ 73 | Mask(const std::string &filepath, const uint64_t id, VariantGroupType group_type, GroupIdentifierType ident_type, const std::string &chrom, uint64_t start, uint64_t stop); 74 | 75 | /** 76 | * Constructor to pass in defined variant groups directly, instead of loading from a file. 77 | * @param filepath 78 | * @param id 79 | * @param group_type 80 | * @param ident_type 81 | * @param groups 82 | */ 83 | Mask(const uint64_t id, VariantGroupType group_type, GroupIdentifierType ident_type, const std::vector& groups); 84 | 85 | /** 86 | * Print out each group and its variants, mainly for debugging purposes. 87 | * @param group_limit Limit the number of groups printed in total. 
88 | * @param variant_limit Limit the number of variants printed out for each group. 89 | */ 90 | void print_groups(const uint64_t& group_limit, const uint64_t& variant_limit) const; 91 | 92 | /** 93 | * Get a list of group names. 94 | */ 95 | std::shared_ptr> get_group_names() const; 96 | 97 | /** 98 | * Retrieve a group. 99 | */ 100 | std::shared_ptr get_group(const std::string& group) const; 101 | 102 | /** 103 | * Functions for retrieving the variants in a group. 104 | * @param group The group name. 105 | */ 106 | std::shared_ptr> get_variant_set(const std::string &group) const; 107 | 108 | /** 109 | * Iterator functions. 110 | * The iterator will have first and second members, with the first member 111 | * being the name of the group, and the second member being a MaskGroup object. 112 | */ 113 | group_iterator begin() const; 114 | group_iterator end() const; 115 | 116 | /** 117 | * Getters/setters 118 | */ 119 | inline uint64_t get_id() const { return id; }; 120 | 121 | inline VariantGroupType get_group_type() const { return group_type; }; 122 | inline void set_group_type(VariantGroupType group_type) { this->group_type = group_type; } 123 | 124 | inline GroupIdentifierType get_identifier_type() const { return identifier_type; }; 125 | inline void set_identifier_type(GroupIdentifierType identifier_type) { this->identifier_type = identifier_type; } 126 | 127 | /** 128 | * Operators 129 | */ 130 | bool operator==(const Mask& other) const; 131 | 132 | protected: 133 | uint64_t id; // Unique string for this mask 134 | std::string description; // Text description of what variant filters this mask represents 135 | VariantGroupType group_type; // The type of group (is it a gene, a region, etc?) 136 | GroupIdentifierType identifier_type; // The identifier type of the group (ENSEMBL ID, REFSEQ?) 137 | 138 | /** 139 | * Store map from group name --> vector of variants. 140 | */ 141 | std::map groups; 142 | 143 | /** 144 | * Loader for mask files. 
Expects the file to be tabixed and bgzipped. 145 | * 146 | * The first 4 columns should be: 147 | * group name (for example, a gene) 148 | * chromosome 149 | * position of furthest upstream variant in the group (start) 150 | * position of furthest downstream variant in the group (stop) 151 | * 152 | * The remainder of the line should be a tab-delimited list of variants in EPACTS format (chrom:pos_ref/alt). 153 | * 154 | * @param filepath 155 | * @param chrom 156 | * @param start 157 | * @param stop 158 | */ 159 | void load_file(const std::string &filepath, const std::string &chrom = "", uint64_t start = 0, uint64_t stop = 0); 160 | }; 161 | 162 | #endif //LDSERVER_MASK_H 163 | -------------------------------------------------------------------------------- /core/src/MetastaarSummaryStatisticsLoader.h: -------------------------------------------------------------------------------- 1 | #ifndef LDSERVER_METASTAARSUMMARYSTATISTICSLOADER_H 2 | #define LDSERVER_METASTAARSUMMARYSTATISTICSLOADER_H 3 | 4 | #include "SummaryStatisticsLoader.h" 5 | 6 | /* 7 | Struct to represent the metadata stored within each MetaSTAAR parquet file. 8 | 9 | There are two parquet files per segment of the genome: a single variant statistic file, which contains information per 10 | variant such as chrom/pos/ref/alt, MAC/MAF, and score statistic. Appended at the end of the primary columns are extra 11 | columns representing the GtU matrix (where U is the half hat matrix; there is 1 column per covariate in the null model.) 12 | 13 | MetaSTAAR chunks the genome up into segments of some fixed size, called the segment size. Each segment has a start, 14 | mid, and end position (sometimes referred to as "region" start, mid, end.) 15 | 16 | Each segment though may or may not contain genetic variants, depending on the particular dataset. There may be no known 17 | variants there, or the genotyping array did not have variants typed there, etc. 
18 | 19 | The score files look like: 20 | GtU 21 | ┌────────────────┐ 22 | ▼ ▼ 23 | chr pos ref alt alt_AC MAC MAF N U V 1 2 24 | Segment 1 start ──► 1 1 C A 53 53 0.010566 2508 -3.615483 12.264961 0.515152 -0.060252 25 | 1 2 A G 57 57 0.011364 2508 -4.244432 13.173521 0.553539 -0.002675 26 | Segment 1 mid ──► 1 3 A C 52 52 0.010367 2508 1.089355 12.032375 0.504133 0.104684 27 | ────────────────────────────────────────────────────────────────────────────────── 28 | Segment 2 start ──► 1 4 G T 66 66 0.013158 2508 -3.910195 15.667785 0.641294 -0.047769 29 | 1 5 G C 84 84 0.016746 2508 3.675083 21.090993 0.815441 0.033904 30 | Segment 2 mid ──► 1 6 G C 56 56 0.011164 2508 0.155301 13.420570 0.543676 0.016430 31 | 32 | Note that each segment's score file only contains the variants from the start to midpoint of each segment, and not the end. 33 | 34 | The covariance matrices look like: 35 | 36 | Segment Segment 37 | Start End 38 | │ │ 39 | ▼ ▼ 40 | 123456 41 | ┌──────┐ 42 | 1│xxx...│ 43 | 2│.xxx..│ 44 | Segment──►3│..xxx.│ 45 | Mid └──────┘ 46 | 123456 47 | ▲ 48 | │ 49 | Segment 50 | Mid 51 | 52 | Note that each covariance matrix is rectangular, in order to store sliding windows of covariances for 53 | each row variant. The rows only extend to the region midpoint, but the columns extend to the region end. In order to 54 | lookup information for variants past the midpoint, the next segment's score file needs to be loaded. 55 | */ 56 | struct MetastaarParquetMetadata { 57 | std::string filepath; 58 | std::string chrom; 59 | uint64_t region_start = 0; 60 | uint64_t region_mid = 0; 61 | uint64_t region_end = 0; 62 | uint64_t nrows = 0; 63 | uint64_t ncols = 0; 64 | double cov_maf_cutoff = 0; 65 | }; 66 | 67 | MetastaarParquetMetadata read_parquet_metadata(const std::string& s); 68 | 69 | using MetastaarFileIntervalTree = IntervalTree; 70 | 71 | /** 72 | * Loader for MetaSTAAR summary statistic files. 
#if 0  // Text of neighbouring dump entries that shares these lines; kept verbatim (includes Morton.cpp's own #include "Morton.h").
73 | * 74 | * MetaSTAAR separates the final covariance matrix into 75 | */ 76 | class MetastaarSummaryStatisticsLoader : public SummaryStatisticsLoader { 77 | protected: 78 | // Maps from chromosome -> interval tree of (start pos, end pos) for MetaSTAAR segmented files. 79 | std::map score_tree; 80 | std::map cov_tree; 81 | 82 | shared_ptr cov_result; 83 | shared_ptr score_result; 84 | uint64_t nsamples; 85 | public: 86 | MetastaarSummaryStatisticsLoader(const std::vector& score_vec, const std::vector& cov_vec); 87 | void load_region(const std::string& chromosome, uint64_t start, uint64_t stop); 88 | shared_ptr getCovResult(); 89 | shared_ptr getScoreResult(); 90 | double getSigma2(); 91 | uint64_t getNumSamples(); 92 | }; 93 | 94 | #endif 95 | -------------------------------------------------------------------------------- /core/src/Morton.cpp: -------------------------------------------------------------------------------- 1 | #include "Morton.h" 2 |
#endif

/**
 * Spread the low 32 bits of `value` over the even bit positions of a 64-bit
 * word: input bit k ends up at output bit 2k, all odd bits are zero.
 * Bits 32..63 of the input are ignored.
 */
uint64_t split_bits(uint64_t value) {
    value &= 0xffffffff;
    value = (value ^ (value << 16)) & 0xffff0000ffff;
    value = (value ^ (value << 8)) & 0xff00ff00ff00ff;
    value = (value ^ (value << 4)) & 0xf0f0f0f0f0f0f0f;
    value = (value ^ (value << 2)) & 0x3333333333333333;
    value = (value ^ (value << 1)) & 0x5555555555555555;
    return value;
}

/**
 * Inverse of split_bits(): gather the even-position bits of `value` into the
 * low 32 bits of the result. Odd-position bits are ignored.
 */
uint64_t combine_bits(uint64_t value) {
    value &= 0x5555555555555555;
    value = (value ^ (value >> 1)) & 0x3333333333333333;
    value = (value ^ (value >> 2)) & 0xf0f0f0f0f0f0f0f;
    value = (value ^ (value >> 4)) & 0xff00ff00ff00ff;
    value = (value ^ (value >> 8)) & 0xffff0000ffff;
    value = (value ^ (value >> 16)) & 0xffffffff;
    return value;
}

/**
 * Interleave two 32-bit coordinates into one Morton (Z-order) code:
 * x occupies the even bits, y the odd bits.
 */
uint64_t to_morton_code(uint64_t x, uint64_t y) {
    return split_bits(x) | (split_bits(y) << 1);
}

/** Decode a Morton code back into its x (even bits) and y (odd bits) parts. */
void from_morton_code(uint64_t z, uint64_t& x, uint64_t& y) {
    x = combine_bits(z);
    y = combine_bits(z >> 1);
}

/**
 * Overwrite the low-order bits of one dimension inside a Morton code.
 *
 * @param bit_pattern   Coordinate bits (not interleaved) to write; only the low 32 bits are used.
 * @param bit_position  Position of the current bit inside the 64-bit Morton code;
 *                      the coordinate bits 0 .. bit_position/2 of the dimension are replaced.
 * @param value         Morton code to modify.
 * @param dim           0 for x (even bits), 1 for y (odd bits).
 * @return `value` with the selected coordinate bits cleared and `bit_pattern` OR-ed in.
 */
uint64_t load_bits(uint64_t bit_pattern, uint32_t bit_position, uint64_t value, uint32_t dim) {
    // Mask that clears the (bit_position / 2 + 1) lowest coordinate bits of dimension `dim`.
    uint64_t wipe_mask = ~(split_bits(0xffffffff >> (32u - (bit_position / 2u + 1u))) << dim);
    bit_pattern = split_bits(bit_pattern) << dim;
    return (value & wipe_mask) | bit_pattern;
}

/**
 * BIGMIN search over the query rectangle whose corner codes are `z_min` and
 * `z_max`: walks the bits from most to least significant and applies the
 * BIGMIN decision table (Tropf & Herzog) to find the next code after `xd`
 * that lies inside the rectangle.
 *
 * @return The BIGMIN candidate; 0 if no branch ever assigned one.
 * @throws std::logic_error if a bit of z_min is set where z_max's is not (min > max).
 */
uint64_t compute_bigmin(uint64_t xd, uint64_t z_min, uint64_t z_max) {
    uint64_t bigmin = 0u;
    uint64_t mask = 0x8000000000000000;
    uint32_t bit_position = 63u;
    do {
        const uint64_t z_min_bit = z_min & mask;
        const uint64_t z_max_bit = z_max & mask;
        const uint64_t xd_bit = xd & mask;
        const uint32_t dim = bit_position % 2u;
        // Shift in 64-bit arithmetic: `0x1 << 31` on a plain int overflows the sign bit.
        const uint64_t bit_mask = uint64_t{1} << (bit_position / 2u);
        if (xd_bit == 0u && z_min_bit == 0u && z_max_bit > 0u) {
            bigmin = load_bits(bit_mask, bit_position, z_min, dim);
            z_max = load_bits(bit_mask - 1u, bit_position, z_max, dim);
        } else if (xd_bit == 0u && z_min_bit > 0u && z_max_bit == 0u) {
            // not possible because min <= max
            throw std::logic_error("Error while computing BIGMIN");
        } else if (xd_bit == 0u && z_min_bit > 0u && z_max_bit > 0u) {
            bigmin = z_min;
            return bigmin;
        } else if (xd_bit > 0u && z_min_bit == 0u && z_max_bit == 0u) {
            return bigmin;
        } else if (xd_bit > 0u && z_min_bit == 0u && z_max_bit > 0u) {
            z_min = load_bits(bit_mask, bit_position, z_min, dim);
        } else if (xd_bit > 0u && z_min_bit > 0u && z_max_bit == 0u) {
            // not possible because min <= max
            throw std::logic_error("Error while computing BIGMIN");
        }
        // (0,0,0) and (1,1,1): all three agree on this bit, nothing to do.
        --bit_position;
        mask >>= 1;
    } while (mask != 0u);
    return bigmin;
}

/**
 * LITMAX counterpart of compute_bigmin(): same bit walk, but tracks the
 * candidate for the last code before `xd` that lies inside the rectangle
 * spanned by `z_min` and `z_max`.
 *
 * @return The LITMAX candidate; 0 if no branch ever assigned one.
 * @throws std::logic_error if a bit of z_min is set where z_max's is not (min > max).
 */
uint64_t compute_litmax(uint64_t xd, uint64_t z_min, uint64_t z_max) {
    uint64_t litmax = 0u;
    uint64_t mask = 0x8000000000000000;
    uint32_t bit_position = 63u;
    do {
        const uint64_t z_min_bit = z_min & mask;
        const uint64_t z_max_bit = z_max & mask;
        const uint64_t xd_bit = xd & mask;
        const uint32_t dim = bit_position % 2u;
        // Shift in 64-bit arithmetic: `0x1 << 31` on a plain int overflows the sign bit.
        const uint64_t bit_mask = uint64_t{1} << (bit_position / 2u);
        if (xd_bit == 0u && z_min_bit == 0u && z_max_bit > 0u) {
            z_max = load_bits(bit_mask - 1u, bit_position, z_max, dim);
        } else if (xd_bit == 0u && z_min_bit > 0u && z_max_bit == 0u) {
            // not possible because min <= max
            throw std::logic_error("Error while computing LITMAX");
        } else if (xd_bit == 0u && z_min_bit > 0u && z_max_bit > 0u) {
            return litmax;
        } else if (xd_bit > 0u && z_min_bit == 0u && z_max_bit == 0u) {
            litmax = z_max;
            return litmax;
        } else if (xd_bit > 0u && z_min_bit == 0u && z_max_bit > 0u) {
            litmax = load_bits(bit_mask - 1u, bit_position, z_max, dim);
            z_min = load_bits(bit_mask, bit_position, z_min, dim);
        } else if (xd_bit > 0u && z_min_bit > 0u && z_max_bit == 0u) {
            // not possible because min <= max
            throw std::logic_error("Error while computing LITMAX");
        }
        --bit_position;
        mask >>= 1;
    } while (mask != 0u);
    return litmax;
}

/**
 * Compute LITMAX and BIGMIN in a single bit walk.
 *
 * `litmax` and `bigmin` are only written by the branches that find a
 * candidate; callers that need a defined value when nothing is found must
 * initialise them beforehand.
 *
 * @throws std::logic_error if a bit of z_min is set where z_max's is not (min > max).
 */
void compute_litmax_bigmin(uint64_t xd, uint64_t z_min, uint64_t z_max, uint64_t& litmax, uint64_t& bigmin) {
    uint64_t mask = 0x8000000000000000;
    uint32_t bit_position = 63u;
    do {
        const uint64_t z_min_bit = z_min & mask;
        const uint64_t z_max_bit = z_max & mask;
        const uint64_t xd_bit = xd & mask;
        const uint32_t dim = bit_position % 2u;
        // Shift in 64-bit arithmetic: `0x1 << 31` on a plain int overflows the sign bit.
        const uint64_t bit_mask = uint64_t{1} << (bit_position / 2u);
        if (xd_bit == 0u && z_min_bit == 0u && z_max_bit > 0u) {
            bigmin = load_bits(bit_mask, bit_position, z_min, dim);
            z_max = load_bits(bit_mask - 1u, bit_position, z_max, dim);
        } else if (xd_bit == 0u && z_min_bit > 0u && z_max_bit == 0u) {
            // not possible because min <= max
            throw std::logic_error("Error while computing LITMAX and BIGMIN");
        } else if (xd_bit == 0u && z_min_bit > 0u && z_max_bit > 0u) {
            bigmin = z_min;
            break;
        } else if (xd_bit > 0u && z_min_bit == 0u && z_max_bit == 0u) {
            litmax = z_max;
            break;
        } else if (xd_bit > 0u && z_min_bit == 0u && z_max_bit > 0u) {
            litmax = load_bits(bit_mask - 1u, bit_position, z_max, dim);
            z_min = load_bits(bit_mask, bit_position, z_min, dim);
        } else if (xd_bit > 0u && z_min_bit > 0u && z_max_bit == 0u) {
            // not possible because min <= max
            throw std::logic_error("Error while computing LITMAX and BIGMIN");
        }
        --bit_position;
        mask >>= 1;
    } while (mask != 0u);
}

/**
 * Find the next Morton code, starting at `z_init`, whose decoded coordinates
 * both lie in [range_start, range_end] and satisfy x <= y (upper triangle).
 *
 * Codes inside the square but below the diagonal are stepped over one by one;
 * codes outside the square are skipped with compute_bigmin().
 *
 * @return The matching code, or a value greater than `z_max` when the scan
 *         ran past the end without a match.
 */
uint64_t get_next_z(uint64_t range_start, uint64_t range_end, uint64_t z_min, uint64_t z_max, uint64_t z_init) {
    uint64_t xd_start = 0u, xd_end = 0u;
    uint64_t xd = z_init;
    while (xd <= z_max) {
        from_morton_code(xd, xd_start, xd_end);
        const bool inside = xd_start >= range_start && xd_start <= range_end &&
                            xd_end >= range_start && xd_end <= range_end;
        if (inside) {
            if (xd_start <= xd_end) { // only upper triangle of the matrix is needed
                return xd;
            }
            ++xd;
        } else {
            xd = compute_bigmin(xd, z_min, z_max);
        }
    }
    return xd;
}

/**
 * Variant of get_next_z() for a single index: find the next Morton code,
 * starting at `z_init`, that pairs `index` with a coordinate inside
 * [range_start, range_end].
 *
 *  - index <= range_start: x must be in the range and y must equal index.
 *  - index >= range_end:   y must be in the range and x must equal index.
 *  - otherwise: within the block y in [range_start, index], x in [index, range_end],
 *    codes with x == index or y == index match; other codes of that block are
 *    stepped over one by one.
 * Every other code is skipped with compute_bigmin().
 *
 * @return The matching code, or a value greater than `z_max` when the scan
 *         ran past the end without a match.
 */
uint64_t get_next_z(uint64_t index, uint64_t range_start, uint64_t range_end, uint64_t z_min, uint64_t z_max, uint64_t z_init) {
    uint64_t xd_start = 0u, xd_end = 0u;
    uint64_t xd = z_init;
    while (xd <= z_max) {
        from_morton_code(xd, xd_start, xd_end);
        if (index <= range_start) {
            if ((xd_start >= range_start) && (xd_start <= range_end) && (index == xd_end)) {
                return xd;
            }
        } else if (index >= range_end) {
            if ((xd_end >= range_start) && (xd_end <= range_end) && (index == xd_start)) {
                return xd;
            }
        } else {
            if ((xd_end >= range_start) && (xd_end <= index) && (xd_start >= index) && (xd_start <= range_end)) {
                if (xd_end == index || xd_start == index) {
                    return xd;
                }
                ++xd;
                continue;
            }
        }
        xd = compute_bigmin(xd, z_min, z_max);
    }
    return xd;
}

#if 0  // Text of the following dump entries (Morton.h, start of Phenotypes.h) that shares these lines; kept verbatim.
-------------------------------------------------------------------------------- /core/src/Morton.h: -------------------------------------------------------------------------------- 1 | #ifndef LDSERVER_MORTON_H 2 | #define LDSERVER_MORTON_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | using namespace std; 10 | 11 | uint64_t split_bits(uint64_t value); 12 | 13 | uint64_t combine_bits(uint64_t value); 14 | 15 | uint64_t to_morton_code(uint64_t x, uint64_t y); 16 | 17 | void from_morton_code(uint64_t z, uint64_t& x, uint64_t& y); 18 | 19 | uint64_t load_bits(uint64_t bit_pattern, uint32_t bit_position, uint64_t value, uint32_t dim); 20 | 21 | uint64_t compute_bigmin(uint64_t xd, uint64_t z_min, uint64_t z_max); 22 | 23 | uint64_t compute_litmax(uint64_t xd, uint64_t z_min, uint64_t z_max); 24 | 25 | void compute_litmax_bigmin(uint64_t xd, uint64_t z_min, uint64_t z_max, uint64_t& litmax, uint64_t& bigmin); 26 | 27 | uint64_t get_next_z(uint64_t range_start, uint64_t range_end, uint64_t z_min, uint64_t z_max, uint64_t z_init); 28 | 29 | uint64_t get_next_z(uint64_t index, uint64_t range_start, uint64_t range_end, uint64_t z_min, uint64_t z_max, uint64_t z_init); 30 | 31 | #endif //LDSERVER_MORTON_H 32 | -------------------------------------------------------------------------------- /core/src/Phenotypes.h: -------------------------------------------------------------------------------- 1 | #ifndef LDSERVER_PHENOTYPES_H 2 | #define LDSERVER_PHENOTYPES_H 3 | 4 | #define ARMA_DONT_USE_WRAPPER 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "boost/variant.hpp" 11 | #include "Types.h" 12 | #include 13 | 14 | using namespace std; 15 | 16 | //template T most_common(vector& vec); 17 | 18 | class ColumnTypeMap { 19 | protected: 20 | vector> types; 21 | map ctmap; 22 | public: 23 | inline void add(string name, ColumnType type) { 24 | types.emplace_back(make_pair(name, type)); 25 | ctmap[name] = type; 26 | } 27 | inline
#endif
ColumnType get_type(string name) { return ctmap.at(name); } 28 | inline auto size() const { return types.size(); } 29 | inline auto begin() const { return types.begin(); } 30 | inline auto end() const { return types.end(); } 31 | }; 32 | 33 | class PhenotypeParseException : public std::runtime_error { using std::runtime_error::runtime_error; }; 34 | 35 | class Phenotypes { 36 | protected: 37 | map columns_float; 38 | map> columns_text; 39 | map> map_cat; 40 | map> map_level; 41 | 42 | string file_path; 43 | ColumnTypeMap column_types; 44 | SharedVector sample_ids; 45 | map sample_id_index; 46 | public: 47 | /** 48 | * Load a phenotype file. 49 | * The file may be either: 50 | * 1. A tab-delimited file, with one phenotype per column. Must have a header row. File extension can be either 51 | * .tab or .tab.gz. 52 | * 2. A PED-formatted file. Must have file extension .ped or .ped.gz. There must be an accompanying .dat or .dat.gz 53 | * file in addition. You need only specify the path to the ped file. 54 | * @param path Path to tab or PED file. 55 | * @param types 56 | * @param nrows 57 | * @param delim File delimiter 58 | * @param sample_column Name of the column that contains the sample identifiers 59 | * @param analysis_cols Optional list of columns to actually parse (i.e. these columns will be used for analysis.) 60 | */ 61 | void load_file(const string &path, const ColumnTypeMap &types, size_t nrows, const string& delim, const string& sample_column, SharedVector analysis_cols = nullptr); 62 | 63 | SharedArmaVec as_vec(const string &colname); 64 | SharedVector as_text(const string &colname); 65 | SharedVector get_phenotypes(); 66 | 67 | /** 68 | * Reduce this matrix of phenotypes to the provided list of samples (in the order given.) 69 | * Missing samples will have a NaN value for all phenotypes. 70 | * @param samples 71 | */ 72 | void reorder(const vector &samples); 73 | 74 | /** 75 | * Calculate score statistic, and p-value given a vector of genotypes. 
76 | * @param genotypes 77 | * @param phenotype 78 | * @return 79 | */ 80 | shared_ptr compute_score(arma::vec &genotypes, const string &phenotype); 81 | 82 | /** 83 | * Calculate phenotypic variance. 84 | * @param phenotype 85 | * @return 86 | */ 87 | double compute_sigma2(const string &phenotype); 88 | 89 | /** 90 | * Get list of samples for which the phenotype has complete data. 91 | */ 92 | shared_ptr> get_complete_samples(const string& phenotype); 93 | 94 | /** 95 | * Get number of non-missing observations for phenotype. 96 | * @param phenotype 97 | * @return 98 | */ 99 | uint64_t get_nsamples(const string &phenotype); 100 | 101 | /** 102 | * Pretty print a summary of the current state of this object, 103 | * primarily for debugging purposes. 104 | */ 105 | void pprint() const; 106 | }; 107 | 108 | #endif //LDSERVER_PHENOTYPES_H -------------------------------------------------------------------------------- /core/src/RaremetalSummaryStatisticsLoader.h: -------------------------------------------------------------------------------- 1 | #ifndef LDSERVER_RAREMETALSUMMARYSTATISTICSLOADER_H 2 | #define LDSERVER_RAREMETALSUMMARYSTATISTICSLOADER_H 3 | 4 | #include "SummaryStatisticsLoader.h" 5 | 6 | class ScoreCovColumn { 7 | protected: 8 | std::string name; 9 | uint16_t index; 10 | public: 11 | ScoreCovColumn(const std::string& name, const uint16_t& index) : name(name), index(index) {} 12 | inline const std::string& get_name() const { return name; } 13 | inline const uint16_t& get_index() const { return index; } 14 | inline operator int() const { return index; } 15 | inline operator std::string() const { return name; } 16 | }; 17 | 18 | struct ScoreColumnSpec { 19 | const ScoreCovColumn colChrom; 20 | const ScoreCovColumn colPos; 21 | const ScoreCovColumn colRef; 22 | const ScoreCovColumn colAlt; 23 | const ScoreCovColumn colInformativeN; 24 | const ScoreCovColumn colAltFreq; 25 | const ScoreCovColumn colInformativeAltAc; 26 | const ScoreCovColumn colU; 27 | const
ScoreCovColumn colV; 28 | const ScoreCovColumn colEffectAllele; 29 | const ScoreCovColumn colPvalue; 30 | }; 31 | 32 | struct CovColumnSpec { 33 | const ScoreCovColumn colChrom; 34 | const ScoreCovColumn colStartPos; 35 | const ScoreCovColumn colPos; 36 | const ScoreCovColumn colCov; 37 | }; 38 | 39 | const ScoreColumnSpec SCORE_COLUMNS_RVTEST = { 40 | {"CHROM", 0}, 41 | {"POS", 1}, 42 | {"REF", 2}, 43 | {"ALT", 3}, 44 | {"N_INFORMATIVE", 4}, 45 | {"AF", 5}, 46 | {"INFORMATIVE_ALT_AC", 6}, 47 | {"U_STAT", 12}, 48 | {"SQRT_V_STAT", 13}, 49 | {"effect allele", 3}, 50 | {"PVALUE", 15} 51 | }; 52 | 53 | const ScoreColumnSpec SCORE_COLUMNS_RAREMETAL = { 54 | {"CHROM", 0}, 55 | {"POS", 1}, 56 | {"REF", 2}, 57 | {"ALT", 3}, 58 | {"N_INFORMATIVE", 4}, 59 | {"AF", 5}, 60 | {"INFORMATIVE_ALT_AC", 7}, 61 | {"U_STAT", 13}, 62 | {"SQRT_V_STAT", 14}, 63 | {"effect allele", 3}, 64 | {"PVALUE", 16}, 65 | }; 66 | 67 | const CovColumnSpec COV_COLUMNS_RAREMETAL = { 68 | {"CHROM", 0}, 69 | {"CURRENT_POS", 1}, 70 | {"POS", 2}, 71 | {"COV", 3}, 72 | }; 73 | 74 | const CovColumnSpec COV_COLUMNS_RVTEST = { 75 | {"CHROM", 0}, 76 | {"START_POS", 1}, 77 | {"POS", 4}, 78 | {"COV", 5}, 79 | }; 80 | 81 | void getNthDataLine(const std::string& filepath, std::string& out, int n); 82 | ScoreCovFormat detectScoreCovFormat(const std::string& filepath); 83 | 84 | template 85 | T extract_numeric(T func(const string&), const string& value, const ScoreCovColumn& col, const string& filepath, const string& variant) { 86 | try { 87 | return func(value); 88 | } 89 | catch (...) { 90 | throw LDServerGenericException( 91 | "Invalid value detected while parsing score statistic file" 92 | ).set_secret( 93 | boost::str(boost::format("File was: %s, offending value was '%s' in column '%s' for variant '%s'") % filepath % value % col.get_name() % variant) 94 | ); 95 | } 96 | } 97 | 98 | /** 99 | * Loader for RAREMETAL or rvtest summary statistic datasets. 
100 | */ 101 | class RaremetalSummaryStatisticsLoader : public SummaryStatisticsLoader { 102 | protected: 103 | std::map score_map; 104 | std::map cov_map; 105 | std::map alt_freq; 106 | std::map pos_variant; 107 | 108 | shared_ptr cov_result; 109 | shared_ptr score_result; 110 | 111 | ScoreCovFormat detected_format; 112 | double sigma2 = numeric_limits::quiet_NaN(); 113 | uint64_t nsamples = numeric_limits::quiet_NaN(); 114 | 115 | /** 116 | * Function that parses the score statistic file's header to extract: 117 | * 1. program name - the program (rvtest, raremetalworker) was used to create these statistics, which ends up 118 | * stored in a member variable `detected_format` 119 | * 2. sigma2 - residual variance under the null model 120 | * 3. nsamples - number of samples analyzed in the model 121 | * @param filepath 122 | */ 123 | void parseHeader(const std::string& filepath); 124 | 125 | /** 126 | * Currently unused function for getting number of variants in a covariance file, for the purposes of allocating 127 | * a matrix ahead of time. We currently do not need to form the matrix unless future features are required. 128 | * @param filepath Path to covariance matrix file. 129 | * @param region Region of the file to extract, given as chrom:start-end. 130 | * @return Number of variants within the given region in the file. 131 | */ 132 | static uint64_t getNumberOfVariantsFromCovFile(const std::string& filepath, const std::string& region); 133 | 134 | double getAltFreqForPosition(uint64_t& pos); 135 | std::string getVariantForPosition(uint64_t& pos); 136 | 137 | /** 138 | * Load score statistics from a file in a given region. Same idea with load_cov(). 139 | * These functions are both called automatically by load_region(). 
140 | * @param chromosome 141 | * @param start 142 | * @param stop 143 | */ 144 | void load_scores(const std::string& chromosome, uint64_t start, uint64_t stop); 145 | void load_cov(const std::string& chromosome, uint64_t start, uint64_t stop); 146 | public: 147 | /** 148 | * Create a loader object. 149 | * @param score_path Path on disk to the score statistic file. Should be bgzipped and tabix-indexed. 150 | * @param cov_path Path on disk to the covariances file. Should be bgzipped and tabix-indexed. Note that the file need 151 | * only be tabix-indexed on the start position of each row, and not the range of each row (this would be difficult 152 | * since neither rvtest nor RAREMETALWORKER format include an "end position" column.) 153 | * 154 | * Once a loader object is created, call load_region() to load statistics from a specific region into memory. 155 | */ 156 | RaremetalSummaryStatisticsLoader(const std::vector& score_vec, const std::vector& cov_vec); 157 | 158 | /** 159 | * Load a region of score statistics and covariances into memory. 160 | * @param chromosome Chromosome. 161 | * @param start Integer start position of the region. 162 | * @param end Integer end position of the region. 163 | */ 164 | void load_region(const std::string& chromosome, uint64_t start, uint64_t stop) override; 165 | 166 | // Return the covariances. 167 | shared_ptr getCovResult() override; 168 | 169 | // Return the score statistics. 170 | shared_ptr getScoreResult() override; 171 | 172 | /** 173 | * Getter to return the residual variance under the null model. 174 | * @return sigma2 175 | */ 176 | double getSigma2() override { return sigma2; } 177 | 178 | /** 179 | * Getter to return number of samples used when calculating scores/covariances. 
180 | * @return nsamples 181 | */ 182 | uint64_t getNumSamples() override { return nsamples; } 183 | }; 184 | 185 | #endif 186 | -------------------------------------------------------------------------------- /core/src/Raw.cpp: -------------------------------------------------------------------------------- 1 | #include "Raw.h" 2 | 3 | std::vector extract_samples(const std::string& filepath) { 4 | bool is_vcf = filepath.find(".vcf") != string::npos; 5 | bool is_bcf = filepath.find(".bcf") != string::npos; 6 | bool is_sav = filepath.find(".sav") != string::npos; 7 | if (is_vcf || is_bcf) { 8 | savvy::vcf::reader<1> reader(filepath, savvy::fmt::gt); 9 | return reader.samples(); 10 | } 11 | else if (is_sav) { 12 | savvy::reader reader(filepath, savvy::fmt::gt); 13 | return reader.samples(); 14 | } 15 | else { 16 | throw std::invalid_argument("File " + filepath + " has unsupported format"); 17 | } 18 | } 19 | 20 | Raw::Raw(const string& file) : file(file) { 21 | 22 | } 23 | 24 | Raw::~Raw() { 25 | 26 | } 27 | 28 | RawVCF::~RawVCF() { 29 | 30 | } 31 | 32 | void RawVCF::open(const string& chromosome, const vector& samples, bool coded012) { 33 | f.reset(); 34 | f = std::make_unique>(file, chromosome, coded012 ? 
savvy::fmt::ac : savvy::fmt::gt); 35 | f->subset_samples({samples.begin(), samples.end()}); 36 | has_cached = false; 37 | } 38 | 39 | vector RawVCF::get_samples() const { 40 | return savvy::vcf::reader<1>(file, savvy::fmt::gt).samples(); 41 | } 42 | 43 | vector RawVCF::get_chromosomes() const { 44 | return savvy::vcf::indexed_reader<1>(file, {""}, savvy::fmt::gt).chromosomes(); 45 | } 46 | 47 | void RawVCF::load(Segment& segment) { 48 | segment.clear(); 49 | if (has_cached && (segment.get_start_bp() <= anno.position()) && (anno.position() <= segment.get_stop_bp())) { 50 | segment.add(anno, alleles); 51 | } else{ 52 | f->reset_region({segment.get_chromosome(), segment.get_start_bp(), numeric_limits::max() - 1}); 53 | } 54 | has_cached = false; 55 | while (f->read(anno, alleles).good()) { 56 | if (anno.position() > segment.get_stop_bp()) { 57 | has_cached = true; 58 | break; 59 | } 60 | segment.add(anno, alleles); 61 | } 62 | segment.freeze(); 63 | } 64 | 65 | void RawVCF::load_names(Segment &segment) { 66 | segment.clear_names(); 67 | if (has_cached && (segment.get_start_bp() <= anno.position()) && (anno.position() <= segment.get_stop_bp())) { 68 | segment.add_name(anno, alleles); 69 | } else{ 70 | f->reset_region({segment.get_chromosome(), segment.get_start_bp(), numeric_limits::max() - 1}); 71 | } 72 | has_cached = false; 73 | while (f->read(anno, alleles)) { 74 | if (anno.position() > segment.get_stop_bp()) { 75 | has_cached = true; 76 | break; 77 | } 78 | segment.add_name(anno, alleles); 79 | } 80 | segment.freeze_names(); 81 | } 82 | /* Load genotypes for the segment's range. Reuses the record left over from the previous read when its position falls inside this segment, otherwise seeks the reader to the segment start. Reading stops at the first record past get_stop_bp(), which is kept as the look-ahead record (has_cached = true). */ 83 | void RawVCF::load_genotypes(Segment &segment) { 84 | segment.clear_genotypes(); 85 | if (has_cached && (segment.get_start_bp() <= anno.position()) && (anno.position() <= segment.get_stop_bp())) { 86 | segment.add_genotypes(alleles); 87 | } else{ 88 | f->reset_region({segment.get_chromosome(), segment.get_start_bp(), numeric_limits::max() - 1}); 89 | } has_cached = false; /* clear the look-ahead flag before reading, as load() and load_names() do, so it cannot stay set once the reader is exhausted */ 90 | while (f->read(anno, alleles)) { 91 | if (anno.position() >
segment.get_stop_bp()) { 92 | has_cached = true; 93 | break; 94 | } 95 | segment.add_genotypes(alleles); 96 | } 97 | segment.freeze_genotypes(); 98 | } 99 | 100 | RawSAV::~RawSAV() { 101 | } 102 | 103 | void RawSAV::open(const string& chromosome, const vector& samples, bool coded012) { 104 | f.reset(); 105 | f = unique_ptr(new savvy::indexed_reader(file, {chromosome}, coded012 ? savvy::fmt::ac : savvy::fmt::gt)); 106 | f->subset_samples({samples.begin(), samples.end()}); 107 | has_cached = false; 108 | } 109 | 110 | vector RawSAV::get_samples() const { 111 | return savvy::reader(file, savvy::fmt::gt).samples(); 112 | } 113 | 114 | vector RawSAV::get_chromosomes() const { 115 | return savvy::indexed_reader(file, {""}, savvy::fmt::gt).chromosomes(); 116 | } 117 | 118 | void RawSAV::load(Segment& segment) { 119 | segment.clear(); 120 | if (has_cached && (segment.get_start_bp() <= anno.position()) && (anno.position() <= segment.get_stop_bp())) { 121 | segment.add(anno, alleles); 122 | } else { 123 | f->reset_region({segment.get_chromosome(), segment.get_start_bp()}); 124 | } 125 | has_cached = false; 126 | while (f->read(anno, alleles).good()) { 127 | if (anno.position() > segment.get_stop_bp()) { 128 | has_cached = true; 129 | break; 130 | } 131 | segment.add(anno, alleles); 132 | } 133 | segment.freeze(); 134 | } 135 | 136 | void RawSAV::load_names(Segment &segment) { 137 | segment.clear_names(); 138 | if (has_cached && (segment.get_start_bp() <= anno.position()) && (anno.position() <= segment.get_stop_bp())) { 139 | segment.add_name(anno, alleles); 140 | } else{ 141 | f->reset_region({segment.get_chromosome(), segment.get_start_bp()}); 142 | } 143 | has_cached = false; 144 | while (f->read(anno, alleles)) { 145 | if (anno.position() > segment.get_stop_bp()) { 146 | has_cached = true; 147 | break; 148 | } 149 | segment.add_name(anno, alleles); 150 | } 151 | segment.freeze_names(); 152 | } 153 | /* SAV counterpart of RawVCF::load_genotypes: same look-ahead handling, but the reader is re-seeked with only chromosome and start position. */ 154 | void RawSAV::load_genotypes(Segment &segment) { 155 |
segment.clear_genotypes(); 156 | if (has_cached && (segment.get_start_bp() <= anno.position()) && (anno.position() <= segment.get_stop_bp())) { 157 | segment.add_genotypes(alleles); 158 | } else{ 159 | f->reset_region({segment.get_chromosome(), segment.get_start_bp()}); 160 | } has_cached = false; /* clear the look-ahead flag before reading, as load() and load_names() do, so it cannot stay set once the reader is exhausted */ 161 | while (f->read(anno, alleles)) { 162 | if (anno.position() > segment.get_stop_bp()) { 163 | has_cached = true; 164 | break; 165 | } 166 | segment.add_genotypes(alleles); 167 | } 168 | segment.freeze_genotypes(); 169 | } 170 | 171 | shared_ptr RawFactory::create(const string &file) { 172 | if ((file.length() >= 4) && (file.compare(file.length() - 4, 4, ".sav") == 0)) { 173 | return make_shared(file); 174 | } else if ((file.length() >= 7) && (file.compare(file.length() - 7, 7, ".vcf.gz") == 0)) { 175 | return make_shared(file); 176 | } else if ((file.length() >= 4) && (file.compare(file.length() - 4, 4, ".bcf") == 0)) { 177 | return make_shared(file); 178 | } 179 | throw runtime_error("Unknown genotype file type"); 180 | } 181 | 182 | -------------------------------------------------------------------------------- /core/src/Raw.h: -------------------------------------------------------------------------------- 1 | #ifndef LDSERVER_RAW_H 2 | #define LDSERVER_RAW_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "Segment.h" 11 | 12 | using namespace std; 13 | 14 | /** 15 | * Helper function for python to extract full list of samples from a genotype file.
16 | * @param filepath 17 | * @return 18 | */ 19 | std::vector extract_samples(const string& filepath); 20 | 21 | class Raw { 22 | protected: 23 | string file; 24 | 25 | public: 26 | Raw(const string& file); 27 | virtual ~Raw(); 28 | 29 | virtual void open(const string& chromosome, const vector& samples, bool coded012) = 0; 30 | virtual vector get_samples() const = 0; 31 | virtual vector get_chromosomes() const = 0; 32 | virtual void load(Segment& segment) = 0; 33 | virtual void load_names(Segment &segment) = 0; 34 | virtual void load_genotypes(Segment &segment) = 0; 35 | }; 36 | 37 | class RawVCF : public Raw { 38 | private: 39 | unique_ptr> f; 40 | bool has_cached; 41 | savvy::site_info anno; 42 | savvy::compressed_vector alleles; 43 | 44 | public: 45 | using Raw::Raw; 46 | virtual ~RawVCF(); 47 | 48 | void open(const string& chromosome, const vector& samples, bool coded012) override; 49 | vector get_samples() const override; 50 | vector get_chromosomes() const override; 51 | void load(Segment& segment) override; 52 | void load_names(Segment &segment) override; 53 | void load_genotypes(Segment &segment) override; 54 | }; 55 | 56 | class RawSAV : public Raw { 57 | private: 58 | unique_ptr f; 59 | bool has_cached; 60 | savvy::site_info anno; 61 | savvy::compressed_vector alleles; 62 | 63 | public: 64 | using Raw::Raw; 65 | virtual ~RawSAV(); 66 | 67 | void open(const string& chromosome, const vector& samples, bool coded012) override; 68 | vector get_samples() const override; 69 | vector get_chromosomes() const override; 70 | void load(Segment& segment) override; 71 | void load_names(Segment &segment) override; 72 | void load_genotypes(Segment &segment) override; 73 | }; 74 | 75 | class RawFactory { 76 | public: 77 | static shared_ptr create(const string& file); 78 | }; 79 | 80 | 81 | #endif 82 | -------------------------------------------------------------------------------- /core/src/ScoreCovarianceRunner.h: 
-------------------------------------------------------------------------------- 1 | #ifndef LDSERVER_RAREMETALRUNNER_H 2 | #define LDSERVER_RAREMETALRUNNER_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include "Mask.h" 14 | #include "LDServer.h" 15 | #include "ScoreServer.h" 16 | #include "SummaryStatisticsLoader.h" 17 | #include "RaremetalSummaryStatisticsLoader.h" 18 | #include "MetastaarSummaryStatisticsLoader.h" 19 | #include "Phenotypes.h" 20 | #include "Segment.h" 21 | #include "Types.h" 22 | 23 | enum class VariantFormat {EPACTS, COLONS}; 24 | 25 | class ScoreCovarianceConfig { 26 | public: 27 | /** 28 | * Region specification 29 | */ 30 | std::string chrom; 31 | uint64_t start; 32 | uint64_t stop; 33 | 34 | /** 35 | * Relevant settings when genotype and phenotype files are specified. 36 | */ 37 | std::vector genotype_files; 38 | uint32_t genotype_dataset_id; 39 | std::string phenotype_file; 40 | uint32_t phenotype_dataset_id; 41 | std::string phenotype; 42 | ColumnTypeMap phenotype_column_types; 43 | std::vector phenotype_analysis_columns; 44 | uint64_t phenotype_nrows; 45 | std::string phenotype_delim; 46 | std::string phenotype_sample_column; 47 | std::string sample_subset; 48 | std::vector samples; 49 | 50 | /** 51 | * Settings for when serving scores/covariance from rvtest or raremetalworker generated files 52 | */ 53 | uint32_t summary_stat_dataset_id; 54 | std::vector summary_stat_score_files; 55 | std::vector summary_stat_cov_files; 56 | std::string summary_stat_format; 57 | 58 | /** 59 | * Mask related settings 60 | */ 61 | std::vector masks; 62 | 63 | /** 64 | * Cache related settings 65 | */ 66 | uint32_t segment_size; 67 | std::string redis_hostname; 68 | uint16_t redis_port; 69 | 70 | /** 71 | * Output related settings 72 | */ 73 | VariantFormat variant_format = VariantFormat::EPACTS; 74 | 75 | void pprint() const; 76 | }; 77 | 78 | shared_ptr 
make_score_covariance_config(); 79 | 80 | enum class ScoreCovRunMode {COMPUTE, PRECOMPUTE}; 81 | 82 | class ScoreCovarianceRunner { 83 | protected: 84 | std::shared_ptr document; 85 | std::shared_ptr config; 86 | std::shared_ptr ld_server; 87 | std::shared_ptr score_server; 88 | std::shared_ptr summary_stat_loader; 89 | ScoreCovRunMode run_mode; 90 | public: 91 | ScoreCovarianceRunner(std::shared_ptr config); 92 | void run(); 93 | std::string getJSON() const; 94 | std::string getPrettyJSON() const; 95 | }; 96 | 97 | #endif //LDSERVER_RAREMETALRUNNER_H 98 | -------------------------------------------------------------------------------- /core/src/ScoreSegment.cpp: -------------------------------------------------------------------------------- 1 | #include "ScoreSegment.h" 2 | 3 | ScoreSegment::ScoreSegment(const string& chromosome, uint64_t start_bp, uint64_t stop_bp, genotypes_store store) : Segment(chromosome, start_bp, stop_bp, store) { 4 | score_results = make_shared>(); 5 | } 6 | 7 | ScoreSegment::ScoreSegment(Segment&& other) noexcept : Segment(std::move(other)) { 8 | score_results = make_shared>(); 9 | } 10 | 11 | void ScoreSegment::load(redisContext *redis_cache, const string& key) { 12 | redisReply *reply = nullptr; 13 | reply = (redisReply *) redisCommand(redis_cache, "GET %b", key.c_str(), key.length()); 14 | if (reply == nullptr) { 15 | throw runtime_error("Error while reading a segment from Redis cache"); 16 | } 17 | if (reply->type == REDIS_REPLY_ERROR) { 18 | throw runtime_error("Error while reading a segment from Redis cache: " + string(reply->str)); 19 | } 20 | if (reply->len > 0) { 21 | stringbuf buffer(string(reply->str, reply->len), ios::binary | ios::in); 22 | basic_istream is(&buffer); 23 | { 24 | cereal::BinaryInputArchive iarchive(is); 25 | load(iarchive); 26 | } 27 | cached = true; 28 | names_loaded = true; 29 | genotypes_loaded = false; 30 | } else { 31 | cached = false; 32 | names_loaded = false; 33 | genotypes_loaded = false; 34 | } 35 | 
freeReplyObject(reply); 36 | } 37 | 38 | void ScoreSegment::save(redisContext *redis_cache, const string& key) { 39 | redisReply *reply = nullptr; 40 | stringstream os(ios::binary | ios::out); 41 | { 42 | cereal::BinaryOutputArchive oarchive(os); 43 | save(oarchive); 44 | } 45 | string temp = os.str(); 46 | size_t temp_size = os.tellp(); 47 | reply = (redisReply *) redisCommand(redis_cache, "SET %b %b", key.c_str(), key.length(), temp.c_str(), temp_size); 48 | if (reply == nullptr) { 49 | throw runtime_error("Error while writing a segment to Redis cache"); 50 | } 51 | if (reply->type == REDIS_REPLY_ERROR) { 52 | throw runtime_error("Error while writing a segment to Redis cache: " + string(reply->str)); 53 | } 54 | cached = true; 55 | freeReplyObject(reply); 56 | } 57 | 58 | bool ScoreSegment::has_scores() const { 59 | return !score_results->empty(); 60 | } 61 | 62 | void ScoreSegment::compute_scores(const arma::vec &phenotype) { 63 | // If the segment has no genotypes, we can't calculate anything. 64 | if (n_haplotypes == 0) { 65 | return; 66 | } 67 | 68 | // Load genotypes. 
69 | arma::fmat genotypes(this->get_genotypes()); 70 | 71 | // Center and calculate sigma2 from phenotype 72 | double pheno_mean = arma::mean(phenotype); 73 | arma::vec phenotype_centered = phenotype - pheno_mean; 74 | double sigma2 = arma::var(phenotype_centered, 1); 75 | 76 | // Calculate statistics for each variant 77 | for (uint64_t col = 0; col < genotypes.n_cols; col++) { 78 | ScoreResult result; 79 | result.variant = this->names[col]; 80 | result.position = this->positions[col]; 81 | result.chrom = this->chromosome; 82 | 83 | arma::vec genotype_col = arma::conv_to::from(genotypes.col(col)); 84 | 85 | // Mean center/impute 86 | double mean = means[col]; 87 | genotype_col -= mean; 88 | genotype_col.replace(arma::datum::nan, 0); 89 | 90 | // Score stat 91 | double u = arma::dot(genotype_col, phenotype_centered); 92 | 93 | // Calculate denominator 94 | double denom = 0; 95 | double value; 96 | for (uint64_t i = 0; i < genotype_col.n_elem; i++) { 97 | value = genotype_col[i]; 98 | denom += value * value; 99 | } 100 | denom = denom * sigma2; 101 | 102 | double v = sqrt(denom); 103 | double t = (u / v); 104 | double pvalue = 2 * arma::normcdf(-fabs(t)); 105 | 106 | result.score_stat = u / sigma2; // match RAREMETAL convention 107 | result.pvalue = pvalue; 108 | result.alt_freq = this->freqs[col];; 109 | 110 | score_results->emplace_back(result); 111 | } 112 | } 113 | 114 | void ScoreSegment::add_score(ScoreResult score) { 115 | score_results->emplace_back(score); 116 | } 117 | 118 | void ScoreSegment::extract(uint64_t start, uint64_t end, struct ScoreStatQueryResult& result) const { 119 | int i_start, i_end; 120 | 121 | // If this segment doesn't have scores, or doesn't overlap the region requested, 122 | // signal with a sentinel value of -1 that loading stopped immediately. 123 | // As a side effect, overlaps_region will store the start and end index of the segment to iterate over. 
124 | if (!this->has_scores() || !this->overlaps_region(start, end, i_start, i_end)) { 125 | result.last_i = -1; 126 | return; 127 | } 128 | 129 | int64_t i = result.last_i >= 0 ? result.last_i : i_start; 130 | for (; i <= i_end; i++) { 131 | result.data.emplace_back((*score_results)[i]); 132 | if (result.data.size() >= result.limit) { 133 | result.last_i = i + 1; 134 | return; 135 | } 136 | } 137 | 138 | result.last_i = -1; 139 | } 140 | 141 | bool ScoreSegment::operator==(const ScoreSegment& other) const { 142 | for (int i = 0; i < this->get_n_variants(); ++i) { 143 | if (other.get_name(i) != other.get_name(i)) { return false; } 144 | if (other.get_position(i) != other.get_position(i)) { return false; } 145 | } 146 | 147 | if (this->get_store() != other.get_store()) { 148 | return false; 149 | } 150 | 151 | double* this_value; 152 | double* other_value; 153 | double diff; 154 | for (int i = 0; i < this->score_results->size(); i++) { 155 | this_value = &(*this->score_results)[i].pvalue; 156 | other_value = &(*other.score_results)[i].pvalue; 157 | diff = fabs(*this_value - *other_value); 158 | if (diff > 0.00001) { 159 | return false; 160 | } 161 | } 162 | 163 | return true; 164 | } -------------------------------------------------------------------------------- /core/src/ScoreSegment.h: -------------------------------------------------------------------------------- 1 | #ifndef LDSERVER_SCORESEGMENT_H 2 | #define LDSERVER_SCORESEGMENT_H 3 | 4 | #include "Segment.h" 5 | #include 6 | #include 7 | 8 | class ScoreSegment : public Segment { 9 | protected: 10 | shared_ptr> score_results; 11 | 12 | public: 13 | ScoreSegment(const string& chromosome, uint64_t start_bp, uint64_t stop_bp, genotypes_store store); 14 | 15 | /** 16 | * Construct a new ScoreSegment, moving the data from an instance of the base class Segment. 17 | * @param other A Segment object. 
18 | */ 19 | ScoreSegment(Segment&& other) noexcept; 20 | bool has_scores() const; 21 | void compute_scores(const arma::vec& phenotype); 22 | void extract(uint64_t start, uint64_t end, struct ScoreStatQueryResult& result) const; 23 | void add_score(ScoreResult score); 24 | 25 | /** 26 | * Comparisons 27 | */ 28 | bool operator==(const ScoreSegment& other) const; 29 | 30 | /** 31 | * Load/save functions for redis. 32 | * These are also overloaded below for loading/saving from serialized binary. 33 | * 34 | * TODO: these functions couldn't be used directly from the base class, because the load/save functions 35 | * that take archive parameters can't be virtualed (because they are templated.) There's probably a better way 36 | * to do this but for now it's just directly copied code. 37 | * 38 | * @param redis_cache 39 | * @param key 40 | */ 41 | virtual void load(redisContext* redis_cache, const string& key) override; 42 | virtual void save(redisContext* redis_cache, const string& key) override; 43 | 44 | /** 45 | * Load/save functions for binary format. 46 | * Stores the same elements as the base class Segment, and additionally score stats/pvalues/etc. 47 | * 48 | * Cereal's docs seem to think it's fine to hide the non-virtual method of the base class, since it can't be 49 | * virtual in the first place (can't have templated virtual functions.) If there's a better way to do this, 50 | * it would be nice. 
51 | * 52 | * @tparam Archive 53 | * @param ar 54 | */ 55 | template void load(Archive& ar) { 56 | ar(cereal::base_class(this), score_results); 57 | } 58 | 59 | template void save(Archive& ar) const { 60 | ar(cereal::base_class(this), score_results); 61 | } 62 | }; 63 | 64 | #endif //LDSERVER_SCORESEGMENT_H 65 | -------------------------------------------------------------------------------- /core/src/ScoreServer.h: -------------------------------------------------------------------------------- 1 | #ifndef LDSERVER_SCORESERVER_H 2 | #define LDSERVER_SCORESERVER_H 3 | 4 | #define ARMA_DONT_USE_WRAPPER 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #define ARMA_DONT_USE_WRAPPER 17 | #include 18 | 19 | #include 20 | #include 21 | #include 22 | #include "Raw.h" 23 | #include "Segment.h" 24 | #include "ScoreSegment.h" 25 | #include "Types.h" 26 | #include "Phenotypes.h" 27 | #include "LDServer.h" 28 | 29 | using namespace std; 30 | /** Computes score statistics for a selected phenotype over genotype segments, optionally caching results in redis (see compute_scores and enable_cache below). */ 31 | class ScoreServer { 32 | private: 33 | unordered_map> samples; 34 | unordered_map> raw; 35 | shared_ptr phenotypes; 36 | uint32_t genotype_dataset_id; 37 | uint32_t phenotype_dataset_id; 38 | string phenotype; 39 | 40 | uint32_t segment_size; 41 | 42 | bool cache_enabled; 43 | string cache_hostname; 44 | int cache_port; 45 | redisContext* cache_context; 46 | 47 | static void parse_variant(const string& variant, string& chromosome, uint64_t& position, string& ref_allele, string& alt_allele); 48 | 49 | public: 50 | static const string ALL_SAMPLES_KEY; 51 | 52 | explicit ScoreServer(uint32_t segment_size = 1000); 53 | virtual ~ScoreServer(); 54 | 55 | /** 56 | * Set a file from which to read genotypes. This can be a VCF or Savvy formatted file. 57 | * @param file - path of the genotype file 58 | * @param genotype_dataset_id - integer representing the genotype dataset (IDs stored in sqlite database). 
59 | */ 60 | void set_genotypes_file(const string& file, const uint32_t& genotype_dataset_id); 61 | 62 | /** 63 | * Set the list of samples to be used. This will set it both when operating on genotypes, and phenotypes. 64 | * @param name - name of the sample subset 65 | * @param samples - samples belonging to the subset 66 | */ 67 | void set_samples(const string& name, const vector& samples); 68 | 69 | void force_samples(const std::string &name, const std::vector &samples); 70 | 71 | /** 72 | * Load phenotypes from a file. This triggers a load of phenotypes into memory. Phenotype files are assumed to 73 | * typically be quite small. When UKBB gets metabolites, then maybe we will need to start loading only needed 74 | * phenotypes in a single pass, or switch to alternative storage. 75 | * 76 | * These are named "load_" because they load the entire file. The "set_genotypes_file" method above does not, 77 | * rather it can selectively load genotypes using an on-disk index. 78 | * 79 | * @param path - path of the phenotype file to load 80 | * @param phenotype_dataset_id (see genotype_dataset_id above) 81 | */ 82 | void load_phenotypes_file(const string &path, const ColumnTypeMap &types, size_t nrows, const string& delim, const string& sample_column, const uint32_t &phenotype_dataset_id, shared_ptr> analysis_columns = nullptr); 83 | 84 | /** 85 | * Set the phenotype that will be used for score/p-value calculations. 86 | * @param p - name of the phenotype 87 | */ 88 | void set_phenotype(const string& p); 89 | 90 | /** 91 | * Get list of samples for which the phenotype has complete data. 92 | */ 93 | shared_ptr> get_complete_samples(const string& phenotype) const; 94 | 95 | /** 96 | * Functions to enable/disable the cache. 97 | * Note: this is slightly different than LDServer, where a cache key is not used (these functions only open/close 98 | * the redis context.) 99 | * @param hostname - host name of the redis server 100 | * @param port - port of the redis server 101 | */ 102 | void enable_cache(const string& hostname, int port); 103 | void disable_cache(); 104 | 105 | /** 106 | * Create a key to cache a segment of score statistics. 
107 | * @param genotype_dataset_id - ID of genotypes dataset 108 | * @param phenotype_dataset_id - ID of phenotypes dataset 109 | * @param phenotype_name - Name of the phenotype 110 | * @param samples_name - Name of sample subset in use, or "ALL" 111 | * @param chromosome 112 | * @param start_bp 113 | * @param stop_bp 114 | * @return the cache key string 115 | */ 116 | static string make_segment_cache_key(uint32_t genotype_dataset_id, uint32_t phenotype_dataset_id, const string& phenotype_name, const string& samples_name, const string& chromosome, uint64_t start_bp, uint64_t stop_bp); 117 | 118 | vector get_chromosomes(); 119 | uint32_t get_segment_size() const; 120 | 121 | /** 122 | * Compute score statistics on segments that have already been used by the LDServer. 123 | * If the segments have cached score statistics, we use them directly. 124 | * Otherwise, if the segments at least have loaded genotypes, we can calculate the score statistics from them. 125 | * Finally, if neither is true, we will have to load the genotypes from disk and compute. 126 | * 127 | * If the cache is enabled, score statistics and relevant other stats will be stored to the redis cache. 128 | * @param region_chromosome, region_start_bp, region_stop_bp - the requested region (chromosome and bp range) 129 | * @param result - output parameter for the score statistics (non-const reference) 130 | * @param samples_name - name of the sample subset to use (defaults to ALL_SAMPLES_KEY) 131 | * @param segments - optional segments already used by the LDServer (see above) 132 | * @return 133 | */ 134 | bool compute_scores(const string& region_chromosome, uint64_t region_start_bp, uint64_t region_stop_bp, struct ScoreStatQueryResult& result, const string& samples_name = ALL_SAMPLES_KEY, SharedSegmentVector segments = nullptr) const; 135 | }; 136 | 137 | /** 138 | * Helper function to coordinate samples between the LDServer and ScoreServer. 
139 | * This function: 140 | * - eliminates samples where the phenotype is missing 141 | * - intersects with the sample subset 142 | */ 143 | void coordinate_samples(ScoreServer& score_server, LDServer& ld_server, const string& genotype_file, const string& phenotype, const string& sample_subset, const vector& samples = {}); 144 | 145 | #endif -------------------------------------------------------------------------------- /core/src/Segment.h: -------------------------------------------------------------------------------- 1 | #ifndef LDSERVER_SEGMENT_H 2 | #define LDSERVER_SEGMENT_H 3 | 4 | #define ARMA_DONT_USE_WRAPPER 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include "Types.h" 18 | 19 | 20 | 21 | using namespace std; 22 | /** Storage format used for the genotypes of a Segment. */ 23 | enum genotypes_store : uint8_t { 24 | CSC_ALL_ONES, // all-ones matrix in compressed sparse column representation, 25 | CSC, // matrix in compressed sparse column representation 26 | BITSET // bitsets 27 | }; 28 | /** Variant names, positions and genotypes for a chromosome interval (start_bp..stop_bp); genotypes are kept in the representation selected by `store`. */ 29 | class Segment { 30 | protected: 31 | bool cached; 32 | bool names_loaded; 33 | bool genotypes_loaded; 34 | 35 | string chromosome; 36 | uint64_t start_bp; 37 | uint64_t stop_bp; 38 | uint64_t n_haplotypes; 39 | 40 | // Vector of variant IDs in EPACTS format (chr:pos_ref/alt). 
41 | vector names; 42 | vector positions; 43 | 44 | // tells in which format to store the genotypes 45 | genotypes_store store; 46 | 47 | // for CSC representation 48 | vector sp_mat_rowind; 49 | vector sp_mat_colind; 50 | vector sp_mat_values; 51 | vector means; 52 | bool nans = false; 53 | 54 | // for BITSET representation 55 | vector freqs; 56 | vector> alleles; 57 | vector> alt_carriers; 58 | 59 | public: 60 | Segment(const string& chromosome, uint64_t start_bp, uint64_t stop_bp, genotypes_store store); 61 | Segment(Segment&& segment); 62 | virtual ~Segment(); 63 | 64 | void clear(); 65 | void clear_names(); 66 | void clear_genotypes(); 67 | void add(savvy::site_info& anno, savvy::compressed_vector& alleles); 68 | void add_name(savvy::site_info& anno, savvy::compressed_vector& alleles); 69 | void add_genotypes(savvy::compressed_vector& alleles); 70 | void freeze(); 71 | void freeze_names(); 72 | void freeze_genotypes(); 73 | 74 | bool is_empty() const; 75 | bool is_cached() const; 76 | bool has_names() const; 77 | bool has_genotypes() const; 78 | 79 | const char* get_key() const; 80 | uint64_t get_key_size() const; 81 | const string& get_chromosome() const; 82 | uint64_t get_start_bp() const; 83 | uint64_t get_stop_bp() const; 84 | uint64_t get_n_haplotypes() const; 85 | uint64_t get_n_genotypes() const; 86 | uint32_t get_n_variants() const; 87 | uint32_t get_ac() const; 88 | const string& get_name(int i) const; 89 | uint64_t get_position(int i) const; 90 | arma::sp_fmat get_genotypes(); 91 | const vector& get_freqs() const; 92 | const vector>& get_alleles() const; 93 | const vector>& get_alt_carriers() const; 94 | genotypes_store get_store() const; 95 | 96 | static void create_pairs(uint64_t segment1, uint64_t segment2, int i, int start_j, int stop_j, const float* values, LDQueryResult& result); 97 | static void create_pairs(uint64_t segment1, uint64_t segment2, int i, int start_j, int stop_j, const float* values, SingleVariantLDQueryResult& result); 98 | bool 
overlaps_region(uint64_t region_start_bp, uint64_t region_stop_bp, int& from_index, int& to_index) const; 99 | bool overlaps_variant(const string& name, uint64_t bp, int& index) const; 100 | 101 | inline bool has_nans() const { return nans; } 102 | inline const vector& get_means() const { return means; } 103 | 104 | /** 105 | * Load/save functions for redis. 106 | * These are also overloaded below for loading/saving from serialized binary. 107 | * @param redis_cache - redis context to read from / write to 108 | * @param key - redis key of the segment 109 | */ 110 | virtual void load(redisContext* redis_cache, const string& key); 111 | virtual void save(redisContext* redis_cache, const string& key); 112 | 113 | /** 114 | * Load/save functions for binary format. 115 | * Only the number of haplotypes, the list of variant EPACTS IDs, and their positions on the chromosome are stored. 116 | * @tparam Archive - archive type (presumably a cereal archive, as used by ScoreSegment; confirm) 117 | * @param ar - archive to read from / write to 118 | */ 119 | template 120 | void load( Archive & ar ) 121 | { 122 | ar( n_haplotypes, names, positions ); 123 | } 124 | 125 | template 126 | void save( Archive & ar ) const 127 | { 128 | ar( n_haplotypes, names, positions ); 129 | } 130 | }; 131 | 132 | typedef shared_ptr>> SharedSegmentVector; 133 | inline SharedSegmentVector make_shared_segment_vector() { return make_shared>>(); } 134 | 135 | #endif 136 | -------------------------------------------------------------------------------- /core/src/SummaryStatisticsLoader.cpp: -------------------------------------------------------------------------------- 1 | #include "SummaryStatisticsLoader.h" 2 | using namespace std; 3 | 4 | 5 | -------------------------------------------------------------------------------- /core/src/SummaryStatisticsLoader.h: -------------------------------------------------------------------------------- 1 | #ifndef LDSERVER_SUMMARYSTATISTICSLOADER_H 2 | #define LDSERVER_SUMMARYSTATISTICSLOADER_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 
#include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include "Types.h" 26 | 27 | const uint32_t INIT_QUERY_LIMIT = 10000000; 28 | 29 | enum class ScoreCovFormat {RVTEST, RAREMETAL, METASTAAR}; 30 | 31 | class NoVariantsInRange : public std::runtime_error { using std::runtime_error::runtime_error; }; 32 | 33 | /* Single parameter versions of string conversion functions. Note that spstoull_uint64 parses through stod and then casts the double to an integer. */ 34 | inline unsigned long long spstoull(const string& s) { return stoull(s); } 35 | inline uint64_t spstoull_uint64(const string& s) { 36 | double d = stod(s); 37 | auto l = static_cast(d); 38 | return l; 39 | } 40 | inline unsigned long spstoul(const string& s) { return stoul(s); } 41 | inline double spstod(const string& s) { return stod(s); } 42 | inline int spstoi(const string& s) { return stoi(s); } 43 | inline string stos(const string& s) { return s; } 44 | 45 | /** 46 | * Loader for "summary statistic" datasets. These consist of: 47 | * 48 | * 1. A file of score statistics per single variant (along with other information such as p-value, effect size, etc.) 49 | * 2. A file of the covariance of the score statistics for each pair of variants (usually in a sliding window across the genome) 50 | * 51 | * These files are usually generated by either rvtests, or RAREMETALWORKER. They are normally used for conducting either 52 | * single variant meta-analysis, or meta-analysis of gene-based aggregation tests such as the burden or SKAT test. 53 | * 54 | * https://github.com/zhanxw/rvtests#meta-analysis-models 55 | * https://genome.sph.umich.edu/wiki/RAREMETALWORKER 56 | */ 57 | class SummaryStatisticsLoader { 58 | public: 59 | /** 60 | * Load a region of score statistics and covariances into memory. 61 | * @param chromosome Chromosome. 62 | * @param start Integer start position of the region. 63 | * @param stop Integer end position of the region. 
64 | */ 65 | virtual void load_region(const std::string& chromosome, uint64_t start, uint64_t stop) = 0; 66 | 67 | // Return the covariances. 68 | virtual shared_ptr getCovResult() = 0; 69 | 70 | // Return the score statistics. 71 | virtual shared_ptr getScoreResult() = 0; 72 | 73 | /** 74 | * Getter to return the residual variance under the null model. 75 | * @return sigma2 76 | */ 77 | virtual double getSigma2() = 0; 78 | 79 | /** 80 | * Getter to return number of samples used when calculating scores/covariances. 81 | * @return nsamples 82 | */ 83 | virtual uint64_t getNumSamples() = 0; 84 | 85 | // Virtual destructor, so that loaders can be destroyed through a base-class pointer. 86 | virtual ~SummaryStatisticsLoader() = default; 87 | }; 88 | 89 | #endif //LDSERVER_SUMMARYSTATISTICSLOADER_H 90 | -------------------------------------------------------------------------------- /core/src/VariantCollator.h: -------------------------------------------------------------------------------- 1 | #ifndef LDSERVER_VARIANTCOLLATOR_H 2 | #define LDSERVER_VARIANTCOLLATOR_H 3 | 4 | #include 5 | #include 6 | #include "Types.h" 7 | #include "Raw.h" 8 | #include 9 | #include "RaremetalSummaryStatisticsLoader.h" 10 | #include "MetastaarSummaryStatisticsLoader.h" 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | /** 18 | * Enum for various variant file formats. Variant site information can be stored in VCF/BCF, SAVVY, RAREMETAL (also used 19 | * for rvtest), and METASTAAR files. 20 | */ 21 | enum class VariantFileFormat {VCF, SAVVY, RAREMETAL, METASTAAR}; 22 | 23 | /** 24 | * Convert an object of enum class VariantFileFormat into a string description. 25 | * @param fmt - VariantFileFormat object 26 | * @return - std::string description 27 | */ 28 | std::string to_string(VariantFileFormat& fmt); 29 | 30 | /** 31 | * Class to retrieve lists of variants from various filetypes (vcf, savvy, raremetal/rvtest, metastaar.) 
32 | */ 33 | class VariantCollator { 34 | protected: 35 | VariantFileFormat format; 36 | 37 | // Used to keep track of chromosome -> score statistic file (RAREMETAL or rvtest only) or genotype file. 38 | std::unordered_map chrom_file; 39 | 40 | // Data structures used to keep track of MetaSTAAR score and covariance files. 41 | std::map score_tree; 42 | std::map cov_tree; 43 | 44 | std::shared_ptr> variants; 45 | 46 | void read_variants_genotype_file(std::string chrom, uint64_t start, uint64_t end); 47 | void read_variants_raremetal_file(std::string chrom, uint64_t start, uint64_t end); 48 | void read_variants_metastaar_file(std::string chrom, uint64_t start, uint64_t end); 49 | 50 | public: 51 | /** 52 | * Constructor to use when given a list of genotype files (VCF/BCF, SAVVY), one file per chromosome. 53 | * Each file must have a tabix index on disk (chr1.vcf.gz & chr1.vcf.gz.tbi must exist). Only the genotype file itself, 54 | * though, needs to be provided in the genotype_files vector (not its index). 55 | * @param genotype_files - vector of genotype file paths 56 | * @param format - format of the file. Use VariantFileFormat enum class. 57 | */ 58 | VariantCollator(std::vector genotype_files, VariantFileFormat format); 59 | 60 | /** 61 | * Constructor to use when given a set of summary statistic files (score statistic files & covariance matrix files.) 62 | * The covariance files are unfortunately required for MetaSTAAR, as the files contain important 63 | * metadata needed to filter the list of variants in the score statistic file down to the proper subset (those with MAF cutoff 64 | * below what was used to generate the covariance matrix.) 65 | * @param score_files - vector of score statistic file paths 66 | * @param cov_files - vector of covariance file paths 67 | * @param format - format of the file (use VariantFileFormat enum class.) If given rvtest file, use VariantFileFormat::RAREMETAL. 
68 | */ 69 | VariantCollator(std::vector score_files, std::vector cov_files, VariantFileFormat format); 70 | 71 | /** 72 | * Get a list of variants in a region. 73 | * @param chrom - chromosome of the region 74 | * @param start - start position of the region 75 | * @param end - end position of the region 76 | * @return Vector of VariantMeta objects, one for each variant within the requested region. 77 | */ 78 | std::shared_ptr> get_variants(std::string chrom, uint64_t start, uint64_t end); 79 | }; 80 | 81 | 82 | #endif //LDSERVER_VARIANTCOLLATOR_H 83 | -------------------------------------------------------------------------------- /core/tabixpp.cmake: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.5) 2 | project(tabixpp VERSION 0.0.1) 3 | set(CMAKE_VERBOSE_MAKEFILE ON) 4 | set(CMAKE_CXX_STANDARD 14) 5 | 6 | if(CGET_PREFIX) 7 | include_directories(${CGET_PREFIX}/include) 8 | endif() 9 | 10 | if(CGET_PREFIX) 11 | link_directories(${CGET_PREFIX}/lib) 12 | endif() 13 | 14 | set(MAKEFILE_COMPILER_FLAGS "-g -Wall -O2 -fPIC -D_FILE_OFFSET_BITS=64 -D_USE_KNETFILE") 15 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MAKEFILE_COMPILER_FLAGS}") 16 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MAKEFILE_COMPILER_FLAGS}") 17 | 18 | find_library(HTS_LIB hts HINT ${CGET_PREFIX}/lib) 19 | add_library(tabixpp 20 | tabix.hpp 21 | tabix.cpp) 22 | target_link_libraries(tabixpp ${HTS_LIB}) 23 | install(TARGETS tabixpp DESTINATION lib) 24 | install(FILES tabix.hpp DESTINATION include RENAME tabixpp.hpp) 25 | -------------------------------------------------------------------------------- /core/tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | find_library(GTEST_LIB gmock HINT ${CGET_PREFIX}/lib) 2 | find_library(BOOST_SYSTEM_LIB boost_system HINT ${CGET_PREFIX}/lib) 3 | find_library(BOOST_IOSTREAMS_LIB boost_iostreams HINT ${CGET_PREFIX}/lib) 4 | find_library(MKL_RT_LIB mkl_rt) 5 | find_package(Threads REQUIRED) 6 | 7 | find_package(Python3 3.8 EXACT 
COMPONENTS Development REQUIRED) 8 | 9 | find_library(OPENBLAS_LIB openblas HINT ${CGET_PREFIX}/lib) 10 | message(STATUS "OpenBLAS = ${OPENBLAS_LIB}") 11 | 12 | if(CGET_PREFIX) 13 | include_directories(${CGET_PREFIX}/include) 14 | endif() 15 | 16 | if(NOT MKL_RT_LIB) 17 | set(MKL_RT_LIB "") 18 | endif() 19 | 20 | set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) 21 | add_executable(testAll LDServerTest.cpp Main_TestAll.cpp RareMetal.cpp RvTest.cpp) 22 | target_link_libraries(testAll LDServer ${Python3_LIBRARIES} ${BOOST_SYSTEM_LIB} ${BOOST_IOSTREAMS_LIB} ${GTEST_LIB} ${OPENBLAS_LIB} ${MKL_RT_LIB} ${CMAKE_THREAD_LIBS_INIT}) 23 | 24 | install(TARGETS testAll COMPONENT cli RUNTIME DESTINATION test OPTIONAL) 25 | install(FILES 26 | ../../data/EUR.samples.txt 27 | ../../data/AFR.samples.txt 28 | ../../data/chr21.test.vcf.gz 29 | ../../data/chr21.test.vcf.gz.tbi 30 | ../../data/chr21.test.bcf 31 | ../../data/chr21.test.bcf.csi 32 | ../../data/chr21.test.sav 33 | ../../data/chr21.test.sav.s1r 34 | ../../data/chr21.test.tab 35 | ../../data/chr21.test.ped 36 | ../../data/chr21.test.dat 37 | ../../data/chr21.test.frq 38 | ../../data/chr22.test.vcf.gz 39 | ../../data/chr22.test.vcf.gz.tbi 40 | ../../data/chr22.test.bcf 41 | ../../data/chr22.test.bcf.csi 42 | ../../data/chr22.test.sav 43 | ../../data/chr22.test.sav.s1r 44 | ../../data/chr22.test.tab 45 | ../../data/chr22.test.bad_float.tab 46 | ../../data/chr22.test.missing_values.tab 47 | ../../data/chr22.test.ped 48 | ../../data/chr22.test.dat 49 | ../../data/chr22.test.frq 50 | ../../data/chr21.test.RAND_QT.singlevar.cov.txt.gz 51 | ../../data/chr21.test.RAND_QT.singlevar.cov.txt.gz.tbi 52 | ../../data/chr21.test.RAND_QT.singlevar.score.txt.gz 53 | ../../data/chr21.test.RAND_QT.singlevar.score.txt.gz.tbi 54 | ../../data/chr21.test.missing_genotypes_and_phenotypes.RAND_QT.singlevar.cov.txt 55 | ../../data/chr21.test.missing_genotypes_and_phenotypes.RAND_QT.singlevar.score.txt 56 | 
../../data/chr21.test.missing_pheno.RAND_QT.singlevar.cov.txt 57 | ../../data/chr21.test.missing_pheno.RAND_QT.singlevar.score.txt 58 | ../../data/chr21.test.missing_values.ped 59 | ../../data/chr21.test.missing_values.tab 60 | ../../data/chr21.test.missing_values.vcf.gz 61 | ../../data/chr21.test.missing_values.vcf.gz.tbi 62 | ../../data/chr22.monomorphic_test.vcf.gz 63 | ../../data/chr22.monomorphic_test.vcf.gz.tbi 64 | ../../data/test_no_testable_variants.mask.tab.gz 65 | ../../data/test_no_testable_variants.tab 66 | ../../data/test_no_testable_variants.vcf.gz.tbi 67 | ../../data/test_no_testable_variants.mask.tab.gz.tbi 68 | ../../data/test_no_testable_variants.vcf.gz 69 | ../../data/chrX.test.sav 70 | ../../data/chrX.test.sav.s1r 71 | ../../data/region_ld_22_51241101_51241385.hap.ld 72 | ../../data/region_ld_22_50544251_50549251.hap.ld 73 | ../../data/region_ld_22_51241101_51241385.AFR.hap.ld 74 | ../../data/variant_ld_22_51241101_vs_51241101_51241385.hap.ld 75 | ../../data/variant_ld_22_51241386_vs_51241101_51241385.hap.ld 76 | ../../data/variant_ld_22_51241309_vs_51241101_51244237.hap.ld 77 | ../../data/variant_ld_22_50546666_vs_50544251_50549251.hap.ld 78 | ../../data/region_ld_X_60100_60150.hap.ld 79 | ../../data/mask.epacts.chr22.gencode-exons-AF01.tab.gz 80 | ../../data/mask.epacts.chr22.gencode-exons-AF01.tab.gz.tbi 81 | ../../data/mask.epacts.chr22.gencode-exons-AF05.tab.gz 82 | ../../data/mask.epacts.chr22.gencode-exons-AF05.tab.gz.tbi 83 | ../../data/chr21.test.RAND_QT.singlevar.cov.txt 84 | ../../data/chr21.test.RAND_QT.singlevar.score.txt 85 | ../../data/test_sumstat_loader_rm.scores.assoc.gz.tbi 86 | ../../data/test_sumstat_loader_rm.scores.assoc.gz 87 | ../../data/test_sumstat_loader_rm.cov.assoc.gz.tbi 88 | ../../data/test_sumstat_loader_rm.cov.assoc.gz 89 | ../../data/test.smallchunk.MetaScore.assoc.gz.tbi 90 | ../../data/test.smallchunk.MetaScore.assoc.gz 91 | ../../data/test.smallchunk.MetaCov.assoc.gz.tbi 92 | 
../../data/test.smallchunk.MetaCov.assoc.gz 93 | ../../data/test.smallchunk.noheader.MetaScore.assoc.gz.tbi 94 | ../../data/test.smallchunk.noheader.MetaScore.assoc.gz 95 | ../../data/test.smallchunk.noheader.MetaCov.assoc.gz.tbi 96 | ../../data/test.smallchunk.noheader.MetaCov.assoc.gz 97 | ../../data/test.afmissing.MetaScore.assoc.gz 98 | ../../data/test.afmissing.MetaScore.assoc.gz.tbi 99 | ../../data/test.twochroms.chr1.MetaCov.assoc.gz 100 | ../../data/test.twochroms.chr1.MetaCov.assoc.gz.tbi 101 | ../../data/test.twochroms.chr1.MetaScore.assoc.gz 102 | ../../data/test.twochroms.chr1.MetaScore.assoc.gz.tbi 103 | ../../data/test.twochroms.chr9.MetaCov.assoc.gz 104 | ../../data/test.twochroms.chr9.MetaCov.assoc.gz.tbi 105 | ../../data/test.twochroms.chr9.MetaScore.assoc.gz 106 | ../../data/test.twochroms.chr9.MetaScore.assoc.gz.tbi 107 | ../../data/test.twochroms.mask.tab.gz 108 | ../../data/test.twochroms.mask.tab.gz.tbi 109 | ../../data/rvtest_score_fail_ustat.gz 110 | ../../data/rvtest_score_fail_ustat.gz.tbi 111 | ../../data/rvtest_cov_fail_base.gz 112 | ../../data/rvtest_cov_fail_base.gz.tbi 113 | ../../data/test.qt.segment1.metastaar.cov.parquet 114 | ../../data/test.qt.segment1.metastaar.sumstat.parquet 115 | ../../data/test.qt.segment2.metastaar.cov.parquet 116 | ../../data/test.qt.segment2.metastaar.sumstat.parquet 117 | ../../data/gene.WVAY7.cov.assoc.gz 118 | ../../data/gene.WVAY7.cov.assoc.gz.tbi 119 | ../../data/gene.WVAY7.scores.assoc.gz 120 | ../../data/gene.WVAY7.scores.assoc.gz.tbi 121 | DESTINATION test) 122 | -------------------------------------------------------------------------------- /core/tests/Main_TestAll.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | using namespace std; 5 | 6 | int main(int argc, char* argv[]) { 7 | ::testing::InitGoogleTest(&argc, argv); 8 | 9 | ofstream config_file("redis-connection.txt"); 10 | for (int i = 1; i < argc; ++i) { 11 | config_file 
<< argv[i] << std::endl; 12 | } 13 | config_file.close(); 14 | 15 | return RUN_ALL_TESTS(); 16 | } -------------------------------------------------------------------------------- /core/tests/RareMetal.cpp: -------------------------------------------------------------------------------- 1 | #include "RareMetal.h" 2 | using namespace std; 3 | /** Parse a RAREMETAL score statistics file (gzip-compressed when the path ends in ".gz"): read the sample count, sigma2 and trait name from the header, then index every record row by "chrom:pos" and by "chrom:pos_ref/alt". Throws std::out_of_range if a record row has too few columns. */ 4 | void RareMetalScores::load(const string &path) { 5 | unique_ptr file; 6 | ifstream fs(path, ios_base::in | ios_base::binary); 7 | boost::iostreams::filtering_streambuf inbuf; 8 | // Only a trailing ".gz" marks a compressed file; a ".gz" elsewhere in the path (e.g. in a directory name) must not trigger decompression. 9 | bool is_gz = path.size() >= 3 && path.compare(path.size() - 3, 3, ".gz") == 0; 10 | if (is_gz) { 11 | inbuf.push(boost::iostreams::gzip_decompressor()); 12 | inbuf.push(fs); 13 | file = make_unique(&inbuf); 14 | } 15 | else { 16 | file = make_unique(path); 17 | } 18 | 19 | string line; 20 | auto line_separator = regex("[ \t]"); 21 | 22 | // Line regexes 23 | auto regex_samples = regex("##Samples=(\\d+)"); 24 | auto regex_sigma = regex("##Sigma_e2_Hat\t(.+)"); 25 | auto regex_trait_sum = regex("##TraitSummaries"); 26 | auto regex_header = regex("#CHROM\tPOS.*"); 27 | 28 | bool header_done = false; 29 | bool parse_trait = false; 30 | while (getline(*file, line)) { 31 | smatch match; 32 | if (!header_done) { 33 | if (regex_search(line, match, regex_samples) && match.size() > 1) { 34 | this->nsamples = stoul(match.str(1)); 35 | } 36 | else if (regex_search(line, match, regex_sigma) && match.size() > 1) { 37 | this->sigma2 = stod(match.str(1)); 38 | } 39 | else if (regex_search(line, match, regex_trait_sum)) { 40 | parse_trait = true; 41 | } 42 | else if (parse_trait) { 43 | parse_trait = false; 44 | vector trait_tok; 45 | copy(sregex_token_iterator(line.begin(), line.end(), line_separator, -1), sregex_token_iterator(), back_inserter(trait_tok)); 46 | this->trait_name = trait_tok.at(0); 47 | } 48 | else if (regex_search(line, match, regex_header)) { 49 | header_done = true; 50 | } 51 | } 52 | else { 53 | // RAREMETAL puts a genomic control line (starting with '#') all the way at the end of the file; skip it, and any blank line, before tokenizing. 54 | if (line.empty() || line[0] == '#') { continue; } 55 |
56 | // Begin parsing record row 57 | vector tokens; 58 | copy(sregex_token_iterator(line.begin(), line.end(), line_separator, -1), sregex_token_iterator(), back_inserter(tokens)); 59 | 60 | // Create record 61 | auto rec = make_shared(); 62 | rec->chrom = tokens.at(0); 63 | rec->pos = stoul(tokens.at(1)); 64 | rec->ref = tokens.at(2); 65 | rec->alt = tokens.at(3); 66 | rec->n_informative = stoul(tokens.at(4)); 67 | rec->founder_af = stod(tokens.at(5)); 68 | rec->all_af = stod(tokens.at(6)); 69 | rec->informative_alt_ac = stoul(tokens.at(7)); 70 | rec->call_rate = stod(tokens.at(8)); 71 | rec->hwe_pvalue = stod(tokens.at(9)); 72 | rec->n_ref = stoul(tokens.at(10)); 73 | rec->n_het = stoul(tokens.at(11)); 74 | rec->n_alt = stoul(tokens.at(12)); 75 | rec->u_stat = stod(tokens.at(13)); 76 | rec->sqrt_vstat = stod(tokens.at(14)); 77 | rec->alt_effsize = stod(tokens.at(15)); 78 | rec->pvalue = stod(tokens.at(16)); 79 | 80 | // Keys 81 | string chrpos = rec->chrom + ":" + to_string(rec->pos); 82 | string variant = rec->chrom + ":" + to_string(rec->pos) + "_" + rec->ref + "/" + rec->alt; 83 | 84 | // Insert 85 | records.push_back(rec); 86 | this->index.emplace(chrpos, rec); 87 | this->index.emplace(variant, rec); 88 | } 89 | } 90 | } 91 | 92 | RareMetalScores::RareMetalScores(const string &file) { 93 | load(file); 94 | } 95 | 96 | double RareMetalScores::get_nsamples() { 97 | return nsamples; 98 | } 99 | 100 | double RareMetalScores::get_sigma2() { 101 | return sigma2; 102 | } 103 | 104 | shared_ptr RareMetalScores::get_record(const string &i) { 105 | auto it = this->index.find(i); 106 | if (it != this->index.end()) { 107 | return it->second; 108 | } 109 | else { 110 | auto null = shared_ptr(); 111 | return null; 112 | } 113 | } -------------------------------------------------------------------------------- /core/tests/RareMetal.h: 
-------------------------------------------------------------------------------- 1 | #ifndef LDSERVER_RAREMETAL_H 2 | #define LDSERVER_RAREMETAL_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | struct RareMetalRecord { 17 | std::string chrom; 18 | uint64_t pos; 19 | std::string ref; 20 | std::string alt; 21 | uint64_t n_informative; 22 | double founder_af; 23 | double all_af; 24 | uint64_t informative_alt_ac; 25 | double call_rate; 26 | double hwe_pvalue; 27 | uint64_t n_ref; 28 | uint64_t n_het; 29 | uint64_t n_alt; 30 | double u_stat; 31 | double sqrt_vstat; 32 | double alt_effsize; 33 | double pvalue; 34 | }; 35 | 36 | class RareMetalScores { 37 | protected: 38 | uint64_t nsamples; 39 | double sigma2; // sigma_e2_hat from RAREMETAL 40 | std::string trait_name; 41 | std::vector> records; 42 | std::map> index; 43 | public: 44 | RareMetalScores(const std::string &file); 45 | void load(const std::string &file); 46 | double get_sigma2(); 47 | double get_nsamples(); 48 | std::shared_ptr get_record(const std::string &i); 49 | }; 50 | 51 | #endif //LDSERVER_RAREMETAL_H 52 | -------------------------------------------------------------------------------- /core/tests/RvTest.cpp: -------------------------------------------------------------------------------- 1 | #include "RvTest.h" 2 | using namespace std; 3 | /** Parse an rvtest score statistics file (gzip-compressed when the path ends in ".gz"): read the sample count and sigma2 from the header, then index every record row by "chrom:pos" and by "chrom:pos_ref/alt". Throws std::out_of_range if a record row has too few columns. */ 4 | void RvTestScores::load(const string &path) { 5 | unique_ptr file; 6 | ifstream fs(path, ios_base::in | ios_base::binary); 7 | boost::iostreams::filtering_streambuf inbuf; 8 | // Only a trailing ".gz" marks a compressed file; a ".gz" elsewhere in the path (e.g. in a directory name) must not trigger decompression. 9 | bool is_gz = path.size() >= 3 && path.compare(path.size() - 3, 3, ".gz") == 0; 10 | if (is_gz) { 11 | inbuf.push(boost::iostreams::gzip_decompressor()); 12 | inbuf.push(fs); 13 | file = make_unique(&inbuf); 14 | } 15 | else { 16 | file = make_unique(path); 17 | } 18 | 19 | string line; 20 | auto line_separator = regex("[ \t]"); 21 | 22 | // Line regexes 23 | auto regex_samples = 
regex("##Samples=(\\d+)"); 24 | auto regex_sigma = regex("## - Sigma2\t(.+)"); 25 | auto regex_trait_sum = regex("##TraitSummary"); 26 | auto regex_header = regex("CHROM\tPOS.*"); 27 | 28 | bool header_done = false; 29 | bool parse_trait = false; 30 | while (getline(*file, line)) { 31 | smatch match; 32 | if (!header_done) { 33 | if (regex_search(line, match, regex_samples) && match.size() > 1) { 34 | this->nsamples = stoul(match.str(1)); 35 | } 36 | else if (regex_search(line, match, regex_sigma) && match.size() > 1) { 37 | this->sigma2 = stod(match.str(1)); 38 | } 39 | else if (regex_search(line, match, regex_trait_sum)) { 40 | parse_trait = true; 41 | } 42 | else if (parse_trait) { 43 | // The line following ##TraitSummary is consumed without being parsed: 44 | // nothing is extracted from it, and trait_name is left unset for rvtest files. 45 | parse_trait = false; 46 | } 47 | else if (regex_search(line, match, regex_header)) { 48 | header_done = true; 49 | } 50 | } 51 | else { 52 | // RAREMETAL puts a genomic control line (starting with '#') all the way at the end of the file; rvtest shouldn't do this, 53 | // but the check is harmless. Skip such lines, and any blank line, before tokenizing. 54 | if (line.empty() || line[0] == '#') { continue; } 55 | 56 | // Begin parsing record row 57 | vector tokens; 58 | copy(sregex_token_iterator(line.begin(), line.end(), line_separator, -1), sregex_token_iterator(), back_inserter(tokens)); 59 | 60 | // Create record 61 | auto rec = make_shared(); 62 | rec->chrom = tokens.at(0); 63 | rec->pos = stoul(tokens.at(1)); 64 | rec->ref = tokens.at(2); 65 | rec->alt = tokens.at(3); 66 | rec->n_informative = stoul(tokens.at(4)); 67 | rec->founder_af = stod(tokens.at(5)); 68 | rec->all_af = stod(tokens.at(5)); /* NOTE(review): founder_af and all_af are both read from column 5 - presumably rvtest has a single AF column; confirm */ 69 | rec->informative_alt_ac = stoul(tokens.at(6)); 70 | rec->call_rate = stod(tokens.at(7)); 71 | rec->hwe_pvalue = stod(tokens.at(8)); 72 | rec->n_ref = stoul(tokens.at(9)); 73 | rec->n_het = stoul(tokens.at(10)); 74 | rec->n_alt = stoul(tokens.at(11)); 75 | rec->u_stat = 
stod(tokens.at(12)); 76 | rec->sqrt_vstat = stod(tokens.at(13)); 77 | rec->alt_effsize = stod(tokens.at(14)); 78 | rec->pvalue = stod(tokens.at(15)); 79 | 80 | // Keys 81 | string chrpos = rec->chrom + ":" + to_string(rec->pos); 82 | string variant = rec->chrom + ":" + to_string(rec->pos) + "_" + rec->ref + "/" + rec->alt; 83 | 84 | // Insert 85 | records.push_back(rec); 86 | this->index.emplace(chrpos, rec); 87 | this->index.emplace(variant, rec); 88 | } 89 | } 90 | } 91 | 92 | RvTestScores::RvTestScores(const string &file) { 93 | load(file); 94 | } 95 | 96 | double RvTestScores::get_nsamples() { 97 | return nsamples; 98 | } 99 | 100 | double RvTestScores::get_sigma2() { 101 | return sigma2; 102 | } 103 | 104 | shared_ptr RvTestScores::get_record(const string &i) { 105 | auto it = this->index.find(i); 106 | if (it != this->index.end()) { 107 | return it->second; 108 | } 109 | else { 110 | auto null = shared_ptr(); 111 | return null; 112 | } 113 | } -------------------------------------------------------------------------------- /core/tests/RvTest.h: -------------------------------------------------------------------------------- 1 | #ifndef LDSERVER_RVTEST_H 2 | #define LDSERVER_RVTEST_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | struct RvTestRecord { 17 | std::string chrom; 18 | uint64_t pos; 19 | std::string ref; 20 | std::string alt; 21 | uint64_t n_informative; 22 | double founder_af; 23 | double all_af; 24 | uint64_t informative_alt_ac; 25 | double call_rate; 26 | double hwe_pvalue; 27 | uint64_t n_ref; 28 | uint64_t n_het; 29 | uint64_t n_alt; 30 | double u_stat; 31 | double sqrt_vstat; 32 | double alt_effsize; 33 | double pvalue; 34 | }; 35 | 36 | class RvTestScores { 37 | protected: 38 | uint64_t nsamples; 39 | double sigma2; 40 | std::string trait_name; 41 | std::vector> records; 42 | std::map> index; 43 | public: 44 | RvTestScores(const 
std::string &file); 45 | void load(const std::string &file); 46 | double get_sigma2(); 47 | double get_nsamples(); 48 | std::shared_ptr get_record(const std::string &i); 49 | }; 50 | 51 | #endif //LDSERVER_RVTEST_H 52 | -------------------------------------------------------------------------------- /data/AFR.samples.txt: -------------------------------------------------------------------------------- 1 | HG01879 2 | HG01880 3 | HG01882 4 | HG01883 5 | HG01885 6 | HG01886 7 | HG01889 8 | HG01890 9 | HG01894 10 | HG01896 11 | HG01912 12 | HG01914 13 | HG01915 14 | HG01956 15 | HG01958 16 | HG01985 17 | HG01986 18 | HG01988 19 | HG01989 20 | HG01990 21 | HG02009 22 | HG02010 23 | HG02012 24 | HG02013 25 | HG02014 26 | HG02051 27 | HG02052 28 | HG02053 29 | HG02054 30 | HG02095 31 | HG02107 32 | HG02108 33 | HG02111 34 | HG02143 35 | HG02144 36 | HG02255 37 | HG02256 38 | HG02281 39 | HG02282 40 | HG02283 41 | HG02284 42 | HG02307 43 | HG02308 44 | HG02309 45 | HG02314 46 | HG02315 47 | HG02317 48 | HG02318 49 | HG02322 50 | HG02323 51 | HG02325 52 | HG02330 53 | HG02332 54 | HG02334 55 | HG02337 56 | HG02339 57 | HG02343 58 | HG02419 59 | HG02420 60 | HG02427 61 | HG02429 62 | HG02433 63 | HG02439 64 | HG02442 65 | HG02445 66 | HG02449 67 | HG02450 68 | HG02455 69 | HG02461 70 | HG02462 71 | HG02464 72 | HG02465 73 | HG02470 74 | HG02471 75 | HG02476 76 | HG02477 77 | HG02479 78 | HG02481 79 | HG02484 80 | HG02485 81 | HG02489 82 | HG02496 83 | HG02497 84 | HG02501 85 | HG02502 86 | HG02505 87 | HG02508 88 | HG02511 89 | HG02536 90 | HG02537 91 | HG02541 92 | HG02545 93 | HG02546 94 | HG02549 95 | HG02554 96 | HG02555 97 | HG02557 98 | HG02558 99 | HG02561 100 | HG02562 101 | HG02568 102 | HG02570 103 | HG02571 104 | HG02573 105 | HG02574 106 | HG02577 107 | HG02580 108 | HG02582 109 | HG02583 110 | HG02585 111 | HG02586 112 | HG02588 113 | HG02589 114 | HG02594 115 | HG02595 116 | HG02610 117 | HG02611 118 | HG02613 119 | HG02614 120 | HG02620 121 | HG02621 122 | 
HG02623 123 | HG02624 124 | HG02628 125 | HG02629 126 | HG02634 127 | HG02635 128 | HG02642 129 | HG02643 130 | HG02645 131 | HG02646 132 | HG02666 133 | HG02667 134 | HG02675 135 | HG02676 136 | HG02678 137 | HG02679 138 | HG02702 139 | HG02703 140 | HG02715 141 | HG02716 142 | HG02721 143 | HG02722 144 | HG02756 145 | HG02757 146 | HG02759 147 | HG02760 148 | HG02763 149 | HG02768 150 | HG02769 151 | HG02771 152 | HG02772 153 | HG02798 154 | HG02799 155 | HG02804 156 | HG02805 157 | HG02807 158 | HG02808 159 | HG02810 160 | HG02811 161 | HG02813 162 | HG02814 163 | HG02816 164 | HG02817 165 | HG02819 166 | HG02820 167 | HG02836 168 | HG02837 169 | HG02839 170 | HG02840 171 | HG02851 172 | HG02852 173 | HG02854 174 | HG02855 175 | HG02860 176 | HG02861 177 | HG02870 178 | HG02878 179 | HG02879 180 | HG02881 181 | HG02882 182 | HG02884 183 | HG02885 184 | HG02887 185 | HG02888 186 | HG02890 187 | HG02891 188 | HG02895 189 | HG02896 190 | HG02922 191 | HG02923 192 | HG02938 193 | HG02941 194 | HG02943 195 | HG02944 196 | HG02946 197 | HG02947 198 | HG02952 199 | HG02953 200 | HG02968 201 | HG02970 202 | HG02971 203 | HG02973 204 | HG02974 205 | HG02976 206 | HG02977 207 | HG02979 208 | HG02981 209 | HG02982 210 | HG02983 211 | HG03024 212 | HG03025 213 | HG03027 214 | HG03028 215 | HG03039 216 | HG03040 217 | HG03045 218 | HG03046 219 | HG03048 220 | HG03049 221 | HG03052 222 | HG03054 223 | HG03055 224 | HG03057 225 | HG03058 226 | HG03060 227 | HG03061 228 | HG03063 229 | HG03064 230 | HG03066 231 | HG03069 232 | HG03072 233 | HG03073 234 | HG03074 235 | HG03077 236 | HG03078 237 | HG03079 238 | HG03081 239 | HG03082 240 | HG03084 241 | HG03085 242 | HG03086 243 | HG03088 244 | HG03091 245 | HG03095 246 | HG03096 247 | HG03097 248 | HG03099 249 | HG03100 250 | HG03103 251 | HG03105 252 | HG03108 253 | HG03109 254 | HG03111 255 | HG03112 256 | HG03114 257 | HG03115 258 | HG03117 259 | HG03118 260 | HG03120 261 | HG03121 262 | HG03123 263 | HG03124 264 | HG03126 265 
| HG03127 266 | HG03129 267 | HG03130 268 | HG03132 269 | HG03133 270 | HG03135 271 | HG03136 272 | HG03139 273 | HG03157 274 | HG03159 275 | HG03160 276 | HG03162 277 | HG03163 278 | HG03166 279 | HG03168 280 | HG03169 281 | HG03172 282 | HG03175 283 | HG03189 284 | HG03190 285 | HG03193 286 | HG03195 287 | HG03196 288 | HG03198 289 | HG03199 290 | HG03202 291 | HG03209 292 | HG03212 293 | HG03224 294 | HG03225 295 | HG03240 296 | HG03241 297 | HG03246 298 | HG03247 299 | HG03258 300 | HG03259 301 | HG03265 302 | HG03267 303 | HG03268 304 | HG03270 305 | HG03271 306 | HG03279 307 | HG03280 308 | HG03291 309 | HG03294 310 | HG03295 311 | HG03297 312 | HG03298 313 | HG03300 314 | HG03301 315 | HG03303 316 | HG03304 317 | HG03311 318 | HG03313 319 | HG03342 320 | HG03343 321 | HG03351 322 | HG03352 323 | HG03354 324 | HG03363 325 | HG03366 326 | HG03367 327 | HG03369 328 | HG03370 329 | HG03372 330 | HG03376 331 | HG03378 332 | HG03380 333 | HG03382 334 | HG03385 335 | HG03388 336 | HG03391 337 | HG03394 338 | HG03397 339 | HG03401 340 | HG03410 341 | HG03419 342 | HG03428 343 | HG03432 344 | HG03433 345 | HG03436 346 | HG03437 347 | HG03439 348 | HG03442 349 | HG03445 350 | HG03446 351 | HG03449 352 | HG03451 353 | HG03452 354 | HG03455 355 | HG03457 356 | HG03458 357 | HG03460 358 | HG03461 359 | HG03464 360 | HG03469 361 | HG03470 362 | HG03472 363 | HG03473 364 | HG03476 365 | HG03478 366 | HG03479 367 | HG03484 368 | HG03485 369 | HG03499 370 | HG03511 371 | HG03514 372 | HG03515 373 | HG03517 374 | HG03518 375 | HG03520 376 | HG03521 377 | HG03538 378 | HG03539 379 | HG03547 380 | HG03548 381 | HG03556 382 | HG03557 383 | HG03558 384 | HG03559 385 | HG03563 386 | HG03565 387 | HG03567 388 | HG03571 389 | HG03572 390 | HG03575 391 | HG03577 392 | HG03578 393 | HG03583 394 | NA18486 395 | NA18488 396 | NA18489 397 | NA18498 398 | NA18499 399 | NA18501 400 | NA18502 401 | NA18504 402 | NA18505 403 | NA18507 404 | NA18508 405 | NA18510 406 | NA18511 407 | NA18516 
408 | NA18517 409 | NA18519 410 | NA18520 411 | NA18522 412 | NA18523 413 | NA18853 414 | NA18856 415 | NA18858 416 | NA18861 417 | NA18864 418 | NA18865 419 | NA18867 420 | NA18868 421 | NA18870 422 | NA18871 423 | NA18873 424 | NA18874 425 | NA18876 426 | NA18877 427 | NA18878 428 | NA18879 429 | NA18881 430 | NA18907 431 | NA18908 432 | NA18909 433 | NA18910 434 | NA18912 435 | NA18915 436 | NA18916 437 | NA18917 438 | NA18923 439 | NA18924 440 | NA18933 441 | NA18934 442 | NA19017 443 | NA19019 444 | NA19020 445 | NA19023 446 | NA19024 447 | NA19025 448 | NA19026 449 | NA19027 450 | NA19028 451 | NA19030 452 | NA19031 453 | NA19035 454 | NA19036 455 | NA19037 456 | NA19038 457 | NA19041 458 | NA19042 459 | NA19043 460 | NA19092 461 | NA19093 462 | NA19095 463 | NA19096 464 | NA19098 465 | NA19099 466 | NA19102 467 | NA19107 468 | NA19108 469 | NA19113 470 | NA19114 471 | NA19116 472 | NA19117 473 | NA19118 474 | NA19119 475 | NA19121 476 | NA19129 477 | NA19130 478 | NA19131 479 | NA19137 480 | NA19138 481 | NA19141 482 | NA19143 483 | NA19144 484 | NA19146 485 | NA19147 486 | NA19149 487 | NA19152 488 | NA19153 489 | NA19159 490 | NA19160 491 | NA19171 492 | NA19172 493 | NA19175 494 | NA19184 495 | NA19185 496 | NA19189 497 | NA19190 498 | NA19197 499 | NA19198 500 | NA19200 501 | NA19201 502 | NA19204 503 | NA19206 504 | NA19207 505 | NA19209 506 | NA19210 507 | NA19213 508 | NA19214 509 | NA19222 510 | NA19223 511 | NA19225 512 | NA19235 513 | NA19236 514 | NA19238 515 | NA19239 516 | NA19247 517 | NA19248 518 | NA19256 519 | NA19257 520 | NA19307 521 | NA19308 522 | NA19309 523 | NA19310 524 | NA19312 525 | NA19314 526 | NA19315 527 | NA19316 528 | NA19317 529 | NA19318 530 | NA19319 531 | NA19320 532 | NA19321 533 | NA19323 534 | NA19324 535 | NA19327 536 | NA19328 537 | NA19331 538 | NA19332 539 | NA19334 540 | NA19338 541 | NA19346 542 | NA19347 543 | NA19350 544 | NA19351 545 | NA19355 546 | NA19360 547 | NA19372 548 | NA19374 549 | NA19375 550 | 
NA19376 551 | NA19377 552 | NA19378 553 | NA19379 554 | NA19380 555 | NA19383 556 | NA19384 557 | NA19385 558 | NA19390 559 | NA19391 560 | NA19393 561 | NA19394 562 | NA19395 563 | NA19397 564 | NA19399 565 | NA19401 566 | NA19403 567 | NA19404 568 | NA19428 569 | NA19429 570 | NA19430 571 | NA19431 572 | NA19434 573 | NA19435 574 | NA19436 575 | NA19437 576 | NA19438 577 | NA19439 578 | NA19440 579 | NA19443 580 | NA19445 581 | NA19446 582 | NA19448 583 | NA19449 584 | NA19451 585 | NA19452 586 | NA19454 587 | NA19455 588 | NA19456 589 | NA19457 590 | NA19461 591 | NA19462 592 | NA19463 593 | NA19466 594 | NA19467 595 | NA19468 596 | NA19471 597 | NA19472 598 | NA19473 599 | NA19474 600 | NA19475 601 | NA19625 602 | NA19700 603 | NA19701 604 | NA19703 605 | NA19704 606 | NA19707 607 | NA19711 608 | NA19712 609 | NA19713 610 | NA19818 611 | NA19819 612 | NA19834 613 | NA19835 614 | NA19900 615 | NA19901 616 | NA19904 617 | NA19908 618 | NA19909 619 | NA19913 620 | NA19914 621 | NA19916 622 | NA19917 623 | NA19920 624 | NA19921 625 | NA19922 626 | NA19923 627 | NA19982 628 | NA19984 629 | NA20126 630 | NA20127 631 | NA20274 632 | NA20276 633 | NA20278 634 | NA20281 635 | NA20282 636 | NA20287 637 | NA20289 638 | NA20291 639 | NA20294 640 | NA20296 641 | NA20298 642 | NA20299 643 | NA20314 644 | NA20317 645 | NA20318 646 | NA20320 647 | NA20321 648 | NA20332 649 | NA20334 650 | NA20339 651 | NA20340 652 | NA20342 653 | NA20346 654 | NA20348 655 | NA20351 656 | NA20355 657 | NA20356 658 | NA20357 659 | NA20359 660 | NA20362 661 | NA20412 662 | -------------------------------------------------------------------------------- /data/EUR.samples.txt: -------------------------------------------------------------------------------- 1 | HG00096 2 | HG00097 3 | HG00099 4 | HG00100 5 | HG00101 6 | HG00102 7 | HG00103 8 | HG00105 9 | HG00106 10 | HG00107 11 | HG00108 12 | HG00109 13 | HG00110 14 | HG00111 15 | HG00112 16 | HG00113 17 | HG00114 18 | HG00115 19 | HG00116 20 | 
HG00117 21 | HG00118 22 | HG00119 23 | HG00120 24 | HG00121 25 | HG00122 26 | HG00123 27 | HG00125 28 | HG00126 29 | HG00127 30 | HG00128 31 | HG00129 32 | HG00130 33 | HG00131 34 | HG00132 35 | HG00133 36 | HG00136 37 | HG00137 38 | HG00138 39 | HG00139 40 | HG00140 41 | HG00141 42 | HG00142 43 | HG00143 44 | HG00145 45 | HG00146 46 | HG00148 47 | HG00149 48 | HG00150 49 | HG00151 50 | HG00154 51 | HG00155 52 | HG00157 53 | HG00158 54 | HG00159 55 | HG00160 56 | HG00171 57 | HG00173 58 | HG00174 59 | HG00176 60 | HG00177 61 | HG00178 62 | HG00179 63 | HG00180 64 | HG00181 65 | HG00182 66 | HG00183 67 | HG00185 68 | HG00186 69 | HG00187 70 | HG00188 71 | HG00189 72 | HG00190 73 | HG00231 74 | HG00232 75 | HG00233 76 | HG00234 77 | HG00235 78 | HG00236 79 | HG00237 80 | HG00238 81 | HG00239 82 | HG00240 83 | HG00242 84 | HG00243 85 | HG00244 86 | HG00245 87 | HG00246 88 | HG00250 89 | HG00251 90 | HG00252 91 | HG00253 92 | HG00254 93 | HG00255 94 | HG00256 95 | HG00257 96 | HG00258 97 | HG00259 98 | HG00260 99 | HG00261 100 | HG00262 101 | HG00263 102 | HG00264 103 | HG00265 104 | HG00266 105 | HG00267 106 | HG00268 107 | HG00269 108 | HG00271 109 | HG00272 110 | HG00273 111 | HG00274 112 | HG00275 113 | HG00276 114 | HG00277 115 | HG00278 116 | HG00280 117 | HG00281 118 | HG00282 119 | HG00284 120 | HG00285 121 | HG00288 122 | HG00290 123 | HG00304 124 | HG00306 125 | HG00308 126 | HG00309 127 | HG00310 128 | HG00311 129 | HG00313 130 | HG00315 131 | HG00318 132 | HG00319 133 | HG00320 134 | HG00321 135 | HG00323 136 | HG00324 137 | HG00325 138 | HG00326 139 | HG00327 140 | HG00328 141 | HG00329 142 | HG00330 143 | HG00331 144 | HG00332 145 | HG00334 146 | HG00335 147 | HG00336 148 | HG00337 149 | HG00338 150 | HG00339 151 | HG00341 152 | HG00342 153 | HG00343 154 | HG00344 155 | HG00345 156 | HG00346 157 | HG00349 158 | HG00350 159 | HG00351 160 | HG00353 161 | HG00355 162 | HG00356 163 | HG00357 164 | HG00358 165 | HG00360 166 | HG00361 167 | HG00362 168 | 
HG00364 169 | HG00365 170 | HG00366 171 | HG00367 172 | HG00368 173 | HG00369 174 | HG00371 175 | HG00372 176 | HG00373 177 | HG00375 178 | HG00376 179 | HG00378 180 | HG00379 181 | HG00380 182 | HG00381 183 | HG00382 184 | HG00383 185 | HG00384 186 | HG01334 187 | HG01500 188 | HG01501 189 | HG01503 190 | HG01504 191 | HG01506 192 | HG01507 193 | HG01509 194 | HG01510 195 | HG01512 196 | HG01513 197 | HG01515 198 | HG01516 199 | HG01518 200 | HG01519 201 | HG01521 202 | HG01522 203 | HG01524 204 | HG01525 205 | HG01527 206 | HG01528 207 | HG01530 208 | HG01531 209 | HG01536 210 | HG01537 211 | HG01602 212 | HG01603 213 | HG01605 214 | HG01606 215 | HG01607 216 | HG01608 217 | HG01610 218 | HG01612 219 | HG01613 220 | HG01615 221 | HG01617 222 | HG01618 223 | HG01619 224 | HG01620 225 | HG01623 226 | HG01624 227 | HG01625 228 | HG01626 229 | HG01628 230 | HG01630 231 | HG01631 232 | HG01632 233 | HG01668 234 | HG01669 235 | HG01670 236 | HG01672 237 | HG01673 238 | HG01675 239 | HG01676 240 | HG01678 241 | HG01679 242 | HG01680 243 | HG01682 244 | HG01684 245 | HG01685 246 | HG01686 247 | HG01694 248 | HG01695 249 | HG01697 250 | HG01699 251 | HG01700 252 | HG01702 253 | HG01704 254 | HG01705 255 | HG01707 256 | HG01708 257 | HG01709 258 | HG01710 259 | HG01746 260 | HG01747 261 | HG01756 262 | HG01757 263 | HG01761 264 | HG01762 265 | HG01765 266 | HG01766 267 | HG01767 268 | HG01768 269 | HG01770 270 | HG01771 271 | HG01773 272 | HG01775 273 | HG01776 274 | HG01777 275 | HG01779 276 | HG01781 277 | HG01783 278 | HG01784 279 | HG01785 280 | HG01786 281 | HG01789 282 | HG01790 283 | HG01791 284 | HG02215 285 | HG02219 286 | HG02220 287 | HG02221 288 | HG02223 289 | HG02224 290 | HG02230 291 | HG02231 292 | HG02232 293 | HG02233 294 | HG02235 295 | HG02236 296 | HG02238 297 | HG02239 298 | NA06984 299 | NA06985 300 | NA06986 301 | NA06989 302 | NA06994 303 | NA07000 304 | NA07037 305 | NA07048 306 | NA07051 307 | NA07056 308 | NA07347 309 | NA07357 310 | NA10847 311 
| NA10851 312 | NA11829 313 | NA11830 314 | NA11831 315 | NA11832 316 | NA11840 317 | NA11843 318 | NA11881 319 | NA11892 320 | NA11893 321 | NA11894 322 | NA11918 323 | NA11919 324 | NA11920 325 | NA11930 326 | NA11931 327 | NA11932 328 | NA11933 329 | NA11992 330 | NA11994 331 | NA11995 332 | NA12003 333 | NA12004 334 | NA12005 335 | NA12006 336 | NA12043 337 | NA12044 338 | NA12045 339 | NA12046 340 | NA12058 341 | NA12144 342 | NA12154 343 | NA12155 344 | NA12156 345 | NA12234 346 | NA12249 347 | NA12272 348 | NA12273 349 | NA12275 350 | NA12282 351 | NA12283 352 | NA12286 353 | NA12287 354 | NA12340 355 | NA12341 356 | NA12342 357 | NA12347 358 | NA12348 359 | NA12383 360 | NA12399 361 | NA12400 362 | NA12413 363 | NA12414 364 | NA12489 365 | NA12546 366 | NA12716 367 | NA12717 368 | NA12718 369 | NA12748 370 | NA12749 371 | NA12750 372 | NA12751 373 | NA12760 374 | NA12761 375 | NA12762 376 | NA12763 377 | NA12775 378 | NA12776 379 | NA12777 380 | NA12778 381 | NA12812 382 | NA12813 383 | NA12814 384 | NA12815 385 | NA12827 386 | NA12828 387 | NA12829 388 | NA12830 389 | NA12842 390 | NA12843 391 | NA12872 392 | NA12873 393 | NA12874 394 | NA12878 395 | NA12889 396 | NA12890 397 | NA20502 398 | NA20503 399 | NA20504 400 | NA20505 401 | NA20506 402 | NA20507 403 | NA20508 404 | NA20509 405 | NA20510 406 | NA20511 407 | NA20512 408 | NA20513 409 | NA20514 410 | NA20515 411 | NA20516 412 | NA20517 413 | NA20518 414 | NA20519 415 | NA20520 416 | NA20521 417 | NA20522 418 | NA20524 419 | NA20525 420 | NA20527 421 | NA20528 422 | NA20529 423 | NA20530 424 | NA20531 425 | NA20532 426 | NA20533 427 | NA20534 428 | NA20535 429 | NA20536 430 | NA20538 431 | NA20539 432 | NA20540 433 | NA20541 434 | NA20542 435 | NA20543 436 | NA20544 437 | NA20581 438 | NA20582 439 | NA20585 440 | NA20586 441 | NA20587 442 | NA20588 443 | NA20589 444 | NA20752 445 | NA20753 446 | NA20754 447 | NA20755 448 | NA20756 449 | NA20757 450 | NA20758 451 | NA20759 452 | NA20760 453 | NA20761 
454 | NA20762 455 | NA20763 456 | NA20764 457 | NA20765 458 | NA20766 459 | NA20767 460 | NA20768 461 | NA20769 462 | NA20770 463 | NA20771 464 | NA20772 465 | NA20773 466 | NA20774 467 | NA20775 468 | NA20778 469 | NA20783 470 | NA20785 471 | NA20786 472 | NA20787 473 | NA20790 474 | NA20792 475 | NA20795 476 | NA20796 477 | NA20797 478 | NA20798 479 | NA20799 480 | NA20800 481 | NA20801 482 | NA20802 483 | NA20803 484 | NA20804 485 | NA20805 486 | NA20806 487 | NA20807 488 | NA20808 489 | NA20809 490 | NA20810 491 | NA20811 492 | NA20812 493 | NA20813 494 | NA20814 495 | NA20815 496 | NA20818 497 | NA20819 498 | NA20821 499 | NA20822 500 | NA20826 501 | NA20827 502 | NA20828 503 | NA20832 504 | -------------------------------------------------------------------------------- /data/chr21.test.RAND_QT.singlevar.cov.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/chr21.test.RAND_QT.singlevar.cov.txt.gz -------------------------------------------------------------------------------- /data/chr21.test.RAND_QT.singlevar.cov.txt.gz.tbi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/chr21.test.RAND_QT.singlevar.cov.txt.gz.tbi -------------------------------------------------------------------------------- /data/chr21.test.RAND_QT.singlevar.score.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/chr21.test.RAND_QT.singlevar.score.txt.gz -------------------------------------------------------------------------------- /data/chr21.test.RAND_QT.singlevar.score.txt.gz.tbi: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/chr21.test.RAND_QT.singlevar.score.txt.gz.tbi -------------------------------------------------------------------------------- /data/chr21.test.bcf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/chr21.test.bcf -------------------------------------------------------------------------------- /data/chr21.test.bcf.csi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/chr21.test.bcf.csi -------------------------------------------------------------------------------- /data/chr21.test.dat: -------------------------------------------------------------------------------- 1 | A RAND_BINARY 2 | T RAND_QT 3 | -------------------------------------------------------------------------------- /data/chr21.test.frq: -------------------------------------------------------------------------------- 1 | 21 9411239 21:9411239_G/A G A 5008 1 0.00019968051118210862 2 | 21 9411245 21:9411245_C/A C A 5008 4 0.0007987220447284345 3 | 21 9411264 21:9411264_A/C A C 5008 1 0.00019968051118210862 4 | 21 9411267 21:9411267_G/T G T 5008 1 0.00019968051118210862 5 | 21 9411302 21:9411302_G/T G T 5008 13 0.002595846645367412 6 | 21 9411313 21:9411313_G/A G A 5008 15 0.0029952076677316293 7 | 21 9411332 21:9411332_G/T G T 5008 2 0.00039936102236421724 8 | 21 9411347 21:9411347_G/C G C 5008 23 0.004592651757188498 9 | 21 9411356 21:9411356_G/A G A 5008 1 0.00019968051118210862 10 | 21 9411358 21:9411358_C/T C T 5008 11 0.002196485623003195 11 | 21 9411381 21:9411381_G/T G T 5008 17 0.0033945686900958465 12 | 21 9411384 21:9411384_C/T C T 5008 1 0.00019968051118210862 13 | 21 9411409 21:9411409_T/C T C 5008 5 0.000998402555910543 14 | 21 9411410 
21:9411410_C/T C T 5008 2338 0.46685303514376997 15 | 21 9411413 21:9411413_T/A T A 5008 1 0.00019968051118210862 16 | 21 9411417 21:9411417_C/T C T 5008 4 0.0007987220447284345 17 | 21 9411441 21:9411441_T/G T G 5008 3 0.0005990415335463259 18 | 21 9411446 21:9411446_C/T C T 5008 36 0.00718849840255591 19 | 21 9411449 21:9411449_G/T G T 5008 4 0.0007987220447284345 20 | 21 9411455 21:9411455_C/T C T 5008 1 0.00019968051118210862 21 | 21 9411476 21:9411476_A/T A T 5008 5 0.000998402555910543 22 | 21 9411485 21:9411485_C/A C A 5008 5 0.000998402555910543 23 | 21 9411486 21:9411486_C/A C A 5008 3 0.0005990415335463259 24 | 21 9411497 21:9411497_A/G A G 5008 37 0.007388178913738019 25 | 21 9411500 21:9411500_G/T G T 5008 2299 0.4590654952076677 26 | 21 9411501 21:9411501_A/C A C 5008 1 0.00019968051118210862 27 | 21 9411542 21:9411542_T/A T A 5008 2 0.00039936102236421724 28 | 21 9411553 21:9411553_G/A G A 5008 1 0.00019968051118210862 29 | 21 9411554 21:9411554_G/A G A 5008 1 0.00019968051118210862 30 | 21 9411559 21:9411559_T/G T G 5008 1 0.00019968051118210862 31 | 21 9411577 21:9411577_C/T C T 5008 1 0.00019968051118210862 32 | 21 9411601 21:9411601_T/C T C 5008 2 0.00039936102236421724 33 | 21 9411602 21:9411602_T/C T C 5008 1830 0.3654153354632588 34 | 21 9411607 21:9411607_A/C A C 5008 2 0.00039936102236421724 35 | 21 9411617 21:9411617_A/G A G 5008 2 0.00039936102236421724 36 | 21 9411645 21:9411645_A/G A G 5008 2221 0.44349041533546324 37 | 21 9411689 21:9411689_G/A G A 5008 1 0.00019968051118210862 38 | 21 9411710 21:9411710_C/A C A 5008 1 0.00019968051118210862 39 | 21 9411759 21:9411759_G/C G C 5008 4 0.0007987220447284345 40 | 21 9411785 21:9411785_G/T G T 5008 2284 0.4560702875399361 41 | 21 9411793 21:9411793_G/A G A 5008 1 0.00019968051118210862 42 | 21 9411799 21:9411799_T/C T C 5008 5 0.000998402555910543 43 | 21 9411809 21:9411809_T/C T C 5008 1 0.00019968051118210862 44 | 21 9411818 21:9411818_C/T C T 5008 5 0.000998402555910543 45 | 21 9411822 
21:9411822_C/A C A 5008 1 0.00019968051118210862 46 | 21 9411824 21:9411824_T/A T A 5008 2 0.00039936102236421724 47 | 21 9411833 21:9411833_G/T G T 5008 7 0.0013977635782747603 48 | 21 9411848 21:9411848_G/A G A 5008 1 0.00019968051118210862 49 | 21 9411858 21:9411858_T/C T C 5008 2 0.00039936102236421724 50 | 21 9411878 21:9411878_C/T C T 5008 1 0.00019968051118210862 51 | 21 9411879 21:9411879_T/C T C 5008 3 0.0005990415335463259 52 | 21 9411896 21:9411896_A/T A T 5008 3 0.0005990415335463259 53 | 21 9411907 21:9411907_G/A G A 5008 1 0.00019968051118210862 54 | 21 9411911 21:9411911_T/A T A 5008 3 0.0005990415335463259 55 | 21 9411921 21:9411921_G/C G C 5008 5 0.000998402555910543 56 | 21 9411925 21:9411925_T/C T C 5008 1 0.00019968051118210862 57 | 21 9411964 21:9411964_C/G C G 5008 1 0.00019968051118210862 58 | 21 9411969 21:9411969_T/C T C 5008 5 0.000998402555910543 59 | 21 9411975 21:9411975_G/A G A 5008 1 0.00019968051118210862 60 | 21 9412076 21:9412076_CTT/C CTT C 5008 24 0.004792332268370607 61 | 21 9412078 21:9412078_T/A T A 5008 4 0.0007987220447284345 62 | 21 9412089 21:9412089_A/G A G 5008 3 0.0005990415335463259 63 | 21 9412099 21:9412099_C/T C T 5008 39 0.007787539936102236 64 | 21 9412100 21:9412100_G/A G A 5008 11 0.002196485623003195 65 | 21 9412182 21:9412182_A/G A G 5008 21 0.004193290734824281 66 | 21 9412197 21:9412197_C/G C G 5008 2 0.00039936102236421724 67 | 21 9412205 21:9412205_A/G A G 5008 7 0.0013977635782747603 68 | 21 9412216 21:9412216_C/T C T 5008 1 0.00019968051118210862 69 | 21 9412242 21:9412242_T/A T A 5008 37 0.007388178913738019 70 | 21 9412246 21:9412246_GATTA/G GATTA G 5008 5 0.000998402555910543 71 | 21 9412260 21:9412260_A/G A G 5008 1 0.00019968051118210862 72 | 21 9412261 21:9412261_T/C T C 5008 2 0.00039936102236421724 73 | 21 9412266 21:9412266_C/A C A 5008 1 0.00019968051118210862 74 | 21 9412279 21:9412279_C/T C T 5008 2 0.00039936102236421724 75 | 21 9412296 21:9412296_G/C G C 5008 2 0.00039936102236421724 76 | 
21 9412300 21:9412300_T/C T C 5008 3 0.0005990415335463259 77 | 21 9412339 21:9412339_C/T C T 5008 1 0.00019968051118210862 78 | 21 9412377 21:9412377_G/A G A 5008 9 0.0017971246006389776 79 | 21 9412385 21:9412385_G/A G A 5008 1 0.00019968051118210862 80 | 21 9412441 21:9412441_TATA/T TATA T 5008 2101 0.4195287539936102 81 | 21 9412485 21:9412485_C/G C G 5008 6 0.0011980830670926517 82 | 21 9412494 21:9412494_A/T A T 5008 1 0.00019968051118210862 83 | 21 9412497 21:9412497_G/C G C 5008 1 0.00019968051118210862 84 | 21 9412501 21:9412501_T/A T A 5008 2 0.00039936102236421724 85 | 21 9412503 21:9412503_C/A C A 5008 4517 0.9019568690095847 86 | 21 9412558 21:9412558_T/A T A 5008 4 0.0007987220447284345 87 | 21 9412562 21:9412562_G/C G C 5008 1 0.00019968051118210862 88 | 21 9412585 21:9412585_C/G C G 5008 1 0.00019968051118210862 89 | 21 9412603 21:9412603_G/A G A 5008 5 0.000998402555910543 90 | 21 9412608 21:9412608_A/G A G 5008 52 0.010383386581469648 91 | 21 9412638 21:9412638_A/C A C 5008 1 0.00019968051118210862 92 | 21 9412659 21:9412659_C/T C T 5008 5 0.000998402555910543 93 | 21 9412680 21:9412680_A/C A C 5008 1 0.00019968051118210862 94 | 21 9412726 21:9412726_C/T C T 5008 2 0.00039936102236421724 95 | 21 9412736 21:9412736_T/C T C 5008 2 0.00039936102236421724 96 | 21 9412740 21:9412740_G/A G A 5008 2 0.00039936102236421724 97 | 21 9412755 21:9412755_C/A C A 5008 1 0.00019968051118210862 98 | 21 9412760 21:9412760_T/C T C 5008 2 0.00039936102236421724 99 | 21 9412809 21:9412809_G/A G A 5008 7 0.0013977635782747603 100 | 21 9412835 21:9412835_G/T G T 5008 1 0.00019968051118210862 101 | 21 9412837 21:9412837_T/C T C 5008 54 0.010782747603833865 102 | 21 9412843 21:9412843_C/A C A 5008 1 0.00019968051118210862 103 | 21 9412881 21:9412881_C/T C T 5008 1 0.00019968051118210862 104 | 21 9412907 21:9412907_C/A C A 5008 6 0.0011980830670926517 105 | 21 9412914 21:9412914_T/C T C 5008 1 0.00019968051118210862 106 | 21 9412934 21:9412934_A/C A C 5008 1 
0.00019968051118210862 107 | 21 9412945 21:9412945_T/A T A 5008 2 0.00039936102236421724 108 | 21 9412974 21:9412974_T/C T C 5008 1 0.00019968051118210862 109 | 21 9412975 21:9412975_A/C A C 5008 9 0.0017971246006389776 110 | 21 9412987 21:9412987_C/T C T 5008 7 0.0013977635782747603 111 | 21 9412996 21:9412996_T/A T A 5008 5 0.000998402555910543 112 | -------------------------------------------------------------------------------- /data/chr21.test.missing_values.vcf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/chr21.test.missing_values.vcf.gz -------------------------------------------------------------------------------- /data/chr21.test.missing_values.vcf.gz.tbi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/chr21.test.missing_values.vcf.gz.tbi -------------------------------------------------------------------------------- /data/chr21.test.sav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/chr21.test.sav -------------------------------------------------------------------------------- /data/chr21.test.sav.s1r: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/chr21.test.sav.s1r -------------------------------------------------------------------------------- /data/chr21.test.vcf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/chr21.test.vcf.gz 
-------------------------------------------------------------------------------- /data/chr21.test.vcf.gz.tbi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/chr21.test.vcf.gz.tbi -------------------------------------------------------------------------------- /data/chr22.monomorphic_test.vcf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/chr22.monomorphic_test.vcf.gz -------------------------------------------------------------------------------- /data/chr22.monomorphic_test.vcf.gz.tbi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/chr22.monomorphic_test.vcf.gz.tbi -------------------------------------------------------------------------------- /data/chr22.more_phenotypes.test.dat: -------------------------------------------------------------------------------- 1 | T ANOTHER_RAND_QT 2 | -------------------------------------------------------------------------------- /data/chr22.test.bcf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/chr22.test.bcf -------------------------------------------------------------------------------- /data/chr22.test.bcf.csi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/chr22.test.bcf.csi -------------------------------------------------------------------------------- /data/chr22.test.dat: -------------------------------------------------------------------------------- 1 | A RAND_BINARY 2 
| T RAND_QT 3 | -------------------------------------------------------------------------------- /data/chr22.test.sav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/chr22.test.sav -------------------------------------------------------------------------------- /data/chr22.test.sav.s1r: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/chr22.test.sav.s1r -------------------------------------------------------------------------------- /data/chr22.test.vcf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/chr22.test.vcf.gz -------------------------------------------------------------------------------- /data/chr22.test.vcf.gz.tbi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/chr22.test.vcf.gz.tbi -------------------------------------------------------------------------------- /data/chrX.test.sav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/chrX.test.sav -------------------------------------------------------------------------------- /data/chrX.test.sav.s1r: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/chrX.test.sav.s1r -------------------------------------------------------------------------------- /data/gene.WVAY7.cov.assoc.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/gene.WVAY7.cov.assoc.gz -------------------------------------------------------------------------------- /data/gene.WVAY7.cov.assoc.gz.tbi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/gene.WVAY7.cov.assoc.gz.tbi -------------------------------------------------------------------------------- /data/gene.WVAY7.scores.assoc.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/gene.WVAY7.scores.assoc.gz -------------------------------------------------------------------------------- /data/gene.WVAY7.scores.assoc.gz.tbi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/gene.WVAY7.scores.assoc.gz.tbi -------------------------------------------------------------------------------- /data/make_rmw.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | raremetalworker --ped chr21.test.ped --dat chr21.test.dat --vcf chr21.test.vcf.gz --traitName RAND_QT --prefix chr21.test 3 | raremetalworker --ped chr21.test.missing_values.ped --dat chr21.test.dat --vcf chr21.test.vcf.gz --traitName RAND_QT --prefix chr21.test.missing_pheno 4 | raremetalworker --ped chr21.test.missing_values.ped --dat chr21.test.dat --vcf chr21.test.missing_values.vcf.gz --traitName RAND_QT --prefix chr21.test.missing_genotypes_and_phenotypes 5 | -------------------------------------------------------------------------------- /data/make_test_ped.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import gzip 3 | import numpy as np 4 | 5 | with gzip.open("chr21.test.vcf.gz","rt") as fp: 6 | for line in fp: 7 | if line.startswith("#CHROM"): 8 | ls = line.split() 9 | samples = ls[9:] 10 | break 11 | 12 | columns = { 13 | "iid": samples, 14 | "fid": samples, 15 | "patid": [0 for _ in range(len(samples))], 16 | "matid": [0 for _ in range(len(samples))], 17 | "sex": np.random.randint(1,3,len(samples)), 18 | "rand_binary": np.random.randint(1,3,len(samples)), 19 | "rand_qt": np.random.random(len(samples)) 20 | } 21 | 22 | # Fake some missing values for testing 23 | columns["rand_binary"][0] = 0 24 | 25 | with open("test.ped","wt") as out: 26 | for i in range(len(samples)): 27 | line = "\t".join([str(v[i]) for v in columns.values()]) 28 | print(line,file=out) 29 | 30 | with open("test.dat","wt") as out: 31 | print("A RAND_BINARY",file=out) 32 | print("T RAND_QT",file=out) 33 | -------------------------------------------------------------------------------- /data/mask.epacts.chr22.gencode-exons-AF01.tab.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/mask.epacts.chr22.gencode-exons-AF01.tab.gz -------------------------------------------------------------------------------- /data/mask.epacts.chr22.gencode-exons-AF01.tab.gz.tbi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/mask.epacts.chr22.gencode-exons-AF01.tab.gz.tbi -------------------------------------------------------------------------------- /data/mask.epacts.chr22.gencode-exons-AF05.tab.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/mask.epacts.chr22.gencode-exons-AF05.tab.gz -------------------------------------------------------------------------------- /data/mask.epacts.chr22.gencode-exons-AF05.tab.gz.tbi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/mask.epacts.chr22.gencode-exons-AF05.tab.gz.tbi -------------------------------------------------------------------------------- /data/metastaar_empty.cov.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/metastaar_empty.cov.parquet -------------------------------------------------------------------------------- /data/metastaar_empty.score.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/metastaar_empty.score.parquet -------------------------------------------------------------------------------- /data/metastaar_invalid_metadata.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/metastaar_invalid_metadata.parquet -------------------------------------------------------------------------------- /data/region_ld_22_51241101_51241385.AFR.hap.ld: -------------------------------------------------------------------------------- 1 | CHR POS1 POS2 N_CHR R^2 D Dprime 2 | 22 51241101 51241102 1322 1 0.00451798 1 3 | 22 51241101 51241285 1322 8.78811e-05 -8.58279e-05 -1 4 | 22 51241101 51241342 1322 3.45138e-06 -3.43311e-06 -1 5 | 22 51241102 51241285 1322 8.78811e-05 -8.58279e-05 -1 6 | 22 51241102 51241342 1322 3.45138e-06 
-3.43311e-06 -1 7 | 22 51241285 51241342 1322 1.45914e-05 -1.43046e-05 -1 8 | -------------------------------------------------------------------------------- /data/region_ld_22_51241101_51241385.hap.ld: -------------------------------------------------------------------------------- 1 | CHR POS1 POS2 N_CHR R^2 D Dprime 2 | 22 51241101 51241102 5008 1 0.00139581 1 3 | 22 51241101 51241285 5008 7.58732e-06 -7.53587e-06 -1 4 | 22 51241101 51241298 5008 2.79553e-07 -2.79106e-07 -1 5 | 22 51241101 51241309 5008 2.79553e-07 -2.79106e-07 -1 6 | 22 51241101 51241342 5008 2.79553e-07 -2.79106e-07 -1 7 | 22 51241102 51241285 5008 7.58732e-06 -7.53587e-06 -1 8 | 22 51241102 51241298 5008 2.79553e-07 -2.79106e-07 -1 9 | 22 51241102 51241309 5008 2.79553e-07 -2.79106e-07 -1 10 | 22 51241102 51241342 5008 2.79553e-07 -2.79106e-07 -1 11 | 22 51241285 51241298 5008 1.0826e-06 -1.07655e-06 -1 12 | 22 51241285 51241309 5008 1.0826e-06 -1.07655e-06 -1 13 | 22 51241285 51241342 5008 1.0826e-06 -1.07655e-06 -1 14 | 22 51241298 51241309 5008 3.98882e-08 -3.98723e-08 -1 15 | 22 51241298 51241342 5008 3.98882e-08 -3.98723e-08 -1 16 | 22 51241309 51241342 5008 3.98882e-08 -3.98723e-08 -1 17 | -------------------------------------------------------------------------------- /data/region_ld_X_60100_60150.hap.ld: -------------------------------------------------------------------------------- 1 | CHR POS1 POS2 N_CHR R^2 D Dprime 2 | X 60112 60116 5008 0.000130683 -0.000117703 -1 3 | X 60112 60139 5008 2.17587e-05 -1.96172e-05 -1 4 | X 60112 60147 5008 2.17587e-05 -1.96172e-05 -1 5 | X 60112 60149 5008 2.17587e-05 -1.96172e-05 -1 6 | X 60116 60139 5008 2.39569e-07 -2.39234e-07 -1 7 | X 60116 60147 5008 2.39569e-07 -2.39234e-07 -1 8 | X 60116 60149 5008 2.39569e-07 -2.39234e-07 -1 9 | X 60139 60147 5008 3.98882e-08 -3.98723e-08 -1 10 | X 60139 60149 5008 3.98882e-08 -3.98723e-08 -1 11 | X 60147 60149 5008 3.98882e-08 -3.98723e-08 -1 12 | 
-------------------------------------------------------------------------------- /data/rvtest_cov_fail_base.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/rvtest_cov_fail_base.gz -------------------------------------------------------------------------------- /data/rvtest_cov_fail_base.gz.tbi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/rvtest_cov_fail_base.gz.tbi -------------------------------------------------------------------------------- /data/rvtest_score_fail_ustat.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/rvtest_score_fail_ustat.gz -------------------------------------------------------------------------------- /data/rvtest_score_fail_ustat.gz.tbi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/rvtest_score_fail_ustat.gz.tbi -------------------------------------------------------------------------------- /data/test.afmissing.MetaScore.assoc.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/test.afmissing.MetaScore.assoc.gz -------------------------------------------------------------------------------- /data/test.afmissing.MetaScore.assoc.gz.tbi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/test.afmissing.MetaScore.assoc.gz.tbi 
-------------------------------------------------------------------------------- /data/test.qt.segment1.metastaar.cov.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/test.qt.segment1.metastaar.cov.parquet -------------------------------------------------------------------------------- /data/test.qt.segment1.metastaar.sumstat.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/test.qt.segment1.metastaar.sumstat.parquet -------------------------------------------------------------------------------- /data/test.qt.segment2.metastaar.cov.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/test.qt.segment2.metastaar.cov.parquet -------------------------------------------------------------------------------- /data/test.qt.segment2.metastaar.sumstat.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/test.qt.segment2.metastaar.sumstat.parquet -------------------------------------------------------------------------------- /data/test.smallchunk.MetaCov.assoc.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/test.smallchunk.MetaCov.assoc.gz -------------------------------------------------------------------------------- /data/test.smallchunk.MetaCov.assoc.gz.tbi: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/test.smallchunk.MetaCov.assoc.gz.tbi -------------------------------------------------------------------------------- /data/test.smallchunk.MetaScore.assoc.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/test.smallchunk.MetaScore.assoc.gz -------------------------------------------------------------------------------- /data/test.smallchunk.MetaScore.assoc.gz.tbi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/test.smallchunk.MetaScore.assoc.gz.tbi -------------------------------------------------------------------------------- /data/test.smallchunk.mask.epacts.tab.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/test.smallchunk.mask.epacts.tab.gz -------------------------------------------------------------------------------- /data/test.smallchunk.mask.epacts.tab.gz.tbi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/test.smallchunk.mask.epacts.tab.gz.tbi -------------------------------------------------------------------------------- /data/test.smallchunk.noheader.MetaCov.assoc.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/test.smallchunk.noheader.MetaCov.assoc.gz -------------------------------------------------------------------------------- /data/test.smallchunk.noheader.MetaCov.assoc.gz.tbi: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/test.smallchunk.noheader.MetaCov.assoc.gz.tbi -------------------------------------------------------------------------------- /data/test.smallchunk.noheader.MetaScore.assoc.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/test.smallchunk.noheader.MetaScore.assoc.gz -------------------------------------------------------------------------------- /data/test.smallchunk.noheader.MetaScore.assoc.gz.tbi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/test.smallchunk.noheader.MetaScore.assoc.gz.tbi -------------------------------------------------------------------------------- /data/test.twochroms.chr1.MetaCov.assoc.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/test.twochroms.chr1.MetaCov.assoc.gz -------------------------------------------------------------------------------- /data/test.twochroms.chr1.MetaCov.assoc.gz.tbi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/test.twochroms.chr1.MetaCov.assoc.gz.tbi -------------------------------------------------------------------------------- /data/test.twochroms.chr1.MetaScore.assoc.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/test.twochroms.chr1.MetaScore.assoc.gz 
-------------------------------------------------------------------------------- /data/test.twochroms.chr1.MetaScore.assoc.gz.tbi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/test.twochroms.chr1.MetaScore.assoc.gz.tbi -------------------------------------------------------------------------------- /data/test.twochroms.chr9.MetaCov.assoc.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/test.twochroms.chr9.MetaCov.assoc.gz -------------------------------------------------------------------------------- /data/test.twochroms.chr9.MetaCov.assoc.gz.tbi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/test.twochroms.chr9.MetaCov.assoc.gz.tbi -------------------------------------------------------------------------------- /data/test.twochroms.chr9.MetaScore.assoc.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/test.twochroms.chr9.MetaScore.assoc.gz -------------------------------------------------------------------------------- /data/test.twochroms.chr9.MetaScore.assoc.gz.tbi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/test.twochroms.chr9.MetaScore.assoc.gz.tbi -------------------------------------------------------------------------------- /data/test.twochroms.mask.tab.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/test.twochroms.mask.tab.gz -------------------------------------------------------------------------------- /data/test.twochroms.mask.tab.gz.tbi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/test.twochroms.mask.tab.gz.tbi -------------------------------------------------------------------------------- /data/test.yaml: -------------------------------------------------------------------------------- 1 | genotypes: 2 | - id: 1 3 | name: "1000G" 4 | description: "1000G chr22 Testing VCF" 5 | filepath: "data/chr22.test.vcf.gz" 6 | genome_build: "GRCh37" 7 | 8 | - id: 2 9 | name: "1000G Monomorphic Test" 10 | description: "1000G chr22 Testing VCF with 3 monomorphic variants" 11 | filepath: "data/chr22.monomorphic_test.vcf.gz" 12 | genome_build: "GRCh37" 13 | 14 | phenotypes: 15 | - id: 1 16 | name: "1000G random phenotypes" 17 | description: "An example set of randomly generated phenotypes for 1000G" 18 | genotypes: [1, 2] 19 | filepath: "data/chr22.test.tab" 20 | delim: "\t" 21 | columns: 22 | iid: 23 | column_type: "TEXT" 24 | sample_column: true 25 | 26 | sex: 27 | column_type: "CATEGORICAL" 28 | for_analysis: false 29 | 30 | rand_binary: 31 | column_type: "CATEGORICAL" 32 | description: "A random binary phenotype" 33 | 34 | rand_qt: 35 | column_type: "FLOAT" 36 | description: "A random quantitative phenotype" 37 | 38 | - id: 2 39 | name: "1000G random phenotypes II" 40 | genotypes: [1, 2] 41 | filepath: "data/chr22.more_phenotypes.test.ped" 42 | description: "Adding a second set of phenotypes for 1000G" 43 | delim: "\t" 44 | columns: 45 | ANOTHER_RAND_QT: 46 | column_type: "FLOAT" 47 | description: "Another random quantitative phenotype" 48 | 49 | - id: 3 50 | name: "Test bad float" 51 | description: "Test case for bad floating point values 
in file" 52 | genotypes: [1, 2] 53 | filepath: "data/chr22.test.bad_float.tab" 54 | delim: "\t" 55 | columns: 56 | iid: 57 | column_type: "TEXT" 58 | sample_column: true 59 | 60 | rand_qt: 61 | column_type: "FLOAT" 62 | description: "QT with bad float value in 5th line" 63 | 64 | - id: 4 65 | name: "Test for_analysis" 66 | description: "Test case for skipping column" 67 | genotypes: [1, 2] 68 | filepath: "data/chr22.test.tab" 69 | delim: "\t" 70 | columns: 71 | iid: 72 | column_type: "TEXT" 73 | sample_column: true 74 | 75 | sex: 76 | for_analysis: false 77 | 78 | rand_qt: 79 | column_type: "FLOAT" 80 | 81 | masks: 82 | - id: 1 83 | name: "AF < 0.01" 84 | description: "Variants with allele frequency < 1%" 85 | filepath: "data/mask.epacts.chr22.gencode-exons-AF01.tab.gz" 86 | genome_build: "GRCh37" 87 | genotypes: [1, 2] 88 | group_type: "GENE" 89 | identifier_type: "ENSEMBL" 90 | 91 | - id: 2 92 | name: "AF < 0.05" 93 | description: "Variants with allele frequency < 5%" 94 | filepath: "data/mask.epacts.chr22.gencode-exons-AF05.tab.gz" 95 | genome_build: "GRCh37" 96 | genotypes: [1, 2] 97 | group_type: "GENE" 98 | identifier_type: "ENSEMBL" 99 | 100 | - id: 3 101 | name: "Simulated genes for rvtests scorecov test" 102 | description: "A couple of simulated genes with a known burden effect" 103 | filepath: "data/test.smallchunk.mask.epacts.tab.gz" 104 | genome_build: "GRCh37" 105 | summary_stats: 2 106 | group_type: "GENE" 107 | identifier_type: "ENSEMBL" 108 | 109 | - id: 4 110 | name: "Test scores/cov split by chrom" 111 | description: "Simulated genes for testing scores/cov split into files by chrom" 112 | filepath: "data/test.twochroms.mask.tab.gz" 113 | genome_build: "GRCh37" 114 | summary_stats: 3 115 | group_type: "GENE" 116 | identifier_type: "ENSEMBL" 117 | 118 | summary_stats: 119 | - id: 1 120 | name: "RAREMETAL scorecov test" 121 | description: "RAREMETAL summary statistics for a small test region" 122 | genome_build: "GRCh37" 123 | score_path: 
"data/test_sumstat_loader_rm.scores.assoc.gz" 124 | cov_path: "data/test_sumstat_loader_rm.cov.assoc.gz" 125 | 126 | - id: 2 127 | name: "rvtests scorecov test" 128 | description: "rvtests summary statistics for a small test region" 129 | genome_build: "GRCh37" 130 | score_path: "data/test.smallchunk.MetaScore.assoc.gz" 131 | cov_path: "data/test.smallchunk.MetaCov.assoc.gz" 132 | 133 | - id: 3 134 | name: "Test multiple chrom" 135 | description: "With glob" 136 | genome_build: "GRCh37" 137 | score_path: "data/test.twochroms.chr*.MetaScore.assoc.gz" 138 | cov_path: "data/test.twochroms.chr*.MetaCov.assoc.gz" 139 | 140 | - id: 4 141 | name: "MetaSTAAR test multiple segments" 142 | description: "Simulated MetaSTAAR score/cov files" 143 | genome_build: "GRCh37" 144 | score_path: "data/test.qt.segment*.metastaar.sumstat.parquet" 145 | cov_path: "data/test.qt.segment*.metastaar.cov.parquet" 146 | format: "METASTAAR" -------------------------------------------------------------------------------- /data/test_metastaar_corrupt.yaml: -------------------------------------------------------------------------------- 1 | summary_stats: 2 | - id: 5 3 | name: "MetaSTAAR corrupt test" 4 | description: "Files with corrupt metadata" 5 | genome_build: "GRCh37" 6 | score_path: "data/metastaar_invalid_metadata.parquet" 7 | cov_path: "data/test.qt.segment1.metastaar.cov.parquet" 8 | format: "METASTAAR" 9 | -------------------------------------------------------------------------------- /data/test_metastaar_empty.yaml: -------------------------------------------------------------------------------- 1 | summary_stats: 2 | - id: 6 3 | name: "MetaSTAAR empty files test" 4 | description: "Files that are empty" 5 | genome_build: "GRCh37" 6 | score_path: "data/metastaar_empty.score.parquet" 7 | cov_path: "data/metastaar_empty.cov.parquet" 8 | format: "METASTAAR" 9 | -------------------------------------------------------------------------------- /data/test_no_sav_index.sav: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/test_no_sav_index.sav -------------------------------------------------------------------------------- /data/test_no_sav_index.yaml: -------------------------------------------------------------------------------- 1 | genotypes: 2 | - id: 9999 3 | name: "Test no savvy index" 4 | description: "Test adding savvy file with no s1r index" 5 | filepath: "data/test_no_sav_index.sav" 6 | genome_build: "GRCh37" 7 | -------------------------------------------------------------------------------- /data/test_no_tabix.vcf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/test_no_tabix.vcf.gz -------------------------------------------------------------------------------- /data/test_no_tabix.yaml: -------------------------------------------------------------------------------- 1 | genotypes: 2 | - id: 9999 3 | name: "Test no tabix" 4 | description: "Test adding VCF with missing tabix index" 5 | filepath: "data/test_no_tabix.vcf.gz" 6 | genome_build: "GRCh37" 7 | -------------------------------------------------------------------------------- /data/test_no_testable_variants.mask.tab.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/test_no_testable_variants.mask.tab.gz -------------------------------------------------------------------------------- /data/test_no_testable_variants.mask.tab.gz.tbi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/test_no_testable_variants.mask.tab.gz.tbi 
-------------------------------------------------------------------------------- /data/test_no_testable_variants.vcf.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/test_no_testable_variants.vcf.gz -------------------------------------------------------------------------------- /data/test_no_testable_variants.vcf.gz.tbi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/test_no_testable_variants.vcf.gz.tbi -------------------------------------------------------------------------------- /data/test_not_float.dat: -------------------------------------------------------------------------------- 1 | A RAND_BINARY 2 | T RAND_QT 3 | T ANCESTRY 4 | -------------------------------------------------------------------------------- /data/test_ped_incorrect_float.yaml: -------------------------------------------------------------------------------- 1 | phenotypes: 2 | - id: 6 3 | name: "Test case for column with incorrect data type" 4 | description: "Test PED file with incorrect float column type specified in DAT" 5 | genotypes: [1, 2] 6 | filepath: "data/test_not_float.ped" 7 | delim: "\t" 8 | -------------------------------------------------------------------------------- /data/test_sumstat_loader_rm.cov.assoc.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/test_sumstat_loader_rm.cov.assoc.gz -------------------------------------------------------------------------------- /data/test_sumstat_loader_rm.cov.assoc.gz.tbi: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/test_sumstat_loader_rm.cov.assoc.gz.tbi -------------------------------------------------------------------------------- /data/test_sumstat_loader_rm.scores.assoc.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/test_sumstat_loader_rm.scores.assoc.gz -------------------------------------------------------------------------------- /data/test_sumstat_loader_rm.scores.assoc.gz.tbi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/data/test_sumstat_loader_rm.scores.assoc.gz.tbi -------------------------------------------------------------------------------- /data/test_tab_incorrect_float.yaml: -------------------------------------------------------------------------------- 1 | phenotypes: 2 | - id: 5 3 | name: "Test for_analysis" 4 | description: "Test case for column with incorrect data type" 5 | genotypes: [1, 2] 6 | filepath: "data/chr22.test.tab" 7 | delim: "\t" 8 | columns: 9 | iid: 10 | column_type: "TEXT" 11 | sample_column: true 12 | 13 | sex: 14 | column_type: "FLOAT" 15 | description: "This column should fail, because it cannot be coerced to float" 16 | 17 | rand_qt: 18 | column_type: "FLOAT" 19 | description: "Random QT that is truly floating point" 20 | -------------------------------------------------------------------------------- /data/variant_ld_22_51241101_vs_51241101_51241385.hap.ld: -------------------------------------------------------------------------------- 1 | CHR1 POS1 CHR2 POS2 N_CHR R^2 2 | 22 51241101 22 51241101 5008 1 3 | 22 51241101 22 51241102 5008 1 4 | 22 51241101 22 51241285 5008 7.58732e-06 5 | 22 51241101 22 51241298 5008 2.79553e-07 6 | 22 51241101 22 51241309 5008 2.79553e-07 7 | 22 
51241101 22 51241342 5008 2.79553e-07 8 | -------------------------------------------------------------------------------- /data/variant_ld_22_51241309_vs_51241101_51244237.hap.ld: -------------------------------------------------------------------------------- 1 | CHR1 POS1 CHR2 POS2 N_CHR R^2 2 | 22 51241309 22 51241101 5008 2.79553e-07 3 | 22 51241309 22 51241102 5008 2.79553e-07 4 | 22 51241309 22 51241285 5008 1.0826e-06 5 | 22 51241309 22 51241298 5008 3.98882e-08 6 | 22 51241309 22 51241309 5008 1.0 7 | 22 51241309 22 51241342 5008 3.98882e-08 8 | 22 51241309 22 51241386 5008 2.83119e-06 9 | 22 51241309 22 51244163 5008 1.56754e-06 10 | 22 51241309 22 51244205 5008 3.98882e-08 11 | 22 51241309 22 51244237 5008 7.60611e-07 12 | -------------------------------------------------------------------------------- /data/variant_ld_22_51241386_vs_51241101_51241385.hap.ld: -------------------------------------------------------------------------------- 1 | CHR1 POS1 CHR2 POS2 N_CHR R^2 2 | 22 51241386 22 51241101 5008 0.0315598 3 | 22 51241386 22 51241102 5008 0.0315598 4 | 22 51241386 22 51241285 5008 7.68412e-05 5 | 22 51241386 22 51241298 5008 2.83119e-06 6 | 22 51241386 22 51241309 5008 2.83119e-06 7 | 22 51241386 22 51241342 5008 2.83119e-06 8 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | ldserver: 4 | build: . 5 | environment: 6 | - FLASK_APP=rest/ldserver 7 | env_file: .env 8 | ports: 9 | - "${LDSERVER_PORT}:${LDSERVER_PORT}" 10 | depends_on: 11 | - redis 12 | working_dir: /home/ldserver 13 | command: /bin/bash -c "source $$LDSERVER_CONFIG_SCRIPT && gunicorn -b 0.0.0.0:$$LDSERVER_PORT -w $$LDSERVER_WORKERS -k gthread --pythonpath rest 'ldserver:create_app()'" 14 | 15 | raremetal: 16 | build: . 
17 | environment: 18 | - FLASK_APP=rest/raremetal 19 | env_file: .env 20 | ports: 21 | - "${RAREMETAL_PORT}:${RAREMETAL_PORT}" 22 | depends_on: 23 | - redis 24 | working_dir: /home/ldserver 25 | command: /bin/bash -c "flask add-yaml $$RAREMETAL_CONFIG_DATA && gunicorn -b 0.0.0.0:$$RAREMETAL_PORT -w $$RAREMETAL_WORKERS -k gthread --pythonpath rest 'raremetal:create_app()'" 26 | 27 | redis: 28 | image: "redis:5.0-alpine" 29 | -------------------------------------------------------------------------------- /rest/build.txt: -------------------------------------------------------------------------------- 1 | cget==0.2.0 2 | Cython==0.29.24 3 | invoke==1.5.1 4 | numpy==1.20.3 5 | pytest==6.2.4 6 | tox==3.24.4 7 | sarge==0.1.6 8 | -------------------------------------------------------------------------------- /rest/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/rest/config/__init__.py -------------------------------------------------------------------------------- /rest/config/default.py: -------------------------------------------------------------------------------- 1 | SQLALCHEMY_DATABASE_URI = 'sqlite:///sql.db' 2 | SQLALCHEMY_TRACK_MODIFICATIONS = False 3 | PROXY_PASS = None # set if Apache mod_proxy is used e.g. http://my.host.com/prefix/ 4 | API_MAX_PAGE_SIZE = 100000 5 | API_MAX_REGION_SIZE = 4000000 6 | API_MAX_COV_REGION_SIZE = 1000000 7 | LDSERVER_PRECISION = None 8 | SEGMENT_SIZE_BP = 1000 9 | CACHE_ENABLED = True 10 | CACHE_REDIS_HOSTNAME = '127.0.0.1' 11 | CACHE_REDIS_PORT = 6379 12 | GZIP_COMPRESSION = True # enable built-in response compression if for any reason it was not possible to enable it through Apache 13 | API_BASE_URL = 'http://127.0.0.1:5000' # specify the base URL address of the LD server. It is used by the Playground.
14 | SENTRY_DSN = None # include your Sentry DSN for error reporting to your own Sentry instance 15 | SENTRY_ENV = None # name of your deployment - usually 'production' or 'staging' or 'travis' -------------------------------------------------------------------------------- /rest/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/LDServer/12d82976897991a3f14dda9f848342596180ad61/rest/core/__init__.py -------------------------------------------------------------------------------- /rest/ldserver/__init__.py: -------------------------------------------------------------------------------- 1 | from flask import Flask 2 | 3 | def create_app(test_config = None): 4 | app = Flask(__name__, instance_relative_config = True) 5 | 6 | if test_config is None: 7 | app.config.from_object('config.default') 8 | app.config.from_pyfile('config.py', silent = True) 9 | app.config.from_envvar('RESTLD_CONFIG_FILE', silent = True) 10 | else: 11 | app.config.from_mapping(test_config) 12 | 13 | from ldserver.model import db, load_correlations, load_references_command, show_references_command, \ 14 | add_reference_command, create_subset_command, show_genotypes_command, show_samples_command 15 | db.init_app(app) 16 | app.cli.add_command(load_references_command) 17 | app.cli.add_command(show_references_command) 18 | app.cli.add_command(add_reference_command) 19 | app.cli.add_command(create_subset_command) 20 | app.cli.add_command(show_genotypes_command) 21 | app.cli.add_command(show_samples_command) 22 | 23 | from ldserver import api 24 | app.register_blueprint(api.bp) 25 | 26 | if app.config['GZIP_COMPRESSION']: 27 | app.config['COMPRESS_MIMETYPES'] = ['application/json', 'application/msgpack'] 28 | app.config['COMPRESS_LEVEL'] = 3 29 | app.config['COMPRESS_MIN_SIZE'] = 500 30 | api.compress.init_app(app) 31 | 32 | with app.app_context(): 33 | app.config['CORRELATIONS'] = load_correlations() 34 | 35 | 
return app 36 | -------------------------------------------------------------------------------- /rest/playground/__init__.py: -------------------------------------------------------------------------------- 1 | from flask import Flask 2 | 3 | def create_app(test_config = None): 4 | app = Flask(__name__, instance_relative_config = True) 5 | 6 | if test_config is None: 7 | app.config.from_object('config.default') 8 | app.config.from_pyfile('config.py', silent = True) 9 | app.config.from_envvar('RESTLD_CONFIG_FILE', silent = True) 10 | else: 11 | app.config.from_mapping(test_config) 12 | 13 | from playground import web 14 | app.register_blueprint(web.bp) 15 | 16 | return app -------------------------------------------------------------------------------- /rest/playground/templates/base.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | {% block title %}{% endblock %} 11 | 12 | 13 | {% block content %}{% endblock %} 14 | 15 | 16 | 17 | 18 | 19 | 20 | {% block scripts %}{% endblock %} 21 | 22 | 23 | -------------------------------------------------------------------------------- /rest/playground/web.py: -------------------------------------------------------------------------------- 1 | from flask import current_app, Blueprint, request, jsonify, make_response, abort, render_template 2 | 3 | bp = Blueprint('api', __name__) 4 | 5 | @bp.route('/') 6 | def home(): 7 | return render_template('home.html', data = { 'max_limit': current_app.config['API_MAX_PAGE_SIZE'], 'api_base_url': current_app.config['API_BASE_URL'] }) -------------------------------------------------------------------------------- /rest/raremetal/__init__.py: -------------------------------------------------------------------------------- 1 | from flask import Flask 2 | import logging 3 | 4 | def create_app(test_config = None): 5 | app = Flask(__name__, instance_relative_config = True) 6 | app.logger.setLevel(logging.INFO) 7 | 8 | 
if test_config is None: 9 | app.config.from_object('config.default') 10 | app.config.from_pyfile('config.py', silent = True) 11 | app.config.from_envvar('RESTLD_CONFIG_FILE', silent = True) 12 | else: 13 | app.config.from_mapping(test_config) 14 | 15 | from .model import db, load_correlations, load_genotypes_command, show_genotypes_command, \ 16 | add_genotypes_command, create_subset_command, show_genotypes_command, show_samples_command, \ 17 | add_phenotypes_command, add_masks_command, show_phenotypes_command, show_masks_command, add_yaml_command 18 | 19 | db.init_app(app) 20 | app.cli.add_command(add_masks_command) 21 | app.cli.add_command(load_genotypes_command) 22 | app.cli.add_command(show_genotypes_command) 23 | app.cli.add_command(add_genotypes_command) 24 | app.cli.add_command(add_phenotypes_command) 25 | app.cli.add_command(create_subset_command) 26 | app.cli.add_command(show_genotypes_command) 27 | app.cli.add_command(show_samples_command) 28 | app.cli.add_command(show_phenotypes_command) 29 | app.cli.add_command(show_masks_command) 30 | app.cli.add_command(add_yaml_command) 31 | 32 | from . import api 33 | app.register_blueprint(api.bp) 34 | 35 | from . import sentry 36 | sentry.init_app(app) 37 | 38 | from . 
import errors 39 | errors.init_app(app) 40 | 41 | if app.config['GZIP_COMPRESSION']: 42 | app.config['COMPRESS_MIMETYPES'] = ['application/json'] 43 | app.config['COMPRESS_LEVEL'] = 3 44 | app.config['COMPRESS_MIN_SIZE'] = 500 45 | api.compress.init_app(app) 46 | 47 | with app.app_context(): 48 | app.config['CORRELATIONS'] = load_correlations() 49 | 50 | return app 51 | -------------------------------------------------------------------------------- /rest/raremetal/errors.py: -------------------------------------------------------------------------------- 1 | import traceback 2 | import re 3 | import werkzeug 4 | import sys 5 | from flask import current_app, request, jsonify 6 | from core.pywrapper import LDServerGenericException 7 | 8 | class FlaskException(Exception): 9 | status_code = 400 10 | 11 | def __init__(self, message, status_code=None, secret=None): 12 | """ 13 | Construct an exception representing an error that occurred when running flask or a flask subcommand. 14 | This exception can include "secret" data that will not be included in a HTTP response message, but will be 15 | delivered to the server log. 16 | 17 | :param message: Message describing the exception. This will logged in the console, in Sentry, and in the HTTP response (if applicable.) 18 | :param status_code: HTTP status code. 19 | :param secret: Message with additional information, such as filepaths, that will only appear in the server log. 20 | """ 21 | 22 | super().__init__(message) 23 | self.message = message 24 | self._secret = secret 25 | 26 | if status_code is not None: 27 | self.status_code = status_code 28 | 29 | @property 30 | def secret(self): 31 | return self._secret 32 | 33 | @secret.setter 34 | def secret(self, value): 35 | self._secret = value 36 | 37 | def handle_all(error): 38 | sentry = current_app.extensions.get("sentry") 39 | 40 | # Try to log exception to Sentry if it is configured. 
41 | if sentry is not None: 42 | print("Attempting to log exception to Sentry...") 43 | sentry.captureException() 44 | else: 45 | print("Sentry not setup to log exceptions") 46 | 47 | # If we're in debug mode, re-raise the exception so we get the 48 | # browser debugger 49 | if current_app.debug: 50 | raise error 51 | 52 | # Also log the exception to the console. 53 | print("Exception thrown while handling request: " + request.url, file=sys.stderr) 54 | traceback.print_exc() # defaults to stderr 55 | print(str(error), file=sys.stderr) 56 | if isinstance(error, FlaskException) and error.secret is not None: 57 | print(error.secret, file=sys.stderr) 58 | 59 | if isinstance(error, FlaskException): 60 | message = error.message 61 | code = error.status_code 62 | elif isinstance(error, werkzeug.exceptions.NotFound): 63 | message = error.description 64 | code = error.code 65 | elif hasattr(error, 'args') and (error.args is not None) and (len(error.args) > 0) and isinstance(error.args[0], LDServerGenericException): 66 | # This is a safe exception type with sensitive information kept in a separate string object. 67 | 68 | # Print extra C++ exception information to stderr 69 | print("C++ exception thrown:\n∟ public msg: {}\n∟ private msg: {}".format(str(error), error.args[0].get_secret()), file=sys.stderr) 70 | 71 | # Raise a flask exception to tell the developer what went wrong 72 | message = str(error) 73 | code = 400 # Any C++ exception should be a HTTP 400 74 | else: 75 | message = "An unexpected error occurred on the server while processing the request. Please ask the admin to check the server logs for more information." 76 | code = 500 77 | 78 | # A little extra work to figure out the true request URL. 
79 | # Requires the following set in apache: 80 | # SetEnvIf Request_URI "^(.*)$" REQUEST_URI=$1 81 | # RequestHeader set X-Request-Uri "%{REQUEST_URI}e" 82 | full_url = request.url 83 | real_uri = request.headers.get("X-Request-Uri") 84 | if real_uri is not None: 85 | match = re.search("\/(?P<api_name>\w+)\/(?P<api_version>v\d+)", real_uri) 86 | if match: 87 | api_name, api_version = match.groups() 88 | full_url = full_url.replace("/" + api_version,"/" + api_name + "/" + api_version) 89 | 90 | response = jsonify({ 91 | "data": {}, 92 | "error": message, 93 | "request": full_url 94 | }) 95 | response.status_code = code 96 | return response 97 | 98 | def init_app(app): 99 | app.register_error_handler(Exception, handle_all) 100 | -------------------------------------------------------------------------------- /rest/raremetal/sentry.py: -------------------------------------------------------------------------------- 1 | import os 2 | from raven.contrib.flask import Sentry 3 | from raven.versioning import fetch_git_sha 4 | 5 | def init_app(app): 6 | # Start logging errors 7 | try: 8 | sha = fetch_git_sha(os.path.join(app.root_path, "../../")) 9 | except: 10 | sha = "no-git" 11 | 12 | release = "raremetal-server@{}".format(sha) 13 | 14 | sentry_dsn = app.config.get("SENTRY_DSN") 15 | if sentry_dsn is not None: 16 | app.config["SENTRY_CONFIG"] = { 17 | "dsn": sentry_dsn, 18 | "release": release 19 | } 20 | 21 | if app.config["SENTRY_ENV"] is not None: 22 | app.config["SENTRY_CONFIG"]["environment"] = app.config["SENTRY_ENV"] 23 | 24 | # This attaches sentry to current_app.extensions['sentry'] 25 | Sentry(app, register_signal=False, wrap_wsgi=False) 26 | -------------------------------------------------------------------------------- /rest/requirements.txt: -------------------------------------------------------------------------------- 1 | appdirs==1.4.4 2 | atomicwrites==1.4.0 3 | attrs==20.2.0 4 | blinker==1.4 5 | Brotli==1.0.9 6 | certifi==2020.6.20 7 | chardet==3.0.4 8 | click==7.1.2 9 | 
contextlib2==0.6.0.post1 10 | distlib==0.3.1 11 | filelock==3.0.12 12 | Flask==1.1.2 13 | Flask-Compress==1.7.0 14 | Flask-Cors==3.0.9 15 | Flask-SQLAlchemy==2.4.4 16 | funcsigs==1.0.2 17 | gevent==21.8.0 18 | greenlet==1.1.0 19 | gunicorn==20.1.0 20 | idna==2.10 21 | importlib-metadata==2.0.0 22 | importlib-resources==3.3.0 23 | iniconfig==1.1.1 24 | itsdangerous==1.1.0 25 | Jinja2==2.11.3 26 | MarkupSafe==1.1.1 27 | marshmallow==3.8.0 28 | more-itertools==8.5.0 29 | packaging==20.4 30 | pathlib2==2.3.5 31 | pluggy==0.13.1 32 | psutil==5.7.3 33 | py==1.10.0 34 | pyparsing==2.4.7 35 | PyYAML==5.4.1 36 | raven==6.10.0 37 | requests==2.25.1 38 | scandir==1.10.0 39 | six==1.15.0 40 | SQLAlchemy==1.3.20 41 | tabulate==0.8.7 42 | toml==0.10.1 43 | urllib3==1.26.5 44 | virtualenv==20.7.2 45 | webargs==6.1.1 46 | Werkzeug==1.0.1 47 | zipp==3.4.0 48 | zope.event==4.5.0 49 | msgpack==1.0.2 50 | zope.interface==5.4.0 51 | -------------------------------------------------------------------------------- /rest/runtime_tests/test_runtime.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import requests 3 | import time 4 | import sys 5 | import random 6 | import msgpack 7 | 8 | argparser = argparse.ArgumentParser(description = 'Tool for runtime experiments.') 9 | argparser_subparsers = argparser.add_subparsers(help = '', dest = 'command') 10 | 11 | argparser.add_argument('-n', '--hostname', metavar = 'name', required = True, type = str, dest = 'hostname', help = 'LD Server hostname.') 12 | argparser.add_argument('-p', '--port', metavar = 'number', required = False, type = int, dest = 'port', help = 'LD Server port number.') 13 | argparser.add_argument('-b', '--genome-build', metavar = 'name', required = True, type = str, dest ='genome_build', help = 'Genome build name.') 14 | argparser.add_argument('-f', '--reference', metavar = 'name', required = True, type = str, dest = 'reference', help = 'LD reference panel name.') 15 
| argparser.add_argument('-m', '--messagepack', dest = 'msgpack', action='store_true', help = 'Use MessagePack instead of JSON.') 16 | argparser.add_argument('-d', '--decimals', dest = 'decimals', required = False, type = int, help = 'Floating point precision for JSON (0 - full precision, >0 - number of decimals when rounding.') 17 | 18 | argparser_region = argparser_subparsers.add_parser('region', help = 'Region queries.') 19 | argparser_region.add_argument('-g', '--genes', metavar = 'file', required = True, type = str, dest = 'genes_file', help = 'File with gene coordinates. Must have two columns (without header): start position (bp), stop position (bp).') 20 | argparser_region.add_argument('-s', '--page-size', metavar = 'number', required = True, type = int, dest = 'page_size', default = 10000, help = 'Maximal page size.') 21 | argparser_region.add_argument('-l', '--region-length', metavar = 'number', required = True, type = int, dest = 'region_length', default = 10000, help = 'Region length in base-pairs.') 22 | argparser_region.add_argument('-c', '--queries-count', metavar = 'number', required = True, type = int, dest = 'c', default = 10000, help = 'Number of queries to generate.') 23 | 24 | argparser_variant = argparser_subparsers.add_parser('variant', help = 'Variant queries.') 25 | argparser_variant.add_argument('-v', '--variants', metavar = 'file', required = True, type = str, dest = 'variants_file', help = 'File with variants. 
Must have four columns (without header): chromosome, position (bp), REF allele, ALT allele.') 26 | argparser_variant.add_argument('-s', '--page-size', metavar = 'number', required = True, type = int, dest = 'page_size', default = 10000, help = 'Maximal page size.') 27 | argparser_variant.add_argument('-l', '--region-length', metavar = 'number', required = True, type = int, dest = 'region_length', default = 10000, help = 'Region length in base-pairs.') 28 | argparser_variant.add_argument('-c', '--queries-count', metavar = 'number', required = True, type = int, dest = 'c', default = 10000, help = 'Number of queries to generate.') 29 | 30 | argparser_query = argparser_subparsers.add_parser('query', help = 'List of queries to re-run.') 31 | argparser_query.add_argument('-q', '--queries', metavar = 'file', required = True, type = str, dest = 'queries_file', help = 'Output file form previous run.') 32 | 33 | if __name__ == '__main__': 34 | args = argparser.parse_args() 35 | queries = [] 36 | if args.command == 'region': 37 | window_bp = 100000 38 | genes = [] 39 | with open(args.genes_file, 'r') as f: 40 | for line in f: 41 | if line: 42 | chrom, start_bp, stop_bp = line.rstrip().split() 43 | start_bp, stop_bp = list(map(int, (start_bp, stop_bp))) 44 | genes.append((chrom, start_bp, stop_bp)) 45 | for i in range(0, args.c): 46 | chrom, start_bp, stop_bp = random.choice(genes) 47 | start_bp = random.randrange( start_bp - window_bp if start_bp > window_bp else start_bp, stop_bp + window_bp, 1) 48 | stop_bp = start_bp + args.region_length 49 | query = 'genome_builds/{}/references/{}/populations/ALL/regions?correlation=rsquare&chrom={}&start={}&stop={}&limit={}'.format(args.genome_build, args.reference, chrom, start_bp, stop_bp, args.page_size) 50 | if args.msgpack: 51 | query += '&msgpack=1' 52 | if args.decimals: 53 | query += f'&precision={args.decimals}' 54 | queries.append((query, args.region_length, args.page_size)) 55 | elif args.command == 'variant': 56 | 
variants = [] 57 | with open(args.variants_file, 'r') as f: 58 | for line in f: 59 | chrom, position, ref, alt = line.rstrip().split() 60 | variants.append((chrom, int(position), ref, alt)) 61 | for i in range(0, args.c): 62 | chrom, position, ref, alt = random.choice(variants) 63 | start_bp = random.randrange(position - args.region_length if position > args.region_length else 0 , position, 1) 64 | stop_bp = start_bp + args.region_length 65 | query = 'genome_builds/{}/references/{}/populations/ALL/variants?correlation=rsquare&variant={}:{}_{}/{}&chrom={}&start={}&stop={}&limit={}'.format(args.genome_build, args.reference, chrom, position, ref, alt, chrom, start_bp, stop_bp, args.page_size) 66 | queries.append((query, args.region_length, args.page_size)) 67 | elif args.command == 'query': 68 | with open(args.queries_file, 'r') as f: 69 | header = f.readline().rstrip().split() 70 | for line in f: 71 | fields = dict(list(zip(header, line.rstrip().split()))) 72 | queries.append((fields['QUERY'], fields['REGION_LENGTH'], fields['PAGE_SIZE'])) 73 | print('QUERY\tREGION_LENGTH\tPAGE_SIZE\tN_VARIANTS\tN_RESULTS\tN_PAGES\tTOTAL_SECONDS\tRESPONSE_SECONDS\tUNCOMPRESSED_MB\tCOMPRESSED_MB') 74 | for query, region_length, page_size in queries: 75 | if args.msgpack: 76 | if '&msgpack=1' not in query: 77 | query += '&msgpack=1' 78 | else: 79 | if '&msgpack=1' in query: 80 | query = query.replace('&msgpack=1', '') 81 | if args.decimals: 82 | if '&precision' not in query: 83 | query += f'&precision={args.decimals}' 84 | url = 'http://{}{}/{}'.format(args.hostname, ':' + str(args.port) if args.port else '', query) 85 | total_time = 0 86 | total_results = 0 87 | total_pages = 0 88 | while url != '': 89 | start = time.time() 90 | response = requests.get(url) 91 | end = time.time() 92 | if response.status_code != 200: 93 | sys.exit('Request failed with code {}.\nQuery: {}'.format(response.status_code, query)) 94 | if response.headers['Content-Type'] == 'application/msgpack': 95 | result 
= msgpack.unpackb(response.content, strict_map_key = False) 96 | else: 97 | result = response.json() 98 | data = result['data'] 99 | total_time += (end - start) 100 | if not 'index_variant' in data: 101 | assert len(data['variants']) == len(data['chromosomes']) 102 | assert len(data['variants']) == len(data['positions']) 103 | assert len(data['variants']) == len(data['offsets']) 104 | total_results += sum([len(x) for x in data['correlations']]) 105 | else: 106 | assert len(data['variants']) == len(data['chromosomes']) 107 | assert len(data['variants']) == len(data['positions']) 108 | assert len(data['variants']) == len(data['correlations']) 109 | total_results += len(data['correlations']) 110 | total_pages += 1 111 | url = result['next'] 112 | print('{}\t{}\t{}\t{}\t{}\t{}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}'.format(query, region_length, page_size, len(data['variants']), total_results, total_pages, total_time, response.elapsed.total_seconds(), len(response.content) / (1024.0 * 1024.0), int(response.headers['Content-Length']) / (1024.0 * 1024.0))) 113 | -------------------------------------------------------------------------------- /rest/tests/ldserver/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | from ldserver import create_app 3 | import pytest 4 | 5 | @pytest.fixture 6 | def app(): 7 | app = create_app({ 8 | 'TESTING': True, 9 | 'SQLALCHEMY_DATABASE_URI': 'sqlite:///' + os.path.join(os.path.dirname(__file__), 'sql.db'), 10 | 'SQLALCHEMY_TRACK_MODIFICATIONS': False, 11 | 'PROXY_PASS': None, 12 | 'API_MAX_PAGE_SIZE': 1000, 13 | 'SEGMENT_SIZE_BP': 1000, 14 | 'CACHE_ENABLED': False, 15 | 'CACHE_REDIS_HOSTNAME': '127.0.0.1', 16 | 'CACHE_REDIS_PORT': 6379, 17 | 'GZIP_COMPRESSION': True, 18 | 'LDSERVER_PRECISION': None 19 | }) 20 | app.config['REFERENCES_JSON'] = os.path.join(os.path.dirname(__file__), 'datasets.json') 21 | from ldserver.model import load_references 22 | with app.app_context(): 23 | 
load_references(app.config['REFERENCES_JSON']) 24 | yield app 25 | 26 | @pytest.fixture 27 | def client(app): 28 | return app.test_client() 29 | 30 | @pytest.fixture 31 | def config(app): 32 | return app.config 33 | 34 | @pytest.fixture 35 | def goldstandard_ld(): 36 | def _goldstandard_ld(filename): 37 | goldstandard = dict() 38 | with open(filename, 'r') as f: 39 | header = f.readline().rstrip().split('\t') 40 | for line in f: 41 | record = dict(list(zip(header, line.rstrip().split('\t')))) 42 | goldstandard[record['POS1'] + '_' + record['POS2']] = float(record['R^2']) 43 | return goldstandard 44 | yield _goldstandard_ld 45 | -------------------------------------------------------------------------------- /rest/tests/raremetal/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | from raremetal import create_app 3 | from raremetal.model import add_yaml_command, db as dbo 4 | import pytest 5 | 6 | @pytest.fixture 7 | def app(): 8 | app = create_app({ 9 | 'TESTING': True, 10 | 'SQLALCHEMY_DATABASE_URI': 'sqlite:///' + os.path.join(os.path.dirname(__file__), 'sql.db'), 11 | 'SQLALCHEMY_TRACK_MODIFICATIONS': False, 12 | 'PROXY_PASS': None, 13 | 'API_MAX_PAGE_SIZE': 1000, 14 | 'API_MAX_REGION_SIZE': 4000000, 15 | 'API_MAX_COV_REGION_SIZE': 1000000, 16 | 'SEGMENT_SIZE_BP': 1000, 17 | 'CACHE_ENABLED': False, 18 | 'CACHE_REDIS_HOSTNAME': '127.0.0.1', 19 | 'CACHE_REDIS_PORT': 6379, 20 | 'GZIP_COMPRESSION': True, 21 | 'SENTRY_DSN': None, 22 | 'SENTRY_ENV': None 23 | }) 24 | print("Using database: " + app.config["SQLALCHEMY_DATABASE_URI"]) 25 | yield app 26 | 27 | @pytest.fixture 28 | def client(app): 29 | return app.test_client() 30 | 31 | @pytest.fixture 32 | def config(app): 33 | return app.config 34 | 35 | @pytest.fixture 36 | def db(app): 37 | with app.app_context(): 38 | dbo.drop_all() 39 | dbo.create_all() 40 | runner = app.test_cli_runner() 41 | result = runner.invoke(add_yaml_command, ["../data/test.yaml"]) 
42 | 43 | return dbo -------------------------------------------------------------------------------- /rest/tests/raremetal/test_cli.py: -------------------------------------------------------------------------------- 1 | from raremetal.model import add_yaml_command, get_genotype_dataset, get_phenotype_dataset, \ 2 | get_phenotype_column_objects, get_mask_by_id, get_analysis_columns, get_summary_stat_dataset, \ 3 | get_score_files, get_cov_files 4 | from core.pywrapper import VariantGroupType, GroupIdentifierType, LDServerGenericException 5 | import traceback 6 | 7 | def test_add_yaml(app, db): 8 | with app.app_context(): 9 | db.drop_all() 10 | db.create_all() 11 | 12 | runner = app.test_cli_runner() 13 | result = runner.invoke(add_yaml_command, ["../data/test.yaml"]) 14 | if isinstance(result.exception, Exception): 15 | traceback.print_tb(result.exc_info[2]) 16 | raise result.exception 17 | 18 | gdata = get_genotype_dataset(1) 19 | assert gdata["name"] == "1000G" 20 | assert gdata["genome_build"] == "GRCh37" 21 | 22 | pdata = get_phenotype_dataset(1) 23 | assert pdata["name"] == "1000G random phenotypes" 24 | assert pdata["description"] == "An example set of randomly generated phenotypes for 1000G" 25 | assert pdata["sample_column"] == "iid" 26 | assert pdata["nrows"] == 2504 27 | assert pdata["ncols"] == 4 28 | 29 | pcols = get_phenotype_column_objects(1) 30 | for col in pcols: 31 | assert col["column_type"] in ("TEXT", "CATEGORICAL", "FLOAT", "INTEGER") 32 | 33 | mdata = get_mask_by_id(1) 34 | assert mdata["name"] == "AF < 0.01" 35 | assert mdata["genome_build"] == "GRCh37" 36 | assert mdata["group_type"] == VariantGroupType.GENE 37 | assert mdata["identifier_type"] == GroupIdentifierType.ENSEMBL 38 | 39 | assert get_phenotype_dataset(2)["sample_column"] == "IID" 40 | 41 | analysis_cols = get_analysis_columns(1) 42 | assert len(analysis_cols) == 2 43 | 44 | ssdata = get_summary_stat_dataset(1) 45 | assert ssdata["name"] == "RAREMETAL scorecov test" 46 | assert 
ssdata["genome_build"] == "GRCh37" 47 | 48 | def test_tab_incorrect_float(app, db): 49 | with app.app_context(): 50 | db.create_all() 51 | runner = app.test_cli_runner() 52 | result = runner.invoke(add_yaml_command, ["../data/test_tab_incorrect_float.yaml"]) 53 | 54 | assert isinstance(result.exception, ValueError) 55 | assert str(result.exception).startswith("Column sex for phenotype dataset") 56 | assert str(result.exception).startswith("Column sex for phenotype dataset") 57 | assert str(result.exception).endswith("cannot be coerced to float") 58 | 59 | def test_ped_incorrect_float(app, db): 60 | with app.app_context(): 61 | db.create_all() 62 | runner = app.test_cli_runner() 63 | result = runner.invoke(add_yaml_command, ["../data/test_ped_incorrect_float.yaml"]) 64 | 65 | assert isinstance(result.exception, ValueError) 66 | assert str(result.exception).startswith("Column ANCESTRY for PED") 67 | assert str(result.exception).endswith("cannot be coerced to float") 68 | 69 | def test_vcf_missing_tabix(app, db): 70 | with app.app_context(): 71 | db.create_all() 72 | runner = app.test_cli_runner() 73 | result = runner.invoke(add_yaml_command, ["../data/test_no_tabix.yaml"]) 74 | 75 | assert isinstance(result.exception, ValueError) 76 | assert str(result.exception).startswith("Cannot find tabix index for VCF file") 77 | 78 | def test_sav_missing_index(app, db): 79 | with app.app_context(): 80 | db.create_all() 81 | runner = app.test_cli_runner() 82 | result = runner.invoke(add_yaml_command, ["../data/test_no_sav_index.yaml"]) 83 | 84 | assert isinstance(result.exception, ValueError) 85 | assert str(result.exception).startswith("Cannot find savvy index") 86 | 87 | def test_metastaar_corrupt_meta(app, db): 88 | with app.app_context(): 89 | db.create_all() 90 | runner = app.test_cli_runner() 91 | result = runner.invoke(add_yaml_command, ["../data/test_metastaar_corrupt.yaml"]) 92 | 93 | score_files = get_score_files(5) 94 | assert len(score_files) == 0 95 | 96 | def 
test_metastaar_empty_files(app, db): 97 | with app.app_context(): 98 | db.create_all() 99 | runner = app.test_cli_runner() 100 | result = runner.invoke(add_yaml_command, ["../data/test_metastaar_empty.yaml"]) 101 | 102 | score_files = get_score_files(6) 103 | cov_files = get_cov_files(6) 104 | assert len(score_files) == 0 105 | assert len(cov_files) == 0 -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 0.8.0 3 | commit = True 4 | message = Version {new_version} 5 | tag = True 6 | sign_tags = True 7 | parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)b?(?P<beta>\d*)\.?(dev)?(?P<dev>\d*) 8 | serialize = 9 | {major}.{minor}.{patch}b{beta}.dev{dev} 10 | {major}.{minor}.{patch}b{beta} 11 | {major}.{minor}.{patch} 12 | -------------------------------------------------------------------------------- /tasks.py: -------------------------------------------------------------------------------- 1 | from invoke import task 2 | 3 | """ 4 | LDServer tasks 5 | 6 | To run: 7 | 8 | * invoke build 9 | * invoke test 10 | * invoke version --part 11 | """ 12 | 13 | VALID_PARTS = "major minor patch beta dev".split() 14 | 15 | @task 16 | def build(ctx): 17 | """ 18 | Recompile the C++ component (pywrapper) 19 | """ 20 | 21 | ctx.run("cget install --update core") 22 | 23 | @task(build) 24 | def test(ctx): 25 | """ 26 | Run all test cases using tox. This task will automatically execute build. 27 | """ 28 | 29 | ctx.run("tox") 30 | 31 | @task(test) 32 | def version(ctx, part): 33 | """ 34 | Bump the version number using semantic versioning scheme. The configuration is stored in setup.cfg. 35 | 36 | The version will only be bumped if the build and test tasks run successfully. 
37 | """ 38 | 39 | if part not in VALID_PARTS: 40 | raise ValueError("Invalid part specifier: {}, must be one of {}".format(part, ", ".join(VALID_PARTS))) 41 | 42 | ctx.run("bumpversion --verbose {}".format(part)) 43 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py38, cpp 3 | skipsdist = true 4 | 5 | [testenv:py38] 6 | changedir = rest 7 | deps = 8 | -r{toxinidir}/rest/build.txt 9 | -r{toxinidir}/rest/requirements.txt 10 | commands = 11 | python3 -m pytest 12 | 13 | [testenv:cpp] 14 | changedir = cget/test 15 | commands = 16 | ./testAll 127.0.0.1 8888 17 | --------------------------------------------------------------------------------