├── SigProfilerExtractor ├── __init__.py ├── data │ ├── MatObjInput │ │ └── 21_breast_WGS_substitutions.mat │ ├── ReferenceFiles │ │ ├── CNV_features.tsv │ │ └── CN_classes_dictionary.txt │ ├── TextInput │ │ ├── Samples_SV.txt │ │ ├── Samples_DBS.txt │ │ ├── Samples_ID.txt │ │ ├── Samples_SBS.txt │ │ └── Samples_CNV.txt │ ├── CSVInput │ │ └── csv_example.csv │ └── VCFInput │ │ └── PD3851a.vcf ├── sigprofilerextractor_cli.py ├── estimate_best_solution.py ├── controllers │ └── cli_controller.py ├── nmf_gpu.py ├── nmf_cpu.py └── sigpro.py ├── pyproject.toml ├── install_genome.py ├── MANIFEST.in ├── Dockerfile ├── LICENSE.txt ├── .travis.yml ├── CHANGELOG.md ├── .gitignore ├── setup.py ├── test.py └── README.md /SigProfilerExtractor/__init__.py: -------------------------------------------------------------------------------- 1 | from .version import short_version as __version__ 2 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=61", "wheel", "build"] 3 | build-backend = "setuptools.build_meta" 4 | -------------------------------------------------------------------------------- /SigProfilerExtractor/data/MatObjInput/21_breast_WGS_substitutions.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexandrovLab/SigProfilerExtractor/HEAD/SigProfilerExtractor/data/MatObjInput/21_breast_WGS_substitutions.mat -------------------------------------------------------------------------------- /install_genome.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import sys 3 | from SigProfilerMatrixGenerator import install as genInstall 4 | 5 | 6 | def install_ref(ref_path): 7 | genInstall.install("GRCh37", offline_files_path=ref_path) 8 | 9 | 10 | if __name__ == "__main__": 11 | ref_path = sys.argv[1] 12 | install_ref(ref_path) 13 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include SigProfilerExtractor/data/TextInput/* 2 | include SigProfilerExtractor/data/VCFInput/* 3 | include SigProfilerExtractor/data/CNVInput/* 4 | include SigProfilerExtractor/data/CSVInput/* 5 | include SigProfilerExtractor/data/MatObjInput/* 6 | include SigProfilerExtractor/data/ReferenceFiles/* 7 | include SigProfilerExtractor/controllers/* 8 | 9 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # CUDA-enabled base image for GPU support 2 | FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu22.04 3 | 4 | ARG DEBIAN_FRONTEND=noninteractive 5 | ARG COMMIT_SHA=master 6 | 7 | # Install Python + minimal deps 8 | RUN apt-get update && apt-get install -y \ 9 | python3-pip python3-dev git && \ 10 | apt-get clean && rm -rf /var/lib/apt/lists/* 11 | 12 | WORKDIR /usr/src/app 13 | 14 | # Install GPU-enabled PyTorch wheels 15 | RUN pip3 install --no-cache-dir \ 16 | torch torchvision torchaudio \ 17 | --extra-index-url https://download.pytorch.org/whl/cu118 18 | 19 | # Install SigProfilerExtractor from specific commit 20 | RUN pip3 install --no-cache-dir \ 21 | 'git+https://github.com/AlexandrovLab/SigProfilerExtractor.git@'${COMMIT_SHA} 22 | 23 | # Create a non-root user 24 | RUN useradd -m -s /bin/bash spm_user 25 | RUN chown -R spm_user:spm_user /usr/src/app 26 | USER spm_user 27 | -------------------------------------------------------------------------------- /SigProfilerExtractor/sigprofilerextractor_cli.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import sys 4 | from SigProfilerExtractor.controllers import cli_controller 5 | 6 | 7 | def main_function(): 8 | commands = { 9 | "sigprofilerextractor": "Extract mutational signatures from input samples." 10 | } 11 | 12 | if len(sys.argv) < 2 or sys.argv[1].lower() not in commands: 13 | print_usage(commands) 14 | return 15 | 16 | command = sys.argv[1].lower() 17 | args = sys.argv[2:] 18 | 19 | controller = cli_controller.CliController() 20 | 21 | if command == "sigprofilerextractor": 22 | controller.dispatch_sigProfilerExtractor(args) 23 | 24 | 25 | def print_usage(commands): 26 | """Prints the usage message.""" 27 | print("Usage: SigProfilerExtractor []\n") 28 | print("Commands:") 29 | for cmd, desc in commands.items(): 30 | print(f" {cmd}: {desc}") 31 | 32 | 33 | if __name__ == "__main__": 34 | main_function() 35 | -------------------------------------------------------------------------------- /SigProfilerExtractor/data/ReferenceFiles/CNV_features.tsv: -------------------------------------------------------------------------------- 1 | 0:homdel:0-100kb 2 | 0:homdel:100kb-1Mb 3 | 0:homdel:>1Mb 4 | 1:LOH:0-100kb 5 | 1:LOH:100kb-1Mb 6 | 1:LOH:1Mb-10Mb 7 | 1:LOH:10Mb-40Mb 8 | 1:LOH:>40Mb 9 | 2:LOH:0-100kb 10 | 2:LOH:100kb-1Mb 11 | 2:LOH:1Mb-10Mb 12 | 2:LOH:10Mb-40Mb 13 | 2:LOH:>40Mb 14 | 3-4:LOH:0-100kb 15 | 3-4:LOH:100kb-1Mb 16 | 3-4:LOH:1Mb-10Mb 17 | 3-4:LOH:10Mb-40Mb 18 | 3-4:LOH:>40Mb 19 | 5-8:LOH:0-100kb 20 | 5-8:LOH:100kb-1Mb 21 | 5-8:LOH:1Mb-10Mb 22 | 5-8:LOH:10Mb-40Mb 23 | 5-8:LOH:>40Mb 24 | 9+:LOH:0-100kb 25 | 9+:LOH:100kb-1Mb 26 | 9+:LOH:1Mb-10Mb 27 | 9+:LOH:10Mb-40Mb 28 | 9+:LOH:>40Mb 29 | 2:het:0-100kb 30 | 2:het:100kb-1Mb 31 | 2:het:1Mb-10Mb 32 | 2:het:10Mb-40Mb 33 | 2:het:>40Mb 34 | 3-4:het:0-100kb 35 | 3-4:het:100kb-1Mb 36 | 3-4:het:1Mb-10Mb 37 | 3-4:het:10Mb-40Mb 38 | 3-4:het:>40Mb 39 | 5-8:het:0-100kb 40 | 5-8:het:100kb-1Mb 41 | 5-8:het:1Mb-10Mb 42 | 5-8:het:10Mb-40Mb 43 | 5-8:het:>40Mb 44 | 9+:het:0-100kb 45 | 9+:het:100kb-1Mb 46 | 9+:het:1Mb-10Mb 47 | 9+:het:10Mb-40Mb 48 | 9+:het:>40Mb -------------------------------------------------------------------------------- /SigProfilerExtractor/data/TextInput/Samples_SV.txt: -------------------------------------------------------------------------------- 1 | Mutation Types PD8969a PD18031a PD4103a PD14472a PD9572a 2 | clustered_del_1-10Kb 0 0 5 0 0 3 | clustered_del_10-100Kb 0 1 4 0 0 4 | clustered_del_100Kb-1Mb 0 2 9 0 0 5 | clustered_del_1Mb-10Mb 0 2 16 0 1 6 | clustered_del_>10Mb 0 0 13 0 0 7 | clustered_tds_1-10Kb 0 0 1 0 0 8 | clustered_tds_10-100Kb 0 0 3 0 1 9 | clustered_tds_100Kb-1Mb 0 4 5 0 0 10 | clustered_tds_1Mb-10Mb 0 1 11 0 0 11 | clustered_tds_>10Mb 0 2 8 0 0 12 | clustered_inv_1-10Kb 0 7 7 0 0 13 | clustered_inv_10-100Kb 0 7 7 0 0 14 | clustered_inv_100Kb-1Mb 0 7 14 0 3 15 | clustered_inv_1Mb-10Mb 0 3 33 0 0 16 | clustered_inv_>10Mb 0 3 19 0 1 17 | clustered_trans 0 11 186 0 1 18 | non-clustered_del_1-10Kb 14 2 14 0 1 19 | non-clustered_del_10-100Kb 10 8 11 0 0 20 | non-clustered_del_100Kb-1Mb 4 2 23 0 0 21 | non-clustered_del_1Mb-10Mb 2 1 18 1 0 22 | non-clustered_del_>10Mb 3 0 11 1 0 23 | non-clustered_tds_1-10Kb 1 0 1 0 0 24 | non-clustered_tds_10-100Kb 2 3 1 0 0 25 | non-clustered_tds_100Kb-1Mb 4 1 7 0 1 26 | non-clustered_tds_1Mb-10Mb 2 2 11 0 0 27 | non-clustered_tds_>10Mb 4 0 10 1 0 28 | non-clustered_inv_1-10Kb 5 6 13 0 1 29 | non-clustered_inv_10-100Kb 2 3 5 0 0 30 | non-clustered_inv_100Kb-1Mb 4 1 18 0 1 31 | non-clustered_inv_1Mb-10Mb 7 2 19 0 0 32 | non-clustered_inv_>10Mb 11 1 27 1 0 33 | non-clustered_trans 51 11 83 3 6 34 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2019, S M Ashiqul Islam (Mishu) [Alexandrov Lab] 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /SigProfilerExtractor/data/TextInput/Samples_DBS.txt: -------------------------------------------------------------------------------- 1 | MutationType PD3851a PD3890a PD3904a PD3905a PD3945a PD4005a 2 | AC>CA 0 1 1 1 0 0 3 | AC>CG 0 0 0 0 0 0 4 | AC>CT 0 0 0 0 1 0 5 | AC>GA 0 0 0 1 2 0 6 | AC>GG 0 0 0 0 0 0 7 | AC>GT 0 0 1 0 0 0 8 | AC>TA 0 0 0 0 0 2 9 | AC>TG 0 0 0 0 0 0 10 | AC>TT 0 1 1 0 2 1 11 | AT>CA 0 0 0 0 1 0 12 | AT>CC 0 0 0 0 0 0 13 | AT>CG 0 0 0 0 0 0 14 | AT>GA 0 0 0 0 1 0 15 | AT>GC 0 0 0 0 0 1 16 | AT>TA 0 0 0 0 1 0 17 | CC>AA 2 2 13 10 17 3 18 | CC>AG 0 2 2 1 4 5 19 | CC>AT 0 2 3 0 5 3 20 | CC>GA 0 1 1 0 1 1 21 | CC>GG 0 0 0 0 0 1 22 | CC>GT 1 2 0 1 1 0 23 | CC>TA 0 1 0 0 1 2 24 | CC>TG 0 1 0 0 0 1 25 | CC>TT 0 2 2 1 3 1 26 | CG>AT 0 0 0 0 1 0 27 | CG>GC 0 0 0 0 0 0 28 | CG>GT 0 0 0 0 0 0 29 | CG>TA 0 0 0 0 0 0 30 | CG>TC 0 0 0 0 0 0 31 | CG>TT 0 0 0 0 0 0 32 | CT>AA 2 1 2 1 2 2 33 | CT>AC 0 0 0 0 1 0 34 | CT>AG 0 0 2 0 2 2 35 | CT>GA 0 0 0 0 1 0 36 | CT>GC 0 0 0 0 0 1 37 | CT>GG 0 1 0 0 0 0 38 | CT>TA 1 0 2 0 3 1 39 | CT>TC 0 1 0 3 0 0 40 | CT>TG 0 0 1 0 0 0 41 | GC>AA 2 1 2 4 9 2 42 | GC>AG 0 0 0 0 3 2 43 | GC>AT 0 2 1 0 0 3 44 | GC>CA 0 1 0 0 1 0 45 | GC>CG 0 0 0 0 0 0 46 | GC>TA 0 0 0 0 0 0 47 | TA>AT 0 1 1 0 1 1 48 | TA>CG 0 0 0 0 0 0 49 | TA>CT 0 3 1 0 1 0 50 | TA>GC 0 0 0 0 0 0 51 | TA>GG 0 0 0 0 0 0 52 | TA>GT 0 0 1 0 0 0 53 | TC>AA 0 2 5 4 19 4 54 | TC>AG 0 0 0 2 0 2 55 | TC>AT 0 1 4 1 7 1 56 | TC>CA 0 1 6 1 5 0 57 | TC>CG 0 0 0 0 0 0 58 | TC>CT 1 0 1 0 1 0 59 | TC>GA 1 0 1 0 5 1 60 | TC>GG 0 0 0 0 0 0 61 | TC>GT 1 1 1 0 1 0 62 | TG>AA 0 0 0 0 1 0 63 | TG>AC 0 0 0 0 0 0 64 | TG>AT 0 2 1 0 4 2 65 | TG>CA 0 0 2 2 2 3 66 | TG>CC 0 0 1 0 0 0 67 | TG>CT 0 0 5 1 3 5 68 | TG>GA 0 1 1 0 0 0 69 | TG>GC 0 1 0 0 0 0 70 | TG>GT 0 0 1 0 1 1 71 | TT>AA 0 1 0 0 3 0 72 | TT>AC 0 0 0 0 0 0 73 | TT>AG 0 2 0 0 0 0 74 | TT>CA 0 0 0 0 0 0 75 | TT>CC 0 0 0 0 0 0 76 | TT>CG 0 0 0 0 0 0 77 | TT>GA 0 0 0 0 0 0 78 | TT>GC 0 0 0 0 0 0 79 | TT>GG 0 0 0 0 0 0 80 | -------------------------------------------------------------------------------- /SigProfilerExtractor/data/ReferenceFiles/CN_classes_dictionary.txt: -------------------------------------------------------------------------------- 1 | del:homdel:(-0.01,0.1] 0:homdel:0-100kb 2 | del:homdel:(0.1,1] 0:homdel:100kb-1Mb 3 | del:homdel:(1,Inf] 0:homdel:>1Mb 4 | del:LOH:(-0.01,0.1] 1:LOH:0-100kb 5 | del:LOH:(0.1,1] 1:LOH:100kb-1Mb 6 | del:LOH:(1,10] 1:LOH:1Mb-10Mb 7 | del:LOH:(10,40] 1:LOH:10Mb-40Mb 8 | del:LOH:(40,Inf] 1:LOH:>40Mb 9 | neut:LOH:(-0.01,0.1] 2:LOH:0-100kb 10 | neut:LOH:(0.1,1] 2:LOH:100kb-1Mb 11 | neut:LOH:(1,10] 2:LOH:1Mb-10Mb 12 | neut:LOH:(10,40] 2:LOH:10Mb-40Mb 13 | neut:LOH:(40,Inf] 2:LOH:>40Mb 14 | dup:LOH:(-0.01,0.1] 3-4:LOH:0-100kb 15 | dup:LOH:(0.1,1] 3-4:LOH:100kb-1Mb 16 | dup:LOH:(1,10] 3-4:LOH:1Mb-10Mb 17 | dup:LOH:(10,40] 3-4:LOH:10Mb-40Mb 18 | dup:LOH:(40,Inf] 3-4:LOH:>40Mb 19 | quad:LOH:(-0.01,0.1] 5-8:LOH:0-100kb 20 | quad:LOH:(0.1,1] 5-8:LOH:100kb-1Mb 21 | quad:LOH:(1,10] 5-8:LOH:1Mb-10Mb 22 | quad:LOH:(10,40] 5-8:LOH:10Mb-40Mb 23 | quad:LOH:(40,Inf] 5-8:LOH:>40Mb 24 | amp:LOH:(-0.01,0.1] 9+:LOH:0-100kb 25 | amp:LOH:(0.1,1] 9+:LOH:100kb-1Mb 26 | amp:LOH:(1,10] 9+:LOH:1Mb-10Mb 27 | amp:LOH:(10,40] 9+:LOH:10Mb-40Mb 28 | amp:LOH:(40,Inf] 9+:LOH:>40Mb 29 | neut:het:(-0.01,0.1] 2:het:0-100kb 30 | neut:het:(0.1,1] 2:het:100kb-1Mb 31 | neut:het:(1,10] 2:het:1Mb-10Mb 32 | neut:het:(10,40] 2:het:10Mb-40Mb 33 | neut:het:(40,Inf] 2:het:>40Mb 34 | dup:het:(-0.01,0.1] 3-4:het:0-100kb 35 | dup:het:(0.1,1] 3-4:het:100kb-1Mb 36 | dup:het:(1,10] 3-4:het:1Mb-10Mb 37 | dup:het:(10,40] 3-4:het:10Mb-40Mb 38 | dup:het:(40,Inf] 3-4:het:>40Mb 39 | quad:het:(-0.01,0.1] 5-8:het:0-100kb 40 | quad:het:(0.1,1] 5-8:het:100kb-1Mb 41 | quad:het:(1,10] 5-8:het:1Mb-10Mb 42 | quad:het:(10,40] 5-8:het:10Mb-40Mb 43 | quad:het:(40,Inf] 5-8:het:>40Mb 44 | amp:het:(-0.01,0.1] 9+:het:0-100kb 45 | amp:het:(0.1,1] 9+:het:100kb-1Mb 46 | amp:het:(1,10] 9+:het:1Mb-10Mb 47 | amp:het:(10,40] 9+:het:10Mb-40Mb 48 | amp:het:(40,Inf] 9+:het:>40Mb 49 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: focal 2 | language: python 3 | 4 | branches: 5 | only: 6 | - master 7 | 8 | python: 9 | - '3.9' 10 | - '3.12' 11 | 12 | services: 13 | - docker 14 | 15 | before_install: 16 | - pip install --upgrade setuptools packaging 17 | - if ! [ -f ./src/GRCh37.tar.gz ]; then wget --connect-timeout=10 --tries=20 ftp://alexandrovlab-ftp.ucsd.edu/pub/tools/SigProfilerMatrixGenerator/GRCh37.tar.gz -P ./src/; fi 18 | 19 | install: 20 | - pip install . 21 | 22 | cache: 23 | directories: 24 | - $TRAVIS_BUILD_DIR/src/ 25 | 26 | before_script: 27 | - python3 install_genome.py $TRAVIS_BUILD_DIR/src/ 28 | 29 | script: python3 test.py 30 | 31 | after_success: 32 | - | 33 | if [ "$TRAVIS_BRANCH" == "master" ] && [ "$TRAVIS_PULL_REQUEST" == "false" ] && [ "$TRAVIS_PYTHON_VERSION" == "3.12" ]; then 34 | echo "Starting Docker deployment to GHCR for alexandrovlab..." 35 | 36 | VERSION_TAG=$(grep "VERSION = " setup.py | cut -d'"' -f2) 37 | 38 | # Get the repository name and convert it to lowercase 39 | REPO_NAME=$(basename $TRAVIS_REPO_SLUG | tr '[:upper:]' '[:lower:]') 40 | IMAGE_NAME="ghcr.io/alexandrovlab/$REPO_NAME" 41 | 42 | echo "Building version: $VERSION_TAG for image: $IMAGE_NAME" 43 | 44 | echo "$GHCR_PASSWORD" | docker login ghcr.io -u "$GHCR_USERNAME" --password-stdin 45 | 46 | docker build \ 47 | --build-arg COMMIT_SHA=$TRAVIS_COMMIT \ 48 | -t $IMAGE_NAME:$VERSION_TAG \ 49 | -t $IMAGE_NAME:latest . 50 | 51 | echo "Docker build complete — starting push…" 52 | 53 | # Prevent timeout: keep Travis log active during push 54 | bash -c "while true; do echo '[Heartbeat] pushing image…'; sleep 60; done" & 55 | KEEPALIVE_PID=$! 56 | 57 | docker push $IMAGE_NAME:$VERSION_TAG 58 | docker push $IMAGE_NAME:latest 59 | 60 | kill $KEEPALIVE_PID 61 | 62 | echo "Docker deployment to GHCR successful" 63 | else 64 | echo "Skipping Docker deployment" 65 | fi -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | 2 | # Changelog 3 | 4 | All notable changes to this project will be documented in this file. 5 | 6 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | ## [Unreleased] 9 | 10 | ## [1.2.5] - 2025-10-28 11 | 12 | ### Added 13 | - Implemented a CI/CD pipeline with Travis CI to automate the building and publishing of Docker images to Docker Hub. 14 | - Added a Dockerfile to the repository for containerization. Documentation on how to use the Dockerfile needs to be added to the README. 15 | 16 | ## [1.2.4] - 2025-10-20 17 | 18 | ### Added 19 | - Added the `assignment_cpu` parameter to independently control the number of CPU cores used for the signature assignment step. This change enables full support for the parallel processing enhancements in **SigProfilerAssignment v1.0.0**, allowing for significant performance improvements and more granular resource control. 20 | 21 | ## [1.2.3] - 2025-09-19 22 | 23 | ### Added 24 | - Added support for rn7 and mm39 genomes in SigProfilerExtractor. 25 | 26 | ## [1.2.2] - 2025-08-11 27 | 28 | ### Added 29 | - Added mutation count and stability to 4608 plots in All_Solutions 30 | - Add a stop parameter to the CLI to stop after de novo extraction. 31 | 32 | ## [1.2.1] - 2025-05-14 33 | 34 | ### Fixed 35 | - Fixed an issue where the CLI was returning a non-zero exit code when the `--help` flag was passed. 36 | 37 | ### Added 38 | - Added `pyproject.toml` for modern Python packaging support. 39 | 40 | ## [1.2.0] - 2025-02-11 41 | 42 | ### Changed 43 | - Updated dependencies: Now requires **Pandas >= 2.0.0**, **NumPy >= 2.0.0**, and **Python >= 3.9**. 44 | - Dropped support for **Python 3.8** 45 | - **Intel-based MacBooks are no longer supported** due to upstream changes in **PyTorch**, which has dropped support for macOS x86_64. Users with Intel-based MacBooks will need to migrate to Apple Silicon (M1/M2) or use a Linux-based development environment. 46 | 47 | ## [1.1.25] - 2024-12-09 48 | 49 | ### Added 50 | - Introduced a Command-Line Interface (CLI) for SigProfilerExtractor, enabling users to interact with the tool via terminal commands. 51 | 52 | ### Updated 53 | - Improved the formatting of the parameter table for sigProfilerExtractor function for better readability and consistency. 54 | - The CI/CD badge link has been fixed. 55 | -------------------------------------------------------------------------------- /SigProfilerExtractor/data/TextInput/Samples_ID.txt: -------------------------------------------------------------------------------- 1 | Mutation Types Sample1 Sample2 Sample3 Sample4 Sample5 2 | 1:Del:C:0 61 53 89 79 73 3 | 1:Del:C:1 26 64 20 67 87 4 | 1:Del:C:2 88 27 96 77 56 5 | 1:Del:C:3 66 52 4 47 95 6 | 1:Del:C:4 34 32 9 1 6 7 | 1:Del:C:5 29 33 37 36 53 8 | 1:Del:T:0 31 41 14 76 23 9 | 1:Del:T:1 61 71 79 96 23 10 | 1:Del:T:2 63 64 56 54 55 11 | 1:Del:T:3 23 32 40 47 57 12 | 1:Del:T:4 26 97 1 83 32 13 | 1:Del:T:5 35 23 33 70 59 14 | 1:Ins:C:0 7 39 12 78 50 15 | 1:Ins:C:1 20 46 78 42 31 16 | 1:Ins:C:2 79 58 59 18 19 17 | 1:Ins:C:3 42 28 44 3 32 18 | 1:Ins:C:4 82 19 27 92 35 19 | 1:Ins:C:5 34 87 27 80 54 20 | 1:Ins:T:0 42 55 3 5 97 21 | 1:Ins:T:1 41 26 32 90 25 22 | 1:Ins:T:2 67 50 14 85 41 23 | 1:Ins:T:3 30 55 86 97 82 24 | 1:Ins:T:4 6 89 85 67 5 25 | 1:Ins:T:5 63 54 50 0 21 26 | 2:Del:R:0 45 73 39 67 60 27 | 2:Del:R:1 94 11 40 1 81 28 | 2:Del:R:2 13 3 77 23 46 29 | 2:Del:R:3 48 8 32 86 52 30 | 2:Del:R:4 68 45 33 31 0 31 | 2:Del:R:5 69 66 22 20 3 32 | 3:Del:R:0 25 42 38 61 78 33 | 3:Del:R:1 19 1 97 52 54 34 | 3:Del:R:2 69 19 33 65 0 35 | 3:Del:R:3 22 63 31 70 57 36 | 3:Del:R:4 18 90 45 98 77 37 | 3:Del:R:5 82 14 39 31 2 38 | 4:Del:R:0 13 27 42 17 43 39 | 4:Del:R:1 20 15 59 77 18 40 | 4:Del:R:2 59 59 40 81 0 41 | 4:Del:R:3 0 20 92 63 37 42 | 4:Del:R:4 99 46 67 84 36 43 | 4:Del:R:5 46 85 63 45 45 44 | 5:Del:R:0 15 71 37 0 74 45 | 5:Del:R:1 47 96 51 28 23 46 | 5:Del:R:2 5 88 90 74 15 47 | 5:Del:R:3 87 12 96 16 91 48 | 5:Del:R:4 15 20 87 68 18 49 | 5:Del:R:5 78 75 94 16 40 50 | 2:Ins:R:0 52 70 12 93 95 51 | 2:Ins:R:1 88 33 3 27 46 52 | 2:Ins:R:2 22 34 85 43 33 53 | 2:Ins:R:3 9 21 7 99 19 54 | 2:Ins:R:4 88 25 76 57 5 55 | 2:Ins:R:5 33 47 55 63 84 56 | 3:Ins:R:0 38 35 73 0 80 57 | 3:Ins:R:1 12 57 70 69 91 58 | 3:Ins:R:2 59 57 55 29 63 59 | 3:Ins:R:3 22 55 47 70 89 60 | 3:Ins:R:4 89 35 54 98 5 61 | 3:Ins:R:5 26 47 40 49 68 62 | 4:Ins:R:0 69 6 22 29 53 63 | 4:Ins:R:1 84 43 63 41 82 64 | 4:Ins:R:2 79 68 97 21 59 65 | 4:Ins:R:3 91 69 23 76 31 66 | 4:Ins:R:4 4 79 23 45 62 67 | 4:Ins:R:5 99 95 66 59 28 68 | 5:Ins:R:0 23 40 41 62 0 69 | 5:Ins:R:1 94 98 90 33 7 70 | 5:Ins:R:2 99 37 31 88 42 71 | 5:Ins:R:3 86 46 36 6 77 72 | 5:Ins:R:4 22 44 53 26 18 73 | 5:Ins:R:5 80 82 61 18 34 74 | 2:Del:M:1 90 95 97 31 94 75 | 3:Del:M:1 2 91 42 84 93 76 | 3:Del:M:2 40 58 15 37 23 77 | 4:Del:M:1 96 12 82 42 53 78 | 4:Del:M:2 31 26 86 1 90 79 | 4:Del:M:3 25 1 50 43 16 80 | 5:Del:M:1 77 98 69 50 26 81 | 5:Del:M:2 56 47 43 8 88 82 | 5:Del:M:3 41 6 50 13 40 83 | 5:Del:M:4 10 35 44 53 3 84 | 5:Del:M:5 30 3 55 7 14 85 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | # Byte-compiled / optimized / DLL files 3 | __pycache__/ 4 | *.py[cod] 5 | *$py.class 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | pip-wheel-metadata/ 25 | share/python-wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | SigProfilerExtractor/version.py 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | *.py,cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | db.sqlite3 64 | db.sqlite3-journal 65 | 66 | # Flask stuff: 67 | instance/ 68 | .webassets-cache 69 | 70 | # Scrapy stuff: 71 | .scrapy 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | .python-version 88 | 89 | # pipenv 90 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 91 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 92 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 93 | # install all needed dependencies. 94 | #Pipfile.lock 95 | 96 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 97 | __pypackages__/ 98 | 99 | # Celery stuff 100 | celerybeat-schedule 101 | celerybeat.pid 102 | 103 | # SageMath parsed files 104 | *.sage.py 105 | 106 | # Environments 107 | .env 108 | .venv 109 | env/ 110 | venv/ 111 | ENV/ 112 | env.bak/ 113 | venv.bak/ 114 | 115 | # Spyder project settings 116 | .spyderproject 117 | .spyproject 118 | 119 | # Rope project settings 120 | .ropeproject 121 | 122 | # mkdocs documentation 123 | /site 124 | 125 | # mypy 126 | .mypy_cache/ 127 | .dmypy.json 128 | dmypy.json 129 | 130 | # Pyre type checker 131 | .pyre/ 132 | 133 | # test script output 134 | test_csv_output/ 135 | test_matobj_output/ 136 | test_segCNV_output/ 137 | test_text_output/ 138 | test_vcf_output/ 139 | test_matrix_*_output/ 140 | SigProfilerExtractor/data/VCFInput/logs/ 141 | SigProfilerExtractor/data/VCFInput/input/ 142 | SigProfilerExtractor/data/VCFInput/output/ -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | import shutil 3 | import os 4 | import sys 5 | import subprocess 6 | 7 | # remove the dist folder first if exists 8 | if os.path.exists("dist"): 9 | shutil.rmtree("dist") 10 | 11 | VERSION = "1.2.5" 12 | 13 | 14 | with open("README.md") as f: 15 | long_description = f.read() 16 | 17 | 18 | def write_version_py(filename="SigProfilerExtractor/version.py"): 19 | # Copied from numpy setup.py 20 | cnt = """ 21 | # THIS FILE IS GENERATED FROM SIGPROFILEREXTRACTOR SETUP.PY 22 | short_version = '%(version)s' 23 | version = '%(version)s' 24 | Update = 'v1.2.5: Add automated Docker build and publish pipeline' 25 | 26 | """ 27 | fh = open(filename, "w") 28 | fh.write( 29 | cnt 30 | % { 31 | "version": VERSION, 32 | } 33 | ) 34 | fh.close() 35 | 36 | 37 | requirements = [ 38 | "scipy>=1.6.3", 39 | "torch>=1.8.1", 40 | "numpy>=2.0.0", 41 | "pandas>=2.0.0", 42 | "nimfa>=1.1.0", 43 | "sigProfilerPlotting>=1.4.1", 44 | "SigProfilerMatrixGenerator>=1.3.5", 45 | "SigProfilerAssignment>=1.0.1", 46 | "statsmodels>=0.9.0", 47 | "scikit-learn>=0.24.2", 48 | "psutil>=5.6.1", 49 | ] 50 | 51 | operating_system = sys.platform 52 | print(operating_system) 53 | if operating_system in ["win32", "cygwin", "windows"]: 54 | requirements.remove("matplotlib>=3.3.0") 55 | requirements.remove("torch==1.5.1") 56 | print("Trying to install pytorch!") 57 | code = 1 58 | try: 59 | code = subprocess.call( 60 | [ 61 | "pip", 62 | "install", 63 | "torch===1.5.1+cpu", 64 | "-f", 65 | "https://download.pytorch.org/whl/torch_stable.html", 66 | ] 67 | ) 68 | if code != 0: 69 | raise Exception("Torch instalation failed !") 70 | except: 71 | try: 72 | code = subprocess.call( 73 | [ 74 | "pip3", 75 | "install", 76 | "torch===1.5.1+cpu", 77 | "-f", 78 | "https://download.pytorch.org/whl/torch_stable.html", 79 | ] 80 | ) 81 | if code != 0: 82 | raise Exception("Torch instalation failed !") 83 | except: 84 | print( 85 | "Failed to install pytorch, please install pytorch manually be following the simple instructions over at: https://pytorch.org/get-started/locally/" 86 | ) 87 | if code == 0: 88 | print( 89 | "Successfully installed pytorch version! (If you need the GPU version, please install it manually, checkout the mindsdb docs and the pytroch docs if you need help)" 90 | ) 91 | 92 | 93 | write_version_py() 94 | setup( 95 | name="SigProfilerExtractor", 96 | version=VERSION, 97 | description="Extracts mutational signatures from mutational catalogues", 98 | long_description=long_description, 99 | long_description_content_type="text/markdown", # This is important! 100 | url="https://github.com/AlexandrovLab/SigProfilerExtractor.git", 101 | author="S Mishu Ashiqul Islam", 102 | author_email="m0islam@ucsd.edu", 103 | license="UCSD", 104 | packages=["SigProfilerExtractor"], 105 | install_requires=requirements, 106 | include_package_data=True, 107 | python_requires=">=3.9", 108 | entry_points={ 109 | "console_scripts": [ 110 | "SigProfilerExtractor=SigProfilerExtractor.sigprofilerextractor_cli:main_function", 111 | ], 112 | }, 113 | zip_safe=False, 114 | ) 115 | -------------------------------------------------------------------------------- /SigProfilerExtractor/data/CSVInput/csv_example.csv: -------------------------------------------------------------------------------- 1 | Mutation type,Trinucleotide,Sample1,Sample2,Sample3,Sample4,Sample5,Sample6,Sample7,Sample8,Sample9 2 | C>A,ACA,583,55,210,80,280,280,141,78,290 3 | C>A,ACC,282,25,94,94,76,134,192,57,131 4 | C>A,ACG,141,11,27,8,27,30,11,11,26 5 | C>A,ACT,355,30,143,72,107,158,84,55,185 6 | C>A,CCA,696,56,151,94,226,249,162,80,218 7 | C>A,CCC,373,25,100,58,78,126,102,56,125 8 | C>A,CCG,142,8,32,14,40,42,28,13,35 9 | C>A,CCT,528,41,106,97,156,161,190,80,211 10 | C>A,GCA,494,70,133,60,413,274,137,112,251 11 | C>A,GCC,240,21,92,72,94,110,153,37,110 12 | C>A,GCG,112,9,23,13,29,36,19,10,36 13 | C>A,GCT,289,45,81,44,211,184,78,72,150 14 | C>A,TCA,535,72,249,88,207,376,187,273,303 15 | C>A,TCC,448,55,178,68,130,211,138,146,238 16 | C>A,TCG,97,4,28,13,35,47,25,31,49 17 | C>A,TCT,693,106,299,132,342,446,200,287,408 18 | C>G,ACA,214,13,72,19,45,127,72,42,130 19 | C>G,ACC,104,18,58,28,39,83,42,26,83 20 | C>G,ACG,48,5,19,9,6,36,10,9,29 21 | C>G,ACT,196,21,128,26,53,134,75,39,259 22 | C>G,CCA,148,10,47,20,22,110,42,33,91 23 | C>G,CCC,131,9,41,14,14,70,31,13,87 24 | C>G,CCG,51,10,17,8,6,45,9,9,35 25 | C>G,CCT,181,12,95,38,50,144,66,33,192 26 | C>G,GCA,121,6,31,14,22,79,31,17,74 27 | C>G,GCC,89,10,37,37,21,47,53,20,50 28 | C>G,GCG,44,1,6,2,7,18,4,3,8 29 | C>G,GCT,107,14,58,21,32,90,54,25,113 30 | C>G,TCA,470,53,612,98,183,1284,322,646,731 31 | C>G,TCC,249,22,145,38,73,335,85,132,247 32 | C>G,TCG,36,4,28,3,11,84,22,22,41 33 | C>G,TCT,647,66,766,129,251,1599,421,800,1080 34 | C>T,ACA,560,53,173,83,135,225,151,131,234 35 | C>T,ACC,301,37,101,55,95,129,90,59,159 36 | C>T,ACG,440,157,194,259,291,299,265,530,420 37 | C>T,ACT,457,50,139,75,121,197,101,79,237 38 | C>T,CCA,508,62,183,120,144,269,148,144,264 39 | C>T,CCC,374,41,130,216,118,154,293,86,228 40 | C>T,CCG,320,109,160,185,203,245,222,404,260 41 | C>T,CCT,518,66,182,191,155,212,276,118,285 42 | C>T,GCA,437,50,138,76,122,207,134,129,165 43 | C>T,GCC,282,49,113,80,122,152,139,127,171 44 | C>T,GCG,466,159,149,213,234,290,277,416,379 45 | C>T,GCT,265,57,123,66,110,145,111,99,160 46 | C>T,TCA,793,178,1530,243,390,1398,517,1832,732 47 | C>T,TCC,537,106,441,165,191,441,254,457,460 48 | C>T,TCG,269,96,285,132,170,304,222,398,286 49 | C>T,TCT,738,127,930,245,339,810,370,1088,613 50 | T>A,ATA,395,14,98,42,108,73,68,27,164 51 | T>A,ATC,237,19,48,31,81,60,66,34,83 52 | T>A,ATG,250,11,64,33,68,60,36,22,109 53 | T>A,ATT,484,31,119,84,148,130,98,93,198 54 | T>A,CTA,253,7,59,59,40,64,49,23,93 55 | T>A,CTC,246,13,51,62,52,71,63,32,120 56 | T>A,CTG,343,18,61,52,47,69,78,18,103 57 | T>A,CTT,330,20,88,81,66,104,128,38,129 58 | T>A,GTA,175,7,35,12,36,34,31,14,52 59 | T>A,GTC,101,5,28,18,29,46,43,14,37 60 | T>A,GTG,228,7,35,15,26,46,28,22,52 61 | T>A,GTT,139,6,41,17,33,62,25,40,63 62 | T>A,TTA,302,13,86,51,64,95,90,57,165 63 | T>A,TTC,159,14,54,29,49,76,57,15,95 64 | T>A,TTG,151,3,31,24,27,64,28,20,87 65 | T>A,TTT,325,15,103,66,113,145,96,65,175 66 | T>C,ATA,860,43,439,83,141,176,109,81,549 67 | T>C,ATC,178,17,55,27,52,69,44,24,120 68 | T>C,ATG,423,25,122,39,78,113,63,261,190 69 | T>C,ATT,532,24,246,58,119,204,109,58,344 70 | T>C,CTA,371,13,78,34,54,96,48,58,140 71 | T>C,CTC,177,23,67,29,51,94,66,113,139 72 | T>C,CTG,255,14,47,40,40,96,61,141,117 73 | T>C,CTT,250,18,103,58,64,116,93,27,183 74 | T>C,GTA,383,25,67,39,95,97,74,52,182 75 | T>C,GTC,140,23,50,58,40,73,61,37,103 76 | T>C,GTG,236,18,52,28,36,65,47,712,112 77 | T>C,GTT,304,25,77,42,90,123,70,51,159 78 | T>C,TTA,418,23,100,33,79,135,83,59,206 79 | T>C,TTC,163,15,51,37,73,86,68,55,158 80 | T>C,TTG,186,11,41,24,42,76,56,1126,97 81 | T>C,TTT,386,28,91,60,114,168,83,54,202 82 | T>G,ATA,99,11,67,14,23,51,30,22,65 83 | T>G,ATC,41,9,20,8,15,34,28,9,32 84 | T>G,ATG,91,10,27,16,30,74,35,12,80 85 | T>G,ATT,120,18,71,17,38,93,59,25,96 86 | T>G,CTA,59,4,23,9,14,44,35,14,45 87 | T>G,CTC,53,8,30,12,10,32,17,17,39 88 | T>G,CTG,131,9,31,20,19,60,26,23,68 89 | T>G,CTT,147,14,103,21,40,92,65,34,89 90 | T>G,GTA,41,2,18,11,11,31,10,9,30 91 | T>G,GTC,21,2,11,12,13,24,26,13,18 92 | T>G,GTG,79,13,26,13,17,55,26,91,47 93 | T>G,GTT,78,13,37,16,24,66,41,17,66 94 | T>G,TTA,108,10,75,15,33,78,36,14,97 95 | T>G,TTC,62,6,27,21,30,46,26,10,62 96 | T>G,TTG,133,6,40,18,47,98,28,19,99 97 | T>G,TTT,231,34,142,53,69,203,113,57,168 -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Jul 10 11:45:38 2019 5 | 6 | @author: mishugeb 7 | """ 8 | from SigProfilerExtractor import sigpro as sig 9 | 10 | def run_matrix_96(): 11 | data = sig.importdata("matrix") 12 | sig.sigProfilerExtractor( 13 | "matrix", 14 | "test_matrix_96_output", 15 | data, 16 | minimum_signatures=3, 17 | maximum_signatures=3, 18 | nmf_replicates=5, 19 | min_nmf_iterations=100, 20 | max_nmf_iterations=1000, 21 | nmf_test_conv=100, 22 | ) 23 | 24 | 25 | def run_matrix_78(): 26 | data = sig.importdata("matrix_DBS") 27 | sig.sigProfilerExtractor( 28 | "matrix", 29 | "test_matrix_78_output", 30 | data, 31 | minimum_signatures=3, 32 | maximum_signatures=3, 33 | nmf_replicates=5, 34 | min_nmf_iterations=100, 35 | max_nmf_iterations=1000, 36 | nmf_test_conv=100, 37 | ) 38 | 39 | 40 | def run_matrix_83(): 41 | data = sig.importdata("matrix_ID") 42 | sig.sigProfilerExtractor( 43 | "matrix", 44 | "test_matrix_83_output", 45 | data, 46 | exome=True, 47 | reference_genome="GRCh38", 48 | minimum_signatures=3, 49 | maximum_signatures=3, 50 | nmf_replicates=5, 51 | min_nmf_iterations=100, 52 | max_nmf_iterations=1000, 53 | nmf_test_conv=100, 54 | ) 55 | 56 | 57 | def run_vcf(): 58 | vcf_data = sig.importdata("vcf") 59 | sig.sigProfilerExtractor( 60 | "vcf", 61 | "test_vcf_output", 62 | vcf_data, 63 | minimum_signatures=3, 64 | maximum_signatures=3, 65 | nmf_replicates=5, 66 | min_nmf_iterations=100, 67 | max_nmf_iterations=1000, 68 | nmf_test_conv=100, 69 | ) 70 | 71 | 72 | def run_matrix_48(): 73 | data = sig.importdata("matrix_CNV") 74 | sig.sigProfilerExtractor( 75 | "matrix", 76 | "test_matrix_48_output", 77 | data, 78 | minimum_signatures=3, 79 | maximum_signatures=3, 80 | nmf_replicates=5, 81 | min_nmf_iterations=100, 82 | max_nmf_iterations=1000, 83 | nmf_test_conv=100, 84 | ) 85 | 86 | 87 | def run_seg_48(): 88 | data = sig.importdata("seg:BATTENBERG") 89 | sig.sigProfilerExtractor( 90 | "seg:BATTENBERG", 91 | "test_segCNV_output", 92 | data, 93 | minimum_signatures=3, 94 | maximum_signatures=3, 95 | nmf_replicates=5, 96 | min_nmf_iterations=100, 97 | max_nmf_iterations=1000, 98 | nmf_test_conv=100, 99 | ) 100 | 101 | 102 | def run_matrix_32(): 103 | data = sig.importdata("matrix_SV") 104 | sig.sigProfilerExtractor( 105 | "matrix", 106 | "test_matrix_32_output", 107 | data, 108 | minimum_signatures=3, 109 | maximum_signatures=3, 110 | nmf_replicates=5, 111 | min_nmf_iterations=100, 112 | max_nmf_iterations=1000, 113 | nmf_test_conv=100, 114 | ) 115 | 116 | 117 | def run_matobj(): 118 | data = sig.importdata("matobj") 119 | sig.sigProfilerExtractor( 120 | "matobj", 121 | "test_matobj_output", 122 | data, 123 | minimum_signatures=3, 124 | maximum_signatures=3, 125 | nmf_replicates=5, 126 | min_nmf_iterations=100, 127 | max_nmf_iterations=1000, 128 | nmf_test_conv=100, 129 | ) 130 | 131 | 132 | def run_csv(): 133 | data = sig.importdata("csv") 134 | sig.sigProfilerExtractor( 135 | "csv", 136 | "test_csv_output", 137 | data, 138 | minimum_signatures=3, 139 | maximum_signatures=3, 140 | nmf_replicates=5, 141 | min_nmf_iterations=100, 142 | max_nmf_iterations=1000, 143 | nmf_test_conv=100, 144 | ) 145 | 146 | 147 | if __name__ == "__main__": 148 | run_matrix_96() 149 | run_matrix_78() 150 | run_matrix_83() 151 | run_matrix_48() 152 | run_matrix_32() 153 | run_seg_48() 154 | run_vcf() 155 | # run_matobj() 156 | # run_csv() 157 | -------------------------------------------------------------------------------- /SigProfilerExtractor/estimate_best_solution.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Jul 16 16:01:59 2020 5 | 6 | @author: mishugeb 7 | """ 8 | 9 | 10 | import pandas as pd 11 | import numpy as np 12 | import os 13 | from SigProfilerExtractor import subroutines as sub 14 | 15 | 16 | def estimate_solution( 17 | base_csvfile="All_solutions_stat.csv", 18 | All_solution="All_Solutions", 19 | genomes="Samples.txt", 20 | output="results", 21 | title="Selection_Plot", 22 | stability=0.8, 23 | min_stability=0.2, 24 | combined_stability=1.0, 25 | statistics=True, 26 | select=None, 27 | exome=False, 28 | allow_stability_drop=False, 29 | ): 30 | base_csvfile = pd.read_csv(base_csvfile, sep=",", index_col=0) 31 | signatures = list(base_csvfile.index) 32 | genomes = pd.read_csv(genomes, sep="\t", index_col=0) 33 | colnames = genomes.columns 34 | genomes = np.array(genomes) 35 | all_similarities_list = [] 36 | layer_directory = output 37 | 38 | if genomes.shape[0] == 78: 39 | mtype = "DBS78" 40 | elif genomes.shape[0] == 83: 41 | mtype = "ID83" 42 | elif genomes.shape[0] == 48: 43 | mtype = "CNV48" 44 | else: 45 | mtype = "SBS" + str(genomes.shape[0]) 46 | 47 | # set the squence type ("genome" or "exome") for selection criteria 48 | if exome == False: 49 | sequence = "genome" 50 | if exome == True: 51 | sequence = "exome" 52 | 53 | # prepare the csvfile 54 | csvfile = np.zeros([len(signatures), 4]) 55 | csvfile = csvfile 56 | for i in range(len(signatures)): 57 | if base_csvfile.shape[1] != 3: 58 | signatures[i] = signatures[i].rstrip("*") 59 | fnorm = base_csvfile.iloc[i, 4].rstrip("%") 60 | csvfile[i, [1, 3]] = base_csvfile.iloc[i, [1, 0]] 61 | elif base_csvfile.shape[1] == 3: 62 | signatures[i] = str(signatures[i]) 63 | fnorm = base_csvfile.iloc[i, 1].astype(float) 64 | fnorm = fnorm * 100 65 | csvfile[i, [1, 3]] = base_csvfile.iloc[i, [0, 2]] 66 | csvfile[i, 0] = signatures[i] 67 | csvfile[i, 2] = fnorm 68 | w = pd.read_csv( 69 | All_solution 70 | + "/" 71 | + mtype 72 | + "_" 73 | + signatures[i] 74 | + "_Signatures/Signatures/" 75 | + mtype 76 | + "_S" 77 | + signatures[i] 78 | + "_Signatures.txt", 79 | sep="\t", 80 | index_col=0, 81 | ) 82 | h = pd.read_csv( 83 | All_solution 84 | + "/" 85 | + mtype 86 | + "_" 87 | + signatures[i] 88 | + "_Signatures/Activities/" 89 | + mtype 90 | + "_S" 91 | + signatures[i] 92 | + "_NMF_Activities.txt", 93 | sep="\t", 94 | index_col=0, 95 | ) 96 | w = np.array(w) 97 | h = np.array(h).T 98 | est_genomes = np.dot(w, h) 99 | all_similarities, cosine_similarities = sub.calculate_similarities( 100 | genomes, est_genomes, colnames 101 | ) 102 | all_similarities_list.append(all_similarities) 103 | 104 | csvfile = pd.DataFrame(csvfile) 105 | csvfile.columns = [ 106 | "Total Signatures", 107 | "Stability", 108 | "Matrix Frobenius%", 109 | "avgStability", 110 | ] 111 | 112 | try: 113 | if not os.path.exists(layer_directory): 114 | os.makedirs(layer_directory) 115 | except: 116 | print("The {} folder could not be created".format("output")) 117 | 118 | solution, all_stats = sub.stabVsRError( 119 | csvfile, 120 | layer_directory, 121 | title, 122 | all_similarities_list, 123 | input_type="dataframe", 124 | stability=stability, 125 | min_stability=min_stability, 126 | combined_stability=combined_stability, 127 | mtype=mtype, 128 | statistics=statistics, 129 | select=select, 130 | sequence=sequence, 131 | allow_stability_drop=allow_stability_drop, 132 | ) 133 | 134 | all_stats.insert(1, "Stability (Avg Silhouette)", csvfile["avgStability"]) 135 | all_stats = all_stats.set_index(["Signatures"]) 136 | all_stats.to_csv(layer_directory + "/All_solutions_stat.csv", sep=",") 137 | # print("\nSelected Solution: ", solution) 138 | 139 | return solution 140 | -------------------------------------------------------------------------------- /SigProfilerExtractor/data/TextInput/Samples_SBS.txt: -------------------------------------------------------------------------------- 1 | Mutation Types PD4199a PD4005a PD3851a PD4116a PD4086a PD4194a PD4248a PD4120a PD4198a PD3904a PD3945a PD4107a PD3905a PD4192a PD4109a PD4103a PD4115a PD4085a PD3890a PD4006a PD4088a 2 | A[C>A]A 58 74 31 128 31 28 64 165 58 122 243 210 94 48 122 112 228 61 110 198 34 3 | A[C>A]C 36 66 34 138 19 24 43 55 72 112 163 176 69 42 133 50 169 51 91 173 18 4 | A[C>A]G 13 12 9 15 8 6 7 20 12 13 24 33 11 16 12 7 21 7 9 33 3 5 | A[C>A]T 37 64 21 142 23 13 35 65 45 107 155 176 65 40 96 44 158 49 87 192 26 6 | A[C>G]A 23 56 13 102 18 8 25 191 37 52 130 126 66 60 95 30 128 18 100 164 15 7 | A[C>G]C 19 34 15 71 14 5 18 72 27 42 78 85 41 26 72 28 74 18 46 96 11 8 | A[C>G]G 2 16 8 22 0 1 4 18 8 18 19 61 15 28 25 9 21 7 18 56 2 9 | A[C>G]T 26 79 11 103 22 8 31 170 32 63 116 118 71 43 104 33 130 25 101 156 16 10 | A[C>T]A 38 61 41 127 28 24 56 345 75 127 168 191 65 51 162 99 179 62 99 170 46 11 | A[C>T]C 30 49 17 63 18 19 21 133 29 54 80 68 34 39 87 52 89 28 62 85 15 12 | A[C>T]G 121 108 75 79 57 81 75 213 204 94 150 157 84 120 328 242 194 128 95 152 125 13 | A[C>T]T 40 78 20 94 23 11 22 162 46 96 171 176 53 36 113 60 149 28 79 170 29 14 | A[T>A]A 12 25 14 95 17 11 24 112 64 40 131 65 39 34 68 29 79 22 53 58 12 15 | A[T>A]C 29 36 19 62 19 14 21 66 36 31 92 66 30 34 61 32 74 30 40 63 10 16 | A[T>A]G 22 32 21 60 14 9 19 57 36 49 150 91 40 37 65 29 99 29 36 100 10 17 | A[T>A]T 27 34 26 97 21 14 51 53 86 63 182 121 58 42 186 53 141 38 51 121 14 18 | A[T>C]A 28 72 27 135 37 27 60 89 94 100 201 169 54 51 179 72 173 56 89 126 20 19 | A[T>C]C 22 34 19 67 18 10 21 40 25 37 63 95 31 24 73 27 80 22 44 99 10 20 | A[T>C]G 37 61 20 96 40 15 34 84 52 52 127 115 53 40 118 49 120 34 72 107 18 21 | A[T>C]T 32 89 45 113 50 19 39 87 60 73 205 180 79 59 157 69 203 64 104 156 27 22 | A[T>G]A 6 16 11 33 13 3 9 30 50 23 55 53 25 28 47 22 51 13 37 52 4 23 | A[T>G]C 7 7 5 23 6 1 6 23 23 10 27 23 9 24 19 7 18 9 20 21 1 24 | A[T>G]G 3 38 5 46 11 2 10 31 14 28 72 71 42 29 44 10 67 7 39 87 5 25 | A[T>G]T 2 32 9 41 11 7 22 31 24 22 60 45 34 29 43 27 48 8 71 43 5 26 | C[C>A]A 37 66 24 163 30 14 42 135 62 99 199 178 86 45 136 64 184 36 108 175 38 27 | C[C>A]C 27 77 19 159 17 10 21 86 40 72 173 167 55 28 81 32 139 33 66 147 12 28 | C[C>A]G 8 5 6 28 2 4 7 25 8 7 19 29 17 15 15 12 16 3 10 32 4 29 | C[C>A]T 27 46 27 124 22 7 29 82 40 84 180 188 64 20 112 66 149 23 67 153 25 30 | C[C>G]A 20 56 11 79 9 5 10 219 21 56 105 93 42 36 72 28 100 8 61 106 8 31 | C[C>G]C 14 49 6 70 6 3 15 61 17 41 89 82 30 26 44 21 93 16 47 74 7 32 | C[C>G]G 4 14 4 22 6 5 6 39 10 19 17 39 7 25 23 11 24 2 9 43 0 33 | C[C>G]T 36 67 9 98 16 16 18 250 31 85 140 116 42 41 111 37 149 15 90 130 19 34 | C[C>T]A 81 60 41 109 19 34 42 687 67 91 122 123 58 59 127 88 132 42 71 96 21 35 | C[C>T]C 41 41 17 80 24 26 44 207 45 77 103 85 48 39 97 63 100 39 47 61 15 36 | C[C>T]G 97 65 64 69 49 46 44 324 98 57 60 105 40 69 195 148 107 67 53 88 49 37 | C[C>T]T 69 86 34 146 36 25 36 396 58 113 154 185 72 48 141 104 148 49 102 158 33 38 | C[T>A]A 8 26 5 80 10 3 25 37 25 26 87 88 42 24 41 16 88 8 40 65 3 39 | C[T>A]C 21 41 12 85 13 6 16 46 27 51 114 103 49 32 42 21 106 27 57 111 8 40 | C[T>A]G 14 45 11 86 13 4 21 30 31 40 98 115 56 28 40 24 87 15 64 90 5 41 | C[T>A]T 13 54 22 97 12 7 18 34 28 57 134 165 59 34 71 37 158 20 70 168 14 42 | C[T>C]A 16 37 10 67 12 9 18 32 28 42 96 80 34 33 84 27 72 15 42 91 10 43 | C[T>C]C 21 42 15 80 3 11 24 60 39 47 108 161 53 36 76 29 119 17 42 105 17 44 | C[T>C]G 20 39 17 64 21 8 31 94 27 40 104 70 50 39 82 26 76 16 57 76 7 45 | C[T>C]T 16 49 12 87 20 12 17 65 27 55 127 120 46 24 89 55 142 21 78 79 10 46 | C[T>G]A 2 18 3 32 6 4 12 11 15 21 22 42 16 12 28 15 39 5 26 39 3 47 | C[T>G]C 7 15 9 20 5 4 11 27 15 26 47 39 21 21 24 11 60 7 28 41 4 48 | C[T>G]G 15 47 8 53 11 5 11 32 17 39 76 77 31 30 68 19 69 13 53 62 4 49 | C[T>G]T 11 30 17 62 16 5 21 31 35 24 93 76 35 26 76 51 157 19 86 77 11 50 | G[C>A]A 40 41 36 86 18 14 27 113 42 57 112 168 53 56 110 65 132 46 52 135 44 51 | G[C>A]C 27 40 15 103 13 12 21 51 48 77 103 92 40 18 57 40 123 23 53 85 9 52 | G[C>A]G 9 5 4 10 3 8 10 22 3 5 14 15 3 12 15 9 17 6 9 11 6 53 | G[C>A]T 34 40 19 90 17 7 20 52 31 49 101 125 44 31 82 60 136 23 45 97 22 54 | G[C>G]A 16 25 6 61 11 8 9 72 24 37 61 66 26 23 40 14 48 13 50 68 8 55 | G[C>G]C 15 26 10 72 11 7 6 46 26 31 62 58 30 16 43 19 57 11 28 54 7 56 | G[C>G]G 4 9 2 10 3 4 4 19 3 8 13 19 9 9 9 1 12 1 13 24 0 57 | G[C>G]T 22 52 7 88 19 4 18 112 26 41 85 94 38 28 69 18 91 14 60 124 4 58 | G[C>T]A 60 58 24 79 18 32 40 334 58 80 116 108 50 36 94 70 134 38 68 99 23 59 | G[C>T]C 47 45 24 62 18 26 28 167 43 74 99 83 38 35 87 64 93 27 59 75 17 60 | G[C>T]G 90 56 58 60 40 51 49 253 99 55 119 124 69 99 250 150 114 81 65 108 61 61 | G[C>T]T 52 53 19 93 21 19 40 217 45 71 117 103 62 44 97 71 105 35 64 104 15 62 | G[T>A]A 7 24 9 51 10 5 9 34 15 25 58 67 20 19 27 11 57 3 20 63 4 63 | G[T>A]C 15 21 6 46 9 9 5 25 19 21 64 43 22 13 29 11 58 10 32 40 7 64 | G[T>A]G 14 26 8 47 6 5 9 43 20 29 69 57 30 19 35 10 54 8 32 55 8 65 | G[T>A]T 11 35 13 78 8 11 13 25 18 39 90 106 42 29 46 22 108 15 45 83 2 66 | G[T>C]A 23 38 13 70 17 8 28 47 43 33 72 73 46 33 78 27 91 25 41 77 17 67 | G[T>C]C 15 22 8 45 11 9 20 38 27 27 41 68 20 24 47 19 63 13 35 59 11 68 | G[T>C]G 9 29 13 50 15 16 16 61 22 17 56 68 24 32 55 17 52 22 35 52 6 69 | G[T>C]T 26 31 14 68 17 13 39 43 55 38 85 69 36 31 91 41 103 39 47 74 11 70 | G[T>G]A 6 20 5 28 3 1 11 19 14 17 32 29 26 13 22 4 38 7 40 50 4 71 | G[T>G]C 4 18 4 26 7 5 6 22 5 15 28 25 17 12 13 7 28 3 12 24 3 72 | G[T>G]G 17 37 15 54 12 4 19 86 20 33 62 88 30 32 29 9 77 14 54 59 2 73 | G[T>G]T 10 34 14 39 7 4 14 34 23 23 51 69 29 23 35 18 75 18 49 51 6 74 | T[C>A]A 181 160 38 163 36 25 61 1885 96 118 257 168 78 55 192 148 152 59 87 148 55 75 | T[C>A]C 81 111 30 123 38 24 30 763 65 95 159 130 63 54 155 93 149 39 91 145 30 76 | T[C>A]G 15 9 4 10 6 2 7 114 14 12 18 25 7 10 22 20 18 12 8 13 2 77 | T[C>A]T 112 152 33 155 32 21 53 1046 104 123 288 234 91 52 289 152 255 73 113 167 72 78 | T[C>G]A 758 437 14 277 141 67 48 10531 105 153 184 140 155 157 401 158 160 16 230 149 40 79 | T[C>G]C 206 141 17 128 52 28 18 2069 45 86 159 132 93 91 246 74 145 26 135 118 22 80 | T[C>G]G 25 21 6 24 11 2 9 384 2 9 7 19 10 21 20 9 13 2 19 32 1 81 | T[C>G]T 937 639 28 393 167 68 69 13187 134 244 382 264 238 236 671 279 288 52 376 298 44 82 | T[C>T]A 1346 470 56 254 156 89 101 18171 289 248 263 418 162 178 416 376 201 76 196 131 64 83 | T[C>T]C 313 143 45 113 48 39 67 3561 119 165 166 237 73 67 261 183 156 64 140 86 41 84 | T[C>T]G 219 58 45 55 36 43 41 1933 121 56 61 77 43 77 206 129 84 57 57 72 53 85 | T[C>T]T 678 298 44 211 92 49 60 9261 190 219 234 369 119 111 374 277 194 74 147 138 63 86 | T[T>A]A 25 34 21 97 24 14 37 67 104 59 130 94 28 42 164 107 126 51 70 97 15 87 | T[T>A]C 13 27 9 60 11 11 9 29 28 34 99 69 35 8 53 21 76 17 36 84 5 88 | T[T>A]G 11 17 13 39 7 5 8 29 17 36 49 55 30 18 48 14 61 14 33 56 7 89 | T[T>A]T 16 54 19 117 16 16 30 52 58 78 165 184 64 32 123 41 138 34 74 200 12 90 | T[T>C]A 16 47 21 67 15 9 24 49 66 40 114 81 42 26 88 49 106 32 40 91 13 91 | T[T>C]C 31 40 6 60 12 12 20 53 65 30 78 104 38 29 89 30 60 28 55 99 12 92 | T[T>C]G 14 25 12 46 13 4 20 36 24 30 49 63 19 17 49 34 66 6 24 49 7 93 | T[T>C]T 26 55 16 64 19 15 40 61 55 49 98 116 51 35 90 65 126 43 62 94 16 94 | T[T>G]A 5 29 6 43 12 3 14 30 21 25 53 64 24 19 39 19 62 14 46 53 8 95 | T[T>G]C 6 20 5 37 6 4 14 19 12 20 50 56 16 25 24 20 33 9 26 41 5 96 | T[T>G]G 8 33 8 69 12 9 11 30 26 30 85 71 33 40 41 22 80 16 47 87 8 97 | T[T>G]T 22 51 21 93 27 10 28 73 43 50 121 107 56 62 83 66 132 34 77 130 10 98 | -------------------------------------------------------------------------------- /SigProfilerExtractor/controllers/cli_controller.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from typing import List 3 | from SigProfilerExtractor import sigpro 4 | 5 | 6 | def str2bool(v): 7 | if isinstance(v, bool): 8 | return v 9 | if v.lower() in ("yes", "true", "t", "y", "1"): 10 | return True 11 | elif v.lower() in ("no", "false", "f", "n", "0"): 12 | return False 13 | else: 14 | raise argparse.ArgumentTypeError("Boolean value expected.") 15 | 16 | 17 | def parse_arguments_extractor(args: List[str], description: str) -> argparse.Namespace: 18 | parser = argparse.ArgumentParser(description=description) 19 | 20 | # Core required arguments 21 | input_type_help = ( 22 | "The input file type: 'vcf', 'matrix', 'bedpe', or 'seg:TYPE'. " 23 | "Accepted callers for TYPE: {'ASCAT', 'ASCAT_NGS', 'SEQUENZA', " 24 | "'ABSOLUTE', 'BATTENBERG', 'FACETS', 'PURPLE', 'TCGA'}." 25 | ) 26 | 27 | parser.add_argument( 28 | "input_type", 29 | help=input_type_help, 30 | ) 31 | 32 | parser.add_argument( 33 | "output", 34 | help="Path to the output folder.", 35 | ) 36 | 37 | input_data_help = ( 38 | "Path to input data. For 'vcf' or 'bedpe', provide an input folder. " 39 | "For 'matrix' or 'seg:TYPE', provide an input file." 40 | ) 41 | 42 | parser.add_argument( 43 | "input_data", 44 | help=input_data_help, 45 | ) 46 | 47 | # Optional arguments with defaults 48 | parser.add_argument( 49 | "--reference_genome", 50 | default="GRCh37", 51 | help="Reference genome (default: 'GRCh37'). This parameter is applicable only if the input_type is 'vcf'.", 52 | ) 53 | parser.add_argument( 54 | "--opportunity_genome", 55 | default="GRCh37", 56 | help="The build or version of the reference genome for the reference signatures (default: 'GRCh37'). When the input type is 'vcf' the value for 'opportunity_genome' will be used instead.", 57 | ) 58 | parser.add_argument( 59 | "--context_type", 60 | default="default", 61 | help="Mutational context types (default: '96,DINUC,ID').", 62 | ) 63 | parser.add_argument( 64 | "--exome", 65 | type=str2bool, 66 | nargs="?", 67 | const=True, 68 | default=False, 69 | help="Extract exomes (default: False).", 70 | ) 71 | parser.add_argument( 72 | "--minimum_signatures", 73 | type=int, 74 | default=1, 75 | help="Minimum number of signatures to be extracted (default: 1).", 76 | ) 77 | parser.add_argument( 78 | "--maximum_signatures", 79 | type=int, 80 | default=10, 81 | help="Maximum number of signatures to be extracted (default: 10).", 82 | ) 83 | parser.add_argument( 84 | "--nmf_replicates", 85 | type=int, 86 | default=100, 87 | help="Number of NMF replicates to be performed at each rank using W and H (default: 100).", 88 | ) 89 | parser.add_argument( 90 | "--resample", 91 | type=str2bool, 92 | nargs="?", 93 | const=True, 94 | default=True, 95 | help="Add poisson noise to samples by resampling (default: True).", 96 | ) 97 | parser.add_argument( 98 | "--seeds", 99 | default="random", 100 | help="Seeds for reproducible resamples, file path or 'random' (default: 'random').", 101 | ) 102 | parser.add_argument( 103 | "--batch_size", 104 | type=int, 105 | default=1, 106 | help="Batch size is for GPU only and defines the number of NMF replicates to be performed by each CPU during parallel processing (default: 1).", 107 | ) 108 | parser.add_argument( 109 | "--cpu", 110 | type=int, 111 | default=-1, 112 | help="Number of processors to use (default: all available).", 113 | ) 114 | parser.add_argument( 115 | "--assignment_cpu", 116 | type=int, 117 | default=-1, 118 | help="Number of processors to be used by SigProfilerAssignment for the final signature assignment step (default: all available). This is independent of the 'cpu' parameter.", 119 | ) 120 | parser.add_argument( 121 | "--gpu", 122 | type=str2bool, 123 | nargs="?", 124 | const=True, 125 | default=False, 126 | help="Use GPU if available (default: False). note: All available CPU processors are used by default, which may cause a memory error. This error can be resolved by reducing the number of CPU processes through the 'cpu' parameter.", 127 | ) 128 | parser.add_argument( 129 | "--nmf_init", 130 | default="random", 131 | help="The initialization algorithm for W and H matrix of NMF (default: 'random'). Options are 'random', 'nndsvd', 'nndsvda', 'nndsvdar' and 'nndsvd_min'.", 132 | ) 133 | parser.add_argument( 134 | "--precision", 135 | default="single", 136 | help="Precision for calculations (default: 'single'). Options are 'single' and 'double'.", 137 | ) 138 | parser.add_argument( 139 | "--matrix_normalization", 140 | default="gmm", 141 | help="Method of normalizing the genome matrix before it is analyzed by NMF (default: 'gmm'). Options are 'custom', 'gmm', 'log2', or 'none'.", 142 | ) 143 | parser.add_argument( 144 | "--min_nmf_iterations", 145 | type=int, 146 | default=10000, 147 | help="Minimum NMF iterations (default: 10000).", 148 | ) 149 | parser.add_argument( 150 | "--max_nmf_iterations", 151 | type=int, 152 | default=1000000, 153 | help="Maximum NMF iterations (default: 1000000).", 154 | ) 155 | parser.add_argument( 156 | "--nmf_test_conv", 157 | type=int, 158 | default=10000, 159 | help="Test convergence every X iterations (default: 10000).", 160 | ) 161 | parser.add_argument( 162 | "--nmf_tolerance", 163 | type=float, 164 | default=1e-15, 165 | help="NMF tolerance for convergence (default: 1e-15).", 166 | ) 167 | parser.add_argument( 168 | "--get_all_signature_matrices", 169 | type=str2bool, 170 | nargs="?", 171 | const=True, 172 | default=False, 173 | help="Get all NMF matrices (default: False).", 174 | ) 175 | parser.add_argument( 176 | "--export_probabilities", 177 | type=str2bool, 178 | nargs="?", 179 | const=True, 180 | default=True, 181 | help="Export probability matrix (default: True).", 182 | ) 183 | parser.add_argument( 184 | "--stability", 185 | type=float, 186 | default=0.8, 187 | help="Average stability cutoff (default: 0.8).", 188 | ) 189 | parser.add_argument( 190 | "--min_stability", 191 | type=float, 192 | default=0.2, 193 | help="Minimum stability cutoff (default: 0.2).", 194 | ) 195 | parser.add_argument( 196 | "--combined_stability", 197 | type=float, 198 | default=1.0, 199 | help="Combined stability cutoff (default: 1.0).", 200 | ) 201 | parser.add_argument( 202 | "--allow_stability_drop", 203 | type=str2bool, 204 | nargs="?", 205 | const=True, 206 | default=False, 207 | help="Allow stability drop (default: False).", 208 | ) 209 | parser.add_argument( 210 | "--cosmic_version", 211 | type=float, 212 | default=3.4, 213 | help="COSMIC version for reference signatures. Valid values are 1, 2, 3, 3.1, 3.2, 3.3, and 3.4 (default: 3.4).", 214 | ) 215 | parser.add_argument( 216 | "--make_decomposition_plots", 217 | type=str2bool, 218 | nargs="?", 219 | const=True, 220 | default=True, 221 | help="Generate decomposition plots (default: True).", 222 | ) 223 | parser.add_argument( 224 | "--collapse_to_SBS96", 225 | type=str2bool, 226 | nargs="?", 227 | const=True, 228 | default=True, 229 | help="Collapse to SBS288 and SBS1536 matrices to SBS96. If False, will map reference signatures to the same context as input (default: True).", 230 | ) 231 | 232 | parser.add_argument( 233 | "--stop_after_extraction", 234 | type=str2bool, 235 | nargs="?", 236 | const=True, 237 | default=False, 238 | help="Stop after de novo extraction (default: False).", 239 | ) 240 | 241 | parser.add_argument( 242 | "--volume", 243 | default=None, 244 | help="User specified directory for saving/loading template files.", 245 | ) 246 | 247 | return parser.parse_args(args) 248 | 249 | 250 | class CliController: 251 | def dispatch_sigProfilerExtractor(self, user_args: List[str]) -> None: 252 | parsed_args = parse_arguments_extractor( 253 | user_args, "Extract mutational signatures from input samples." 254 | ) 255 | sigpro.sigProfilerExtractor( 256 | input_type=parsed_args.input_type, 257 | output=parsed_args.output, 258 | input_data=parsed_args.input_data, 259 | reference_genome=parsed_args.reference_genome, 260 | opportunity_genome=parsed_args.opportunity_genome, 261 | context_type=parsed_args.context_type, 262 | exome=parsed_args.exome, 263 | minimum_signatures=parsed_args.minimum_signatures, 264 | maximum_signatures=parsed_args.maximum_signatures, 265 | nmf_replicates=parsed_args.nmf_replicates, 266 | resample=parsed_args.resample, 267 | seeds=parsed_args.seeds, 268 | batch_size=parsed_args.batch_size, 269 | cpu=parsed_args.cpu, 270 | assignment_cpu=parsed_args.assignment_cpu, 271 | gpu=parsed_args.gpu, 272 | nmf_init=parsed_args.nmf_init, 273 | precision=parsed_args.precision, 274 | matrix_normalization=parsed_args.matrix_normalization, 275 | min_nmf_iterations=parsed_args.min_nmf_iterations, 276 | max_nmf_iterations=parsed_args.max_nmf_iterations, 277 | nmf_test_conv=parsed_args.nmf_test_conv, 278 | nmf_tolerance=parsed_args.nmf_tolerance, 279 | get_all_signature_matrices=parsed_args.get_all_signature_matrices, 280 | export_probabilities=parsed_args.export_probabilities, 281 | stability=parsed_args.stability, 282 | min_stability=parsed_args.min_stability, 283 | combined_stability=parsed_args.combined_stability, 284 | allow_stability_drop=parsed_args.allow_stability_drop, 285 | cosmic_version=parsed_args.cosmic_version, 286 | make_decomposition_plots=parsed_args.make_decomposition_plots, 287 | collapse_to_SBS96=parsed_args.collapse_to_SBS96, 288 | stop_after_extraction=parsed_args.stop_after_extraction, 289 | volume=parsed_args.volume, 290 | ) 291 | -------------------------------------------------------------------------------- /SigProfilerExtractor/nmf_gpu.py: -------------------------------------------------------------------------------- 1 | """ 2 | Implementation of non-negative matrix factorization for GPU 3 | """ 4 | 5 | from datetime import datetime 6 | 7 | from nimfa.methods.seeding import nndsvd 8 | import numpy as np 9 | import torch 10 | import torch.nn 11 | from torch import nn 12 | 13 | 14 | class NMF: 15 | def __init__( 16 | self, 17 | V, 18 | rank, 19 | max_iterations=200000, 20 | tolerance=1e-8, 21 | test_conv=1000, 22 | gpu_id=0, 23 | generator=None, 24 | init_method="nndsvd", 25 | floating_point_precision="single", 26 | min_iterations=2000, 27 | ): 28 | """ 29 | Run non-negative matrix factorisation using GPU. Uses beta-divergence. 30 | 31 | Args: 32 | V: Matrix to be factorised 33 | rank: (int) number of latent dimensnions to use in factorisation 34 | max_iterations: (int) Maximum number of update iterations to use during fitting 35 | tolerance: tolerance to use in convergence tests. Lower numbers give longer times to convergence 36 | test_conv: (int) How often to test for convergnce 37 | gpu_id: (int) Which GPU device to use 38 | generator: random generator, if None (default) datetime is used 39 | init_method: how to initialise basis and coefficient matrices, options are: 40 | - random (will always be the same if set generator != None) 41 | - NNDSVD 42 | - NNDSVDa (fill in the zero elements with the average), 43 | - NNDSVDar (fill in the zero elements with random values in the space [0:average/100]). 44 | floating_point_precision: (string or type). Can be `double`, `float` or any type/string which 45 | torch can interpret. 46 | min_iterations: the minimum number of iterations to execute before termination. Useful when using 47 | fp32 tensors as convergence can happen too early. 48 | """ 49 | torch.cuda.set_device(gpu_id) 50 | 51 | if floating_point_precision == "single": 52 | self._tensor_type = torch.FloatTensor 53 | self._np_dtype = np.float32 54 | elif floating_point_precision == "double": 55 | self._tensor_type = torch.DoubleTensor 56 | self._np_dtype = np.float64 57 | else: 58 | raise ValueError("Precision needs to be either 'single' or 'double'.") 59 | 60 | self.max_iterations = max_iterations 61 | self.min_iterations = min_iterations 62 | 63 | # If V is not in a batch, put it in a batch of 1 64 | if len(V.shape) == 2: 65 | V = V[None, :, :] 66 | 67 | self._V = V.type(self._tensor_type).cuda() 68 | self._fix_neg = nn.Threshold(0.0, 1e-8) 69 | self._tolerance = tolerance 70 | self._prev_loss = None 71 | self._iter = 0 72 | self._test_conv = test_conv 73 | self._gpu_id = gpu_id 74 | self._rank = rank 75 | self._generator = generator 76 | self._W, self._H = self._initialise_wh(init_method) 77 | 78 | def _initialise_wh(self, init_method): 79 | """ 80 | Initialise basis and coefficient matrices according to `init_method` 81 | """ 82 | if init_method == "random": 83 | W = torch.from_numpy( 84 | self._generator.random( 85 | (self._V.shape[0], self._V.shape[1], self._rank), dtype=np.float64 86 | ) 87 | ).cuda() 88 | H = torch.from_numpy( 89 | self._generator.random( 90 | (self._V.shape[0], self._rank, self._V.shape[2]), dtype=np.float64 91 | ) 92 | ).cuda() 93 | if self._np_dtype is np.float32: 94 | W = W.float() 95 | H = H.float() 96 | return W, H 97 | 98 | elif init_method == "nndsvd": 99 | W = np.zeros([self._V.shape[0], self._V.shape[1], self._rank]) 100 | H = np.zeros([self._V.shape[0], self._rank, self._V.shape[2]]) 101 | nv = nndsvd.Nndsvd() 102 | for i in range(self._V.shape[0]): 103 | vin = np.asmatrix(self._V.cpu().numpy()[i]) 104 | W[i, :, :], H[i, :, :] = nv.initialize( 105 | vin, self._rank, options={"flag": 0} 106 | ) 107 | 108 | elif init_method == "nndsvda": 109 | W = np.zeros([self._V.shape[0], self._V.shape[1], self._rank]) 110 | H = np.zeros([self._V.shape[0], self._rank, self._V.shape[2]]) 111 | nv = nndsvd.Nndsvd() 112 | for i in range(self._V.shape[0]): 113 | vin = np.asmatrix(self._V.cpu().numpy()[i]) 114 | W[i, :, :], H[i, :, :] = nv.initialize( 115 | vin, self._rank, options={"flag": 1} 116 | ) 117 | 118 | elif init_method == "nndsvdar": 119 | W = np.zeros([self._V.shape[0], self._V.shape[1], self._rank]) 120 | H = np.zeros([self._V.shape[0], self._rank, self._V.shape[2]]) 121 | nv = nndsvd.Nndsvd() 122 | for i in range(self._V.shape[0]): 123 | vin = np.asmatrix(self._V.cpu().numpy()[i]) 124 | W[i, :, :], H[i, :, :] = nv.initialize( 125 | vin, self._rank, options={"flag": 2} 126 | ) 127 | elif init_method == "nndsvd_min": 128 | W = np.zeros([self._V.shape[0], self._V.shape[1], self._rank]) 129 | H = np.zeros([self._V.shape[0], self._rank, self._V.shape[2]]) 130 | nv = nndsvd.Nndsvd() 131 | for i in range(self._V.shape[0]): 132 | vin = np.asmatrix(self._V.cpu().numpy()[i]) 133 | w, h = nv.initialize(vin, self._rank, options={"flag": 2}) 134 | min_X = np.min(vin[vin > 0]) 135 | h[h <= min_X] = min_X 136 | w[w <= min_X] = min_X 137 | # W= np.expand_dims(W, axis=0) 138 | # H = np.expand_dims(H, axis=0) 139 | W[i, :, :] = w 140 | H[i, :, :] = h 141 | # W,H=initialize_nm(vin, nfactors, init=init, eps=1e-6,random_state=None) 142 | W = torch.from_numpy(W).type(self._tensor_type).cuda(self._gpu_id) 143 | H = torch.from_numpy(H).type(self._tensor_type).cuda(self._gpu_id) 144 | return W, H 145 | 146 | @property 147 | def reconstruction(self): 148 | return self.W @ self.H 149 | 150 | @property 151 | def W(self): 152 | return self._W 153 | 154 | @property 155 | def H(self): 156 | return self._H 157 | 158 | @property 159 | def conv(self): 160 | try: 161 | return self._conv 162 | except: 163 | return 0 164 | 165 | @property 166 | def generator(self): 167 | return self._generator 168 | 169 | @property 170 | def _kl_loss(self): 171 | # calculate kl_loss in double precision for better convergence criteria 172 | return ( 173 | (self._V * (self._V / self.reconstruction).log()).sum(dtype=torch.float64) 174 | - self._V.sum(dtype=torch.float64) 175 | + self.reconstruction.sum(dtype=torch.float64) 176 | ) 177 | 178 | @property 179 | def _loss_converged(self): 180 | """ 181 | Check if loss has converged 182 | """ 183 | if not self._iter: 184 | self._loss_init = self._kl_loss 185 | elif ((self._prev_loss - self._kl_loss) / self._loss_init) < self._tolerance: 186 | return True 187 | self._prev_loss = self._kl_loss 188 | return False 189 | 190 | def fit(self, beta=1): 191 | """ 192 | Fit the basis (W) and coefficient (H) matrices to the input matrix (V) using multiplicative updates and 193 | beta divergence 194 | Args: 195 | beta: value to use for generalised beta divergence. Default is 1 for KL divergence 196 | beta == 2 => Euclidean updates 197 | beta == 1 => Generalised Kullback-Leibler updates 198 | beta == 0 => Itakura-Saito updates 199 | """ 200 | with torch.no_grad(): 201 | 202 | def stop_iterations(): 203 | stop = ( 204 | (self._V.shape[0] == 1) 205 | and (self._iter % self._test_conv == 0) 206 | and self._loss_converged 207 | and (self._iter > self.min_iterations) 208 | ) 209 | if stop: 210 | pass 211 | # print("loss converged with {} iterations".format(self._iter)) 212 | return [stop, self._iter] 213 | 214 | if beta == 2: 215 | for self._iter in range(self.max_iterations): 216 | self._H = ( 217 | self.H 218 | * (self.W.transpose(1, 2) @ self._V) 219 | / (self.W.transpose(1, 2) @ (self.W @ self.H)) 220 | ) 221 | self._W = ( 222 | self.W 223 | * (self._V @ self.H.transpose(1, 2)) 224 | / (self.W @ (self.H @ self.H.transpose(1, 2))) 225 | ) 226 | if stop_iterations()[0]: 227 | self._conv = stop_iterations()[1] 228 | break 229 | 230 | # Optimisations for the (common) beta=1 (KL) case. 231 | elif beta == 1: 232 | ones = ( 233 | torch.ones(self._V.shape).type(self._tensor_type).cuda(self._gpu_id) 234 | ) 235 | for self._iter in range(self.max_iterations): 236 | ht = self.H.transpose(1, 2) 237 | numerator = (self._V / (self.W @ self.H)) @ ht 238 | 239 | denomenator = ones @ ht 240 | self._W *= numerator / denomenator 241 | 242 | wt = self.W.transpose(1, 2) 243 | numerator = wt @ (self._V / (self.W @ self.H)) 244 | denomenator = wt @ ones 245 | self._H *= numerator / denomenator 246 | if stop_iterations()[0]: 247 | self._conv = stop_iterations()[1] 248 | break 249 | 250 | else: 251 | for self._iter in range(self.max_iterations): 252 | self._H = self.H * ( 253 | ( 254 | self.W.transpose(1, 2) 255 | @ (((self.W @ self.H) ** (beta - 2)) * self._V) 256 | ) 257 | / (self.W.transpose(1, 2) @ ((self.W @ self.H) ** (beta - 1))) 258 | ) 259 | self._W = self.W * ( 260 | ( 261 | ((self.W @ self.H) ** (beta - 2) * self._V) 262 | @ self.H.transpose(1, 2) 263 | ) 264 | / (((self.W @ self.H) ** (beta - 1)) @ self.H.transpose(1, 2)) 265 | ) 266 | if stop_iterations()[0]: 267 | self._conv = stop_iterations()[1] 268 | break 269 | -------------------------------------------------------------------------------- /SigProfilerExtractor/nmf_cpu.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Mar 25 13:14:06 2020 5 | 6 | @author: mishugeb 7 | """ 8 | """ 9 | Implementation of non-negative matrix factorization for GPU 10 | """ 11 | 12 | from datetime import datetime 13 | 14 | from nimfa.methods.seeding import nndsvd 15 | import numpy as np 16 | import torch 17 | import torch.nn 18 | from torch import nn 19 | 20 | 21 | class NMF: 22 | def __init__( 23 | self, 24 | V, 25 | rank, 26 | max_iterations=200000, 27 | tolerance=1e-8, 28 | test_conv=1000, 29 | gpu_id=0, 30 | generator=None, 31 | init_method="nndsvd", 32 | floating_point_precision="single", 33 | min_iterations=2000, 34 | ): 35 | """ 36 | Run non-negative matrix factorisation using GPU. Uses beta-divergence. 37 | 38 | Args: 39 | V: Matrix to be factorised 40 | rank: (int) number of latent dimensnions to use in factorisation 41 | max_iterations: (int) Maximum number of update iterations to use during fitting 42 | tolerance: tolerance to use in convergence tests. Lower numbers give longer times to convergence 43 | test_conv: (int) How often to test for convergnce 44 | gpu_id: (int) Which GPU device to use 45 | generator: random generator, if None (default) datetime is used 46 | init_method: how to initialise basis and coefficient matrices, options are: 47 | - random (will always be the same if set generator != None) 48 | - NNDSVD 49 | - NNDSVDa (fill in the zero elements with the average), 50 | - NNDSVDar (fill in the zero elements with random values in the space [0:average/100]). 51 | floating_point_precision: (string or type). Can be `double`, `float` or any type/string which 52 | torch can interpret. 53 | min_iterations: the minimum number of iterations to execute before termination. Useful when using 54 | fp32 tensors as convergence can happen too early. 55 | """ 56 | # torch.cuda.set_device(gpu_id) 57 | 58 | if floating_point_precision == "single": 59 | self._tensor_type = torch.FloatTensor 60 | self._np_dtype = np.float32 61 | elif floating_point_precision == "double": 62 | self._tensor_type = torch.DoubleTensor 63 | self._np_dtype = np.float64 64 | else: 65 | raise ValueError("Precision needs to be either 'single' or 'double'.") 66 | 67 | self.max_iterations = max_iterations 68 | self.min_iterations = min_iterations 69 | 70 | # If V is not in a batch, put it in a batch of 1 71 | if len(V.shape) == 2: 72 | V = V[None, :, :] 73 | 74 | self._V = V.type(self._tensor_type) 75 | self._fix_neg = nn.Threshold(0.0, 1e-8) 76 | self._tolerance = tolerance 77 | self._prev_loss = None 78 | self._iter = 0 79 | self._test_conv = test_conv 80 | # self._gpu_id = gpu_id 81 | self._rank = rank 82 | self._generator = generator 83 | self._W, self._H = self._initialise_wh(init_method) 84 | 85 | def _initialise_wh(self, init_method): 86 | """ 87 | Initialise basis and coefficient matrices according to `init_method` 88 | """ 89 | if init_method == "random": 90 | W = torch.unsqueeze( 91 | torch.from_numpy( 92 | self._generator.random( 93 | (self._V.shape[1], self._rank), dtype=np.float64 94 | ) 95 | ), 96 | 0, 97 | ) 98 | H = torch.unsqueeze( 99 | torch.from_numpy( 100 | self._generator.random( 101 | (self._rank, self._V.shape[2]), dtype=np.float64 102 | ) 103 | ), 104 | 0, 105 | ) 106 | if self._np_dtype is np.float32: 107 | W = W.float() 108 | H = H.float() 109 | return W, H 110 | 111 | elif init_method == "nndsvd": 112 | W = np.zeros([self._V.shape[0], self._V.shape[1], self._rank]) 113 | H = np.zeros([self._V.shape[0], self._rank, self._V.shape[2]]) 114 | nv = nndsvd.Nndsvd() 115 | for i in range(self._V.shape[0]): 116 | vin = np.asmatrix(self._V.cpu().numpy()[i]) 117 | W[i, :, :], H[i, :, :] = nv.initialize( 118 | vin, self._rank, options={"flag": 0} 119 | ) 120 | 121 | elif init_method == "nndsvda": 122 | W = np.zeros([self._V.shape[0], self._V.shape[1], self._rank]) 123 | H = np.zeros([self._V.shape[0], self._rank, self._V.shape[2]]) 124 | nv = nndsvd.Nndsvd() 125 | for i in range(self._V.shape[0]): 126 | vin = np.asmatrix(self._V.cpu().numpy()[i]) 127 | W[i, :, :], H[i, :, :] = nv.initialize( 128 | vin, self._rank, options={"flag": 1} 129 | ) 130 | 131 | elif init_method == "nndsvdar": 132 | W = np.zeros([self._V.shape[0], self._V.shape[1], self._rank]) 133 | H = np.zeros([self._V.shape[0], self._rank, self._V.shape[2]]) 134 | nv = nndsvd.Nndsvd() 135 | for i in range(self._V.shape[0]): 136 | vin = np.asmatrix(self._V.cpu().numpy()[i]) 137 | W[i, :, :], H[i, :, :] = nv.initialize( 138 | vin, self._rank, options={"flag": 2} 139 | ) 140 | elif init_method == "nndsvd_min": 141 | W = np.zeros([self._V.shape[0], self._V.shape[1], self._rank]) 142 | H = np.zeros([self._V.shape[0], self._rank, self._V.shape[2]]) 143 | nv = nndsvd.Nndsvd() 144 | for i in range(self._V.shape[0]): 145 | vin = np.asmatrix(self._V.cpu().numpy()[i]) 146 | w, h = nv.initialize(vin, self._rank, options={"flag": 2}) 147 | min_X = np.min(vin[vin > 0]) 148 | h[h <= min_X] = min_X 149 | w[w <= min_X] = min_X 150 | # W= np.expand_dims(W, axis=0) 151 | # H = np.expand_dims(H, axis=0) 152 | W[i, :, :] = w 153 | H[i, :, :] = h 154 | # W,H=initialize_nm(vin, nfactors, init=init, eps=1e-6,random_state=None) 155 | W = torch.from_numpy(W).type(self._tensor_type) 156 | H = torch.from_numpy(H).type(self._tensor_type) 157 | return W, H 158 | 159 | @property 160 | def reconstruction(self): 161 | return self.W @ self.H 162 | 163 | @property 164 | def W(self): 165 | return self._W 166 | 167 | @property 168 | def H(self): 169 | return self._H 170 | 171 | @property 172 | def conv(self): 173 | try: 174 | return self._conv 175 | except: 176 | return 0 177 | 178 | @property 179 | def _kl_loss(self): 180 | # calculate kl_loss in double precision for better convergence criteria 181 | return ( 182 | (self._V * (self._V / self.reconstruction).log()).sum(dtype=torch.float64) 183 | - self._V.sum(dtype=torch.float64) 184 | + self.reconstruction.sum(dtype=torch.float64) 185 | ) 186 | 187 | @property 188 | def generator(self): 189 | return self._generator 190 | 191 | @property 192 | def _loss_converged(self): 193 | """ 194 | Check if loss has converged 195 | """ 196 | if not self._iter: 197 | self._loss_init = self._kl_loss 198 | elif ((self._prev_loss - self._kl_loss) / self._loss_init) < self._tolerance: 199 | return True 200 | self._prev_loss = self._kl_loss 201 | return False 202 | 203 | def fit(self, beta=1): 204 | """ 205 | Fit the basis (W) and coefficient (H) matrices to the input matrix (V) using multiplicative updates and 206 | beta divergence 207 | Args: 208 | beta: value to use for generalised beta divergence. Default is 1 for KL divergence 209 | beta == 2 => Euclidean updates 210 | beta == 1 => Generalised Kullback-Leibler updates 211 | beta == 0 => Itakura-Saito updates 212 | """ 213 | with torch.no_grad(): 214 | 215 | def stop_iterations(): 216 | stop = ( 217 | (self._V.shape[0] == 1) 218 | and (self._iter % self._test_conv == 0) 219 | and self._loss_converged 220 | and (self._iter > self.min_iterations) 221 | ) 222 | if stop: 223 | pass 224 | # print("loss converged with {} iterations".format(self._iter)) 225 | return [stop, self._iter] 226 | 227 | if beta == 2: 228 | for self._iter in range(self.max_iterations): 229 | self._H = ( 230 | self.H 231 | * (self.W.transpose(1, 2) @ self._V) 232 | / (self.W.transpose(1, 2) @ (self.W @ self.H)) 233 | ) 234 | self._W = ( 235 | self.W 236 | * (self._V @ self.H.transpose(1, 2)) 237 | / (self.W @ (self.H @ self.H.transpose(1, 2))) 238 | ) 239 | if stop_iterations()[0]: 240 | self._conv = stop_iterations()[1] 241 | break 242 | 243 | # Optimisations for the (common) beta=1 (KL) case. 244 | elif beta == 1: 245 | ones = torch.ones(self._V.shape).type(self._tensor_type) 246 | for self._iter in range(self.max_iterations): 247 | ht = self.H.transpose(1, 2) 248 | numerator = (self._V / (self.W @ self.H)) @ ht 249 | 250 | denomenator = ones @ ht 251 | self._W *= numerator / denomenator 252 | 253 | wt = self.W.transpose(1, 2) 254 | numerator = wt @ (self._V / (self.W @ self.H)) 255 | denomenator = wt @ ones 256 | self._H *= numerator / denomenator 257 | if stop_iterations()[0]: 258 | self._conv = stop_iterations()[1] 259 | break 260 | 261 | else: 262 | for self._iter in range(self.max_iterations): 263 | self._H = self.H * ( 264 | ( 265 | self.W.transpose(1, 2) 266 | @ (((self.W @ self.H) ** (beta - 2)) * self._V) 267 | ) 268 | / (self.W.transpose(1, 2) @ ((self.W @ self.H) ** (beta - 1))) 269 | ) 270 | self._W = self.W * ( 271 | ( 272 | ((self.W @ self.H) ** (beta - 2) * self._V) 273 | @ self.H.transpose(1, 2) 274 | ) 275 | / (((self.W @ self.H) ** (beta - 1)) @ self.H.transpose(1, 2)) 276 | ) 277 | if stop_iterations()[0]: 278 | self._conv = stop_iterations()[1] 279 | break 280 | -------------------------------------------------------------------------------- /SigProfilerExtractor/data/TextInput/Samples_CNV.txt: -------------------------------------------------------------------------------- 1 | MutationType IGC-03-1328-T07 NSLC-1060-T01 NSLC-0101-T01-R1 IGC-02-1001-T03 NSLC-1070-T01 IGC-02-1097-T01 NSLC-0097-T01 NSLC-0112-T01 NSLC-0170-T01 IGC-02-1194-T01 NSLC-0168-T01 IGC-10-1061-T01 NSLC-1063-T01 IGC-13-1146-T01 IGC-10-1037-T03 NSLC-1069-T01 IGC-03-1321-T01 IGC-13-1174-T01 IGC-04-1164-T01 IGC-02-1212-T01 IGC-02-1025-T01 NSLC-1074-T01 NSLC-0107-T01-R1 NSLC-1050-T01 NSLC-1067-T01 IGC-08-1089-T01 IGC-02-1105-T01 IGC-03-1076-T06 IGC-03-1254-T10 NSLC-0092-T08 NSLC-1052-T01 NSLC-1061-T01 NSLC-1072-T01 IGC-11-1120-T01 IGC-02-1169-T01 IGC-11-1128-T07 IGC-04-1089-T01 IGC-13-1081-T01 NSLC-0111-T01 IGC-03-1265-T04 IGC-02-1029-T01 NSLC-1055-T01 NSLC-0109-T01 IGC-09-1008-T01 IGC-12-1086-T01 IGC-09-1113-T01 IGC-02-1017-T01 IGC-11-1053-T02 NSLC-1058-T01 NSLC-1076-T01 NSLC-1064-T01 IGC-13-1166-T01 IGC-03-1294-T03 NSLC-0105-T01-R1 IGC-10-1141-T01 IGC-11-1108-T02 IGC-10-1044-T01 IGC-03-1026-T02 NSLC-1071-T01 IGC-03-1088-T03 NSLC-1068-T01 IGC-08-1024-T01 NSLC-0106-T01 NSLC-0113-T01 IGC-04-1059-T02 NSLC-0095-T01 IGC-12-1069-T01 IGC-10-1180-T01 IGC-10-1185-T01 IGC-02-1074-T01 IGC-13-1182-T01 IGC-12-1088-T01 NSLC-0102-T01 NSLC-1059-T01 IGC-04-1041-T03 IGC-11-1132-T01 NSLC-1075-T01 NSLC-0171-T01 NSLC-0100-T02 IGC-03-1263-T05 NSLC-1065-T01 IGC-03-1267-T01 IGC-12-1114-T01 NSLC-0103-T01-R1 NSLC-1056-T01 IGC-02-1016-T01 IGC-03-1049-T03 NSLC-0104-T02 IGC-09-1042-T02 IGC-10-1045-T01 NSLC-0090-T01-R1 IGC-09-1086-T02 IGC-10-1021-T04 NSLC-1054-T01 NSLC-0110-T01 IGC-11-1087-T02 NSLC-0093-T02 IGC-10-1055-T01 NSLC-0167-T03 IGC-11-1085-T02 IGC-04-1154-T01 NSLC-0108-T01 NSLC-1066-T01 NSLC-0098-T02 NSLC-1077-T01 NSLC-1051-T01 NSLC-0169-T01 NSLC-1073-T01 2 | 0:homdel:0-100kb 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 3 | 0:homdel:100kb-1Mb 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 1 0 1 2 0 0 0 0 0 0 0 2 0 1 0 0 0 0 0 0 0 0 0 0 1 0 2 0 1 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 | 0:homdel:>1Mb 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 2 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 | 1:LOH:0-100kb 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 2 0 0 0 0 1 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 2 0 0 0 0 0 0 0 0 0 0 6 | 1:LOH:100kb-1Mb 0 1 0 1 0 0 1 0 0 0 0 2 0 0 0 1 0 0 0 0 0 0 0 4 0 1 1 0 0 1 0 0 0 0 3 0 1 0 0 0 0 0 0 0 0 0 2 0 0 2 0 0 1 0 0 0 0 1 0 1 0 2 0 2 0 0 0 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 7 | 1:LOH:1Mb-10Mb 0 0 1 1 0 0 3 0 0 0 0 0 0 1 0 2 2 1 1 1 0 1 0 1 0 0 6 0 0 0 3 0 0 0 0 0 0 0 0 1 3 0 1 0 0 3 4 1 2 0 1 1 1 0 0 0 2 0 1 1 1 2 0 2 0 0 0 0 1 0 0 2 0 0 2 7 0 0 2 0 1 0 0 0 1 0 2 0 0 1 1 0 0 2 0 2 1 3 0 1 0 0 0 0 0 1 0 0 8 | 1:LOH:10Mb-40Mb 0 1 1 0 0 0 5 0 0 0 0 0 0 0 0 0 1 0 0 1 0 1 0 1 0 1 4 0 0 1 2 0 1 0 0 0 1 0 0 0 0 0 0 0 1 1 2 0 2 2 1 1 3 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 3 1 0 1 0 0 0 1 0 0 0 2 1 0 0 1 0 0 0 0 0 0 4 0 2 0 0 0 0 0 0 0 0 9 | 1:LOH:>40Mb 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 1 0 0 0 0 3 0 0 0 7 3 0 0 0 1 0 0 0 0 0 0 0 0 0 3 0 1 0 0 1 2 0 0 0 4 0 1 2 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 1 0 0 1 2 0 0 1 0 0 0 0 0 5 0 1 3 0 0 0 1 0 0 0 3 0 3 0 1 0 0 0 0 0 0 0 0 10 | 2:LOH:0-100kb 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 11 | 2:LOH:100kb-1Mb 0 1 0 2 1 1 1 1 0 0 0 0 0 2 0 1 0 1 0 0 0 0 1 2 0 0 0 0 0 3 0 0 0 0 0 2 1 0 0 0 0 0 0 0 0 0 1 2 0 1 0 3 2 0 0 0 1 1 0 2 0 0 0 0 0 0 0 0 2 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 12 | 2:LOH:1Mb-10Mb 1 0 2 2 0 2 0 0 1 1 0 3 1 0 1 7 1 1 1 0 0 0 2 3 3 0 1 0 2 0 7 0 3 0 0 0 2 0 0 1 0 0 0 0 1 4 1 1 5 6 0 2 0 0 0 1 0 0 1 1 0 1 0 1 1 1 0 1 1 2 1 3 2 1 5 2 0 0 1 3 1 1 0 2 0 0 2 0 0 1 0 0 0 0 0 2 0 1 0 1 2 2 0 0 1 0 0 0 13 | 2:LOH:10Mb-40Mb 0 2 1 1 0 0 1 0 0 1 0 2 1 0 2 1 0 0 3 0 1 0 1 2 1 1 1 2 1 0 6 0 0 1 2 2 4 2 0 1 0 2 0 2 1 2 0 1 2 4 0 2 0 0 1 2 1 4 0 1 0 5 0 0 1 1 1 0 1 2 1 1 1 0 2 1 1 2 0 1 2 0 3 1 0 0 2 0 1 2 0 1 1 0 0 2 0 0 0 0 1 0 0 0 0 0 0 0 14 | 2:LOH:>40Mb 0 3 0 0 1 1 0 0 3 0 0 1 3 1 0 0 1 2 0 0 1 0 1 3 2 1 0 2 0 0 3 0 0 1 0 1 0 1 0 0 0 3 1 1 2 1 0 1 0 1 0 1 0 0 1 2 1 2 1 2 0 2 0 0 2 0 0 2 1 2 3 2 0 0 3 1 0 0 0 2 0 1 1 2 0 0 0 0 0 1 0 3 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 15 | 3-4:LOH:0-100kb 0 1 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 16 | 3-4:LOH:100kb-1Mb 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 2 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 2 0 1 1 0 0 0 1 0 0 0 0 0 0 0 1 0 1 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 17 | 3-4:LOH:1Mb-10Mb 0 0 1 1 0 0 0 0 1 2 0 3 0 2 2 4 0 0 4 0 0 0 0 1 6 1 0 1 0 0 2 0 1 1 0 3 0 0 0 1 0 0 0 0 0 1 0 0 2 0 0 0 1 0 0 0 0 0 3 0 1 0 0 0 0 1 0 1 3 0 0 0 1 0 2 0 0 0 0 0 0 1 0 2 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 1 0 2 0 0 0 18 | 3-4:LOH:10Mb-40Mb 0 0 0 1 0 0 0 1 0 2 0 0 0 0 0 2 0 1 0 0 0 0 2 1 1 0 0 0 0 0 1 0 0 3 0 0 1 0 0 0 0 0 0 0 2 1 0 2 0 1 0 0 0 0 0 0 2 1 0 0 0 1 0 0 0 2 0 0 1 1 2 0 0 0 1 0 0 0 0 0 0 1 2 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 19 | 3-4:LOH:>40Mb 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0 2 0 0 1 1 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 2 0 1 0 2 0 0 1 1 0 0 0 0 0 1 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 20 | 5-8:LOH:0-100kb 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 21 | 5-8:LOH:100kb-1Mb 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 2 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 22 | 5-8:LOH:1Mb-10Mb 0 1 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 1 0 2 1 0 0 0 0 1 1 0 0 2 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 23 | 5-8:LOH:10Mb-40Mb 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 2 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 24 | 5-8:LOH:>40Mb 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 25 | 9+:LOH:0-100kb 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 26 | 9+:LOH:100kb-1Mb 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 27 | 9+:LOH:1Mb-10Mb 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 2 0 0 0 2 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 28 | 9+:LOH:10Mb-40Mb 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 29 | 9+:LOH:>40Mb 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 30 | 2:het:0-100kb 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 31 | 2:het:100kb-1Mb 0 0 0 0 0 1 1 0 0 0 2 2 0 0 0 0 0 0 0 1 0 0 0 2 1 1 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 1 1 0 1 0 1 0 0 0 0 0 0 1 0 0 2 1 0 2 0 0 0 0 0 2 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 2 1 0 0 0 1 0 0 0 1 0 2 0 1 0 0 0 0 0 0 0 0 32 | 2:het:1Mb-10Mb 2 0 0 1 1 0 3 0 0 0 8 1 0 0 0 3 2 1 1 0 1 2 0 1 1 1 1 0 0 0 5 0 0 0 0 0 0 0 0 0 1 0 1 0 0 1 2 0 1 0 1 0 4 0 0 0 0 1 2 1 5 1 0 2 0 0 0 0 0 2 0 2 0 0 1 4 0 0 1 1 1 0 0 0 0 0 2 0 0 1 0 0 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 0 33 | 2:het:10Mb-40Mb 1 0 1 2 1 1 3 1 0 0 1 1 0 0 0 0 0 0 0 2 1 0 0 5 0 0 2 0 0 3 3 0 0 0 0 0 0 1 0 1 4 1 1 1 2 3 3 0 1 1 4 1 2 2 1 0 0 0 0 0 2 1 0 2 2 0 0 0 0 1 0 3 1 1 2 0 1 0 1 0 0 0 0 0 0 0 4 2 0 0 0 2 1 0 2 2 2 2 1 2 0 0 0 0 0 0 0 0 34 | 2:het:>40Mb 1 1 0 1 0 0 3 1 2 0 5 1 0 0 0 1 3 0 0 2 0 3 2 0 0 7 7 0 0 6 0 2 3 0 4 1 1 0 0 0 3 0 2 1 0 2 4 0 1 1 3 1 2 6 0 0 0 0 1 1 7 2 5 4 1 0 0 0 0 0 2 3 2 0 0 2 0 0 4 0 2 1 0 0 1 0 2 2 1 0 0 1 0 1 1 3 4 2 1 2 0 0 0 3 0 0 0 0 35 | 3-4:het:0-100kb 0 0 0 2 0 3 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 3 0 1 0 0 0 0 0 0 0 0 0 0 0 2 0 0 1 0 0 0 0 0 0 0 1 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 36 | 3-4:het:100kb-1Mb 0 0 0 2 2 8 0 0 0 0 3 4 0 2 0 0 1 1 1 1 1 0 0 1 1 0 0 2 3 3 4 0 2 0 2 2 2 2 0 0 0 0 0 0 4 5 1 1 3 1 0 5 1 0 0 0 2 2 0 2 2 4 0 0 1 0 2 0 0 3 0 1 2 1 2 0 0 1 0 0 0 0 1 1 0 2 1 0 0 4 0 1 0 0 0 4 0 2 0 3 0 0 1 0 0 0 1 0 37 | 3-4:het:1Mb-10Mb 5 1 11 10 8 8 2 2 7 7 8 14 1 3 0 4 4 5 6 0 4 0 2 3 6 1 3 3 3 0 17 4 3 1 1 8 5 0 0 3 2 2 0 4 15 0 1 2 4 4 1 6 1 0 1 2 1 7 4 6 7 4 0 3 0 1 3 1 2 6 0 6 3 5 9 4 1 3 0 1 0 4 4 0 0 1 1 1 1 15 0 4 0 0 0 1 0 3 0 0 3 1 0 0 0 0 0 0 38 | 3-4:het:10Mb-40Mb 2 3 5 3 3 4 0 2 0 2 1 7 0 2 0 4 0 4 2 0 2 0 1 8 9 0 1 2 1 0 7 3 1 0 1 4 5 2 1 2 2 1 0 3 4 7 0 1 5 7 2 2 0 0 2 0 2 3 0 1 1 0 0 0 1 3 3 4 3 5 5 7 3 5 4 0 1 2 0 2 3 2 0 0 0 4 1 2 2 8 0 4 3 0 0 1 1 1 0 0 3 2 3 0 2 1 0 0 39 | 3-4:het:>40Mb 2 1 5 7 2 7 0 4 8 6 0 2 1 6 3 3 1 4 0 4 6 0 8 0 0 2 1 1 8 1 2 2 0 3 0 3 5 1 4 1 0 1 0 3 0 2 0 1 5 4 1 4 0 0 1 6 2 3 2 5 0 2 0 2 1 6 2 2 3 3 3 4 4 3 0 1 3 2 2 1 2 3 3 1 1 4 1 0 4 2 2 1 5 0 1 3 2 0 2 1 3 1 3 0 0 4 5 1 40 | 5-8:het:0-100kb 0 0 0 0 0 2 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 41 | 5-8:het:100kb-1Mb 3 0 2 1 0 4 0 0 3 1 0 2 0 0 1 2 2 0 2 0 0 0 2 5 1 0 0 1 0 0 0 1 0 0 0 2 0 2 0 0 1 0 0 0 4 4 0 0 3 0 0 1 0 0 0 3 2 6 0 1 0 0 0 0 0 1 0 0 0 1 0 0 0 0 1 0 2 0 0 0 0 0 2 0 0 0 0 0 0 2 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42 | 5-8:het:1Mb-10Mb 1 0 2 0 1 5 0 0 1 4 0 11 0 9 2 3 4 10 7 2 0 0 0 3 7 0 1 2 1 1 2 0 1 1 2 8 3 0 2 2 0 1 0 3 8 6 1 0 0 4 0 1 0 0 1 1 1 2 9 0 1 0 0 0 0 3 0 0 0 4 1 0 2 2 6 0 0 2 0 0 2 0 2 0 0 2 1 0 1 10 0 2 1 0 0 1 0 0 0 0 0 0 2 0 3 0 0 0 43 | 5-8:het:10Mb-40Mb 0 3 1 0 0 0 0 0 2 0 0 4 0 0 2 1 1 1 6 0 2 0 0 1 3 0 0 0 0 0 1 0 0 1 0 2 3 1 1 2 0 2 0 1 1 3 0 0 0 1 0 0 1 0 3 0 1 2 0 0 0 0 0 0 0 3 4 0 0 2 1 0 0 2 1 0 0 2 0 2 1 0 0 1 0 0 2 0 0 2 0 1 1 0 0 2 0 1 0 0 0 0 1 0 1 0 0 0 44 | 5-8:het:>40Mb 0 0 0 0 2 0 0 1 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 2 0 1 0 0 0 1 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 2 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 45 | 9+:het:0-100kb 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 46 | 9+:het:100kb-1Mb 4 0 0 0 0 0 0 0 0 0 2 2 0 2 0 0 0 2 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 1 4 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 2 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 47 | 9+:het:1Mb-10Mb 0 0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 0 2 1 0 0 0 0 0 0 0 0 4 0 2 0 1 1 0 0 0 0 0 0 0 0 0 0 1 1 2 0 0 0 0 0 0 1 0 0 1 1 0 1 0 0 0 0 0 2 1 0 0 0 1 0 0 0 0 0 0 0 2 0 0 0 0 0 1 0 2 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 48 | 9+:het:10Mb-40Mb 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 49 | 9+:het:>40Mb 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 50 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Docs](https://img.shields.io/badge/docs-latest-blue.svg)](https://osf.io/t6j7u/wiki/home/) 2 | [![License](https://img.shields.io/badge/License-BSD\%202--Clause-orange.svg)](https://opensource.org/licenses/BSD-2-Clause) 3 | [![Build Status](https://app.travis-ci.com/AlexandrovLab/SigProfilerExtractor.svg?branch=master)](https://app.travis-ci.com/AlexandrovLab/SigProfilerExtractor) 4 | 5 | # SigProfilerExtractor 6 | SigProfilerExtractor allows de novo extraction of mutational signatures from data generated in a matrix format. 7 | The tool identifies the number of operative mutational signatures, their activities in each sample, and the probability 8 | for each signature to cause a specific mutation type in a cancer sample. The tool makes use of SigProfilerMatrixGenerator 9 | and SigProfilerPlotting. Detailed documentation can be found at: https://osf.io/t6j7u/wiki/home/ 10 | 11 | # Table of contents 12 | - [Installation](#installation) 13 | - [Functions](#functions) 14 | - [importdata](#importdata) 15 | - [sigProfilerExtractor](#sigProfilerExtractor) 16 | - [estimate_solution](#estimate_solution) 17 | - [decompose](#decompose) 18 | - [PlotActivity.py](#plotActivity) 19 | - [Video Tutorials](#video_tutorials) 20 | - [Citation](#citation) 21 | - [Copyright](#copyright) 22 | - [Contact Information](#contact) 23 | 24 | 25 | ## Installation 26 | 27 | To install the current version of this Github repo, git clone this repo or download the zip file. 28 | Unzip the contents of SigProfilerExtractor-master.zip or the zip file of a corresponding branch. 29 | 30 | In the command line, please run the following: 31 | ```bash 32 | $ cd SigProfilerExtractor-master 33 | $ pip install . 34 | ``` 35 | 36 | For most recent stable pypi version of this tool, 37 | In the command line, please run the following: 38 | ```bash 39 | $ pip install SigProfilerExtractor 40 | ``` 41 | 42 | Install your desired reference genome from the command line/terminal as follows (available reference genomes are: GRCh37, GRCh38, mm9, and mm10): 43 | ```python 44 | $ python 45 | from SigProfilerMatrixGenerator import install as genInstall 46 | genInstall.install('GRCh37') 47 | ``` 48 | 49 | This will install the human 37 assembly as a reference genome. You may install as many genomes as you wish. 50 | 51 | Next, open a python interpreter and import the SigProfilerExtractor module. Please see the examples of the functions. 52 | 53 | ## Functions 54 | The list of available functions are: 55 | - importdata 56 | - sigProfilerExtractor 57 | - estimate_solution 58 | - decompose 59 | 60 | And an additional script: 61 | - plotActivity.py 62 | 63 | ### importdata 64 | Imports the path of example data. 65 | 66 | ```python 67 | importdata(datatype="matrix") 68 | ``` 69 | 70 | #### importdata Example 71 | 72 | ```python 73 | from SigProfilerExtractor import sigpro as sig 74 | path_to_example_table = sig.importdata("matrix") 75 | data = path_to_example_table 76 | # This "data" variable can be used as a parameter of the "project" argument of the sigProfilerExtractor function. 77 | 78 | # To get help on the parameters and outputs of the "importdata" function, please use the following: 79 | help(sig.importdata) 80 | ``` 81 | 82 | ### sigProfilerExtractor 83 | 84 | Extracts mutational signatures from an array of samples. 85 | 86 | ```python 87 | sigProfilerExtractor(input_type, out_put, input_data, reference_genome="GRCh37", opportunity_genome = "GRCh37", context_type = "default", exome = False, 88 | minimum_signatures=1, maximum_signatures=10, nmf_replicates=100, resample = True, batch_size=1, cpu=-1, gpu=False, 89 | nmf_init="random", precision= "single", matrix_normalization= "gmm", seeds= "random", 90 | min_nmf_iterations= 10000, max_nmf_iterations=1000000, nmf_test_conv= 10000, nmf_tolerance= 1e-15, get_all_signature_matrices= False) 91 | ``` 92 | 93 | | Category | Parameter | Variable Type | Parameter Description | 94 | | --------- | --------------------- | -------- |-------- | 95 | | **Input Data** | | | | 96 | | | **input_type** | String | The type of input:
| 97 | | | **output** | String | The name of the output folder. The output folder will be generated in the current working directory. | 98 | | | **input_data** | String |
Path to input folder for input_type:Path to file for input_type: | 99 | | | **reference_genome** | String | The name of the reference genome (default: `"GRCh37"`). This parameter is applicable only if the `input_type` is `"vcf"`. | 100 | | | **opportunity_genome** | String | The build or version of the reference genome for the reference signatures (default: `"GRCh37"`). When the input_type is `"vcf"`, the opportunity_genome automatically matches the input reference genome value. Only the genomes available in COSMIC are supported (`GRCh37`, `GRCh38`, `mm9`, `mm10`, `mm39`, `rn6`, and `rn7`). If a different opportunity genome is selected, the default genome `GRCh37` will be used. | 101 | | | **context_type** | String | Mutation context name(s), separated by commas (`,`), that define the mutational contexts for signature extraction (default: `"96,DINUC,ID"`). In the default value, `96` represents the SBS96 context, `DINUC` represents the dinucleotide context, and `ID` represents the indel context. | 102 | | | **exome** | Boolean | Defines if the exomes will be extracted (default: `False`). | 103 | | **NMF Replicates** | | | | 104 | | | **minimum_signatures** | Positive Integer | The minimum number of signatures to be extracted (default: `1`). | 105 | | | **maximum_signatures** | Positive Integer | The maximum number of signatures to be extracted (default: `25`). | 106 | | | **nmf_replicates** | Positive Integer | The number of iteration to be performed to extract each number signature (default: `100`). | 107 | | | **resample** | Boolean | If `True`, add poisson noise to samples by resampling (default: `True`). | 108 | | | **seeds** | String | Ensures reproducible NMF replicate resamples. Provide the path to the `Seeds.txt` file (found in the results folder from a previous analysis) to reproduce results (default: `"random"`). | 109 | | **NMF Engines** | | | | 110 | | | **matrix_normalization** | String | Method of normalizing the genome matrix before it is analyzed by NMF (default: `"gmm"`). Options are, `"log2"`, `"custom"` or `"none"`. | 111 | | | **nmf_init** | String | The initialization algorithm for W and H matrix of NMF (default: `"random"`). Options are `"random"`, `"nndsvd"`, `"nndsvda"`, `"nndsvdar"` and `"nndsvd_min"`. | 112 | | | **precision** | String | Values should be single or double (default: `"single"`). | 113 | | | **min_nmf_iterations** | Integer | Value defines the minimum number of iterations to be completed before NMF converges (default: `10000`). | 114 | | | **max_nmf_iterations** | Integer | Value defines the maximum number of iterations to be completed before NMF converges (default: `1000000`). | 115 | | | **nmf_test_conv** | Integer | Value defines the number number of iterations to done between checking next convergence (default: `10000`). | 116 | | | **nmf_tolerance** | Float | Value defines the tolerance to achieve to converge (default: `1e-15`).| 117 | | **Execution** | | | | 118 | | | **cpu** | Integer | The number of processors to be used to extract the signatures (default: all processors). | 119 | | | **assignment_cpu** | Integer | Number of processors to be used by SigProfilerAssignment for the final signature assignment step (default: all available). This is independent of the `cpu` parameter. | 120 | | | **gpu** | Boolean | Defines if the GPU resource will used if available (default: `False`). If `True`, the GPU resources will be used in the computation. *Note: All available CPU processors are used by default, which may cause a memory error. This error can be resolved by reducing the number of CPU processes through the `cpu` parameter.*| 121 | | | **batch_size** | Integer | Will be effective only if the GPU is used. Defines the number of NMF replicates to be performed by each CPU during the parallel processing (default: `1`). *Note: For `batch_size` values greater than 1, each NMF replicate will update until `max_nmf_iterations` is reached.*| 122 | | **Solution Estimation Thresholds** | | | | 123 | | | **stability** | Float | The cutoff thresh-hold of the average stability (default: `0.8`). Solutions with average stabilities below this thresh-hold will not be considered. | 124 | | | **min_stability** | Float | The cutoff thresh-hold of the minimum stability (default: `0.2`). Solutions with minimum stabilities below this thresh-hold will not be considered. | 125 | | | **combined_stability** | Float | The cutoff thresh-hold of the combined stability (sum of average and minimum stability) (default: `1.0`). Solutions with combined stabilities below this thresh-hold will not be considered. | 126 | | | **allow_stability_drop** | Boolean | Defines if solutions with a drop in stability with respect to the highest stable number of signatures will be considered (default: `False`). | 127 | | **Decomposition** | | | | 128 | | | **cosmic_version** | Float | Defines the version of the COSMIC reference signatures (default: `3.4`). Takes a positive float among `1`, `2`, `3`, `3.1`, `3.2`, `3.3`, and `3.4`.| 129 | | | **make_decomposition_plots** | Boolean | Generate de novo to COSMIC signature decomposition plots as part of the results (default: `True`). Set to `False` to skip generating these plots. | 130 | | | **collapse_to_SBS96** | Boolean | If `True`, SBS288 and SBS1536 de novo signatures will be mapped to SBS96 reference signatures (default: `True`). If `False`, those will be mapped to reference signatures of the same context. 131 | | **Others** | | | | 132 | | | **get_all_signature_matrices** | Boolean | Write to output Ws and Hs from all the NMF iterations (default: `False`) | 133 | | | **export_probabilities** | Boolean | Create the probability matrix (default: `True`). | 134 | | | **volume** | String | Path to the volume for writing and loading reference genomes, plotting templates, and COSMIC signature plots (default: `None`). Environmental variables take precedence: `SIGPROFILERMATRIXGENERATOR_VOLUME`, `SIGPROFILERPLOTTING_VOLUME`, and `SIGPROFILERASSIGNMENT_VOLUME`. | 135 | 136 | #### sigProfilerExtractor Example 137 | VCF Files as Input 138 | ```python 139 | from SigProfilerExtractor import sigpro as sig 140 | def main_function(): 141 | # to get input from vcf files 142 | path_to_example_folder_containing_vcf_files = sig.importdata("vcf") 143 | # you can put the path to your folder containing the vcf samples 144 | data = path_to_example_folder_containing_vcf_files 145 | sig.sigProfilerExtractor("vcf", "example_output", data, minimum_signatures=1, maximum_signatures=3) 146 | if __name__=="__main__": 147 | main_function() 148 | # Wait until the excecution is finished. The process may a couple of hours based on the size of the data. 149 | # Check the current working directory for the "example_output" folder. 150 | ``` 151 | Matrix File as Input 152 | ```python 153 | from SigProfilerExtractor import sigpro as sig 154 | def main_function(): 155 | # to get input from table format (mutation catalog matrix) 156 | path_to_example_table = sig.importdata("matrix") 157 | data = path_to_example_table # you can put the path to your tab delimited file containing the mutational catalog matrix/table 158 | sig.sigProfilerExtractor("matrix", "example_output", data, opportunity_genome="GRCh38", minimum_signatures=1, maximum_signatures=3) 159 | if __name__=="__main__": 160 | main_function() 161 | ``` 162 | 163 | #### sigProfilerExtractor Output 164 | To learn about the output, please visit https://osf.io/t6j7u/wiki/home/ 165 | 166 | 167 | ### Estimation of the Optimum Solution 168 | Estimate the optimum solution (rank) among different number of solutions (ranks). 169 | 170 | ```python 171 | estimate_solution(base_csvfile="All_solutions_stat.csv", 172 | All_solution="All_Solutions", 173 | genomes="Samples.txt", 174 | output="results", 175 | title="Selection_Plot", 176 | stability=0.8, 177 | min_stability=0.2, 178 | combined_stability=1.0, 179 | allow_stability_drop=False, 180 | exome=False) 181 | ``` 182 | 183 | | Parameter | Variable Type | Parameter Description | 184 | | --------------------- | -------- |-------- | 185 | | **base_csvfile** | String | Default is `"All_solutions_stat.csv"`. Path to a CSV file that contains the statistics of all solutions. | 186 | | **All_solution** | String | Default is `"All_Solutions"`. Path to a folder that contains the results of all solutions. | 187 | | **genomes** | String | Default is `"Samples.txt"`. Path to a tab delimilted file that contains the mutation counts for all genomes given to different mutation types. | 188 | | **output** | String | Default is `"results"`. Path to the output folder. | 189 | | **title** | String | Default is `"Selection_Plot"`. This sets the title of the selection_plot.pdf | 190 | | **stability** | Float | Default is 0.8. The cutoff thresh-hold of the average stability. Solutions with average stabilities below this thresh-hold will not be considered. | 191 | | **min_stability** | Float | Default is `0.2`. The cutoff thresh-hold of the minimum stability. Solutions with minimum stabilities below this thresh-hold will not be considered. | 192 | | **combined_stability** | Float | Default is `1.0`. The cutoff thresh-hold of the combined stability (sum of average and minimum stability). Solutions with combined stabilities below this thresh-hold will not be considered. | 193 | | **allow_stability_drop** | Boolean | Default is `False`. Defines if solutions with a drop in stability with respect to the highest stable number of signatures will be considered. | 194 | | **exome** | Boolean | Default is `False`. Defines if exomes samples are used. | 195 | 196 | 197 | #### Estimation of the Optimum Solution Example 198 | ```python 199 | from SigProfilerExtractor import estimate_best_solution as ebs 200 | ebs.estimate_solution(base_csvfile="All_solutions_stat.csv", 201 | All_solution="All_Solutions", 202 | genomes="Samples.txt", 203 | output="results", 204 | title="Selection_Plot", 205 | stability=0.8, 206 | min_stability=0.2, 207 | combined_stability=1.0, 208 | allow_stability_drop=False, 209 | exome=False) 210 | ``` 211 | 212 | #### Estimation of the Optimum Solution Output 213 | The files below will be generated in the output folder: 214 | | File Name | Description | 215 | | ----- | ----- | 216 | | **All_solutions_stat.csv** | A csv file that contains the statistics of all solutions. | 217 | | **selection_plot.pdf** | A plot that depict the Stability and Mean Sample Cosine Distance for different solutions. | 218 | 219 | ### Decompose 220 | 221 | For decomposition of de novo signatures please use [SigProfilerAssignment](https://github.com/AlexandrovLab/SigProfilerAssignment) 222 | 223 | ### Activity Stacked Bar Plot 224 | Generates a stacked bar plot showing activities in individuals 225 | 226 | ```python 227 | plotActivity(activity_file, output_file = "Activity_in_samples.pdf", bin_size = 50, log = False) 228 | ``` 229 | 230 | | Parameter | Variable Type | Parameter Description | 231 | | --------------------- | -------- |-------- | 232 | | **activity_file** | String | The standard output activity file showing the number of, or percentage of mutations attributed to each sample. The row names should be samples while the column names should be signatures. | 233 | | **output_file** | String | The path and full name of the output pdf file, including ".pdf" | 234 | | **bin_size** | Integer | Number of samples plotted per page, recommended: 50 | 235 | 236 | #### Activity Stacked Bar Plot Example 237 | ```bash 238 | $ python plotActivity.py 50 sig_attribution_sample.txt test_out.pdf 239 | ``` 240 | 241 | ## Video Tutorials 242 | Take a look at our video tutorials for step-by-step instructions on how to install and run SigProfilerExtractor on Amazon Web Services. 243 | 244 | ### Tutorial #1: Installing SigProfilerExtractor on Amazon Web Services ### 245 | 246 | [![Video Tutorial #3](https://img.youtube.com/vi/30JmjvJ-DtI/0.jpg)](https://www.youtube.com/watch?v=30JmjvJ-DtI/) 247 | 248 | ### Tutorial #2: Running the Quick Start Example Program ### 249 | 250 | [![Video Tutorial #3](https://img.youtube.com/vi/BiBYZz_khIY/0.jpg)](https://www.youtube.com/watch?v=BiBYZz_khIY/) 251 | 252 | ### Tutorial #3: Reviewing the output from SigProfilerExtractor ### 253 | 254 | [![Video Tutorial #3](https://img.youtube.com/vi/BchtNeaQlv0/0.jpg)](https://www.youtube.com/watch?v=BchtNeaQlv0/) 255 | 256 | ### GPU support 257 | 258 | If CUDA out of memory exceptions occur, it will be necessary to reduce the number of CPU processes used (the `cpu` parameter). 259 | 260 | #### For more information, help, and examples, please visit: https://osf.io/t6j7u/wiki/home/ 261 | 262 | ## Citation 263 | Islam SMA, Díaz-Gay M, Wu Y, Barnes M, Vangara R, Bergstrom EN, He Y, Vella M, Wang J, Teague JW, Clapham P, Moody S, Senkin S, Li YR, Riva L, Zhang T, Gruber AJ, Steele CD, Otlu B, Khandekar A, Abbasi A, Humphreys L, Syulyukina N, Brady SW, Alexandrov BS, Pillay N, Zhang J, Adams DJ, Martincorena I, Wedge DC, Landi MT, Brennan P, Stratton MR, Rozen SG, and Alexandrov LB (2022) Uncovering novel mutational signatures by _de novo_ extraction with SigProfilerExtractor. __Cell Genomics__. doi: [10.1016/j.xgen.2022.100179](https://doi.org/10.1016/j.xgen.2022.100179). 264 | 265 | 266 | ## Copyright 267 | This software and its documentation are copyright 2018 as a part of the sigProfiler project. The SigProfilerExtractor framework is free software and is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 268 | 269 | ## Contact Information 270 | Please address any queries or bug reports to Mark Barnes at mdbarnes@ucsd.edu 271 | -------------------------------------------------------------------------------- /SigProfilerExtractor/data/VCFInput/PD3851a.vcf: -------------------------------------------------------------------------------- 1 | 1 809687 PD3851a G C 2 | 1 819245 PD3851a G T 3 | 1 1911011 PD3851a C G 4 | 1 2112413 PD3851a T C 5 | 1 2927666 PD3851a A G 6 | 1 3359791 PD3851a C T 7 | 1 4347912 PD3851a G A 8 | 1 4961889 PD3851a G C 9 | 1 5949138 PD3851a C T 10 | 1 7806339 PD3851a A C 11 | 1 9648435 PD3851a G A 12 | 1 9705025 PD3851a C T 13 | 1 12852166 PD3851a G T 14 | 1 22247659 PD3851a G C 15 | 1 26016563 PD3851a A T 16 | 1 28195262 PD3851a C A 17 | 1 34144653 PD3851a G T 18 | 1 34626490 PD3851a A T 19 | 1 38515555 PD3851a A T 20 | 1 39267511 PD3851a G A 21 | 1 52931759 PD3851a C T 22 | 1 56544525 PD3851a A T 23 | 1 59749822 PD3851a A G 24 | 1 59992490 PD3851a G T 25 | 1 63443632 PD3851a C T 26 | 1 66697391 PD3851a G A 27 | 1 67165085 PD3851a G A 28 | 1 67747867 PD3851a A G 29 | 1 68668799 PD3851a A T 30 | 1 69229076 PD3851a T A 31 | 1 72409374 PD3851a C A 32 | 1 72785119 PD3851a G A 33 | 1 74224309 PD3851a G A 34 | 1 76143922 PD3851a G T 35 | 1 79848236 PD3851a T C 36 | 1 80236199 PD3851a G T 37 | 1 80514797 PD3851a T C 38 | 1 82323357 PD3851a T C 39 | 1 83478938 PD3851a T C 40 | 1 90076248 PD3851a C T 41 | 1 94228763 PD3851a G C 42 | 1 98257263 PD3851a T A 43 | 1 103246975 PD3851a T A 44 | 1 103516319 PD3851a G A 45 | 1 105062224 PD3851a A G 46 | 1 105201191 PD3851a A T 47 | 1 105280517 PD3851a G T 48 | 1 106828422 PD3851a G A 49 | 1 107045679 PD3851a A T 50 | 1 107045680 PD3851a G T 51 | 1 107355749 PD3851a A T 52 | 1 107543738 PD3851a G A 53 | 1 113454325 PD3851a C T 54 | 1 114266668 PD3851a G A 55 | 1 116220097 PD3851a C T 56 | 1 118085736 PD3851a G A 57 | 1 118629711 PD3851a C T 58 | 1 120595933 PD3851a C T 59 | 1 121101678 PD3851a G C 60 | 1 142907563 PD3851a T G 61 | 1 144851195 PD3851a C T 62 | 1 145829247 PD3851a A G 63 | 1 147783644 PD3851a A G 64 | 1 148681593 PD3851a A G 65 | 1 149234158 PD3851a T A 66 | 1 149692117 PD3851a C T 67 | 1 153094600 PD3851a C T 68 | 1 153521201 PD3851a A G 69 | 1 154404637 PD3851a A G 70 | 1 158578613 PD3851a T A 71 | 1 159186481 PD3851a C T 72 | 1 160304867 PD3851a C T 73 | 1 163151626 PD3851a G A 74 | 1 165937652 PD3851a A C 75 | 1 166956272 PD3851a C A 76 | 1 166965326 PD3851a G A 77 | 1 168543942 PD3851a C T 78 | 1 168750101 PD3851a T A 79 | 1 170296063 PD3851a G A 80 | 1 170803205 PD3851a G T 81 | 1 171139225 PD3851a C A 82 | 1 182526291 PD3851a C T 83 | 1 182595471 PD3851a T C 84 | 1 184036167 PD3851a A G 85 | 1 185697318 PD3851a T A 86 | 1 186669924 PD3851a A G 87 | 1 188150124 PD3851a T A 88 | 1 189675789 PD3851a G A 89 | 1 190093409 PD3851a T C 90 | 1 192710371 PD3851a C T 91 | 1 192977509 PD3851a G C 92 | 1 194006542 PD3851a G A 93 | 1 194163899 PD3851a C A 94 | 1 194525609 PD3851a G A 95 | 1 195050654 PD3851a C T 96 | 1 196036882 PD3851a T G 97 | 1 196341069 PD3851a G A 98 | 1 198504083 PD3851a G T 99 | 1 199846650 PD3851a C T 100 | 1 200548728 PD3851a A G 101 | 1 201794139 PD3851a G A 102 | 1 202505342 PD3851a C T 103 | 1 202525522 PD3851a C T 104 | 1 204596341 PD3851a G A 105 | 1 204782986 PD3851a G A 106 | 1 210419982 PD3851a T G 107 | 1 214526616 PD3851a A G 108 | 1 215938111 PD3851a C A 109 | 1 221436661 PD3851a G A 110 | 1 221621803 PD3851a T C 111 | 1 223447211 PD3851a G A 112 | 1 224043892 PD3851a A G 113 | 1 232290473 PD3851a A G 114 | 1 232301970 PD3851a A G 115 | 1 236904096 PD3851a G T 116 | 1 237427889 PD3851a C A 117 | 1 238628961 PD3851a C A 118 | 1 239398484 PD3851a C T 119 | 1 240056146 PD3851a C G 120 | 1 240278520 PD3851a T G 121 | 1 241810898 PD3851a G A 122 | 1 241929541 PD3851a C T 123 | 1 242701459 PD3851a C T 124 | 1 243088994 PD3851a G A 125 | 1 243116589 PD3851a C T 126 | 1 246397504 PD3851a C T 127 | 1 246684150 PD3851a G A 128 | 1 248738183 PD3851a G T 129 | 1 248740190 PD3851a G C 130 | 1 248743579 PD3851a A T 131 | 2 1856543 PD3851a G T 132 | 2 2245112 PD3851a C A 133 | 2 2818266 PD3851a A G 134 | 2 3433314 PD3851a G T 135 | 2 4425775 PD3851a C A 136 | 2 4940397 PD3851a G A 137 | 2 5257959 PD3851a G A 138 | 2 5778989 PD3851a A G 139 | 2 9253525 PD3851a C T 140 | 2 9287688 PD3851a C T 141 | 2 15513211 PD3851a G C 142 | 2 21039024 PD3851a G C 143 | 2 22448559 PD3851a G T 144 | 2 23837236 PD3851a A G 145 | 2 27603230 PD3851a A C 146 | 2 29547328 PD3851a C A 147 | 2 33096812 PD3851a G T 148 | 2 33210925 PD3851a G A 149 | 2 34893319 PD3851a G A 150 | 2 35875025 PD3851a C G 151 | 2 36424160 PD3851a G A 152 | 2 36828406 PD3851a G A 153 | 2 37971196 PD3851a G A 154 | 2 37990837 PD3851a G A 155 | 2 37990838 PD3851a G C 156 | 2 42391408 PD3851a C A 157 | 2 44700809 PD3851a C T 158 | 2 46739072 PD3851a G A 159 | 2 46870573 PD3851a G A 160 | 2 47108896 PD3851a G T 161 | 2 47437069 PD3851a C A 162 | 2 50068726 PD3851a T C 163 | 2 52174062 PD3851a A T 164 | 2 58344252 PD3851a T C 165 | 2 60326050 PD3851a T A 166 | 2 65185874 PD3851a C G 167 | 2 66721793 PD3851a C T 168 | 2 67193686 PD3851a T A 169 | 2 71238624 PD3851a C A 170 | 2 74495127 PD3851a T A 171 | 2 77179707 PD3851a A G 172 | 2 78537966 PD3851a G A 173 | 2 78732977 PD3851a A T 174 | 2 79024627 PD3851a G A 175 | 2 79615703 PD3851a G T 176 | 2 79648633 PD3851a A T 177 | 2 80416948 PD3851a G T 178 | 2 80632767 PD3851a C T 179 | 2 83658133 PD3851a C A 180 | 2 86282799 PD3851a T C 181 | 2 86612765 PD3851a G A 182 | 2 88961184 PD3851a G A 183 | 2 90425246 PD3851a C G 184 | 2 90463653 PD3851a A T 185 | 2 91823785 PD3851a C T 186 | 2 91887550 PD3851a A G 187 | 2 91894842 PD3851a G T 188 | 2 92077494 PD3851a C G 189 | 2 92077727 PD3851a T G 190 | 2 96959959 PD3851a G C 191 | 2 98837786 PD3851a C T 192 | 2 104216444 PD3851a T A 193 | 2 105284881 PD3851a G T 194 | 2 105601580 PD3851a G A 195 | 2 106327930 PD3851a T A 196 | 2 106658145 PD3851a G A 197 | 2 108269478 PD3851a C A 198 | 2 108512935 PD3851a A T 199 | 2 109026427 PD3851a T C 200 | 2 111377525 PD3851a T C 201 | 2 113473510 PD3851a C T 202 | 2 114943476 PD3851a T C 203 | 2 115576904 PD3851a G A 204 | 2 115761729 PD3851a T C 205 | 2 115835670 PD3851a C A 206 | 2 116722645 PD3851a C A 207 | 2 117922799 PD3851a T A 208 | 2 118553680 PD3851a A T 209 | 2 118553684 PD3851a T G 210 | 2 125070210 PD3851a C A 211 | 2 126077016 PD3851a C T 212 | 2 127418922 PD3851a G A 213 | 2 133014846 PD3851a C T 214 | 2 133452415 PD3851a C T 215 | 2 136736124 PD3851a G A 216 | 2 136849681 PD3851a G A 217 | 2 136849682 PD3851a C A 218 | 2 137118234 PD3851a C T 219 | 2 140179802 PD3851a A G 220 | 2 140777013 PD3851a A G 221 | 2 141221447 PD3851a T G 222 | 2 143252750 PD3851a A T 223 | 2 147565058 PD3851a G A 224 | 2 148462288 PD3851a A G 225 | 2 148831676 PD3851a C G 226 | 2 150729361 PD3851a C T 227 | 2 160904189 PD3851a C T 228 | 2 160954369 PD3851a C T 229 | 2 162304120 PD3851a T G 230 | 2 167090750 PD3851a G T 231 | 2 169872940 PD3851a C G 232 | 2 176652336 PD3851a G T 233 | 2 180065738 PD3851a G A 234 | 2 181145145 PD3851a C T 235 | 2 181596191 PD3851a G A 236 | 2 181596192 PD3851a C A 237 | 2 182538504 PD3851a C T 238 | 2 182541934 PD3851a G T 239 | 2 184311131 PD3851a T C 240 | 2 184579991 PD3851a A T 241 | 2 185881679 PD3851a G T 242 | 2 186128443 PD3851a T C 243 | 2 189190565 PD3851a C T 244 | 2 190382062 PD3851a A C 245 | 2 192926500 PD3851a G T 246 | 2 194457098 PD3851a C T 247 | 2 197206271 PD3851a C G 248 | 2 197265935 PD3851a C A 249 | 2 199318427 PD3851a G A 250 | 2 199953283 PD3851a C T 251 | 2 200952630 PD3851a A C 252 | 2 201998608 PD3851a G C 253 | 2 205229365 PD3851a G A 254 | 2 206109879 PD3851a C A 255 | 2 206643397 PD3851a A C 256 | 2 208218132 PD3851a G A 257 | 2 211134089 PD3851a A G 258 | 2 212435409 PD3851a G A 259 | 2 212926743 PD3851a A C 260 | 2 215864167 PD3851a C T 261 | 2 217425558 PD3851a C A 262 | 2 217425559 PD3851a C A 263 | 2 220660583 PD3851a G T 264 | 2 222763558 PD3851a G A 265 | 2 225833809 PD3851a G A 266 | 2 229963092 PD3851a T G 267 | 2 240287989 PD3851a T C 268 | 2 240297950 PD3851a C A 269 | 3 2155252 PD3851a C T 270 | 3 9010000 PD3851a G T 271 | 3 9425663 PD3851a A C 272 | 3 11144455 PD3851a G A 273 | 3 18245918 PD3851a A G 274 | 3 19223029 PD3851a A G 275 | 3 20541337 PD3851a A G 276 | 3 22958348 PD3851a T A 277 | 3 23431023 PD3851a G A 278 | 3 23592371 PD3851a T A 279 | 3 23632800 PD3851a G C 280 | 3 25856006 PD3851a A G 281 | 3 26716951 PD3851a G A 282 | 3 27345049 PD3851a A G 283 | 3 28124385 PD3851a C G 284 | 3 29035244 PD3851a G T 285 | 3 29100644 PD3851a T G 286 | 3 29135180 PD3851a G A 287 | 3 29981338 PD3851a T A 288 | 3 36458725 PD3851a G A 289 | 3 37184774 PD3851a T G 290 | 3 38986981 PD3851a C T 291 | 3 39944360 PD3851a C T 292 | 3 40463933 PD3851a G T 293 | 3 43015809 PD3851a C A 294 | 3 50147416 PD3851a C T 295 | 3 50563605 PD3851a G A 296 | 3 51027518 PD3851a C A 297 | 3 56335334 PD3851a C A 298 | 3 63100172 PD3851a A T 299 | 3 65667638 PD3851a A T 300 | 3 66820176 PD3851a G T 301 | 3 74638822 PD3851a G A 302 | 3 75687504 PD3851a G C 303 | 3 75688787 PD3851a G A 304 | 3 75805082 PD3851a A C 305 | 3 77497529 PD3851a C G 306 | 3 78258085 PD3851a C T 307 | 3 81745190 PD3851a C G 308 | 3 84506820 PD3851a G A 309 | 3 86075456 PD3851a C T 310 | 3 86993441 PD3851a T A 311 | 3 93619687 PD3851a C T 312 | 3 94889229 PD3851a T A 313 | 3 96540239 PD3851a G A 314 | 3 97171154 PD3851a C T 315 | 3 97827405 PD3851a C T 316 | 3 98580571 PD3851a G C 317 | 3 98665243 PD3851a G T 318 | 3 109257568 PD3851a C A 319 | 3 109257569 PD3851a C A 320 | 3 109268127 PD3851a T A 321 | 3 109516531 PD3851a G A 322 | 3 109526029 PD3851a T G 323 | 3 109526967 PD3851a G T 324 | 3 109688225 PD3851a G A 325 | 3 111524323 PD3851a G A 326 | 3 113349087 PD3851a G T 327 | 3 118156090 PD3851a G T 328 | 3 118218389 PD3851a C G 329 | 3 124513640 PD3851a G C 330 | 3 125469222 PD3851a C A 331 | 3 129350183 PD3851a C T 332 | 3 129888259 PD3851a T A 333 | 3 129923631 PD3851a A T 334 | 3 135816971 PD3851a A T 335 | 3 140100844 PD3851a C G 336 | 3 143370012 PD3851a C G 337 | 3 143370425 PD3851a A G 338 | 3 143381964 PD3851a C T 339 | 3 144944016 PD3851a T A 340 | 3 147528681 PD3851a C T 341 | 3 147569769 PD3851a T C 342 | 3 148895822 PD3851a A G 343 | 3 153068879 PD3851a C G 344 | 3 153631364 PD3851a A G 345 | 3 154235694 PD3851a A C 346 | 3 154609604 PD3851a C A 347 | 3 156530499 PD3851a A G 348 | 3 162560528 PD3851a T A 349 | 3 163252386 PD3851a G C 350 | 3 163819294 PD3851a A C 351 | 3 164595487 PD3851a C T 352 | 3 165084730 PD3851a A G 353 | 3 165129259 PD3851a C A 354 | 3 165961305 PD3851a A T 355 | 3 166035524 PD3851a C A 356 | 3 166652507 PD3851a T G 357 | 3 167162490 PD3851a G C 358 | 3 169160059 PD3851a G T 359 | 3 169982991 PD3851a G A 360 | 3 170709679 PD3851a T C 361 | 3 172784170 PD3851a A G 362 | 3 172794635 PD3851a T C 363 | 3 173254755 PD3851a C A 364 | 3 173907321 PD3851a A G 365 | 3 178534689 PD3851a T C 366 | 3 179286055 PD3851a G T 367 | 3 180522794 PD3851a C G 368 | 3 187885383 PD3851a T G 369 | 3 191115943 PD3851a A G 370 | 3 195216148 PD3851a G A 371 | 3 195223147 PD3851a C A 372 | 3 195225264 PD3851a C T 373 | 3 195225667 PD3851a G T 374 | 3 195667470 PD3851a C A 375 | 3 195674233 PD3851a T C 376 | 3 195720836 PD3851a G T 377 | 4 975080 PD3851a T C 378 | 4 1423028 PD3851a T G 379 | 4 3241190 PD3851a C T 380 | 4 5752522 PD3851a G A 381 | 4 6864579 PD3851a G T 382 | 4 8237589 PD3851a A C 383 | 4 9038855 PD3851a A T 384 | 4 9148751 PD3851a T C 385 | 4 9250386 PD3851a G T 386 | 4 10378324 PD3851a G A 387 | 4 12046669 PD3851a T A 388 | 4 12746710 PD3851a C A 389 | 4 15921566 PD3851a G A 390 | 4 18632354 PD3851a A T 391 | 4 19030514 PD3851a T G 392 | 4 20161391 PD3851a C T 393 | 4 20189711 PD3851a G T 394 | 4 20991665 PD3851a G T 395 | 4 21007278 PD3851a G C 396 | 4 24837047 PD3851a G T 397 | 4 30042956 PD3851a T C 398 | 4 33357891 PD3851a C A 399 | 4 36831140 PD3851a C G 400 | 4 41030463 PD3851a C A 401 | 4 41588919 PD3851a G T 402 | 4 44547048 PD3851a A C 403 | 4 45313113 PD3851a G T 404 | 4 46175401 PD3851a G C 405 | 4 47428806 PD3851a G A 406 | 4 47860028 PD3851a A C 407 | 4 49131991 PD3851a T G 408 | 4 49200769 PD3851a C T 409 | 4 49247382 PD3851a A C 410 | 4 49317272 PD3851a T C 411 | 4 55133539 PD3851a G A 412 | 4 55264737 PD3851a C T 413 | 4 55351791 PD3851a G T 414 | 4 61045967 PD3851a G C 415 | 4 62358927 PD3851a A T 416 | 4 63383134 PD3851a G A 417 | 4 63419350 PD3851a C A 418 | 4 63964094 PD3851a A T 419 | 4 65590271 PD3851a C T 420 | 4 65718706 PD3851a G T 421 | 4 66117192 PD3851a G A 422 | 4 66527340 PD3851a C A 423 | 4 68143944 PD3851a C G 424 | 4 69446363 PD3851a C G 425 | 4 69511269 PD3851a C T 426 | 4 71669488 PD3851a G A 427 | 4 72615691 PD3851a G A 428 | 4 72670424 PD3851a C G 429 | 4 73731392 PD3851a G A 430 | 4 74472383 PD3851a G T 431 | 4 75360549 PD3851a T C 432 | 4 77118636 PD3851a G C 433 | 4 77519431 PD3851a G A 434 | 4 77718332 PD3851a C T 435 | 4 79758458 PD3851a C A 436 | 4 80012748 PD3851a G A 437 | 4 80463080 PD3851a A T 438 | 4 81131658 PD3851a A T 439 | 4 81420524 PD3851a T A 440 | 4 88134440 PD3851a T G 441 | 4 88945438 PD3851a G A 442 | 4 89834469 PD3851a A G 443 | 4 94057740 PD3851a G T 444 | 4 94257837 PD3851a C T 445 | 4 94999702 PD3851a G A 446 | 4 95932153 PD3851a T G 447 | 4 98028076 PD3851a T C 448 | 4 99108153 PD3851a A C 449 | 4 111434273 PD3851a G A 450 | 4 111706759 PD3851a T C 451 | 4 112196010 PD3851a T A 452 | 4 112630651 PD3851a T C 453 | 4 117707285 PD3851a G A 454 | 4 119443108 PD3851a A C 455 | 4 119453821 PD3851a A G 456 | 4 119453873 PD3851a T C 457 | 4 119785075 PD3851a C G 458 | 4 120822294 PD3851a C T 459 | 4 122353412 PD3851a C A 460 | 4 123293072 PD3851a A T 461 | 4 127066203 PD3851a C G 462 | 4 128233946 PD3851a G C 463 | 4 135145727 PD3851a C T 464 | 4 135828222 PD3851a T A 465 | 4 143039026 PD3851a T C 466 | 4 145618841 PD3851a C T 467 | 4 148025108 PD3851a C T 468 | 4 149486129 PD3851a T C 469 | 4 150622294 PD3851a T C 470 | 4 151061552 PD3851a C A 471 | 4 155056754 PD3851a C T 472 | 4 156018921 PD3851a C A 473 | 4 157056443 PD3851a A C 474 | 4 163584053 PD3851a A C 475 | 4 165155465 PD3851a A C 476 | 4 165399583 PD3851a G A 477 | 4 165436112 PD3851a C A 478 | 4 165783100 PD3851a A G 479 | 4 166090054 PD3851a T C 480 | 4 167214369 PD3851a T A 481 | 4 167429049 PD3851a A G 482 | 4 167454471 PD3851a C A 483 | 4 167487197 PD3851a A T 484 | 4 167499411 PD3851a C T 485 | 4 168143184 PD3851a T G 486 | 4 171003555 PD3851a C A 487 | 4 172509041 PD3851a G A 488 | 4 174278104 PD3851a T C 489 | 4 174714838 PD3851a C T 490 | 4 175356693 PD3851a G A 491 | 4 175446787 PD3851a C A 492 | 4 176946582 PD3851a A C 493 | 4 177480810 PD3851a A G 494 | 4 177796413 PD3851a G C 495 | 4 178747284 PD3851a A G 496 | 4 179739472 PD3851a A T 497 | 4 180241339 PD3851a C G 498 | 4 180917179 PD3851a G A 499 | 4 181710994 PD3851a G A 500 | 4 185187197 PD3851a G A 501 | 4 185405327 PD3851a C T 502 | 4 187480773 PD3851a G T 503 | 4 188830396 PD3851a T G 504 | 4 189182718 PD3851a C A 505 | 4 189532179 PD3851a C G 506 | 4 189687581 PD3851a T A 507 | 4 190327881 PD3851a C A 508 | 4 190881971 PD3851a A C 509 | 4 190884334 PD3851a A C 510 | 4 190924156 PD3851a G A 511 | 4 190927885 PD3851a G T 512 | 5 416068 PD3851a C A 513 | 5 2037299 PD3851a C T 514 | 5 3444826 PD3851a T C 515 | 5 4242799 PD3851a T C 516 | 5 4344029 PD3851a G C 517 | 5 4459981 PD3851a T A 518 | 5 4527801 PD3851a G A 519 | 5 4699998 PD3851a C G 520 | 5 6433267 PD3851a C A 521 | 5 7023241 PD3851a C T 522 | 5 7163492 PD3851a T A 523 | 5 7666580 PD3851a A C 524 | 5 8423889 PD3851a A G 525 | 5 11324613 PD3851a C T 526 | 5 16739365 PD3851a C T 527 | 5 16764030 PD3851a G A 528 | 5 21340252 PD3851a T C 529 | 5 21642877 PD3851a T C 530 | 5 22384583 PD3851a C T 531 | 5 22557319 PD3851a C T 532 | 5 23722401 PD3851a A G 533 | 5 25601740 PD3851a A T 534 | 5 26615106 PD3851a A G 535 | 5 26632559 PD3851a A G 536 | 5 27191820 PD3851a C T 537 | 5 27843687 PD3851a G A 538 | 5 28791020 PD3851a C A 539 | 5 28888951 PD3851a C A 540 | 5 29119434 PD3851a A G 541 | 5 29148688 PD3851a T A 542 | 5 29555445 PD3851a C T 543 | 5 29641660 PD3851a G T 544 | 5 30256855 PD3851a G A 545 | 5 31133110 PD3851a C T 546 | 5 32255368 PD3851a T A 547 | 5 32517934 PD3851a G T 548 | 5 33215100 PD3851a T C 549 | 5 34312286 PD3851a C T 550 | 5 34473409 PD3851a A G 551 | 5 36186708 PD3851a G A 552 | 5 37502701 PD3851a A T 553 | 5 40136367 PD3851a A T 554 | 5 40977180 PD3851a C T 555 | 5 43166555 PD3851a T C 556 | 5 43274184 PD3851a G T 557 | 5 43393951 PD3851a G A 558 | 5 43911315 PD3851a T C 559 | 5 45848588 PD3851a T A 560 | 5 51018026 PD3851a G A 561 | 5 52249871 PD3851a G C 562 | 5 54503699 PD3851a T C 563 | 5 54959568 PD3851a C A 564 | 5 55390092 PD3851a C A 565 | 5 57661944 PD3851a G A 566 | 5 60428475 PD3851a G C 567 | 5 62316196 PD3851a G A 568 | 5 62975886 PD3851a G A 569 | 5 63478183 PD3851a C T 570 | 5 65372448 PD3851a G T 571 | 5 65459539 PD3851a A G 572 | 5 76350897 PD3851a T C 573 | 5 77776912 PD3851a C A 574 | 5 79731982 PD3851a A G 575 | 5 80500111 PD3851a G T 576 | 5 80522122 PD3851a G A 577 | 5 80763474 PD3851a T G 578 | 5 81894636 PD3851a T C 579 | 5 83922134 PD3851a T A 580 | 5 83938471 PD3851a C T 581 | 5 84157267 PD3851a C G 582 | 5 85083779 PD3851a C G 583 | 5 85804909 PD3851a C T 584 | 5 87262365 PD3851a A G 585 | 5 88110246 PD3851a C G 586 | 5 97717969 PD3851a G T 587 | 5 99131604 PD3851a T A 588 | 5 99502604 PD3851a G C 589 | 5 100051458 PD3851a C T 590 | 5 108553386 PD3851a G A 591 | 5 111604477 PD3851a T G 592 | 5 113386306 PD3851a G A 593 | 5 114209185 PD3851a T C 594 | 5 127589279 PD3851a G C 595 | 5 131202925 PD3851a G A 596 | 5 133891847 PD3851a T C 597 | 5 133989569 PD3851a C T 598 | 5 146615645 PD3851a T A 599 | 5 147405575 PD3851a G T 600 | 5 149002156 PD3851a G T 601 | 5 149842815 PD3851a G A 602 | 5 150891579 PD3851a C T 603 | 5 151670614 PD3851a G T 604 | 5 151716673 PD3851a T C 605 | 5 152120798 PD3851a G A 606 | 5 156232923 PD3851a G A 607 | 5 156519400 PD3851a G A 608 | 5 157673680 PD3851a C T 609 | 5 158753669 PD3851a G T 610 | 5 161584522 PD3851a C T 611 | 5 162334301 PD3851a T C 612 | 5 163588002 PD3851a G T 613 | 5 164872542 PD3851a C A 614 | 5 165150310 PD3851a A C 615 | 5 165242624 PD3851a T G 616 | 5 165533803 PD3851a G T 617 | 5 166402984 PD3851a C G 618 | 5 168060060 PD3851a G A 619 | 5 169937479 PD3851a C A 620 | 5 171992401 PD3851a A C 621 | 5 172140436 PD3851a C G 622 | 5 175933478 PD3851a T C 623 | 5 178266517 PD3851a T G 624 | 5 178434600 PD3851a G A 625 | 6 1135643 PD3851a C A 626 | 6 2953899 PD3851a T G 627 | 6 5973540 PD3851a C T 628 | 6 7655887 PD3851a C T 629 | 6 9807392 PD3851a A G 630 | 6 11468329 PD3851a A G 631 | 6 11959123 PD3851a C A 632 | 6 23880191 PD3851a C T 633 | 6 24848753 PD3851a T A 634 | 6 25078191 PD3851a C T 635 | 6 27167166 PD3851a G T 636 | 6 27711298 PD3851a T C 637 | 6 30773368 PD3851a T A 638 | 6 32581803 PD3851a C T 639 | 6 35147051 PD3851a G A 640 | 6 40315097 PD3851a C G 641 | 6 41985539 PD3851a G T 642 | 6 42062935 PD3851a C T 643 | 6 44832435 PD3851a G T 644 | 6 46278338 PD3851a A T 645 | 6 46601518 PD3851a T C 646 | 6 48568500 PD3851a T A 647 | 6 50469875 PD3851a G A 648 | 6 51760937 PD3851a G A 649 | 6 55037473 PD3851a C A 650 | 6 62574979 PD3851a G T 651 | 6 62735840 PD3851a A G 652 | 6 62814440 PD3851a A C 653 | 6 62842785 PD3851a G T 654 | 6 63465845 PD3851a C T 655 | 6 68427098 PD3851a G T 656 | 6 68757347 PD3851a G A 657 | 6 72522106 PD3851a A T 658 | 6 75370623 PD3851a C T 659 | 6 75801642 PD3851a G A 660 | 6 77211374 PD3851a G A 661 | 6 81205478 PD3851a A T 662 | 6 81205488 PD3851a T C 663 | 6 83238628 PD3851a T A 664 | 6 84910762 PD3851a C A 665 | 6 87392737 PD3851a C G 666 | 6 87632234 PD3851a A C 667 | 6 88064085 PD3851a G A 668 | 6 92743580 PD3851a T A 669 | 6 105210987 PD3851a A C 670 | 6 106930092 PD3851a T C 671 | 6 106930101 PD3851a A C 672 | 6 106930125 PD3851a C T 673 | 6 108196270 PD3851a G A 674 | 6 110021314 PD3851a A T 675 | 6 114102747 PD3851a T C 676 | 6 114210909 PD3851a C G 677 | 6 115166352 PD3851a G T 678 | 6 122414608 PD3851a C T 679 | 6 123499325 PD3851a C T 680 | 6 124186553 PD3851a T C 681 | 6 126980699 PD3851a T C 682 | 6 128894043 PD3851a T G 683 | 6 128894045 PD3851a C T 684 | 6 134483775 PD3851a C A 685 | 6 141132444 PD3851a T G 686 | 6 144392279 PD3851a T C 687 | 6 152108836 PD3851a C A 688 | 6 152196614 PD3851a A G 689 | 6 152823976 PD3851a C T 690 | 6 153151053 PD3851a A G 691 | 6 154092725 PD3851a A T 692 | 6 158924714 PD3851a G T 693 | 6 158988050 PD3851a A G 694 | 6 160266580 PD3851a G A 695 | 6 161124175 PD3851a G A 696 | 6 162294115 PD3851a C T 697 | 6 162916226 PD3851a A T 698 | 6 163803280 PD3851a G A 699 | 6 170045477 PD3851a C T 700 | 7 312095 PD3851a G A 701 | 7 3860861 PD3851a C T 702 | 7 12552706 PD3851a T C 703 | 7 13922010 PD3851a G A 704 | 7 14050750 PD3851a C T 705 | 7 21617366 PD3851a A G 706 | 7 22431644 PD3851a G T 707 | 7 22883837 PD3851a C T 708 | 7 26361191 PD3851a G A 709 | 7 27069751 PD3851a T G 710 | 7 27803163 PD3851a A G 711 | 7 34596199 PD3851a C A 712 | 7 35083942 PD3851a T C 713 | 7 35974140 PD3851a C T 714 | 7 36185638 PD3851a C A 715 | 7 36185684 PD3851a A C 716 | 7 36646716 PD3851a A T 717 | 7 38609687 PD3851a G A 718 | 7 41253755 PD3851a C T 719 | 7 41366255 PD3851a T C 720 | 7 48764481 PD3851a T C 721 | 7 48950469 PD3851a G C 722 | 7 49408655 PD3851a G A 723 | 7 49866375 PD3851a C T 724 | 7 50573044 PD3851a T A 725 | 7 51828089 PD3851a C T 726 | 7 51857655 PD3851a G A 727 | 7 56226309 PD3851a C A 728 | 7 56504108 PD3851a G A 729 | 7 56872196 PD3851a C T 730 | 7 61828954 PD3851a A G 731 | 7 62361138 PD3851a C G 732 | 7 62600352 PD3851a C A 733 | 7 62715963 PD3851a T C 734 | 7 62987643 PD3851a T G 735 | 7 63154502 PD3851a G C 736 | 7 63692312 PD3851a C T 737 | 7 67862258 PD3851a G T 738 | 7 67862268 PD3851a C T 739 | 7 68316243 PD3851a C T 740 | 7 70179864 PD3851a T G 741 | 7 72379449 PD3851a C T 742 | 7 72930502 PD3851a C T 743 | 7 73359068 PD3851a A T 744 | 7 75077023 PD3851a A G 745 | 7 77435162 PD3851a C T 746 | 7 78280741 PD3851a C T 747 | 7 78345117 PD3851a A G 748 | 7 83350028 PD3851a G A 749 | 7 84623335 PD3851a C A 750 | 7 85808200 PD3851a A G 751 | 7 85885858 PD3851a C G 752 | 7 88454468 PD3851a T A 753 | 7 88989475 PD3851a G T 754 | 7 89612988 PD3851a C T 755 | 7 89943957 PD3851a A C 756 | 7 94006954 PD3851a C A 757 | 7 94691933 PD3851a C T 758 | 7 96928080 PD3851a G T 759 | 7 105601718 PD3851a A T 760 | 7 106157580 PD3851a C A 761 | 7 108149536 PD3851a T A 762 | 7 109039517 PD3851a G A 763 | 7 113379848 PD3851a C T 764 | 7 113415354 PD3851a G T 765 | 7 116492533 PD3851a G A 766 | 7 116894519 PD3851a C T 767 | 7 117498771 PD3851a C A 768 | 7 121047564 PD3851a T C 769 | 7 121698252 PD3851a G T 770 | 7 121922311 PD3851a G T 771 | 7 122325288 PD3851a G T 772 | 7 122349035 PD3851a T C 773 | 7 123749783 PD3851a C T 774 | 7 125256310 PD3851a T C 775 | 7 125670921 PD3851a G A 776 | 7 126344383 PD3851a A C 777 | 7 129001703 PD3851a C T 778 | 7 131941164 PD3851a G T 779 | 7 134258923 PD3851a T C 780 | 7 136261645 PD3851a C T 781 | 7 136539432 PD3851a T A 782 | 7 137353282 PD3851a A G 783 | 7 137966017 PD3851a A T 784 | 7 138449631 PD3851a G C 785 | 7 142483748 PD3851a G T 786 | 7 143818566 PD3851a A T 787 | 7 144293943 PD3851a C T 788 | 7 145414302 PD3851a G A 789 | 7 145817081 PD3851a G T 790 | 7 145972900 PD3851a T A 791 | 7 146261769 PD3851a G A 792 | 7 147735517 PD3851a C T 793 | 7 148778000 PD3851a T A 794 | 7 148928789 PD3851a A T 795 | 7 149731778 PD3851a T G 796 | 7 151970797 PD3851a A T 797 | 7 152805235 PD3851a G T 798 | 7 155576883 PD3851a C G 799 | 7 156816201 PD3851a G T 800 | 7 157647676 PD3851a A C 801 | 7 157846793 PD3851a C T 802 | 7 158135160 PD3851a A G 803 | 7 158328310 PD3851a T C 804 | 8 215469 PD3851a C T 805 | 8 1039148 PD3851a A C 806 | 8 4111901 PD3851a C T 807 | 8 8603025 PD3851a G A 808 | 8 12422023 PD3851a T C 809 | 8 15746760 PD3851a G A 810 | 8 15892871 PD3851a C A 811 | 8 20114450 PD3851a C T 812 | 8 21870815 PD3851a G A 813 | 8 22453569 PD3851a A G 814 | 8 25853539 PD3851a A T 815 | 8 28854078 PD3851a T G 816 | 8 30971144 PD3851a A T 817 | 8 30998493 PD3851a A C 818 | 8 32018155 PD3851a G C 819 | 8 35689913 PD3851a C A 820 | 8 36076141 PD3851a A T 821 | 8 39585796 PD3851a T A 822 | 8 47477090 PD3851a C T 823 | 8 48103406 PD3851a G A 824 | 8 49520646 PD3851a C T 825 | 8 50759979 PD3851a A T 826 | 8 50976303 PD3851a T A 827 | 8 51278887 PD3851a T G 828 | 8 53967196 PD3851a A C 829 | 8 55863279 PD3851a A T 830 | 8 55900812 PD3851a G A 831 | 8 56138106 PD3851a C T 832 | 8 56312989 PD3851a G A 833 | 8 56514520 PD3851a C A 834 | 8 57310804 PD3851a T A 835 | 8 57501851 PD3851a C A 836 | 8 60295005 PD3851a C A 837 | 8 62491792 PD3851a C T 838 | 8 64176803 PD3851a C T 839 | 8 64499637 PD3851a C T 840 | 8 64817241 PD3851a T A 841 | 8 65123484 PD3851a G T 842 | 8 65488504 PD3851a A T 843 | 8 68995575 PD3851a T C 844 | 8 70831008 PD3851a T A 845 | 8 70866215 PD3851a C T 846 | 8 78016004 PD3851a T A 847 | 8 79611062 PD3851a T A 848 | 8 80339031 PD3851a C T 849 | 8 82207878 PD3851a A T 850 | 8 82207879 PD3851a G T 851 | 8 84786210 PD3851a C T 852 | 8 85072639 PD3851a T G 853 | 8 92682154 PD3851a G A 854 | 8 93094617 PD3851a C T 855 | 8 94575040 PD3851a G T 856 | 8 96592690 PD3851a G C 857 | 8 100425740 PD3851a A T 858 | 8 101217156 PD3851a G C 859 | 8 105775348 PD3851a A T 860 | 8 106408496 PD3851a A T 861 | 8 106461478 PD3851a C T 862 | 8 109585397 PD3851a G A 863 | 8 110374497 PD3851a T C 864 | 8 111018329 PD3851a C A 865 | 8 111206343 PD3851a G A 866 | 8 112747859 PD3851a G T 867 | 8 115338161 PD3851a G A 868 | 8 119393084 PD3851a A T 869 | 8 120065171 PD3851a G T 870 | 8 123258740 PD3851a C A 871 | 8 129243239 PD3851a A T 872 | 8 133847858 PD3851a A G 873 | 8 136072496 PD3851a C G 874 | 8 137682918 PD3851a G T 875 | 8 138438707 PD3851a C T 876 | 8 139197779 PD3851a C T 877 | 8 139223025 PD3851a G A 878 | 8 139528310 PD3851a G A 879 | 8 140610005 PD3851a G A 880 | 8 142632000 PD3851a C T 881 | 8 144186345 PD3851a C A 882 | 8 145504997 PD3851a T C 883 | 8 145683314 PD3851a A G 884 | 8 146058728 PD3851a T G 885 | 9 1378734 PD3851a C T 886 | 9 1420045 PD3851a C T 887 | 9 2620666 PD3851a C A 888 | 9 3191184 PD3851a T C 889 | 9 3940534 PD3851a T C 890 | 9 4583612 PD3851a C T 891 | 9 7224935 PD3851a C T 892 | 9 10474800 PD3851a C T 893 | 9 10582481 PD3851a T C 894 | 9 11665696 PD3851a T A 895 | 9 12168256 PD3851a C A 896 | 9 13127090 PD3851a C T 897 | 9 15823068 PD3851a C T 898 | 9 20052715 PD3851a T C 899 | 9 26364196 PD3851a T G 900 | 9 26482291 PD3851a C T 901 | 9 26815063 PD3851a G C 902 | 9 31318696 PD3851a C T 903 | 9 32201296 PD3851a T G 904 | 9 34232710 PD3851a C T 905 | 9 38416849 PD3851a T A 906 | 9 39138121 PD3851a A T 907 | 9 42101025 PD3851a T A 908 | 9 44164120 PD3851a C T 909 | 9 44183295 PD3851a T C 910 | 9 44236958 PD3851a G T 911 | 9 66503899 PD3851a G C 912 | 9 66589849 PD3851a T A 913 | 9 71672107 PD3851a T G 914 | 9 75443913 PD3851a G T 915 | 9 76543503 PD3851a G A 916 | 9 78924429 PD3851a G T 917 | 9 79815289 PD3851a C G 918 | 9 86871588 PD3851a G A 919 | 9 89473628 PD3851a G T 920 | 9 92337055 PD3851a C G 921 | 9 94781055 PD3851a T G 922 | 9 95302923 PD3851a C T 923 | 9 99722706 PD3851a C T 924 | 9 99990514 PD3851a G A 925 | 9 101083191 PD3851a C T 926 | 9 115546644 PD3851a A C 927 | 9 118592454 PD3851a C T 928 | 9 119990796 PD3851a A G 929 | 9 121585790 PD3851a G A 930 | 9 121643314 PD3851a C G 931 | 9 123092528 PD3851a G A 932 | 9 123489103 PD3851a G T 933 | 9 135114245 PD3851a C A 934 | 10 493910 PD3851a C T 935 | 10 542070 PD3851a T G 936 | 10 784439 PD3851a T G 937 | 10 2714569 PD3851a G A 938 | 10 3421433 PD3851a G A 939 | 10 6380865 PD3851a A G 940 | 10 6947859 PD3851a A G 941 | 10 7399973 PD3851a A C 942 | 10 7583434 PD3851a G A 943 | 10 10540764 PD3851a T G 944 | 10 11475722 PD3851a G C 945 | 10 15802655 PD3851a T A 946 | 10 18262234 PD3851a T G 947 | 10 20714852 PD3851a C T 948 | 10 25829758 PD3851a T A 949 | 10 25990203 PD3851a C T 950 | 10 27223281 PD3851a G T 951 | 10 27223302 PD3851a C T 952 | 10 30401847 PD3851a G T 953 | 10 32809712 PD3851a G T 954 | 10 36418199 PD3851a A C 955 | 10 36756804 PD3851a C A 956 | 10 36967321 PD3851a A T 957 | 10 37146297 PD3851a T C 958 | 10 38888323 PD3851a G A 959 | 10 38993769 PD3851a C T 960 | 10 42741676 PD3851a G A 961 | 10 42936755 PD3851a A C 962 | 10 44959258 PD3851a C T 963 | 10 50514473 PD3851a G A 964 | 10 51098912 PD3851a T C 965 | 10 51539979 PD3851a C A 966 | 10 56433658 PD3851a C A 967 | 10 56792422 PD3851a C A 968 | 10 57396508 PD3851a G A 969 | 10 57584100 PD3851a T A 970 | 10 60259461 PD3851a C T 971 | 10 62146432 PD3851a C T 972 | 10 62433316 PD3851a A G 973 | 10 63527101 PD3851a C T 974 | 10 63744116 PD3851a A G 975 | 10 64263130 PD3851a C T 976 | 10 64685338 PD3851a G A 977 | 10 66140538 PD3851a C A 978 | 10 66217204 PD3851a A C 979 | 10 67183530 PD3851a T G 980 | 10 69417561 PD3851a C T 981 | 10 72598314 PD3851a G A 982 | 10 78491117 PD3851a T A 983 | 10 82069164 PD3851a T A 984 | 10 86405489 PD3851a T A 985 | 10 86752897 PD3851a C A 986 | 10 88471716 PD3851a A C 987 | 10 93723509 PD3851a T A 988 | 10 93825621 PD3851a C T 989 | 10 97118165 PD3851a A C 990 | 10 103447203 PD3851a C A 991 | 10 104017868 PD3851a A G 992 | 10 106844613 PD3851a C T 993 | 10 107360399 PD3851a G C 994 | 10 110168435 PD3851a G A 995 | 10 110169103 PD3851a G C 996 | 10 111000602 PD3851a G A 997 | 10 111142970 PD3851a A C 998 | 10 115320155 PD3851a T C 999 | 10 115606821 PD3851a G A 1000 | 10 115841054 PD3851a C A 1001 | 10 118290194 PD3851a A G 1002 | 10 118425504 PD3851a A G 1003 | 10 118918171 PD3851a G T 1004 | 10 125808083 PD3851a G C 1005 | 10 127612785 PD3851a T A 1006 | 10 127612788 PD3851a A T 1007 | 10 130492332 PD3851a G T 1008 | 10 130677524 PD3851a A G 1009 | 10 132256429 PD3851a A G 1010 | 10 132403549 PD3851a C T 1011 | 10 132591954 PD3851a T A 1012 | 10 133130219 PD3851a T A 1013 | 10 133295161 PD3851a G A 1014 | 10 134880595 PD3851a G A 1015 | 11 2237448 PD3851a G A 1016 | 11 2378795 PD3851a C A 1017 | 11 4442464 PD3851a C A 1018 | 11 5008672 PD3851a G C 1019 | 11 7199515 PD3851a G A 1020 | 11 10731355 PD3851a G C 1021 | 11 10915300 PD3851a G T 1022 | 11 12524198 PD3851a G A 1023 | 11 13022299 PD3851a T G 1024 | 11 17733062 PD3851a G A 1025 | 11 17774862 PD3851a G A 1026 | 11 20187444 PD3851a G A 1027 | 11 20687765 PD3851a G A 1028 | 11 21302354 PD3851a T A 1029 | 11 22386667 PD3851a C A 1030 | 11 23868404 PD3851a C T 1031 | 11 26143405 PD3851a C T 1032 | 11 26143406 PD3851a T A 1033 | 11 26487287 PD3851a T G 1034 | 11 29362275 PD3851a C T 1035 | 11 29920303 PD3851a G T 1036 | 11 31425617 PD3851a C T 1037 | 11 35904866 PD3851a G T 1038 | 11 36416387 PD3851a C T 1039 | 11 39236232 PD3851a C T 1040 | 11 40691162 PD3851a G T 1041 | 11 41211484 PD3851a C T 1042 | 11 41830622 PD3851a C T 1043 | 11 42661762 PD3851a C T 1044 | 11 42947973 PD3851a G A 1045 | 11 43072965 PD3851a T C 1046 | 11 43818089 PD3851a T G 1047 | 11 45586679 PD3851a C T 1048 | 11 48164668 PD3851a C T 1049 | 11 49083262 PD3851a C T 1050 | 11 50274652 PD3851a C A 1051 | 11 55260391 PD3851a T A 1052 | 11 55925161 PD3851a T C 1053 | 11 55981089 PD3851a A C 1054 | 11 56736838 PD3851a G A 1055 | 11 57099726 PD3851a G A 1056 | 11 57575423 PD3851a A G 1057 | 11 57708496 PD3851a A G 1058 | 11 60942788 PD3851a C T 1059 | 11 79127100 PD3851a G C 1060 | 11 80800269 PD3851a C A 1061 | 11 81179719 PD3851a G A 1062 | 11 82222453 PD3851a T C 1063 | 11 82250708 PD3851a T A 1064 | 11 82459818 PD3851a C A 1065 | 11 84799512 PD3851a A G 1066 | 11 84987055 PD3851a G A 1067 | 11 87857003 PD3851a C T 1068 | 11 88506487 PD3851a T A 1069 | 11 90423332 PD3851a G A 1070 | 11 92986362 PD3851a C A 1071 | 11 95702147 PD3851a T A 1072 | 11 95839079 PD3851a C A 1073 | 11 96781880 PD3851a G A 1074 | 11 96934694 PD3851a C T 1075 | 11 97541553 PD3851a G A 1076 | 11 97911178 PD3851a C T 1077 | 11 97926292 PD3851a C G 1078 | 11 99850501 PD3851a G T 1079 | 11 104061787 PD3851a A G 1080 | 11 104094009 PD3851a C A 1081 | 11 104402629 PD3851a A C 1082 | 11 113266743 PD3851a C T 1083 | 11 114357542 PD3851a G T 1084 | 11 117431668 PD3851a C T 1085 | 11 118164929 PD3851a A G 1086 | 11 120586269 PD3851a C T 1087 | 11 121652520 PD3851a A T 1088 | 11 121864345 PD3851a G A 1089 | 11 122997523 PD3851a C T 1090 | 11 125254530 PD3851a G A 1091 | 11 126291738 PD3851a T A 1092 | 11 127686783 PD3851a G T 1093 | 11 128704080 PD3851a T C 1094 | 11 132538519 PD3851a A T 1095 | 11 132701299 PD3851a C G 1096 | 12 287659 PD3851a C T 1097 | 12 1164455 PD3851a T C 1098 | 12 2922349 PD3851a C G 1099 | 12 2964279 PD3851a C T 1100 | 12 3142995 PD3851a G A 1101 | 12 3665841 PD3851a A T 1102 | 12 4401620 PD3851a G T 1103 | 12 8961168 PD3851a G A 1104 | 12 9300337 PD3851a C T 1105 | 12 10147597 PD3851a C T 1106 | 12 10606954 PD3851a C G 1107 | 12 13918642 PD3851a T A 1108 | 12 16060572 PD3851a C T 1109 | 12 17264353 PD3851a C G 1110 | 12 17786679 PD3851a G T 1111 | 12 18427860 PD3851a G A 1112 | 12 19137924 PD3851a G A 1113 | 12 19379963 PD3851a T C 1114 | 12 20256903 PD3851a G T 1115 | 12 20604251 PD3851a A T 1116 | 12 24580734 PD3851a G A 1117 | 12 28848901 PD3851a T G 1118 | 12 31728333 PD3851a G A 1119 | 12 32509780 PD3851a G A 1120 | 12 32813507 PD3851a G C 1121 | 12 33974273 PD3851a T A 1122 | 12 39483756 PD3851a T A 1123 | 12 39509150 PD3851a A G 1124 | 12 40564471 PD3851a A T 1125 | 12 40880007 PD3851a A T 1126 | 12 42950867 PD3851a C T 1127 | 12 42965778 PD3851a G A 1128 | 12 44491725 PD3851a G A 1129 | 12 47601937 PD3851a G A 1130 | 12 47996259 PD3851a C T 1131 | 12 48194318 PD3851a A G 1132 | 12 48961699 PD3851a A T 1133 | 12 51825363 PD3851a G C 1134 | 12 56044662 PD3851a G T 1135 | 12 56044663 PD3851a A C 1136 | 12 58315533 PD3851a G T 1137 | 12 58521158 PD3851a C T 1138 | 12 58623882 PD3851a A T 1139 | 12 59217164 PD3851a T G 1140 | 12 60296207 PD3851a A T 1141 | 12 60605267 PD3851a G C 1142 | 12 63637608 PD3851a G T 1143 | 12 63663523 PD3851a T C 1144 | 12 65787178 PD3851a C G 1145 | 12 65887277 PD3851a T A 1146 | 12 66400622 PD3851a A G 1147 | 12 68221540 PD3851a G T 1148 | 12 73002089 PD3851a C G 1149 | 12 75937918 PD3851a G T 1150 | 12 76276197 PD3851a A G 1151 | 12 77884244 PD3851a A T 1152 | 12 82060298 PD3851a C T 1153 | 12 84755751 PD3851a C T 1154 | 12 85087799 PD3851a A C 1155 | 12 86933550 PD3851a T G 1156 | 12 87264731 PD3851a G T 1157 | 12 90994206 PD3851a G T 1158 | 12 95189059 PD3851a G A 1159 | 12 96544157 PD3851a C T 1160 | 12 97135223 PD3851a G C 1161 | 12 99286493 PD3851a C T 1162 | 12 103505501 PD3851a C G 1163 | 12 105476062 PD3851a G T 1164 | 12 107359160 PD3851a G A 1165 | 12 108961136 PD3851a G T 1166 | 12 111319022 PD3851a G A 1167 | 12 114336549 PD3851a C T 1168 | 12 114957177 PD3851a T A 1169 | 12 123200964 PD3851a A T 1170 | 12 123547513 PD3851a C A 1171 | 12 123579953 PD3851a C T 1172 | 12 125424798 PD3851a G A 1173 | 12 127029435 PD3851a C A 1174 | 12 127198223 PD3851a C A 1175 | 12 127938204 PD3851a C T 1176 | 12 129143410 PD3851a A G 1177 | 12 129174198 PD3851a G A 1178 | 12 129613909 PD3851a T C 1179 | 12 129736415 PD3851a C A 1180 | 12 130569803 PD3851a G C 1181 | 12 130849312 PD3851a T C 1182 | 12 131014732 PD3851a C A 1183 | 12 131650118 PD3851a C T 1184 | 12 131773104 PD3851a T G 1185 | 12 132151481 PD3851a G A 1186 | 12 132479942 PD3851a A G 1187 | 12 132881656 PD3851a C G 1188 | 12 133756146 PD3851a G C 1189 | 13 19175535 PD3851a G C 1190 | 13 23981361 PD3851a G A 1191 | 13 25527823 PD3851a T C 1192 | 13 28121925 PD3851a G A 1193 | 13 28859381 PD3851a A G 1194 | 13 30186198 PD3851a C T 1195 | 13 33239641 PD3851a G C 1196 | 13 33285815 PD3851a A T 1197 | 13 33686061 PD3851a C T 1198 | 13 33726173 PD3851a T A 1199 | 13 37218367 PD3851a C A 1200 | 13 37739169 PD3851a C A 1201 | 13 40335766 PD3851a G A 1202 | 13 41457024 PD3851a C G 1203 | 13 43592446 PD3851a C T 1204 | 13 45143440 PD3851a G C 1205 | 13 45143465 PD3851a G A 1206 | 13 47315183 PD3851a A G 1207 | 13 49163102 PD3851a G A 1208 | 13 50803538 PD3851a C T 1209 | 13 51189975 PD3851a C A 1210 | 13 52035065 PD3851a G T 1211 | 13 53059786 PD3851a C T 1212 | 13 53059789 PD3851a T A 1213 | 13 56426205 PD3851a C A 1214 | 13 59838919 PD3851a C A 1215 | 13 62113042 PD3851a G T 1216 | 13 62774474 PD3851a C T 1217 | 13 63675567 PD3851a T A 1218 | 13 66034140 PD3851a C A 1219 | 13 66101888 PD3851a T A 1220 | 13 67928832 PD3851a G T 1221 | 13 68011649 PD3851a A G 1222 | 13 68079353 PD3851a C T 1223 | 13 68415079 PD3851a C T 1224 | 13 68422387 PD3851a A T 1225 | 13 70748946 PD3851a G A 1226 | 13 70748952 PD3851a T G 1227 | 13 73894935 PD3851a C T 1228 | 13 76532096 PD3851a C T 1229 | 13 77415198 PD3851a G T 1230 | 13 78221434 PD3851a G A 1231 | 13 78587750 PD3851a T G 1232 | 13 81633601 PD3851a G T 1233 | 13 88008699 PD3851a C A 1234 | 13 90457210 PD3851a A T 1235 | 13 90554370 PD3851a T C 1236 | 13 91626804 PD3851a A T 1237 | 13 93094556 PD3851a C T 1238 | 13 93151618 PD3851a C T 1239 | 13 93168365 PD3851a G C 1240 | 13 98567774 PD3851a G A 1241 | 13 99547267 PD3851a C A 1242 | 13 103916074 PD3851a G A 1243 | 13 104156099 PD3851a C T 1244 | 13 104651017 PD3851a C T 1245 | 13 104935807 PD3851a G A 1246 | 13 107123738 PD3851a C T 1247 | 13 107265921 PD3851a A C 1248 | 13 107326919 PD3851a T G 1249 | 13 107752215 PD3851a G A 1250 | 13 108727902 PD3851a T C 1251 | 13 109733603 PD3851a C A 1252 | 13 111678843 PD3851a T C 1253 | 13 112075964 PD3851a G T 1254 | 14 19077078 PD3851a G A 1255 | 14 19077079 PD3851a A G 1256 | 14 20250203 PD3851a G T 1257 | 14 22757309 PD3851a A G 1258 | 14 23318087 PD3851a G A 1259 | 14 26460188 PD3851a T C 1260 | 14 28665521 PD3851a G T 1261 | 14 28742939 PD3851a T C 1262 | 14 29535800 PD3851a C T 1263 | 14 29641796 PD3851a A T 1264 | 14 29917551 PD3851a C G 1265 | 14 29942533 PD3851a G C 1266 | 14 34174831 PD3851a G T 1267 | 14 41079895 PD3851a C T 1268 | 14 41628342 PD3851a G A 1269 | 14 42009505 PD3851a C T 1270 | 14 42953745 PD3851a T C 1271 | 14 44419281 PD3851a T G 1272 | 14 46791796 PD3851a A G 1273 | 14 50538185 PD3851a C T 1274 | 14 51707771 PD3851a C T 1275 | 14 54697188 PD3851a G T 1276 | 14 56774866 PD3851a C G 1277 | 14 57164168 PD3851a C T 1278 | 14 57258599 PD3851a C G 1279 | 14 57970746 PD3851a T G 1280 | 14 58048695 PD3851a A G 1281 | 14 58356258 PD3851a T G 1282 | 14 64071888 PD3851a A C 1283 | 14 66498026 PD3851a T G 1284 | 14 69524248 PD3851a C A 1285 | 14 70280153 PD3851a C T 1286 | 14 72955286 PD3851a C T 1287 | 14 77063806 PD3851a A G 1288 | 14 78185908 PD3851a T C 1289 | 14 78760990 PD3851a A T 1290 | 14 79015253 PD3851a A T 1291 | 14 79504765 PD3851a C A 1292 | 14 79507693 PD3851a G C 1293 | 14 79715514 PD3851a T A 1294 | 14 79918604 PD3851a C A 1295 | 14 80123036 PD3851a C A 1296 | 14 81353737 PD3851a G T 1297 | 14 83991666 PD3851a A T 1298 | 14 84119748 PD3851a A T 1299 | 14 84843555 PD3851a C T 1300 | 14 87211786 PD3851a G T 1301 | 14 87287629 PD3851a G T 1302 | 14 87479231 PD3851a C A 1303 | 14 87553625 PD3851a A G 1304 | 14 88059696 PD3851a A G 1305 | 14 89735177 PD3851a T G 1306 | 14 91111413 PD3851a C T 1307 | 14 96195278 PD3851a C T 1308 | 14 98908471 PD3851a G T 1309 | 14 98911309 PD3851a G C 1310 | 14 99192571 PD3851a A C 1311 | 14 99275468 PD3851a C G 1312 | 14 99440126 PD3851a A T 1313 | 14 100208368 PD3851a G A 1314 | 14 102447241 PD3851a G C 1315 | 14 102889295 PD3851a G C 1316 | 14 105195075 PD3851a C T 1317 | 14 105735976 PD3851a C A 1318 | 15 20142922 PD3851a A T 1319 | 15 20527141 PD3851a T C 1320 | 15 21194017 PD3851a G T 1321 | 15 22472516 PD3851a G A 1322 | 15 22673624 PD3851a G A 1323 | 15 23619494 PD3851a G A 1324 | 15 24914226 PD3851a T A 1325 | 15 24978101 PD3851a G T 1326 | 15 25252531 PD3851a C T 1327 | 15 26862346 PD3851a G T 1328 | 15 28968138 PD3851a T C 1329 | 15 33446801 PD3851a G T 1330 | 15 34002458 PD3851a G A 1331 | 15 35056519 PD3851a A G 1332 | 15 37678621 PD3851a T C 1333 | 15 37876589 PD3851a G C 1334 | 15 51327577 PD3851a A G 1335 | 15 54185289 PD3851a C A 1336 | 15 59561863 PD3851a C T 1337 | 15 59704001 PD3851a G A 1338 | 15 69398855 PD3851a G A 1339 | 15 69950420 PD3851a G A 1340 | 15 70436590 PD3851a C T 1341 | 15 79019632 PD3851a A C 1342 | 15 79914849 PD3851a T C 1343 | 15 80287365 PD3851a A C 1344 | 15 85927247 PD3851a A C 1345 | 15 87516186 PD3851a G C 1346 | 15 88399571 PD3851a C T 1347 | 15 88979044 PD3851a C G 1348 | 15 89170404 PD3851a A T 1349 | 15 89570434 PD3851a T C 1350 | 15 92157427 PD3851a G T 1351 | 15 92929164 PD3851a C A 1352 | 15 93988320 PD3851a G A 1353 | 15 94641354 PD3851a C G 1354 | 15 96176963 PD3851a A T 1355 | 15 97209156 PD3851a A G 1356 | 15 97737591 PD3851a T C 1357 | 15 100193897 PD3851a T A 1358 | 16 93254 PD3851a C T 1359 | 16 342908 PD3851a G A 1360 | 16 908827 PD3851a G A 1361 | 16 1105536 PD3851a G T 1362 | 16 1295147 PD3851a C T 1363 | 16 4117678 PD3851a C T 1364 | 16 4370497 PD3851a G A 1365 | 16 4672330 PD3851a C T 1366 | 16 5912390 PD3851a G T 1367 | 16 6508411 PD3851a C A 1368 | 16 6997072 PD3851a T C 1369 | 16 7258217 PD3851a C G 1370 | 16 7576156 PD3851a A G 1371 | 16 14085014 PD3851a C G 1372 | 16 16002466 PD3851a C T 1373 | 16 16512780 PD3851a T A 1374 | 16 17398553 PD3851a C G 1375 | 16 18823335 PD3851a G C 1376 | 16 20407869 PD3851a C A 1377 | 16 25523114 PD3851a C A 1378 | 16 28565234 PD3851a G A 1379 | 16 29425945 PD3851a G C 1380 | 16 31020614 PD3851a C A 1381 | 16 32381734 PD3851a C G 1382 | 16 33576044 PD3851a C A 1383 | 16 33586434 PD3851a C A 1384 | 16 33840646 PD3851a T A 1385 | 16 47128656 PD3851a G C 1386 | 16 47923556 PD3851a G T 1387 | 16 49597794 PD3851a C T 1388 | 16 51934753 PD3851a A G 1389 | 16 53845428 PD3851a C T 1390 | 16 53989095 PD3851a G A 1391 | 16 55207671 PD3851a A C 1392 | 16 55357254 PD3851a G A 1393 | 16 57041092 PD3851a C T 1394 | 16 59496231 PD3851a T A 1395 | 16 60010684 PD3851a G A 1396 | 16 60941432 PD3851a T A 1397 | 16 62896922 PD3851a G T 1398 | 16 63024919 PD3851a C A 1399 | 16 63775048 PD3851a G C 1400 | 16 64982424 PD3851a C T 1401 | 16 66253483 PD3851a T C 1402 | 16 66779570 PD3851a T G 1403 | 16 67355302 PD3851a A G 1404 | 16 67444874 PD3851a G A 1405 | 16 74387115 PD3851a G C 1406 | 16 75940409 PD3851a C A 1407 | 16 76087633 PD3851a T G 1408 | 16 76237390 PD3851a T C 1409 | 16 76756881 PD3851a C T 1410 | 16 77907809 PD3851a G A 1411 | 16 78694921 PD3851a G T 1412 | 16 78954468 PD3851a A T 1413 | 16 81615296 PD3851a T A 1414 | 16 82822550 PD3851a G C 1415 | 16 85381583 PD3851a T G 1416 | 16 86483117 PD3851a G A 1417 | 16 86502266 PD3851a G A 1418 | 17 84057 PD3851a G A 1419 | 17 4838693 PD3851a T C 1420 | 17 7445638 PD3851a G A 1421 | 17 8641814 PD3851a A T 1422 | 17 11461104 PD3851a G T 1423 | 17 12083148 PD3851a C G 1424 | 17 12475321 PD3851a G A 1425 | 17 13298849 PD3851a C A 1426 | 17 14505892 PD3851a A C 1427 | 17 14606028 PD3851a G A 1428 | 17 14892988 PD3851a T A 1429 | 17 15596129 PD3851a T G 1430 | 17 16228468 PD3851a G C 1431 | 17 18731520 PD3851a C T 1432 | 17 19240989 PD3851a G A 1433 | 17 19288335 PD3851a G T 1434 | 17 19575877 PD3851a A G 1435 | 17 20321453 PD3851a A G 1436 | 17 21672904 PD3851a C T 1437 | 17 25274143 PD3851a G A 1438 | 17 25304408 PD3851a A T 1439 | 17 30263426 PD3851a C A 1440 | 17 31868200 PD3851a A G 1441 | 17 33496788 PD3851a C T 1442 | 17 33748860 PD3851a A G 1443 | 17 38973362 PD3851a C A 1444 | 17 39973219 PD3851a G T 1445 | 17 40729730 PD3851a A G 1446 | 17 40755034 PD3851a G C 1447 | 17 41316483 PD3851a C A 1448 | 17 43069646 PD3851a C A 1449 | 17 45209655 PD3851a G A 1450 | 17 45209656 PD3851a A C 1451 | 17 45513836 PD3851a A T 1452 | 17 49249557 PD3851a C T 1453 | 17 51747556 PD3851a A G 1454 | 17 51902021 PD3851a T A 1455 | 17 53789139 PD3851a C T 1456 | 17 55326410 PD3851a G A 1457 | 17 55900686 PD3851a T A 1458 | 17 57662858 PD3851a C T 1459 | 17 60253407 PD3851a C T 1460 | 17 60349118 PD3851a G T 1461 | 17 60632578 PD3851a C G 1462 | 17 61731555 PD3851a A T 1463 | 17 62239501 PD3851a T A 1464 | 17 62682233 PD3851a C T 1465 | 17 63663872 PD3851a A T 1466 | 17 66225709 PD3851a T A 1467 | 17 66328228 PD3851a A G 1468 | 17 67693169 PD3851a G T 1469 | 17 68062543 PD3851a C T 1470 | 17 68352572 PD3851a C T 1471 | 17 68753592 PD3851a C T 1472 | 17 69690750 PD3851a A T 1473 | 17 69704439 PD3851a C A 1474 | 17 71672745 PD3851a G A 1475 | 17 75476964 PD3851a G A 1476 | 17 76484181 PD3851a G T 1477 | 17 81165695 PD3851a T G 1478 | 18 2525866 PD3851a C T 1479 | 18 3518089 PD3851a A G 1480 | 18 4534760 PD3851a G T 1481 | 18 5945762 PD3851a C A 1482 | 18 7093272 PD3851a G A 1483 | 18 14272960 PD3851a C T 1484 | 18 14841670 PD3851a G A 1485 | 18 20234963 PD3851a A T 1486 | 18 20258268 PD3851a C T 1487 | 18 25662304 PD3851a G A 1488 | 18 26237361 PD3851a T C 1489 | 18 27802523 PD3851a C A 1490 | 18 29698516 PD3851a G A 1491 | 18 29718409 PD3851a G C 1492 | 18 32641048 PD3851a T A 1493 | 18 33680251 PD3851a C T 1494 | 18 34662379 PD3851a C T 1495 | 18 38473380 PD3851a G T 1496 | 18 38901597 PD3851a A G 1497 | 18 40307121 PD3851a C A 1498 | 18 43468163 PD3851a C T 1499 | 18 45533277 PD3851a T G 1500 | 18 46217918 PD3851a A G 1501 | 18 48306574 PD3851a T C 1502 | 18 48943052 PD3851a G A 1503 | 18 50535400 PD3851a A T 1504 | 18 50856171 PD3851a A T 1505 | 18 51033933 PD3851a G A 1506 | 18 51709829 PD3851a G C 1507 | 18 52908410 PD3851a C A 1508 | 18 53717316 PD3851a T A 1509 | 18 54122753 PD3851a C A 1510 | 18 55079795 PD3851a A C 1511 | 18 55888944 PD3851a A T 1512 | 18 58802486 PD3851a G A 1513 | 18 60668057 PD3851a C A 1514 | 18 61685261 PD3851a C T 1515 | 18 62315784 PD3851a C A 1516 | 18 63689973 PD3851a C T 1517 | 18 64786229 PD3851a C T 1518 | 18 66277118 PD3851a G A 1519 | 18 66865304 PD3851a T A 1520 | 18 68164035 PD3851a C A 1521 | 18 69625852 PD3851a T A 1522 | 18 69767879 PD3851a G T 1523 | 18 70586498 PD3851a C A 1524 | 18 75210423 PD3851a G A 1525 | 18 75462032 PD3851a C T 1526 | 18 76078346 PD3851a C A 1527 | 18 76079680 PD3851a G A 1528 | 18 76541347 PD3851a G A 1529 | 19 307623 PD3851a A G 1530 | 19 843169 PD3851a G A 1531 | 19 12023650 PD3851a A T 1532 | 19 12211997 PD3851a G T 1533 | 19 14816385 PD3851a G A 1534 | 19 19181374 PD3851a G T 1535 | 19 20758017 PD3851a G A 1536 | 19 21558661 PD3851a A G 1537 | 19 29492442 PD3851a C G 1538 | 19 29947533 PD3851a C A 1539 | 19 31997221 PD3851a C G 1540 | 19 33491287 PD3851a C T 1541 | 19 39566735 PD3851a G A 1542 | 19 42873725 PD3851a G C 1543 | 19 49693664 PD3851a C T 1544 | 19 50868461 PD3851a C G 1545 | 19 57020571 PD3851a G A 1546 | 19 58726830 PD3851a C T 1547 | 20 5629768 PD3851a G A 1548 | 20 5813015 PD3851a G T 1549 | 20 7541577 PD3851a G T 1550 | 20 8436447 PD3851a G T 1551 | 20 10620138 PD3851a G A 1552 | 20 11446272 PD3851a G C 1553 | 20 11501552 PD3851a T G 1554 | 20 11780895 PD3851a G A 1555 | 20 12980536 PD3851a C G 1556 | 20 17273721 PD3851a C T 1557 | 20 23846315 PD3851a T A 1558 | 20 25782594 PD3851a T G 1559 | 20 25875248 PD3851a A G 1560 | 20 25876914 PD3851a A C 1561 | 20 25876944 PD3851a A G 1562 | 20 25876950 PD3851a A G 1563 | 20 30833603 PD3851a C T 1564 | 20 31076323 PD3851a A C 1565 | 20 31090435 PD3851a C T 1566 | 20 31790199 PD3851a G T 1567 | 20 31870949 PD3851a G A 1568 | 20 32104346 PD3851a G A 1569 | 20 35706208 PD3851a T A 1570 | 20 41386870 PD3851a C G 1571 | 20 41422401 PD3851a G A 1572 | 20 42940349 PD3851a T G 1573 | 20 43627817 PD3851a C T 1574 | 20 44794448 PD3851a C A 1575 | 20 45625321 PD3851a G A 1576 | 20 47799496 PD3851a G C 1577 | 20 47848036 PD3851a C T 1578 | 20 48837639 PD3851a C T 1579 | 20 48863927 PD3851a G A 1580 | 20 51585073 PD3851a G A 1581 | 20 52143471 PD3851a G T 1582 | 20 52385951 PD3851a G A 1583 | 20 53123214 PD3851a G T 1584 | 20 53243519 PD3851a G T 1585 | 20 54008996 PD3851a C G 1586 | 20 54539423 PD3851a G T 1587 | 20 55697486 PD3851a A T 1588 | 20 56050195 PD3851a G A 1589 | 20 56352606 PD3851a C T 1590 | 20 56742674 PD3851a G C 1591 | 20 57988948 PD3851a G A 1592 | 20 58332428 PD3851a A C 1593 | 20 58544201 PD3851a G C 1594 | 20 58943169 PD3851a A T 1595 | 20 62807323 PD3851a T G 1596 | 21 9425929 PD3851a A T 1597 | 21 9440080 PD3851a T A 1598 | 21 9451356 PD3851a T C 1599 | 21 9677089 PD3851a A T 1600 | 21 9927832 PD3851a C T 1601 | 21 10021157 PD3851a A T 1602 | 21 10026379 PD3851a A G 1603 | 21 10372775 PD3851a A G 1604 | 21 14619340 PD3851a C A 1605 | 21 15160009 PD3851a C T 1606 | 21 15326080 PD3851a C T 1607 | 21 18937092 PD3851a G A 1608 | 21 19163914 PD3851a C T 1609 | 21 23685060 PD3851a C T 1610 | 21 26727664 PD3851a C G 1611 | 21 30623133 PD3851a C A 1612 | 21 31271025 PD3851a G A 1613 | 21 32536959 PD3851a G A 1614 | 21 35818270 PD3851a A G 1615 | 21 37855670 PD3851a C A 1616 | 21 38061657 PD3851a A T 1617 | 21 38930626 PD3851a T C 1618 | 21 40337874 PD3851a C T 1619 | 21 43700873 PD3851a A G 1620 | 22 16974370 PD3851a G T 1621 | 22 17359831 PD3851a A C 1622 | 22 17378411 PD3851a C T 1623 | 22 18536960 PD3851a C T 1624 | 22 18871412 PD3851a C A 1625 | 22 23814262 PD3851a G C 1626 | 22 33896482 PD3851a T C 1627 | 22 33927287 PD3851a G A 1628 | 22 47515446 PD3851a A G 1629 | 22 47515467 PD3851a G A 1630 | 22 48078493 PD3851a C A 1631 | 22 49242557 PD3851a C T 1632 | X 4960256 PD3851a C T 1633 | X 5430343 PD3851a T C 1634 | X 6129999 PD3851a C T 1635 | X 7412879 PD3851a C T 1636 | X 12255065 PD3851a C T 1637 | X 13389908 PD3851a T G 1638 | X 14813492 PD3851a G T 1639 | X 14914225 PD3851a T G 1640 | X 17278135 PD3851a A T 1641 | X 17705156 PD3851a T G 1642 | X 20392488 PD3851a C T 1643 | X 20949087 PD3851a T C 1644 | X 24682285 PD3851a A T 1645 | X 26831793 PD3851a G T 1646 | X 26872479 PD3851a C T 1647 | X 27195672 PD3851a G C 1648 | X 27887064 PD3851a A C 1649 | X 29967142 PD3851a A C 1650 | X 30360337 PD3851a G A 1651 | X 30532522 PD3851a T C 1652 | X 30610343 PD3851a C A 1653 | X 30677389 PD3851a A G 1654 | X 32813667 PD3851a C T 1655 | X 32816854 PD3851a T C 1656 | X 32816916 PD3851a C T 1657 | X 33260795 PD3851a C T 1658 | X 33493249 PD3851a T A 1659 | X 33752518 PD3851a C G 1660 | X 34661976 PD3851a C A 1661 | X 35166096 PD3851a A T 1662 | X 35300128 PD3851a T A 1663 | X 35411352 PD3851a G T 1664 | X 35679647 PD3851a A G 1665 | X 36346147 PD3851a C T 1666 | X 36376444 PD3851a C G 1667 | X 36470297 PD3851a A G 1668 | X 38594118 PD3851a G A 1669 | X 42266993 PD3851a G A 1670 | X 45847378 PD3851a G A 1671 | X 45961841 PD3851a A T 1672 | X 47785150 PD3851a C G 1673 | X 47827359 PD3851a C T 1674 | X 48944289 PD3851a G A 1675 | X 49764598 PD3851a G C 1676 | X 50520304 PD3851a A C 1677 | X 51001179 PD3851a G T 1678 | X 54322245 PD3851a C T 1679 | X 55092317 PD3851a T C 1680 | X 62303262 PD3851a A T 1681 | X 62565049 PD3851a C T 1682 | X 64394405 PD3851a T C 1683 | X 65762651 PD3851a A G 1684 | X 69452815 PD3851a T A 1685 | X 69684768 PD3851a T G 1686 | X 72442375 PD3851a T C 1687 | X 73258287 PD3851a G A 1688 | X 73684829 PD3851a C T 1689 | X 76416842 PD3851a C T 1690 | X 76417032 PD3851a C A 1691 | X 77561805 PD3851a T A 1692 | X 77796140 PD3851a A T 1693 | X 78179993 PD3851a G C 1694 | X 78551712 PD3851a C T 1695 | X 81177262 PD3851a G A 1696 | X 81766172 PD3851a C T 1697 | X 82768441 PD3851a A G 1698 | X 83118456 PD3851a G T 1699 | X 84689705 PD3851a A G 1700 | X 86016775 PD3851a C T 1701 | X 86102414 PD3851a G T 1702 | X 86751958 PD3851a G C 1703 | X 88809277 PD3851a G A 1704 | X 89166689 PD3851a C A 1705 | X 90141706 PD3851a A G 1706 | X 90547179 PD3851a G C 1707 | X 90590437 PD3851a A G 1708 | X 90657136 PD3851a C T 1709 | X 91481777 PD3851a G C 1710 | X 93495936 PD3851a C A 1711 | X 94126178 PD3851a A T 1712 | X 96720211 PD3851a C T 1713 | X 100576545 PD3851a A C 1714 | X 100941309 PD3851a C A 1715 | X 101217457 PD3851a T C 1716 | X 101953543 PD3851a T G 1717 | X 103611807 PD3851a G A 1718 | X 103928177 PD3851a C A 1719 | X 105001922 PD3851a G C 1720 | X 106398946 PD3851a T C 1721 | X 106610421 PD3851a A G 1722 | X 107313950 PD3851a C A 1723 | X 107519687 PD3851a G T 1724 | X 108319588 PD3851a T C 1725 | X 111137866 PD3851a A T 1726 | X 112317092 PD3851a C T 1727 | X 114020204 PD3851a C A 1728 | X 114312091 PD3851a G C 1729 | X 115823083 PD3851a T A 1730 | X 116394297 PD3851a G A 1731 | X 117262951 PD3851a C T 1732 | X 117625133 PD3851a G A 1733 | X 119099148 PD3851a T A 1734 | X 119966487 PD3851a C G 1735 | X 120824979 PD3851a C T 1736 | X 121750828 PD3851a T A 1737 | X 122981600 PD3851a G A 1738 | X 123862517 PD3851a C A 1739 | X 124295124 PD3851a T C 1740 | X 124965038 PD3851a T G 1741 | X 125094459 PD3851a T G 1742 | X 127499018 PD3851a T A 1743 | X 127704661 PD3851a G T 1744 | X 127945208 PD3851a A C 1745 | X 128362541 PD3851a C A 1746 | X 129621442 PD3851a C G 1747 | X 130195547 PD3851a A G 1748 | X 133918560 PD3851a G T 1749 | X 134066857 PD3851a C T 1750 | X 135512587 PD3851a A G 1751 | X 136525132 PD3851a G A 1752 | X 137111791 PD3851a C G 1753 | X 137701670 PD3851a C G 1754 | X 137732731 PD3851a A T 1755 | X 138113669 PD3851a G T 1756 | X 138239662 PD3851a C T 1757 | X 138546385 PD3851a C T 1758 | X 138605985 PD3851a C A 1759 | X 138644835 PD3851a C A 1760 | X 138941622 PD3851a A G 1761 | X 139088184 PD3851a A T 1762 | X 141129825 PD3851a G C 1763 | X 141333231 PD3851a C G 1764 | X 142380795 PD3851a A T 1765 | X 142757476 PD3851a C G 1766 | X 142960678 PD3851a C T 1767 | X 143757193 PD3851a G C 1768 | X 144134555 PD3851a C A 1769 | X 144336735 PD3851a G C 1770 | X 144814409 PD3851a T C 1771 | X 144866405 PD3851a A G 1772 | X 144989897 PD3851a C A 1773 | X 145237499 PD3851a C T 1774 | X 146858161 PD3851a C A 1775 | X 147226365 PD3851a C T 1776 | X 148222288 PD3851a C A 1777 | X 148311975 PD3851a G T 1778 | X 149709324 PD3851a G A 1779 | X 150332896 PD3851a G T 1780 | X 151366329 PD3851a A G 1781 | X 151397455 PD3851a T C 1782 | X 153748614 PD3851a G A 1783 | -------------------------------------------------------------------------------- /SigProfilerExtractor/sigpro.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Mon Aug 27 13:39:29 2018 5 | 6 | @author: S M Ashiqul Islam (Mishu) 7 | 8 | 9 | ########################################## 10 | SigProfilerExtractor (``sigproextractor``) 11 | ########################################## 12 | 13 | SigProfilerExtractor allows de novo extraction of mutational signatures from data 14 | generated in a matrix format. The tool identifies the number of operative mutational 15 | signatures, their activities in each sample, and the probability for each signature to 16 | cause a specific mutation type in a cancer sample. The tool makes use of SigProfilerMatrixGenerator 17 | and SigProfilerPlotting. 18 | 19 | """ 20 | import os 21 | 22 | os.environ["MKL_NUM_THREADS"] = "1" 23 | os.environ["NUMEXPR_NUM_THREADS"] = "1" 24 | os.environ["OMP_NUM_THREADS"] = "1" 25 | 26 | import matplotlib.pyplot as plt 27 | 28 | plt.switch_backend("agg") 29 | 30 | import scipy 31 | from scipy import io as sio 32 | import sklearn 33 | import numpy as np 34 | import pandas as pd 35 | import time 36 | import shutil 37 | import platform 38 | import datetime 39 | import psutil 40 | import copy 41 | import sigProfilerPlotting 42 | import multiprocessing 43 | from SigProfilerExtractor import subroutines as sub 44 | import SigProfilerMatrixGenerator 45 | from SigProfilerMatrixGenerator.scripts import ( 46 | SigProfilerMatrixGeneratorFunc as datadump, 47 | ) 48 | from SigProfilerMatrixGenerator.scripts import SVMatrixGenerator as sv 49 | from SigProfilerMatrixGenerator.scripts import CNVMatrixGenerator as scna 50 | import multiprocessing as mp 51 | import SigProfilerExtractor as cosmic 52 | import SigProfilerAssignment as spa 53 | from SigProfilerAssignment import single_sample as spasub 54 | from SigProfilerAssignment import decomposition as decomp 55 | from numpy.random import SeedSequence 56 | from sigProfilerPlotting import sigProfilerPlotting as sigPlot 57 | 58 | MUTTYPE = "MutationType" 59 | 60 | 61 | def memory_usage(): 62 | pid = os.getpid() 63 | py = psutil.Process(pid) 64 | memoryUse1 = py.memory_info()[0] / 2.0**30 # memory use in GB...I think 65 | print( 66 | "\n************** Reported Current Memory Use: " 67 | + str(round(memoryUse1, 2)) 68 | + " GB *****************\n" 69 | ) 70 | 71 | 72 | def importdata(datatype="matrix"): 73 | """ 74 | Imports the path of example data. 75 | 76 | parameters 77 | ---------- 78 | 79 | datatype: A string. Type of data. The type of data should be one of the following: 80 | - "vcf": used for vcf format data. 81 | - "matrix": used for text format data. This format represents the catalog of mutations seperated by tab. 82 | - "matobj": used for matlab object format data. 83 | 84 | 85 | 86 | Returns: 87 | ------- 88 | 89 | The path of the example data. 90 | 91 | Example: 92 | ------- 93 | >>> from SigProfilerExtractor import sigpro as sig 94 | >>> data = sig.importdata("table") 95 | 96 | This "data" variable can be used as a parameter of the "project" argument of the sigProfilerExtractor function 97 | 98 | """ 99 | 100 | paths = cosmic.__path__[0] 101 | if datatype == "matobj": 102 | data = paths + "/data/MatObjInput/21_breast_WGS_substitutions.mat" 103 | elif datatype == "text" or datatype == "table" or datatype == "matrix": 104 | data = paths + "/data/TextInput/Samples_SBS.txt" 105 | elif datatype == "matrix_DBS": 106 | data = paths + "/data/TextInput/Samples_DBS.txt" 107 | elif datatype == "matrix_ID": 108 | data = paths + "/data/TextInput/Samples_ID.txt" 109 | elif datatype == "matrix_CNV": 110 | data = paths + "/data/TextInput/Samples_CNV.txt" 111 | elif datatype == "csv": 112 | data = paths + "/data/CSVInput/csv_example.csv" 113 | elif datatype == "seg:BATTENBERG": 114 | data = paths + "/data/CNVInput/Battenberg_test.tsv" 115 | elif datatype == "matrix_SV": 116 | data = paths + "/data/TextInput/Samples_SV.txt" 117 | elif datatype == "vcf": 118 | data = paths + "/data/VCFInput/" 119 | return data 120 | 121 | 122 | def record_parameters(sysdata, execution_parameters, start_time): 123 | """ 124 | Extracts mutational signatures from an array of samples. 125 | 126 | """ 127 | sysdata.write("\n--------------EXECUTION PARAMETERS--------------\n") 128 | sysdata.write("INPUT DATA\n") 129 | sysdata.write("\tinput_type: {}\n".format(execution_parameters["input_type"])) 130 | sysdata.write("\toutput: {}\n".format(execution_parameters["output"])) 131 | sysdata.write("\tinput_data: {}\n".format(execution_parameters["input_data"])) 132 | sysdata.write( 133 | "\treference_genome: {}\n".format(execution_parameters["reference_genome"]) 134 | ) 135 | sysdata.write("\tcontext_types: {}\n".format(execution_parameters["context_type"])) 136 | sysdata.write("\texome: {}\n".format(execution_parameters["exome"])) 137 | sysdata.write("NMF REPLICATES\n") 138 | sysdata.write( 139 | "\tminimum_signatures: {}\n".format(execution_parameters["minimum_signatures"]) 140 | ) 141 | sysdata.write( 142 | "\tmaximum_signatures: {}\n".format(execution_parameters["maximum_signatures"]) 143 | ) 144 | sysdata.write( 145 | "\tNMF_replicates: {}\n".format(execution_parameters["NMF_replicates"]) 146 | ) 147 | sysdata.write("NMF ENGINE\n") 148 | sysdata.write("\tNMF_init: {}\n".format(execution_parameters["NMF_init"])) 149 | sysdata.write("\tprecision: {}\n".format(execution_parameters["precision"])) 150 | sysdata.write( 151 | "\tmatrix_normalization: {}\n".format( 152 | execution_parameters["matrix_normalization"] 153 | ) 154 | ) 155 | sysdata.write("\tresample: {}\n".format(execution_parameters["resample"])) 156 | sysdata.write("\tseeds: {}\n".format(execution_parameters["seeds"])) 157 | sysdata.write( 158 | "\tmin_NMF_iterations: {}\n".format( 159 | format(execution_parameters["min_NMF_iterations"], ",d") 160 | ) 161 | ) 162 | sysdata.write( 163 | "\tmax_NMF_iterations: {}\n".format( 164 | format(execution_parameters["max_NMF_iterations"], ",d") 165 | ) 166 | ) 167 | sysdata.write( 168 | "\tNMF_test_conv: {}\n".format( 169 | format(execution_parameters["NMF_test_conv"], ",d") 170 | ) 171 | ) 172 | sysdata.write("\tNMF_tolerance: {}\n".format(execution_parameters["NMF_tolerance"])) 173 | sysdata.write("CLUSTERING\n") 174 | sysdata.write("\tclustering_distance: {}\n".format(execution_parameters["dist"])) 175 | 176 | sysdata.write("EXECUTION\n") 177 | if execution_parameters["cpu"] == -1: 178 | sysdata.write( 179 | "\tcpu: {}; Maximum number of CPU is {}\n".format( 180 | multiprocessing.cpu_count(), multiprocessing.cpu_count() 181 | ) 182 | ) 183 | else: 184 | sysdata.write( 185 | "\tcpu: {}; Maximum number of CPU is {}\n".format( 186 | execution_parameters["cpu"], multiprocessing.cpu_count() 187 | ) 188 | ) 189 | 190 | sysdata.write("\tassignment_cpu: {}\n".format(execution_parameters["assignment_cpu"])) 191 | sysdata.write("\tgpu: {}\n".format(execution_parameters["gpu"])) 192 | sysdata.write("Solution Estimation\n") 193 | sysdata.write("\tstability: {}\n".format(execution_parameters["stability"])) 194 | sysdata.write("\tmin_stability: {}\n".format(execution_parameters["min_stability"])) 195 | sysdata.write( 196 | "\tcombined_stability: {}\n".format(execution_parameters["combined_stability"]) 197 | ) 198 | sysdata.write( 199 | "\tallow_stability_drop: {}\n".format( 200 | execution_parameters["allow_stability_drop"] 201 | ) 202 | ) 203 | 204 | sysdata.write("COSMIC MATCH\n") 205 | sysdata.write( 206 | "\topportunity_genome: {}\n".format(execution_parameters["opportunity_genome"]) 207 | ) 208 | sysdata.write( 209 | "\tcosmic_version: {}\n".format(execution_parameters["cosmic_version"]) 210 | ) 211 | sysdata.write( 212 | "\tnnls_add_penalty: {}\n".format(execution_parameters["nnls_add_penalty"]) 213 | ) 214 | sysdata.write( 215 | "\tnnls_remove_penalty: {}\n".format( 216 | execution_parameters["nnls_remove_penalty"] 217 | ) 218 | ) 219 | sysdata.write( 220 | "\tinitial_remove_penalty: {}\n".format( 221 | execution_parameters["initial_remove_penalty"] 222 | ) 223 | ) 224 | sysdata.write( 225 | "\texport_probabilities: {}\n".format( 226 | execution_parameters["export_probabilities"] 227 | ) 228 | ) 229 | sysdata.write( 230 | "\tcollapse_to_SBS96: {}\n".format(execution_parameters["collapse_to_SBS96"]) 231 | ) 232 | 233 | sysdata.write("\n-------Analysis Progress------- \n") 234 | sysdata.write("[{}] Analysis started: \n".format(str(start_time).split(".")[0])) 235 | 236 | 237 | def sigProfilerExtractor( 238 | input_type, 239 | output, 240 | input_data, 241 | reference_genome="GRCh37", 242 | opportunity_genome="GRCh37", 243 | cosmic_version=3.4, 244 | context_type="default", 245 | exome=False, 246 | minimum_signatures=1, 247 | maximum_signatures=25, 248 | nmf_replicates=100, 249 | resample=True, 250 | batch_size=1, 251 | cpu=-1, 252 | assignment_cpu=-1, 253 | gpu=False, 254 | nmf_init="random", 255 | precision="single", 256 | matrix_normalization="gmm", 257 | seeds="random", 258 | min_nmf_iterations=10000, 259 | max_nmf_iterations=1000000, 260 | nmf_test_conv=10000, 261 | nmf_tolerance=1e-15, 262 | nnls_add_penalty=0.05, 263 | nnls_remove_penalty=0.01, 264 | initial_remove_penalty=0.05, 265 | collapse_to_SBS96=True, 266 | clustering_distance="cosine", 267 | export_probabilities=True, 268 | make_decomposition_plots=True, 269 | stability=0.8, 270 | min_stability=0.2, 271 | combined_stability=1.0, 272 | allow_stability_drop=False, 273 | get_all_signature_matrices=False, 274 | stop_after_extraction=False, 275 | volume=None, 276 | ): 277 | """ 278 | Extracts mutational signatures from an array of samples. 279 | 280 | 281 | Parameters 282 | ---------- 283 | 284 | INPUT DATA:- 285 | 286 | input_type: A string. Type of input. The type of input should be one of the following: 287 | - "vcf": used for vcf format inputs. 288 | - "matrix": used for table format inputs using a tab seperated file. 289 | 290 | 291 | output: A string. The name of the output folder. The output folder will be generated in the current working directory. 292 | 293 | input_data: A string. Name of the input folder (in case of "vcf" type input) or the input file (in case of "table" type input). The project file or folder should be inside the current working directory. For the "vcf" type input,the project has to be a folder which will contain the vcf files in vcf format or text formats. The "text"type projects have to be a file. 294 | 295 | reference_genome: A string, optional. The name of the reference genome. The default reference genome is "GRCh37". This parameter is applicable only if the input_type is "vcf". 296 | 297 | opportunity_genome: The build or version of the reference genome for the reference signatures. The default opportunity genome is GRCh37. If the input_type is "vcf", the opportunity_genome automatically matches the input reference genome value. Only the genomes available in COSMIC are supported (GRCh37, GRCh38, mm9, mm10 and rn6). If a different opportunity genome is selected, the default genome GRCh37 will be used. 298 | 299 | context_type: A list of strings, optional. The items in the list defines the mutational contexts to be considered to extract the signatures. The default value is "SBS96,DBS78,ID83". 300 | 301 | exome: Boolean, optional. Defines if the exomes will be extracted. The default value is "False". 302 | 303 | 304 | NMF RUNS:- 305 | 306 | minimum_signature: A positive integer, optional. The minimum number of signatures to be extracted. The default value is 1 307 | 308 | maximum_signatures: A positive integer, optional. The maximum number of signatures to be extracted. The default value is 10 309 | 310 | nmf_replicates: A positive integer, optional. The number of iteration to be performed to extract each number signature. The default value is 100 311 | 312 | resample: Boolean, optional. Default is True. If True, add poisson noise to samples by resampling. 313 | 314 | seeds: Boolean. Default is "random". If random, then the seeds for resampling will be random for different analysis. 315 | If not random, then seeds will be obtained from a given path of a .txt file that contains a list of seed. 316 | 317 | NMF RUNS:- 318 | 319 | matrix_normalization: A string. Method of normalizing the genome matrix before it is analyzed by NMF. Default is "log2". Other options are "gmm", "100X" or "no_normalization". 320 | 321 | nmf_init: A String. The initialization algorithm for W and H matrix of NMF. Options are 'random', 'nndsvd', 'nndsvda', 'nndsvdar' and 'nndsvd_min' 322 | Default is 'nndsvd_min'. 323 | 324 | precision: A string. Values should be single or double. Default is single. 325 | 326 | min_nmf_iterations: An integer. Value defines the minimum number of iterations to be completed before NMF converges. Default is 2000. 327 | 328 | max_nmf_iterations: An integer. Value defines the maximum number of iterations to be completed before NMF converges. Default is 200000 329 | 330 | nmf_test_conv: An integer. Value definer the number number of iterations to done between checking next convergence. 331 | 332 | nmf_tolerance: A float. Value defines the tolerance to achieve to converge. 333 | 334 | 335 | EXECUTION:- 336 | 337 | cpu: An integer, optional. The number of processors to be used to extract the signatures. The default value is -1 which will use all available processors. 338 | 339 | gpu:Boolean, optional. Defines if the GPU resource will used if available. Default is False. If True, the GPU resource 340 | will be used in the computation. 341 | 342 | batch_size: An integer. Will be effective only if the GPU is used. Defines the number of NMF replicates to be performed 343 | by each CPU during the parallel processing. Default is 1. 344 | 345 | 346 | SOLUTION ESTIMATION THRESH-HOLDS:- 347 | 348 | stability: A float. Default is 0.8. The cutoff thresh-hold of the average stability. Solutions with average stabilities below this thresh-hold will not be considered. 349 | 350 | min_stability: A float. Default is 0.2. The cutoff thresh-hold of the minimum stability. Solutions with minimum stabilities below this thresh-hold will not be considered. 351 | 352 | combined_stability: A float. Default is 1.0. The cutoff thresh-hold of the combined stability (sum of average and minimum stability). Solutions with combined stabilities below this thresh-hold will not be considered. 353 | 354 | allow_stability_drop: Boolean, optional. Default is False. Defines if solutions with a drop in stability with respect to the highest stable number of signatures will be considered. 355 | 356 | 357 | DECOMPOSITION:- 358 | 359 | nnls_add_penalty: Float, optional. Takes any positive float. Default is 0.05. Defines the strong (add) thresh-hold cutoff to be assigned signatures to a sample. 360 | 361 | nnls_remove_penalty: Float, optional. Takes any positive float. Default is 0.01. Defines the weak (remove) thresh-hold cutoff to be assigned signatures to a sample. 362 | 363 | initial_remove_penalty: Float, optional. Takes any positive float. Default is 0.05. Defines the initial weak (remove) thresh-hold cutoff to be assigned COSMIC signatures to a sample. 364 | 365 | refit_denovo_signatures: Boolean, optional. Default is False. If True, then refit the denovo signatures with nnls. 366 | 367 | make_decomposition_plots: Boolean, optional. Defualt is True. If True, Denovo to Cosmic sigantures decompostion plots will be created as a part the results. 368 | 369 | 370 | OTHERS:- 371 | 372 | get_all_signature_matrices: A Boolean. If true, the Ws and Hs from all the NMF iterations are generated in the output. 373 | 374 | export_probabilities: A Boolean. Defualt is True. If False, then doesn't create the probability matrix. 375 | 376 | 377 | 378 | Returns 379 | ------- 380 | To learn about the output, please visit https://osf.io/t6j7u/wiki/home/ 381 | 382 | 383 | Examples 384 | -------- 385 | 386 | Examples 387 | -------- 388 | 389 | >>> from SigProfilerExtractor import sigpro as sig 390 | 391 | # to get input from vcf files 392 | >>> path_to_example_folder_containing_vcf_files = sig.importdata("vcf") 393 | >>> data = path_to_example_folder_containing_vcf_files # you can put the path to your folder containing the vcf samples 394 | >>> sig.sigProfilerExtractor("vcf", "example_output", data, minimum_signatures=1, maximum_signatures=3) 395 | 396 | Wait untill the excecution is finished. The process may a couple of hours based on the size of the data. 397 | Check the current working directory for the "example_output" folder. 398 | 399 | # to get input from table format (mutation catalog matrix) 400 | >>> path_to_example_table = sig.importdata("matrix") 401 | >>> data = path_to_example_table # you can put the path to your tab delimited file containing the mutational catalog matrix/table 402 | >>> sig.sigProfilerExtractor("matrix", "example_output", data, opportunity_genome="GRCh38", minimum_signatures=1, maximum_signatures=3) 403 | 404 | Wait untill the excecution is finished. The process may a couple of hours based on the size of the data. 405 | Check the results in the "example_output" folder. 406 | """ 407 | memory_usage() 408 | # record the start time 409 | start_time = datetime.datetime.now() 410 | 411 | # set the output variable 412 | out_put = output 413 | 414 | if gpu == True: 415 | import torch 416 | 417 | if gpu and (torch.cuda.device_count() == 0): 418 | raise RuntimeError("GPU not available!") 419 | 420 | #################################### At first create the system data file #################################### 421 | if not os.path.exists(out_put): 422 | os.makedirs(out_put) 423 | sysdata = open(out_put + "/JOB_METADATA.txt", "w") 424 | sysdata.write("THIS FILE CONTAINS THE METADATA ABOUT SYSTEM AND RUNTIME\n\n\n") 425 | sysdata.write("-------System Info-------\n") 426 | sysdata.write( 427 | "Operating System Name: " 428 | + platform.uname()[0] 429 | + "\n" 430 | + "Nodename: " 431 | + platform.uname()[1] 432 | + "\n" 433 | + "Release: " 434 | + platform.uname()[2] 435 | + "\n" 436 | + "Version: " 437 | + platform.uname()[3] 438 | + "\n" 439 | ) 440 | sysdata.write("\n-------Python and Package Versions------- \n") 441 | sysdata.write( 442 | "Python Version: " 443 | + str(platform.sys.version_info.major) 444 | + "." 445 | + str(platform.sys.version_info.minor) 446 | + "." 447 | + str(platform.sys.version_info.micro) 448 | + "\n" 449 | ) 450 | sysdata.write("SigProfilerExtractor Version: " + cosmic.__version__ + "\n") 451 | sysdata.write( 452 | "SigProfilerPlotting Version: " + sigProfilerPlotting.__version__ + "\n" 453 | ) 454 | sysdata.write( 455 | "SigProfilerMatrixGenerator Version: " 456 | + SigProfilerMatrixGenerator.__version__ 457 | + "\n" 458 | ) 459 | sysdata.write("SigProfilerAssignment Version: " + spa.__version__ + "\n") 460 | sysdata.write("Pandas version: " + pd.__version__ + "\n") 461 | sysdata.write("Numpy version: " + np.__version__ + "\n") 462 | sysdata.write("Scipy version: " + scipy.__version__ + "\n") 463 | sysdata.write("Scikit-learn version: " + sklearn.__version__ + "\n") 464 | 465 | # format the project_name first: 466 | project = input_data # will use this variable as the parameter for project argument in SigprofilerMatrixGenerator 467 | try: 468 | if project[-1] != "/": 469 | project_name = project.split("/")[ 470 | -1 471 | ] # will use this variable as the parameter for project_name argument in SigprofilerMatrixGenerator 472 | else: 473 | project_name = project.split("/")[-2] 474 | except: 475 | project_name = "Input from DataFrame" 476 | 477 | execution_parameters = { 478 | "input_type": input_type, 479 | "output": output, 480 | "input_data": input_data, 481 | "reference_genome": reference_genome, 482 | "opportunity_genome": opportunity_genome, 483 | "cosmic_version": cosmic_version, 484 | "context_type": context_type, 485 | "exome": exome, 486 | "minimum_signatures": minimum_signatures, 487 | "maximum_signatures": maximum_signatures, 488 | "NMF_replicates": nmf_replicates, 489 | "cpu": cpu, 490 | "assignment_cpu": assignment_cpu, 491 | "gpu": gpu, 492 | "batch_size": batch_size, 493 | "NMF_init": nmf_init, 494 | "precision": precision, 495 | "matrix_normalization": matrix_normalization, 496 | "resample": resample, 497 | "seeds": seeds, 498 | "min_NMF_iterations": min_nmf_iterations, 499 | "max_NMF_iterations": max_nmf_iterations, 500 | "NMF_test_conv": nmf_test_conv, 501 | "NMF_tolerance": nmf_tolerance, 502 | "nnls_add_penalty": nnls_add_penalty, 503 | "nnls_remove_penalty": nnls_remove_penalty, 504 | "initial_remove_penalty": initial_remove_penalty, 505 | "collapse_to_SBS96": collapse_to_SBS96, 506 | "dist": clustering_distance, 507 | "export_probabilities": export_probabilities, 508 | "make_decompostion_plots": make_decomposition_plots, 509 | "stability": stability, 510 | "min_stability": min_stability, 511 | "combined_stability": combined_stability, 512 | "allow_stability_drop": allow_stability_drop, 513 | "get_all_signature_matrices": get_all_signature_matrices, 514 | } 515 | 516 | ################################ take the inputs from the general optional arguments #################################### 517 | startProcess = minimum_signatures 518 | endProcess = maximum_signatures 519 | mtype = context_type 520 | wall = get_all_signature_matrices 521 | add_penalty = nnls_add_penalty 522 | remove_penalty = nnls_remove_penalty 523 | genome_build = opportunity_genome 524 | refgen = reference_genome 525 | 526 | # set the sequence type ("genome" or "exome") for selection criteria and tmb plot inside the make_final_solution function 527 | if exome == False: 528 | sequence = "genome" 529 | if exome == True: 530 | sequence = "exome" 531 | 532 | # Use a SeedSequence to create generators for random number generation 533 | if seeds == "random": 534 | execution_parameters["seeds"] = seeds 535 | tmp_seed = SeedSequence().entropy 536 | seed = np.array(tmp_seed) 537 | seeds = pd.DataFrame([tmp_seed], columns=["Seed"]) 538 | seeds.to_csv(out_put + "/Seeds.txt", sep="\t", quoting=None) 539 | else: 540 | try: 541 | execution_parameters["seeds"] = seeds 542 | seeds = pd.read_csv(seeds, sep="\t", index_col=0) 543 | seeds.to_csv(out_put + "/Seeds.txt", sep="\t") 544 | seed = np.array(seeds["Seed"]) 545 | 546 | except: 547 | raise ValueError("Please set valid seeds") 548 | 549 | if input_type == "text" or input_type == "table" or input_type == "matrix": 550 | ################################### For text input files ###################################################### 551 | text_file = project 552 | title = "" # set the title for plotting 553 | 554 | if type(text_file) != str: 555 | data = text_file 556 | execution_parameters["input_data"] = ( 557 | "Matrix[" 558 | + str(data.shape[0]) 559 | + " rows X " 560 | + str(data.shape[1]) 561 | + " columns]" 562 | ) 563 | else: 564 | data = pd.read_csv(text_file, sep="\t").iloc[:, :] 565 | 566 | data = data.dropna(axis=1, inplace=False) 567 | data = data.loc[:, (data != 0).any(axis=0)] 568 | # printing the number of mutations 569 | mutation_number = str(data.shape[0]) 570 | # Re-indexing the input matrix file by using process_input function from SigProfilePlotting 571 | data = sigPlot.process_input(data, mutation_number) 572 | data.reset_index(inplace=True) 573 | genomes = data.iloc[:, 1:] 574 | genomes = np.array(genomes) 575 | allgenomes = genomes.copy() # save the allgenomes for the final results 576 | # Contruct the indeces of the matrix 577 | # setting index and columns names of processAvg and exposureAvg 578 | index = data.iloc[:, 0] 579 | colnames = data.columns[1:] 580 | allcolnames = colnames.copy() # save the allcolnames for the final results 581 | 582 | # creating list of mutational type to sync with the vcf type input 583 | mtypes = [str(genomes.shape[0])] 584 | if mtypes[0] == "78": 585 | mtypes = ["DBS78"] 586 | elif mtypes[0] == "83": 587 | mtypes = ["ID83"] 588 | elif mtypes[0] == "48": 589 | mtypes = ["CNV48"] 590 | elif mtypes[0] == "32": 591 | mtypes = ["SV32"] 592 | elif mtypes[0] == "96" or "288" or "384" or "1536" or "4608": 593 | mtypes = ["SBS" + mtypes[0]] 594 | else: 595 | mtypes = ["CH" + mtypes[0]] 596 | 597 | elif input_type == "csv": 598 | ################################# For CSV input files ####################################################### 599 | filename = project 600 | title = "" # set the title for plotting 601 | genomes, index, colnames, mtypes = sub.read_csv(filename) 602 | allgenomes = genomes.copy() 603 | allcolnames = colnames.copy() 604 | # Define the mtypes 605 | mtypes = [str(genomes.shape[0])] 606 | if mtypes[0] == "78": 607 | mtypes = ["DINUC"] 608 | elif mtypes[0] == "83": 609 | mtypes = ["ID"] 610 | 611 | elif input_type == "matobj": 612 | ################################# For matlab input files ####################################################### 613 | mat_file = project 614 | title = "" # set the title for plotting 615 | mat = sio.loadmat(mat_file) 616 | mat = sub.extract_input(mat) 617 | genomes = mat[1] 618 | allgenomes = genomes.copy() # save the allgenomes for the final results 619 | 620 | # Contruct the indeces of the matrix 621 | # setting index and columns names of processAvg and exposureAvg 622 | index1 = mat[3] 623 | index2 = mat[4] 624 | index = [] 625 | for i, j in zip(index1, index2): 626 | index.append(i[0] + "[" + j + "]" + i[2]) 627 | colnames = np.array(pd.Series(mat[2])) 628 | allcolnames = colnames.copy() # save the allcolnames for the final results 629 | index = np.array(pd.Series(index)) 630 | 631 | # creating list of mutational type to sync with the vcf type input 632 | mtypes = [str(genomes.shape[0])] 633 | if mtypes[0] == "78": 634 | mtypes = ["DINUC"] 635 | elif mtypes[0] == "83": 636 | mtypes = ["ID"] 637 | 638 | elif input_type == "vcf": 639 | ################################# For vcf input files ####################################################### 640 | title = project # set the title for plotting 641 | data = datadump.SigProfilerMatrixGeneratorFunc( 642 | project_name, 643 | refgen, 644 | project, 645 | exome=exome, 646 | bed_file=None, 647 | chrom_based=False, 648 | plot=False, 649 | gs=False, 650 | volume=volume, 651 | ) 652 | # Selecting the MutationType 653 | if mtype == ["default"]: 654 | mtypes = ["SBS96", "DBS78", "ID83"] 655 | elif mtype == "default": 656 | mtypes = ["SBS96", "DBS78", "ID83"] 657 | else: 658 | # mkeys = data.keys() 659 | mtype = mtype.upper() 660 | mtype = mtype.replace(" ", "") 661 | mtypes = mtype.split(",") 662 | # set the genome_build 663 | genome_build = refgen 664 | elif input_type.lower() == "bedpe": 665 | ##################### For SV's bedpe input files ##################### 666 | # create a directory to write the output matrices to 667 | title = project 668 | mtypes = ["SV32"] 669 | sv_outputs = os.path.join(os.path.split(input_data)[0], "SV_Matrices") 670 | 671 | # SV input processing, execution parameters 672 | genomes = sv.generateSVMatrix(project, project_name, sv_outputs) 673 | index = genomes.index.values 674 | colnames = genomes.columns 675 | allcolnames = colnames.copy() 676 | allgenomes = genomes.copy() 677 | elif input_type.split(":")[0].lower() == "seg": # seg 678 | ################# For CNV's segmentation input files ################# 679 | title = project 680 | mtypes = ["CNV48"] 681 | cnv_file_type = input_type.split(":")[1].upper() 682 | # cnv_outputs = os.path.join(os.path.split(input_data)[0], "CNV_Matrices") 683 | 684 | # SV input processing, execution parameters 685 | # project needs to be something NOT a file 686 | genomes = scna.generateCNVMatrix( 687 | cnv_file_type, input_data, cnv_file_type, output 688 | ) 689 | if MUTTYPE in genomes.columns: 690 | genomes = genomes.set_index("MutationType") 691 | index = genomes.index.values 692 | colnames = genomes.columns 693 | allcolnames = colnames.copy() 694 | allgenomes = genomes.copy() 695 | else: 696 | raise ValueError( 697 | "Please provide a correct input_type. Check help for more details" 698 | ) 699 | 700 | # recording context types 701 | execution_parameters["context_type"] = ",".join(mtypes) 702 | record_parameters(sysdata, execution_parameters, start_time) 703 | sysdata.close() 704 | ########################################################################################################################################################################################### 705 | 706 | for m in mtypes: 707 | # we need to rename the m because users input could be SBS96, SBS1536, DBS78, ID83 etc 708 | if m.startswith("SBS"): 709 | m = m[3:] # removing "SBS" 710 | elif m.startswith("DBS"): 711 | m = "DINUC" 712 | elif m.startswith("ID"): 713 | m = "ID" 714 | elif m.startswith("CNV"): 715 | m = "CNV" 716 | elif m.startswith("SV"): 717 | m = "SV" 718 | 719 | # Determine the types of mutation which will be needed for exporting and copying the files 720 | if not ( 721 | m == "DINUC" 722 | or m.startswith("DBS") 723 | or m.startswith("ID") 724 | or m.startswith("CNV") 725 | or m.startswith("SV") 726 | ): 727 | if m.startswith("SBS"): 728 | mutation_type = m 729 | elif m in ["96", "288", "384", "1536", "4608"]: 730 | mutation_type = "SBS" + m 731 | elif m.startswith("78"): 732 | mutation_type = "DBS78" 733 | elif m.startswith("83"): 734 | mutation_type = "ID83" 735 | elif m.startswith("48"): 736 | mutation_type = "CNV48" 737 | elif m.startswith("32"): 738 | mutation_type = "SV32" 739 | else: 740 | mutation_type = "CH" + m 741 | 742 | else: 743 | if m == "DINUC" or m.startswith("DBS"): 744 | mutation_type = "DBS78" 745 | elif m == "ID" or m.startswith("ID"): 746 | mutation_type = "ID83" 747 | elif m == "CNV" or m.startswith("CNV"): 748 | mutation_type = "CNV48" 749 | elif m == "SV" or m.startswith("SV"): 750 | mutation_type = "SV32" 751 | 752 | if input_type == "vcf": 753 | try: 754 | genomes = pd.DataFrame(data[m]) 755 | except KeyError: 756 | sysdata = open(out_put + "/JOB_METADATA.txt", "a") 757 | sysdata.write( 758 | "Context {} is not available in the current vcf files".format(m) 759 | + "\n" 760 | ) 761 | print("Context {} is not available in the current vcf files".format(m)) 762 | sysdata.close() 763 | continue 764 | # check if the genome is a nonzero matrix 765 | if genomes.shape == (0, 0): 766 | sysdata = open(out_put + "/JOB_METADATA.txt", "a") 767 | sysdata.write( 768 | "Sample is not a nonzero matrix for the mutation context " 769 | + m 770 | + "\n" 771 | ) 772 | print("Sample is not a nozero matrix for the mutation context " + m) 773 | sysdata.close() 774 | continue 775 | 776 | genomes = genomes.loc[:, (genomes != 0).any(axis=0)] 777 | allgenomes = genomes.copy() # save the allgenomes for the final results 778 | index = genomes.index.values 779 | colnames = genomes.columns 780 | allcolnames = colnames.copy() # save the allcolnames for the final results 781 | 782 | # check if start and end processes are bigger than the number of samples 783 | startProcess = min(startProcess, genomes.shape[1]) 784 | endProcess = min(endProcess, genomes.shape[1]) 785 | 786 | # in the plotting funciton "ID" is used as "INDEL" 787 | if m == "ID": 788 | m = "INDEL" # for plotting 789 | 790 | # create output directories to store all the results 791 | output = out_put + "/" + mutation_type 792 | est_genomes = np.zeros([1, 1]) 793 | genomes = np.array(genomes) 794 | information = [] 795 | layer_directory = output 796 | try: 797 | if not os.path.exists(layer_directory): 798 | os.makedirs(layer_directory) 799 | except: 800 | print("The {} folder could not be created".format("output")) 801 | 802 | fh = open(layer_directory + "/All_solutions_stat.csv", "w") 803 | fh.write("Total Signatures,Stability,Matrix Frobenius%,avgStability\n") 804 | fh.close() 805 | # The following for loop operates to extract data from each number of signature 806 | 807 | all_similirities_list = ( 808 | [] 809 | ) # this list is going to store the dataframes of different similirieties as items 810 | minimum_stabilities = [] 811 | 812 | # get the cutoff for normatization to handle the hypermutators 813 | 814 | normalization_cutoff = sub.get_normalization_cutoff( 815 | genomes, manual_cutoff=100 * genomes.shape[0] 816 | ) 817 | execution_parameters["normalization_cutoff"] = normalization_cutoff 818 | 819 | # pass the seed values to inner funtions: 820 | execution_parameters["seeds"] = seed 821 | 822 | if genomes.shape[1] < endProcess: 823 | endProcess = genomes.shape[1] 824 | 825 | # report the notmatlization criteria 826 | sysdata = open(out_put + "/JOB_METADATA.txt", "a") 827 | context_start_time = datetime.datetime.now() 828 | sysdata.write("\n##################################\n") 829 | sysdata.write( 830 | "\n[{}] Analysis started for {}. Matrix size [{} rows x {} columns]\n".format( 831 | str(context_start_time).split(".")[0], 832 | mutation_type, 833 | genomes.shape[0], 834 | genomes.shape[1], 835 | ) 836 | ) 837 | if execution_parameters["matrix_normalization"] == "gmm": 838 | sysdata.write( 839 | "\n[{}] Normalization GMM with cutoff value set at {}\n".format( 840 | str(datetime.datetime.now()).split(".")[0], normalization_cutoff 841 | ) 842 | ) 843 | elif execution_parameters["matrix_normalization"] == "100X": 844 | sysdata.write( 845 | "\n[{}] Normalization 100X with cutoff value set at {}\n".format( 846 | str(datetime.datetime.now()).split(".")[0], (genomes.shape[0] * 100) 847 | ) 848 | ) 849 | elif execution_parameters["matrix_normalization"] == "log2": 850 | sysdata.write( 851 | "\n[{}] Normalization Log2\n".format( 852 | str(datetime.datetime.now()).split(".")[0] 853 | ) 854 | ) 855 | elif execution_parameters["matrix_normalization"] == "none": 856 | sysdata.write( 857 | "\n[{}] Analysis is proceeding without normalization\n".format( 858 | str(datetime.datetime.now()).split(".")[0] 859 | ) 860 | ) 861 | else: 862 | sysdata.write( 863 | "\n[{}] Normalization Custom with cutoff value set at {}\n".format( 864 | str(datetime.datetime.now()).split(".")[0], 865 | execution_parameters["matrix_normalization"], 866 | ) 867 | ) 868 | 869 | sysdata.close() 870 | 871 | # Create list of pairs (x,y) where x is poisson generator (will be used to create the same noise at each rank) 872 | # and y is a random generator. The pair will be used to spawn more generators. 873 | # Note: Poisson seed will be same in each pair, but random generator will be different. 874 | 875 | # initialize root seed sequence with seed 876 | seed_seq = SeedSequence(int(execution_parameters["seeds"])) 877 | poisson_seed = seed_seq.spawn(1) 878 | 879 | # create num rank copies of the poisson seed so that noise is consistent across ranks for same replicate number 880 | poisson_list = [ 881 | copy.deepcopy(poisson_seed) for x in range(startProcess, endProcess + 1) 882 | ] 883 | replicate_generators = seed_seq.spawn(endProcess + 1 - startProcess) 884 | cluster_generators = seed_seq.spawn(endProcess + 1 - startProcess) 885 | noise_rep_pair = [] 886 | 887 | for i, j, k in zip(poisson_list, replicate_generators, cluster_generators): 888 | noise_rep_pair.append([i, j, k]) 889 | 890 | for num_sigs in range(startProcess, endProcess + 1): 891 | current_time_start = datetime.datetime.now() 892 | ( 893 | processAvg, 894 | exposureAvg, 895 | processStd, 896 | exposureStd, 897 | avgSilhouetteCoefficients, 898 | clusterSilhouetteCoefficients, 899 | finalgenomeErrors, 900 | finalgenomesReconstructed, 901 | finalWall, 902 | finalHall, 903 | converge_information, 904 | reconstruction_error, 905 | processes, 906 | ) = sub.decipher_signatures( 907 | execution_parameters, 908 | genomes=genomes, 909 | mut_context=m, 910 | i=num_sigs, 911 | noise_rep_pair=noise_rep_pair[num_sigs - startProcess], 912 | ) 913 | 914 | # remove signatures only if the process stability is above a thresh-hold of 0.85 915 | if avgSilhouetteCoefficients > -1.0: 916 | stic = time.time() 917 | if cpu > 0: 918 | pool = mp.Pool(processes=cpu) 919 | else: 920 | pool = mp.Pool() 921 | results = [ 922 | pool.apply_async( 923 | spasub.fit_signatures_pool, 924 | args=( 925 | genomes, 926 | processAvg, 927 | x, 928 | ), 929 | ) 930 | for x in range(genomes.shape[1]) 931 | ] 932 | pooloutput = [p.get() for p in results] 933 | pool.close() 934 | 935 | for i in range(len(pooloutput)): 936 | exposureAvg[:, i] = pooloutput[i][0] 937 | stoc = time.time() 938 | print("Optimization time is {} seconds".format(stoc - stic)) 939 | # Get total mutationation for each signature in reverse order and order the signatures from high to low mutation barden 940 | signature_total_mutations = np.sum(exposureAvg, axis=1).astype(int) 941 | sorted_idx = np.argsort(-signature_total_mutations) 942 | processAvg = np.take(processAvg, sorted_idx, axis=1) 943 | exposureAvg = np.take(exposureAvg, sorted_idx, axis=0) 944 | signature_total_mutations = np.sum(exposureAvg, axis=1).astype(int) 945 | processStd = np.take(processStd, sorted_idx, axis=1) 946 | exposureStd = np.take(exposureStd, sorted_idx, axis=0) 947 | clusterSilhouetteCoefficients = np.take( 948 | clusterSilhouetteCoefficients, sorted_idx, axis=0 949 | ) 950 | signature_stats = pd.DataFrame( 951 | { 952 | "Stability": clusterSilhouetteCoefficients, 953 | "Total Mutations": signature_total_mutations, 954 | } 955 | ) 956 | minimum_stabilities.append( 957 | round(np.mean(clusterSilhouetteCoefficients), 2) 958 | ) # here minimum stability is the average stability !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 959 | # Compute the estimated genome from the processAvg and exposureAvg 960 | est_genomes = np.dot(processAvg, exposureAvg) 961 | # check the similarities between the original and estimated genome for each number of signatures 962 | all_similarities, cosine_similarities = sub.calculate_similarities( 963 | genomes, est_genomes, colnames 964 | ) 965 | ########################################################################################################################################################################## 966 | # store the resutls of the loop. Here, processStd and exposureStd are standard Errors, NOT STANDARD DEVIATIONS. 967 | loopResults = [ 968 | genomes, 969 | processAvg, 970 | exposureAvg, 971 | processStd, 972 | exposureStd, 973 | avgSilhouetteCoefficients, 974 | clusterSilhouetteCoefficients, 975 | signature_total_mutations, 976 | all_similarities, 977 | signature_stats, 978 | reconstruction_error, 979 | finalgenomeErrors, 980 | finalgenomesReconstructed, 981 | converge_information, 982 | finalWall, 983 | finalHall, 984 | processes, 985 | ] 986 | information.append( 987 | [ 988 | processAvg, 989 | exposureAvg, 990 | processStd, 991 | exposureStd, 992 | clusterSilhouetteCoefficients, 993 | signature_total_mutations, 994 | signature_stats, 995 | all_similarities, 996 | ] 997 | ) # Will be used during hierarchycal approach 998 | 999 | ################################# Export the results ########################################################### 1000 | sub.export_information( 1001 | loopResults, 1002 | mutation_type, 1003 | layer_directory, 1004 | index, 1005 | colnames, 1006 | wall=wall, 1007 | sequence=sequence, 1008 | volume=volume, 1009 | ) 1010 | all_similirities_list.append(all_similarities) 1011 | current_time_end = datetime.datetime.now() 1012 | sysdata = open(out_put + "/JOB_METADATA.txt", "a") 1013 | sysdata.write( 1014 | "\n[{}] {} de novo extraction completed for a total of {} signatures! \nExecution time:{}\n".format( 1015 | str(datetime.datetime.now()).split(".")[0], 1016 | mutation_type, 1017 | processes, 1018 | str(current_time_end - current_time_start).split(".")[0], 1019 | current_time_end, 1020 | ) 1021 | ) 1022 | sysdata.close() 1023 | 1024 | ########################################## Plot Stabiltity vs Reconstruction Error ############################# 1025 | # Print the Stabiltity vs Reconstruction Error as get the solution as well 1026 | solution, all_stats = sub.stabVsRError( 1027 | layer_directory + "/All_solutions_stat.csv", 1028 | layer_directory, 1029 | title, 1030 | all_similirities_list, 1031 | mtype=mutation_type, 1032 | stability=stability, 1033 | min_stability=min_stability, 1034 | combined_stability=combined_stability, 1035 | sequence=sequence, 1036 | allow_stability_drop=allow_stability_drop, 1037 | ) 1038 | all_stats.insert( 1039 | 1, "Stability (Avg Silhouette)", minimum_stabilities 1040 | ) #!!!!!!!!!!!!!!!!1 here minimum stability is avg stability 1041 | all_stats = all_stats.set_index(["Signatures"]) 1042 | all_stats.to_csv(layer_directory + "/All_solutions_stat.csv", sep=",") 1043 | 1044 | # write the name of Samples and Matrix participating in each Layer. 1045 | layer_genome = pd.DataFrame(genomes) 1046 | layer_genome = layer_genome.set_index(index) 1047 | layer_genome.columns = colnames 1048 | layer_genome = layer_genome.rename_axis("MutationType", axis="columns") 1049 | 1050 | # record the samples 1051 | layer_genome.to_csv( 1052 | output + "/Samples.txt", sep="\t", index_label=[layer_genome.columns.name] 1053 | ) 1054 | # similarity_dataframe.to_csv(data_stat_folder+"/Similatiry_Data_All_Sigs"+str(H_iteration)+".text", sep = "\t") 1055 | del layer_genome 1056 | ################################### Decompose the new signatures into global signatures ######################### 1057 | processAvg = information[solution - startProcess][0] 1058 | exposureAvg = information[solution - startProcess][1] 1059 | processSTE = information[solution - startProcess][2] 1060 | signature_stabilities = information[solution - startProcess][4] 1061 | signature_total_mutations = information[solution - startProcess][5] 1062 | signature_stats = information[solution - startProcess][6] 1063 | all_similarities = information[solution - startProcess][7] 1064 | 1065 | signature_stabilities = sub.signature_plotting_text( 1066 | signature_stabilities, "Stability", "float" 1067 | ) 1068 | signature_total_mutations = sub.signature_plotting_text( 1069 | signature_total_mutations, "Total Mutations", "integer" 1070 | ) 1071 | listOfSignatures = sub.make_letter_ids( 1072 | idlenth=processAvg.shape[1], mtype=mutation_type 1073 | ) 1074 | 1075 | layer_directory1 = ( 1076 | output + "/Suggested_Solution/" + mutation_type + "_De-Novo_Solution" 1077 | ) 1078 | layer_directory2 = ( 1079 | output 1080 | + "/Suggested_Solution/COSMIC_" 1081 | + mutation_type 1082 | + "_Decomposed_Solution" 1083 | ) 1084 | devopts = {} 1085 | devopts["denovo_outpath"] = layer_directory1 1086 | devopts["decompose_outpath"] = layer_directory2 1087 | devopts["Assignment_outpath"] = layer_directory2 1088 | devopts["signature_stabilities"] = signature_stabilities 1089 | devopts["signature_total_mutations"] = signature_total_mutations 1090 | devopts["listOfSignatures"] = listOfSignatures 1091 | devopts["index"] = index 1092 | devopts["colnames"] = allcolnames 1093 | devopts["signature_stats"] = signature_stats 1094 | devopts["sequence"] = sequence 1095 | devopts["processSTE"] = processSTE 1096 | devopts["sequence"] = sequence 1097 | devopts["make_decomposition_plots"] = make_decomposition_plots 1098 | 1099 | # Check if genome_build is available in COSMIC, if not reset to GRCh37 1100 | if genome_build in ["GRCh37", "GRCh38", "mm9", "mm10", "mm39", "rn6", "rn7"]: 1101 | genome_build = genome_build 1102 | else: 1103 | sysdata = open(out_put + "/JOB_METADATA.txt", "a") 1104 | sysdata.write( 1105 | "\n[{}] The selected opportunity genome is {}. COSMIC signatures are available only for GRCh37/38, mm9/10/39 and rn6/7 genomes. So, the opportunity genome is reset to GRCh37.\n".format( 1106 | str(datetime.datetime.now()).split(".")[0], str(genome_build) 1107 | ) 1108 | ) 1109 | print( 1110 | "The selected opportunity genome is " 1111 | + str(genome_build) 1112 | + ". COSMIC signatures are available only for GRCh37/38, mm9/10/39 and rn6/7 genomes. So, the opportunity genome is reset to GRCh37." 1113 | ) 1114 | sysdata.close() 1115 | genome_build = "GRCh37" 1116 | if not stop_after_extraction: 1117 | decomp.spa_analyze( 1118 | allgenomes, 1119 | output, 1120 | signatures=processAvg, 1121 | genome_build=genome_build, 1122 | cosmic_version=cosmic_version, 1123 | exome=exome, 1124 | verbose=False, 1125 | decompose_fit_option=True, 1126 | denovo_refit_option=True, 1127 | cosmic_fit_option=False, 1128 | export_probabilities=export_probabilities, 1129 | devopts=devopts, 1130 | make_metadata=False, 1131 | volume=volume, 1132 | cpu=assignment_cpu, 1133 | ) 1134 | 1135 | sysdata = open(out_put + "/JOB_METADATA.txt", "a") 1136 | end_time = datetime.datetime.now() 1137 | sysdata.write("\n[{}] Analysis ended: \n".format(str(end_time).split(".")[0])) 1138 | sysdata.write("\n-------Job Status------- \n") 1139 | sysdata.write( 1140 | "Analysis of mutational signatures completed successfully! \nTotal execution time: " 1141 | + str(end_time - start_time).split(".")[0] 1142 | + " \nResults can be found in: " 1143 | + " " 1144 | + out_put 1145 | + " " 1146 | + " folder" 1147 | ) 1148 | sysdata.close() 1149 | 1150 | print( 1151 | "\n\n \nYour Job Is Successfully Completed! Thank You For Using SigProfilerExtractor.\n " 1152 | ) 1153 | --------------------------------------------------------------------------------