├── .github └── workflows │ └── tests.yml ├── .gitignore ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── RELEASING.md ├── code-of-conduct.md ├── deploy.sh ├── develop.sh ├── docs ├── Makefile ├── conf.py └── index.rst ├── lint.sh ├── pylintrc ├── requirements.txt ├── run-vaxrank-b16-test-data.sh ├── setup.py ├── test.sh ├── tests ├── __init__.py ├── common.py ├── data │ └── b16.f10 │ │ ├── b16.combined.bam │ │ ├── b16.combined.bam.bai │ │ ├── b16.combined.sam │ │ ├── b16.combined.sorted.bam │ │ ├── b16.combined.sorted.bam.bai │ │ ├── b16.expressed.vcf │ │ ├── b16.f10.127a.aldh1b1.chr4.45802539.refG.altC.sam │ │ ├── b16.f10.127a.klf6.chr13.5864876.refC.altCG.sam │ │ ├── b16.f10.127a.phip.chr9.82927102.refG.altT.sam │ │ ├── b16.f10.127a.wdr13.chrX.8125624.refC.altA.sam │ │ ├── b16.f10.Phip.vcf │ │ ├── b16.f10.Wdr13.vcf │ │ ├── b16.not-expressed.vcf │ │ └── b16.vcf ├── test_cancer_driver_gene.py ├── test_epitope_prediction.py ├── test_manufacturability.py ├── test_mutant_protein_sequence.py ├── test_shell_script.py └── testing_helpers.py └── vaxrank ├── __init__.py ├── cli.py ├── core_logic.py ├── data ├── cancer-driver-genes.csv ├── cancer-driver-variants.csv ├── class1-mhc-presentation-pathway.csv └── interferon-gamma-response.csv ├── epitope_prediction.py ├── gene_pathway_check.py ├── logging.conf ├── manufacturability.py ├── mutant_protein_fragment.py ├── patient_info.py ├── reference_proteome.py ├── report.py ├── templates ├── stylesheet.css ├── template.html └── template.txt ├── vaccine_peptide.py └── vaxrank_results.py /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python 3 | 4 | name: Tests 5 | on: [push, pull_request] 6 | 7 | jobs: 8 | build: 9 | runs-on: 
ubuntu-latest 10 | strategy: 11 | fail-fast: true 12 | matrix: 13 | python-version: ["3.9", "3.10", "3.11"] 14 | steps: 15 | - uses: actions/checkout@v3 16 | - name: Set up Python ${{ matrix.python-version }} 17 | uses: actions/setup-python@v3 18 | with: 19 | python-version: ${{ matrix.python-version }} 20 | - name: Checkout private netmhc-bundle repo 21 | uses: actions/checkout@v4 22 | with: 23 | repository: openvax/netmhc-bundle 24 | token: ${{ secrets.NETMHC_BUNDLE_ACCESS_TOKEN }} 25 | path: netmhc-bundle 26 | 27 | - name: Install netmhc-bundle dependencies 28 | uses: awalsh128/cache-apt-pkgs-action@latest 29 | with: 30 | packages: tcsh gawk python2-minimal 31 | version: 1.0 32 | - name: Install dependencies 33 | run: | 34 | python -m pip install --upgrade pip 35 | python -m pip install pytest pytest-cov pylint 36 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 37 | - name: Install wkthtmltopdf 38 | run: | 39 | sudo apt-get install -y xfonts-base xfonts-75dpi 40 | wget https://github.com/wkhtmltopdf/packaging/releases/download/0.12.6-1/wkhtmltox_0.12.6-1.bionic_amd64.deb 41 | sudo dpkg -i wkhtmltox_0.12.6-1.bionic_amd64.deb 42 | - name: Lint with PyLint 43 | run: | 44 | ./lint.sh 45 | - name: Download Ensembl data 46 | run: | 47 | echo "Before installing Ensembl releases" && df -h 48 | pyensembl install --release 75 --species human --custom-mirror https://github.com/openvax/ensembl-data/releases/download/GRCh37.75/ 49 | pyensembl install --release 102 --species mouse --custom-mirror https://github.com/openvax/ensembl-data/releases/download/GRCm38.102/ 50 | pyensembl install --release 93 --species human --custom-mirror https://github.com/openvax/ensembl-data/releases/download/GRCh38.93/ 51 | pyensembl install --release 93 --species mouse --custom-mirror https://github.com/openvax/ensembl-data/releases/download/GRCm38.93/ 52 | - name: Test with pytest 53 | run: | 54 | # configure netmhc-bundle paths 55 | export 
NETMHC_BUNDLE_HOME=$PWD/netmhc-bundle 56 | echo "NetMHC-bundle dir:" && ls -l $NETMHC_BUNDLE_HOME 57 | mkdir $PWD/netmhc-bundle-tmp 58 | export NETMHC_BUNDLE_TMPDIR=$PWD/netmhc-bundle-tmp 59 | export PATH=$PATH:$NETMHC_BUNDLE_HOME/bin 60 | ./test.sh 61 | - name: Publish coverage to Coveralls 62 | uses: coverallsapp/github-action@v2.2.3 63 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | 55 | # Sphinx documentation 56 | docs/_build/ 57 | 58 | # PyBuilder 59 | target/ 60 | 61 | #Ipython Notebook 62 | .ipynb_checkpoints 63 | 64 | # Generated outputs 65 | vaccine-peptides-report.txt 66 | vaccine-peptides-report.html 67 | vaccine-peptides-report.pdf 68 | vaccine-peptides-report.xlsx 69 | vaccine-peptides-report.json 70 | vaccine-peptides-all-passing.csv 71 | vaccine-peptides.csv 72 | neoepitope-report.xlsx 73 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to Vaxrank 2 | 3 | We would love your help in making Vaxrank a useful resource for the community. No contribution is too small, and we especially appreciate usability improvements like better documentation, tutorials, tests, or code cleanup. 4 | 5 | ## Making a contribution 6 | All contributions can be made as pull requests on GitHub. One of the core developers will review your contribution. As needed the core contributors will also make releases and submit to PyPI. 7 | 8 | A few other guidelines: 9 | 10 | * Vaxrank supports Python 3.3+ on Linux and OS X. We don't guarantee support for Windows. 11 | * All functions should be documented using [numpy-style docstrings](https://numpydoc.readthedocs.io/en/latest/format.html) and associated with unit tests. 12 | * Bugfixes should be accompanied by a test that illustrates the bug when feasible.
13 | * Contributions are licensed under Apache 2.0 14 | * Please adhere to our [code of conduct](https://github.com/openvax/vaxrank/blob/master/code-of-conduct.md). 15 | 16 | Working on your first Pull Request? One resource that may be helpful is [How to Contribute to an Open Source Project on GitHub](https://egghead.io/series/how-to-contribute-to-an-open-source-project-on-github). 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE README.md 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Tests](https://github.com/openvax/vaxrank/actions/workflows/tests.yml/badge.svg)](https://github.com/openvax/vaxrank/actions/workflows/tests.yml) 2 | [![Coverage Status](https://coveralls.io/repos/github/openvax/vaxrank/badge.svg?branch=master)](https://coveralls.io/github/openvax/vaxrank?branch=master) 3 | 4 | PyPI 5 | 6 | 7 | # vaxrank 8 | 9 | Selection of mutated protein fragments for therapeutic personalized cancer vaccines. 
10 | 11 | ## Usage 12 | 13 | ```sh 14 | 15 | vaxrank \ 16 | --vcf tests/data/b16.f10/b16.vcf \ 17 | --bam tests/data/b16.f10/b16.combined.bam \ 18 | --vaccine-peptide-length 25 \ 19 | --mhc-predictor netmhc \ 20 | --mhc-alleles H2-Kb,H2-Db \ 21 | --padding-around-mutation 5 \ 22 | --output-ascii-report vaccine-peptides.txt \ 23 | --output-pdf-report vaccine-peptides.pdf \ 24 | --output-html-report vaccine-peptides.html 25 | ``` 26 | 27 | ## Installation 28 | 29 | Vaxrank can be installed using [pip](https://packaging.python.org/installing/#use-pip-for-installing): 30 | 31 | ``` 32 | pip install vaxrank 33 | ``` 34 | 35 | Note: to generate PDF reports, you first need to install [wkhtmltopdf](http://wkhtmltopdf.org/), which you can do (on OS X) like so: 36 | 37 | ``` 38 | brew install Caskroom/cask/wkhtmltopdf 39 | ``` 40 | 41 | Vaxrank uses [PyEnsembl](https://github.com/openvax/pyensembl) for accessing information about the reference genome. You must install an Ensembl release corresponding to the reference genome associated with the mutations provided to Vaxrank. 42 | 43 | The latest release for GRCh38 is Ensembl 93: 44 | ``` 45 | pyensembl install --release 93 --species human 46 | ``` 47 | 48 | The last release for GRCh37 is Ensembl 75: 49 | ``` 50 | pyensembl install --release 75 --species human 51 | ``` 52 | 53 | If your variants were called from alignments against hg19 then you can still use GRCh37 but should ignore mitochondrial variants.
54 | 55 | ## Paper & Citation 56 | 57 | There is a Vaxrank paper on bioRxiv called [Vaxrank: A Computational Tool For Designing Personalized Cancer Vaccines](https://www.biorxiv.org/content/early/2017/05/27/142919) which can be cited as: 58 | 59 | @article {Rubinsteyn142919, 60 | author = {Rubinsteyn, Alex and Hodes, Isaac and Kodysh, Julia and Hammerbacher, Jeffrey}, 61 | title = {Vaxrank: A Computational Tool For Designing Personalized Cancer Vaccines}, 62 | year = {2017}, 63 | doi = {10.1101/142919}, 64 | publisher = {Cold Spring Harbor Laboratory}, 65 | abstract = {Therapeutic vaccines targeting mutant tumor antigens ({\textquotedblleft}neoantigens{\textquotedblright}) are an increasingly popular form of personalized cancer immunotherapy. Vaxrank is a computational tool for selecting neoantigen vaccine peptides from tumor mutations, tumor RNA data, and patient HLA type. Vaxrank is freely available at www.github.com/hammerlab/vaxrank under the Apache 2.0 open source license and can also be installed from the Python Package Index.}, 66 | URL = {https://www.biorxiv.org/content/early/2017/05/27/142919}, 67 | eprint = {https://www.biorxiv.org/content/early/2017/05/27/142919.full.pdf}, 68 | journal = {bioRxiv} 69 | } 70 | 71 | 72 | # Development 73 | 74 | To install Vaxrank for local development, run the following: 75 | 76 | ``` 77 | git clone git@github.com:openvax/vaxrank.git 78 | conda create -q -n vaxrank-dev-env python=3.5.2 numpy scipy pandas pylint 79 | source activate vaxrank-dev-env 80 | pip install -r requirements.txt 81 | pip install . 82 | pyensembl install --release 87 --species human 83 | pyensembl install --release 87 --species mouse 84 | ``` 85 | 86 | You should run the linter and the test suite as you work on Vaxrank (and these will be run automatically by our continuous integration server upon a PR being made).
87 | 88 | ``` 89 | ./lint.sh && ./test.sh 90 | ``` 91 | 92 | The first run of the tests may take a while (8 minutes on a 2016 Macbook Pro) to create the FM index of the proteome, but subsequent tests should take only a few seconds. 93 | 94 | -------------------------------------------------------------------------------- /RELEASING.md: -------------------------------------------------------------------------------- 1 | # Releasing Vaxrank 2 | 3 | This document explains what to do once your [Pull Request](https://www.atlassian.com/git/tutorials/making-a-pull-request/) has been reviewed and all final changes applied. Now you're ready to merge your branch into master and release it to the world: 4 | 5 | 0. Make sure that you have `pandoc` and `pypandoc` installed: this is needed for readme markdown on PyPI. (See [here](http://pandoc.org/installing.html) and [here](https://pypi.python.org/pypi/pypandoc), respectively, for instructions.) 6 | 1. Bump the [version](http://semver.org/) in `__init__.py`, as part of the PR you want to release. 7 | 2. Merge your branch into master. 8 | 3. Run `python setup.py sdist upload`, which pushes the newest release to PyPI. 9 | -------------------------------------------------------------------------------- /code-of-conduct.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation.
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies within all project spaces, and it also applies when 49 | an individual is representing the project or its community in public spaces. 50 | Examples of representing a project or community include using an official 51 | project e-mail address, posting via an official social media account, or acting 52 | as an appointed representative at an online or offline event. 
Representation of 53 | a project may be further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at hello@openvax.org. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /deploy.sh: -------------------------------------------------------------------------------- 1 | ./lint.sh && \ 2 | ./test.sh && \ 3 | python3 -m pip install --upgrade build && \ 4 | python3 -m pip install --upgrade twine && \ 5 | rm -rf dist && \ 6 | python3 -m build && \ 7 | python3 -m twine upload dist/* 8 | 9 | -------------------------------------------------------------------------------- /develop.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | pip install -e . 
4 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = python -msphinx 7 | SPHINXPROJ = vaxrank 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # vaxrank documentation build configuration file, created by 4 | # sphinx-quickstart on Tue Oct 10 16:59:03 2017. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | # If extensions (or modules to document with autodoc) are in another directory, 16 | # add these directories to sys.path here. If the directory is relative to the 17 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
18 | # 19 | # import os 20 | # import sys 21 | # sys.path.insert(0, os.path.abspath('.')) 22 | 23 | 24 | # -- General configuration ------------------------------------------------ 25 | 26 | # If your documentation needs a minimal Sphinx version, state it here. 27 | # 28 | # needs_sphinx = '1.0' 29 | 30 | # Add any Sphinx extension module names here, as strings. They can be 31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 32 | # ones. 33 | extensions = [] 34 | 35 | # Add any paths that contain templates here, relative to this directory. 36 | templates_path = ['_templates'] 37 | 38 | # The suffix(es) of source filenames. 39 | # You can specify multiple suffix as a list of string: 40 | # 41 | # source_suffix = ['.rst', '.md'] 42 | source_suffix = '.rst' 43 | 44 | # The master toctree document. 45 | master_doc = 'index' 46 | 47 | # General information about the project. 48 | project = u'vaxrank' 49 | copyright = u'2017, Alex Rubinsteyn, Julia Kodysh' 50 | author = u'Alex Rubinsteyn, Julia Kodysh' 51 | 52 | # The version info for the project you're documenting, acts as replacement for 53 | # |version| and |release|, also used in various other places throughout the 54 | # built documents. 55 | # 56 | # The short X.Y version. 57 | version = u'' 58 | # The full version, including alpha/beta/rc tags. 59 | release = u'' 60 | 61 | # The language for content autogenerated by Sphinx. Refer to documentation 62 | # for a list of supported languages. 63 | # 64 | # This is also used if you do content translation via gettext catalogs. 65 | # Usually you set "language" from the command line for these cases. 66 | language = None 67 | 68 | # List of patterns, relative to source directory, that match files and 69 | # directories to ignore when looking for source files. 
70 | # This patterns also effect to html_static_path and html_extra_path 71 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 72 | 73 | # The name of the Pygments (syntax highlighting) style to use. 74 | pygments_style = 'sphinx' 75 | 76 | # If true, `todo` and `todoList` produce output, else they produce nothing. 77 | todo_include_todos = False 78 | 79 | 80 | # -- Options for HTML output ---------------------------------------------- 81 | 82 | # The theme to use for HTML and HTML Help pages. See the documentation for 83 | # a list of builtin themes. 84 | # 85 | html_theme = 'alabaster' 86 | 87 | # Theme options are theme-specific and customize the look and feel of a theme 88 | # further. For a list of options available for each theme, see the 89 | # documentation. 90 | # 91 | # html_theme_options = {} 92 | 93 | # Add any paths that contain custom static files (such as style sheets) here, 94 | # relative to this directory. They are copied after the builtin static files, 95 | # so a file named "default.css" will overwrite the builtin "default.css". 96 | html_static_path = ['_static'] 97 | 98 | # Custom sidebar templates, must be a dictionary that maps document names 99 | # to template names. 100 | # 101 | # This is required for the alabaster theme 102 | # refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars 103 | html_sidebars = { 104 | '**': [ 105 | 'about.html', 106 | 'navigation.html', 107 | 'relations.html', # needs 'show_related': True theme option to display 108 | 'searchbox.html', 109 | 'donate.html', 110 | ] 111 | } 112 | 113 | 114 | # -- Options for HTMLHelp output ------------------------------------------ 115 | 116 | # Output file base name for HTML help builder. 117 | htmlhelp_basename = 'vaxrankdoc' 118 | 119 | 120 | # -- Options for LaTeX output --------------------------------------------- 121 | 122 | latex_elements = { 123 | # The paper size ('letterpaper' or 'a4paper'). 
124 | # 125 | # 'papersize': 'letterpaper', 126 | 127 | # The font size ('10pt', '11pt' or '12pt'). 128 | # 129 | # 'pointsize': '10pt', 130 | 131 | # Additional stuff for the LaTeX preamble. 132 | # 133 | # 'preamble': '', 134 | 135 | # Latex figure (float) alignment 136 | # 137 | # 'figure_align': 'htbp', 138 | } 139 | 140 | # Grouping the document tree into LaTeX files. List of tuples 141 | # (source start file, target name, title, 142 | # author, documentclass [howto, manual, or own class]). 143 | latex_documents = [ 144 | (master_doc, 'vaxrank.tex', u'vaxrank Documentation', 145 | u'Alex Rubinsteyn, Julia Kodysh', 'manual'), 146 | ] 147 | 148 | 149 | # -- Options for manual page output --------------------------------------- 150 | 151 | # One entry per manual page. List of tuples 152 | # (source start file, name, description, authors, manual section). 153 | man_pages = [ 154 | (master_doc, 'vaxrank', u'vaxrank Documentation', 155 | [author], 1) 156 | ] 157 | 158 | 159 | # -- Options for Texinfo output ------------------------------------------- 160 | 161 | # Grouping the document tree into Texinfo files. List of tuples 162 | # (source start file, target name, title, author, 163 | # dir menu entry, description, category) 164 | texinfo_documents = [ 165 | (master_doc, 'vaxrank', u'vaxrank Documentation', 166 | author, 'vaxrank', 'One line description of project.', 167 | 'Miscellaneous'), 168 | ] 169 | 170 | 171 | 172 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. vaxrank documentation master file, created by 2 | sphinx-quickstart on Tue Oct 10 16:59:03 2017. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | .. 
toctree:: 7 | :maxdepth: 2 8 | :caption: Contents: 9 | 10 | Getting Started With Vaxrank 11 | ============================ 12 | 13 | Overview 14 | -------- 15 | Vaxrank is a tool for selecting mutated peptides for use in personalized therapeutic cancer vaccination. Vaxrank determines which peptides should be used in a vaccine from tumor-specific somatic mutations, tumor RNA sequencing data, and a patient's HLA type. Additionally, Vaxrank considers surrounding non-mutated residues in a peptide to prioritize vaccine peptide candidates and improve the odds of successful synthesis. 16 | 17 | Vaxrank is being actively developed at the Icahn School of Medicine at Mount Sinai. 18 | 19 | Questions, Bug Reporting, and Issue Tracking 20 | -------------------------------------------- 21 | Questions, bug reporting and issue tracking are provided by GitHub. Please report all bugs by creating a new issue. You can ask questions by creating a new issue with the question tag. 22 | 23 | Installation 24 | ============ 25 | 26 | Vaxrank can be installed using `pip `_: 27 | 28 | .. code-block:: bash 29 | 30 | pip install vaxrank 31 | 32 | Note: to generate PDF reports, you first need to install `wkhtmltopdf `_, which you can do (on OS X) like so: 33 | 34 | .. code-block:: bash 35 | 36 | brew install Caskroom/cask/wkhtmltopdf 37 | 38 | Vaxrank uses `PyEnsembl `_ for accessing information about the reference genome. You must install an Ensembl release corresponding to the reference genome associated with the mutations provided to Vaxrank. 39 | 40 | The latest supported release for GRCh38 is Ensembl 87: 41 | 42 | .. code-block:: bash 43 | 44 | pyensembl install --release 87 --species human 45 | 46 | The latest release for GRCh37 is Ensembl 75: 47 | 48 | .. code-block:: bash 49 | 50 | pyensembl install --release 75 --species human 51 | 52 | 53 | Running Vaxrank 54 | =============== 55 | 56 | Basic Vaxrank usage involves these parameters: 57 | 58 | .. 
code-block:: bash 59 | 60 | vaxrank \ 61 | --vcf somatic-variants.vcf \ 62 | --bam tumor-rna.bam \ 63 | --mhc-predictor netmhc \ 64 | --mhc-alleles A*02:01,A*02:03 \ 65 | --mhc-epitope-lengths 8 \ 66 | --padding-around-mutation 5 \ 67 | --vaccine-peptide-length 25 \ 68 | --output-ascii-report vaccine-peptides-report.txt 69 | 70 | This tells Vaxrank to: 71 | 72 | - consider each variant from the input VCF file against the RNA evidence in the input BAM file; 73 | - predict MHC binding of each resulting mutant protein sequence using the NetMHC prediction algorithm with the A*02:01 and A*02:03 MHC alleles, evaluating sequences of length 8 for purposes of MHC binding prediction; 74 | - choose protein vaccine candidates, each composed of 25 amino acids; and 75 | - generate a report written to vaccine-peptides-report.txt, containing the top ranked variants with their associated vaccine proteins. 76 | 77 | For a complete description of parameters supported by Vaxrank, keep on reading. 78 | 79 | 80 | Variant Parameters 81 | ------------------ 82 | Vaxrank starts with a set of candidate genomic variants and considers each for inclusion in the vaccine. There are several ways to specify a set of variants for Vaxrank to consider: 83 | 84 | --vcf VCF_FILE 85 | Genomic variants in `VCF `_ format. 86 | --maf MAF_FILE 87 | Genomic variants in `MAF `_ format. 88 | --json-variants JSON_VARIANTS 89 | Path to Varcode.VariantCollection object serialized as a JSON 90 | file. To learn more about Varcode, see `docs `_. 91 | 92 | MHC Prediction Parameters 93 | ------------------------- 94 | 95 | Vaxrank uses a patient's HLA type information to predict which of the candidate vaccine peptides are most likely to be seen and targeted by the patient's immune system. The MHC alleles can be passed in either in a file or as a comma-separated list of inputs. 
96 | 97 | --mhc-alleles-file MHC_ALLELES_FILE 98 | File with one HLA allele per line 99 | --mhc-alleles MHC_ALLELES 100 | Comma-separated or space-separated list of MHC alleles, e.g. "HLA-A*02:01,HLA-A*02:03". 101 | --mhc-peptide-lengths MHC_PEPTIDE_LENGTHS 102 | Comma-separated list of epitope lengths to consider for MHC binding prediction, e.g. "8,9,10,11". This can also take a range of values, e.g. "8-11". 103 | 104 | In addition, the user can specify different MHC binding predictors for Vaxrank to use: 105 | 106 | --mhc-predictor MHC_PREDICTOR 107 | MHC predictor to use. MHCFlurry is an open-source predictor installed by default. Note that to use NetMHC predictors, you need to have locally installed the NetMHC suite software, with binaries like NetMHCpan as executable files on your path. See a list of all supported predictors `here `_. 108 | 109 | RNA Parameters 110 | -------------- 111 | 112 | Vaxrank uses input tumor RNA data to see whether the input somatic variants are sufficiently expressed. 113 | 114 | --bam BAM 115 | BAM file containing tumor RNA reads. 116 | 117 | Each variant's effect on a resulting protein is predicted and matched against what we see in the input RNA. There are many options available to the power user, but the only actual required argument is the location of the tumor RNA BAM; all values listed below come with reasonable defaults. 118 | 119 | --min-alt-rna-reads MIN_ALT_RNA_READS 120 | Minimum number of RNA reads supporting the variant allele. Default: 2. 121 | --min-variant-sequence-coverage MIN_VARIANT_SEQUENCE_COVERAGE 122 | Minimum number of reads supporting a variant sequence. Variant sequences will be trimmed to positions supported by at least this number of RNA reads. Default: 2. 123 | --disable-variant-sequence-assembly 124 | By default, variant cDNA sequences are assembled from overlapping reads. Include this argument to disable the assembly behavior. 
125 | --protein-sequence-length 126 | Vaxrank will try to translate protein sequences of this length, though sometimes the resulting sequence may be shorter (depending on the RNA data, presence of stop codons, etc.). Default: 20. 127 | --max-reference-transcript-mismatches MAX_REFERENCE_TRANSCRIPT_MISMATCHES 128 | Maximum number of mismatches between the variant sequence being constructed and the reference sequence before the variant sequence gets dropped from consideration. Default: 2. 129 | --include-mismatches-after-variant 130 | By default, only mismatches that occur before the actual variant locus count against --max-reference-transcript-mismatches. Set this value to True if you also want to count mismatches after the variant locus towards the total. Default: false. 131 | --min-transcript-prefix-length MIN_TRANSCRIPT_PREFIX_LENGTH 132 | Number of nucleotides before the variant we try to match against a reference transcript. Default: 10. 133 | --min-mapping-quality MIN_MAPPING_QUALITY 134 | Minimum MAPQ value to allow for a read. Default: 1. 135 | --use-duplicate-reads 136 | Use a read even if it's been marked as a duplicate. Default: false. 137 | --drop-secondary-alignments 138 | If set, Vaxrank will skip reads at locations that aren't their primary alignment. Default: false. 139 | 140 | Vaccine Peptide Parameters 141 | -------------------------- 142 | There are some more options to specify the desired characteristics of the output vaccine peptides, which will contain shorter sequences that contain the mutation and are predicted to be strong MHC binders. 143 | 144 | --vaccine-peptide-length VACCINE_PEPTIDE_LENGTH 145 | Number of amino acids in the resulting vaccine peptides. Default: 25. 146 | --padding-around-mutation PADDING_AROUND_MUTATION 147 | Number of off-center windows around the mutation to consider as vaccine peptides. Default: 0. 148 | --min-epitope-score MIN_EPITOPE_SCORE 149 | Ignore epitopes whose normalized score falls below this threshold. 
Default: 0.001. 150 | 151 | Output Parameters 152 | ----------------- 153 | 154 | By default, the report will contain all high-confidence vaccine peptides, but the report can be made more restrictive using the following parameters: 155 | 156 | --max-vaccine-peptides-per-mutation MAX_VACCINE_PEPTIDES_PER_MUTATION 157 | Number of vaccine peptides to generate for each 158 | mutation 159 | --max-mutations-in-report MAX_MUTATIONS_IN_REPORT 160 | Number of mutations to report 161 | 162 | Output Formats 163 | ^^^^^^^^^^^^^^ 164 | 165 | Vaxrank can generate many types of outputs. The most basic output is an ASCII-formatted report, listing each high-scoring variant and its associated vaccine peptides. However, the user can also generate a PDF report and two types of Excel reports. 166 | 167 | Options related to report generation: 168 | --output-ascii-report OUTPUT_ASCII_REPORT 169 | Path to ASCII vaccine peptide report 170 | --output-html-report OUTPUT_HTML_REPORT 171 | Path to HTML vaccine peptide report 172 | --output-pdf-report OUTPUT_PDF_REPORT 173 | Path to PDF vaccine peptide report 174 | --output-xlsx-report OUTPUT_XLSX_REPORT 175 | Path to XLSX vaccine peptide report worksheet, one 176 | sheet per variant. This is meant for use by the 177 | vaccine manufacturer. 178 | --output-neoepitope-report OUTPUT_NEOEPITOPE_REPORT 179 | Path to XLSX neoepitope report, containing information 180 | focusing on short peptide sequences. 181 | 182 | Vaxrank can also output all variants and vaccine sequences in a JSON file, which can be used for further programmatic processing if necessary. 
The file output location should be specified by: 183 | 184 | --output-json-file OUTPUT_JSON_FILE 185 | Path to JSON vaccine peptide data 186 | -------------------------------------------------------------------------------- /lint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -o errexit 3 | 4 | 5 | # disabling several categories of errors due to false positives in pylint, 6 | # see these issues: 7 | # - https://bitbucket.org/logilab/pylint/issues/701/false-positives-with-not-an-iterable-and 8 | # - https://bitbucket.org/logilab/pylint/issues/58 9 | 10 | find vaxrank/ -name '*.py' \ 11 | | xargs pylint \ 12 | --errors-only \ 13 | --disable=unsubscriptable-object,not-an-iterable,no-member,invalid-unary-operand-type 14 | 15 | echo 'Passes pylint check' 16 | -------------------------------------------------------------------------------- /pylintrc: -------------------------------------------------------------------------------- 1 | [TYPECHECK] 2 | # Without ignoring this, we get errors like: 3 | # E:249,20: Module 'numpy' has no 'nan' member (no-member) 4 | ignored-modules = numpy 5 | ignored-classes = nose.tools 6 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | six 2 | numpy>=1.14.0 3 | pandas 4 | pyensembl>=1.5.0 5 | varcode>=0.5.9 6 | isovar>=1.1.1 7 | mhctools>=1.5.0 8 | roman 9 | jinja2<3.1 10 | pdfkit # needs wkhtmltopdf: brew install Caskroom/cask/wkhtmltopdf 11 | pypandoc # needs pandoc: brew install pandoc 12 | shellinford>=0.3.4 13 | xlsxwriter 14 | xlrd>=1.0.0,<2.0.0 15 | xvfbwrapper 16 | future>=0.16.0 # needed by pylint 17 | astropy 18 | datacache 19 | pysam>=0.15.2 20 | -------------------------------------------------------------------------------- /run-vaxrank-b16-test-data.sh: 
-------------------------------------------------------------------------------- 1 | set -e 2 | set -x 3 | vaxrank \ 4 | --download-reference-genome-data \ 5 | --vcf tests/data/b16.f10/b16.vcf \ 6 | --bam tests/data/b16.f10/b16.combined.bam \ 7 | --vaccine-peptide-length 15 \ 8 | --mhc-predictor netmhc \ 9 | --mhc-alleles H2-Kb,H2-Db \ 10 | --mhc-epitope-lengths 8 \ 11 | --padding-around-mutation 0 \ 12 | --min-epitope-score 10e-100 \ 13 | --num-epitopes-per-peptide 5 \ 14 | --output-ascii-report vaccine-peptides-report.txt \ 15 | --output-html-report vaccine-peptides-report.html \ 16 | --output-pdf-report vaccine-peptides-report.pdf \ 17 | --output-xlsx-report vaccine-peptides-report.xlsx \ 18 | --output-neoepitope-report neoepitope-report.xlsx \ 19 | --output-json-file vaccine-peptides-report.json \ 20 | --output-csv vaccine-peptides.csv \ 21 | --output-passing-variants-csv vaccine-peptides-all-passing.csv \ 22 | --output-reviewed-by "John Doe,Jane Doe" \ 23 | --output-final-review "All the Does" \ 24 | --output-patient-id "Test Patient" 25 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Licensed under the Apache License, Version 2.0 (the "License"); 2 | # you may not use this file except in compliance with the License. 3 | # You may obtain a copy of the License at 4 | # 5 | # http://www.apache.org/licenses/LICENSE-2.0 6 | # 7 | # Unless required by applicable law or agreed to in writing, software 8 | # distributed under the License is distributed on an "AS IS" BASIS, 9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | # See the License for the specific language governing permissions and 11 | # limitations under the License. 
12 | 13 | 14 | from __future__ import (absolute_import,) 15 | 16 | import os 17 | import logging 18 | import re 19 | 20 | from setuptools import setup 21 | 22 | readme_dir = os.path.dirname(__file__) 23 | readme_path = os.path.join(readme_dir, 'README.md') 24 | 25 | try: 26 | with open(readme_path, 'r') as f: 27 | readme_markdown = f.read() 28 | except: 29 | logging.warn("Failed to load %s" % readme_path) 30 | readme_markdown = "" 31 | 32 | with open('vaxrank/__init__.py', 'r') as f: 33 | version = re.search( 34 | r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]', 35 | f.read(), 36 | re.MULTILINE).group(1) 37 | 38 | if not version: 39 | raise RuntimeError("Cannot find version information") 40 | 41 | if __name__ == '__main__': 42 | setup( 43 | name='vaxrank', 44 | version=version, 45 | description="Mutant peptide ranking for personalized cancer vaccines", 46 | author="Alex Rubinsteyn, Julia Kodysh", 47 | author_email="alex@openvax.org, julia@openvax.org", 48 | url="https://github.com/openvax/vaxrank", 49 | license="http://www.apache.org/licenses/LICENSE-2.0.html", 50 | classifiers=[ 51 | 'Development Status :: 4 - Beta', 52 | 'Environment :: Console', 53 | 'Operating System :: OS Independent', 54 | 'Intended Audience :: Science/Research', 55 | 'License :: OSI Approved :: Apache Software License', 56 | 'Programming Language :: Python', 57 | 'Topic :: Scientific/Engineering :: Bio-Informatics', 58 | ], 59 | install_requires=[ 60 | 'numpy>=1.14.0,<2.0.0', 61 | 'pandas>=2.1.4,<3.0.0', 62 | 'pyensembl>=2.0.0,<3.0.0', 63 | 'varcode>=1.1.0,<2.0.0', 64 | 'isovar>=1.3.0,<2.0.0', 65 | 'mhctools>=1.8.2,<2.0.0', 66 | 'roman', 67 | 'jinja2<3.1', 68 | 'pdfkit', 69 | 'pypandoc', 70 | 'shellinford>=0.3.4', 71 | 'xlrd>=1.0.0,<2.0.0', 72 | 'xlsxwriter', 73 | 'xvfbwrapper', 74 | 'future>=0.16.0', # needed by pylint 75 | 'astropy', 76 | ], 77 | 78 | long_description=readme_markdown, 79 | long_description_content_type='text/markdown', 80 | packages=['vaxrank'], 81 | package_data={'vaxrank': 
['templates/*', 'data/*', 'logging.conf']}, 82 | entry_points={ 83 | 'console_scripts': [ 84 | 'vaxrank = vaxrank.cli:main' 85 | ] 86 | } 87 | ) 88 | -------------------------------------------------------------------------------- /test.sh: -------------------------------------------------------------------------------- 1 | pytest --cov=vaxrank/ --cov-report=term-missing tests 2 | 3 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Licensed under the Apache License, Version 2.0 (the "License"); 2 | # you may not use this file except in compliance with the License. 3 | # You may obtain a copy of the License at 4 | # 5 | # http://www.apache.org/licenses/LICENSE-2.0 6 | # 7 | # Unless required by applicable law or agreed to in writing, software 8 | # distributed under the License is distributed on an "AS IS" BASIS, 9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | # See the License for the specific language governing permissions and 11 | # limitations under the License. 12 | 13 | from __future__ import absolute_import, print_function, division 14 | -------------------------------------------------------------------------------- /tests/common.py: -------------------------------------------------------------------------------- 1 | # Licensed under the Apache License, Version 2.0 (the "License"); 2 | # you may not use this file except in compliance with the License. 3 | # You may obtain a copy of the License at 4 | # 5 | # http://www.apache.org/licenses/LICENSE-2.0 6 | # 7 | # Unless required by applicable law or agreed to in writing, software 8 | # distributed under the License is distributed on an "AS IS" BASIS, 9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and
# limitations under the License.


def ok_(a, s=None):
    """Assert that ``a`` is truthy; ``s`` is the optional failure message."""
    if s is not None:
        assert a, s
        return
    assert a


def eq_(a, b, s=None):
    """Assert ``a == b``; ``s`` is the optional failure message."""
    if s is not None:
        assert a == b, s
        return
    assert a == b


def neq_(a, b, s=None):
    """Assert ``a != b``; ``s`` is the optional failure message."""
    if s is not None:
        assert a != b, s
        return
    assert a != b


def gt_(a, b, s=None):
    """Assert ``a > b``; ``s`` is the optional failure message."""
    if s is not None:
        assert a > b, s
        return
    assert a > b


def lt_(a, b, s=None):
    """Assert ``a < b``; ``s`` is the optional failure message."""
    if s is not None:
        assert a < b, s
        return
    assert a < b


def gte_(a, b, s=None):
    """Assert ``a >= b``; ``s`` is the optional failure message."""
    if s is not None:
        assert a >= b, s
        return
    assert a >= b


def lte_(a, b, s=None):
    """Assert ``a <= b``; ``s`` is the optional failure message."""
    if s is not None:
        assert a <= b, s
        return
    assert a <= b


def almost_eq_(a, b, tol=1e-6, s=None):
    """Assert ``abs(a - b) < tol`` (strict); ``s`` is the optional message."""
    if s is not None:
        assert abs(a - b) < tol, s
        return
    assert abs(a - b) < tol
-------------------------------------------------------------------------------- /tests/data/b16.f10/b16.combined.sorted.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openvax/vaxrank/a95e4e19070fe06b67b806f995b209f190a7af9a/tests/data/b16.f10/b16.combined.sorted.bam.bai -------------------------------------------------------------------------------- /tests/data/b16.f10/b16.expressed.vcf: -------------------------------------------------------------------------------- 1 | ##reference=mm10 2 | #chr pos id ref alt qual filter info 3 | chr9 82927102 . G T . . . 4 | chrX 8125624 . C A . . . 5 | -------------------------------------------------------------------------------- /tests/data/b16.f10/b16.f10.127a.aldh1b1.chr4.45802539.refG.altC.sam: -------------------------------------------------------------------------------- 1 | @HD VN:1.0 SO:coordinate 2 | @SQ SN:chr1 LN:195471971 3 | @SQ SN:chr2 LN:182113224 4 | @SQ SN:chr3 LN:160039680 5 | @SQ SN:chr4 LN:156508116 6 | @SQ SN:chr5 LN:151834684 7 | @SQ SN:chr6 LN:149736546 8 | @SQ SN:chr7 LN:145441459 9 | @SQ SN:chr8 LN:129401213 10 | @SQ SN:chr9 LN:124595110 11 | @SQ SN:chr10 LN:130694993 12 | @SQ SN:chr11 LN:122082543 13 | @SQ SN:chr12 LN:120129022 14 | @SQ SN:chr13 LN:120421639 15 | @SQ SN:chr14 LN:124902244 16 | @SQ SN:chr15 LN:104043685 17 | @SQ SN:chr16 LN:98207768 18 | @SQ SN:chr17 LN:94987271 19 | @SQ SN:chr18 LN:90702639 20 | @SQ SN:chr19 LN:61431566 21 | @SQ SN:chrX LN:171031299 22 | @SQ SN:chrY LN:91744698 23 | @SQ SN:chrM LN:16299 24 | @RG ID:Tumor_B16_F10_0810_127A PL:ILLUMINA PU:HiSeq2500 LB:Tumor_B16_F10_0810_127A DS:rnaseq SM:Tumor_B16_F10_0810_127A CN:MSSM 25 | HWI-D00273:119:C7FUMANXX:2:2314:9979:73514 163 chr4 45693473 255 13M145161N88M = 45838774 145402 GTGCCAAAGAGACCGACACTCTTGGTGCTCGGGGTACAGTCTCCTCAAAAGTTCCCCTCTTCTGTTTTATAAGATAGGCTTTGAGGGTGCGATGCGCACGC 
CCCCCGGGGG1@FBGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGFGGGGGGGGGGGGGGGGGG RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:1 AS:i:196 XS:A:- 26 | HWI-D00273:119:C7FUMANXX:2:1302:1900:88137 147 chr4 45799162 255 8M3285N93M = 45799073 -3475 CACCGCAGGTCCTCAGGATGCTGACTGCCCGACTCTTGCTGCCCCGGCTCCTCTGCCTCCAGGGCAGGACTACCTCTTACTCTACAGCAGCTGCTCTCCCG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGFEGGGGGGFC@DB/EFGGEGGGGGE1@@FGGGFGGGBC00FDFG@.FG RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:0 AS:i:201 XS:A:+ 28 | HWI-D00273:119:C7FUMANXX:2:1309:15476:29614 83 chr4 45799165 255 5M3285N96M = 45799092 -3459 CGCAGGTCCTCAGGATGCTGACTGCCCGACTCTTGCTGCCCCGGCTCCTCTGCCTCCAGGGCAGGACTACCTCTTACTCTACAGCAGCTGCTCTCCCGAAC GGGGGCGGGGGGGGEBGGGGEGGGGGG>GGGF@DGGGGGGGGEGFC1FGGF>GGF11EE1DFBGE=/;A<3A3 RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:1 AS:i:201 XS:A:+ 29 | HWI-D00273:119:C7FUMANXX:2:2109:16809:15061 83 chr4 45799165 255 5M3285N96M = 45799092 -3459 CGCAGGTCCTCAGGATGCTGACTGCCCGACTCTTGCTGCCCCGGCTCCTCTGCCTCCAGGGCAGGACTACCTCTTACTCTACAGCAGCTGCTCTCCCGAAC GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGF=GGGECCCBC RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:0 AS:i:203 XS:A:+ 30 | HWI-D00273:119:C7FUMANXX:2:2115:6797:61925 163 chr4 45802455 255 101M = 45802642 288 GTCCTCAGGATGCTGACTGCCCGACTCTTGCTGCCCCGGCTCCTCTGCCTCCAGGGCAGGACTACCTCTTACTCTACAGCAGCTGCTCTCCCGAACCCAAT BBC=BGCGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGFGGGGGGGGGGGFDEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDAGGGGEB RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:0 AS:i:200 31 | HWI-D00273:119:C7FUMANXX:2:1106:19607:76744 83 chr4 45802460 255 101M = 45799110 -3451 CAGGATGCTGACTGCCCGACTCTTGCTGCCCCGGCTCCTCTGCCTCCAGGGCAGGACTACCTCTTACTCTACAGCAGCTGCTCTCCCGAACCCAATCCCAA GGGGGFGGGGGGGGGGGGFGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGCCCCC RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:0 AS:i:201 XS:A:+ 32 | 
HWI-D00273:119:C7FUMANXX:2:2109:18038:25161 83 chr4 45802466 255 101M = 45799093 -3474 GCTGACTGCCCGACTCTTGCTGCCCCGGCTCCTCTGCCTCCAGGGCAGGACTACCTCTTACTCTACAGCAGCTGCTCTCCCGAACCCAATCCCAAACCCAG EGGGGG:GGGDGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGFGGE>GGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGFGGCCCCC RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:0 AS:i:201 XS:A:+ 33 | HWI-D00273:119:C7FUMANXX:2:2310:14056:99606 147 chr4 45802468 255 101M = 45799077 -3492 TGACTGCCCGACTCTTGCTGCCCCGGCTCCTCTGCCTCCAGGGCAGGACTACCTCTTACTCTACAGCAGCTGCTCTCCCGAACCCAATCCCAAACCCAGAG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGFGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGCCCCC RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:0 AS:i:201 XS:A:+ 34 | HWI-D00273:119:C7FUMANXX:2:2314:15707:68909 83 chr4 45802468 255 101M = 45799080 -3489 TGACTGCCCGACTCTTGCTGCCCCGGCTCCTCTGCCTCCAGGGCAGGACTACCTCTTACTCTACAGCAGCTGCTCTCCCGAACCCAATCCCAAACCCAGAG CGGDGGGGGGGGEEFDGGADGGGEBGGGGGGE8>GGGGFCG@CGFDEGGF:DGFF>GGGGEGDGEGGGGGGGGFDGGGGGDGGEGDDE>BGGGFFGB@@BB RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:3 AS:i:195 XS:A:+ 35 | HWI-D00273:119:C7FUMANXX:2:1110:20436:75562 99 chr4 45802487 255 101M = 45802556 170 GCCCCGGCTCCTCTGCCTCCAGGGCAGGACTACCTCTTACTCTACAGCAGCTGCTCTCCCGAACCCAATCCCAAACCCAGAGATTTGCTACAACAAGCTGT CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGGGGGGGGGGGGGGGGGGGG RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:0 AS:i:200 36 | HWI-D00273:119:C7FUMANXX:2:1206:18456:69465 99 chr4 45802491 255 101M = 45802525 135 CGGCTCCTCTGCCTCCAGGGCAGGACTACCTCTTACTCTACAGCAGCTGCTCTCCCGAACCCAATCCCAAACCCAGAGATTTGCTACAACAAGCTGTTCAT CBCCCCFGEGGGGGGGGGGGGGGFGGGGGGGGFGGGGGGGGGGGGGGGEEDGGGGEGGGGGGGGGGGGGFGGGGGGGBCGFGGGGGGGGGGGGG>FFGGGG RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:0 AS:i:200 37 | HWI-D00273:119:C7FUMANXX:2:2310:16660:100528 163 chr4 45802498 255 101M = 45802719 322 TCTGCCTCCAGGGCAGGACTACCTCTTACTCTACAGCAGCTGCTCTCCCGAACCCAATCCCAAACCCAGAGATTTGCTACAACAAGCTGTTCATCAACAAC 
BCCBCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGBB RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:0 AS:i:200 38 | HWI-D00273:119:C7FUMANXX:2:1206:18456:69465 147 chr4 45802525 255 101M = 45802491 -135 ACTCTACAGCAGCTGCTCTCCCGAACCCAATCCCAAACCCAGAGATTTGCTACAACAAGCTGTTCATCAACAACGAGTGGCATGATGCGGTCAGCAAAAAG 8GGGE>>@GGGGGCFC8E=GGGGGCGBGD@@GGGGGDGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDCGDGGGGGGGGGGGBBBAB RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:0 AS:i:200 39 | HWI-D00273:119:C7FUMANXX:2:1216:11959:66492 83 chr4 45802534 255 101M = 45799111 -3524 CAGCTGCTCTCCCGAACCCAATCCCAAACCCAGAGATTTGCTACAACAAGCTGTTCATCAACAACGAGTGGCATGATGCGGTCAGCAAAAAGACCTTCCCC GDGGGGGGGGGGGGF=GGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGFGGFGGGGGGGGGGFFGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGBCCCC RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:0 AS:i:201 XS:A:+ 40 | -------------------------------------------------------------------------------- /tests/data/b16.f10/b16.f10.127a.phip.chr9.82927102.refG.altT.sam: -------------------------------------------------------------------------------- 1 | @HD VN:1.0 SO:coordinate 2 | @SQ SN:chr1 LN:195471971 3 | @SQ SN:chr2 LN:182113224 4 | @SQ SN:chr3 LN:160039680 5 | @SQ SN:chr4 LN:156508116 6 | @SQ SN:chr5 LN:151834684 7 | @SQ SN:chr6 LN:149736546 8 | @SQ SN:chr7 LN:145441459 9 | @SQ SN:chr8 LN:129401213 10 | @SQ SN:chr9 LN:124595110 11 | @SQ SN:chr10 LN:130694993 12 | @SQ SN:chr11 LN:122082543 13 | @SQ SN:chr12 LN:120129022 14 | @SQ SN:chr13 LN:120421639 15 | @SQ SN:chr14 LN:124902244 16 | @SQ SN:chr15 LN:104043685 17 | @SQ SN:chr16 LN:98207768 18 | @SQ SN:chr17 LN:94987271 19 | @SQ SN:chr18 LN:90702639 20 | @SQ SN:chr19 LN:61431566 21 | @SQ SN:chrX LN:171031299 22 | @SQ SN:chrY LN:91744698 23 | @SQ SN:chrM LN:16299 24 | @RG ID:Tumor_B16_F10_0810_127A PL:ILLUMINA PU:HiSeq2500 LB:Tumor_B16_F10_0810_127A DS:rnaseq SM:Tumor_B16_F10_0810_127A CN:MSSM 25 | @PG ID:STAR PN:STAR VN:STAR_2.4.0g1 CL:STAR --runThreadN 10 
--genomeDir /sc/orga/projects/PBG/REFERENCES/mm10/star/Mus_musculus.GRCm38.75.processed.overhang75 --readFilesIn /sc/orga/scratch/shahh06/GCF/outgoing/ProductionQC/QC_E006.C039_Finnigan_C57BL_6_B16_RNASeq.PE.RNASeqPolyA.RAPiD.Mouse/Tumor_B16_F10_0810_127A/Raw/RNA.IlluminaHiSeq2500.PolyA/Tumor_B16_F10_0810_127A_ACTGAT_L002_R1_001.C7FUMANXX.fastq.gz /sc/orga/scratch/shahh06/GCF/outgoing/ProductionQC/QC_E006.C039_Finnigan_C57BL_6_B16_RNASeq.PE.RNASeqPolyA.RAPiD.Mouse/Tumor_B16_F10_0810_127A/Raw/RNA.IlluminaHiSeq2500.PolyA/Tumor_B16_F10_0810_127A_ACTGAT_L002_R2_001.C7FUMANXX.fastq.gz --readFilesCommand zcat --outFileNamePrefix /sc/orga/scratch/shahh06/GCF/outgoing/ProductionQC/QC_E006.C039_Finnigan_C57BL_6_B16_RNASeq.PE.RNASeqPolyA.RAPiD.Mouse/Tumor_B16_F10_0810_127A/Processed/RAPiD.2_0_0/star/accepted_hits --outStd SAM --outReadsUnmapped Fastx --outSAMmode Full --outSAMstrandField intronMotif --chimSegmentMin 15 --chimJunctionOverhangMin 15 --sjdbGTFfile /sc/orga/projects/PBG/REFERENCES/mm10/tophat/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.gtf 26 | @CO user command line: STAR --chimSegmentMin 15 --chimJunctionOverhangMin 15 --outSAMstrandField intronMotif --genomeDir /sc/orga/projects/PBG/REFERENCES/mm10/star/Mus_musculus.GRCm38.75.processed.overhang75 --sjdbGTFfile /sc/orga/projects/PBG/REFERENCES/mm10/tophat/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.gtf --runThreadN 10 --outReadsUnmapped Fastx --outStd SAM --outSAMmode Full --outFileNamePrefix /sc/orga/scratch/shahh06/GCF/outgoing/ProductionQC/QC_E006.C039_Finnigan_C57BL_6_B16_RNASeq.PE.RNASeqPolyA.RAPiD.Mouse/Tumor_B16_F10_0810_127A/Processed/RAPiD.2_0_0/star/accepted_hits --readFilesCommand zcat --readFilesIn /sc/orga/scratch/shahh06/GCF/outgoing/ProductionQC/QC_E006.C039_Finnigan_C57BL_6_B16_RNASeq.PE.RNASeqPolyA.RAPiD.Mouse/Tumor_B16_F10_0810_127A/Raw/RNA.IlluminaHiSeq2500.PolyA/Tumor_B16_F10_0810_127A_ACTGAT_L002_R1_001.C7FUMANXX.fastq.gz 
/sc/orga/scratch/shahh06/GCF/outgoing/ProductionQC/QC_E006.C039_Finnigan_C57BL_6_B16_RNASeq.PE.RNASeqPolyA.RAPiD.Mouse/Tumor_B16_F10_0810_127A/Raw/RNA.IlluminaHiSeq2500.PolyA/Tumor_B16_F10_0810_127A_ACTGAT_L002_R2_001.C7FUMANXX.fastq.gz 27 | HWI-D00273:119:C7FUMANXX:2:1210:3717:80737 163 chr9 82926503 255 29M503N72M = 82927087 685 TCAAGAACAAACACCTCATCTTCATGACCCATTAGAACATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTAT ?ABBBGGGGGGGGGG1@CBFGGGG1:BFFEGGFGGGGGGCGGGGGGGGGGGGEGGGGGGGFGGGG>FGGGGGGGGCGG>GGGGGGGGGGGGGG>GGFDGGE RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:0 AS:i:202 XS:A:- 28 | HWI-D00273:119:C7FUMANXX:2:1108:2867:16351 83 chr9 82926503 255 29M503N72M = 82926413 -694 TCAAGAACAAACACCTCATCTTCATGACCCATTAGAACATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTAT GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCCCCC RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:0 AS:i:202 XS:A:- 29 | HWI-D00273:119:C7FUMANXX:2:2314:11512:21608 83 chr9 82926503 255 29M503N72M = 82926413 -694 TCAAGAACAAACACCTCATCTTCATGACCCATTAGAACATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTAT GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFC?1GFGGCCCCC RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:0 AS:i:202 XS:A:- 30 | HWI-D00273:119:C7FUMANXX:2:1306:10026:100751 99 chr9 82926504 255 28M503N73M = 82929045 3870 CAAGAACAAACACCTCATCTTCATGACCCATTAGAACATGTATCAGTTGACCAGTANNNGNATTNCANANNTTNAGAGTCATGTTATTAACTGCAGTTATA ?:A:0FG1EFG0EFGGGGGGGG:FFFGEFD>FG@FGGGBGGGGGGGB11BCE1D>DGCGGGGGGGGGDE RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:0 AS:i:157 XS:A:- 31 | HWI-D00273:119:C7FUMANXX:2:2301:2974:21602 99 chr9 82926504 255 28M503N73M = 82929045 3870 CAAGAACAAACACCTCATCTTCATGACCCATTAGAACATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATA CCCCCGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGEEGGGGGGGGGGGGGGGGGGGGGDFGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG 
RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:1 AS:i:201 XS:A:- 32 | HWI-D00273:119:C7FUMANXX:2:2304:3748:77089 147 chr9 82926506 255 26M503N75M = 82926422 -688 AGAACAAACACCTCATCTTCATGACCCATTAGAACATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCATGTATAAC GGGGGBE0E=0@0C0F:BGGGC11>GGCGGCGCGGGGGGGGGEEGF:FCGGDFF1GFEBCGGGGGGGGF1EGGGE1=1C@F1GGGDFGGG>GGGDGA0BB? RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:2 AS:i:198 XS:A:- 33 | HWI-D00273:119:C7FUMANXX:2:1215:3447:97938 99 chr9 82926508 255 24M503N77M = 82926516 612 AACAAACACCTCATCTTCATGACCCATTAGAACATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCATGTATAACTG CCCCBFGGGGGGGGGGGGGGGGGGGBGGGGGGGGGEGG@GDEGEGGGGEGGGGGGEDGGFGGGGGBDCGGGGGGGGGGGGGGGGGGGEGGEGGGGGGGCG= RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:4 AS:i:196 XS:A:- 34 | HWI-D00273:119:C7FUMANXX:2:2114:3120:28476 147 chr9 82926510 255 22M503N79M = 82909869 -17245 CAAACACCTCATCTTCATGACCCATTAGAACATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTG EGGEE=GGGGGGCFECGGGGGFGGGGGGGGEGFGGGGGGGGGGGGGGGGGGGGGGGF>BGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFGGGGCCCCC RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:0 AS:i:202 XS:A:- 35 | HWI-D00273:119:C7FUMANXX:2:1213:20576:91910 147 chr9 82926516 255 16M503N85M = 82926426 -694 CCTCATCTTCATGACCCATTAGAACATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCATGTATAACTGTGTTGTCA GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCCCBA RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:3 AS:i:196 XS:A:- 36 | HWI-D00273:119:C7FUMANXX:2:1215:3447:97938 147 chr9 82926516 255 16M503N85M = 82926508 -612 CCTCATCTTCATGACCCATTAGAACATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCATGTATAACTGTGTTGTCA GGEGEGGGGGFGGGGGGGGDB@EGGGGGGGGGGGGGGDGGGDGDGGFGGGGGGFGGGGGGGGGGGGDF>GGFGFGGFEFGGGCGGGGFBFGGGGGGBCBAB RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:4 AS:i:196 XS:A:- 37 | HWI-D00273:119:C7FUMANXX:2:2110:3107:94042 147 chr9 82926516 255 16M503N85M = 82915412 
-11708 CCTCATCTTCATGACCCATTAGAACATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCA GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCGGGGGGGGGGGGGGGGGGGCBBBA RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:0 AS:i:203 XS:A:- 38 | HWI-D00273:119:C7FUMANXX:2:2313:1725:79080 147 chr9 82926516 255 16M503N85M = 82926413 -707 CCTCATCTTCATGACCCATTAGAACATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCA >GCFGGCGGGGGGCGGGGGGGGGGEGGGGGGGFFGGGGGEGGGGGGGGGGGGDFFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGBBBB@ RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:0 AS:i:202 XS:A:- 39 | HWI-D00273:119:C7FUMANXX:2:2314:5945:11999 147 chr9 82926516 255 16M503N85M = 82913858 -13262 CCTCATCTTCATGACCCATTAGAACATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCA GGFGGGGGGGGGGGGGGGEGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGFFGGGGGGGGGGGGGGGGGGCCCCC RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:1 AS:i:201 XS:A:- 40 | HWI-D00273:119:C7FUMANXX:2:2216:12319:64123 147 chr9 82926521 255 11M503N90M = 82926426 -699 TCTTCATGACCCATTAGAACATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCG CGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGFGGFFGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGBCCCC RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:0 AS:i:202 XS:A:- 41 | HWI-D00273:119:C7FUMANXX:2:2315:15040:51160 99 chr9 82926522 255 10M503N91M = 82929000 3807 CTTCATGACCCATTAGAACATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGA CCCCCGGGGGGGGGGGGGCFGGGGGGGGGGGGGGGGGGGEFGGGGGGGGGCGGGGG1FEGCCGGGGGGGG>FGGGCGGGGGGEGGGGGGGGGEGCGGGGGG RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:0 AS:i:203 XS:A:- 42 | HWI-D00273:119:C7FUMANXX:2:2212:8037:3803 147 chr9 82926528 255 4M503N97M = 82915344 -11788 GACCCATTAGAACATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAA 
C>GGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGBBBCC RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:0 AS:i:203 XS:A:- 43 | HWI-D00273:119:C7FUMANXX:2:2305:20190:47439 83 chr9 82926528 255 4M503N97M = 82926438 -694 GACCCATTAGAACATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAA FGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGCCCCC RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:1 AS:i:202 XS:A:- 44 | HWI-D00273:119:C7FUMANXX:2:1307:19253:35846 163 chr9 82926529 255 3M503N98M = 82927122 1605 ACCCATTAGAACATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCATGTATAACTGTGTTGTCATGTCGATCCCAAG CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGF RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:2 AS:i:199 XS:A:- 45 | HWI-D00273:119:C7FUMANXX:2:2203:3890:70174 83 chr9 82926529 255 3M503N98M = 82915307 -11826 ACCCATTAGAACATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCACAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAAG FGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGFE?1GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCCCCC RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:1 AS:i:199 XS:A:- 46 | HWI-D00273:119:C7FUMANXX:2:2308:11032:56946 99 chr9 82927034 255 2S99M = 82927179 1157 CCCCATTAGAACATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAAG CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGG RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:0 AS:i:199 XS:A:- 47 | HWI-D00273:119:C7FUMANXX:2:1311:15268:66444 163 chr9 82927036 255 101M = 82928837 2007 ATTAGAACATGTATCAGTTGACCGGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAAGCTAC AABB@1CGFGCGEGCGGGGGFGEGGGFGG1@F@DGGG>DFGGGGGGGGGGGGBF@FGEGGGGGGF RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:1 AS:i:199 XS:A:- 48 | 
HWI-D00273:119:C7FUMANXX:2:1315:19055:88914 99 chr9 82927038 255 101M = 82928101 1794 TAGAACATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCATGTATAACTGTGTTGTCATGTCGATCCCAAGCTACCA ABB==EFC@FCCBDFG>BGCEGD1;BFF1F=FD1:GGGGG:BFGGGGBECGGGGGGGFGGGGGGGGE>DG>FGG?DGEEG0FFDFGG@FFD0FG00? RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:2 AS:i:197 XS:A:- 49 | HWI-D00273:119:C7FUMANXX:2:2311:7334:96601 99 chr9 82927042 255 3S98M = 82929066 5464 GGGACATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAAGCTACCAT CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGFGEGGGEGGGGGEGGG>GG RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:0 AS:i:200 XS:A:- 50 | HWI-D00273:119:C7FUMANXX:2:1201:13659:67863 83 chr9 82927042 255 101M = 82915362 -11781 ACATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCATGTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGT GGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGFFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFCCCCB RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:2 AS:i:197 XS:A:- 51 | HWI-D00273:119:C7FUMANXX:2:1301:12664:14291 163 chr9 82927043 255 101M = 82928108 1796 CATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTT CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGGGGGGGFGGGGEGGGGGGGGGGGGGGGGGGGGGG RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:0 AS:i:201 XS:A:- 52 | HWI-D00273:119:C7FUMANXX:2:2301:14292:28834 163 chr9 82927043 255 101M = 82928108 1796 CATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTT BBC@BGGGGGGGGGGGG0EFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:0 AS:i:201 XS:A:- 53 | HWI-D00273:119:C7FUMANXX:2:2216:5381:79114 163 chr9 82927048 255 1S100M = 82927143 1107 CATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTT 
CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:0 AS:i:200 XS:A:- 54 | HWI-D00273:119:C7FUMANXX:2:1112:7261:43382 83 chr9 82927050 255 101M = 82926425 -726 CAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCA 0GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFGGGGCCCCC RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:0 AS:i:200 55 | HWI-D00273:119:C7FUMANXX:2:2204:4664:88087 147 chr9 82927051 255 101M = 82915351 -11801 AGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCAT GGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGCBCCB RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:0 AS:i:201 XS:A:- 56 | HWI-D00273:119:C7FUMANXX:2:2207:2698:99061 99 chr9 82927056 255 1S100M = 82927129 1085 CACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTT CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGEGGGGG RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:0 AS:i:200 XS:A:- 57 | HWI-D00273:119:C7FUMANXX:2:1102:11259:82311 99 chr9 82927057 255 1S99M1S = 82928997 3269 TCCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTTT CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFGGGGGGGGGGGGEGGGGEGGGGGGGGGGGGGFGGGGGGGGGGGDGGGGGGGG# RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:0 AS:i:199 XS:A:- 58 | HWI-D00273:119:C7FUMANXX:2:1103:19599:62009 163 chr9 82927057 255 101M = 82928131 1805 CCGGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTTGT CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:1 AS:i:199 XS:A:- 59 | 
HWI-D00273:119:C7FUMANXX:2:2212:7108:51045 99 chr9 82927057 255 101M = 82927141 1096 CCCGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTTGT CCCCCGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:1 AS:i:199 XS:A:- 60 | HWI-D00273:119:C7FUMANXX:2:2208:4517:81955 163 chr9 82927061 255 101M = 82929000 3268 TATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCATGTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTTGTGATT BBB@?=B1EFEGGGGGGGG;1C@1F11EGGGGGGBCGEC11F1EFGBE@GGGGGGGGGGG>F1DG@:FG>FGGGE11:FBCFDGCF@FGGGGGCGGGGGGG RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:2 AS:i:197 XS:A:- 61 | HWI-D00273:119:C7FUMANXX:2:2210:18779:40897 99 chr9 82927061 255 101M = 82927088 128 TATAAAAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTTGTGATT 3AABB@@1FGEFGGGGDGF@>F>FGCCEG@GGGG>1C@FGGGEGGGGGGGFEGDDEGGGGGGG0C0BEFDDGF>DFGGDDFFGGGG>F>GCCGGGGGECCG RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:1 AS:i:198 62 | HWI-D00273:119:C7FUMANXX:2:1312:18199:96205 99 chr9 82927062 255 101M = 82928990 3257 ATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTTGTGATTT BCCCBGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGG@FGGGGFEEDGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGBGGGG RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:0 AS:i:201 XS:A:- 63 | HWI-D00273:119:C7FUMANXX:2:1305:7949:79617 163 chr9 82927064 255 101M = 82929002 3267 AAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTTGTGATTTTG BBCCCGG@DGCFFGGGC@1FGGGGGGGGEDGC>FGGGEGGGGGGG>FGGGG1DFGGGGGD:<0F>GGGGGGEGEGGDFGGGGGGGGGFGGGGFFGGFGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:4 AS:i:193 XS:A:- 69 | HWI-D00273:119:C7FUMANXX:2:2111:17160:4559 163 chr9 82927070 255 1S100M = 82927127 1069 
CTCCAAACTTTCAGAGTCATGTTATTAACTGCATGTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTTGTGATTTTGTCTTC BBCBAGGGGGGGGGGGGGGGD=BDGEGEGCGGGGGFGEGGGGDGFGGGGGGGGEGGGGGGGCBGGGEGGGGGGGGGGGGGGGGFGGGGDGGGGGGGGGGGD RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:2 AS:i:196 XS:A:- 70 | HWI-D00273:119:C7FUMANXX:2:2101:5725:47047 147 chr9 82927075 255 101M = 82926472 -704 ACTTTCAGAGTCATGTTATTAACTGCATGTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTTGTGATTTTGTCTTCTATGCC FGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGBCCCC RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:2 AS:i:198 XS:A:- 71 | HWI-D00273:119:C7FUMANXX:2:1208:15555:44669 83 chr9 82927086 255 101M = 82926496 -691 CATGTTATTAACTGCATGTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTTGTGATTTTGTCTTCTATGCCTTGAAGATTTT >GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGCCCCC RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:2 AS:i:198 XS:A:- 72 | HWI-D00273:119:C7FUMANXX:2:2204:9269:81404 83 chr9 82927086 255 101M = 82913858 -13329 CATGTTATTAACTGCATGTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTTGTGATTTTGTCTTCTATGCCTTGAAGATTTT GGGGGGGGGGGGGGGGGGGGGGCGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCCCCC RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:2 AS:i:197 XS:A:- 73 | HWI-D00273:119:C7FUMANXX:2:2205:14913:32579 147 chr9 82927086 255 101M = 82915369 -11818 CATGTTATTAACTGCATGTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTTGTGATTTTGTCTTCTATGCCTTGAAGATTTT GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFEGGGGGGGGGGGGGGCCCCC RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:2 AS:i:197 XS:A:- 74 | HWI-D00273:119:C7FUMANXX:2:1210:3717:80737 83 chr9 82927087 255 101M = 82926503 -685 ATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTTGTGATTTTGTCTTCTATGCCTTGAAGATTTTG 
GGGGGGGGGGGGGFEGGGGEGFGGGGGGGGGFGGGGGGGGF>F@1GGGGGFGGFGGGGGGGGGGGGGGGEGGGGGGGF@EGGGGGGGDGGGGGGGGBCCBC RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:0 AS:i:202 XS:A:- 75 | HWI-D00273:119:C7FUMANXX:2:2210:18779:40897 147 chr9 82927088 255 101M = 82927061 -128 TGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTTGTGATTTTGTCTTCTATGCCTTGAAGATTTTGG BGGGGGGCGCGGGGEGGGGGCGGCGGGGGGGGFCGFGEGF/EGDGGGC@GGDDGFDC@GGGDFGGGGDGGEDEGGGGGGGGGGEGGGGGBGFGGGGBCBBB RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:1 AS:i:198 76 | HWI-D00273:119:C7FUMANXX:2:1203:17569:36966 163 chr9 82927089 255 101M = 82928999 3239 GTTATTAACTGCATGTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTTGTGATTTTGTCTTCTATGCCTTGAAGATTTTGGC @BCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGG RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:2 AS:i:197 XS:A:- 77 | HWI-D00273:119:C7FUMANXX:2:2315:15400:52331 163 chr9 82927089 255 101M = 82928999 3239 GTTATTAACTGCATGTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTTGTGATTTTGTCTTCTATGCCTTGAAGATTTTGGC CCCBBGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGBGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGFGFGGGG>>FGGDCGGEGGGGCDG RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:2 AS:i:197 XS:A:- 78 | HWI-D00273:119:C7FUMANXX:2:1111:4417:66340 99 chr9 82927094 255 95M911N6M = 82927150 1068 TAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTTGTGATTTTGTCTTCTATGCCTTGAAGATTTTGGCCTGCT BBBBBFGGGGGEFBGGFF=FDGGGGEGGGGGGGGGGGGGGGGGGGEGGGGGGGGEGGGGGGGGGGGFCGGGGGGGGGGGGGGGGD0CDGCGGGGGGGFEFE RG:Z:Tumor_B16_F10_0810_127A NH:i:1 HI:i:1 nM:i:0 AS:i:203 XS:A:- 79 | HWI-D00273:119:C7FUMANXX:2:2313:5631:84945 83 chr9 82927094 255 95M911N6M = 82926482 -1624 TAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTTGTGATTTTGTCTTCTATGCCTTGAAGATTTTGGCCTGCT GGGGGFGGGGGGGFGEGDGEGGGGGGGGGT variant but doesn't include the subsequent nucleotide 77 | # change T>G. 
To avoid having to think about phasing of variants I changed 78 | # the VCF in vaxrank to contain a GT>TG variant. 79 | arg_parser = make_vaxrank_arg_parser() 80 | args = arg_parser.parse_args([ 81 | "--vcf", data_path("b16.f10/b16.f10.Phip.vcf"), 82 | "--bam", data_path("b16.f10/b16.combined.sorted.bam"), 83 | "--vaccine-peptide-length", "15", 84 | "--padding-around-mutation", "5", 85 | "--mhc-predictor", "random", 86 | "--mhc-alleles", "HLA-A*02:01", 87 | ]) 88 | results = run_vaxrank_from_parsed_args(args) 89 | ranked_list = results.ranked_vaccine_peptides 90 | 91 | for variant, vaccine_peptides in ranked_list: 92 | vaccine_peptide = vaccine_peptides[0] 93 | mutant_protein_fragment = vaccine_peptide.mutant_protein_fragment 94 | check_mutant_amino_acids( 95 | variant, 96 | mutant_protein_fragment) 97 | 98 | def test_keep_top_k_epitopes(): 99 | arg_parser = make_vaxrank_arg_parser() 100 | keep_k_epitopes = 3 101 | args = arg_parser.parse_args([ 102 | "--vcf", data_path("b16.f10/b16.f10.Phip.vcf"), 103 | "--bam", data_path("b16.f10/b16.combined.sorted.bam"), 104 | "--vaccine-peptide-length", "15", 105 | "--padding-around-mutation", "5", 106 | "--num-epitopes-per-vaccine-peptide", str(keep_k_epitopes), 107 | "--mhc-predictor", "netmhc", 108 | "--mhc-alleles", "HLA-A*02:01", 109 | ]) 110 | results = run_vaxrank_from_parsed_args(args) 111 | 112 | ranked_list = results.ranked_vaccine_peptides 113 | 114 | for variant, vaccine_peptides in ranked_list: 115 | vaccine_peptide = vaccine_peptides[0] 116 | eq_(keep_k_epitopes, len(vaccine_peptide.mutant_epitope_predictions)) 117 | # recompute the expected score, make sure the top-k argument from ranked_vaccine_peptides() 118 | # propagated as expected 119 | mutant_epitope_score = sum( 120 | p.logistic_epitope_score() for p in vaccine_peptide.mutant_epitope_predictions) 121 | almost_eq_(mutant_epitope_score, vaccine_peptide.mutant_epitope_score) 122 | 123 | def test_mutant_protein_fragment_serialization(): 124 | arg_parser = 
make_vaxrank_arg_parser() 125 | keep_k_epitopes = 3 126 | args = arg_parser.parse_args([ 127 | "--vcf", data_path("b16.f10/b16.f10.Phip.vcf"), 128 | "--bam", data_path("b16.f10/b16.combined.sorted.bam"), 129 | "--vaccine-peptide-length", "15", 130 | "--padding-around-mutation", "5", 131 | "--num-epitopes-per-vaccine-peptide", str(keep_k_epitopes), 132 | "--mhc-predictor", "netmhc", 133 | "--mhc-alleles", "HLA-A*02:01", 134 | ]) 135 | results = run_vaxrank_from_parsed_args(args) 136 | 137 | ranked_list = results.ranked_vaccine_peptides 138 | 139 | for _, vaccine_peptides in ranked_list: 140 | mutant_protein_fragment = vaccine_peptides[0].mutant_protein_fragment 141 | json_str = mutant_protein_fragment.to_json() 142 | deserialized = MutantProteinFragment.from_json(json_str) 143 | eq_(mutant_protein_fragment, deserialized) 144 | -------------------------------------------------------------------------------- /tests/test_shell_script.py: -------------------------------------------------------------------------------- 1 | # Licensed under the Apache License, Version 2.0 (the "License"); 2 | # you may not use this file except in compliance with the License. 3 | # You may obtain a copy of the License at 4 | # 5 | # http://www.apache.org/licenses/LICENSE-2.0 6 | # 7 | # Unless required by applicable law or agreed to in writing, software 8 | # distributed under the License is distributed on an "AS IS" BASIS, 9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | # See the License for the specific language governing permissions and 11 | # limitations under the License. 
12 | 13 | 14 | from os.path import getsize 15 | from mock import patch 16 | 17 | from tempfile import NamedTemporaryFile 18 | 19 | import pandas as pd 20 | from xlrd import open_workbook 21 | 22 | from vaxrank.cli import main as run_shell_script 23 | 24 | from .testing_helpers import data_path 25 | 26 | cli_args_for_b16_seqdata = [ 27 | "--vcf", data_path("b16.f10/b16.vcf"), 28 | "--bam", data_path("b16.f10/b16.combined.bam"), 29 | "--vaccine-peptide-length", "25", 30 | "--mhc-predictor", "random", 31 | "--mhc-alleles", "H2-Kb,H2-Db", 32 | "--padding-around-mutation", "5", 33 | "--count-mismatches-after-variant", 34 | ] 35 | 36 | cli_args_for_b16_seqdata_real_predictor = [ 37 | "--vcf", data_path("b16.f10/b16.vcf"), 38 | "--bam", data_path("b16.f10/b16.combined.bam"), 39 | "--vaccine-peptide-length", "25", 40 | "--mhc-predictor", "netmhcpan", 41 | "--mhc-alleles", "H2-Kb,H2-Db", 42 | "--mhc-epitope-lengths", "8", 43 | "--padding-around-mutation", "5", 44 | "--count-mismatches-after-variant" 45 | ] 46 | 47 | 48 | def test_ascii_report(): 49 | with NamedTemporaryFile(mode="r") as f: 50 | ascii_args = cli_args_for_b16_seqdata + ["--output-ascii-report", f.name] 51 | run_shell_script(ascii_args) 52 | contents = f.read() 53 | lines = contents.split("\n") 54 | assert len(lines) > 0 55 | 56 | 57 | def test_ascii_report_real_netmhc_predictor(): 58 | with NamedTemporaryFile(mode="r") as f: 59 | ascii_args = cli_args_for_b16_seqdata_real_predictor + [ 60 | "--output-ascii-report", f.name] 61 | run_shell_script(ascii_args) 62 | contents = f.read() 63 | lines = contents.split("\n") 64 | assert len(lines) > 0 65 | no_variants_text = 'No variants' 66 | assert no_variants_text not in contents 67 | 68 | 69 | def test_json_report(): 70 | with NamedTemporaryFile(mode="r") as f: 71 | json_args = cli_args_for_b16_seqdata + ["--output-json-file", f.name] 72 | run_shell_script(json_args) 73 | contents = f.read() 74 | lines = contents.split("\n") 75 | assert len(lines) > 0 76 | 77 | 78 | 
def test_csv_report(): 79 | with NamedTemporaryFile(mode="r") as f: 80 | csv_args = cli_args_for_b16_seqdata + ["--output-csv", f.name] 81 | run_shell_script(csv_args) 82 | contents = f.read() 83 | lines = contents.split("\n") 84 | assert len(lines) > 1 85 | 86 | 87 | def test_all_variant_csv_report(): 88 | with NamedTemporaryFile(mode="r") as f: 89 | all_csv_args = cli_args_for_b16_seqdata + [ 90 | "--output-passing-variants-csv", f.name, 91 | # TODO: make this flag not necessary 92 | "--output-csv", f.name + "ignored"] 93 | run_shell_script(all_csv_args) 94 | contents = f.read() 95 | lines = contents.split("\n") 96 | assert len(lines) > 1 97 | # make sure it can be a valid dataframe 98 | f.seek(0) 99 | df = pd.read_csv(f) 100 | assert len(df) > 1 101 | 102 | def test_isovar_csv(): 103 | with NamedTemporaryFile(mode="r") as f: 104 | isovar_csv_args = cli_args_for_b16_seqdata + [ 105 | "--output-isovar-csv", f.name, 106 | # TODO: make this flag not necessary 107 | "--output-csv", f.name + "ignored" 108 | ] 109 | run_shell_script(isovar_csv_args) 110 | df = pd.read_csv(f) 111 | assert len(df) > 1 112 | 113 | def test_xlsx_report(): 114 | with NamedTemporaryFile(mode="r") as f: 115 | xlsx_args = cli_args_for_b16_seqdata + ["--output-xlsx-report", f.name] 116 | run_shell_script(xlsx_args) 117 | book = open_workbook(f.name) 118 | assert book.nsheets > 1 119 | 120 | 121 | 122 | 123 | def test_html_report(): 124 | with NamedTemporaryFile(mode="r") as f: 125 | html_args = cli_args_for_b16_seqdata + ["--output-html", f.name] 126 | run_shell_script(html_args) 127 | contents = f.read() 128 | lines = contents.split("\n") 129 | assert len(lines) > 1 130 | 131 | 132 | def test_pdf_report(): 133 | with NamedTemporaryFile(mode="rb") as f: 134 | pdf_args = cli_args_for_b16_seqdata + ["--output-pdf-report", f.name] 135 | run_shell_script(pdf_args) 136 | assert getsize(f.name) > 1 137 | 138 | 139 | @patch('vaxrank.core_logic.vaccine_peptides_for_variant') 140 | def 
test_report_no_peptides(mock_vaccine_peptides_for_variant): 141 | # simulate case where we have no epitopes for any variant 142 | mock_vaccine_peptides_for_variant.return_value = [] 143 | with NamedTemporaryFile(mode="r") as f: 144 | html_args = cli_args_for_b16_seqdata + ["--output-csv", f.name] 145 | # test that this doesn't crash and that the CSV output is empty 146 | run_shell_script(html_args) 147 | contents = f.read() 148 | assert contents == '' 149 | 150 | -------------------------------------------------------------------------------- /tests/testing_helpers.py: -------------------------------------------------------------------------------- 1 | # Licensed under the Apache License, Version 2.0 (the "License"); 2 | # you may not use this file except in compliance with the License. 3 | # You may obtain a copy of the License at 4 | # 5 | # http://www.apache.org/licenses/LICENSE-2.0 6 | # 7 | # Unless required by applicable law or agreed to in writing, software 8 | # distributed under the License is distributed on an "AS IS" BASIS, 9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | # See the License for the specific language governing permissions and 11 | # limitations under the License. 12 | 13 | from __future__ import absolute_import, print_function, division 14 | from os.path import join, dirname 15 | 16 | 17 | def data_path(name): 18 | """ 19 | Return the absolute path to a file in the tests/data directory. 20 | The name specified should be relative to tests/data.
21 | """ 22 | return join(dirname(__file__), "data", name) 23 | -------------------------------------------------------------------------------- /vaxrank/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "1.5.0" -------------------------------------------------------------------------------- /vaxrank/cli.py: -------------------------------------------------------------------------------- 1 | # Licensed under the Apache License, Version 2.0 (the "License"); 2 | # you may not use this file except in compliance with the License. 3 | # You may obtain a copy of the License at 4 | # 5 | # http://www.apache.org/licenses/LICENSE-2.0 6 | # 7 | # Unless required by applicable law or agreed to in writing, software 8 | # distributed under the License is distributed on an "AS IS" BASIS, 9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | # See the License for the specific language governing permissions and 11 | # limitations under the License. 12 | 13 | import sys 14 | import logging 15 | import logging.config 16 | import pkg_resources 17 | 18 | from argparse import ArgumentParser 19 | 20 | from isovar import isovar_results_to_dataframe 21 | from isovar.cli import (make_isovar_arg_parser, run_isovar_from_parsed_args,) 22 | from mhctools.cli import ( 23 | add_mhc_args, 24 | mhc_alleles_from_args, 25 | mhc_binding_predictor_from_args, 26 | ) 27 | 28 | import pandas as pd 29 | import serializable 30 | from varcode.cli import variant_collection_from_args 31 | 32 | from . 
def make_vaxrank_arg_parser():
    """
    Build the command-line parser used for a full Vaxrank run.

    The parser is created by Isovar's make_isovar_arg_parser, inherits a
    --version flag from a small parent parser, and is then extended with
    the MHC, vaccine peptide, output, optional report section and
    supplemental report options.

    Returns the configured parser.
    """
    # parent parser whose only job is to contribute the --version flag
    version_parser = ArgumentParser('parent', add_help=False)
    version_parser.add_argument(
        '--version',
        action='version',
        version='Vaxrank %s' % (__version__,))

    summary = (
        "Select personalized vaccine peptides from cancer variants, "
        "expression data, and patient HLA type.")

    # start from Isovar's parser so its command-line options are inherited
    parser = make_isovar_arg_parser(
        prog="vaxrank",
        description=summary,
        parents=[version_parser],
    )
    option_adders = (
        add_mhc_args,
        add_vaccine_peptide_args,
        add_output_args,
        add_optional_output_args,
        add_supplemental_report_args,
    )
    for add_options in option_adders:
        add_options(parser)
    return parser


def cached_run_arg_parser():
    """
    Build the parser used when reports are regenerated from a saved JSON
    file instead of being recomputed.

    Only --input-json-file plus the output, optional report section and
    supplemental report options are registered.

    Returns the configured parser.
    """
    summary = (
        "Select personalized vaccine peptides from cancer variants, "
        "expression data, and patient HLA type.")
    parser = ArgumentParser(
        prog="vaxrank",
        description=summary,
    )
    parser.add_argument(
        "--input-json-file",
        default="",
        help="Path to JSON file containing results of vaccine peptide report")
    option_adders = (
        add_output_args,
        add_optional_output_args,
        add_supplemental_report_args,
    )
    for add_options in option_adders:
        add_options(parser)
    return parser


# Lets the user specify whether they want to see particular sections in the report.
def add_optional_output_args(arg_parser):
    """
    Register the on/off switches that control which optional sections
    appear in the report.

    Each section gets a mutually exclusive pair of flags writing to the same
    destination (store_true / store_false), and the destination defaults to
    True when neither flag is given.

    Parameters
    ----------
    arg_parser : argparse.ArgumentParser
        Parser to extend in place.
    """
    # (dest, (enable flag, help), (disable flag, help))
    report_sections = (
        (
            "manufacturability",
            ("--include-manufacturability-in-report", None),
            ("--no-manufacturability-in-report", None),
        ),
        (
            "wt_epitopes",
            (
                "--include-non-overlapping-epitopes-in-report",
                "Set to true to include a report section for each vaccine peptide containing "
                "strong binders that do not overlap the mutation",
            ),
            (
                "--no-non-overlapping-epitopes-in-report",
                "Set to false to exclude report information for each vaccine peptide about "
                "strong binders that do not overlap the mutation",
            ),
        ),
    )
    for dest, (on_flag, on_help), (off_flag, off_help) in report_sections:
        toggle = arg_parser.add_mutually_exclusive_group(required=False)
        toggle.add_argument(on_flag, dest=dest, action="store_true", help=on_help)
        toggle.add_argument(off_flag, dest=dest, action="store_false", help=off_help)
        # the section is shown unless the user explicitly turns it off
        arg_parser.set_defaults(**{dest: True})


def add_output_args(arg_parser):
    """
    Register the "Output options" argument group on arg_parser.

    All path/text options are plain strings; only --max-mutations-in-report
    is parsed as an int and defaults to None.

    Parameters
    ----------
    arg_parser : argparse.ArgumentParser
        Parser to extend in place.
    """
    group = arg_parser.add_argument_group("Output options")

    # (flag, default, help) in the order they should appear in --help
    string_options = (
        ("--output-patient-id", "",
         "Patient ID to use in report"),
        ("--output-csv", "",
         "Name of CSV file which contains predicted sequences"),
        ("--output-ascii-report", "",
         "Path to ASCII vaccine peptide report"),
        ("--output-html-report", "",
         "Path to HTML vaccine peptide report"),
        ("--output-pdf-report", "",
         "Path to PDF vaccine peptide report"),
        ("--output-json-file", "",
         "Path to JSON file containing results of vaccine peptide report"),
        ("--output-xlsx-report", "",
         "Path to XLSX vaccine peptide report worksheet, one sheet per variant. This is meant "
         "for use by the vaccine manufacturer."),
        ("--output-neoepitope-report", "",
         "Path to XLSX neoepitope report, containing information focusing on short peptide "
         "sequences."),
        ("--output-reviewed-by", "",
         "Comma-separated list of reviewer names"),
        ("--output-final-review", "",
         "Name of final reviewer of report"),
        ("--output-passing-variants-csv", "",
         "Path to CSV file containing some metadata about every variant that has passed all "
         "variant caller filters"),
        ("--output-isovar-csv", "",
         "Path to CSV file containing raw RNA counts and filtering metadata "
         "for all variants (generated by Isovar)"),
        ("--log-path", "python.log",
         "File path to write the vaxrank Python log to"),
    )
    for flag, default_value, help_text in string_options:
        group.add_argument(flag, default=default_value, help=help_text)

    group.add_argument(
        "--max-mutations-in-report",
        default=None,
        type=int,
        help="Number of mutations to report")
(default: %(default)s)") 204 | 205 | vaccine_peptide_group.add_argument( 206 | "--padding-around-mutation", 207 | default=5, 208 | type=int, 209 | help=( 210 | "Number of off-center windows around the mutation to consider " 211 | "as vaccine peptides. (default: %(default)s)" 212 | )) 213 | 214 | vaccine_peptide_group.add_argument( 215 | "--max-vaccine-peptides-per-mutation", 216 | default=1, 217 | type=int, 218 | help=( 219 | "Number of vaccine peptides to generate for each mutation. " 220 | "(default: %(default)s)" 221 | )) 222 | 223 | vaccine_peptide_group.add_argument( 224 | "--min-epitope-score", 225 | default=1e-10, 226 | type=float, 227 | help=( 228 | "Ignore predicted MHC ligands whose normalized binding score " 229 | "falls below this threshold. (default: %(default)s)")) 230 | 231 | vaccine_peptide_group.add_argument( 232 | "--num-epitopes-per-vaccine-peptide", 233 | type=int, 234 | help=( 235 | "Maximum number of mutant epitopes to consider when scoring " 236 | "each vaccine peptide. 
def add_supplemental_report_args(arg_parser):
    """
    Register the "Supplemental report options" argument group, which holds
    --cosmic_vcf_filename (default: empty string).

    Parameters
    ----------
    arg_parser : argparse.ArgumentParser
        Parser to extend in place.
    """
    report_args_group = arg_parser.add_argument_group("Supplemental report options")
    report_args_group.add_argument(
        "--cosmic_vcf_filename",
        default="",
        help="Local path to COSMIC vcf")


def check_args(args):
    """
    Verify that at least one output destination was requested.

    Parameters
    ----------
    args : Namespace
        Parsed arguments carrying the output_* attributes read below.

    Raises ValueError when every one of those output options is empty.
    """
    if not (args.output_csv or
            args.output_ascii_report or
            args.output_html_report or
            args.output_pdf_report or
            args.output_json_file or
            args.output_xlsx_report or
            args.output_neoepitope_report or
            args.output_passing_variants_csv or
            args.output_isovar_csv):
        raise ValueError(
            "Must specify at least one of: --output-csv, "
            "--output-xlsx-report, "
            "--output-ascii-report, "
            "--output-html-report, "
            "--output-pdf-report, "
            "--output-neoepitope-report, "
            "--output-json-file, "
            "--output-passing-variants-csv, "
            "--output-isovar-csv")


def run_vaxrank_from_parsed_args(args):
    """
    Run Isovar and then the Vaxrank core logic using parsed command-line
    arguments.

    Parameters
    ----------
    args : Namespace
        Parsed user args. This function sets args.protein_sequence_length
        before running Isovar.

    Returns the value produced by run_vaxrank. When --output-isovar-csv was
    given, the Isovar results are also written to that CSV path.
    """
    mhc_predictor = mhc_binding_predictor_from_args(args)

    args.protein_sequence_length = (
        args.vaccine_peptide_length + 2 * args.padding_around_mutation
    )

    # Vaxrank is going to evaluate multiple vaccine peptides containing
    # the same mutation so need a longer sequence from Isovar
    isovar_results = run_isovar_from_parsed_args(args)

    if args.output_isovar_csv:
        df = isovar_results_to_dataframe(isovar_results)
        df.to_csv(args.output_isovar_csv, index=False)

    return run_vaxrank(
        isovar_results=isovar_results,
        mhc_predictor=mhc_predictor,
        vaccine_peptide_length=args.vaccine_peptide_length,
        max_vaccine_peptides_per_variant=args.max_vaccine_peptides_per_mutation,
        min_epitope_score=args.min_epitope_score,
        num_mutant_epitopes_to_keep=args.num_epitopes_per_vaccine_peptide)


def ranked_vaccine_peptides_with_metadata_from_parsed_args(args):
    """
    Computes all the data needed for report generation.

    When args carries an input_json_file attribute, the data is loaded from
    that JSON file instead of being recomputed; its saved 'args' are updated
    with the args of this run and the variant list is truncated to
    max_mutations_in_report if a limit was given.

    Parameters
    ----------
    args : Namespace
        Parsed user args from this run

    Returns a dictionary containing 3 items:
    - ranked variant/vaccine peptide list
    - a dictionary of command-line arguments used to generate it
    - patient info object
    """
    if hasattr(args, 'input_json_file'):
        with open(args.input_json_file) as f:
            data = serializable.from_json(f.read())
        # the JSON data from the previous run will have the older args saved, which may need to
        # be overridden with args from this run (which all be output related)
        data['args'].update(vars(args))

        # if we need to truncate the variant list based on max_mutations_in_report, do that
        # here. The option defaults to None (no limit), and comparing an int with None
        # raises TypeError on Python 3, so only compare when a limit was actually given.
        max_mutations = args.max_mutations_in_report
        if max_mutations is not None and len(data['variants']) > max_mutations:
            data['variants'] = data['variants'][:max_mutations]
        return data

    # get various things from user args
    mhc_alleles = mhc_alleles_from_args(args)
    logger.info("MHC alleles: %s", mhc_alleles)

    variants = variant_collection_from_args(args)
    logger.info("Variants: %s", variants)

    vaxrank_results = run_vaxrank_from_parsed_args(args)

    variants_count_dict = vaxrank_results.variant_counts()
    assert len(variants) == variants_count_dict['num_total_variants'], \
        "Len(variants) is %d but variants_count_dict came back with %d" % (
            len(variants), variants_count_dict['num_total_variants'])

    if args.output_passing_variants_csv:
        variant_metadata_dicts = vaxrank_results.variant_properties(
            gene_pathway_check=GenePathwayCheck())
        df = pd.DataFrame(variant_metadata_dicts)
        df.to_csv(args.output_passing_variants_csv, index=False)

    ranked_variants_with_vaccine_peptides = vaxrank_results.ranked_vaccine_peptides
    # slicing with a None limit keeps the whole list
    ranked_variants_with_vaccine_peptides_for_report = \
        ranked_variants_with_vaccine_peptides[:args.max_mutations_in_report]
    patient_info = PatientInfo(
        patient_id=args.output_patient_id,
        vcf_paths=variants.sources,
        bam_path=args.bam,
        mhc_alleles=mhc_alleles,
        num_somatic_variants=variants_count_dict['num_total_variants'],
        num_coding_effect_variants=variants_count_dict['num_coding_effect_variants'],
        num_variants_with_rna_support=variants_count_dict['num_variants_with_rna_support'],
        num_variants_with_vaccine_peptides=variants_count_dict['num_variants_with_vaccine_peptides']
    )
    # return variants, patient info, and command-line args
    data = {
        # TODO:
        #  change this field to 'ranked_variants_with_vaccine_peptides'
        #  but figure out how to do it in a backwards compatible way
        'variants': ranked_variants_with_vaccine_peptides_for_report,
        'patient_info': patient_info,
        'args': vars(args),
    }
    logger.info('About to save args: %s', data['args'])

    # save JSON data if necessary. as of time of writing, vaxrank takes ~25 min to run,
    # most of which is core logic. the formatting is super fast, and it can
    # be useful to save the data to be able to iterate just on the formatting
    if args.output_json_file:
        with open(args.output_json_file, 'w') as f:
            f.write(serializable.to_json(data))
            logger.info('Wrote JSON report data to %s', args.output_json_file)

    return data


def configure_logging(args):
    """
    Configure the logging module from the packaged 'logging.conf' file,
    passing args.log_path as the 'logfilename' default.
    """
    logging.config.fileConfig(
        pkg_resources.resource_filename(
            __name__,
            'logging.conf'),
        defaults={'logfilename': args.log_path})


def choose_arg_parser(args_list):
    """
    Pick the parser matching the kind of run requested on the command line.

    Parameters
    ----------
    args_list : list of str
        Raw command-line arguments.

    Returns the cached-run parser when --input-json-file is present (either
    as its own token or in the --input-json-file=PATH form), otherwise the
    full Vaxrank parser.
    """
    # TODO: replace this with a command sub-parser
    json_flag = "--input-json-file"
    uses_cached_json = any(
        arg == json_flag or (
            isinstance(arg, str) and arg.startswith(json_flag + "="))
        for arg in args_list)
    if uses_cached_json:
        return cached_run_arg_parser()
    else:
        return make_vaxrank_arg_parser()


def parse_vaxrank_args(args_list):
    """
    Parse args_list with whichever parser choose_arg_parser selects and
    return the resulting Namespace.
    """
    arg_parser = choose_arg_parser(args_list)
    return arg_parser.parse_args(args_list)

394 | 395 | Example usage: 396 | vaxrank 397 | --vcf somatic.vcf \ 398 | --bam rnaseq.bam \ 399 | --vaccine-peptide-length 25 \ 400 | --output-csv vaccine-peptides.csv 401 | """ 402 | if args_list is None: 403 | args_list = sys.argv[1:] 404 | 405 | args = parse_vaxrank_args(args_list) 406 | configure_logging(args) 407 | logger.info(args) 408 | check_args(args) 409 | 410 | data = ranked_vaccine_peptides_with_metadata_from_parsed_args(args) 411 | 412 | ranked_variants_with_vaccine_peptides = data['variants'] 413 | patient_info = data['patient_info'] 414 | args_for_report = data['args'] 415 | 416 | ################### 417 | # CSV-based reports 418 | ################### 419 | if args.output_csv or args.output_xlsx_report: 420 | make_csv_report( 421 | ranked_variants_with_vaccine_peptides, 422 | excel_report_path=args.output_xlsx_report, 423 | csv_report_path=args.output_csv) 424 | 425 | if args.output_neoepitope_report: 426 | make_minimal_neoepitope_report( 427 | ranked_variants_with_vaccine_peptides, 428 | num_epitopes_per_peptide=args.num_epitopes_per_vaccine_peptide, 429 | excel_report_path=args.output_neoepitope_report) 430 | 431 | ######################## 432 | # Template-based reports 433 | ######################## 434 | 435 | if not (args.output_ascii_report or args.output_html_report or args.output_pdf_report): 436 | return 437 | 438 | input_json_file = args.input_json_file if hasattr(args, 'input_json_file') else None 439 | template_data_creator = TemplateDataCreator( 440 | ranked_variants_with_vaccine_peptides=ranked_variants_with_vaccine_peptides, 441 | patient_info=patient_info, 442 | final_review=args.output_final_review, 443 | reviewers=args.output_reviewed_by, 444 | args_for_report=args_for_report, 445 | input_json_file=input_json_file, 446 | cosmic_vcf_filename=args.cosmic_vcf_filename) 447 | 448 | template_data = template_data_creator.compute_template_data() 449 | 450 | if args.output_ascii_report: 451 | make_ascii_report( 452 | 
def run_vaxrank(
        isovar_results,
        mhc_predictor,
        vaccine_peptide_length=25,
        max_vaccine_peptides_per_variant=1,
        num_mutant_epitopes_to_keep=10000,
        min_epitope_score=0.0):
    """
    Generate vaccine peptides for every variant, rank them, and bundle the
    outcome into a VaxrankResults object.

    Parameters
    ----------
    isovar_results : list of isovar.IsovarResult
        Each IsovarResult corresponds to one somatic variant and its collection
        of protein sequences determined from RNA.

    mhc_predictor : mhctools.BasePredictor
        Object with predict_peptides method, used for making pMHC binding
        predictions

    vaccine_peptide_length : int
        Length of vaccine SLP to construct

    max_vaccine_peptides_per_variant : int
        Number of vaccine peptides to generate for each mutation.

    num_mutant_epitopes_to_keep : int, optional
        Number of top-ranking epitopes for each vaccine peptide to include in
        computation.

    min_epitope_score : float, optional
        Ignore peptides with binding predictions whose normalized score is less
        than this.

    Returns a VaxrankResults holding the Isovar results, the
    variant -> vaccine peptides dictionary and the ranked list built from it.
    """
    # settings forwarded unchanged to the per-variant peptide construction
    peptide_settings = dict(
        vaccine_peptide_length=vaccine_peptide_length,
        max_vaccine_peptides_per_variant=max_vaccine_peptides_per_variant,
        num_mutant_epitopes_to_keep=num_mutant_epitopes_to_keep,
        min_epitope_score=min_epitope_score)

    peptides_by_variant = create_vaccine_peptides_dict(
        isovar_results=isovar_results,
        mhc_predictor=mhc_predictor,
        **peptide_settings)

    return VaxrankResults(
        isovar_results=isovar_results,
        variant_to_vaccine_peptides_dict=peptides_by_variant,
        ranked_vaccine_peptides=ranked_vaccine_peptides(peptides_by_variant))

94 | 95 | num_mutant_epitopes_to_keep : int, optional 96 | Number of top-ranking epitopes for each vaccine peptide to include in 97 | computation. 98 | 99 | min_epitope_score : float, optional 100 | Ignore peptides with binding predictions whose normalized score is less 101 | than this. 102 | 103 | Returns 104 | ------- 105 | Returns a dictionary of varcode.Variant objects to a list of 106 | VaccinePeptides. 107 | """ 108 | vaccine_peptides_dict = {} 109 | for isovar_result in isovar_results: 110 | variant = isovar_result.variant 111 | vaccine_peptides = vaccine_peptides_for_variant( 112 | isovar_result=isovar_result, 113 | mhc_predictor=mhc_predictor, 114 | vaccine_peptide_length=vaccine_peptide_length, 115 | max_vaccine_peptides_per_variant=max_vaccine_peptides_per_variant, 116 | num_mutant_epitopes_to_keep=num_mutant_epitopes_to_keep, 117 | min_epitope_score=min_epitope_score) 118 | 119 | if any(x.contains_mutant_epitopes() for x in vaccine_peptides): 120 | vaccine_peptides_dict[variant] = vaccine_peptides 121 | 122 | return vaccine_peptides_dict 123 | 124 | def vaccine_peptides_for_variant( 125 | isovar_result, 126 | mhc_predictor, 127 | vaccine_peptide_length, 128 | max_vaccine_peptides_per_variant, 129 | num_mutant_epitopes_to_keep=None, 130 | min_epitope_score=0.0): 131 | """ 132 | Parameters 133 | ---------- 134 | isovar_result : isovar.IsovarResult 135 | 136 | mhc_predictor : mhctools.BasePredictor 137 | Object with predict_peptides method, used for making pMHC binding 138 | predictions 139 | 140 | vaccine_peptide_length : int 141 | Length of vaccine SLP to construct 142 | 143 | max_vaccine_peptides_per_variant : int 144 | Number of vaccine peptides to generate for each mutation. 145 | 146 | num_mutant_epitopes_to_keep : int, optional 147 | Number of top-ranking epitopes for each vaccine peptide to include in 148 | computation. 
149 | 150 | min_epitope_score : float, optional 151 | Ignore peptides with binding predictions whose normalized score is less 152 | than this. 153 | 154 | Returns 155 | ------- 156 | Sorted list of VaccinePeptide objects. If there are no suitable vaccine 157 | peptides (no strong MHC binder subsequences), returns an empty list. 158 | """ 159 | if not isovar_result.passes_all_filters: 160 | # don't consider candidate vaccine peptides from variants which either 161 | # failed their filters or don't have an RNA-derived protein sequence 162 | return [] 163 | 164 | variant = isovar_result.variant 165 | long_protein_fragment = MutantProteinFragment.from_isovar_result(isovar_result) 166 | 167 | logger.info( 168 | "Mutant protein fragment for %s: %s", 169 | variant, 170 | long_protein_fragment) 171 | 172 | epitope_predictions = predict_epitopes( 173 | mhc_predictor=mhc_predictor, 174 | protein_fragment=long_protein_fragment, 175 | min_epitope_score=min_epitope_score, 176 | genome=variant.ensembl).values() 177 | 178 | candidate_vaccine_peptides = [] 179 | 180 | for offset, candidate_fragment in long_protein_fragment.sorted_subsequences( 181 | subsequence_length=vaccine_peptide_length): 182 | 183 | subsequence_epitope_predictions = slice_epitope_predictions( 184 | epitope_predictions, 185 | start_offset=offset, 186 | end_offset=offset + len(candidate_fragment)) 187 | # filter out peptides that have no epitopes 188 | if not subsequence_epitope_predictions: 189 | logger.info( 190 | "No epitope predictions for mutant protein fragment %s", 191 | candidate_fragment) 192 | continue 193 | 194 | assert all( 195 | p.source_sequence == candidate_fragment.amino_acids 196 | for p in subsequence_epitope_predictions) 197 | 198 | candidate_vaccine_peptide = VaccinePeptide( 199 | mutant_protein_fragment=candidate_fragment, 200 | epitope_predictions=subsequence_epitope_predictions, 201 | num_mutant_epitopes_to_keep=num_mutant_epitopes_to_keep) 202 | 203 | logger.debug( 204 | "%s, combined 
def ranked_vaccine_peptides(variant_to_vaccine_peptides_dict):
    """
    Order variants by the combined score of their first vaccine peptide,
    best first.

    Parameters
    ----------
    variant_to_vaccine_peptides_dict : dict
        Dictionary from varcode.Variant to list of VaccinePeptide

    Returns list of (varcode.Variant, VaccinePeptide list) tuples. A variant
    with an empty peptide list is ranked with a score of 0.0.
    """
    def top_score(variant_and_peptides):
        _, peptides = variant_and_peptides
        # only the first peptide of each variant determines its rank
        return peptides[0].combined_score if len(peptides) > 0 else 0.0

    # descending order of combined (expression * mhc binding) scores
    return sorted(
        variant_to_vaccine_peptides_dict.items(),
        key=top_score,
        reverse=True)

2,peptidase,ENSG00000164308 16 | B2M,beta-2 microglobulin,MHC subunit,ENSG00000166710 17 | HLA-A,human leukocyte antigen A,MHC subunit,ENSG00000206503 18 | HLA-B,human leukocyte antigen B,MHC subunit,ENSG00000234745 19 | HLA-C,human leukocyte antigen C,MHC subunit,ENSG00000204525 20 | -------------------------------------------------------------------------------- /vaxrank/data/interferon-gamma-response.csv: -------------------------------------------------------------------------------- 1 | Gene,Name,Function,Ensembl Gene ID 2 | IFNGR1,interferon gamma receptor 1,receptor,ENSG00000027697 3 | IFNGR2,interferon gamma receptor 2,receptor,ENSG00000159128 4 | STAT1,signal transducer and activator of transcription 1,signal transducer,ENSG00000115415 5 | STAT2,signal transducer and activator of transcription 2,signal transducer,ENSG00000170581 6 | STAT3,signal transducer and activator of transcription 3,signal transducer,ENSG00000168610 7 | JAK1,Janus kinase 1,kinase,ENSG00000162434 8 | JAK2,Janus kinase 2,kinase,ENSG00000096968 9 | SOCS1,suppressor of cytokine signaling 1,negative regulator,ENSG00000185338 10 | SOCS3,suppressor of cytokine signaling 3,negative regulator,ENSG00000184557 11 | -------------------------------------------------------------------------------- /vaxrank/epitope_prediction.py: -------------------------------------------------------------------------------- 1 | # Licensed under the Apache License, Version 2.0 (the "License"); 2 | # you may not use this file except in compliance with the License. 3 | # You may obtain a copy of the License at 4 | # 5 | # http://www.apache.org/licenses/LICENSE-2.0 6 | # 7 | # Unless required by applicable law or agreed to in writing, software 8 | # distributed under the License is distributed on an "AS IS" BASIS, 9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | # See the License for the specific language governing permissions and 11 | # limitations under the License. 
class EpitopePrediction(Serializable):
    """
    One peptide/MHC allele binding prediction together with the position of
    the peptide inside the source sequence it was taken from.
    """
    def __init__(
            self,
            allele,
            peptide_sequence,
            wt_peptide_sequence,
            ic50,
            wt_ic50,
            percentile_rank,
            prediction_method_name,
            overlaps_mutation,
            source_sequence,
            offset,
            occurs_in_reference):
        # what was predicted
        self.allele = allele
        self.peptide_sequence = peptide_sequence
        self.wt_peptide_sequence = wt_peptide_sequence
        # derived from the peptide, not accepted as a constructor argument
        self.length = len(peptide_sequence)

        # prediction outputs
        self.ic50 = ic50
        self.wt_ic50 = wt_ic50
        self.percentile_rank = percentile_rank
        self.prediction_method_name = prediction_method_name

        # where the peptide sits and how it relates to the mutation/reference
        self.source_sequence = source_sequence
        self.offset = offset
        self.overlaps_mutation = overlaps_mutation
        self.occurs_in_reference = occurs_in_reference

    @classmethod
    def from_dict(cls, d):
        """
        Deserialize EpitopePrediction from a dictionary of keywords.

        A "length" key, if present, is ignored: the length argument was
        removed in version 1.1.0 and is recomputed by the constructor.
        """
        keywords = {
            key: value
            for key, value in d.items()
            if key != "length"
        }
        return cls(**keywords)

    def logistic_epitope_score(
            self,
            midpoint=350.0,
            width=150.0,
            ic50_cutoff=5000.0):  # TODO: add these default values into CLI as arguments
        """
        Map from IC50 values to score where 1.0 = strong binder, 0.0 = weak binder
        Default midpoint and width for logistic determined by max likelihood fit
        for data from Alessandro Sette's 1994 paper:

           "The relationship between class I binding affinity
            and immunogenicity of potential cytotoxic T cell epitopes."

        Any IC50 at or above ic50_cutoff scores exactly 0.0.

        TODO: Use a large dataset to find MHC binding range predicted to
        correlate with immunogenicity
        """
        if self.ic50 >= ic50_cutoff:
            return 0.0

        # since we're scoring IC50 values, normalize the output
        # so IC50 near 0.0 always returns a score of 1.0
        score_at_zero_ic50 = 1.0 / (1.0 + np.exp(-midpoint / width))

        distance_from_midpoint = (float(self.ic50) - midpoint) / width
        # simplification of 1.0 - logistic(x) = logistic(-x)
        unnormalized_score = 1.0 / (1.0 + np.exp(distance_from_midpoint))

        return unnormalized_score / score_at_zero_ic50

    def slice_source_sequence(self, start_offset, end_offset):
        """
        Re-express this prediction relative to a slice of its source sequence.

        Parameters
        ----------
        start_offset : int

        end_offset : int

        Return EpitopePrediction object with source sequence and offset
        adjusted. If this slicing would shorten the mutant peptide, then
        return None.
        """
        # the peptide must lie entirely inside [start_offset, end_offset):
        # it may neither start before the slice nor run past its end
        outside_slice = (
            self.offset < start_offset or
            self.offset + self.length > end_offset)
        if outside_slice:
            return None

        sliced_source = self.source_sequence[start_offset:end_offset]
        shifted_offset = self.offset - start_offset
        return EpitopePrediction(
            allele=self.allele,
            peptide_sequence=self.peptide_sequence,
            wt_peptide_sequence=self.wt_peptide_sequence,
            ic50=self.ic50,
            wt_ic50=self.wt_ic50,
            percentile_rank=self.percentile_rank,
            prediction_method_name=self.prediction_method_name,
            overlaps_mutation=self.overlaps_mutation,
            source_sequence=sliced_source,
            offset=shifted_offset,
            occurs_in_reference=self.occurs_in_reference)

def slice_epitope_predictions(
        epitope_predictions,
        start_offset,
        end_offset):
    """
    Return the EpitopePrediction objects which lie entirely inside the
    half-open interval [start_offset, end_offset), each re-sliced so that
    its source sequence and offset are relative to that interval.

    Predictions which start before start_offset or end after end_offset
    are dropped.
    """
    return [
        p.slice_source_sequence(start_offset, end_offset)
        for p in epitope_predictions
        if p.offset >= start_offset and p.offset + p.length <= end_offset
    ]


def predict_epitopes(
        mhc_predictor,
        protein_fragment,
        min_epitope_score=0.0,
        genome=None):
    """
    Predict MHC binding for the peptides of a mutant protein fragment and
    for the matching wildtype peptides.

    Parameters
    ----------
    mhc_predictor : mhctools.BasePredictor
        Object with predict_subsequences and predict_peptides methods and
        a min_peptide_length attribute

    protein_fragment : MutantProteinFragment

    min_epitope_score : float
        Ignore peptides with binding predictions whose normalized score is
        less than this.

    genome : pyensembl.Genome
        Genome whose proteome to use for reference peptide filtering

    Returns an OrderedDict of EpitopePrediction objects, keyed by a
    (peptide sequence, allele) tuple, that have a normalized score greater
    than or equal to min_epitope_score. The result is empty if the
    predictor fails on the mutant fragment.

    Uses the input genome to evaluate whether the epitope occurs in reference.
    """
    results = OrderedDict()
    reference_proteome = ReferenceProteome(genome)

    # sometimes the predictors will fail, and we don't want to crash vaxrank
    # in that situation. Catch Exception rather than using a bare except so
    # that KeyboardInterrupt and SystemExit still propagate.
    # TODO: make more specific or remove when we fix error handling in mhctools
    try:
        mhctools_binding_predictions = mhc_predictor.predict_subsequences(
            {protein_fragment.gene_name: protein_fragment.amino_acids})
    except Exception:
        logger.error(
            'MHC prediction errored for protein fragment %s, with traceback: %s',
            protein_fragment, traceback.format_exc())
        return results

    # compute the WT epitopes for each mutant fragment's epitopes; mutant -> WT
    wt_peptides = {}
    for binding_prediction in mhctools_binding_predictions:
        peptide = binding_prediction.peptide
        peptide_length = binding_prediction.length
        peptide_start_offset = binding_prediction.offset
        peptide_end_offset = peptide_start_offset + peptide_length

        overlaps_mutation = protein_fragment.interval_overlaps_mutation(
            start_offset=peptide_start_offset,
            end_offset=peptide_end_offset)

        if overlaps_mutation:
            full_reference_protein_sequence = (
                protein_fragment.predicted_effect().original_protein_sequence
            )
            # NOTE(review): global_start_pos() reports failure with -1, which
            # is not checked here -- confirm the resulting slice is acceptable
            global_epitope_start_pos = (
                protein_fragment.global_start_pos() + peptide_start_offset
            )
            wt_peptide = full_reference_protein_sequence[
                global_epitope_start_pos:global_epitope_start_pos + peptide_length]
            wt_peptides[peptide] = wt_peptide

    wt_predictions = []
    try:
        # filter to minimum peptide lengths
        valid_wt_peptides = [
            x for x in wt_peptides.values() if len(x) >= mhc_predictor.min_peptide_length
        ]
        if len(valid_wt_peptides) > 0:
            wt_predictions = mhc_predictor.predict_peptides(valid_wt_peptides)
    except Exception:
        # best effort: continue without WT predictions
        logger.error(
            'MHC prediction for WT peptides errored, with traceback: %s',
            traceback.format_exc())

    # break it out: (peptide, allele) -> prediction
    wt_predictions_grouped = {
        (wt_prediction.peptide, wt_prediction.allele): wt_prediction
        for wt_prediction in wt_predictions
    }

    # convert from mhctools.BindingPrediction objects to EpitopePrediction
    # which differs primarily by also having a boolean field
    # 'overlaps_mutation' that indicates whether the epitope overlaps
    # mutant amino acids or both sides of a deletion
    num_total = 0
    num_occurs_in_reference = 0
    num_low_scoring = 0
    for binding_prediction in mhctools_binding_predictions:
        num_total += 1
        peptide = binding_prediction.peptide
        peptide_length = binding_prediction.length
        peptide_start_offset = binding_prediction.offset
        peptide_end_offset = peptide_start_offset + peptide_length

        overlaps_mutation = protein_fragment.interval_overlaps_mutation(
            start_offset=peptide_start_offset,
            end_offset=peptide_end_offset)

        occurs_in_reference = reference_proteome.contains(peptide)
        if occurs_in_reference:
            logger.debug('Peptide %s occurs in reference', peptide)
            num_occurs_in_reference += 1

        # compute WT epitope sequence, if this epitope overlaps the mutation
        if overlaps_mutation:
            wt_peptide = wt_peptides[peptide]
            wt_prediction = wt_predictions_grouped.get(
                (wt_peptide, binding_prediction.allele))
            wt_ic50 = None
            if wt_prediction is None:
                # this can happen in a stop-loss variant: do we want to check that here?
                if len(wt_peptide) < mhc_predictor.min_peptide_length:
                    logger.info(
                        'No prediction for too-short WT epitope %s: possible stop-loss variant',
                        wt_peptide)
            else:
                wt_ic50 = wt_prediction.value

        else:
            # the peptide is unchanged by the mutation, so the mutant
            # prediction doubles as the WT prediction
            wt_peptide = peptide
            wt_ic50 = binding_prediction.value

        epitope_prediction = EpitopePrediction(
            allele=binding_prediction.allele,
            peptide_sequence=peptide,
            wt_peptide_sequence=wt_peptide,
            ic50=binding_prediction.value,
            wt_ic50=wt_ic50,
            percentile_rank=binding_prediction.percentile_rank,
            prediction_method_name=binding_prediction.prediction_method_name,
            overlaps_mutation=overlaps_mutation,
            source_sequence=protein_fragment.amino_acids,
            offset=peptide_start_offset,
            occurs_in_reference=occurs_in_reference)

        if epitope_prediction.logistic_epitope_score() >= min_epitope_score:
            key = (epitope_prediction.peptide_sequence, epitope_prediction.allele)
            results[key] = epitope_prediction
        else:
            num_low_scoring += 1

    logger.info(
        "%d total peptides: %d occur in reference, %d failed score threshold",
        num_total,
        num_occurs_in_reference,
        num_low_scoring)
    return results
_ENSEMBL_GENE_ID_COLUMN_NAME = 'Ensembl Gene ID'
_MUTATION_COLUMN_NAME = 'Mutation'

_IFNG_RESPONSE_COLUMN_NAME = 'interferon_gamma_response'
_CLASS_I_MHC_COLUMN_NAME = 'class1_mhc_presentation_pathway'
_DRIVER_GENE_COLUMN_NAME = 'cancer_driver_gene'
_DRIVER_VARIANT_COLUMN_NAME = 'cancer_driver_variant'

_CURRENT_DIR = dirname(__file__)
_DATA_DIR = join(_CURRENT_DIR, "data")


class GenePathwayCheck(object):
    """
    This class is meant for use with gene/variant list files from
    https://github.com/openvax/gene-lists. Other files can be used as well, but
    need to follow a similar column structure. Most logic is based on Ensembl
    gene IDs.

    Parameters
    ----------
    interferon_gamma_response_csv : str, optional
        Local path to interferon-gamma response CSV file.

    class1_mhc_presentation_pathway_csv : str, optional
        Local path to MHC class I presentation pathway CSV file.

    cancer_driver_genes_csv : str, optional
        Local path to cancer driver genes CSV file.

    cancer_driver_variants_csv : str, optional
        Local path to cancer driver variants CSV file.

    Any path left as None (or empty) falls back to the corresponding CSV
    file in this package's "data" directory.
    """
    def __init__(
            self,
            interferon_gamma_response_csv=None,
            class1_mhc_presentation_pathway_csv=None,
            cancer_driver_genes_csv=None,
            cancer_driver_variants_csv=None):

        self.interferon_gamma_response_gene_set = self._load_set_from_csv(
            csv_path=interferon_gamma_response_csv,
            default_filename="interferon-gamma-response.csv",
            description="Interferon gamma response pathway",
            column_names=[_ENSEMBL_GENE_ID_COLUMN_NAME])

        self.class1_mhc_presentation_pathway_gene_set = self._load_set_from_csv(
            csv_path=class1_mhc_presentation_pathway_csv,
            default_filename="class1-mhc-presentation-pathway.csv",
            description="Class I MHC presentation pathway",
            column_names=[_ENSEMBL_GENE_ID_COLUMN_NAME])

        self.cancer_driver_genes_set = self._load_set_from_csv(
            csv_path=cancer_driver_genes_csv,
            default_filename="cancer-driver-genes.csv",
            description="Cancer driver genes",
            column_names=[_ENSEMBL_GENE_ID_COLUMN_NAME])

        # set of gene ID, variant description pairs
        self.cancer_driver_variants_set = self._load_set_from_csv(
            csv_path=cancer_driver_variants_csv,
            default_filename="cancer-driver-variants.csv",
            description="Driver variants",
            column_names=[_ENSEMBL_GENE_ID_COLUMN_NAME, _MUTATION_COLUMN_NAME])

    @classmethod
    def _load_set_from_csv(cls, csv_path, default_filename, description, column_names):
        """
        Load the requested columns of a CSV file as a set.

        Parameters
        ----------
        csv_path : str or None
            Path of the CSV file; when falsy, default_filename inside the
            package data directory is read instead.

        default_filename : str
            Name of the bundled CSV file used when csv_path is not given.

        description : str
            Human-readable name of the file, used in error messages.

        column_names : list of str
            Columns to extract.

        Returns a set of values when a single column is requested, otherwise
        a set of tuples with one element per requested column.

        Raises ValueError if any requested column is missing from the file.
        """
        if not csv_path:
            csv_path = join(_DATA_DIR, default_filename)
        df = pd.read_csv(csv_path)
        columns = []
        for column_name in column_names:
            if column_name not in df.columns:
                raise ValueError("%s file (%s) needs column '%s'" % (
                    description,
                    csv_path,
                    column_name))
            columns.append(df[column_name].values)
        if len(columns) == 1:
            return set(columns[0])
        return set(zip(*columns))

    def make_variant_dict(self, variant):
        """
        Returns a dictionary of boolean values, depending on whether we see this
        variant in any relevant pathway or cancer driver files.

        Parameters
        ----------
        variant : varcode.Variant
            Variant object to evaluate

        The returned OrderedDict has, in order, the keys
        'interferon_gamma_response', 'class1_mhc_presentation_pathway',
        'cancer_driver_gene' and 'cancer_driver_variant'.
        """
        effect_description = variant.effects().top_priority_effect().short_description
        overlapping_gene_ids = variant.gene_ids

        gene_sets_by_column = [
            (_IFNG_RESPONSE_COLUMN_NAME, self.interferon_gamma_response_gene_set),
            (_CLASS_I_MHC_COLUMN_NAME, self.class1_mhc_presentation_pathway_gene_set),
            (_DRIVER_GENE_COLUMN_NAME, self.cancer_driver_genes_set),
        ]

        variant_dict = OrderedDict()
        for column_name, gene_set in gene_sets_by_column:
            # generator expression lets any() stop at the first matching gene
            variant_dict[column_name] = any(
                gene_id in gene_set for gene_id in overlapping_gene_ids)

        # driver variants are matched on (gene ID, effect description) pairs
        variant_dict[_DRIVER_VARIANT_COLUMN_NAME] = any(
            (gene_id, effect_description) in self.cancer_driver_variants_set
            for gene_id in overlapping_gene_ids)
        return variant_dict
args=('%(logfilename)s', 'w') 31 | 32 | [formatter_simpleFormatter] 33 | format=%(asctime)s - %(name)s:%(lineno)s - %(levelname)s - %(message)s 34 | datefmt= 35 | 36 | # vaxrank 37 | 38 | [logger_vaxrank] 39 | level=DEBUG 40 | qualname=vaxrank 41 | handlers=consoleHandler,fileHandler 42 | 43 | # isovar 44 | 45 | [logger_isovar] 46 | level=DEBUG 47 | qualname=isovar 48 | handlers=consoleHandler,fileHandler 49 | 50 | # varcode 51 | 52 | [logger_varcode] 53 | level=DEBUG 54 | qualname=varcode 55 | handlers=consoleHandler,fileHandler 56 | 57 | # pyensembl 58 | 59 | [logger_pyensembl] 60 | level=DEBUG 61 | qualname=pyensembl 62 | handlers=consoleHandler 63 | 64 | # mhctools 65 | 66 | [logger_mhctools] 67 | level=DEBUG 68 | qualname=mhctools 69 | handlers=consoleHandler,fileHandler 70 | 71 | # datacache 72 | 73 | [logger_datacache] 74 | level=DEBUG 75 | qualname=datacache 76 | handlers=consoleHandler 77 | -------------------------------------------------------------------------------- /vaxrank/manufacturability.py: -------------------------------------------------------------------------------- 1 | # Licensed under the Apache License, Version 2.0 (the "License"); 2 | # you may not use this file except in compliance with the License. 3 | # You may obtain a copy of the License at 4 | # 5 | # http://www.apache.org/licenses/LICENSE-2.0 6 | # 7 | # Unless required by applicable law or agreed to in writing, software 8 | # distributed under the License is distributed on an "AS IS" BASIS, 9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | # See the License for the specific language governing permissions and 11 | # limitations under the License. 12 | 13 | """ 14 | Scoring functions for determining which sequences are easy to manufacture using 15 | solid-phase synthesis.
# Amino Acid Hydropathy Score
# Table 2 from Kyte and Doolittle's
# "A Simple Method for Displaying the Hydropathic Character of a Protein"

hydropathy_dict = {
    "A": 1.8,
    "C": 2.5,
    "D": -3.5,
    "E": -3.5,
    "F": 2.8,
    "G": -0.4,
    "H": -3.2,
    "I": 4.5,
    "K": -3.9,
    "L": 3.8,
    "M": 1.9,
    "N": -3.5,
    "P": -1.6,
    "Q": -3.5,
    "R": -4.5,
    "S": -0.8,
    "T": -0.7,
    "V": 4.2,
    "W": -0.9,
    "Y": -1.3
}


def gravy_score(amino_acids):
    """
    Mean amino acid hydropathy averaged across residues of a peptide
    or protein sequence.

    Raises KeyError for a residue which is not in hydropathy_dict and
    ZeroDivisionError for an empty sequence.
    """
    total = sum(
        hydropathy_dict[amino_acid] for amino_acid in amino_acids)
    return total / len(amino_acids)


def max_kmer_gravy_score(amino_acids, k):
    """
    Returns max GRAVY score of any kmer in the amino acid sequence,
    used to determine if there are any extremely hydrophobic regions within a
    longer amino acid sequence.

    A non-empty sequence shorter than k has no full-length window, so it is
    scored as a single window covering the whole sequence. An empty
    sequence raises ValueError.
    """
    n = len(amino_acids)
    if 0 < n < k:
        return gravy_score(amino_acids)
    return max(
        gravy_score(amino_acids[i:i + k])
        for i in range(n - k + 1))


def max_7mer_gravy_score(amino_acids):
    """
    Max GRAVY score of any 7mer window in the amino acid sequence.
    """
    return max_kmer_gravy_score(amino_acids, 7)


def cterm_kmer_gravy_score(amino_acids, k):
    """
    Mean hydropathy of last k residues on the C-terminus of the peptide.

    A sequence shorter than k is scored over all of its residues.
    """
    n = len(amino_acids)
    # clamp the start at 0: a negative start would be interpreted relative
    # to the end of the sequence and silently drop leading residues
    return gravy_score(amino_acids[max(0, n - k):n])


def cterm_7mer_gravy_score(amino_acids):
    """
    Mean hydropathy of the last 7 residues on the C-terminus of the peptide.
    """
    return cterm_kmer_gravy_score(amino_acids, 7)


def difficult_n_terminal_residue(amino_acids):
    """
    Is the N-terminus one of {Gln, Glu, Cys}?
    ---
    Priority I: avoid N-terminal Gln, Glu, Cys
    """
    return amino_acids[0] in {"Q", "E", "C"}


def c_terminal_proline(amino_acids):
    """
    Is the right-most (C-terminal) amino acid a proline?
    """
    return amino_acids[-1] == "P"


def c_terminal_cysteine(amino_acids):
    """
    Is the right-most (C-terminal) amino acid a cysteine?
    """
    return amino_acids[-1] == "C"


def n_terminal_asparagine(amino_acids):
    """
    Asparagine at the N-terminus of a peptide is also hard
    to synthesize, though not as bad as {Gln, Glu, Cys}
    """
    return amino_acids[0] == "N"


def asparagine_proline_bond_count(amino_acids):
    """
    Count the number of Asparagine/Asn/N-Proline/Pro/P bonds
    Problem with Asn-Pro bonds: can spontaneously cleave the peptide
    """
    return sum(
        amino_acids[i:i + 2] == "NP"
        for i in range(len(amino_acids) - 1))


def cysteine_count(amino_acids):
    """
    How many cysteines are in the amino acid sequence?
    Problem with cysteine residues: They can form disulfide bonds across
    distant parts of the peptide
    """
    return sum(amino_acid == "C" for amino_acid in amino_acids)


def combine_scoring_functions(*scoring_functions):
    """
    Given a list of scoring functions, make a namedtuple with
    fields of the same names. Returns the ManufacturabilityScores class.

    The returned class has a from_amino_acids classmethod which applies
    every scoring function, in order, to an amino acid sequence.
    """
    names = [fn.__name__ for fn in scoring_functions]

    class ManufacturabilityScores(namedtuple('ManufacturabilityScores', names)):
        @classmethod
        def from_amino_acids(cls, amino_acids):
            return cls(*[fn(amino_acids) for fn in scoring_functions])

    return ManufacturabilityScores


ManufacturabilityScores = combine_scoring_functions(

    # GRAVY score of 7 residues closest to the C terminus
    cterm_7mer_gravy_score,

    # GRAVY score of any 7mer window in the peptide sequence
    max_7mer_gravy_score,

    # avoid N-terminal Gln, Glu, Cys
    difficult_n_terminal_residue,

    # avoid C-terminal Cys
    c_terminal_cysteine,

    # avoid C-terminal Pro
    c_terminal_proline,

    # total number of Cys residues
    cysteine_count,

    # avoid N-terminal Asn
    n_terminal_asparagine,

    # avoid Asn-Pro bonds
    asparagine_proline_bond_count,
)
logger = logging.getLogger(__name__)


class MutantProteinFragment(Serializable):
    """
    Translated protein sequence around a somatic mutation, together with
    the read counts that support it.
    """
    def __init__(
            self,
            variant,
            gene_name,
            amino_acids,
            mutant_amino_acid_start_offset,
            mutant_amino_acid_end_offset,
            supporting_reference_transcripts,
            n_overlapping_reads,
            n_alt_reads,
            n_ref_reads,
            n_alt_reads_supporting_protein_sequence):
        """
        Parameters
        ----------
        variant : varcode.Variant
            Somatic mutation.

        gene_name : str
            Gene from which we used a transcript to translate this mutation.

        amino_acids : str
            Translated protein sequence, aggregated from possibly multiple
            synonymous coding sequences.

        mutant_amino_acid_start_offset : int
            Starting offset of amino acids which differ due to the mutation

        mutant_amino_acid_end_offset : int
            End offset of amino acids which differ due to the mutation

        supporting_reference_transcripts : list of pyensembl.Transcript
            PyEnsembl Transcript objects for reference transcripts which
            were used to establish the reading frame of coding sequence(s)
            detected from RNA.

        n_overlapping_reads : int
            Number of reads overlapping the variant locus.

        n_alt_reads : int
            Number of reads supporting the variant.

        n_ref_reads : int
            Number of reads supporting the reference allele.

        n_alt_reads_supporting_protein_sequence : int
            Number of RNA reads fully spanning the cDNA sequence(s) from which
            we translated this amino acid sequence.
        """
        self.variant = variant
        self.gene_name = gene_name
        self.amino_acids = amino_acids
        self.mutant_amino_acid_start_offset = mutant_amino_acid_start_offset
        self.mutant_amino_acid_end_offset = mutant_amino_acid_end_offset
        self.supporting_reference_transcripts = supporting_reference_transcripts
        self.n_overlapping_reads = n_overlapping_reads
        self.n_alt_reads = n_alt_reads
        self.n_ref_reads = n_ref_reads
        self.n_alt_reads_supporting_protein_sequence = (
            n_alt_reads_supporting_protein_sequence)

    @classmethod
    def from_isovar_result(cls, isovar_result):
        """
        Create a MutantProteinFragment from an isovar.IsovarResult object

        Parameters
        ----------
        isovar_result : isovar.IsovarResult

        Returns
        -------
        MutantProteinFragment, or None when the result has no top protein
        sequence.
        """
        protein_sequence = isovar_result.top_protein_sequence
        if protein_sequence is None:
            return None
        return cls(
            variant=isovar_result.variant,
            gene_name=protein_sequence.gene_name,
            amino_acids=protein_sequence.amino_acids,
            mutant_amino_acid_start_offset=protein_sequence.mutation_start_idx,
            mutant_amino_acid_end_offset=protein_sequence.mutation_end_idx,

            # TODO: distinguish reads and fragments in Vaxrank?
            n_overlapping_reads=isovar_result.num_total_fragments,
            n_alt_reads=isovar_result.num_alt_fragments,
            n_ref_reads=isovar_result.num_ref_fragments,
            n_alt_reads_supporting_protein_sequence=protein_sequence.num_supporting_fragments,
            supporting_reference_transcripts=protein_sequence.transcripts)

    def __len__(self):
        """
        Number of amino acids in this fragment.
        """
        return len(self.amino_acids)

    @property
    def n_mutant_amino_acids(self):
        """
        Difference between the mutant end and start offsets.
        """
        return (
            self.mutant_amino_acid_end_offset - self.mutant_amino_acid_start_offset)

    @property
    def mutation_distance_from_edge(self):
        """
        Smaller of the distances from the mutated region to the left and
        right ends of the fragment.
        """
        distance_from_left = self.mutant_amino_acid_start_offset
        distance_from_right = len(self) - self.mutant_amino_acid_end_offset
        return min(distance_from_left, distance_from_right)

    @property
    def is_deletion(self):
        """
        True when no mutant amino acids are present and the variant is a
        deletion.
        """
        return self.n_mutant_amino_acids == 0 and self.variant.is_deletion

    @property
    def n_other_reads(self):
        """
        Number of reads supporting alleles which are neither ref nor alt
        """
        return self.n_overlapping_reads - (self.n_ref_reads + self.n_alt_reads)

    def interval_overlaps_mutation(self, start_offset, end_offset):
        """
        Does the given start_offset:end_offset interval overlap the mutated
        region of this MutantProteinFragment? Interval offsets are expected
        to be base-0 half-open (start is inclusive, end is exclusive).
        """
        return (
            start_offset < self.mutant_amino_acid_end_offset and
            end_offset > self.mutant_amino_acid_start_offset)

    def generate_subsequences(self, subsequence_length):
        """
        Yields (int, MutantProteinFragment) pairs, where the integer
        indicates the offset into the amino acid sequences.

        If this fragment is no longer than subsequence_length, the only pair
        yielded is (0, self). Otherwise one new fragment is yielded for every
        window of subsequence_length amino acids, with the mutant offsets
        shifted to be relative to the window; read counts and supporting
        transcripts are copied unchanged.
        """
        n_total_amino_acids = len(self.amino_acids)
        if n_total_amino_acids <= subsequence_length:
            yield (0, self)
        else:
            for subsequence_start_offset in range(
                    0,
                    n_total_amino_acids - subsequence_length + 1):
                subsequence_end_offset = subsequence_start_offset + subsequence_length
                amino_acids = self.amino_acids[
                    subsequence_start_offset:subsequence_end_offset]
                # NOTE(review): the start offset is clamped at 0 but not at
                # the window length, so for a window ending before the
                # mutation it can exceed the end offset -- confirm intended
                mutant_amino_acid_start_offset = max(
                    0,
                    self.mutant_amino_acid_start_offset - subsequence_start_offset)
                mutant_amino_acid_end_offset = min(
                    len(amino_acids),
                    max(
                        0,
                        self.mutant_amino_acid_end_offset - subsequence_start_offset))
                n_supporting_reads = self.n_alt_reads_supporting_protein_sequence
                subsequence_mutant_protein_fragment = MutantProteinFragment(
                    variant=self.variant,
                    gene_name=self.gene_name,
                    amino_acids=amino_acids,
                    mutant_amino_acid_start_offset=mutant_amino_acid_start_offset,
                    mutant_amino_acid_end_offset=mutant_amino_acid_end_offset,
                    n_overlapping_reads=self.n_overlapping_reads,
                    n_ref_reads=self.n_ref_reads,
                    n_alt_reads=self.n_alt_reads,
                    n_alt_reads_supporting_protein_sequence=n_supporting_reads,
                    supporting_reference_transcripts=self.supporting_reference_transcripts)
                yield subsequence_start_offset, subsequence_mutant_protein_fragment

    def sorted_subsequences(
            self,
            subsequence_length,
            limit=None,
            sort_key=lambda x: (
                -x[1].mutation_distance_from_edge,
                -x[1].n_mutant_amino_acids)):
        """
        Returns subsequences, paired with their offset from the start of the
        protein fragment. The default sort criterion is maximizing the
        mutation distance from the edge of the sequence and secondarily
        maximizing the number of mutant amino acids.

        When limit is not None, only the first `limit` pairs of the sorted
        list are returned (so limit=0 gives an empty list).
        """
        subsequences = list(self.generate_subsequences(subsequence_length))
        subsequences.sort(key=sort_key)
        # compare against None so that an explicit limit of 0 is honored
        # instead of being treated as "no limit"
        if limit is not None:
            subsequences = subsequences[:limit]
        return subsequences

    def predicted_effect(self):
        """
        Top priority effect of the variant across the supporting reference
        transcripts.
        """
        effects = [
            self.variant.effect_on_transcript(t) for t in
            self.supporting_reference_transcripts
        ]
        predicted_effect = top_priority_effect(effects)
        return predicted_effect

    def global_start_pos(self):
        """
        Position of the start of this fragment relative to the full amino
        acid sequence.

        Returns -1 (after logging an error) when the predicted effect has no
        mutation start offset; callers need to check for that sentinel.
        """
        # position of mutation start relative to the full amino acid sequence
        global_mutation_start_pos = self.predicted_effect().aa_mutation_start_offset
        if global_mutation_start_pos is None:
            logger.error(
                'Could not find mutation start pos for variant %s',
                self.variant)
            return -1

        # get the global position of the mutant protein fragment: shift left by the amount of
        # the relative mutant start position
        return (
            global_mutation_start_pos - self.mutant_amino_acid_start_offset
        )


# --- /vaxrank/patient_info.py ---

class PatientInfo(Serializable):
    """
    Serializable record of a patient's input files, MHC alleles and
    variant counts.
    """
    def __init__(
            self,
            patient_id,
            vcf_paths,
            bam_path,
            mhc_alleles,
            num_somatic_variants,
            num_coding_effect_variants,
            num_variants_with_rna_support,
            num_variants_with_vaccine_peptides):
        self.patient_id = patient_id
        self.vcf_paths = vcf_paths
        self.bam_path = bam_path
        self.mhc_alleles = mhc_alleles
        self.num_somatic_variants = num_somatic_variants
        self.num_coding_effect_variants = num_coding_effect_variants
        self.num_variants_with_rna_support = num_variants_with_rna_support
        self.num_variants_with_vaccine_peptides = num_variants_with_vaccine_peptides
logger = logging.getLogger(__name__)


def fm_index_path(genome):
    """
    Returns a path for cached reference peptides, for the given genome.

    The cache directory is created if it does not exist yet.
    """
    # if $VAXRANK_REF_PEPTIDES_DIR is set, that'll be the location of the cache
    cache_dir = get_data_dir(envkey='VAXRANK_REF_PEPTIDES_DIR')
    # exist_ok avoids the check-then-create race when several processes
    # start at the same time
    os.makedirs(cache_dir, exist_ok=True)

    return os.path.join(cache_dir, '%s_%d_3.fm' % (
        genome.species.latin_name, genome.release))


def generate_protein_sequences(genome):
    """
    Generator whose elements are protein sequences from the given genome.

    Only transcripts flagged as protein coding contribute a sequence.

    Parameters
    ----------
    genome : pyensembl.EnsemblRelease
        Input genome to load for reference peptides
    """
    for t in genome.transcripts():
        if t.is_protein_coding:
            yield t.protein_sequence


def load_reference_peptides_index(genome, force_reload=False):
    """
    Loads the FM index containing reference peptides.

    Parameters
    ----------
    genome : pyensembl.EnsemblRelease
        Input genome to load for reference peptides

    force_reload : bool, optional
        If true, will recompute index for this genome even if it already exists.

    Returns
    -------
    fm : shellinford.FMIndex
        Index populated with reference peptides from the genome
    """
    path = fm_index_path(genome)
    if force_reload or not os.path.exists(path):
        logger.info("Building FM index at %s", path)
        fm = shellinford.FMIndex()
        fm.build(generate_protein_sequences(genome), path)
        logger.info("Done building FM index")
        return fm
    # an index file already exists for this genome: load it from disk
    return shellinford.FMIndex(filename=path)


class ReferenceProteome(object):
    """
    Searchable collection of the reference protein sequences of a genome,
    backed by an FM index.
    """
    def __init__(self, genome):
        self.fm_index = load_reference_peptides_index(genome)

    def contains(self, kmer):
        """
        Does the given amino acid string occur anywhere in the indexed
        reference protein sequences?
        """
        return len(self.fm_index.search(kmer)) > 0
col.args-column-one { 54 | width: 40%; 55 | } 56 | 57 | /* Variants */ 58 | ol.main { 59 | padding-left: 1.2em; 60 | margin-left: 0.1em; 61 | font-size: 2em; 62 | color: #b3b3b3; 63 | } 64 | 65 | li.variant-list-item { 66 | border-left: 0.3em solid; 67 | padding: 0 2em 0 2em; 68 | margin-bottom: 2em; 69 | page-break-before: always; 70 | page-break-after: always; 71 | } 72 | 73 | li.variant-list-item:nth-child(odd) { 74 | border-left: 0.3em dotted; 75 | padding: 0 2em 0 2em; 76 | margin-bottom: 2em; 77 | page-break-before: always; 78 | page-break-after: always; 79 | } 80 | 81 | div.variant-span { 82 | font-size: 16px; 83 | color: black; 84 | } 85 | 86 | table.variant { 87 | margin-top: -2em; 88 | width: 90%; 89 | } 90 | 91 | col.variant-column-one { 92 | width: 40%; 93 | } 94 | 95 | col.variant-column-two { 96 | width: 60%; 97 | } 98 | 99 | thead.variant-head { 100 | font-size: 135%; 101 | margin-left: -2px; 102 | } 103 | 104 | td.variant-head { 105 | padding-left: 0.7em; 106 | } 107 | 108 | /* Peptides */ 109 | h4.peptides { 110 | page-break-before: always; 111 | } 112 | 113 | div.wt-epitopes { 114 | page-break-after: always; 115 | page-break-inside: avoid; 116 | margin: 2em 0 2em 2em; 117 | } 118 | 119 | table.wt-epitopes { 120 | font-size: 80%; 121 | } 122 | 123 | ol.peptides { 124 | padding-left: 0; 125 | list-style-position: inside; 126 | } 127 | 128 | li.peptide { 129 | page-break-inside: avoid; 130 | page-break-before: always; 131 | } 132 | 133 | table.peptide { 134 | margin: -1em 0 2em 2em; 135 | width: 80%; 136 | } 137 | 138 | span.mutant { 139 | background-color: yellow; 140 | border: 1px; 141 | border-style: solid; 142 | border-color: red; 143 | padding: 2px; 144 | margin-left: 2px; 145 | margin-right: 2px; 146 | } 147 | 148 | table.peptide-inner { 149 | width: 100%; 150 | border: none; 151 | } 152 | 153 | td.peptide-inner { 154 | padding: 0; 155 | } 156 | 157 | td.peptide-inner-header { 158 | font-weight: bold; 159 | text-align: center; 160 | font-size: 
125%; 161 | padding: 0.75em; 162 | } 163 | 164 | col.peptide-data-column-one { 165 | width: 70%; 166 | } 167 | 168 | col.peptide-data-column-two { 169 | width: 30%; 170 | } 171 | 172 | col.epitope-data-column-one { 173 | width: 16%; 174 | } 175 | 176 | col.epitope-data-column-two { 177 | width: 16%; 178 | } 179 | 180 | col.epitope-data-column-three { 181 | width: 16%; 182 | } 183 | 184 | col.epitope-data-column-four { 185 | width: 16%; 186 | } 187 | 188 | col.epitope-data-column-five { 189 | width: 16%; 190 | } 191 | 192 | col.epitope-data-column-six { 193 | width: 16%; 194 | } 195 | 196 | table.epitope-inner { 197 | font-size: 80%; 198 | width: 100%; 199 | border: none; 200 | } 201 | 202 | /* Signatures */ 203 | table.signature { 204 | width: 70%; 205 | } 206 | 207 | table.signature-inner { 208 | width: 100%; 209 | border: none; 210 | } 211 | 212 | td.signature-inner { 213 | padding: 0; 214 | } 215 | 216 | col.signature-column-one { 217 | width: 30%; 218 | } 219 | 220 | col.signature-column-two { 221 | width: 70%; 222 | } 223 | -------------------------------------------------------------------------------- /vaxrank/templates/template.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Vaxrank Report 5 | 6 | 7 | 8 | 11 | 12 | 13 |
14 |

Vaccine Peptides Report

15 | 16 |

PATIENT INFO

17 | 18 | 19 | 20 | 21 | 22 | {% for key, val in patient_info.items() %} 23 | 24 | {% endfor %} 25 |
{{ key }}{{ val }}
26 |
27 | 28 |

COMMAND LINE ARGUMENTS

29 | 30 | 31 | 32 | 33 | 34 | {% for key, val in args %} 35 | 36 | {% endfor %} 37 |
{{ key }}{{ val }}
38 | {% if input_json_file %} 39 |

Report generated from saved location: {{ input_json_file }}

40 | {% endif %} 41 |
    42 | 43 | {% if variants %} 44 | {% for v in variants %} 45 |
  1. 46 |
    47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | {% for key, val in v.variant_data.items() %} 59 | 60 | {% endfor %} 61 |
    Variant{{ v.short_description }}
    {{ key }}{{ val }}
    62 |
    63 | 64 |

    Predicted Effect

    65 | 66 | {% for key, val in v.effect_data.items() %} 67 | 68 | {% endfor %} 69 |
    {{ key }}{{ val }}
    70 |
    71 | 72 | {% if v.databases %} 73 |

    Databases

    74 | 75 | {% for key, val in v.databases.items() %} 76 | 77 | {% endfor %} 78 |
    {{ key }}{{ val }}
    79 |
    80 | {% endif %} 81 | 82 |
      83 | {% for p in v.peptides %} 84 |
    1. 85 | 86 | 87 | 88 | 89 | 100 | {% if include_manufacturability %} 101 | 102 | 113 | {% endif %} 114 | 115 | 116 | 142 |
      {{ p.header_display_data.aa_before_mutation }}{{ p.header_display_data.aa_mutant }}{{ p.header_display_data.aa_after_mutation }}
      90 | 91 | 92 | 93 | 94 | 95 | {% for key, val in p.peptide_data.items() %} 96 | 97 | {% endfor %} 98 |
      {{ key }}{{ val }}
      99 |
      Manufacturability
      103 | 104 | 105 | 106 | 107 | 108 | {% for key, val in p.manufacturability_data.items() %} 109 | 110 | {% endfor %} 111 |
      {{ key }}{{ val }}
      112 |
      Predicted mutant epitopes
      117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | {% for key in p.epitopes[0] %} 129 | 130 | {% endfor %} 131 | 132 | 133 | {% for e in p.epitopes %} 134 | 135 | {% for _, val in e.items() %} 136 | 137 | {% endfor %} 138 | 139 | {% endfor %} 140 |
      {{ key }}
      {{ val }}
      141 |
      143 | {% if include_wt_epitopes %} 144 |
      145 | {% if p.wt_epitopes %} 146 |

      Predicted strong binders that do not overlap the mutation

      147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | {% for e in p.wt_epitopes %} 156 | 157 | 158 | 159 | 160 | 161 | {% endfor %} 162 |
      SequenceIC50Allele
      {{ e["Sequence"] }}{{ e["IC50"] }}{{ e["Allele"] }}
      163 | {% else %} 164 |

      No predicted strong binders that do not overlap the mutation.

      165 | {% endif %} 166 |
      167 | {% endif %} 168 |
    2. 169 | {% endfor %} 170 |
    171 |
    172 |
  2. 173 | {% endfor %} 174 |
175 | 176 | {% if reviewers %} 177 | 178 | 179 | 180 | 181 | {% for r in reviewers %} 182 | 183 | {% endfor %} 184 |
Reviewed By
{{ r }}
185 |

186 | {% endif %} 187 | 188 | 189 | 190 | 191 | 192 | 193 | 204 | 205 |
Final Review
194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 |
Name{{ final_review }}
Signature
Date
203 |
206 | {% else %} 207 |

No variants with sufficient vaccine peptides were found.

208 | {% endif %} 209 |
210 | 211 | 212 | -------------------------------------------------------------------------------- /vaxrank/templates/template.txt: -------------------------------------------------------------------------------- 1 | {% for key, val in patient_info.items() %} 2 | {{ key }}: {{ val }} 3 | {% endfor %} 4 | 5 | Package version info 6 | {% for key, val in package_versions.items() %} 7 | {{ key }}: {{ val }} 8 | {% endfor %} 9 | --- 10 | 11 | {% if variants %} 12 | {% for v in variants %} 13 | {{ v.num }}) {{ v.short_description }} ({{ v.variant_data['Gene name'] }}) 14 | {% for key, val in v.variant_data.items() %} 15 | {{ key }}: {{ val }} 16 | {% endfor %} 17 | 18 | {% for key, val in v.effect_data.items() %} 19 | {{ key }}: {{ val }} 20 | {% endfor %} 21 | 22 | Vaccine Peptides: 23 | {% for p in v.peptides %} 24 | {{ p.header_display_data.num }}. {{ p.header_display_data.aa_before_mutation }}_{{ p.header_display_data.aa_mutant }}_{{ p.header_display_data.aa_after_mutation }} (score = {{ v.variant_data["Top score"] }}) 25 | {% for key, val in p.peptide_data.items() %} 26 | - {{ key }}: {{ val }} 27 | {% endfor %} 28 | {% if include_manufacturability %} 29 | 30 | Manufacturability: 31 | {% for key, val in p.manufacturability_data.items() %} 32 | - {{ key }}: {{ val }} 33 | {% endfor %} 34 | {% endif %} 35 | 36 | Predicted mutant epitopes: 37 | {{ p.ascii_epitopes|indent(18) }} 38 | 39 | {% if include_wt_epitopes and p.wt_epitopes %} 40 | Predicted strong binders that do not overlap the mutation: 41 | {{ p.ascii_wt_epitopes|indent(18) }} 42 | {% endif %} 43 | 44 | {% endfor %} 45 | {% endfor %} 46 | {% else %} 47 | No variants with sufficient vaccine peptides were found. 
class VaccinePeptide(Serializable):
    """
    Pairs a MutantProteinFragment with the MHC binding predictions made for
    subsequences of that fragment.

    The predictions are split into a "mutant" list (predictions which overlap
    the mutation and are not flagged as occurring in the reference) and a
    "wildtype" list (every other prediction). Both lists are sorted in
    ascending order of the EpitopePrediction field named by
    `sort_predictions_by`.
    """

    def __init__(
            self,
            mutant_protein_fragment,
            epitope_predictions,
            num_mutant_epitopes_to_keep=None,
            sort_predictions_by='ic50'):
        """
        Parameters
        ----------
        mutant_protein_fragment : MutantProteinFragment

        epitope_predictions : list of EpitopePrediction

        num_mutant_epitopes_to_keep : int or None
            Maximum number of mutant epitopes retained after sorting.
            None (or any other falsy value, such as 0) keeps all of them.

        sort_predictions_by : str
            Field of EpitopePrediction used for sorting epitope predictions
            in ascending order. Can be either 'ic50' or 'percentile_rank'.
        """
        self.mutant_protein_fragment = mutant_protein_fragment
        self.epitope_predictions = epitope_predictions
        self.num_mutant_epitopes_to_keep = num_mutant_epitopes_to_keep
        self.sort_predictions_by = sort_predictions_by

        ordering = attrgetter(sort_predictions_by)

        def is_mutant_epitope(prediction):
            # A prediction counts as "mutant" only if it overlaps the
            # mutation and is not also found in the reference.
            return (
                prediction.overlaps_mutation and
                not prediction.occurs_in_reference)

        mutant_predictions = [
            prediction
            for prediction in epitope_predictions
            if is_mutant_epitope(prediction)
        ]
        mutant_predictions.sort(key=ordering)
        # only keep the top k mutant epitopes (a falsy k keeps everything)
        if num_mutant_epitopes_to_keep:
            mutant_predictions = \
                mutant_predictions[:num_mutant_epitopes_to_keep]
        self.mutant_epitope_predictions = mutant_predictions

        # everything which is not a mutant epitope is treated as wildtype
        wildtype_predictions = [
            prediction
            for prediction in epitope_predictions
            if not is_mutant_epitope(prediction)
        ]
        wildtype_predictions.sort(key=ordering)
        self.wildtype_epitope_predictions = wildtype_predictions

        self.wildtype_epitope_score = sum(
            prediction.logistic_epitope_score()
            for prediction in self.wildtype_epitope_predictions)

        # the mutant score is computed after truncation, so only the
        # retained top k epitopes contribute to it
        self.mutant_epitope_score = sum(
            prediction.logistic_epitope_score()
            for prediction in self.mutant_epitope_predictions)

        fragment_sequence = self.mutant_protein_fragment.amino_acids
        self.manufacturability_scores = \
            ManufacturabilityScores.from_amino_acids(fragment_sequence)

    def peptide_synthesis_difficulty_score_tuple(
            self,
            max_c_terminal_hydropathy=1.5,
            min_kmer_hydropathy=0,
            max_kmer_hydropathy_low_priority=1.5,
            max_kmer_hydropathy_high_priority=2.5):
        """
        Generates a tuple of scores used for lexicographic sorting of vaccine
        peptides. Every entry is a quantity to be minimized, listed from most
        to least important.

        The most important criterion for choosing a vaccine peptide is to
        minimize the number of cysteines in the sequence (to prevent the
        formation of disulfide bonds).

        It is also important to keep the mean hydropathy of the C-terminal
        residues below 1.5 and also to ensure that no window of amino acids
        within the sequence has a mean hydropathy score > 2.5 (using
        AA values from Table 2 of Kyte & Doolittle 1982).

        If there are multiple vaccine peptides all of whose subsequence
        windows satisfy the GRAVY (mean hydropathy) < 2.5 constraint then
        prefer terminal amino acids which are not known to make solid phase
        synthesis difficult.

        If there are multiple vaccine peptides without difficult terminal
        residues then try to eliminate N-terminal asparagine residues
        (not as harmful) and asparagine-proline bonds
        (known to dissociate easily). If all of these constraints
        are satisfied, then attempt to keep the max k-mer hydropathy below
        a lower constant (default GRAVY score 1.5) and above a minimum value
        (default 0).

        (Sort criteria determined through conversations with manufacturer)
        """
        scores = self.manufacturability_scores
        cterm_gravy = scores.cterm_7mer_gravy_score
        max_gravy = scores.max_7mer_gravy_score

        # Each penalty is the amount by which a threshold is violated,
        # clipped at zero so that peptides within limits tie with each other.

        # C-terminal 7mer GRAVY score < 1.5
        # (or user specified max GRAVY score for C terminus of peptide)
        cterm_gravy_excess = max(0, cterm_gravy - max_c_terminal_hydropathy)

        # max 7mer GRAVY score < 2.5
        # (or user specified higher priority maximum for GRAVY score)
        gravy_excess_high_priority = max(
            0, max_gravy - max_kmer_hydropathy_high_priority)

        # max 7mer GRAVY score < 1.5
        # (or user specified lower priority maximum for GRAVY score)
        gravy_excess_low_priority = max(
            0, max_gravy - max_kmer_hydropathy_low_priority)

        # max 7mer GRAVY score > 0
        # (or user specified min GRAVY for 7mer windows in peptide)
        gravy_shortfall = max(0, min_kmer_hydropathy - max_gravy)

        # numbers we want to minimize, so a bigger number is worse
        return (
            # total number of Cys residues
            scores.cysteine_count,
            cterm_gravy_excess,
            gravy_excess_high_priority,
            # avoid N-terminal Gln, Glu, Cys
            scores.difficult_n_terminal_residue,
            # avoid C-terminal Cys
            scores.c_terminal_cysteine,
            # avoid C-terminal Pro
            scores.c_terminal_proline,
            # avoid N-terminal Asn
            scores.n_terminal_asparagine,
            # avoid asparagine-proline (Asn-Pro) bonds
            scores.asparagine_proline_bond_count,
            gravy_excess_low_priority,
            gravy_shortfall,
        )

    def lexicographic_sort_key(self):
        """
        Create tuple of scores so that candidates get sorted
        lexicographically by multiple criteria.

        Every entry is oriented so that a smaller value is better: quantities
        we want more of (mutant epitope score, read support, number of mutant
        amino acids, distance of the mutation from the edge) are negated,
        while quantities we want less of (synthesis difficulty, wildtype
        epitope score) keep their sign. An ascending sort on this key
        therefore places the preferred candidates first.
        """
        fragment = self.mutant_protein_fragment

        # Highest priority: predicted immunogenicity and expression.
        essential_scores = (
            # Sum of normalized MHC binding affinities of subsequences,
            # rounded to 6 digits to avoid floating point errors from
            # serving as tie-breakers
            -round(self.mutant_epitope_score, 6),

            # Number of reads supporting the variant
            -fragment.n_alt_reads,
        )

        # Next: how hard the peptide is expected to be to synthesize.
        manufacturability_scores = \
            self.peptide_synthesis_difficulty_score_tuple()

        # Finally: tie-breakers.
        tie_breaker_scores = (
            # Number of reads supporting the particular protein sequence
            # we're using for this vaccine peptide. Currently all vaccine
            # peptides are drawn from the same larger sequence so this
            # score shouldn't change.
            -fragment.n_alt_reads_supporting_protein_sequence,

            # Minimize the sum of non-mutant MHC binding scores,
            # rounded to prevent floating point errors from serving as
            # tie-breakers
            round(self.wildtype_epitope_score, 6),

            # All else being equal, we prefer to maximize the number of
            # mutant amino acids
            -fragment.n_mutant_amino_acids,

            # If nothing else can serve as a tie break then try to center
            # the mutation in the vaccine peptide.
            -fragment.mutation_distance_from_edge,
        )
        return (
            essential_scores +
            manufacturability_scores +
            tie_breaker_scores
        )

    def contains_mutant_epitopes(self):
        """
        Returns True if at least one mutant epitope prediction was retained.
        """
        return len(self.mutant_epitope_predictions) != 0

    @property
    def expression_score(self):
        """
        Square root of the number of reads supporting the variant.
        """
        n_alt_reads = self.mutant_protein_fragment.n_alt_reads
        return np.sqrt(n_alt_reads)

    @property
    def combined_score(self):
        """
        Product of the expression score and the mutant epitope score.
        """
        return self.expression_score * self.mutant_epitope_score

    def to_dict(self):
        """
        Returns a dictionary of the constructor arguments needed to rebuild
        this object. The epitope predictions are the retained mutant
        predictions followed by the wildtype predictions.
        """
        retained_predictions = (
            self.mutant_epitope_predictions +
            self.wildtype_epitope_predictions)
        return {
            "mutant_protein_fragment": self.mutant_protein_fragment,
            "epitope_predictions": retained_predictions,
            "num_mutant_epitopes_to_keep": self.num_mutant_epitopes_to_keep,
            "sort_predictions_by": self.sort_predictions_by,
        }

    @classmethod
    def from_dict(cls, d):
        """
        Rebuild a VaccinePeptide from a dictionary of constructor arguments.

        Dictionaries which lack a "sort_predictions_by" entry are given the
        value "ic50". The input dictionary is not modified.
        """
        kwargs = d.copy()
        kwargs.setdefault("sort_predictions_by", "ic50")
        return cls(**kwargs)
class VaxrankResults(Serializable):
    """
    Container for all of the results captured by running Vaxrank.
    """
    def __init__(
            self,
            isovar_results,
            variant_to_vaccine_peptides_dict,
            ranked_vaccine_peptides):
        """
        Parameters
        ----------
        isovar_results : list of isovar.IsovarResult
            IsovarResult object for each variant without any filtering

        variant_to_vaccine_peptides_dict : dict
            Dictionary mapping variant to a list of possible vaccine peptides

        ranked_vaccine_peptides : list of VaccinePeptide
        """
        self.isovar_results = isovar_results
        self.variant_to_vaccine_peptides_dict = variant_to_vaccine_peptides_dict
        self.ranked_vaccine_peptides = ranked_vaccine_peptides

    @property
    def variants(self):
        """
        Unfiltered list of variants, one per IsovarResult and in the same
        order as `isovar_results`.

        Returns
        -------
        list of varcode.Variant
        """
        return [result.variant for result in self.isovar_results]

    def variant_counts(self):
        """
        Summarize Vaxrank counts for total variants, variants with coding
        effects, variants with RNA support, and variants with associated
        vaccine peptides.

        Returns
        -------
        dict
            Overall variant counts for report display, keyed by
            'num_total_variants', 'num_coding_effect_variants',
            'num_variants_with_rna_support' and
            'num_variants_with_vaccine_peptides'.
        """
        properties = self.variant_properties()

        def count_where(field):
            # Sums the per-variant flag stored under `field`.
            return sum(entry[field] for entry in properties)

        return {
            'num_total_variants': len(self.isovar_results),
            'num_coding_effect_variants':
                count_where('is_coding_nonsynonymous'),
            'num_variants_with_rna_support':
                count_where('rna_support'),
            'num_variants_with_vaccine_peptides':
                count_where('has_vaccine_peptide'),
        }

    def variant_properties(self, gene_pathway_check=None):
        """
        Collect per-variant properties for later analysis.

        Parameters
        ----------
        gene_pathway_check : GenePathwayCheck (optional)
            Used to look up whether a mutation or its affected gene are in
            some biologically important pathway. When given, the entries of
            `gene_pathway_check.make_variant_dict(variant)` are merged into
            each variant's dictionary.

        Returns
        -------
        list of OrderedDict, one per IsovarResult, containing properties we
        want to analyze later, e.g. whether this variant is part of a
        pathway of interest, has a vaccine peptide, etc.
        """
        rows = []
        for result in self.isovar_results:
            variant = result.variant

            row = OrderedDict()
            row['gene_name'] = result.top_gene_name
            row['contig'] = variant.contig
            row['start'] = variant.start
            row['ref'] = variant.ref
            row['alt'] = variant.alt
            row['is_coding_nonsynonymous'] = \
                result.predicted_effect_modifies_protein_sequence
            row['rna_support'] = result.has_mutant_protein_sequence_from_rna

            # TODO:
            # compute MHC binder status for variants that don't have RNA
            # support. For now 'mhc_binder' simply mirrors whether the
            # variant has any vaccine peptides.
            has_peptide = variant in self.variant_to_vaccine_peptides_dict
            row['mhc_binder'] = has_peptide
            row['has_vaccine_peptide'] = has_peptide

            if gene_pathway_check is not None:
                row.update(gene_pathway_check.make_variant_dict(variant))

            rows.append(row)
        return rows