├── .github
    └── workflows
    │   └── tests.yml
├── .gitignore
├── CONTRIBUTING.md
├── LICENSE
├── MANIFEST.in
├── README.md
├── RELEASING.md
├── code-of-conduct.md
├── deploy.sh
├── develop.sh
├── docs
    ├── Makefile
    ├── conf.py
    └── index.rst
├── lint.sh
├── pylintrc
├── requirements.txt
├── run-vaxrank-b16-test-data.sh
├── setup.py
├── test.sh
├── tests
    ├── __init__.py
    ├── common.py
    ├── data
    │   └── b16.f10
    │   │   ├── b16.combined.bam
    │   │   ├── b16.combined.bam.bai
    │   │   ├── b16.combined.sam
    │   │   ├── b16.combined.sorted.bam
    │   │   ├── b16.combined.sorted.bam.bai
    │   │   ├── b16.expressed.vcf
    │   │   ├── b16.f10.127a.aldh1b1.chr4.45802539.refG.altC.sam
    │   │   ├── b16.f10.127a.klf6.chr13.5864876.refC.altCG.sam
    │   │   ├── b16.f10.127a.phip.chr9.82927102.refG.altT.sam
    │   │   ├── b16.f10.127a.wdr13.chrX.8125624.refC.altA.sam
    │   │   ├── b16.f10.Phip.vcf
    │   │   ├── b16.f10.Wdr13.vcf
    │   │   ├── b16.not-expressed.vcf
    │   │   └── b16.vcf
    ├── test_cancer_driver_gene.py
    ├── test_epitope_prediction.py
    ├── test_manufacturability.py
    ├── test_mutant_protein_sequence.py
    ├── test_shell_script.py
    └── testing_helpers.py
└── vaxrank
    ├── __init__.py
    ├── cli.py
    ├── core_logic.py
    ├── data
        ├── cancer-driver-genes.csv
        ├── cancer-driver-variants.csv
        ├── class1-mhc-presentation-pathway.csv
        └── interferon-gamma-response.csv
    ├── epitope_prediction.py
    ├── gene_pathway_check.py
    ├── logging.conf
    ├── manufacturability.py
    ├── mutant_protein_fragment.py
    ├── patient_info.py
    ├── reference_proteome.py
    ├── report.py
    ├── templates
        ├── stylesheet.css
        ├── template.html
        └── template.txt
    ├── vaccine_peptide.py
    └── vaxrank_results.py


/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
 3 | 
 4 | name: Tests
 5 | on: [push, pull_request]
 6 | 
 7 | jobs:
 8 |   build:
 9 |     runs-on: ubuntu-latest
10 |     strategy:
11 |       fail-fast: true
12 |       matrix:
13 |         # Versions are quoted so that "3.10" is not parsed as the float 3.1.
14 |         python-version: ["3.9", "3.10", "3.11"]
15 |     steps:
16 |       - uses: actions/checkout@v4
17 |       - name: Set up Python ${{ matrix.python-version }}
18 |         uses: actions/setup-python@v5
19 |         with:
20 |           python-version: ${{ matrix.python-version }}
21 |       - name: Checkout private netmhc-bundle repo
22 |         uses: actions/checkout@v4
23 |         with:
24 |           repository: openvax/netmhc-bundle
25 |           token: ${{ secrets.NETMHC_BUNDLE_ACCESS_TOKEN }}
26 |           path: netmhc-bundle
27 | 
28 |       - name: Install netmhc-bundle dependencies
29 |         uses: awalsh128/cache-apt-pkgs-action@latest
30 |         with:
31 |           packages: tcsh gawk python2-minimal
32 |           # Quoted so the cache version is passed as the string "1.0", not a float.
33 |           version: "1.0"
34 |       - name: Install dependencies
35 |         run: |
36 |           python -m pip install --upgrade pip
37 |           python -m pip install pytest pytest-cov pylint
38 |           if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
39 |       - name: Install wkhtmltopdf
40 |         run: |
41 |           sudo apt-get update
42 |           sudo apt-get install -y xfonts-base xfonts-75dpi
43 |           wget https://github.com/wkhtmltopdf/packaging/releases/download/0.12.6-1/wkhtmltox_0.12.6-1.bionic_amd64.deb
44 |           sudo dpkg -i wkhtmltox_0.12.6-1.bionic_amd64.deb
45 |       - name: Lint with PyLint
46 |         run: |
47 |           ./lint.sh
48 |       - name: Download Ensembl data
49 |         run: |
50 |           echo "Before installing Ensembl releases" && df -h
51 |           pyensembl install --release 75 --species human --custom-mirror https://github.com/openvax/ensembl-data/releases/download/GRCh37.75/
52 |           pyensembl install --release 102 --species mouse --custom-mirror https://github.com/openvax/ensembl-data/releases/download/GRCm38.102/
53 |           pyensembl install --release 93 --species human --custom-mirror https://github.com/openvax/ensembl-data/releases/download/GRCh38.93/
54 |           pyensembl install --release 93 --species mouse --custom-mirror https://github.com/openvax/ensembl-data/releases/download/GRCm38.93/
55 |       - name: Test with pytest
56 |         run: |
57 |           # Point the tests at the netmhc-bundle checkout made in the earlier step.
58 |           export NETMHC_BUNDLE_HOME="$PWD/netmhc-bundle"
59 |           echo "NetMHC-bundle dir:" && ls -l "$NETMHC_BUNDLE_HOME"
60 |           mkdir -p "$PWD/netmhc-bundle-tmp"
61 |           export NETMHC_BUNDLE_TMPDIR="$PWD/netmhc-bundle-tmp"
62 |           export PATH="$PATH:$NETMHC_BUNDLE_HOME/bin"
63 |           ./test.sh
64 |       - name: Publish coverage to Coveralls
65 |         uses: coverallsapp/github-action@v2.2.3
66 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | 
 6 | # C extensions
 7 | *.so
 8 | 
 9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | 
27 | # PyInstaller
28 | #  Usually these files are written by a python script from a template
29 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 | *.spec
32 | 
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 | 
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *,cover
46 | .hypothesis/
47 | 
48 | # Translations
49 | *.mo
50 | *.pot
51 | 
52 | # Django stuff:
53 | *.log
54 | 
55 | # Sphinx documentation
56 | docs/_build/
57 | 
58 | # PyBuilder
59 | target/
60 | 
61 | # IPython Notebook
62 | .ipynb_checkpoints
63 | 
64 | # Generated outputs
65 | vaccine-peptides-report.txt
66 | vaccine-peptides-report.html
67 | vaccine-peptides-report.pdf
68 | vaccine-peptides-report.xlsx
69 | vaccine-peptides-report.json
70 | vaccine-peptides-all-passing.csv
71 | vaccine-peptides.csv
72 | neoepitope-report.xlsx
73 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing to Vaxrank
 2 | 
 3 | We would love your help in making Vaxrank a useful resource for the community. No contribution is too small, and we especially appreciate usability improvements like better documentation, tutorials, tests, or code cleanup.
 4 | 
 5 | ## Making a contribution
 6 | All contributions can be made as pull requests on GitHub. One of the core developers will review your contribution. As needed, the core contributors will also make releases and submit them to PyPI.
 7 | 
 8 | A few other guidelines:
 9 | 
10 |  * Vaxrank supports Python 3.3+ on Linux and OS X. We don't guarantee support for Windows.
11 |  * All functions should be documented using [numpy-style docstrings](https://numpydoc.readthedocs.io/en/latest/format.html) and associated with unit tests.
12 |  * Bugfixes should be accompanied by a test that illustrates the bug when feasible.
13 |  * Contributions are licensed under Apache 2.0.
14 |  * Please adhere to our [code of conduct](https://github.com/openvax/vaxrank/blob/master/code-of-conduct.md).
15 | 
16 | Working on your first Pull Request? One resource that may be helpful is [How to Contribute to an Open Source Project on GitHub](https://egghead.io/series/how-to-contribute-to-an-open-source-project-on-github).
17 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "{}"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright {yyyy} {name of copyright owner}
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include LICENSE README.md
2 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | [![Tests](https://github.com/openvax/vaxrank/actions/workflows/tests.yml/badge.svg)](https://github.com/openvax/vaxrank/actions/workflows/tests.yml)
 2 | [![Coverage Status](https://coveralls.io/repos/github/openvax/vaxrank/badge.svg?branch=master)](https://coveralls.io/github/openvax/vaxrank?branch=master)
 3 | <a href="https://pypi.python.org/pypi/vaxrank/">
 4 |     <img src="https://img.shields.io/pypi/v/vaxrank.svg?maxAge=1000" alt="PyPI" />
 5 | </a>
 6 | 
 7 | # vaxrank
 8 | 
 9 | Selection of mutated protein fragments for therapeutic personalized cancer vaccines.
10 | 
11 | ## Usage
12 | 
13 | ```sh
14 | 
15 | vaxrank \
16 |     --vcf tests/data/b16.f10/b16.vcf \
17 |     --bam tests/data/b16.f10/b16.combined.bam \
18 |     --vaccine-peptide-length 25 \
19 |     --mhc-predictor netmhc \
20 |     --mhc-alleles H2-Kb,H2-Db \
21 |     --padding-around-mutation 5 \
22 |     --output-ascii-report vaccine-peptides.txt \
23 |     --output-pdf-report vaccine-peptides.pdf \
24 |     --output-html-report vaccine-peptides.html
25 | ```
26 | 
27 | ## Installation
28 | 
29 | Vaxrank can be installed using [pip](https://packaging.python.org/installing/#use-pip-for-installing):
30 | 
31 | ```
32 | pip install vaxrank
33 | ```
34 | 
35 | Note: to generate PDF reports, you first need to install [wkhtmltopdf](http://wkhtmltopdf.org/), which you can do (on OS X) like so:
36 | 
37 | ```
38 | brew install Caskroom/cask/wkhtmltopdf
39 | ```
40 | 
41 | Vaxrank uses [PyEnsembl](https://github.com/openvax/pyensembl) for accessing information about the reference genome. You must install an Ensembl release corresponding to the reference genome associated with the mutations provided to Vaxrank.
42 | 
43 | The latest release for GRCh38 is Ensembl 93:
44 | ```
45 | pyensembl install --release 93 --species human
46 | ```
47 | 
48 | The last release for GRCh37 is Ensembl 75:
49 | ```
50 | pyensembl install --release 75 --species human
51 | ```
52 | 
53 | If your variants were called from alignments against hg19 then you can still use GRCh37 but should ignore mitochondrial variants.
54 | 
55 | ## Paper & Citation
56 | 
57 | There is a Vaxrank paper on biorxiv called [Vaxrank: A Computational Tool For Designing Personalized Cancer Vaccines](https://www.biorxiv.org/content/early/2017/05/27/142919) which can be cited as:
58 | 
59 |     @article {Rubinsteyn142919,
60 |         author = {Rubinsteyn, Alex and Hodes, Isaac and Kodysh, Julia and Hammerbacher, Jeffrey},
61 |         title = {Vaxrank: A Computational Tool For Designing Personalized Cancer Vaccines},
62 |         year = {2017},
63 |         doi = {10.1101/142919},
64 |         publisher = {Cold Spring Harbor Laboratory},
65 |         abstract = {Therapeutic vaccines targeting mutant tumor antigens ({\textquotedblleft}neoantigens{\textquotedblright}) are an increasingly popular form of personalized cancer immunotherapy. Vaxrank is a computational tool for selecting neoantigen vaccine peptides from tumor mutations, tumor RNA data, and patient HLA type. Vaxrank is freely available at www.github.com/hammerlab/vaxrank under the Apache 2.0 open source license and can also be installed from the Python Package Index.},
66 |         URL = {https://www.biorxiv.org/content/early/2017/05/27/142919},
67 |         eprint = {https://www.biorxiv.org/content/early/2017/05/27/142919.full.pdf},
68 |         journal = {bioRxiv}
69 |     }
70 | 
71 | 
72 | ## Development
73 | 
74 | To install Vaxrank for local development, you may do the below:
75 | 
76 | ```
77 | git clone git@github.com:openvax/vaxrank.git
78 | conda create -q -n vaxrank-dev-env python=3.5.2 numpy scipy pandas pylint
79 | source activate vaxrank-dev-env
80 | pip install -r requirements.txt
81 | pip install .
82 | pyensembl install --release 87 --species human
83 | pyensembl install --release 87 --species mouse
84 | ```
85 | 
86 | You should run the linter and the test suite as you work on Vaxrank (and these will be run automatically by our continuous integration server upon a PR being made).
87 | 
88 | ```
89 | ./lint.sh && ./test.sh 
90 | ```
91 | 
92 | The first run of the tests may take a while (8 minutes on a 2016 Macbook Pro) to create the FM index of the proteome, but subsequent tests should take only a few seconds.
93 | 
94 | 


--------------------------------------------------------------------------------
/RELEASING.md:
--------------------------------------------------------------------------------
1 | # Releasing Vaxrank
2 | 
3 | This document explains what to do once your [Pull Request](https://www.atlassian.com/git/tutorials/making-a-pull-request/) has been reviewed and all final changes applied. Now you're ready to merge your branch into master and release it to the world:
4 | 
5 | 0. Make sure that you have `pandoc` and `pypandoc` installed: this is needed for readme markdown on PyPI. (See [here](http://pandoc.org/installing.html) and [here](https://pypi.python.org/pypi/pypandoc), respectively, for instructions.)
6 | 1. Bump the [version](http://semver.org/) in `__init__.py`, as part of the PR you want to release.
7 | 2. Merge your branch into master.
8 | 3. Run `./deploy.sh`, which runs the linter and tests, builds the distribution, and uploads the newest release to PyPI with `twine`.
9 | 


--------------------------------------------------------------------------------
/code-of-conduct.md:
--------------------------------------------------------------------------------
 1 | # Contributor Covenant Code of Conduct
 2 | 
 3 | ## Our Pledge
 4 | 
 5 | In the interest of fostering an open and welcoming environment, we as
 6 | contributors and maintainers pledge to making participation in our project and
 7 | our community a harassment-free experience for everyone, regardless of age, body
 8 | size, disability, ethnicity, sex characteristics, gender identity and expression,
 9 | level of experience, education, socio-economic status, nationality, personal
10 | appearance, race, religion, or sexual identity and orientation.
11 | 
12 | ## Our Standards
13 | 
14 | Examples of behavior that contributes to creating a positive environment
15 | include:
16 | 
17 | * Using welcoming and inclusive language
18 | * Being respectful of differing viewpoints and experiences
19 | * Gracefully accepting constructive criticism
20 | * Focusing on what is best for the community
21 | * Showing empathy towards other community members
22 | 
23 | Examples of unacceptable behavior by participants include:
24 | 
25 | * The use of sexualized language or imagery and unwelcome sexual attention or
26 |   advances
27 | * Trolling, insulting/derogatory comments, and personal or political attacks
28 | * Public or private harassment
29 | * Publishing others' private information, such as a physical or electronic
30 |   address, without explicit permission
31 | * Other conduct which could reasonably be considered inappropriate in a
32 |   professional setting
33 | 
34 | ## Our Responsibilities
35 | 
36 | Project maintainers are responsible for clarifying the standards of acceptable
37 | behavior and are expected to take appropriate and fair corrective action in
38 | response to any instances of unacceptable behavior.
39 | 
40 | Project maintainers have the right and responsibility to remove, edit, or
41 | reject comments, commits, code, wiki edits, issues, and other contributions
42 | that are not aligned to this Code of Conduct, or to ban temporarily or
43 | permanently any contributor for other behaviors that they deem inappropriate,
44 | threatening, offensive, or harmful.
45 | 
46 | ## Scope
47 | 
48 | This Code of Conduct applies within all project spaces, and it also applies when
49 | an individual is representing the project or its community in public spaces.
50 | Examples of representing a project or community include using an official
51 | project e-mail address, posting via an official social media account, or acting
52 | as an appointed representative at an online or offline event. Representation of
53 | a project may be further defined and clarified by project maintainers.
54 | 
55 | ## Enforcement
56 | 
57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
58 | reported by contacting the project team at hello@openvax.org. All
59 | complaints will be reviewed and investigated and will result in a response that
60 | is deemed necessary and appropriate to the circumstances. The project team is
61 | obligated to maintain confidentiality with regard to the reporter of an incident.
62 | Further details of specific enforcement policies may be posted separately.
63 | 
64 | Project maintainers who do not follow or enforce the Code of Conduct in good
65 | faith may face temporary or permanent repercussions as determined by other
66 | members of the project's leadership.
67 | 
68 | ## Attribution
69 | 
70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
72 | 
73 | [homepage]: https://www.contributor-covenant.org
74 | 
75 | For answers to common questions about this code of conduct, see
76 | https://www.contributor-covenant.org/faq
77 | 


--------------------------------------------------------------------------------
/deploy.sh:
--------------------------------------------------------------------------------
1 | # Lint, test, build, and upload the release; abort at the first failing step.
2 | set -e
3 | ./lint.sh
4 | ./test.sh
5 | python3 -m pip install --upgrade build
6 | python3 -m pip install --upgrade twine
7 | rm -rf dist
8 | python3 -m build
9 | python3 -m twine upload dist/*


--------------------------------------------------------------------------------
/develop.sh:
--------------------------------------------------------------------------------
1 | # Install the project in the current directory in editable (development) mode.
2 | set -o errexit
3 | 
4 | pip install --editable .


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation.
 2 | # Every target is forwarded to Sphinx via `$(SPHINXBUILD) -M <target>`.
 3 | 
 4 | # You can set these variables from the command line, e.g. `make html BUILDDIR=/tmp/docs`.
 5 | SPHINXOPTS    =
 6 | SPHINXBUILD   = python -msphinx
 7 | SPHINXPROJ    = vaxrank
 8 | SOURCEDIR     = .
 9 | BUILDDIR      = _build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option ($@ is the requested target).  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)


--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | #
  3 | # vaxrank documentation build configuration file, created by
  4 | # sphinx-quickstart on Tue Oct 10 16:59:03 2017.
  5 | #
  6 | # This file is execfile()d with the current directory set to its
  7 | # containing dir.
  8 | #
  9 | # Note that not all possible configuration values are present in this
 10 | # autogenerated file.
 11 | #
 12 | # All configuration values have a default; values that are commented out
 13 | # serve to show the default.
 14 | 
 15 | # If extensions (or modules to document with autodoc) are in another directory,
 16 | # add these directories to sys.path here. If the directory is relative to the
 17 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 18 | #
 19 | # import os
 20 | # import sys
 21 | # sys.path.insert(0, os.path.abspath('.'))
 22 | 
 23 | 
 24 | # -- General configuration ------------------------------------------------
 25 | 
 26 | # If your documentation needs a minimal Sphinx version, state it here.
 27 | #
 28 | # needs_sphinx = '1.0'
 29 | 
 30 | # Add any Sphinx extension module names here, as strings. They can be
 31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 32 | # ones.
 33 | extensions = []
 34 | 
 35 | # Add any paths that contain templates here, relative to this directory.
 36 | templates_path = ['_templates']
 37 | 
 38 | # The suffix(es) of source filenames.
 39 | # You can specify multiple suffix as a list of string:
 40 | #
 41 | # source_suffix = ['.rst', '.md']
 42 | source_suffix = '.rst'
 43 | 
 44 | # The master toctree document.
 45 | master_doc = 'index'
 46 | 
 47 | # General information about the project.
 48 | project = u'vaxrank'
 49 | copyright = u'2017, Alex Rubinsteyn, Julia Kodysh'
 50 | author = u'Alex Rubinsteyn, Julia Kodysh'
 51 | 
 52 | # The version info for the project you're documenting, acts as replacement for
 53 | # |version| and |release|, also used in various other places throughout the
 54 | # built documents.
 55 | #
 56 | # The short X.Y version.
 57 | version = u''
 58 | # The full version, including alpha/beta/rc tags.
 59 | release = u''
 60 | 
 61 | # The language for content autogenerated by Sphinx. Refer to documentation
 62 | # for a list of supported languages.
 63 | #
 64 | # This is also used if you do content translation via gettext catalogs.
 65 | # Usually you set "language" from the command line for these cases.
 66 | language = None
 67 | 
 68 | # List of patterns, relative to source directory, that match files and
 69 | # directories to ignore when looking for source files.
 70 | # These patterns also affect html_static_path and html_extra_path
 71 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
 72 | 
 73 | # The name of the Pygments (syntax highlighting) style to use.
 74 | pygments_style = 'sphinx'
 75 | 
 76 | # If true, `todo` and `todoList` produce output, else they produce nothing.
 77 | todo_include_todos = False
 78 | 
 79 | 
 80 | # -- Options for HTML output ----------------------------------------------
 81 | 
 82 | # The theme to use for HTML and HTML Help pages.  See the documentation for
 83 | # a list of builtin themes.
 84 | #
 85 | html_theme = 'alabaster'
 86 | 
 87 | # Theme options are theme-specific and customize the look and feel of a theme
 88 | # further.  For a list of options available for each theme, see the
 89 | # documentation.
 90 | #
 91 | # html_theme_options = {}
 92 | 
 93 | # Add any paths that contain custom static files (such as style sheets) here,
 94 | # relative to this directory. They are copied after the builtin static files,
 95 | # so a file named "default.css" will overwrite the builtin "default.css".
 96 | html_static_path = ['_static']
 97 | 
 98 | # Custom sidebar templates, must be a dictionary that maps document names
 99 | # to template names.
100 | #
101 | # This is required for the alabaster theme
102 | # refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars
103 | html_sidebars = {
104 |     '**': [
105 |         'about.html',
106 |         'navigation.html',
107 |         'relations.html',  # needs 'show_related': True theme option to display
108 |         'searchbox.html',
109 |         'donate.html',
110 |     ]
111 | }
112 | 
113 | 
114 | # -- Options for HTMLHelp output ------------------------------------------
115 | 
116 | # Output file base name for HTML help builder.
117 | htmlhelp_basename = 'vaxrankdoc'
118 | 
119 | 
120 | # -- Options for LaTeX output ---------------------------------------------
121 | 
122 | latex_elements = {
123 |     # The paper size ('letterpaper' or 'a4paper').
124 |     #
125 |     # 'papersize': 'letterpaper',
126 | 
127 |     # The font size ('10pt', '11pt' or '12pt').
128 |     #
129 |     # 'pointsize': '10pt',
130 | 
131 |     # Additional stuff for the LaTeX preamble.
132 |     #
133 |     # 'preamble': '',
134 | 
135 |     # Latex figure (float) alignment
136 |     #
137 |     # 'figure_align': 'htbp',
138 | }
139 | 
140 | # Grouping the document tree into LaTeX files. List of tuples
141 | # (source start file, target name, title,
142 | #  author, documentclass [howto, manual, or own class]).
143 | latex_documents = [
144 |     (master_doc, 'vaxrank.tex', u'vaxrank Documentation',
145 |      u'Alex Rubinsteyn, Julia Kodysh', 'manual'),
146 | ]
147 | 
148 | 
149 | # -- Options for manual page output ---------------------------------------
150 | 
151 | # One entry per manual page. List of tuples
152 | # (source start file, name, description, authors, manual section).
153 | man_pages = [
154 |     (master_doc, 'vaxrank', u'vaxrank Documentation',
155 |      [author], 1)
156 | ]
157 | 
158 | 
159 | # -- Options for Texinfo output -------------------------------------------
160 | 
161 | # Grouping the document tree into Texinfo files. List of tuples
162 | # (source start file, target name, title, author,
163 | #  dir menu entry, description, category)
164 | texinfo_documents = [
165 |     (master_doc, 'vaxrank', u'vaxrank Documentation',
166 |      author, 'vaxrank', 'One line description of project.',
167 |      'Miscellaneous'),
168 | ]
169 | 
170 | 
171 | 
172 | 


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
  1 | .. vaxrank documentation master file, created by
  2 |    sphinx-quickstart on Tue Oct 10 16:59:03 2017.
  3 |    You can adapt this file completely to your liking, but it should at least
  4 |    contain the root `toctree` directive.
  5 | 
  6 | .. toctree::
  7 |    :maxdepth: 2
  8 |    :caption: Contents:
  9 | 
 10 | Getting Started With Vaxrank
 11 | ============================
 12 | 
 13 | Overview
 14 | --------
 15 | Vaxrank is a tool for selecting mutated peptides for use in personalized therapeutic cancer vaccination. Vaxrank determines which peptides should be used in a vaccine from tumor-specific somatic mutations, tumor RNA sequencing data, and a patient's HLA type. Additionally, Vaxrank considers surrounding non-mutated residues in a peptide to prioritize vaccine peptide candidates and improve the odds of successful synthesis.
 16 | 
 17 | Vaxrank is being actively developed at the Icahn School of Medicine at Mount Sinai.
 18 | 
 19 | Questions, Bug Reporting, and Issue Tracking
 20 | --------------------------------------------
 21 | Questions, bug reporting and issue tracking are provided by GitHub. Please report all bugs by creating a new issue. You can ask questions by creating a new issue with the question tag.
 22 | 
 23 | Installation
 24 | ============
 25 | 
 26 | Vaxrank can be installed using `pip <https://packaging.python.org/installing/#use-pip-for-installing>`_:
 27 | 
 28 | .. code-block:: bash
 29 | 
 30 |     pip install vaxrank
 31 | 
 32 | Note: to generate PDF reports, you first need to install `wkhtmltopdf <http://wkhtmltopdf.org/>`_, which you can do (on OS X) like so:
 33 | 
 34 | .. code-block:: bash
 35 | 
 36 |     brew install Caskroom/cask/wkhtmltopdf
 37 | 
 38 | Vaxrank uses `PyEnsembl <https://github.com/hammerlab/pyensembl>`_ for accessing information about the reference genome. You must install an Ensembl release corresponding to the reference genome associated with the mutations provided to Vaxrank.
 39 | 
 40 | The latest supported release for GRCh38 is Ensembl 87:
 41 | 
 42 | .. code-block:: bash
 43 | 
 44 |     pyensembl install --release 87 --species human
 45 | 
 46 | The latest release for GRCh37 is Ensembl 75:
 47 | 
 48 | .. code-block:: bash
 49 | 
 50 |     pyensembl install --release 75 --species human
 51 | 
 52 | 
 53 | Running Vaxrank
 54 | ===============
 55 | 
 56 | Basic Vaxrank usage involves these parameters:
 57 | 
 58 | .. code-block:: bash
 59 | 
 60 |     vaxrank \
 61 |         --vcf somatic-variants.vcf \
 62 |         --bam tumor-rna.bam \
 63 |         --mhc-predictor netmhc \
 64 |         --mhc-alleles A*02:01,A*02:03 \
 65 |         --mhc-epitope-lengths 8 \
 66 |         --padding-around-mutation 5 \
 67 |         --vaccine-peptide-length 25 \
 68 |         --output-ascii-report vaccine-peptides-report.txt 
 69 | 
 70 | This tells Vaxrank to:
 71 | 
 72 | - consider each variant from the input VCF file against the RNA evidence in the input BAM file;
 73 | - predict MHC binding of each resulting mutant protein sequence using the NetMHC prediction algorithm with the A*02:01 and A*02:03 MHC alleles, evaluating sequences of length 8 for purposes of MHC binding prediction;
 74 | - choose protein vaccine candidates, each composed of 25 amino acids; and
 75 | - generate a report written to vaccine-peptides-report.txt, containing the top ranked variants with their associated vaccine proteins.
 76 | 
 77 | For a complete description of parameters supported by Vaxrank, keep on reading.
 78 | 
 79 | 
 80 | Variant Parameters
 81 | ------------------
 82 | Vaxrank starts with a set of candidate genomic variants and considers each for inclusion in the vaccine. There are several ways to specify a set of variants for Vaxrank to consider:
 83 | 
 84 | --vcf VCF_FILE
 85 |     Genomic variants in `VCF <https://samtools.github.io/hts-specs/VCFv4.2.pdf>`_ format.
 86 | --maf MAF_FILE
 87 |     Genomic variants in `MAF <https://wiki.nci.nih.gov/display/TCGA/Mutation+Annotation+Format+(MAF)+Specification>`_ format.
 88 | --json-variants JSON_VARIANTS
 89 |     Path to Varcode.VariantCollection object serialized as a JSON
 90 |     file. To learn more about Varcode, see `docs <https://github.com/hammerlab/varcode>`_. 
 91 | 
 92 | MHC Prediction Parameters
 93 | -------------------------
 94 | 
 95 | Vaxrank uses a patient's HLA type information to predict which of the candidate vaccine peptides are most likely to be seen and targeted by the patient's immune system. The MHC alleles can be passed in either in a file or as a comma-separated list of inputs.
 96 | 
 97 | --mhc-alleles-file MHC_ALLELES_FILE
 98 |   File with one HLA allele per line
 99 | --mhc-alleles MHC_ALLELES
100 |   Comma-separated or space-separated list of MHC alleles, e.g. "HLA-A*02:01,HLA-A*02:03".
101 | --mhc-peptide-lengths MHC_PEPTIDE_LENGTHS
102 |   Comma-separated list of epitope lengths to consider for MHC binding prediction, e.g. "8,9,10,11". This can also take a range of values, e.g. "8-11".
103 | 
104 | In addition, the user can specify different MHC binding predictors for Vaxrank to use:
105 | 
106 | --mhc-predictor MHC_PREDICTOR
107 |   MHC predictor to use. MHCFlurry is an open-source predictor installed by default. Note that to use NetMHC predictors, you need to have locally installed the NetMHC suite software, with binaries like NetMHCpan as executable files on your path. See a list of all supported predictors `here <https://github.com/hammerlab/mhctools>`_.
108 | 
109 | RNA Parameters
110 | --------------
111 | 
112 | Vaxrank uses input tumor RNA data to see whether the input somatic variants are sufficiently expressed. 
113 | 
114 | --bam BAM
115 |   BAM file containing tumor RNA reads.
116 | 
117 | Each variant's effect on a resulting protein is predicted and matched against what we see in the input RNA. There are many options available to the power user, but the only actual required argument is the location of the tumor RNA BAM; all values listed below come with reasonable defaults.
118 | 
119 | --min-alt-rna-reads MIN_ALT_RNA_READS
120 |   Minimum number of RNA reads supporting the variant allele. Default: 2.
121 | --min-variant-sequence-coverage MIN_VARIANT_SEQUENCE_COVERAGE
122 |   Minimum number of reads supporting a variant sequence. Variant sequences will be trimmed to positions supported by at least this number of RNA reads. Default: 2.
123 | --disable-variant-sequence-assembly
124 |   By default, variant cDNA sequences are assembled from overlapping reads. Include this argument to disable the assembly behavior.
125 | --protein-sequence-length
126 |   Vaxrank will try to translate protein sequences of this length, though sometimes the resulting sequence may be shorter (depending on the RNA data, presence of stop codons, etc.). Default: 20.
127 | --max-reference-transcript-mismatches MAX_REFERENCE_TRANSCRIPT_MISMATCHES
128 |   Maximum number of mismatches between the variant sequence being constructed and the reference sequence before the variant sequence gets dropped from consideration. Default: 2.
129 | --include-mismatches-after-variant
130 |   By default, only mismatches that occur before the actual variant locus count against --max-reference-transcript-mismatches. Include this flag if you also want to count mismatches after the variant locus towards the total. Default: false.
131 | --min-transcript-prefix-length MIN_TRANSCRIPT_PREFIX_LENGTH
132 |   Number of nucleotides before the variant we try to match against a reference transcript. Default: 10.
133 | --min-mapping-quality MIN_MAPPING_QUALITY
134 |   Minimum MAPQ value to allow for a read. Default: 1.
135 | --use-duplicate-reads
136 |   Use a read even if it's been marked as a duplicate. Default: false.
137 | --drop-secondary-alignments
138 |   If set, Vaxrank will only use a read at its primary alignment; by default, a read is also used at locations that aren't its primary alignment. Default: false.
139 | 
140 | Vaccine Peptide Parameters
141 | --------------------------
142 | There are some more options to specify the desired characteristics of the output vaccine peptides, which will contain shorter sequences that contain the mutation and are predicted to be strong MHC binders.
143 | 
144 | --vaccine-peptide-length VACCINE_PEPTIDE_LENGTH
145 |   Number of amino acids in the resulting vaccine peptides. Default: 25.
146 | --padding-around-mutation PADDING_AROUND_MUTATION
147 |   Number of off-center windows around the mutation to consider as vaccine peptides. Default: 0.
148 | --min-epitope-score MIN_EPITOPE_SCORE
149 |   Ignore epitopes whose normalized score falls below this threshold. Default: 0.001. 
150 | 
151 | Output Parameters
152 | -----------------
153 | 
154 | By default, the report will contain all high-confidence vaccine peptides, but the report can be made more restrictive using the following parameters:
155 | 
156 | --max-vaccine-peptides-per-mutation MAX_VACCINE_PEPTIDES_PER_MUTATION
157 |                         Number of vaccine peptides to generate for each
158 |                         mutation
159 | --max-mutations-in-report MAX_MUTATIONS_IN_REPORT
160 |                         Number of mutations to report
161 | 
162 | Output Formats
163 | ^^^^^^^^^^^^^^
164 | 
165 | Vaxrank can generate many types of outputs. The most basic output is an ASCII-formatted report, listing each high-scoring variant and its associated vaccine peptides. However, the user can also generate an HTML report, a PDF report, and two types of Excel reports.
166 | 
167 | Options related to report generation:
168 |   --output-ascii-report OUTPUT_ASCII_REPORT
169 |                         Path to ASCII vaccine peptide report
170 |   --output-html-report OUTPUT_HTML_REPORT
171 |                         Path to HTML vaccine peptide report
172 |   --output-pdf-report OUTPUT_PDF_REPORT
173 |                         Path to PDF vaccine peptide report
174 |   --output-xlsx-report OUTPUT_XLSX_REPORT
175 |                         Path to XLSX vaccine peptide report worksheet, one
176 |                         sheet per variant. This is meant for use by the
177 |                         vaccine manufacturer.
178 |   --output-neoepitope-report OUTPUT_NEOEPITOPE_REPORT
179 |                         Path to XLSX neoepitope report, containing information
180 |                         focusing on short peptide sequences.
181 | 
182 | Vaxrank can also output all variants and vaccine sequences in a JSON file, which can be used for further programmatic processing if necessary. The file output location should be specified by:
183 | 
184 | --output-json-file OUTPUT_JSON_FILE
185 |                     Path to JSON vaccine peptide data
186 | 


--------------------------------------------------------------------------------
/lint.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -o errexit
 3 | 
 4 | 
 5 | # disabling several categories of errors due to false positives in pylint,
 6 | # see these issues:
 7 | # - https://bitbucket.org/logilab/pylint/issues/701/false-positives-with-not-an-iterable-and
 8 | # - https://bitbucket.org/logilab/pylint/issues/58
 9 | 
10 | find vaxrank/ -name '*.py' \
11 |   | xargs pylint \
12 |   --errors-only \
13 |   --disable=unsubscriptable-object,not-an-iterable,no-member,invalid-unary-operand-type
14 | 
15 | echo 'Passes pylint check'
16 | 


--------------------------------------------------------------------------------
/pylintrc:
--------------------------------------------------------------------------------
1 | [TYPECHECK]
2 | # Without ignoring this, we get errors like:
3 | # E:249,20: Module 'numpy' has no 'nan' member (no-member)
4 | ignored-modules = numpy
5 | ignored-classes = nose.tools
6 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | six
 2 | numpy>=1.14.0
 3 | pandas
 4 | pyensembl>=1.5.0
 5 | varcode>=0.5.9
 6 | isovar>=1.1.1
 7 | mhctools>=1.5.0
 8 | roman
 9 | jinja2<3.1
10 | pdfkit  # needs wkhtmltopdf: brew install Caskroom/cask/wkhtmltopdf
11 | pypandoc  # needs pandoc: brew install pandoc
12 | shellinford>=0.3.4
13 | xlsxwriter
14 | xlrd>=1.0.0,<2.0.0
15 | xvfbwrapper
16 | future>=0.16.0  # needed by pylint
17 | astropy
18 | datacache
19 | pysam>=0.15.2
20 | 


--------------------------------------------------------------------------------
/run-vaxrank-b16-test-data.sh:
--------------------------------------------------------------------------------
 1 | # Run vaxrank on the B16 data in tests/data/b16.f10 (from the repo root), writing every report type.
 2 | set -ex
 3 | vaxrank \
 4 |     --download-reference-genome-data \
 5 |     --vcf tests/data/b16.f10/b16.vcf \
 6 |     --bam tests/data/b16.f10/b16.combined.bam \
 7 |     --vaccine-peptide-length 15 \
 8 |     --mhc-predictor netmhc \
 9 |     --mhc-alleles H2-Kb,H2-Db \
10 |     --mhc-epitope-lengths 8 \
11 |     --padding-around-mutation 0 \
12 |     --min-epitope-score 10e-100 \
13 |     --num-epitopes-per-peptide 5 \
14 |     --output-ascii-report vaccine-peptides-report.txt \
15 |     --output-html-report vaccine-peptides-report.html \
16 |     --output-pdf-report vaccine-peptides-report.pdf \
17 |     --output-xlsx-report vaccine-peptides-report.xlsx \
18 |     --output-neoepitope-report neoepitope-report.xlsx \
19 |     --output-json-file vaccine-peptides-report.json \
20 |     --output-csv vaccine-peptides.csv \
21 |     --output-passing-variants-csv vaccine-peptides-all-passing.csv \
22 |     --output-reviewed-by "John Doe,Jane Doe" \
23 |     --output-final-review "All the Does" \
24 |     --output-patient-id "Test Patient"
25 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | # Licensed under the Apache License, Version 2.0 (the "License");
 2 | # you may not use this file except in compliance with the License.
 3 | # You may obtain a copy of the License at
 4 | #
 5 | #     http://www.apache.org/licenses/LICENSE-2.0
 6 | #
 7 | # Unless required by applicable law or agreed to in writing, software
 8 | # distributed under the License is distributed on an "AS IS" BASIS,
 9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 | # See the License for the specific language governing permissions and
11 | # limitations under the License.
12 | 
13 | 
14 | from __future__ import (absolute_import,)
15 | 
16 | import os
17 | import logging
18 | import re
19 | 
20 | from setuptools import setup
21 | 
22 | readme_dir = os.path.dirname(__file__)
23 | readme_path = os.path.join(readme_dir, 'README.md')
24 | 
25 | try:
26 |     with open(readme_path, 'r') as f:
27 |         readme_markdown = f.read()
28 | except:
29 |     logging.warn("Failed to load %s" % readme_path)
30 |     readme_markdown = ""
31 | 
32 | with open('vaxrank/__init__.py', 'r') as f:
33 |     version = re.search(
34 |         r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]',
35 |         f.read(),
36 |         re.MULTILINE).group(1)
37 | 
38 | if not version:
39 |     raise RuntimeError("Cannot find version information")
40 | 
41 | if __name__ == '__main__':
42 |     setup(
43 |         name='vaxrank',
44 |         version=version,
45 |         description="Mutant peptide ranking for personalized cancer vaccines",
46 |         author="Alex Rubinsteyn, Julia Kodysh",
47 |         author_email="alex@openvax.org, julia@openvax.org",
48 |         url="https://github.com/openvax/vaxrank",
49 |         license="http://www.apache.org/licenses/LICENSE-2.0.html",
50 |         classifiers=[
51 |             'Development Status :: 4 - Beta',
52 |             'Environment :: Console',
53 |             'Operating System :: OS Independent',
54 |             'Intended Audience :: Science/Research',
55 |             'License :: OSI Approved :: Apache Software License',
56 |             'Programming Language :: Python',
57 |             'Topic :: Scientific/Engineering :: Bio-Informatics',
58 |         ],
59 |         install_requires=[
60 |             'numpy>=1.14.0,<2.0.0',
61 |             'pandas>=2.1.4,<3.0.0',
62 |             'pyensembl>=2.0.0,<3.0.0',
63 |             'varcode>=1.1.0,<2.0.0',
64 |             'isovar>=1.3.0,<2.0.0',
65 |             'mhctools>=1.8.2,<2.0.0',
66 |             'roman',
67 |             'jinja2<3.1',
68 |             'pdfkit',
69 |             'pypandoc',
70 |             'shellinford>=0.3.4',
71 |             'xlrd>=1.0.0,<2.0.0',
72 |             'xlsxwriter',
73 |             'xvfbwrapper',
74 |             'future>=0.16.0',  # needed by pylint
75 |             'astropy',
76 |         ],
77 |         # README.md contents loaded above; empty string if it could not be read.
78 |         long_description=readme_markdown,
79 |         long_description_content_type='text/markdown',
80 |         packages=['vaxrank'],
81 |         # Ship the report templates, bundled data files and logging config.
82 |         package_data={'vaxrank': ['templates/*', 'data/*', 'logging.conf']},
83 |         entry_points={
84 |             'console_scripts': [
85 |                 'vaxrank = vaxrank.cli:main'
86 |             ]
87 |         }
88 |     )


--------------------------------------------------------------------------------
/test.sh:
--------------------------------------------------------------------------------
1 | # Run the tests under tests/ with coverage measured for vaxrank/ (term-missing report).
2 | pytest --cov=vaxrank/ --cov-report=term-missing tests
3 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
 1 | # Licensed under the Apache License, Version 2.0 (the "License");
 2 | # you may not use this file except in compliance with the License.
 3 | # You may obtain a copy of the License at
 4 | #
 5 | #       http://www.apache.org/licenses/LICENSE-2.0
 6 | #
 7 | # Unless required by applicable law or agreed to in writing, software
 8 | # distributed under the License is distributed on an "AS IS" BASIS,
 9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 | # See the License for the specific language governing permissions and
11 | # limitations under the License.
12 | 
13 | from __future__ import absolute_import, print_function, division
14 | 


--------------------------------------------------------------------------------
/tests/common.py:
--------------------------------------------------------------------------------
 1 | # Licensed under the Apache License, Version 2.0 (the "License");
 2 | # you may not use this file except in compliance with the License.
 3 | # You may obtain a copy of the License at
 4 | #
 5 | #       http://www.apache.org/licenses/LICENSE-2.0
 6 | #
 7 | # Unless required by applicable law or agreed to in writing, software
 8 | # distributed under the License is distributed on an "AS IS" BASIS,
 9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 | # See the License for the specific language governing permissions and
11 | # limitations under the License.
12 | 
13 | def ok_(a, s=None):
14 |     if s is None:
15 |         assert a
16 |     else:
17 |         assert a, s
18 | 
19 | def eq_(a, b, s=None):
20 |     if s is None:
21 |         assert a == b
22 |     else:
23 |         assert a == b, s
24 | 
25 | def neq_(a, b, s=None):
26 |     if s is None:
27 |         assert a != b
28 |     else:
29 |         assert a != b, s
30 | 
31 | def gt_(a, b, s=None):
32 |     if s is None:
33 |         assert a > b
34 |     else:
35 |         assert a > b, s
36 | 
37 | def lt_(a, b, s=None):
38 |     if s is None:
39 |         assert a < b
40 |     else:
41 |         assert a < b, s
42 | 
43 | def gte_(a, b, s=None):
44 |     if s is None:
45 |         assert a >= b
46 |     else:
47 |         assert a >= b, s
48 | 
49 | def lte_(a, b, s=None):
50 |     if s is None:
51 |         assert a <= b
52 |     else:
53 |         assert a <= b, s
54 | 
55 | def almost_eq_(a, b, tol=1e-6, s=None):
56 |     if s is None:
57 |         assert abs(a - b) < tol
58 |     else:
59 |         assert abs(a - b) < tol, s


--------------------------------------------------------------------------------
/tests/data/b16.f10/b16.combined.bam:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openvax/vaxrank/a95e4e19070fe06b67b806f995b209f190a7af9a/tests/data/b16.f10/b16.combined.bam


--------------------------------------------------------------------------------
/tests/data/b16.f10/b16.combined.bam.bai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openvax/vaxrank/a95e4e19070fe06b67b806f995b209f190a7af9a/tests/data/b16.f10/b16.combined.bam.bai


--------------------------------------------------------------------------------
/tests/data/b16.f10/b16.combined.sorted.bam:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openvax/vaxrank/a95e4e19070fe06b67b806f995b209f190a7af9a/tests/data/b16.f10/b16.combined.sorted.bam


--------------------------------------------------------------------------------
/tests/data/b16.f10/b16.combined.sorted.bam.bai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openvax/vaxrank/a95e4e19070fe06b67b806f995b209f190a7af9a/tests/data/b16.f10/b16.combined.sorted.bam.bai


--------------------------------------------------------------------------------
/tests/data/b16.f10/b16.expressed.vcf:
--------------------------------------------------------------------------------
1 | ##reference=mm10
2 | #chr	pos	id	ref	alt	qual	filter	info
3 | chr9	82927102	.	G	T	.	.	.
4 | chrX	8125624	.	C	A	.	.	.
5 | 


--------------------------------------------------------------------------------
/tests/data/b16.f10/b16.f10.127a.aldh1b1.chr4.45802539.refG.altC.sam:
--------------------------------------------------------------------------------
 1 | @HD VN:1.0  SO:coordinate
 2 | @SQ SN:chr1 LN:195471971
 3 | @SQ SN:chr2 LN:182113224
 4 | @SQ SN:chr3 LN:160039680
 5 | @SQ SN:chr4 LN:156508116
 6 | @SQ SN:chr5 LN:151834684
 7 | @SQ SN:chr6 LN:149736546
 8 | @SQ SN:chr7 LN:145441459
 9 | @SQ SN:chr8 LN:129401213
10 | @SQ SN:chr9 LN:124595110
11 | @SQ SN:chr10    LN:130694993
12 | @SQ SN:chr11    LN:122082543
13 | @SQ SN:chr12    LN:120129022
14 | @SQ SN:chr13    LN:120421639
15 | @SQ SN:chr14    LN:124902244
16 | @SQ SN:chr15    LN:104043685
17 | @SQ SN:chr16    LN:98207768
18 | @SQ SN:chr17    LN:94987271
19 | @SQ SN:chr18    LN:90702639
20 | @SQ SN:chr19    LN:61431566
21 | @SQ SN:chrX LN:171031299
22 | @SQ SN:chrY LN:91744698
23 | @SQ SN:chrM LN:16299
24 | @RG ID:Tumor_B16_F10_0810_127A  PL:ILLUMINA PU:HiSeq2500    LB:Tumor_B16_F10_0810_127A  DS:rnaseq   SM:Tumor_B16_F10_0810_127A  CN:MSSM
25 | HWI-D00273:119:C7FUMANXX:2:2314:9979:73514	163	chr4	45693473	255	13M145161N88M	=	45838774	145402	GTGCCAAAGAGACCGACACTCTTGGTGCTCGGGGTACAGTCTCCTCAAAAGTTCCCCTCTTCTGTTTTATAAGATAGGCTTTGAGGGTGCGATGCGCACGC	CCCCCGGGGG1@FBGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGFGGGGGGGGGGGGGGGGGG	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:1	AS:i:196	XS:A:-
26 | HWI-D00273:119:C7FUMANXX:2:1302:1900:88137	147	chr4	45799162	255	8M3285N93M	=	45799073	-3475	CACCGCAGGTCCTCAGGATGCTGACTGCCCGACTCTTGCTGCCCCGGCTCCTCTGCCTCCAGGGCAGGACTACCTCTTACTCTACAGCAGCTGCTCTCCCG	GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGFEGGGGGGFC@DB<GAACBB	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:0	AS:i:203	XS:A:+
27 | HWI-D00273:119:C7FUMANXX:2:1304:19043:33867	163	chr4	45799164	255	6M3285N95M	=	45802895	3832	CCGCAGGTCCTCAGGATGCTGACTGCCCGACTCTTGCTGCCCCGGCTCCTCTGCCTCCAGGGCAGGACTACCTCTTACTCTACAGCAGCTGCTCTCCCGAA	ABA=AE<CDGGGGGB@GGGGGFGGGGGEGDCGBGGGGGGGGGGDG>/EFGGEGGGGGE1@@FGGG<FGFGGBGD00C09FFCEC>FGGGBC00FDFG@.FG	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:0	AS:i:201	XS:A:+
28 | HWI-D00273:119:C7FUMANXX:2:1309:15476:29614	83	chr4	45799165	255	5M3285N96M	=	45799092	-3459	CGCAGGTCCTCAGGATGCTGACTGCCCGACTCTTGCTGCCCCGGCTCCTCTGCCTCCAGGGCAGGACTACCTCTTACTCTACAGCAGCTGCTCTCCCGAAC	GGGGGC<GGG@@CGEGGGGGGGFGGGGECGBGB>GGGGGGGGEBGGGGEGGGGGG>GGGF@DGGGGGGGGEGFC1FGGF>GGF11EE1DFBGE=/;A<3A3	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:1	AS:i:201	XS:A:+
29 | HWI-D00273:119:C7FUMANXX:2:2109:16809:15061	83	chr4	45799165	255	5M3285N96M	=	45799092	-3459	CGCAGGTCCTCAGGATGCTGACTGCCCGACTCTTGCTGCCCCGGCTCCTCTGCCTCCAGGGCAGGACTACCTCTTACTCTACAGCAGCTGCTCTCCCGAAC	GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGF=GGGECCCBC	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:0	AS:i:203	XS:A:+
30 | HWI-D00273:119:C7FUMANXX:2:2115:6797:61925	163	chr4	45802455	255	101M	=	45802642	288	GTCCTCAGGATGCTGACTGCCCGACTCTTGCTGCCCCGGCTCCTCTGCCTCCAGGGCAGGACTACCTCTTACTCTACAGCAGCTGCTCTCCCGAACCCAAT	BBC=BGCGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGFGGGGGGGGGGGFDEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDAGGGGEB	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:0	AS:i:200
31 | HWI-D00273:119:C7FUMANXX:2:1106:19607:76744	83	chr4	45802460	255	101M	=	45799110	-3451	CAGGATGCTGACTGCCCGACTCTTGCTGCCCCGGCTCCTCTGCCTCCAGGGCAGGACTACCTCTTACTCTACAGCAGCTGCTCTCCCGAACCCAATCCCAA	GGGGGFGGGGGGGGGGGGFGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGCCCCC	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:0	AS:i:201	XS:A:+
32 | HWI-D00273:119:C7FUMANXX:2:2109:18038:25161	83	chr4	45802466	255	101M	=	45799093	-3474	GCTGACTGCCCGACTCTTGCTGCCCCGGCTCCTCTGCCTCCAGGGCAGGACTACCTCTTACTCTACAGCAGCTGCTCTCCCGAACCCAATCCCAAACCCAG	EGGGGG:GGGDGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGFGGE>GGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGFGGCCCCC	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:0	AS:i:201	XS:A:+
33 | HWI-D00273:119:C7FUMANXX:2:2310:14056:99606	147	chr4	45802468	255	101M	=	45799077	-3492	TGACTGCCCGACTCTTGCTGCCCCGGCTCCTCTGCCTCCAGGGCAGGACTACCTCTTACTCTACAGCAGCTGCTCTCCCGAACCCAATCCCAAACCCAGAG	GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGFGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGCCCCC	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:0	AS:i:201	XS:A:+
34 | HWI-D00273:119:C7FUMANXX:2:2314:15707:68909	83	chr4	45802468	255	101M	=	45799080	-3489	TGACTGCCCGACTCTTGCTGCCCCGGCTCCTCTGCCTCCAGGGCAGGACTACCTCTTACTCTACAGCAGCTGCTCTCCCGAACCCAATCCCAAACCCAGAG	CGGDGGGGGGGGEEFDGGADGGGEBGGGGGGE8>GGGGFCG@CGFDEGGF:DGFF>GGGGEGDGEGGGGGGGGFDGGGGGDGGEGDDE>BGGGFFGB@@BB	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:3	AS:i:195	XS:A:+
35 | HWI-D00273:119:C7FUMANXX:2:1110:20436:75562	99	chr4	45802487	255	101M	=	45802556	170	GCCCCGGCTCCTCTGCCTCCAGGGCAGGACTACCTCTTACTCTACAGCAGCTGCTCTCCCGAACCCAATCCCAAACCCAGAGATTTGCTACAACAAGCTGT	CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGGGGGGGGGGGGGGGGGGGG	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:0	AS:i:200
36 | HWI-D00273:119:C7FUMANXX:2:1206:18456:69465	99	chr4	45802491	255	101M	=	45802525	135	CGGCTCCTCTGCCTCCAGGGCAGGACTACCTCTTACTCTACAGCAGCTGCTCTCCCGAACCCAATCCCAAACCCAGAGATTTGCTACAACAAGCTGTTCAT	CBCCCCFGEGGGGGGGGGGGGGGFGGGGGGGGFGGGGGGGGGGGGGGGEEDGGGGEGGGGGGGGGGGGGFGGGGGGGBCGFGGGGGGGGGGGGG>FFGGGG	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:0	AS:i:200
37 | HWI-D00273:119:C7FUMANXX:2:2310:16660:100528	163	chr4	45802498	255	101M	=	45802719	322	TCTGCCTCCAGGGCAGGACTACCTCTTACTCTACAGCAGCTGCTCTCCCGAACCCAATCCCAAACCCAGAGATTTGCTACAACAAGCTGTTCATCAACAAC	BCCBCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGBB	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:0	AS:i:200
38 | HWI-D00273:119:C7FUMANXX:2:1206:18456:69465	147	chr4	45802525	255	101M	=	45802491	-135	ACTCTACAGCAGCTGCTCTCCCGAACCCAATCCCAAACCCAGAGATTTGCTACAACAAGCTGTTCATCAACAACGAGTGGCATGATGCGGTCAGCAAAAAG	8GGGE>>@GGGGGCFC8E=GGGGGCGBGD@@GGGGGDGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDCGDGGGGGGGGGGGBBBAB	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:0	AS:i:200
39 | HWI-D00273:119:C7FUMANXX:2:1216:11959:66492	83	chr4	45802534	255	101M	=	45799111	-3524	CAGCTGCTCTCCCGAACCCAATCCCAAACCCAGAGATTTGCTACAACAAGCTGTTCATCAACAACGAGTGGCATGATGCGGTCAGCAAAAAGACCTTCCCC	GDGGGGGGGGGGGGF=GGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGFGGFGGGGGGGGGGFFGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGBCCCC	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:0	AS:i:201	XS:A:+
40 | 


--------------------------------------------------------------------------------
/tests/data/b16.f10/b16.f10.127a.phip.chr9.82927102.refG.altT.sam:
--------------------------------------------------------------------------------
 1 | @HD VN:1.0  SO:coordinate
 2 | @SQ SN:chr1 LN:195471971
 3 | @SQ SN:chr2 LN:182113224
 4 | @SQ SN:chr3 LN:160039680
 5 | @SQ SN:chr4 LN:156508116
 6 | @SQ SN:chr5 LN:151834684
 7 | @SQ SN:chr6 LN:149736546
 8 | @SQ SN:chr7 LN:145441459
 9 | @SQ SN:chr8 LN:129401213
10 | @SQ SN:chr9 LN:124595110
11 | @SQ SN:chr10    LN:130694993
12 | @SQ SN:chr11    LN:122082543
13 | @SQ SN:chr12    LN:120129022
14 | @SQ SN:chr13    LN:120421639
15 | @SQ SN:chr14    LN:124902244
16 | @SQ SN:chr15    LN:104043685
17 | @SQ SN:chr16    LN:98207768
18 | @SQ SN:chr17    LN:94987271
19 | @SQ SN:chr18    LN:90702639
20 | @SQ SN:chr19    LN:61431566
21 | @SQ SN:chrX LN:171031299
22 | @SQ SN:chrY LN:91744698
23 | @SQ SN:chrM LN:16299
24 | @RG ID:Tumor_B16_F10_0810_127A  PL:ILLUMINA PU:HiSeq2500    LB:Tumor_B16_F10_0810_127A  DS:rnaseq   SM:Tumor_B16_F10_0810_127A  CN:MSSM
25 | @PG ID:STAR PN:STAR VN:STAR_2.4.0g1 CL:STAR   --runThreadN 10   --genomeDir /sc/orga/projects/PBG/REFERENCES/mm10/star/Mus_musculus.GRCm38.75.processed.overhang75   --readFilesIn /sc/orga/scratch/shahh06/GCF/outgoing/ProductionQC/QC_E006.C039_Finnigan_C57BL_6_B16_RNASeq.PE.RNASeqPolyA.RAPiD.Mouse/Tumor_B16_F10_0810_127A/Raw/RNA.IlluminaHiSeq2500.PolyA/Tumor_B16_F10_0810_127A_ACTGAT_L002_R1_001.C7FUMANXX.fastq.gz   /sc/orga/scratch/shahh06/GCF/outgoing/ProductionQC/QC_E006.C039_Finnigan_C57BL_6_B16_RNASeq.PE.RNASeqPolyA.RAPiD.Mouse/Tumor_B16_F10_0810_127A/Raw/RNA.IlluminaHiSeq2500.PolyA/Tumor_B16_F10_0810_127A_ACTGAT_L002_R2_001.C7FUMANXX.fastq.gz      --readFilesCommand zcat      --outFileNamePrefix /sc/orga/scratch/shahh06/GCF/outgoing/ProductionQC/QC_E006.C039_Finnigan_C57BL_6_B16_RNASeq.PE.RNASeqPolyA.RAPiD.Mouse/Tumor_B16_F10_0810_127A/Processed/RAPiD.2_0_0/star/accepted_hits   --outStd SAM   --outReadsUnmapped Fastx   --outSAMmode Full   --outSAMstrandField intronMotif   --chimSegmentMin 15   --chimJunctionOverhangMin 15   --sjdbGTFfile /sc/orga/projects/PBG/REFERENCES/mm10/tophat/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.gtf
26 | @CO user command line: STAR --chimSegmentMin 15 --chimJunctionOverhangMin 15 --outSAMstrandField intronMotif --genomeDir /sc/orga/projects/PBG/REFERENCES/mm10/star/Mus_musculus.GRCm38.75.processed.overhang75 --sjdbGTFfile /sc/orga/projects/PBG/REFERENCES/mm10/tophat/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.gtf --runThreadN 10 --outReadsUnmapped Fastx --outStd SAM --outSAMmode Full --outFileNamePrefix /sc/orga/scratch/shahh06/GCF/outgoing/ProductionQC/QC_E006.C039_Finnigan_C57BL_6_B16_RNASeq.PE.RNASeqPolyA.RAPiD.Mouse/Tumor_B16_F10_0810_127A/Processed/RAPiD.2_0_0/star/accepted_hits --readFilesCommand zcat --readFilesIn /sc/orga/scratch/shahh06/GCF/outgoing/ProductionQC/QC_E006.C039_Finnigan_C57BL_6_B16_RNASeq.PE.RNASeqPolyA.RAPiD.Mouse/Tumor_B16_F10_0810_127A/Raw/RNA.IlluminaHiSeq2500.PolyA/Tumor_B16_F10_0810_127A_ACTGAT_L002_R1_001.C7FUMANXX.fastq.gz /sc/orga/scratch/shahh06/GCF/outgoing/ProductionQC/QC_E006.C039_Finnigan_C57BL_6_B16_RNASeq.PE.RNASeqPolyA.RAPiD.Mouse/Tumor_B16_F10_0810_127A/Raw/RNA.IlluminaHiSeq2500.PolyA/Tumor_B16_F10_0810_127A_ACTGAT_L002_R2_001.C7FUMANXX.fastq.gz
27 | HWI-D00273:119:C7FUMANXX:2:1210:3717:80737	163	chr9	82926503	255	29M503N72M	=	82927087	685	TCAAGAACAAACACCTCATCTTCATGACCCATTAGAACATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTAT	?ABBBGGGGGGGGGG1@CBFGGGG1:BFFEGGFGGGGGGCGGGGGGGGGGGGEGGGGGGGFGGGG>FGGGGGGGGCGG>GGGGGGGGGGGGGG>GGFDGGE	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:0	AS:i:202	XS:A:-
28 | HWI-D00273:119:C7FUMANXX:2:1108:2867:16351	83	chr9	82926503	255	29M503N72M	=	82926413	-694	TCAAGAACAAACACCTCATCTTCATGACCCATTAGAACATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTAT	GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCCCCC	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:0	AS:i:202	XS:A:-
29 | HWI-D00273:119:C7FUMANXX:2:2314:11512:21608	83	chr9	82926503	255	29M503N72M	=	82926413	-694	TCAAGAACAAACACCTCATCTTCATGACCCATTAGAACATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTAT	GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFC?1GFGGCCCCC	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:0	AS:i:202	XS:A:-
30 | HWI-D00273:119:C7FUMANXX:2:1306:10026:100751	99	chr9	82926504	255	28M503N73M	=	82929045	3870	CAAGAACAAACACCTCATCTTCATGACCCATTAGAACATGTATCAGTTGACCAGTANNNGNATTNCANANNTTNAGAGTCATGTTATTAACTGCAGTTATA	?:A:0FG1EFG0EFGGGGGGGG:FFFGEFD>FG@FGGGBGGGGGGGB11BCE1<FG###:#=00#==#=##<=#==FFFGGGE@>D>DGCGGGGGGGGGDE	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:0	AS:i:157	XS:A:-
31 | HWI-D00273:119:C7FUMANXX:2:2301:2974:21602	99	chr9	82926504	255	28M503N73M	=	82929045	3870	CAAGAACAAACACCTCATCTTCATGACCCATTAGAACATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATA	CCCCCGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGEEGGGGGGGGGGGGGGGGGGGGGDFGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:1	AS:i:201	XS:A:-
32 | HWI-D00273:119:C7FUMANXX:2:2304:3748:77089	147	chr9	82926506	255	26M503N75M	=	82926422	-688	AGAACAAACACCTCATCTTCATGACCCATTAGAACATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCATGTATAAC	GGGGGBE0E=0@0C0F:BGGGC11>GGCGGCGCGGGGGGGGGEEGF:FCGGDFF1GFEBCGGGGGGGGF1EGGGE1=1C@F1GGGDFGGG>GGGDGA0BB?	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:2	AS:i:198	XS:A:-
33 | HWI-D00273:119:C7FUMANXX:2:1215:3447:97938	99	chr9	82926508	255	24M503N77M	=	82926516	612	AACAAACACCTCATCTTCATGACCCATTAGAACATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCATGTATAACTG	CCCCBFGGGGGGGGGGGGGGGGGGGBGGGGGGGGGEGG@GDEGEGGGGEGGGGGGEDGGFGGGGGBDCGGGGGGGGGGGGGGGGGGGEGGEGGGGGGGCG=	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:4	AS:i:196	XS:A:-
34 | HWI-D00273:119:C7FUMANXX:2:2114:3120:28476	147	chr9	82926510	255	22M503N79M	=	82909869	-17245	CAAACACCTCATCTTCATGACCCATTAGAACATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTG	EGGEE=GGGGGGCFECGGGGGFGGGGGGGGEGFGGGGGGGGGGGGGGGGGGGGGGGF>BGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFGGGGCCCCC	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:0	AS:i:202	XS:A:-
35 | HWI-D00273:119:C7FUMANXX:2:1213:20576:91910	147	chr9	82926516	255	16M503N85M	=	82926426	-694	CCTCATCTTCATGACCCATTAGAACATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCATGTATAACTGTGTTGTCA	GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCCCBA	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:3	AS:i:196	XS:A:-
36 | HWI-D00273:119:C7FUMANXX:2:1215:3447:97938	147	chr9	82926516	255	16M503N85M	=	82926508	-612	CCTCATCTTCATGACCCATTAGAACATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCATGTATAACTGTGTTGTCA	GGEGEGGGGGFGGGGGGGGDB@EGGGGGGGGGGGGGGDGGGDGDGGFGGGGGGFGGGGGGGGGGGGDF>GGFGFGGFEFGGGCGGGGFBFGGGGGGBCBAB	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:4	AS:i:196	XS:A:-
37 | HWI-D00273:119:C7FUMANXX:2:2110:3107:94042	147	chr9	82926516	255	16M503N85M	=	82915412	-11708	CCTCATCTTCATGACCCATTAGAACATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCA	GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCGGGGGGGGGGGGGGGGGGGCBBBA	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:0	AS:i:203	XS:A:-
38 | HWI-D00273:119:C7FUMANXX:2:2313:1725:79080	147	chr9	82926516	255	16M503N85M	=	82926413	-707	CCTCATCTTCATGACCCATTAGAACATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCA	>GCFGGCGGGGGGCGGGGGGGGGGEGGGGGGGFFGGGGGEGGGGGGGGGGGGDFFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGBBBB@	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:0	AS:i:202	XS:A:-
39 | HWI-D00273:119:C7FUMANXX:2:2314:5945:11999	147	chr9	82926516	255	16M503N85M	=	82913858	-13262	CCTCATCTTCATGACCCATTAGAACATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCA	GGFGGGGGGGGGGGGGGGEGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGFFGGGGGGGGGGGGGGGGGGCCCCC	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:1	AS:i:201	XS:A:-
40 | HWI-D00273:119:C7FUMANXX:2:2216:12319:64123	147	chr9	82926521	255	11M503N90M	=	82926426	-699	TCTTCATGACCCATTAGAACATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCG	CGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGFGGFFGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGBCCCC	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:0	AS:i:202	XS:A:-
41 | HWI-D00273:119:C7FUMANXX:2:2315:15040:51160	99	chr9	82926522	255	10M503N91M	=	82929000	3807	CTTCATGACCCATTAGAACATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGA	CCCCCGGGGGGGGGGGGGCFGGGGGGGGGGGGGGGGGGGEFGGGGGGGGGCGGGGG1FEGCCGGGGGGGG>FGGGCGGGGGGEGGGGGGGGGEGCGGGGGG	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:0	AS:i:203	XS:A:-
42 | HWI-D00273:119:C7FUMANXX:2:2212:8037:3803	147	chr9	82926528	255	4M503N97M	=	82915344	-11788	GACCCATTAGAACATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAA	C>GGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGBBBCC	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:0	AS:i:203	XS:A:-
43 | HWI-D00273:119:C7FUMANXX:2:2305:20190:47439	83	chr9	82926528	255	4M503N97M	=	82926438	-694	GACCCATTAGAACATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAA	FGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGCCCCC	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:1	AS:i:202	XS:A:-
44 | HWI-D00273:119:C7FUMANXX:2:1307:19253:35846	163	chr9	82926529	255	3M503N98M	=	82927122	1605	ACCCATTAGAACATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCATGTATAACTGTGTTGTCATGTCGATCCCAAG	CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGF	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:2	AS:i:199	XS:A:-
45 | HWI-D00273:119:C7FUMANXX:2:2203:3890:70174	83	chr9	82926529	255	3M503N98M	=	82915307	-11826	ACCCATTAGAACATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCACAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAAG	FGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGFE?1GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCCCCC	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:1	AS:i:199	XS:A:-
46 | HWI-D00273:119:C7FUMANXX:2:2308:11032:56946	99	chr9	82927034	255	2S99M	=	82927179	1157	CCCCATTAGAACATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAAG	CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGG	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:0	AS:i:199	XS:A:-
47 | HWI-D00273:119:C7FUMANXX:2:1311:15268:66444	163	chr9	82927036	255	101M	=	82928837	2007	ATTAGAACATGTATCAGTTGACCGGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAAGCTAC	AABB@1CGFGCGEGCGGGGGFGEGGG<DFGBFGGFCCF1FGFEGFEGGGGGGDDGGGGGGC>FGG1@F@DGGG>DFGGGGGGGGGGGGBF@FGEGGGGGGF	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:1	AS:i:199	XS:A:-
48 | HWI-D00273:119:C7FUMANXX:2:1315:19055:88914	99	chr9	82927038	255	101M	=	82928101	1794	TAGAACATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCATGTATAACTGTGTTGTCATGTCGATCCCAAGCTACCA	ABB==EFC@FCCBDFG>BGCEGD1;BFF1F=FD1:GGGGG:BFGGGGBECGGGGGGGFGGGGGGGGE>DG>FGG?DGEEG0FFDFGG@FF<CG>D0FG00?	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:2	AS:i:197	XS:A:-
49 | HWI-D00273:119:C7FUMANXX:2:2311:7334:96601	99	chr9	82927042	255	3S98M	=	82929066	5464	GGGACATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAAGCTACCAT	CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGFGEGGGEGGGGGEGGG>GG	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:0	AS:i:200	XS:A:-
50 | HWI-D00273:119:C7FUMANXX:2:1201:13659:67863	83	chr9	82927042	255	101M	=	82915362	-11781	ACATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCATGTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGT	GGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGFFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFCCCCB	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:2	AS:i:197	XS:A:-
51 | HWI-D00273:119:C7FUMANXX:2:1301:12664:14291	163	chr9	82927043	255	101M	=	82928108	1796	CATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTT	CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGGGGGGGFGGGGEGGGGGGGGGGGGGGGGGGGGGG	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:0	AS:i:201	XS:A:-
52 | HWI-D00273:119:C7FUMANXX:2:2301:14292:28834	163	chr9	82927043	255	101M	=	82928108	1796	CATGTATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTT	BBC@BGGGGGGGGGGGG0EFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:0	AS:i:201	XS:A:-
53 | HWI-D00273:119:C7FUMANXX:2:2216:5381:79114	163	chr9	82927048	255	1S100M	=	82927143	1107	CATCAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTT	CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:0	AS:i:200	XS:A:-
54 | HWI-D00273:119:C7FUMANXX:2:1112:7261:43382	83	chr9	82927050	255	101M	=	82926425	-726	CAGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCA	0GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFGGGGCCCCC	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:0	AS:i:200
55 | HWI-D00273:119:C7FUMANXX:2:2204:4664:88087	147	chr9	82927051	255	101M	=	82915351	-11801	AGTTGACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCAT	GGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGCBCCB	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:0	AS:i:201	XS:A:-
56 | HWI-D00273:119:C7FUMANXX:2:2207:2698:99061	99	chr9	82927056	255	1S100M	=	82927129	1085	CACCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTT	CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGEGGGGG	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:0	AS:i:200	XS:A:-
57 | HWI-D00273:119:C7FUMANXX:2:1102:11259:82311	99	chr9	82927057	255	1S99M1S	=	82928997	3269	TCCAGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTTT	CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFGGGGGGGGGGGGEGGGGEGGGGGGGGGGGGGFGGGGGGGGGGGDGGGGGGGG#	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:0	AS:i:199	XS:A:-
58 | HWI-D00273:119:C7FUMANXX:2:1103:19599:62009	163	chr9	82927057	255	101M	=	82928131	1805	CCGGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTTGT	CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:1	AS:i:199	XS:A:-
59 | HWI-D00273:119:C7FUMANXX:2:2212:7108:51045	99	chr9	82927057	255	101M	=	82927141	1096	CCCGTATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTTGT	CCCCCGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:1	AS:i:199	XS:A:-
60 | HWI-D00273:119:C7FUMANXX:2:2208:4517:81955	163	chr9	82927061	255	101M	=	82929000	3268	TATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCATGTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTTGTGATT	BBB@?=B1EFEGGGGGGGG;1C@1F11EGGGGGGBCGEC11F1EFGBE@GGGGGGGGGGG>F1DG@:FG>FGGGE11:FBCFDGCF@FGGGGGCGGGGGGG	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:2	AS:i:197	XS:A:-
61 | HWI-D00273:119:C7FUMANXX:2:2210:18779:40897	99	chr9	82927061	255	101M	=	82927088	128	TATAAAAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTTGTGATT	3AABB@@1FGEFGGGGDGF@>F>FGCCEG@GGGG>1C@FGGGEGGGGGGGFEGDDEGGGGGGG0C0BEFDDGF>DFGGDDFFGGGG>F>GCCGGGGGECCG	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:1	AS:i:198
62 | HWI-D00273:119:C7FUMANXX:2:1312:18199:96205	99	chr9	82927062	255	101M	=	82928990	3257	ATAAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTTGTGATTT	BCCCBGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGG@FGGGGFEEDGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGBGGGG	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:0	AS:i:201	XS:A:-
63 | HWI-D00273:119:C7FUMANXX:2:1305:7949:79617	163	chr9	82927064	255	101M	=	82929002	3267	AAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTTGTGATTTTG	BBCCCGG@DGCFFGGGC@1FGGGGGGGGEDGC>FGGGEGGGGGGG>FGGGG1DFGGGGGD:<0F>GGGGGGEGEGGDFGGGGGGGGGFGGGG<GGGGGGG0	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:0	AS:i:201	XS:A:-
64 | HWI-D00273:119:C7FUMANXX:2:2202:11211:92043	99	chr9	82927064	255	101M	=	82928121	1788	AAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTTGTGATTTTG	BCBCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFDGGGGGGGGGGGGGDGEGGGGG0	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:0	AS:i:201	XS:A:-
65 | HWI-D00273:119:C7FUMANXX:2:2312:4705:10287	99	chr9	82927064	255	101M	=	82928121	1788	AAGAATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTTGTGATTTTG	BBABBGGGGGGGGGGGGGGGG;FFGGGGGGGGGGFGECDF@FBGDGGFCFGGBFGGGGGGGGGBGGGGDGFF@D;F0FFGCGG0EFEEGCGGGG:@DG@00	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:0	AS:i:201	XS:A:-
66 | HWI-D00273:119:C7FUMANXX:2:2207:11885:95718	147	chr9	82927068	255	101M	=	82926439	-730	ATTCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTTGTGATTTTGTCTT	GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGCCCCC	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:1	AS:i:200	XS:A:-
67 | HWI-D00273:119:C7FUMANXX:2:1107:14661:34101	163	chr9	82927070	255	101M	=	82928864	2000	TCCAAACTTTCAGAGTCATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTTGTGATTTTGTCTTCT	BCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGFGGGGEGGDGGGGGGGGGGGGGGGGGGGGGGGGGGGGG@@GGGFGGGGGG	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:0	AS:i:201	XS:A:-
68 | HWI-D00273:119:C7FUMANXX:2:1309:14773:99693	163	chr9	82927070	255	101M	=	82927095	1037	TCCAAACTTTCAGAGTCATGTTATTAACTGCATGTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTTGTGATTTTGTCTTCT	BBCBCGGGGGGGGGGGGGGGG>FFGGFGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:4	AS:i:193	XS:A:-
69 | HWI-D00273:119:C7FUMANXX:2:2111:17160:4559	163	chr9	82927070	255	1S100M	=	82927127	1069	CTCCAAACTTTCAGAGTCATGTTATTAACTGCATGTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTTGTGATTTTGTCTTC	BBCBAGGGGGGGGGGGGGGGD=BDGEGEGCGGGGGFGEGGGGDGFGGGGGGGGEGGGGGGGCBGGGEGGGGGGGGGGGGGGGGFGGGGDGGGGGGGGGGGD	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:2	AS:i:196	XS:A:-
70 | HWI-D00273:119:C7FUMANXX:2:2101:5725:47047	147	chr9	82927075	255	101M	=	82926472	-704	ACTTTCAGAGTCATGTTATTAACTGCATGTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTTGTGATTTTGTCTTCTATGCC	FGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGBCCCC	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:2	AS:i:198	XS:A:-
71 | HWI-D00273:119:C7FUMANXX:2:1208:15555:44669	83	chr9	82927086	255	101M	=	82926496	-691	CATGTTATTAACTGCATGTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTTGTGATTTTGTCTTCTATGCCTTGAAGATTTT	>GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGCCCCC	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:2	AS:i:198	XS:A:-
72 | HWI-D00273:119:C7FUMANXX:2:2204:9269:81404	83	chr9	82927086	255	101M	=	82913858	-13329	CATGTTATTAACTGCATGTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTTGTGATTTTGTCTTCTATGCCTTGAAGATTTT	GGGGGGGGGGGGGGGGGGGGGGCGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCCCCC	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:2	AS:i:197	XS:A:-
73 | HWI-D00273:119:C7FUMANXX:2:2205:14913:32579	147	chr9	82927086	255	101M	=	82915369	-11818	CATGTTATTAACTGCATGTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTTGTGATTTTGTCTTCTATGCCTTGAAGATTTT	GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFEGGGGGGGGGGGGGGCCCCC	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:2	AS:i:197	XS:A:-
74 | HWI-D00273:119:C7FUMANXX:2:1210:3717:80737	83	chr9	82927087	255	101M	=	82926503	-685	ATGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTTGTGATTTTGTCTTCTATGCCTTGAAGATTTTG	GGGGGGGGGGGGGFEGGGGEGFGGGGGGGGGFGGGGGGGGF>F@1GGGGGFGGFGGGGGGGGGGGGGGGEGGGGGGGF@EGGGGGGGDGGGGGGGGBCCBC	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:0	AS:i:202	XS:A:-
75 | HWI-D00273:119:C7FUMANXX:2:2210:18779:40897	147	chr9	82927088	255	101M	=	82927061	-128	TGTTATTAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTTGTGATTTTGTCTTCTATGCCTTGAAGATTTTGG	BGGGGGGCGCGGGGEGGGGGCGGCGGGGGGGGFCGFGEGF/EGDGGGC@GGDDGFDC@GGGDFGGGGDGGEDEGGGGGGGGGGEGGGGGBGFGGGGBCBBB	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:1	AS:i:198
76 | HWI-D00273:119:C7FUMANXX:2:1203:17569:36966	163	chr9	82927089	255	101M	=	82928999	3239	GTTATTAACTGCATGTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTTGTGATTTTGTCTTCTATGCCTTGAAGATTTTGGC	@BCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGG	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:2	AS:i:197	XS:A:-
77 | HWI-D00273:119:C7FUMANXX:2:2315:15400:52331	163	chr9	82927089	255	101M	=	82928999	3239	GTTATTAACTGCATGTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTTGTGATTTTGTCTTCTATGCCTTGAAGATTTTGGC	CCCBBGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGBGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGFGFGGGG>>FGGDCGGEGGGGCDG	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:2	AS:i:197	XS:A:-
78 | HWI-D00273:119:C7FUMANXX:2:1111:4417:66340	99	chr9	82927094	255	95M911N6M	=	82927150	1068	TAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTTGTGATTTTGTCTTCTATGCCTTGAAGATTTTGGCCTGCT	BBBBBFGGGGGEFBGGFF=FDGGGGEGGGGGGGGGGGGGGGGGGGEGGGGGGGGEGGGGGGGGGGGFCGGGGGGGGGGGGGGGGD0CDGCGGGGGGGFEFE	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:0	AS:i:203	XS:A:-
79 | HWI-D00273:119:C7FUMANXX:2:2313:5631:84945	83	chr9	82927094	255	95M911N6M	=	82926482	-1624	TAACTGCAGTTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTTGTGATTTTGTCTTCTATGCCTTGAAGATTTTGGCCTGCT	GGGGGFGGGGGGGFGEGDGEGGGGGGGGG<BGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCCCCC	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:0	AS:i:203	XS:A:-
80 | HWI-D00273:119:C7FUMANXX:2:1309:14773:99693	83	chr9	82927095	255	94M911N7M	=	82927070	-1037	AACTGCATGTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTTGTGATTTTGTCTTCTATGCCTTGAAGATTTTGGCCTGCTG	FGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGGGGGGGGGGFGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFCCCCC	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:4	AS:i:193	XS:A:-
81 | HWI-D00273:119:C7FUMANXX:2:1214:11764:82618	163	chr9	82927098	255	91M911N10M	=	82927156	1070	TGCAGTTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTTGTGATTTTGTCTTCTATGCCTTGAAGATTTTGGCCTGCTGGAC	CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFG	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:0	AS:i:203	XS:A:-
82 | HWI-D00273:119:C7FUMANXX:2:2207:1691:32051	83	chr9	82927098	255	91M911N10M	=	82915418	-12692	TGCAGTTATAACTGTGTTGTCATGTCGATCCCAAGCTACCATAGTTACTTTCATTTTTGTGATTTTGTCTTCTATGCCTTGAAGATTTTGGCCTGCTGGAC	GGGGGGGGGGFEFGGGGGGGGGGGGF@GGGGDGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCCCCC	RG:Z:Tumor_B16_F10_0810_127A	NH:i:1	HI:i:1	nM:i:0	AS:i:203	XS:A:-
83 | 


--------------------------------------------------------------------------------
/tests/data/b16.f10/b16.f10.Phip.vcf:
--------------------------------------------------------------------------------
1 | ##reference=mm10
2 | #chr    pos id  ref alt qual    filter  info
3 | chr9	82927102	.	GT	TG	.	.	.
4 | 


--------------------------------------------------------------------------------
/tests/data/b16.f10/b16.f10.Wdr13.vcf:
--------------------------------------------------------------------------------
1 | ##reference=mm10
2 | #chr    pos id  ref alt qual    filter  info
3 | chrX	8125624	.	C	A	.	.	.
4 | 


--------------------------------------------------------------------------------
/tests/data/b16.f10/b16.not-expressed.vcf:
--------------------------------------------------------------------------------
1 | ##reference=mm10
2 | #chr	pos	id	ref	alt	qual	filter	info
3 | chr4	45802539	.	G	C	.	.	.
4 | chr13	5864876	.	C	CG	.	.	.
5 | 


--------------------------------------------------------------------------------
/tests/data/b16.f10/b16.vcf:
--------------------------------------------------------------------------------
1 | ##reference=mm10
2 | #chr	pos	id	ref	alt	qual	filter	info
3 | chr4	45802539	.	G	C	.	.	.
4 | chr9	82927102	.	G	T	.	.	.
5 | chr11	101177505	.	T	C	.	.	.
6 | chr13	5864876	.	C	CG	.	.	.
7 | chrX	8125624	.	C	A	.	.	.
8 | 


--------------------------------------------------------------------------------
/tests/test_cancer_driver_gene.py:
--------------------------------------------------------------------------------
 1 | # Licensed under the Apache License, Version 2.0 (the "License");
 2 | # you may not use this file except in compliance with the License.
 3 | # You may obtain a copy of the License at
 4 | #
 5 | #       http://www.apache.org/licenses/LICENSE-2.0
 6 | #
 7 | # Unless required by applicable law or agreed to in writing, software
 8 | # distributed under the License is distributed on an "AS IS" BASIS,
 9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 | # See the License for the specific language governing permissions and
11 | # limitations under the License.
12 | 
13 | 
14 | from vaxrank.gene_pathway_check import (
15 |     GenePathwayCheck,
16 |     _IFNG_RESPONSE_COLUMN_NAME,
17 |     _CLASS_I_MHC_COLUMN_NAME,
18 |     _DRIVER_GENE_COLUMN_NAME,
19 |     _DRIVER_VARIANT_COLUMN_NAME
20 | )
21 | from varcode import Variant
22 | 
23 | from .common import eq_
24 | 
25 | 
26 | def test_HRAS_G13C_in_cancer_driver_genes():
27 |     HRAS_G13C = Variant("11", 534286, "C", "A", "GRCh37")
28 |     effect = HRAS_G13C.effects().top_priority_effect()
29 |     eq_(effect.gene.name, "HRAS")
30 |     eq_(effect.short_description, "p.G13C")
31 |     gene_pathway_check = GenePathwayCheck()
32 |     variant_info = gene_pathway_check.make_variant_dict(HRAS_G13C)
33 |     assert not variant_info[_IFNG_RESPONSE_COLUMN_NAME]
34 |     assert not variant_info[_CLASS_I_MHC_COLUMN_NAME]
35 |     # even though it's a RAS G13 variant, it's not actually that common
36 |     # and thus didn't make the threshold for our source dataset
37 |     assert not variant_info[_DRIVER_VARIANT_COLUMN_NAME]
38 |     assert variant_info[_DRIVER_GENE_COLUMN_NAME]
39 | 
40 | 
41 | def test_HRAS_G13V_in_cancer_driver_genes_and_variants():
42 |     HRAS_G13V = Variant("11", 534285, "C", "A", "GRCh37")
43 |     effect = HRAS_G13V.effects().top_priority_effect()
44 |     eq_(effect.gene.name, "HRAS")
45 |     eq_(effect.short_description, "p.G13V")
46 |     gene_pathway_check = GenePathwayCheck()
47 |     variant_info = gene_pathway_check.make_variant_dict(HRAS_G13V)
48 |     assert not variant_info[_IFNG_RESPONSE_COLUMN_NAME]
49 |     assert not variant_info[_CLASS_I_MHC_COLUMN_NAME]
50 |     assert variant_info[_DRIVER_VARIANT_COLUMN_NAME]
51 |     assert variant_info[_DRIVER_GENE_COLUMN_NAME]
52 | 


--------------------------------------------------------------------------------
/tests/test_epitope_prediction.py:
--------------------------------------------------------------------------------
  1 | # Licensed under the Apache License, Version 2.0 (the "License");
  2 | # you may not use this file except in compliance with the License.
  3 | # You may obtain a copy of the License at
  4 | #
  5 | #       http://www.apache.org/licenses/LICENSE-2.0
  6 | #
  7 | # Unless required by applicable law or agreed to in writing, software
  8 | # distributed under the License is distributed on an "AS IS" BASIS,
  9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 10 | # See the License for the specific language governing permissions and
 11 | # limitations under the License.
 12 | 
 13 | from mhctools import RandomBindingPredictor
 14 | from pyensembl import genome_for_reference_name
 15 | from varcode import Variant
 16 | from vaxrank.epitope_prediction import predict_epitopes, EpitopePrediction
 17 | from vaxrank.mutant_protein_fragment import MutantProteinFragment
 18 | from vaxrank.vaccine_peptide import VaccinePeptide
 19 | 
 20 | from .common import eq_, ok_
 21 | 
 22 | mouse_genome = genome_for_reference_name("GRCm38")  # shared by the tests below
 23 | 
 24 | def test_reference_peptide_logic():
 25 | 
 26 |     wdr13_transcript = mouse_genome.transcripts_by_name("Wdr13-201")[0]
 27 | 
 28 |     protein_fragment = MutantProteinFragment(
 29 |         variant=Variant('X', '8125624', 'C', 'A'),
 30 |         gene_name='Wdr13',
 31 |         amino_acids='KLQGHSAPVLDVIVNCDESLLASSD',
 32 |         mutant_amino_acid_start_offset=12,
 33 |         mutant_amino_acid_end_offset=13,
 34 |         n_overlapping_reads=71,
 35 |         n_alt_reads=25,
 36 |         n_ref_reads=46,
 37 |         n_alt_reads_supporting_protein_sequence=2,
 38 |         supporting_reference_transcripts=[wdr13_transcript])
 39 | 
 40 |     epitope_predictions = predict_epitopes(
 41 |         mhc_predictor=RandomBindingPredictor(["H-2-Kb"]),
 42 |         protein_fragment=protein_fragment,
 43 |         genome=mouse_genome)
 44 | 
 45 |     # occurs in protein ENSMUSP00000033506
 46 |     prediction_occurs_in_reference = epitope_predictions[('NCDESLLAS', 'H-2-Kb')]
 47 |     prediction_does_not_occur_in_reference = epitope_predictions[('LDVIVNCDE', 'H-2-Kb')]
 48 |     ok_(prediction_occurs_in_reference.occurs_in_reference)
 49 |     ok_(not prediction_does_not_occur_in_reference.occurs_in_reference)
 50 | 
 51 |     # construct a simple vaccine peptide having these two predictions, which makes it easy to check
 52 |     # for mutant/WT scores from single contributors
 53 |     vaccine_peptide = VaccinePeptide(
 54 |         protein_fragment,
 55 |         [prediction_occurs_in_reference, prediction_does_not_occur_in_reference])
 56 | 
 57 |     eq_(prediction_occurs_in_reference.logistic_epitope_score(),
 58 |         vaccine_peptide.wildtype_epitope_score)
 59 |     eq_(prediction_does_not_occur_in_reference.logistic_epitope_score(),
 60 |         vaccine_peptide.mutant_epitope_score)
 61 | 
 62 | def test_mhc_predictor_error():
 63 |     wdr13_transcript = mouse_genome.transcripts_by_name("Wdr13-201")[0]
 64 | 
 65 |     protein_fragment = MutantProteinFragment(
 66 |         variant=Variant('X', '8125624', 'C', 'A'),
 67 |         gene_name='Wdr13',
 68 |         amino_acids='KLQGHSAPVLDVIVNCDESLLASSD',
 69 |         mutant_amino_acid_start_offset=12,
 70 |         mutant_amino_acid_end_offset=13,
 71 |         n_overlapping_reads=71,
 72 |         n_alt_reads=25,
 73 |         n_ref_reads=46,
 74 |         n_alt_reads_supporting_protein_sequence=2,
 75 |         supporting_reference_transcripts=[wdr13_transcript])
 76 | 
 77 |     # throws an error for each prediction, make sure vaxrank doesn't fall down
 78 |     class FakeMHCPredictor:
 79 |         def predict_subsequences(self, x):
 80 |             raise ValueError('I throw an error in your general direction')
 81 | 
 82 |     epitope_predictions = predict_epitopes(
 83 |         mhc_predictor=FakeMHCPredictor(),
 84 |         protein_fragment=protein_fragment,
 85 |         genome=mouse_genome)
 86 | 
 87 |     eq_(0, len(epitope_predictions))
 88 | 
 89 | def test_EpitopePrediction_json_serialization():
 90 |     e = EpitopePrediction(
 91 |         allele="HLA-A*02:01",
 92 |         peptide_sequence="SIINFEQL",
 93 |         ic50=2.0,
 94 |         wt_peptide_sequence="SIINFEKL",
 95 |         wt_ic50=2000.0,
 96 |         percentile_rank=0.3,
 97 |         prediction_method_name="ImaginationMHCpan",
 98 |         overlaps_mutation=True,
 99 |         source_sequence="SSIINFEQL",
100 |         offset=1,
101 |         occurs_in_reference=False)
102 |     json = e.to_json()
103 |     e2 = EpitopePrediction.from_json(json)
104 |     eq_(e, e2)
105 | 


--------------------------------------------------------------------------------
/tests/test_manufacturability.py:
--------------------------------------------------------------------------------
 1 | # Licensed under the Apache License, Version 2.0 (the "License");
 2 | # you may not use this file except in compliance with the License.
 3 | # You may obtain a copy of the License at
 4 | #
 5 | #       http://www.apache.org/licenses/LICENSE-2.0
 6 | #
 7 | # Unless required by applicable law or agreed to in writing, software
 8 | # distributed under the License is distributed on an "AS IS" BASIS,
 9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 | # See the License for the specific language governing permissions and
11 | # limitations under the License.
12 | 
13 | from vaxrank.manufacturability import ManufacturabilityScores
14 | 
15 | def test_c_terminal_proline():
16 |     scores = ManufacturabilityScores.from_amino_acids("A" * 6 + "P")
17 |     assert scores.c_terminal_proline
18 | 
19 |     scores = ManufacturabilityScores.from_amino_acids("A" * 7)
20 |     assert not scores.c_terminal_proline
21 | 
22 | 
23 | def test_n_terminal_cysteine():
24 |     scores = ManufacturabilityScores.from_amino_acids("C" + 6 * "A")
25 |     assert scores.difficult_n_terminal_residue
26 | 
27 |     scores = ManufacturabilityScores.from_amino_acids(7 * "A")
28 |     assert not scores.difficult_n_terminal_residue
29 | 
30 | 
31 | def test_n_terminal_glutamic_acid():
32 |     scores = ManufacturabilityScores.from_amino_acids("E" + 6 * "A")
33 |     assert scores.difficult_n_terminal_residue
34 | 
35 |     scores = ManufacturabilityScores.from_amino_acids(7 * "A")
36 |     assert not scores.difficult_n_terminal_residue
37 | 
38 | 
39 | def test_n_terminal_glutamine():
40 |     scores = ManufacturabilityScores.from_amino_acids("Q" + 6 * "A")
41 |     assert scores.difficult_n_terminal_residue
42 | 
43 |     scores = ManufacturabilityScores.from_amino_acids(7 * "A")
44 |     assert not scores.difficult_n_terminal_residue
45 | 
46 | 
47 | def test_asp_pro_bond_count():
48 |     scores = ManufacturabilityScores.from_amino_acids("A" * 7)
49 |     assert scores.asparagine_proline_bond_count == 0
50 | 
51 |     scores = ManufacturabilityScores.from_amino_acids("NP" + "A" * 7 + "NP")
52 |     assert scores.asparagine_proline_bond_count == 2
53 | 
54 | 
55 | def test_cysteine_count():
56 |     scores = ManufacturabilityScores.from_amino_acids("C" * 7)
57 |     assert scores.cysteine_count == 7
58 | 
59 | 
60 | def cterm_7mer_gravy_score():
61 |     scores = ManufacturabilityScores.from_amino_acids("QLFY" + "A" * 7)
62 |     # hydropathy of alanine is 1.8 from Kyte & Doolittle 1982
63 |     assert scores.cterm_7mer_gravy_score == 1.8
64 | 
65 | 
66 | def max_7mer_gravy_score():
67 |     scores = ManufacturabilityScores.from_amino_acids("H" * 3 + "A" * 7)
68 |     # hydropathy of alanine is 1.8, histidine is -3.2
69 |     # from Kyte & Doolittle 1982
70 |     assert scores.max_7mer_gravy_score == 1.8
71 | 


--------------------------------------------------------------------------------
/tests/test_mutant_protein_sequence.py:
--------------------------------------------------------------------------------
  1 | # Licensed under the Apache License, Version 2.0 (the "License");
  2 | # you may not use this file except in compliance with the License.
  3 | # You may obtain a copy of the License at
  4 | #
  5 | #       http://www.apache.org/licenses/LICENSE-2.0
  6 | #
  7 | # Unless required by applicable law or agreed to in writing, software
  8 | # distributed under the License is distributed on an "AS IS" BASIS,
  9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 10 | # See the License for the specific language governing permissions and
 11 | # limitations under the License.
 12 | 
 13 | from mhctools import RandomBindingPredictor
 14 | 
 15 | from vaxrank.cli import make_vaxrank_arg_parser, run_vaxrank_from_parsed_args
 16 | from vaxrank.mutant_protein_fragment import MutantProteinFragment
 17 | 
 18 | from .common import eq_, almost_eq_
 19 | from .testing_helpers import data_path
 20 | 
 21 | # RandomBindingPredictor instance for the H-2-Kb and H-2-Db alleles.
 22 | # NOTE(review): no test visible in this module references this object; the
 23 | # tests below choose their predictor through --mhc-predictor instead.
 24 | random_binding_predictor = RandomBindingPredictor(["H-2-Kb", "H-2-Db"])
 22 | 
 23 | 
 24 | def check_mutant_amino_acids(variant, mutant_protein_fragment):
 25 |     predicted_effect = mutant_protein_fragment.predicted_effect()
 26 |     expected_amino_acids = predicted_effect.aa_alt
 27 |     vaxrank_mutant_amino_acids = mutant_protein_fragment.amino_acids[
 28 |         mutant_protein_fragment.mutant_amino_acid_start_offset:
 29 |         mutant_protein_fragment.mutant_amino_acid_end_offset]
 30 | 
 31 |     eq_(expected_amino_acids, vaxrank_mutant_amino_acids,
 32 |         "Expected amino acids '%s' for %s but got '%s' from vaxrank in '%s' %d:%d" % (
 33 |             expected_amino_acids,
 34 |             predicted_effect,
 35 |             vaxrank_mutant_amino_acids,
 36 |             mutant_protein_fragment.amino_acids,
 37 |             mutant_protein_fragment.mutant_amino_acid_start_offset,
 38 |             mutant_protein_fragment.mutant_amino_acid_end_offset))
 39 |     assert all(
 40 |         t.gene.name in variant.gene_names
 41 |         for t in
 42 |         mutant_protein_fragment.supporting_reference_transcripts), \
 43 |         "Wrong gene names for %s" % (mutant_protein_fragment.supporting_reference_transcripts,)
 44 | 
 45 | def test_mutant_amino_acids_in_mm10_chrX_8125624_refC_altA_pS460I():
 46 |     # there are two co-occurring variants in the RNAseq data but since
 47 |     # they don't happen in the same codon then we're considering the Varcode
 48 |     # annotation to be correct
 49 |     # TODO:
 50 |     #  deal with phasing of variants explicitly so that both
 51 |     #  variant positions are considered mutated
 52 |     arg_parser = make_vaxrank_arg_parser()
 53 |     args = arg_parser.parse_args([
 54 |         "--vcf", data_path("b16.f10/b16.f10.Wdr13.vcf"),
 55 |         "--bam", data_path("b16.f10/b16.combined.sorted.bam"),
 56 |         "--vaccine-peptide-length", "15",
 57 |         "--padding-around-mutation", "5",
 58 |         "--mhc-predictor", "random",
 59 |         "--mhc-alleles", "HLA-A*02:01",
 60 |     ])
 61 |     results = run_vaxrank_from_parsed_args(args)
 62 |     ranked_list = results.ranked_vaccine_peptides
 63 | 
 64 |     for variant, vaccine_peptides in ranked_list:
 65 |         eq_(
 66 |             1,
 67 |             len(vaccine_peptides),
 68 |             "Expected 1 vaccine peptide for variant '%s' but got %d" % (
 69 |                 variant, len(vaccine_peptides)))
 70 |         vaccine_peptide = vaccine_peptides[0]
 71 |         mutant_protein_fragment = vaccine_peptide.mutant_protein_fragment
 72 |         check_mutant_amino_acids(variant, mutant_protein_fragment)
 73 | 
 74 | def test_mutant_amino_acids_in_mm10_chr9_82927102_refGT_altTG_pT441H():
 75 |     # In the Isovar repository this test is weird because the VCF only
 76 |     # mentions the G>T variant but doesn't include the subsequent nucleotide
 77 |     # change T>G. To avoid having to think about phasing of variants I changed
 78 |     # the VCF in vaxrank to contain a GT>TG variant.
 79 |     arg_parser = make_vaxrank_arg_parser()
 80 |     args = arg_parser.parse_args([
 81 |         "--vcf", data_path("b16.f10/b16.f10.Phip.vcf"),
 82 |         "--bam", data_path("b16.f10/b16.combined.sorted.bam"),
 83 |         "--vaccine-peptide-length", "15",
 84 |         "--padding-around-mutation", "5",
 85 |         "--mhc-predictor", "random",
 86 |         "--mhc-alleles", "HLA-A*02:01",
 87 |     ])
 88 |     results = run_vaxrank_from_parsed_args(args)
 89 |     ranked_list = results.ranked_vaccine_peptides
 90 | 
 91 |     for variant, vaccine_peptides in ranked_list:
 92 |         vaccine_peptide = vaccine_peptides[0]
 93 |         mutant_protein_fragment = vaccine_peptide.mutant_protein_fragment
 94 |         check_mutant_amino_acids(
 95 |             variant,
 96 |             mutant_protein_fragment)
 97 | 
 98 | def test_keep_top_k_epitopes():
 99 |     arg_parser = make_vaxrank_arg_parser()
100 |     keep_k_epitopes = 3
101 |     args = arg_parser.parse_args([
102 |         "--vcf", data_path("b16.f10/b16.f10.Phip.vcf"),
103 |         "--bam", data_path("b16.f10/b16.combined.sorted.bam"),
104 |         "--vaccine-peptide-length", "15",
105 |         "--padding-around-mutation", "5",
106 |         "--num-epitopes-per-vaccine-peptide", str(keep_k_epitopes),
107 |         "--mhc-predictor", "netmhc",
108 |         "--mhc-alleles", "HLA-A*02:01",
109 |     ])
110 |     results = run_vaxrank_from_parsed_args(args)
111 | 
112 |     ranked_list = results.ranked_vaccine_peptides
113 | 
114 |     for variant, vaccine_peptides in ranked_list:
115 |         vaccine_peptide = vaccine_peptides[0]
116 |         eq_(keep_k_epitopes, len(vaccine_peptide.mutant_epitope_predictions))
117 |         # recompute the expected score, make sure the top-k argument from ranked_vaccine_peptides()
118 |         # propagated as expected
119 |         mutant_epitope_score = sum(
120 |             p.logistic_epitope_score() for p in vaccine_peptide.mutant_epitope_predictions)
121 |         almost_eq_(mutant_epitope_score, vaccine_peptide.mutant_epitope_score)
122 | 
123 | def test_mutant_protein_fragment_serialization():
124 |     arg_parser = make_vaxrank_arg_parser()
125 |     keep_k_epitopes = 3
126 |     args = arg_parser.parse_args([
127 |         "--vcf", data_path("b16.f10/b16.f10.Phip.vcf"),
128 |         "--bam", data_path("b16.f10/b16.combined.sorted.bam"),
129 |         "--vaccine-peptide-length", "15",
130 |         "--padding-around-mutation", "5",
131 |         "--num-epitopes-per-vaccine-peptide", str(keep_k_epitopes),
132 |         "--mhc-predictor", "netmhc",
133 |         "--mhc-alleles", "HLA-A*02:01",
134 |     ])
135 |     results = run_vaxrank_from_parsed_args(args)
136 | 
137 |     ranked_list = results.ranked_vaccine_peptides
138 | 
139 |     for _, vaccine_peptides in ranked_list:
140 |         mutant_protein_fragment = vaccine_peptides[0].mutant_protein_fragment
141 |         json_str = mutant_protein_fragment.to_json()
142 |         deserialized = MutantProteinFragment.from_json(json_str)
143 |         eq_(mutant_protein_fragment, deserialized)
144 | 


--------------------------------------------------------------------------------
/tests/test_shell_script.py:
--------------------------------------------------------------------------------
  1 | # Licensed under the Apache License, Version 2.0 (the "License");
  2 | # you may not use this file except in compliance with the License.
  3 | # You may obtain a copy of the License at
  4 | #
  5 | #       http://www.apache.org/licenses/LICENSE-2.0
  6 | #
  7 | # Unless required by applicable law or agreed to in writing, software
  8 | # distributed under the License is distributed on an "AS IS" BASIS,
  9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 10 | # See the License for the specific language governing permissions and
 11 | # limitations under the License.
 12 | 
 13 | 
 14 | from os.path import getsize
 15 | from mock import patch
 16 | 
 17 | from tempfile import NamedTemporaryFile
 18 | 
 19 | import pandas as pd
 20 | from xlrd import open_workbook
 21 | 
 22 | from vaxrank.cli import main as run_shell_script
 23 | 
 24 | from .testing_helpers import data_path
 25 | 
 26 | # Baseline command-line arguments shared by the report tests below: the B16
 27 | # VCF/BAM test data combined with the "random" MHC predictor.
 28 | cli_args_for_b16_seqdata = [
 29 |     "--vcf", data_path("b16.f10/b16.vcf"),
 30 |     "--bam", data_path("b16.f10/b16.combined.bam"),
 31 |     "--vaccine-peptide-length", "25",
 32 |     "--mhc-predictor", "random",
 33 |     "--mhc-alleles", "H2-Kb,H2-Db",
 34 |     "--padding-around-mutation", "5",
 35 |     "--count-mismatches-after-variant",
 36 | ]
 37 | 
 38 | # Same inputs, but with the "netmhcpan" predictor restricted to 8-mer epitopes.
 39 | # NOTE(review): presumably requires NetMHCpan to be installed locally -- confirm.
 40 | cli_args_for_b16_seqdata_real_predictor = [
 41 |     "--vcf", data_path("b16.f10/b16.vcf"),
 42 |     "--bam", data_path("b16.f10/b16.combined.bam"),
 43 |     "--vaccine-peptide-length", "25",
 44 |     "--mhc-predictor", "netmhcpan",
 45 |     "--mhc-alleles", "H2-Kb,H2-Db",
 46 |     "--mhc-epitope-lengths", "8",
 47 |     "--padding-around-mutation", "5",
 48 |     "--count-mismatches-after-variant"
 49 | ]
 46 | 
 47 | 
 48 | def test_ascii_report():
 49 |     with NamedTemporaryFile(mode="r") as f:
 50 |         ascii_args = cli_args_for_b16_seqdata + ["--output-ascii-report", f.name]
 51 |         run_shell_script(ascii_args)
 52 |         contents = f.read()
 53 |         lines = contents.split("\n")
 54 |         assert len(lines) > 0
 55 | 
 56 | 
 57 | def test_ascii_report_real_netmhc_predictor():
 58 |     with NamedTemporaryFile(mode="r") as f:
 59 |         ascii_args = cli_args_for_b16_seqdata_real_predictor + [
 60 |             "--output-ascii-report", f.name]
 61 |         run_shell_script(ascii_args)
 62 |         contents = f.read()
 63 |         lines = contents.split("\n")
 64 |         assert len(lines) > 0
 65 |         no_variants_text = 'No variants'
 66 |         assert no_variants_text not in contents
 67 | 
 68 | 
 69 | def test_json_report():
 70 |     with NamedTemporaryFile(mode="r") as f:
 71 |         json_args = cli_args_for_b16_seqdata + ["--output-json-file", f.name]
 72 |         run_shell_script(json_args)
 73 |         contents = f.read()
 74 |         lines = contents.split("\n")
 75 |         assert len(lines) > 0
 76 | 
 77 | 
 78 | def test_csv_report():
 79 |     with NamedTemporaryFile(mode="r") as f:
 80 |         csv_args = cli_args_for_b16_seqdata + ["--output-csv", f.name]
 81 |         run_shell_script(csv_args)
 82 |         contents = f.read()
 83 |         lines = contents.split("\n")
 84 |         assert len(lines) > 1
 85 | 
 86 | 
 87 | def test_all_variant_csv_report():
 88 |     with NamedTemporaryFile(mode="r") as f:
 89 |         all_csv_args = cli_args_for_b16_seqdata + [
 90 |             "--output-passing-variants-csv", f.name,
 91 |             # TODO: make this flag not necessary
 92 |             "--output-csv", f.name + "ignored"]
 93 |         run_shell_script(all_csv_args)
 94 |         contents = f.read()
 95 |         lines = contents.split("\n")
 96 |         assert len(lines) > 1
 97 |         # make sure it can be a valid dataframe
 98 |         f.seek(0)
 99 |         df = pd.read_csv(f)
100 |         assert len(df) > 1
101 | 
102 | def test_isovar_csv():
103 |     with NamedTemporaryFile(mode="r") as f:
104 |         isovar_csv_args = cli_args_for_b16_seqdata + [
105 |             "--output-isovar-csv", f.name,
106 |             # TODO: make this flag not necessary
107 |             "--output-csv", f.name + "ignored"
108 |         ]
109 |         run_shell_script(isovar_csv_args)
110 |         df = pd.read_csv(f)
111 |         assert len(df) > 1
112 | 
113 | def test_xlsx_report():
114 |     with NamedTemporaryFile(mode="r") as f:
115 |         xlsx_args = cli_args_for_b16_seqdata + ["--output-xlsx-report", f.name]
116 |         run_shell_script(xlsx_args)
117 |         book = open_workbook(f.name)
118 |         assert book.nsheets > 1
119 | 
120 | 
121 | 
122 | 
123 | def test_html_report():
124 |     with NamedTemporaryFile(mode="r") as f:
125 |         html_args = cli_args_for_b16_seqdata + ["--output-html", f.name]
126 |         run_shell_script(html_args)
127 |         contents = f.read()
128 |         lines = contents.split("\n")
129 |         assert len(lines) > 1
130 | 
131 | 
132 | def test_pdf_report():
133 |     with NamedTemporaryFile(mode="rb") as f:
134 |         pdf_args = cli_args_for_b16_seqdata + ["--output-pdf-report", f.name]
135 |         run_shell_script(pdf_args)
136 |         assert getsize(f.name) > 1
137 | 
138 | 
139 | @patch('vaxrank.core_logic.vaccine_peptides_for_variant')
140 | def test_report_no_peptides(mock_vaccine_peptides_for_variant):
141 |     # simulate case where we have no epitopes for any variant
142 |     mock_vaccine_peptides_for_variant.return_value = []
143 |     with NamedTemporaryFile(mode="r") as f:
144 |         html_args = cli_args_for_b16_seqdata + ["--output-csv", f.name]
145 |         # test that this doesn't crash and that the CSV output is empty
146 |         run_shell_script(html_args)
147 |         contents = f.read()
148 |         assert contents == ''
149 | 
150 | 


--------------------------------------------------------------------------------
/tests/testing_helpers.py:
--------------------------------------------------------------------------------
 1 | # Licensed under the Apache License, Version 2.0 (the "License");
 2 | # you may not use this file except in compliance with the License.
 3 | # You may obtain a copy of the License at
 4 | #
 5 | #       http://www.apache.org/licenses/LICENSE-2.0
 6 | #
 7 | # Unless required by applicable law or agreed to in writing, software
 8 | # distributed under the License is distributed on an "AS IS" BASIS,
 9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 | # See the License for the specific language governing permissions and
11 | # limitations under the License.
12 | 
13 | from __future__ import absolute_import, print_function, division
14 | from os.path import join, dirname
15 | 
16 | 
17 | def data_path(name):
18 |     """
19 |     Return the absolute path to a file in the vaxrank/test/data directory.
20 |     The name specified should be relative to vaxrank/test/data.
21 |     """
22 |     return join(dirname(__file__), "data", name)
23 | 


--------------------------------------------------------------------------------
/vaxrank/__init__.py:
--------------------------------------------------------------------------------
1 | # Package version string; vaxrank/cli.py imports it to build the text printed
2 | # by the --version flag.
3 | __version__ = "1.5.0"


--------------------------------------------------------------------------------
/vaxrank/cli.py:
--------------------------------------------------------------------------------
  1 | # Licensed under the Apache License, Version 2.0 (the "License");
  2 | # you may not use this file except in compliance with the License.
  3 | # You may obtain a copy of the License at
  4 | #
  5 | #       http://www.apache.org/licenses/LICENSE-2.0
  6 | #
  7 | # Unless required by applicable law or agreed to in writing, software
  8 | # distributed under the License is distributed on an "AS IS" BASIS,
  9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 10 | # See the License for the specific language governing permissions and
 11 | # limitations under the License.
 12 | 
 13 | import sys
 14 | import logging
 15 | import logging.config
 16 | import pkg_resources
 17 | 
 18 | from argparse import ArgumentParser
 19 | 
 20 | from isovar import isovar_results_to_dataframe
 21 | from isovar.cli import (make_isovar_arg_parser, run_isovar_from_parsed_args,)
 22 | from mhctools.cli import (
 23 |     add_mhc_args,
 24 |     mhc_alleles_from_args,
 25 |     mhc_binding_predictor_from_args,
 26 | )
 27 | 
 28 | import pandas as pd
 29 | import serializable
 30 | from varcode.cli import variant_collection_from_args
 31 | 
 32 | from . import __version__
 33 | from .core_logic import run_vaxrank
 34 | from .gene_pathway_check import GenePathwayCheck
 35 | from .report import (
 36 |     make_ascii_report,
 37 |     make_html_report,
 38 |     make_pdf_report,
 39 |     make_csv_report,
 40 |     make_minimal_neoepitope_report,
 41 |     TemplateDataCreator,
 42 | )
 43 | from .patient_info import PatientInfo
 44 | 
 45 | # Module-level logger, named after this module via __name__.
 46 | logger = logging.getLogger(__name__)
 46 | 
 47 | 
 48 | def make_vaxrank_arg_parser():
 49 |     # create common parser with the --version flag
 50 |     parent_parser = ArgumentParser('parent', add_help=False)
 51 |     parent_parser.add_argument('--version', action='version', version='Vaxrank %s' % (__version__,))
 52 | 
 53 |     # inherit commandline options from Isovar
 54 |     arg_parser = make_isovar_arg_parser(
 55 |         prog="vaxrank",
 56 |         description=(
 57 |             "Select personalized vaccine peptides from cancer variants, "
 58 |             "expression data, and patient HLA type."),
 59 |         parents=[parent_parser],
 60 |     )
 61 |     add_mhc_args(arg_parser)
 62 |     add_vaccine_peptide_args(arg_parser)
 63 |     add_output_args(arg_parser)
 64 |     add_optional_output_args(arg_parser)
 65 |     add_supplemental_report_args(arg_parser)
 66 |     return arg_parser
 67 | 
 68 | 
 69 | def cached_run_arg_parser():
 70 |     arg_parser = ArgumentParser(
 71 |         prog="vaxrank",
 72 |         description=(
 73 |             "Select personalized vaccine peptides from cancer variants, "
 74 |             "expression data, and patient HLA type."),
 75 |     )
 76 |     arg_parser.add_argument(
 77 |         "--input-json-file",
 78 |         default="",
 79 |         help="Path to JSON file containing results of vaccine peptide report")
 80 |     add_output_args(arg_parser)
 81 |     add_optional_output_args(arg_parser)
 82 |     add_supplemental_report_args(arg_parser)
 83 |     return arg_parser
 84 | 
 85 | 
 86 | 
 87 | # Lets the user specify whether they want to see particular sections in the report.
 88 | def add_optional_output_args(arg_parser):
 89 |     manufacturability_args = arg_parser.add_mutually_exclusive_group(required=False)
 90 |     manufacturability_args.add_argument(
 91 |         "--include-manufacturability-in-report",
 92 |         dest="manufacturability",
 93 |         action="store_true")
 94 | 
 95 |     manufacturability_args.add_argument(
 96 |         "--no-manufacturability-in-report",
 97 |         dest="manufacturability",
 98 |         action="store_false")
 99 |     arg_parser.set_defaults(manufacturability=True)
100 | 
101 |     wt_epitope_args = arg_parser.add_mutually_exclusive_group(required=False)
102 |     wt_epitope_args.add_argument(
103 |         "--include-non-overlapping-epitopes-in-report",
104 |         dest="wt_epitopes",
105 |         action="store_true",
106 |         help="Set to true to include a report section for each vaccine peptide containing "
107 |              "strong binders that do not overlap the mutation")
108 | 
109 |     wt_epitope_args.add_argument(
110 |         "--no-non-overlapping-epitopes-in-report",
111 |         dest="wt_epitopes",
112 |         action="store_false",
113 |         help="Set to false to exclude report information for each vaccine peptide about "
114 |              "strong binders that do not overlap the mutation")
115 |     arg_parser.set_defaults(wt_epitopes=True)
116 | 
117 | 
118 | def add_output_args(arg_parser):
119 |     output_args_group = arg_parser.add_argument_group("Output options")
120 | 
121 |     output_args_group.add_argument(
122 |         "--output-patient-id",
123 |         default="",
124 |         help="Patient ID to use in report")
125 | 
126 |     output_args_group.add_argument(
127 |         "--output-csv",
128 |         default="",
129 |         help="Name of CSV file which contains predicted sequences")
130 | 
131 |     output_args_group.add_argument(
132 |         "--output-ascii-report",
133 |         default="",
134 |         help="Path to ASCII vaccine peptide report")
135 | 
136 |     output_args_group.add_argument(
137 |         "--output-html-report",
138 |         default="",
139 |         help="Path to HTML vaccine peptide report")
140 | 
141 |     output_args_group.add_argument(
142 |         "--output-pdf-report",
143 |         default="",
144 |         help="Path to PDF vaccine peptide report")
145 | 
146 |     output_args_group.add_argument(
147 |         "--output-json-file",
148 |         default="",
149 |         help="Path to JSON file containing results of vaccine peptide report")
150 | 
151 |     output_args_group.add_argument(
152 |         "--output-xlsx-report",
153 |         default="",
154 |         help="Path to XLSX vaccine peptide report worksheet, one sheet per variant. This is meant "
155 |              "for use by the vaccine manufacturer.")
156 | 
157 |     output_args_group.add_argument(
158 |         "--output-neoepitope-report",
159 |         default="",
160 |         help="Path to XLSX neoepitope report, containing information focusing on short peptide "
161 |              "sequences.")
162 | 
163 |     output_args_group.add_argument(
164 |         "--output-reviewed-by",
165 |         default="",
166 |         help="Comma-separated list of reviewer names")
167 | 
168 |     output_args_group.add_argument(
169 |         "--output-final-review",
170 |         default="",
171 |         help="Name of final reviewer of report")
172 | 
173 |     output_args_group.add_argument(
174 |         "--output-passing-variants-csv",
175 |         default="",
176 |         help="Path to CSV file containing some metadata about every variant that has passed all "
177 |              "variant caller filters")
178 | 
179 |     output_args_group.add_argument(
180 |         "--output-isovar-csv",
181 |         default="",
182 |         help="Path to CSV file containing raw RNA counts and filtering metadata "
183 |              "for all variants (generated by Isovar)")
184 | 
185 |     output_args_group.add_argument(
186 |         "--log-path",
187 |         default="python.log",
188 |         help="File path to write the vaxrank Python log to")
189 | 
190 |     output_args_group.add_argument(
191 |         "--max-mutations-in-report",
192 |         default=None,
193 |         type=int,
194 |         help="Number of mutations to report")
195 | 
196 | 
197 | def add_vaccine_peptide_args(arg_parser):
198 |     vaccine_peptide_group = arg_parser.add_argument_group("Vaccine peptide options")
199 |     vaccine_peptide_group.add_argument(
200 |         "--vaccine-peptide-length",
201 |         default=25,
202 |         type=int,
203 |         help="Number of amino acids in the vaccine peptides. (default: %(default)s)")
204 | 
205 |     vaccine_peptide_group.add_argument(
206 |         "--padding-around-mutation",
207 |         default=5,
208 |         type=int,
209 |         help=(
210 |             "Number of off-center windows around the mutation to consider "
211 |             "as vaccine peptides. (default: %(default)s)"
212 |         ))
213 | 
214 |     vaccine_peptide_group.add_argument(
215 |         "--max-vaccine-peptides-per-mutation",
216 |         default=1,
217 |         type=int,
218 |         help=(
219 |             "Number of vaccine peptides to generate for each mutation. "
220 |             "(default: %(default)s)"
221 |         ))
222 | 
223 |     vaccine_peptide_group.add_argument(
224 |         "--min-epitope-score",
225 |         default=1e-10,
226 |         type=float,
227 |         help=(
228 |             "Ignore predicted MHC ligands whose normalized binding score "
229 |             "falls below this threshold. (default: %(default)s)"))
230 | 
231 |     vaccine_peptide_group.add_argument(
232 |         "--num-epitopes-per-vaccine-peptide",
233 |         type=int,
234 |         help=(
235 |             "Maximum number of mutant epitopes to consider when scoring "
236 |             "each vaccine peptide. (default: %(default)s)"))
237 | 
238 | 
239 | def add_supplemental_report_args(arg_parser):
240 |     report_args_group = arg_parser.add_argument_group("Supplemental report options")
241 |     report_args_group.add_argument(
242 |         "--cosmic_vcf_filename",
243 |         default="",
244 |         help="Local path to COSMIC vcf")
245 | 
246 | 
247 | def check_args(args):
248 |     if not (args.output_csv or
249 |             args.output_ascii_report or
250 |             args.output_html_report or
251 |             args.output_pdf_report or
252 |             args.output_json_file or
253 |             args.output_xlsx_report or
254 |             args.output_neoepitope_report or
255 |             args.output_passing_variants_csv or
256 |             args.output_isovar_csv):
257 |         raise ValueError(
258 |             "Must specify at least one of: --output-csv, "
259 |             "--output-xlsx-report, "
260 |             "--output-ascii-report, "
261 |             "--output-html-report, "
262 |             "--output-pdf-report, "
263 |             "--output-neoepitope-report, "
264 |             "--output-json-file, "
265 |             "--output-passing-variants-csv, "
266 |             "--output-isovar-csv")
267 | 
268 | def run_vaxrank_from_parsed_args(args):
269 |     mhc_predictor = mhc_binding_predictor_from_args(args)
270 | 
271 |     args.protein_sequence_length = (
272 |             args.vaccine_peptide_length + 2 * args.padding_around_mutation
273 |     )
274 | 
275 |     # Vaxrank is going to evaluate multiple vaccine peptides containing
276 |     # the same mutation so need a longer sequence from Isovar
277 |     isovar_results = run_isovar_from_parsed_args(args)
278 | 
279 |     if args.output_isovar_csv:
280 |         df = isovar_results_to_dataframe(isovar_results)
281 |         df.to_csv(args.output_isovar_csv, index=False)
282 | 
283 |     return run_vaxrank(
284 |         isovar_results=isovar_results,
285 |         mhc_predictor=mhc_predictor,
286 |         vaccine_peptide_length=args.vaccine_peptide_length,
287 |         max_vaccine_peptides_per_variant=args.max_vaccine_peptides_per_mutation,
288 |         min_epitope_score=args.min_epitope_score,
289 |         num_mutant_epitopes_to_keep=args.num_epitopes_per_vaccine_peptide)
290 | 
291 | def ranked_vaccine_peptides_with_metadata_from_parsed_args(args):
292 |     """
293 |     Computes all the data needed for report generation.
294 | 
295 |     Parameters
296 |     ----------
297 |     args : Namespace
298 |       Parsed user args from this run
299 | 
300 |     Returns a dictionary containing 3 items:
301 |     - ranked variant/vaccine peptide list
302 |     - a dictionary of command-line arguments used to generate it
303 |     - patient info object
304 |     """
305 | 
306 |     if hasattr(args, 'input_json_file'):
307 |         with open(args.input_json_file) as f:
308 | 
309 |             data = serializable.from_json(f.read())
310 |             # the JSON data from the previous run will have the older args saved, which may need to
311 |             # be overridden with args from this run (which all be output related)
312 |             data['args'].update(vars(args))
313 | 
314 |             # if we need to truncate the variant list based on max_mutations_in_report, do that here
315 |             if len(data['variants']) > args.max_mutations_in_report:
316 |                 data['variants'] = data['variants'][:args.max_mutations_in_report]
317 |             return data
318 |     # get various things from user args
319 |     mhc_alleles = mhc_alleles_from_args(args)
320 |     logger.info("MHC alleles: %s", mhc_alleles)
321 | 
322 |     variants = variant_collection_from_args(args)
323 |     logger.info("Variants: %s", variants)
324 | 
325 |     vaxrank_results = run_vaxrank_from_parsed_args(args)
326 | 
327 |     variants_count_dict = vaxrank_results.variant_counts()
328 |     assert len(variants) == variants_count_dict['num_total_variants'], \
329 |         "Len(variants) is %d but variants_count_dict came back with %d" % (
330 |             len(variants), variants_count_dict['num_total_variants'])
331 | 
332 |     if args.output_passing_variants_csv:
333 |         variant_metadata_dicts = vaxrank_results.variant_properties(
334 |             gene_pathway_check=GenePathwayCheck())
335 |         df = pd.DataFrame(variant_metadata_dicts)
336 |         df.to_csv(args.output_passing_variants_csv, index=False)
337 | 
338 |     ranked_variants_with_vaccine_peptides = vaxrank_results.ranked_vaccine_peptides
339 |     ranked_variants_with_vaccine_peptides_for_report = \
340 |         ranked_variants_with_vaccine_peptides[:args.max_mutations_in_report]
341 |     patient_info = PatientInfo(
342 |         patient_id=args.output_patient_id,
343 |         vcf_paths=variants.sources,
344 |         bam_path=args.bam,
345 |         mhc_alleles=mhc_alleles,
346 |         num_somatic_variants=variants_count_dict['num_total_variants'],
347 |         num_coding_effect_variants=variants_count_dict['num_coding_effect_variants'],
348 |         num_variants_with_rna_support=variants_count_dict['num_variants_with_rna_support'],
349 |         num_variants_with_vaccine_peptides=variants_count_dict['num_variants_with_vaccine_peptides']
350 |     )
351 |     # return variants, patient info, and command-line args
352 |     data = {
353 |         # TODO:
354 |         #  change this field to 'ranked_variants_with_vaccine_peptides'
355 |         #  but figure out how to do it in a backwards compatible way
356 |         'variants': ranked_variants_with_vaccine_peptides_for_report,
357 |         'patient_info': patient_info,
358 |         'args': vars(args),
359 |     }
360 |     logger.info('About to save args: %s', data['args'])
361 | 
362 |     # save JSON data if necessary. as of time of writing, vaxrank takes ~25 min to run,
363 |     # most of which is core logic. the formatting is super fast, and it can
364 |     # be useful to save the data to be able to iterate just on the formatting
365 |     if args.output_json_file:
366 |         with open(args.output_json_file, 'w') as f:
367 |             f.write(serializable.to_json(data))
368 |             logger.info('Wrote JSON report data to %s', args.output_json_file)
369 | 
370 |     return data
371 | 
372 | def configure_logging(args):
373 |     logging.config.fileConfig(
374 |         pkg_resources.resource_filename(
375 |             __name__,
376 |             'logging.conf'),
377 |         defaults={'logfilename': args.log_path})
378 | 
379 | def choose_arg_parser(args_list):
380 |     # TODO: replace this with a command sub-parser
381 |     if "--input-json-file" in args_list:
382 |         return cached_run_arg_parser()
383 |     else:
384 |         return make_vaxrank_arg_parser()
385 | 
386 | def parse_vaxrank_args(args_list):
387 |     arg_parser = choose_arg_parser(args_list)
388 |     return arg_parser.parse_args(args_list)
389 | 
390 | def main(args_list=None):
391 |     """
392 |     Script to generate vaccine peptide predictions from somatic cancer variants,
393 |     patient HLA type, and tumor RNA-seq data.
394 | 
395 |     Example usage:
396 |         vaxrank \
397 |             --vcf somatic.vcf \
398 |             --bam rnaseq.bam \
399 |             --vaccine-peptide-length 25 \
400 |             --output-csv vaccine-peptides.csv
401 |     """
402 |     if args_list is None:
403 |         args_list = sys.argv[1:]
404 |     # parse arguments, configure logging, then reject runs with no output requested
405 |     args = parse_vaxrank_args(args_list)
406 |     configure_logging(args)
407 |     logger.info(args)
408 |     check_args(args)
409 |     # compute the report data, or reload it when an input JSON file was given
410 |     data = ranked_vaccine_peptides_with_metadata_from_parsed_args(args)
411 | 
412 |     ranked_variants_with_vaccine_peptides = data['variants']
413 |     patient_info = data['patient_info']
414 |     args_for_report = data['args']
415 | 
416 |     ###################
417 |     # CSV-based reports
418 |     ###################
419 |     if args.output_csv or args.output_xlsx_report:
420 |         make_csv_report(
421 |             ranked_variants_with_vaccine_peptides,
422 |             excel_report_path=args.output_xlsx_report,
423 |             csv_report_path=args.output_csv)
424 | 
425 |     if args.output_neoepitope_report:
426 |         make_minimal_neoepitope_report(
427 |             ranked_variants_with_vaccine_peptides,
428 |             num_epitopes_per_peptide=args.num_epitopes_per_vaccine_peptide,
429 |             excel_report_path=args.output_neoepitope_report)
430 | 
431 |     ########################
432 |     # Template-based reports
433 |     ########################
434 |     # skip building template data when no template-based report was requested
435 |     if not (args.output_ascii_report or args.output_html_report or args.output_pdf_report):
436 |         return
437 |     # input_json_file may be absent from args, depending on which parser was chosen
438 |     input_json_file = args.input_json_file if hasattr(args, 'input_json_file') else None
439 |     template_data_creator = TemplateDataCreator(
440 |         ranked_variants_with_vaccine_peptides=ranked_variants_with_vaccine_peptides,
441 |         patient_info=patient_info,
442 |         final_review=args.output_final_review,
443 |         reviewers=args.output_reviewed_by,
444 |         args_for_report=args_for_report,
445 |         input_json_file=input_json_file,
446 |         cosmic_vcf_filename=args.cosmic_vcf_filename)
447 | 
448 |     template_data = template_data_creator.compute_template_data()
449 | 
450 |     if args.output_ascii_report:
451 |         make_ascii_report(
452 |             template_data=template_data,
453 |             ascii_report_path=args.output_ascii_report)
454 | 
455 |     if args.output_html_report:
456 |         make_html_report(
457 |             template_data=template_data,
458 |             html_report_path=args.output_html_report)
459 | 
460 |     if args.output_pdf_report:
461 |         make_pdf_report(
462 |             template_data=template_data,
463 |             pdf_report_path=args.output_pdf_report)
464 | 


--------------------------------------------------------------------------------
/vaxrank/core_logic.py:
--------------------------------------------------------------------------------
  1 | # Licensed under the Apache License, Version 2.0 (the "License");
  2 | # you may not use this file except in compliance with the License.
  3 | # You may obtain a copy of the License at
  4 | #
  5 | #       http://www.apache.org/licenses/LICENSE-2.0
  6 | #
  7 | # Unless required by applicable law or agreed to in writing, software
  8 | # distributed under the License is distributed on an "AS IS" BASIS,
  9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 10 | # See the License for the specific language governing permissions and
 11 | # limitations under the License.
 12 | 
 13 | 
 14 | import logging
 15 | 
 16 | from numpy import isclose
 17 | 
 18 | from .mutant_protein_fragment import MutantProteinFragment
 19 | from .epitope_prediction import predict_epitopes, slice_epitope_predictions
 20 | from .vaccine_peptide import VaccinePeptide
 21 | from .vaxrank_results import VaxrankResults
 22 | 
 23 | logger = logging.getLogger(__name__)
 24 | 
 25 | def run_vaxrank(
 26 |         isovar_results,
 27 |         mhc_predictor,
 28 |         vaccine_peptide_length=25,
 29 |         max_vaccine_peptides_per_variant=1,
 30 |         num_mutant_epitopes_to_keep=10000,
 31 |         min_epitope_score=0.0):
 32 |     """
 33 |     Parameters
 34 |     ----------
 35 |     isovar_results : list of isovar.IsovarResult
 36 |         Each IsovarResult corresponds to one somatic variant and its collection
 37 |          of protein sequences determined from RNA.
 38 | 
 39 |     mhc_predictor : mhctools.BasePredictor
 40 |         Object with predict_peptides method, used for making pMHC binding
 41 |         predictions
 42 | 
 43 |     vaccine_peptide_length : int
 44 |         Length of vaccine SLP to construct
 45 | 
 46 |     max_vaccine_peptides_per_variant : int
 47 |         Number of vaccine peptides to generate for each mutation.
 48 | 
 49 |     num_mutant_epitopes_to_keep : int, optional
 50 |         Number of top-ranking epitopes for each vaccine peptide to include in
 51 |         computation.
 52 | 
 53 |     min_epitope_score : float, optional
 54 |         Ignore peptides with binding predictions whose normalized score is less
 55 |         than this.
 56 |     """
 57 |     variant_to_vaccine_peptides_dict = create_vaccine_peptides_dict(
 58 |         isovar_results=isovar_results,
 59 |         mhc_predictor=mhc_predictor,
 60 |         vaccine_peptide_length=vaccine_peptide_length,
 61 |         max_vaccine_peptides_per_variant=max_vaccine_peptides_per_variant,
 62 |         num_mutant_epitopes_to_keep=num_mutant_epitopes_to_keep,
 63 |         min_epitope_score=min_epitope_score)
 64 |     ranked_list = ranked_vaccine_peptides(variant_to_vaccine_peptides_dict)
 65 | 
 66 |     return VaxrankResults(
 67 |         isovar_results=isovar_results,
 68 |         variant_to_vaccine_peptides_dict=variant_to_vaccine_peptides_dict,
 69 |         ranked_vaccine_peptides=ranked_list)
 70 | 
 71 | 
 72 | def create_vaccine_peptides_dict(
 73 |         isovar_results,
 74 |         mhc_predictor,
 75 |         vaccine_peptide_length=25,
 76 |         max_vaccine_peptides_per_variant=1,
 77 |         num_mutant_epitopes_to_keep=10 ** 5,
 78 |         min_epitope_score=0.0):
 79 |     """
 80 |     Parameters
 81 |     ----------
 82 |     isovar_results : list of isovar.IsovarResult
 83 |         List with one object per variant optionally containing protein sequences
 84 | 
 85 |     mhc_predictor : mhctools.BasePredictor
 86 |         Object with predict_peptides method, used for making pMHC binding
 87 |         predictions
 88 | 
 89 |     vaccine_peptide_length : int
 90 |         Length of vaccine SLP to construct
 91 | 
 92 |     max_vaccine_peptides_per_variant : int
 93 |         Number of vaccine peptides to generate for each mutation.
 94 | 
 95 |     num_mutant_epitopes_to_keep : int, optional
 96 |         Number of top-ranking epitopes for each vaccine peptide to include in
 97 |         computation.
 98 | 
 99 |     min_epitope_score : float, optional
100 |         Ignore peptides with binding predictions whose normalized score is less
101 |         than this.
102 | 
103 |     Returns
104 |     -------
105 |     Returns a dictionary of varcode.Variant objects to a list of
106 |     VaccinePeptides.
107 |     """
108 |     vaccine_peptides_dict = {}
109 |     for isovar_result in isovar_results:
110 |         variant = isovar_result.variant
111 |         vaccine_peptides = vaccine_peptides_for_variant(
112 |             isovar_result=isovar_result,
113 |             mhc_predictor=mhc_predictor,
114 |             vaccine_peptide_length=vaccine_peptide_length,
115 |             max_vaccine_peptides_per_variant=max_vaccine_peptides_per_variant,
116 |             num_mutant_epitopes_to_keep=num_mutant_epitopes_to_keep,
117 |             min_epitope_score=min_epitope_score)
118 | 
119 |         if any(x.contains_mutant_epitopes() for x in vaccine_peptides):
120 |             vaccine_peptides_dict[variant] = vaccine_peptides
121 | 
122 |     return vaccine_peptides_dict
123 | 
124 | def vaccine_peptides_for_variant(
125 |         isovar_result,
126 |         mhc_predictor,
127 |         vaccine_peptide_length,
128 |         max_vaccine_peptides_per_variant,
129 |         num_mutant_epitopes_to_keep=None,
130 |         min_epitope_score=0.0):
131 |     """
132 |     Build and rank the candidate vaccine peptides for a single variant.
133 | 
134 |     Parameters
135 |     ----------
136 |     isovar_result : isovar.IsovarResult
137 | 
138 |     mhc_predictor : mhctools.BasePredictor
139 |         Object with predict_peptides method, used for pMHC binding predictions
140 | 
141 |     vaccine_peptide_length : int
142 |         Length of vaccine SLP to construct
143 | 
144 |     max_vaccine_peptides_per_variant : int
145 |         Maximum number of vaccine peptides to return for this variant.
146 | 
147 |     num_mutant_epitopes_to_keep : int, optional
148 |         Number of top-ranking epitopes for each vaccine peptide to include in
149 |         computation.
150 | 
151 |     min_epitope_score : float, optional
152 |         Ignore peptides with binding predictions whose normalized score is less
153 |         than this.
154 | 
155 |     Returns
156 |     -------
157 |     Sorted list of VaccinePeptide objects; empty if none are suitable.
158 |     """
159 |     if not isovar_result.passes_all_filters:
160 |         # don't consider candidate vaccine peptides from variants which either
161 |         # failed their filters or don't have an RNA-derived protein sequence
162 |         return []
163 | 
164 |     variant = isovar_result.variant
165 |     long_protein_fragment = MutantProteinFragment.from_isovar_result(isovar_result)
166 | 
167 |     logger.info(
168 |         "Mutant protein fragment for %s: %s",
169 |         variant,
170 |         long_protein_fragment)
171 | 
172 |     epitope_predictions = predict_epitopes(
173 |         mhc_predictor=mhc_predictor,
174 |         protein_fragment=long_protein_fragment,
175 |         min_epitope_score=min_epitope_score,
176 |         genome=variant.ensembl).values()
177 | 
178 |     candidate_vaccine_peptides = []
179 | 
180 |     for offset, candidate_fragment in long_protein_fragment.sorted_subsequences(
181 |             subsequence_length=vaccine_peptide_length):
182 |         # keep only predictions that fall entirely inside this candidate window
183 |         subsequence_epitope_predictions = slice_epitope_predictions(
184 |             epitope_predictions,
185 |             start_offset=offset,
186 |             end_offset=offset + len(candidate_fragment))
187 |         # filter out peptides that have no epitopes
188 |         if not subsequence_epitope_predictions:
189 |             logger.info(
190 |                 "No epitope predictions for mutant protein fragment %s",
191 |                 candidate_fragment)
192 |             continue
193 |         # every sliced prediction must now refer to this candidate's sequence
194 |         assert all(
195 |             p.source_sequence == candidate_fragment.amino_acids
196 |             for p in subsequence_epitope_predictions)
197 | 
198 |         candidate_vaccine_peptide = VaccinePeptide(
199 |             mutant_protein_fragment=candidate_fragment,
200 |             epitope_predictions=subsequence_epitope_predictions,
201 |             num_mutant_epitopes_to_keep=num_mutant_epitopes_to_keep)
202 | 
203 |         logger.debug(
204 |             "%s, combined score: %0.4f",
205 |             candidate_vaccine_peptide,
206 |             candidate_vaccine_peptide.combined_score)
207 |         candidate_vaccine_peptides.append(candidate_vaccine_peptide)
208 | 
209 |     n_total_candidates = len(candidate_vaccine_peptides)
210 |     if n_total_candidates == 0:
211 |         logger.info("No candidate peptides for variant %s", variant.short_description)
212 |         return []
213 |     # a best score of ~0 cannot be used as a ratio denominator, so keep everything
214 |     max_score = max(vp.combined_score for vp in candidate_vaccine_peptides)
215 |     if isclose(max_score, 0.0):
216 |         filtered_candidate_vaccine_peptides = candidate_vaccine_peptides
217 |     else:
218 |         # only keep candidate vaccines that are within 1% of the maximum
219 |         # combined score
220 |         filtered_candidate_vaccine_peptides = [
221 |             vp
222 |             for vp in candidate_vaccine_peptides
223 |             if vp.combined_score / max_score > 0.99
224 |         ]
225 |     n_filtered = len(filtered_candidate_vaccine_peptides)
226 |     logger.info(
227 |         "Keeping %d/%d vaccine peptides for %s",
228 |         n_filtered,
229 |         n_total_candidates,
230 |         variant)
231 |     # defensive: the top-scoring candidate normally survives the filter above
232 |     if n_filtered == 0:
233 |         return []
234 | 
235 |     filtered_candidate_vaccine_peptides.sort(key=VaccinePeptide.lexicographic_sort_key)
236 | 
237 |     logger.debug("Top vaccine peptides for %s:", variant)
238 |     for i, vaccine_peptide in enumerate(filtered_candidate_vaccine_peptides):
239 |         logger.debug(
240 |             "%d) %s (combined score = %0.4f)",
241 |             i + 1,
242 |             vaccine_peptide,
243 |             vaccine_peptide.combined_score)
244 | 
245 |     return filtered_candidate_vaccine_peptides[:max_vaccine_peptides_per_variant]
246 | 
247 | 
248 | def ranked_vaccine_peptides(variant_to_vaccine_peptides_dict):
249 |     """
250 |     This function returns a sorted list whose first element is a Variant and whose second
251 |     element is a list of VaccinePeptide objects.
252 | 
253 |     Parameters
254 |     ----------
255 |     variant_to_vaccine_peptides_dict : dict
256 |         Dictionary from varcode.Variant to list of VaccinePeptide
257 | 
258 |     Returns list of (varcode.Variant, VaccinePeptide list) tuples
259 |     """
260 |     result_list = list(variant_to_vaccine_peptides_dict.items())
261 | 
262 |     def sort_key(variant_and_vaccine_peptides_pair):
263 |         vaccine_peptides = variant_and_vaccine_peptides_pair[1]
264 |         if len(vaccine_peptides) == 0:
265 |             return 0.0
266 |         else:
267 |             top_vaccine_peptide = vaccine_peptides[0]
268 |             return top_vaccine_peptide.combined_score
269 | 
270 |     # sort in descending order of combined (expression * mhc binding) scores
271 |     result_list.sort(key=sort_key, reverse=True)
272 |     return result_list
273 | 


--------------------------------------------------------------------------------
/vaxrank/data/class1-mhc-presentation-pathway.csv:
--------------------------------------------------------------------------------
 1 | Gene,Name,Function,Ensembl Gene ID
 2 | PSMB8,proteasome subunit beta 8,immunoproteasome,ENSG00000204264
 3 | PSMB9,proteasome subunit beta 9,immunoproteasome,ENSG00000240065
 4 | PSMB10,proteasome subunit beta 10,immunoproteasome,ENSG00000205220
 5 | TAP1,transporter associated with antigen processing 1,peptide transport,ENSG00000168394
 6 | TAP2,transporter associated with antigen processing 2,peptide transport,ENSG00000204267
 7 | CALR,calreticulin,peptide loading,ENSG00000179218
 8 | CANX,calnexin,peptide loading,ENSG00000127022
 9 | PDIA3,protein disulfide-isomerase A3,peptide loading,ENSG00000167004
10 | TAPBP,tapasin,peptide loading,ENSG00000231925
11 | TAPBPL,tapasin-related protein,peptide loading,ENSG00000139192
12 | NLRC5,NOD-like receptor family CARD domain containing 5,transcription factor,ENSG00000140853
13 | IRF1,interferon regulatory factor 1,transcription factor,ENSG00000125347
14 | ERAP1,endoplasmic reticulum aminopeptidase 1,peptidase,ENSG00000164307
15 | ERAP2,endoplasmic reticulum aminopeptidase 2,peptidase,ENSG00000164308
16 | B2M,beta-2 microglobulin,MHC subunit,ENSG00000166710
17 | HLA-A,human leukocyte antigen A,MHC subunit,ENSG00000206503
18 | HLA-B,human leukocyte antigen B,MHC subunit,ENSG00000234745
19 | HLA-C,human leukocyte antigen C,MHC subunit,ENSG00000204525
20 | 


--------------------------------------------------------------------------------
/vaxrank/data/interferon-gamma-response.csv:
--------------------------------------------------------------------------------
 1 | Gene,Name,Function,Ensembl Gene ID
 2 | IFNGR1,interferon gamma receptor 1,receptor,ENSG00000027697
 3 | IFNGR2,interferon gamma receptor 2,receptor,ENSG00000159128
 4 | STAT1,signal transducer and activator of transcription 1,signal transducer,ENSG00000115415
 5 | STAT2,signal transducer and activator of transcription 2,signal transducer,ENSG00000170581
 6 | STAT3,signal transducer and activator of transcription 3,signal transducer,ENSG00000168610
 7 | JAK1,Janus kinase 1,kinase,ENSG00000162434
 8 | JAK2,Janus kinase 2,kinase,ENSG00000096968
 9 | SOCS1,suppressor of cytokine signaling 1,negative regulator,ENSG00000185338
10 | SOCS3,suppressor of cytokine signaling 3,negative regulator,ENSG00000184557
11 | 


--------------------------------------------------------------------------------
/vaxrank/epitope_prediction.py:
--------------------------------------------------------------------------------
  1 | # Licensed under the Apache License, Version 2.0 (the "License");
  2 | # you may not use this file except in compliance with the License.
  3 | # You may obtain a copy of the License at
  4 | #
  5 | #     http://www.apache.org/licenses/LICENSE-2.0
  6 | #
  7 | # Unless required by applicable law or agreed to in writing, software
  8 | # distributed under the License is distributed on an "AS IS" BASIS,
  9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 10 | # See the License for the specific language governing permissions and
 11 | # limitations under the License.
 12 | 
 13 | from collections import OrderedDict
 14 | import traceback
 15 | import logging
 16 | 
 17 | import numpy as np
 18 | from serializable import Serializable
 19 | 
 20 | from .reference_proteome import ReferenceProteome
 21 | 
 22 | 
 23 | logger = logging.getLogger(__name__)
 24 | 
 25 | 
 26 | class EpitopePrediction(Serializable):
 27 |     def __init__(
 28 |             self,
 29 |             allele,
 30 |             peptide_sequence,
 31 |             wt_peptide_sequence,
 32 |             ic50,
 33 |             wt_ic50,
 34 |             percentile_rank,
 35 |             prediction_method_name,
 36 |             overlaps_mutation,
 37 |             source_sequence,
 38 |             offset,
 39 |             occurs_in_reference):
 40 |         self.allele = allele
 41 |         self.peptide_sequence = peptide_sequence
 42 |         self.wt_peptide_sequence = wt_peptide_sequence
 43 |         self.length = len(peptide_sequence)
 44 |         self.ic50 = ic50
 45 |         self.wt_ic50 = wt_ic50
 46 |         self.percentile_rank = percentile_rank
 47 |         self.prediction_method_name = prediction_method_name
 48 |         self.overlaps_mutation = overlaps_mutation
 49 |         self.source_sequence = source_sequence
 50 |         self.offset = offset
 51 |         self.overlaps_mutation = overlaps_mutation
 52 |         self.occurs_in_reference = occurs_in_reference
 53 | 
 54 |     @classmethod
 55 |     def from_dict(cls, d):
 56 |         """
 57 |         Deserialize EpitopePrediction from a dictionary of keywords.
 58 |         """
 59 |         d = d.copy()
 60 |         if "length" in d:
 61 |             # length argument removed in version 1.1.0
 62 |             del d["length"]
 63 |         return cls(**d)
 64 | 
 65 |     def logistic_epitope_score(
 66 |             self,
 67 |             midpoint=350.0,
 68 |             width=150.0,
 69 |             ic50_cutoff=5000.0):  # TODO: add these default values into CLI as arguments
 70 |         """
 71 |         Map from IC50 values to score where 1.0 = strong binder, 0.0 = weak binder
 72 |         Default midpoint and width for logistic determined by max likelihood fit
 73 |         for data from Alessandro Sette's 1994 paper:
 74 | 
 75 |            "The relationship between class I binding affinity
 76 |             and immunogenicity of potential cytotoxic T cell epitopes.
 77 | 
 78 |         TODO: Use a large dataset to find MHC binding range predicted to #
 79 |         correlate with immunogenicity
 80 |         """
 81 |         if self.ic50 >= ic50_cutoff:
 82 |             return 0.0
 83 | 
 84 |         rescaled = (float(self.ic50) - midpoint) / width
 85 |         # simplification of 1.0 - logistic(x) = logistic(-x)
 86 |         logistic = 1.0 / (1.0 + np.exp(rescaled))
 87 | 
 88 |         # since we're scoring IC50 values, let's normalize the output
 89 |         # so IC50 near 0.0 always returns a score of 1.0
 90 |         normalizer = 1.0 / (1.0 + np.exp(-midpoint / width))
 91 | 
 92 |         return logistic / normalizer
 93 | 
 94 |     def slice_source_sequence(self, start_offset, end_offset):
 95 |         """
 96 | 
 97 |         Parameters
 98 |         ----------
 99 |         start_offset : int
100 | 
101 |         end_offset : int
102 | 
103 |         Return EpitopePrediction object with source sequence and offset
104 |         adjusted. If this slicing would shorten the mutant peptide, then
105 |         return None.
106 |         """
107 |         if self.offset < start_offset:
108 |             # this peptide starts before the requested slice through the
109 |             # source sequence
110 |             return None
111 | 
112 |         if self.offset + self.length > end_offset:
113 |             # this peptide goes beyond the end of the requested slice
114 |             # through the source sequence
115 |             return None
116 | 
117 |         return EpitopePrediction(
118 |             allele=self.allele,
119 |             peptide_sequence=self.peptide_sequence,
120 |             wt_peptide_sequence=self.wt_peptide_sequence,
121 |             ic50=self.ic50,
122 |             wt_ic50=self.wt_ic50,
123 |             percentile_rank=self.percentile_rank,
124 |             prediction_method_name=self.prediction_method_name,
125 |             overlaps_mutation=self.overlaps_mutation,
126 |             source_sequence=self.source_sequence[start_offset:end_offset],
127 |             offset=self.offset - start_offset,
128 |             occurs_in_reference=self.occurs_in_reference)
129 | 
130 | 
131 | def slice_epitope_predictions(
132 |         epitope_predictions,
133 |         start_offset,
134 |         end_offset):
135 |     """
136 |     Return subset of EpitopePrediction objects which overlap the given interval
137 |     and slice through their source sequences and adjust their offset.
138 |     """
139 |     return [
140 |         p.slice_source_sequence(start_offset, end_offset)
141 |         for p in epitope_predictions
142 |         if p.offset >= start_offset and p.offset + p.length <= end_offset
143 |     ]
144 | 
145 | def predict_epitopes(
146 |         mhc_predictor,
147 |         protein_fragment,
148 |         min_epitope_score=0.0,
149 |         genome=None):
150 |     """
151 |     Parameters
152 |     ----------
153 |     mhc_predictor : mhctools.BasePredictor
154 |         Object with predict_peptides method
155 | 
156 |     protein_fragment : MutantProteinFragment
157 | 
158 |     peptide_length : list of int
159 |         Lengths of peptides to make pMHC binding predictions for
160 | 
161 |     min_epitope_score : float
162 |         Ignore peptides with binding predictions whose normalized score is less
163 |         than this.
164 | 
165 |     genome : pyensembl.Genome
166 |         Genome whose proteome to use for reference peptide filtering
167 | 
168 |     Returns an OrderedDict of EpitopePrediction objects, keyed by a
169 |     (peptide sequence, allele) tuple, that have a normalized score greater
170 |     than min_epitope_score.
171 | 
172 |     Uses the input genome to evaluate whether the epitope occurs in reference.
173 |     """
174 |     results = OrderedDict()
175 |     reference_proteome = ReferenceProteome(genome)
176 | 
177 |     # sometimes the predictors will fail, and we don't want to crash vaxrank
178 |     # in that situation
179 |     # TODO: make more specific or remove when we fix error handling in mhctools
180 |     try:
181 |         mhctools_binding_predictions = mhc_predictor.predict_subsequences(
182 |             {protein_fragment.gene_name: protein_fragment.amino_acids})
183 |     except:
184 |         logger.error(
185 |             'MHC prediction errored for protein fragment %s, with traceback: %s',
186 |             protein_fragment, traceback.format_exc())
187 |         return results
188 | 
189 |     # compute the WT epitopes for each mutant fragment's epitopes; mutant -> WT
190 |     wt_peptides = {}
191 |     for binding_prediction in mhctools_binding_predictions:
192 |         peptide = binding_prediction.peptide
193 |         peptide_length = binding_prediction.length
194 |         peptide_start_offset = binding_prediction.offset
195 |         peptide_end_offset = peptide_start_offset + peptide_length
196 | 
197 |         overlaps_mutation = protein_fragment.interval_overlaps_mutation(
198 |             start_offset=peptide_start_offset,
199 |             end_offset=peptide_end_offset)
200 | 
201 |         if overlaps_mutation:
202 |             full_reference_protein_sequence = (
203 |                 protein_fragment.predicted_effect().original_protein_sequence
204 |             )
205 |             global_epitope_start_pos = (
206 |                 protein_fragment.global_start_pos() + peptide_start_offset
207 |             )
208 |             wt_peptide = full_reference_protein_sequence[
209 |                 global_epitope_start_pos:global_epitope_start_pos + peptide_length]
210 |             wt_peptides[peptide] = wt_peptide
211 | 
212 |     wt_predictions = []
213 |     try:
214 |         # filter to minimum peptide lengths
215 |         valid_wt_peptides = [
216 |             x for x in wt_peptides.values() if len(x) >= mhc_predictor.min_peptide_length
217 |         ]
218 |         if len(valid_wt_peptides) > 0:
219 |             wt_predictions = mhc_predictor.predict_peptides(valid_wt_peptides)
220 |     except:
221 |         logger.error(
222 |             'MHC prediction for WT peptides errored, with traceback: %s',
223 |             traceback.format_exc())
224 | 
225 |     # break it out: (peptide, allele) -> prediction
226 |     wt_predictions_grouped = {
227 |         (wt_prediction.peptide, wt_prediction.allele): wt_prediction
228 |         for wt_prediction in wt_predictions
229 |     }
230 | 
231 |     # convert from mhctools.BindingPrediction objects to EpitopePrediction
232 |     # which differs primarily by also having a boolean field
233 |     # 'overlaps_mutation' that indicates whether the epitope overlaps
234 |     # mutant amino acids or both sides of a deletion
235 |     num_total = 0
236 |     num_occurs_in_reference = 0
237 |     num_low_scoring = 0
238 |     for binding_prediction in mhctools_binding_predictions:
239 |         num_total += 1
240 |         peptide = binding_prediction.peptide
241 |         peptide_length = binding_prediction.length
242 |         peptide_start_offset = binding_prediction.offset
243 |         peptide_end_offset = peptide_start_offset + peptide_length
244 | 
245 |         overlaps_mutation = protein_fragment.interval_overlaps_mutation(
246 |             start_offset=peptide_start_offset,
247 |             end_offset=peptide_end_offset)
248 | 
249 |         occurs_in_reference = reference_proteome.contains(peptide)
250 |         if occurs_in_reference:
251 |             logger.debug('Peptide %s occurs in reference', peptide)
252 |             num_occurs_in_reference += 1
253 | 
254 |         # compute WT epitope sequence, if this epitope overlaps the mutation
255 |         if overlaps_mutation:
256 |             wt_peptide = wt_peptides[peptide]
257 |             wt_prediction = wt_predictions_grouped.get(
258 |                 (wt_peptide, binding_prediction.allele))
259 |             wt_ic50 = None
260 |             if wt_prediction is None:
261 |                 # this can happen in a stop-loss variant: do we want to check that here?
262 |                 if len(wt_peptide) < mhc_predictor.min_peptide_length:
263 |                     logger.info(
264 |                         'No prediction for too-short WT epitope %s: possible stop-loss variant',
265 |                         wt_peptide)
266 |             else:
267 |                 wt_ic50 = wt_prediction.value
268 | 
269 |         else:
270 |             wt_peptide = peptide
271 |             wt_ic50 = binding_prediction.value
272 | 
273 |         epitope_prediction = EpitopePrediction(
274 |             allele=binding_prediction.allele,
275 |             peptide_sequence=peptide,
276 |             wt_peptide_sequence=wt_peptide,
277 |             ic50=binding_prediction.value,
278 |             wt_ic50=wt_ic50,
279 |             percentile_rank=binding_prediction.percentile_rank,
280 |             prediction_method_name=binding_prediction.prediction_method_name,
281 |             overlaps_mutation=overlaps_mutation,
282 |             source_sequence=protein_fragment.amino_acids,
283 |             offset=peptide_start_offset,
284 |             occurs_in_reference=occurs_in_reference)
285 | 
286 |         if epitope_prediction.logistic_epitope_score() >= min_epitope_score:
287 |             key = (epitope_prediction.peptide_sequence, epitope_prediction.allele)
288 |             results[key] = epitope_prediction
289 |         else:
290 |             num_low_scoring += 1
291 | 
292 |     logger.info(
293 |         "%d total peptides: %d occur in reference, %d failed score threshold",
294 |         num_total,
295 |         num_occurs_in_reference,
296 |         num_low_scoring)
297 |     return results
298 | 


--------------------------------------------------------------------------------
/vaxrank/gene_pathway_check.py:
--------------------------------------------------------------------------------
  1 | # Licensed under the Apache License, Version 2.0 (the "License");
  2 | # you may not use this file except in compliance with the License.
  3 | # You may obtain a copy of the License at
  4 | #
  5 | #       http://www.apache.org/licenses/LICENSE-2.0
  6 | #
  7 | # Unless required by applicable law or agreed to in writing, software
  8 | # distributed under the License is distributed on an "AS IS" BASIS,
  9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 10 | # See the License for the specific language governing permissions and
 11 | # limitations under the License.
 12 | 
 13 | from collections import OrderedDict
 14 | from os.path import join, dirname
 15 | 
 16 | import pandas as pd
 17 | 
 18 | 
# Column headers that must be present in the input gene/variant list CSV files.
_ENSEMBL_GENE_ID_COLUMN_NAME = 'Ensembl Gene ID'
_MUTATION_COLUMN_NAME = 'Mutation'

# Keys of the dictionary returned by GenePathwayCheck.make_variant_dict.
_IFNG_RESPONSE_COLUMN_NAME = 'interferon_gamma_response'
_CLASS_I_MHC_COLUMN_NAME = 'class1_mhc_presentation_pathway'
_DRIVER_GENE_COLUMN_NAME = 'cancer_driver_gene'
_DRIVER_VARIANT_COLUMN_NAME = 'cancer_driver_variant'

# Default CSV files are looked up in the "data" directory next to this module.
_CURRENT_DIR = dirname(__file__)
_DATA_DIR = join(_CURRENT_DIR, "data")
 29 | 
 30 | 
 31 | class GenePathwayCheck(object):
 32 |     """
 33 |     This class is meant for use with gene/variant list files from
 34 |     https://github.com/openvax/gene-lists. Other files can be used as well, but
 35 |     need to follow a similar column structure. Most logic is based on Ensembl
 36 |     gene IDs.
 37 | 
 38 |     Parameters
 39 |     ----------
 40 |     interferon_gamma_response_csv : str, optional
 41 |         Local path to interferon-gamma response CSV file.
 42 | 
 43 |     class1_mhc_presentation_pathway_csv : str, optional
 44 |         Local path to MHC class I presentation pathway CSV file.
 45 | 
 46 |     cancer_driver_genes_csv : str, optional
 47 |         Local path to cancer driver genes CSV file.
 48 | 
 49 |     cancer_driver_variants_csv : str, optional
 50 |         Local path to cancer driver variants CSV file.
 51 |     """
 52 |     def __init__(
 53 |             self,
 54 |             interferon_gamma_response_csv=None,
 55 |             class1_mhc_presentation_pathway_csv=None,
 56 |             cancer_driver_genes_csv=None,
 57 |             cancer_driver_variants_csv=None):
 58 | 
 59 |         self.interferon_gamma_response_gene_set = self._load_set_from_csv(
 60 |             csv_path=interferon_gamma_response_csv,
 61 |             default_filename="interferon-gamma-response.csv",
 62 |             description="Interferon gamma response pathway",
 63 |             column_names=[_ENSEMBL_GENE_ID_COLUMN_NAME])
 64 | 
 65 |         self.class1_mhc_presentation_pathway_gene_set = self._load_set_from_csv(
 66 |             csv_path=class1_mhc_presentation_pathway_csv,
 67 |             default_filename="class1-mhc-presentation-pathway.csv",
 68 |             description="Class I MHC presentation pathway",
 69 |             column_names=[_ENSEMBL_GENE_ID_COLUMN_NAME])
 70 | 
 71 |         self.cancer_driver_genes_set = self._load_set_from_csv(
 72 |             csv_path=cancer_driver_genes_csv,
 73 |             default_filename="cancer-driver-genes.csv",
 74 |             description="Cancer driver genes",
 75 |             column_names=[_ENSEMBL_GENE_ID_COLUMN_NAME])
 76 |         # set of gene ID, variant description pairs
 77 |         self.cancer_driver_variants_set = self._load_set_from_csv(
 78 |             csv_path=cancer_driver_variants_csv,
 79 |             default_filename="cancer-driver-variants.csv",
 80 |             description="Driver variants",
 81 |             column_names=[_ENSEMBL_GENE_ID_COLUMN_NAME, _MUTATION_COLUMN_NAME])
 82 | 
 83 |     @classmethod
 84 |     def _load_set_from_csv(cls, csv_path, default_filename, description, column_names):
 85 |         if not csv_path:
 86 |             csv_path = join(_DATA_DIR, default_filename)
 87 |         df = pd.read_csv(csv_path)
 88 |         columns = []
 89 |         for column_name in column_names:
 90 |             if column_name not in df.columns:
 91 |                 raise ValueError("%s file (%s) needs column '%s'" % (
 92 |                     description,
 93 |                     csv_path,
 94 |                     column_name))
 95 |             columns.append(df[column_name].values)
 96 |         if len(columns) == 1:
 97 |             return set(columns[0])
 98 |         else:
 99 |             return set(zip(*columns))
100 | 
101 |     def make_variant_dict(self, variant):
102 |         """
103 |         Returns a dictionary of boolean values, depending on whether we see this
104 |         variant in any relevant pathway or cancer driver files.
105 | 
106 |         Parameters
107 |         ----------
108 |         variant : varcode.Variant
109 |             Variant object to evaluate
110 |         """
111 |         effect_description = variant.effects().top_priority_effect().short_description
112 |         overlapping_gene_ids = variant.gene_ids
113 |         variant_dict = OrderedDict()
114 |         variant_dict[_IFNG_RESPONSE_COLUMN_NAME] = any([
115 |             gene_id in self.interferon_gamma_response_gene_set
116 |             for gene_id in overlapping_gene_ids
117 |         ])
118 |         variant_dict[_CLASS_I_MHC_COLUMN_NAME] = any([
119 |             gene_id in self.class1_mhc_presentation_pathway_gene_set
120 |             for gene_id in overlapping_gene_ids
121 |         ])
122 |         variant_dict[_DRIVER_GENE_COLUMN_NAME] = any([
123 |             gene_id in self.cancer_driver_genes_set
124 |             for gene_id in overlapping_gene_ids
125 |         ])
126 | 
127 |         variant_dict[_DRIVER_VARIANT_COLUMN_NAME] = any([
128 |             (gene_id, effect_description) in self.cancer_driver_variants_set
129 |             for gene_id in overlapping_gene_ids
130 |         ])
131 |         return variant_dict
132 | 


--------------------------------------------------------------------------------
/vaxrank/logging.conf:
--------------------------------------------------------------------------------
 1 | [loggers]
 2 | keys=root,vaxrank,isovar,varcode,pyensembl,mhctools,datacache
 3 | 
 4 | [formatters]
 5 | keys=simpleFormatter
 6 | 
 7 | [handlers]
 8 | keys=consoleHandler,consoleHandlerCritical,fileHandler
 9 | 
10 | [logger_root]
11 | level=INFO
12 | handlers=consoleHandlerCritical
13 | 
14 | [handler_consoleHandler]
15 | class=StreamHandler
16 | level=INFO
17 | formatter=simpleFormatter
18 | args=(sys.stdout,)
19 | 
20 | [handler_consoleHandlerCritical]  # only for root logger: essentially silent
21 | class=StreamHandler
22 | level=CRITICAL
23 | formatter=simpleFormatter
24 | args=(sys.stdout,)
25 | 
26 | [handler_fileHandler]
27 | class=FileHandler
28 | level=DEBUG
29 | formatter=simpleFormatter
30 | args=('%(logfilename)s', 'w')
31 | 
32 | [formatter_simpleFormatter]
33 | format=%(asctime)s - %(name)s:%(lineno)s - %(levelname)s - %(message)s
34 | datefmt=
35 | 
36 | # vaxrank
37 | 
38 | [logger_vaxrank]
39 | level=DEBUG
40 | qualname=vaxrank
41 | handlers=consoleHandler,fileHandler
42 | 
43 | # isovar
44 | 
45 | [logger_isovar]
46 | level=DEBUG
47 | qualname=isovar
48 | handlers=consoleHandler,fileHandler
49 | 
50 | # varcode
51 | 
52 | [logger_varcode]
53 | level=DEBUG
54 | qualname=varcode
55 | handlers=consoleHandler,fileHandler
56 | 
57 | # pyensembl
58 | 
59 | [logger_pyensembl]
60 | level=DEBUG
61 | qualname=pyensembl
62 | handlers=consoleHandler
63 | 
64 | # mhctools
65 | 
66 | [logger_mhctools]
67 | level=DEBUG
68 | qualname=mhctools
69 | handlers=consoleHandler,fileHandler
70 | 
71 | # datacache
72 | 
73 | [logger_datacache]
74 | level=DEBUG
75 | qualname=datacache
76 | handlers=consoleHandler
77 | 


--------------------------------------------------------------------------------
/vaxrank/manufacturability.py:
--------------------------------------------------------------------------------
  1 | # Licensed under the Apache License, Version 2.0 (the "License");
  2 | # you may not use this file except in compliance with the License.
  3 | # You may obtain a copy of the License at
  4 | #
  5 | #       http://www.apache.org/licenses/LICENSE-2.0
  6 | #
  7 | # Unless required by applicable law or agreed to in writing, software
  8 | # distributed under the License is distributed on an "AS IS" BASIS,
  9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 10 | # See the License for the specific language governing permissions and
 11 | # limitations under the License.
 12 | 
 13 | """
Scoring functions for determining which sequences are easy to manufacture using
 15 | solid-phase synthesis.
 16 | 
 17 | For more information see: https://github.com/hammerlab/vaxrank/issues/2
 18 | """
 19 | 
 20 | from collections import namedtuple
 21 | 
# Amino Acid Hydropathy Score
# Table 2 from Kyte and Doolittle's
# "A Simple Method for Displaying the Hydropathic Character of a Protein"
#
# Keys are one-letter amino acid codes; only the 20 residues listed here
# can be scored. Higher values are treated as more hydrophobic by the
# scoring functions below.

hydropathy_dict = {
    "A": 1.8,
    "C": 2.5,
    "D": -3.5,
    "E": -3.5,
    "F": 2.8,
    "G": -0.4,
    "H": -3.2,
    "I": 4.5,
    "K": -3.9,
    "L": 3.8,
    "M": 1.9,
    "N": -3.5,
    "P": -1.6,
    "Q": -3.5,
    "R": -4.5,
    "S": -0.8,
    "T": -0.7,
    "V": 4.2,
    "W": -0.9,
    "Y": -1.3
}
 48 | 
 49 | 
 50 | def gravy_score(amino_acids):
 51 |     """
 52 |     Mean amino acid hydropathy averaged across residues of a peptide
 53 |     or protein sequence.
 54 |     """
 55 |     total = sum(
 56 |         hydropathy_dict[amino_acid] for amino_acid in amino_acids)
 57 |     return total / len(amino_acids)
 58 | 
 59 | 
 60 | def max_kmer_gravy_score(amino_acids, k):
 61 |     """
 62 |     Returns max GRAVY score of any kmer in the amino acid sequence,
 63 |     used to determine if there are any extremely hydrophobic regions within a
 64 |     longer amino acid sequence.
 65 |     """
 66 |     return max(
 67 |         gravy_score(amino_acids[i:i + k])
 68 |         for i in range(len(amino_acids) - k + 1))
 69 | 
 70 | 
def max_7mer_gravy_score(amino_acids):
    """
    Max GRAVY score over the 7-residue windows of the sequence
    (max_kmer_gravy_score with k=7).
    """
    return max_kmer_gravy_score(amino_acids, 7)
 73 | 
 74 | 
 75 | def cterm_kmer_gravy_score(amino_acids, k):
 76 |     """
 77 |     Mean hydropathy of last k residues on the C-terminus of the peptide.
 78 |     """
 79 |     n = len(amino_acids)
 80 |     return gravy_score(amino_acids[n - k:n])
 81 | 
 82 | 
def cterm_7mer_gravy_score(amino_acids):
    """
    Mean hydropathy of the 7 residues closest to the C-terminus
    (cterm_kmer_gravy_score with k=7).
    """
    return cterm_kmer_gravy_score(amino_acids, 7)
 85 | 
 86 | 
 87 | def difficult_n_terminal_residue(amino_acids):
 88 |     """
 89 |     Is the N-terminus one of {Gln, Glu, Cys}?
 90 |     ---
 91 |     Priority I: avoid N-terminal Gln, Glu, Cys
 92 |     """
 93 |     return amino_acids[0] in {"Q", "E", "C"}
 94 | 
 95 | 
 96 | def c_terminal_proline(amino_acids):
 97 |     """
 98 |     Is the right-most (C-terminal) amino acid a proline?
 99 |     """
100 |     return amino_acids[-1] == "P"
101 | 
102 | 
def c_terminal_cysteine(amino_acids):
    """
    True when the last (C-terminal) residue of the sequence is cysteine (C).
    """
    c_terminal_residue = amino_acids[-1]
    return c_terminal_residue == "C"
108 | 
109 | 
def n_terminal_asparagine(amino_acids):
    """
    True when the first (N-terminal) residue is asparagine (N), which is
    also hard to synthesize, though not as bad as {Gln, Glu, Cys}.
    """
    n_terminal_residue = amino_acids[0]
    return n_terminal_residue == "N"
116 | 
117 | 
def asparagine_proline_bond_count(amino_acids):
    """
    Number of positions where asparagine (Asn, N) is immediately followed
    by proline (Pro, P).
    Problem with Asn-Pro bonds: can spontaneously cleave the peptide
    """
    n_bonds = 0
    for start in range(len(amino_acids) - 1):
        dipeptide = amino_acids[start:start + 2]
        if dipeptide == "NP":
            n_bonds += 1
    return n_bonds
126 | 
127 | 
def cysteine_count(amino_acids):
    """
    Number of cysteine (C) residues in the amino acid sequence.
    Problem with cysteine residues: They can form disulfide bonds across
    distant parts of the peptide
    """
    cysteines = [residue for residue in amino_acids if residue == "C"]
    return len(cysteines)
135 | 
136 | 
def combine_scoring_functions(*scoring_functions):
    """
    Build the ManufacturabilityScores class for the given scoring functions.

    The returned class is a namedtuple subclass with one field per scoring
    function, named after the function and in the order given. Its
    `from_amino_acids` classmethod applies every scoring function to an
    amino acid sequence and returns the populated tuple.
    """
    field_names = tuple(fn.__name__ for fn in scoring_functions)
    scores_base = namedtuple('ManufacturabilityScores', field_names)

    class ManufacturabilityScores(scores_base):
        @classmethod
        def from_amino_acids(cls, amino_acids):
            scores = (score(amino_acids) for score in scoring_functions)
            return cls(*scores)

    return ManufacturabilityScores
150 | 
# Field order of the resulting namedtuple follows the order of the
# scoring functions listed here.
ManufacturabilityScores = combine_scoring_functions(

    # GRAVY score of 7 residues closest to the C terminus
    cterm_7mer_gravy_score,

    # max GRAVY score over the 7mer windows in the peptide sequence
    max_7mer_gravy_score,

    # avoid N-terminal Gln, Glu, Cys
    difficult_n_terminal_residue,

    # avoid C-terminal Cys
    c_terminal_cysteine,

    # avoid C-terminal Pro
    c_terminal_proline,

    # total number of Cys residues
    cysteine_count,

    # avoid N-terminal Asn
    n_terminal_asparagine,

    # avoid Asn-Pro bonds
    asparagine_proline_bond_count,
)
177 | 


--------------------------------------------------------------------------------
/vaxrank/mutant_protein_fragment.py:
--------------------------------------------------------------------------------
  1 | # Licensed under the Apache License, Version 2.0 (the "License");
  2 | # you may not use this file except in compliance with the License.
  3 | # You may obtain a copy of the License at
  4 | #
  5 | #       http://www.apache.org/licenses/LICENSE-2.0
  6 | #
  7 | # Unless required by applicable law or agreed to in writing, software
  8 | # distributed under the License is distributed on an "AS IS" BASIS,
  9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 10 | # See the License for the specific language governing permissions and
 11 | # limitations under the License.
 12 | 
 13 | 
 14 | import logging
 15 | 
 16 | from varcode.effects import top_priority_effect
 17 | from serializable import Serializable
 18 | 
 19 | logger = logging.getLogger(__name__)
 20 | 
 21 | 
 22 | class MutantProteinFragment(Serializable):
 23 |     def __init__(
 24 |             self,
 25 |             variant,
 26 |             gene_name,
 27 |             amino_acids,
 28 |             mutant_amino_acid_start_offset,
 29 |             mutant_amino_acid_end_offset,
 30 |             supporting_reference_transcripts,
 31 |             n_overlapping_reads,
 32 |             n_alt_reads,
 33 |             n_ref_reads,
 34 |             n_alt_reads_supporting_protein_sequence):
 35 |         """
 36 |         Parameters
 37 |         ----------
 38 |         variant : varcode.Variant
 39 |             Somatic mutation.
 40 | 
 41 |         gene_name : str
 42 |             Gene from which we used a transcript to translate this mutation.
 43 | 
 44 |         amino_acids : str
 45 |             Translated protein sequence, aggregated from possibly multiple
 46 |             synonymous coding sequences.
 47 | 
 48 |         mutant_amino_acid_start_offset : int
 49 |             Starting offset of amino acids which differ due to the mutation
 50 | 
 51 |         mutant_amino_acid_end_offset : int
 52 |             End offset of amino acids which differ due to the mutation
 53 | 
 54 |         supporting_reference_transcripts : list of pyensembl.Transcript
 55 |             PyEnsembl Transcript objects for reference transcripts which
 56 |             were used to establish the reading frame of coding sequence(s)
 57 |             detected from RNA.
 58 | 
 59 |         n_overlapping_reads : int
 60 |             Number of reads overlapping the variant locus.
 61 | 
 62 |         n_alt_reads  : int
 63 |             Number of reads supporting the variant.
 64 | 
 65 |         n_ref_reads : int
 66 |             Number of reads supporting the reference allele.
 67 | 
 68 |         n_alt_reads_supporting_protein_sequence : int
 69 |             Number of RNA reads fully spanning the cDNA sequence(s) from which
 70 |             we translated this amino acid sequence.
 71 |         """
 72 |         self.variant = variant
 73 |         self.gene_name = gene_name
 74 |         self.amino_acids = amino_acids
 75 |         self.mutant_amino_acid_start_offset = mutant_amino_acid_start_offset
 76 |         self.mutant_amino_acid_end_offset = mutant_amino_acid_end_offset
 77 |         self.supporting_reference_transcripts = \
 78 |             supporting_reference_transcripts
 79 |         self.n_overlapping_reads = n_overlapping_reads
 80 |         self.n_alt_reads = n_alt_reads
 81 |         self.n_ref_reads = n_ref_reads
 82 |         self.n_alt_reads_supporting_protein_sequence = \
 83 |             n_alt_reads_supporting_protein_sequence
 84 | 
 85 |     @classmethod
 86 |     def from_isovar_result(cls, isovar_result):
 87 |         """
 88 |         Create a MutantProteinFragment from an isovar.IsovarResult object
 89 | 
 90 |         Parameters
 91 |         ----------
 92 |         isovar_result : isovar.IsovarResult
 93 | 
 94 |         Returns
 95 |         -------
 96 |         MutantProteinFragment
 97 |         """
 98 |         protein_sequence = isovar_result.top_protein_sequence
 99 |         if protein_sequence is None:
100 |             return None
101 |         return cls(
102 |             variant=isovar_result.variant,
103 |             gene_name=protein_sequence.gene_name,
104 |             amino_acids=protein_sequence.amino_acids,
105 |             mutant_amino_acid_start_offset=protein_sequence.mutation_start_idx,
106 |             mutant_amino_acid_end_offset=protein_sequence.mutation_end_idx,
107 | 
108 |             # TODO: distinguish reads and fragments in Vaxrank?
109 |             n_overlapping_reads=isovar_result.num_total_fragments,
110 |             n_alt_reads=isovar_result.num_alt_fragments,
111 |             n_ref_reads=isovar_result.num_ref_fragments,
112 |             n_alt_reads_supporting_protein_sequence=protein_sequence.num_supporting_fragments,
113 |             supporting_reference_transcripts=protein_sequence.transcripts)
114 | 
115 |     def __len__(self):
116 |         return len(self.amino_acids)
117 | 
118 |     @property
119 |     def n_mutant_amino_acids(self):
120 |         return (
121 |             self.mutant_amino_acid_end_offset - self.mutant_amino_acid_start_offset)
122 | 
123 |     @property
124 |     def mutation_distance_from_edge(self):
125 |         distance_from_left = self.mutant_amino_acid_start_offset
126 |         distance_from_right = len(self) - self.mutant_amino_acid_end_offset
127 |         return min(distance_from_left, distance_from_right)
128 | 
129 |     @property
130 |     def is_deletion(self):
131 |         return self.n_mutant_amino_acids == 0 and self.variant.is_deletion
132 | 
133 |     @property
134 |     def n_other_reads(self):
135 |         """
136 |         Number of reads supporting alleles which are neither ref nor alt
137 |         """
138 |         return self.n_overlapping_reads - (self.n_ref_reads + self.n_alt_reads)
139 | 
140 |     def interval_overlaps_mutation(self, start_offset, end_offset):
141 |         """
142 |         Does the given start_offset:end_offset interval overlap the mutated
143 |         region of this MutantProteinFragment? Interval offsets are expected
144 |         to be base-0 half-open (start is inclusive, end is exclusive).
145 |         """
146 |         return (
147 |             start_offset < self.mutant_amino_acid_end_offset and
148 |             end_offset > self.mutant_amino_acid_start_offset)
149 | 
150 |     def generate_subsequences(self, subsequence_length):
151 |         """
152 |         Yields (int, MutantProteinFragment) pairs, where the integer
153 |         indicates the offset into the amino acid sequences.
154 |         """
155 |         n_total_amino_acids = len(self.amino_acids)
156 |         if n_total_amino_acids <= subsequence_length:
157 |             yield (0, self)
158 |         else:
159 |             for subsequence_start_offset in range(
160 |                     0,
161 |                     n_total_amino_acids - subsequence_length + 1):
162 |                 subsequence_end_offset = subsequence_start_offset + subsequence_length
163 |                 amino_acids = self.amino_acids[
164 |                     subsequence_start_offset:subsequence_end_offset]
165 |                 mutant_amino_acid_start_offset = max(
166 |                     0,
167 |                     self.mutant_amino_acid_start_offset - subsequence_start_offset)
168 |                 mutant_amino_acid_end_offset = min(
169 |                     len(amino_acids),
170 |                     max(
171 |                         0,
172 |                         self.mutant_amino_acid_end_offset - subsequence_start_offset))
173 |                 n_supporting_reads = self.n_alt_reads_supporting_protein_sequence
174 |                 subsequence_mutant_protein_fragment = MutantProteinFragment(
175 |                     variant=self.variant,
176 |                     gene_name=self.gene_name,
177 |                     amino_acids=amino_acids,
178 |                     mutant_amino_acid_start_offset=mutant_amino_acid_start_offset,
179 |                     mutant_amino_acid_end_offset=mutant_amino_acid_end_offset,
180 |                     n_overlapping_reads=self.n_overlapping_reads,
181 |                     n_ref_reads=self.n_ref_reads,
182 |                     n_alt_reads=self.n_alt_reads,
183 |                     n_alt_reads_supporting_protein_sequence=n_supporting_reads,
184 |                     supporting_reference_transcripts=self.supporting_reference_transcripts)
185 |                 yield subsequence_start_offset, subsequence_mutant_protein_fragment
186 | 
187 |     def sorted_subsequences(
188 |             self,
189 |             subsequence_length,
190 |             limit=None,
191 |             sort_key=lambda x: (
192 |                 -x[1].mutation_distance_from_edge,
193 |                 -x[1].n_mutant_amino_acids)):
194 |         """
195 |         Returns subsequences, paired with their offset from the start of the
196 |         protein fragment. The default sort criterion is maximizing the
197 |         mutation distance from the edge of the sequence and secondarily
198 |         maximizing the number of mutant amino acids.
199 |         """
200 |         subsequences = list(self.generate_subsequences(subsequence_length))
201 |         subsequences.sort(key=sort_key)
202 |         if limit:
203 |             subsequences = subsequences[:limit]
204 |         return subsequences
205 | 
206 |     def predicted_effect(self):
207 |         effects = [
208 |             self.variant.effect_on_transcript(t) for t in
209 |             self.supporting_reference_transcripts
210 |         ]
211 |         predicted_effect = top_priority_effect(effects)
212 |         return predicted_effect
213 | 
214 |     def global_start_pos(self):
215 |         # position of mutation start relative to the full amino acid sequence
216 |         global_mutation_start_pos = self.predicted_effect().aa_mutation_start_offset
217 |         if global_mutation_start_pos is None:
218 |             logger.error(
219 |                 'Could not find mutation start pos for variant %s',
220 |                 self.variant)
221 |             return -1
222 | 
223 |         # get the global position of the mutant protein fragment: shift left by the amount of
224 |         # the relative mutant start position
225 |         return (
226 |             global_mutation_start_pos - self.mutant_amino_acid_start_offset
227 |         )
228 | 


--------------------------------------------------------------------------------
/vaxrank/patient_info.py:
--------------------------------------------------------------------------------
 1 | from serializable import Serializable
 2 | 
class PatientInfo(Serializable):
    """
    Record of patient-level inputs and variant counts.

    Every constructor argument is stored unchanged on an attribute of the
    same name; the class performs no validation or computation of its own.
    """
    def __init__(
            self,
            patient_id,
            vcf_paths,
            bam_path,
            mhc_alleles,
            num_somatic_variants,
            num_coding_effect_variants,
            num_variants_with_rna_support,
            num_variants_with_vaccine_peptides):
        # NOTE(review): the num_* values look like variant counts at
        # successive filtering stages -- confirm against the caller.
        self.patient_id = patient_id
        self.vcf_paths = vcf_paths
        self.bam_path = bam_path
        self.mhc_alleles = mhc_alleles
        self.num_somatic_variants = num_somatic_variants
        self.num_coding_effect_variants = num_coding_effect_variants
        self.num_variants_with_rna_support = num_variants_with_rna_support
        self.num_variants_with_vaccine_peptides = num_variants_with_vaccine_peptides
22 | 


--------------------------------------------------------------------------------
/vaxrank/reference_proteome.py:
--------------------------------------------------------------------------------
 1 | # Licensed under the Apache License, Version 2.0 (the "License");
 2 | # you may not use this file except in compliance with the License.
 3 | # You may obtain a copy of the License at
 4 | #
 5 | #     http://www.apache.org/licenses/LICENSE-2.0
 6 | #
 7 | # Unless required by applicable law or agreed to in writing, software
 8 | # distributed under the License is distributed on an "AS IS" BASIS,
 9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 | # See the License for the specific language governing permissions and
11 | # limitations under the License.
12 | 
13 | from __future__ import absolute_import, print_function, division
14 | import os
15 | import logging
16 | 
17 | from datacache import get_data_dir
18 | import shellinford
19 | 
20 | 
21 | logger = logging.getLogger(__name__)
22 | 
23 | 
def fm_index_path(genome):
    """
    Path of the cached FM index file of reference peptides for the given
    genome. The cache directory is created if it does not exist yet; the
    file name combines the genome's species Latin name and release number.
    """
    # $VAXRANK_REF_PEPTIDES_DIR, when set, overrides the cache location
    peptides_dir = get_data_dir(envkey='VAXRANK_REF_PEPTIDES_DIR')
    if not os.path.exists(peptides_dir):
        os.makedirs(peptides_dir)

    index_filename = '%s_%d_3.fm' % (
        genome.species.latin_name, genome.release)
    return os.path.join(peptides_dir, index_filename)
35 | 
36 | 
def generate_protein_sequences(genome):
    """
    Lazily yields the protein sequence of every protein-coding transcript
    of the given genome, in the order returned by `genome.transcripts()`.

    Parameters
    ----------
    genome : pyensembl.EnsemblRelease
        Input genome to load for reference peptides
    """
    coding_transcripts = (
        transcript
        for transcript in genome.transcripts()
        if transcript.is_protein_coding)
    for transcript in coding_transcripts:
        yield transcript.protein_sequence
49 | 
50 | 
def load_reference_peptides_index(genome, force_reload=False):
    """
    Returns the FM index of reference peptides for a genome, loading it from
    the cached file when possible and building it otherwise.

    Parameters
    ----------
    genome : pyensembl.EnsemblRelease
        Input genome to load for reference peptides

    force_reload : bool, optional
        If true, will recompute index for this genome even if it already exists.

    Returns
    -------
    fm : shellinford.FMIndex
        Index populated with reference peptides from the genome
    """
    index_file = fm_index_path(genome)

    # Fast path: reuse the index file from an earlier run.
    if not force_reload and os.path.exists(index_file):
        return shellinford.FMIndex(filename=index_file)

    logger.info("Building FM index at %s", index_file)
    index = shellinford.FMIndex()
    index.build(generate_protein_sequences(genome), index_file)
    logger.info("Done building FM index")
    return index
76 | 
77 | 
class ReferenceProteome(object):
    """
    Membership test for peptides against the FM index of reference protein
    sequences of a genome.
    """
    def __init__(self, genome):
        self.fm_index = load_reference_peptides_index(genome)

    def contains(self, kmer):
        """
        True when searching the FM index for `kmer` returns at least one hit.
        """
        hits = self.fm_index.search(kmer)
        return len(hits) != 0
84 | 


--------------------------------------------------------------------------------
/vaxrank/templates/stylesheet.css:
--------------------------------------------------------------------------------
  1 | /* this is to fix HTML->PDF rendering on Debian: wkhtmltopdf screws up the 
  2 | line heights otherwise */
  3 | table, table.th, table.td {
  4 |     line-height: 1.1em;
  5 | }
  6 | 
  7 | #main {
  8 | 	padding: 4em;
  9 | }
 10 | 
 11 | /* Various headers */
 12 | #report-header {
 13 | 	padding-bottom: 1em;
 14 | 	margin-top: -2em;
 15 | }
 16 | 
 17 | #patient-info {
 18 | 	padding-bottom: 1em;
 19 | }
 20 | 
 21 | #variants {
 22 | 	padding-top: 1em;
 23 | 	padding-bottom: 1em;
 24 | }
 25 | 
 26 | #effects {
 27 | 	margin-bottom: 1.5em;
 28 | }
 29 | 
 30 | #peptides {
 31 | 	margin-bottom: 1.5em;
 32 | }
 33 | 
 34 | #args {
 35 | 	padding-top: 2em;
 36 | 	padding-bottom: 1em;
 37 | }
 38 | 
 39 | /* Patient info */
 40 | table.patient-info {
 41 | 	width: 90%;
 42 | }
 43 | 
 44 | col.patient-info-column-one {
 45 | 	width: 40%;
 46 | }
 47 | 
 48 | /* Command-line args */
 49 | table.args {
 50 | 	width: 90%;
 51 | }
 52 | 
 53 | col.args-column-one {
 54 | 	width: 40%;
 55 | }
 56 | 
 57 | /* Variants */
 58 | ol.main {
 59 | 	padding-left: 1.2em;
 60 | 	margin-left: 0.1em;
 61 | 	font-size: 2em;
 62 | 	color: #b3b3b3;
 63 | }
 64 | 
 65 | li.variant-list-item {
 66 | 	border-left: 0.3em solid;
 67 | 	padding: 0 2em 0 2em;
 68 | 	margin-bottom: 2em;
 69 | 	page-break-before: always;
 70 | 	page-break-after: always;
 71 | }
 72 | 
 73 | li.variant-list-item:nth-child(odd) {
 74 | 	border-left: 0.3em dotted;
 75 | 	padding: 0 2em 0 2em;
 76 | 	margin-bottom: 2em;
 77 | 	page-break-before: always;
 78 | 	page-break-after: always;
 79 | }
 80 | 
 81 | div.variant-span {
 82 | 	font-size: 16px;
 83 | 	color: black;
 84 | }
 85 | 
 86 | table.variant {
 87 | 	margin-top: -2em;
 88 | 	width: 90%;
 89 | }
 90 | 
 91 | col.variant-column-one {
 92 | 	width: 40%;
 93 | }
 94 | 
 95 | col.variant-column-two {
 96 | 	width: 60%;
 97 | }
 98 | 
 99 | thead.variant-head {
100 | 	font-size: 135%;
101 | 	margin-left: -2px;
102 | }
103 | 
104 | td.variant-head {
105 | 	padding-left: 0.7em;
106 | }
107 | 
108 | /* Peptides */
109 | h4.peptides {
110 | 	page-break-before: always;
111 | }
112 | 
113 | div.wt-epitopes {
114 | 	page-break-after: always;
115 | 	page-break-inside: avoid;
116 | 	margin: 2em 0 2em 2em;
117 | }
118 | 
119 | table.wt-epitopes {
120 | 	font-size: 80%;
121 | }
122 | 
123 | ol.peptides {
124 | 	padding-left: 0;
125 | 	list-style-position: inside;
126 | }
127 | 
128 | li.peptide {
129 | 	page-break-inside: avoid;
130 | 	page-break-before: always;
131 | }
132 | 
133 | table.peptide {
134 | 	margin: -1em 0 2em 2em;
135 | 	width: 80%;
136 | }
137 | 
138 | span.mutant {
139 | 	background-color: yellow;
140 | 	border: 1px;
141 | 	border-style: solid;
142 | 	border-color: red;
143 | 	padding: 2px;
144 | 	margin-left: 2px;
145 | 	margin-right: 2px;
146 | }
147 | 
148 | table.peptide-inner {
149 | 	width: 100%;
150 | 	border: none;
151 | }
152 | 
153 | td.peptide-inner {
154 | 	padding: 0;
155 | }
156 | 
157 | td.peptide-inner-header {
158 | 	font-weight: bold;
159 | 	text-align: center;
160 | 	font-size: 125%;
161 | 	padding: 0.75em;
162 | }
163 | 
164 | col.peptide-data-column-one {
165 | 	width: 70%;
166 | }
167 | 
168 | col.peptide-data-column-two {
169 | 	width: 30%;
170 | }
171 | 
172 | col.epitope-data-column-one {
173 | 	width: 16%;
174 | }
175 | 
176 | col.epitope-data-column-two {
177 | 	width: 16%;
178 | }
179 | 
180 | col.epitope-data-column-three {
181 | 	width: 16%;
182 | }
183 | 
184 | col.epitope-data-column-four {
185 | 	width: 16%;
186 | }
187 | 
188 | col.epitope-data-column-five {
189 | 	width: 16%;
190 | }
191 | 
192 | col.epitope-data-column-six {
193 | 	width: 16%;
194 | }
195 | 
196 | table.epitope-inner {
197 | 	font-size: 80%;
198 | 	width: 100%;
199 | 	border: none;
200 | }
201 | 
202 | /* Signatures */
203 | table.signature {
204 | 	width: 70%;
205 | }
206 | 
207 | table.signature-inner {
208 | 	width: 100%;
209 | 	border: none;
210 | }
211 | 
212 | td.signature-inner {
213 | 	padding: 0;
214 | }
215 | 
216 | col.signature-column-one {
217 | 	width: 30%;
218 | }
219 | 
220 | col.signature-column-two {
221 | 	width: 70%;
222 | }
223 | 


--------------------------------------------------------------------------------
/vaxrank/templates/template.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html>
  3 |     <head>
  4 |         <title>Vaxrank Report</title>
  5 |         <meta charset="UTF-8">
  6 |         <link rel="stylesheet" href="http://yui.yahooapis.com/pure/0.6.0/pure-min.css">
  7 |         <link rel="stylesheet" href="http://yui.yahooapis.com/pure/0.6.0/base-min.css">
  8 |         <style type="text/css">
  9 |             {% include "templates/stylesheet.css" %}
 10 |         </style>
 11 |     </head>
 12 |     <body>
 13 |         <div id="main">
 14 |         	<h1 id="report-header">Vaccine Peptides Report</h1>
 15 | 
 16 |         	<h3 id="patient-info">PATIENT INFO</h3>
 17 |             <table class="pure-table pure-table-bordered patient-info">
 18 |                 <colgroup>
 19 |                     <col class="patient-info-column-one">
 20 |                     <col class="patient-info-column-two">
 21 |                 </colgroup>
 22 |                 {% for key, val in patient_info.items() %}
 23 |                 <tr><td>{{ key }}</td><td>{{ val }}</td></tr>
 24 |                 {% endfor %}
 25 |             </table>
 26 |         	<br>
 27 | 
 28 |             <h3 id="args">COMMAND LINE ARGUMENTS</h3>
 29 |             <table class="pure-table pure-table-bordered args">
 30 |                 <colgroup>
 31 |                     <col class="args-column-one">
 32 |                     <col class="args-column-two">
 33 |                 </colgroup>
 34 |                 {% for key, val in args %}
 35 |                 <tr><td class="arg">{{ key }}</td><td class="arg">{{ val }}</td></tr>
 36 |                 {% endfor %}
 37 |             </table>
 38 |             {% if input_json_file %}
 39 |             <br><br><h4>Report generated from saved location: {{ input_json_file }}</h4>
 40 |             {% endif %}
 41 |         	<ol class="main">
 42 | 
 43 |             {% if variants %}
 44 |         	{% for v in variants %}
 45 |         	<li class="variant-list-item">
 46 |                 <div class="variant-span">
 47 |             		<table class="pure-table pure-table-bordered variant">
 48 |             			<colgroup>
 49 |                         	<col class="variant-column-one">
 50 |                             <col class="variant-column-two">
 51 |                         </colgroup>
 52 |                         <thead class="variant-head">
 53 |                             <tr>
 54 |                                 <td class="variant-head">Variant</td>
 55 |                                 <td class="variant-head">{{ v.short_description }}</td>
 56 |                             </tr>
 57 |                         </thead>
 58 |                         {% for key, val in v.variant_data.items() %}
 59 |                         <tr><td>{{ key }}</td><td>{{ val }}</td></tr>
 60 |                         {% endfor %}
 61 |                     </table>
 62 |                     <br>
 63 | 
 64 |                     <h4 id="effects">Predicted Effect</h4>
 65 |                     <table class="pure-table pure-table-bordered">
 66 |                         {% for key, val in v.effect_data.items() %}
 67 |                         <tr><td>{{ key }}</td><td>{{ val }}</td></tr>
 68 |                         {% endfor %}
 69 |                 	</table>
 70 |                 	<br>
 71 | 
 72 |                     {% if v.databases %}
 73 |                     <h4 id="effects">Databases</h4>
 74 |                     <table class="pure-table pure-table-bordered">
 75 |                         {% for key, val in v.databases.items() %}
 76 |                         <tr><td>{{ key }}</td><td><a href="{{ val }}">{{ val }}</a></td></tr>
 77 |                         {% endfor %}
 78 |                     </table>
 79 |                     <br>
 80 |                     {% endif %}
 81 | 
 82 |             		<ol class="peptides" type="i">
 83 |             			{% for p in v.peptides %}
 84 |             			<li class="peptide">
 85 |             				<table class="pure-table pure-table-bordered peptide">
 86 |                                 <thead>
 87 |                                     <tr><td class="peptide-inner-header">{{ p.header_display_data.aa_before_mutation }}<span class="mutant">{{ p.header_display_data.aa_mutant }}</span>{{ p.header_display_data.aa_after_mutation }}</td></tr>
 88 |                                 </thead>
 89 |                                 <tr><td class="peptide-inner">
 90 |                                     <table class="pure-table pure-table-bordered peptide-inner">
 91 |                                         <colgroup>
 92 |                                             <col class="peptide-data-column-one">
 93 |                                             <col class="peptide-data-column-two">
 94 |                                         </colgroup>
 95 |                                         {% for key, val in p.peptide_data.items() %}
 96 |                                         <tr><td>{{ key }}</td><td>{{ val }}</td></tr>
 97 |                                         {% endfor %}
 98 |                                     </table>
 99 |                                 </td></tr>
100 |                                 {% if include_manufacturability %}
101 |                                 <tr><td class="peptide-inner-header">Manufacturability</td></tr>
102 |                                 <tr><td class="peptide-inner">
103 |                                     <table class="pure-table pure-table-bordered peptide-inner">
104 |                                         <colgroup>
105 |                                             <col class="peptide-data-column-one">
106 |                                             <col class="peptide-data-column-two">
107 |                                         </colgroup>
108 |                                         {% for key, val in p.manufacturability_data.items() %}
109 |                                         <tr><td>{{ key }}</td><td>{{ val }}</td></tr>
110 |                                         {% endfor %}
111 |                                     </table>
112 |                                 </td></tr>
113 |                                 {% endif %}
114 | 
115 |                                 <tr><td class="peptide-inner-header">Predicted mutant epitopes</td></tr>
116 |                                 <tr><td class="peptide-inner">
117 |                                     <table class="pure-table epitope-inner">
118 |                                     <colgroup>
119 |                                         <col class="epitope-data-column-one">
120 |                                         <col class="epitope-data-column-two">
121 |                                         <col class="epitope-data-column-three">
122 |                                         <col class="epitope-data-column-four">
123 |                                         <col class="epitope-data-column-five">
124 |                                         <col class="epitope-data-column-six">
125 |                                     </colgroup>
126 |                                         <thead>
127 |                                             <tr>
128 |                                                 {% for key in p.epitopes[0] %}
129 |                                                 <td>{{ key }}</td>
130 |                                                 {% endfor %}
131 |                                             </tr>
132 |                                         </thead>
133 |                                         {% for e in p.epitopes %}
134 |                                         <tr>
135 |                                             {% for _, val in e.items() %}
136 |                                             <td>{{ val }}</td>
137 |                                             {% endfor %}
138 |                                         </tr>
139 |                                         {% endfor %}
140 |                                     </table>
141 |                                 </td></tr>
142 |                             </table>
143 |                             {% if include_wt_epitopes %}
144 |                             <div class="wt-epitopes">
145 |                             {% if p.wt_epitopes %}
146 |                             <h4 class="wt-epitopes">Predicted strong binders that do not overlap the mutation</h4>
147 |                             <table class="pure-table pure-table-bordered wt-epitopes">
148 |                                 <thead>
149 |                                     <tr>
150 |                                         <td>Sequence</td>
151 |                                         <td>IC50</td>
152 |                                         <td>Allele</td>
153 |                                     </tr>
154 |                                 </thead>
155 |                                 {% for e in p.wt_epitopes %}
156 |                                 <tr>
157 |                                     <td>{{ e["Sequence"] }}</td>
158 |                                     <td>{{ e["IC50"] }}</td>
159 |                                     <td>{{ e["Allele"] }}</td>
160 |                                 </tr>
161 |                                 {% endfor %}
162 |                             </table>
163 |                             {% else %}
164 |                             <h4 class="wt-epitopes">No predicted strong binders that do not overlap the mutation.</h4>
165 |                             {% endif %}
166 |                             </div>
167 |                             {% endif %}
168 |             			</li>
169 |             			{% endfor %}
170 |             		</ol>
171 |                 </div>
172 |         	</li>
173 |         	{% endfor %}
174 |             </ol>
175 | 
176 |             {% if reviewers %}
177 |             <table class="pure-table pure-table-bordered reviewed-by">
178 |                 <thead>
179 |                     <tr><td>Reviewed By</td></tr>
180 |                 </thead>
181 |                 {% for r in reviewers %}
182 |                 <tr><td>{{ r }}</td></tr>
183 |                 {% endfor %}
184 |             </table>
185 |             <br><br>
186 |             {% endif %}
187 | 
188 |             <table class="pure-table pure-table-bordered signature">
189 |                 <thead>
190 |                     <tr><td>Final Review</td></tr>
191 |                 </thead>
192 |                 <tr>
193 |                     <td class="signature-inner">
194 |                         <table class="pure-table signature-inner">
195 |                             <colgroup>
196 |                                 <col class="signature-column-one">
197 |                                 <col class="signature-column-two">
198 |                             </colgroup>
199 |                             <tr><td>Name</td><td>{{ final_review }}</td></tr>
200 |                             <tr><td>Signature</td><td></td></tr>
201 |                             <tr><td>Date</td><td></td></tr>
202 |                         </table>
203 |                     </td>
204 |                 </tr>
205 |             </table>
206 |             {% else %}
207 |             <h4>No variants with sufficient vaccine peptides were found.</h4>
208 |             {% endif %}
209 |         </div>
210 |     </body>
211 | </html>
212 | 


--------------------------------------------------------------------------------
/vaxrank/templates/template.txt:
--------------------------------------------------------------------------------
 1 | {% for key, val in patient_info.items() %}
 2 | {{ key }}: {{ val }}
 3 | {% endfor %}
 4 | 
 5 | Package version info
 6 | {% for key, val in package_versions.items() %}
 7 |     {{ key }}: {{ val }}
 8 | {% endfor %}
 9 | ---
10 | 
11 | {% if variants %}
12 | {% for v in variants %}
13 | {{ v.num }}) {{ v.short_description }} ({{ v.variant_data['Gene name'] }})
14 |         {% for key, val in v.variant_data.items() %}
15 |         {{ key }}: {{ val }}
16 |         {% endfor %}
17 | 
18 |         {% for key, val in v.effect_data.items() %}
19 |         {{ key }}: {{ val }}
20 |         {% endfor %}
21 | 
22 |         Vaccine Peptides:
23 |         {% for p in v.peptides %}
24 |                 {{ p.header_display_data.num }}. {{ p.header_display_data.aa_before_mutation }}_{{ p.header_display_data.aa_mutant }}_{{ p.header_display_data.aa_after_mutation }} (score = {{ v.variant_data["Top score"] }})
25 |                   {% for key, val in p.peptide_data.items() %}
26 |                   - {{ key }}: {{ val }}
27 |                   {% endfor %}
28 |                   {% if include_manufacturability %}
29 | 
30 |                   Manufacturability:
31 |                   {% for key, val in p.manufacturability_data.items() %}
32 |                   - {{ key }}: {{ val }}
33 |                   {% endfor %}
34 |                   {% endif %}
35 |                   
36 |                   Predicted mutant epitopes:
37 |                   {{ p.ascii_epitopes|indent(18) }}
38 | 
39 |                   {% if include_wt_epitopes and p.wt_epitopes %}
40 |                   Predicted strong binders that do not overlap the mutation:
41 |                   {{ p.ascii_wt_epitopes|indent(18) }}
42 |                   {% endif %}
43 | 
44 |         {% endfor %}
45 | {% endfor %}
46 | {% else %}
47 | No variants with sufficient vaccine peptides were found.
48 | {% endif %}
49 | 


--------------------------------------------------------------------------------
/vaxrank/vaccine_peptide.py:
--------------------------------------------------------------------------------
  1 | # Licensed under the Apache License, Version 2.0 (the "License");
  2 | # you may not use this file except in compliance with the License.
  3 | # You may obtain a copy of the License at
  4 | #
  5 | #       http://www.apache.org/licenses/LICENSE-2.0
  6 | #
  7 | # Unless required by applicable law or agreed to in writing, software
  8 | # distributed under the License is distributed on an "AS IS" BASIS,
  9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 10 | # See the License for the specific language governing permissions and
 11 | # limitations under the License.
 12 | 
 13 | 
 14 | from operator import attrgetter
 15 | 
 16 | import numpy as np
 17 | from serializable import Serializable
 18 | 
 19 | from .manufacturability import ManufacturabilityScores
 20 | 
 21 | 
 22 | class VaccinePeptide(Serializable):
 23 |     """
 24 |     VaccinePeptide combines the sequence information of MutantProteinFragment
 25 |     with MHC binding predictions for subsequences of the protein fragment.
 26 | 
 27 |     The resulting lists of mutant and wildtype epitope predictions
 28 |     are sorted by affinity.
 29 |     """
 30 | 
 31 |     def __init__(
 32 |             self,
 33 |             mutant_protein_fragment,
 34 |             epitope_predictions,
 35 |             num_mutant_epitopes_to_keep=None,
 36 |             sort_predictions_by='ic50'):
 37 |         """
 38 |         Parameters
 39 |         ----------
 40 |         mutant_protein_fragment : MutantProteinFragment
 41 | 
 42 |         epitope_predictions : list of EpitopePrediction
 43 | 
 44 |         num_mutant_epitopes_to_keep : int or None
 45 |             If None then keep all mutant epitopes.
 46 | 
 47 |         sort_predictions_by : str
 48 |             Field of EpitopePrediction used for sorting epitope predictions
 49 |             overlapping mutation in ascending order. Can be either 'ic50'
 50 |             or 'percentile_rank'.
 51 |         """
 52 |         self.mutant_protein_fragment = mutant_protein_fragment
 53 |         self.epitope_predictions = epitope_predictions
 54 |         self.num_mutant_epitopes_to_keep = num_mutant_epitopes_to_keep
 55 |         self.sort_predictions_by = sort_predictions_by
 56 | 
 57 |         sort_key = attrgetter(sort_predictions_by)
 58 | 
 59 |         # only keep the top k epitopes
 60 |         self.mutant_epitope_predictions = sorted([
 61 |             p for p in epitope_predictions
 62 |             if p.overlaps_mutation and not p.occurs_in_reference
 63 |         ], key=sort_key)
 64 |         if num_mutant_epitopes_to_keep:
 65 |             self.mutant_epitope_predictions = \
 66 |                 self.mutant_epitope_predictions[:num_mutant_epitopes_to_keep]
 67 | 
 68 |         self.wildtype_epitope_predictions = sorted([
 69 |             p for p in epitope_predictions
 70 |             if not p.overlaps_mutation or p.occurs_in_reference
 71 |         ], key=sort_key)
 72 | 
 73 |         self.wildtype_epitope_score = sum(
 74 |             p.logistic_epitope_score()
 75 |             for p in self.wildtype_epitope_predictions)
 76 |         # only keep the top k epitopes for the purposes of the score
 77 |         self.mutant_epitope_score = sum(
 78 |             p.logistic_epitope_score()
 79 |             for p in self.mutant_epitope_predictions)
 80 | 
 81 |         self.manufacturability_scores = \
 82 |             ManufacturabilityScores.from_amino_acids(
 83 |                 self.mutant_protein_fragment.amino_acids)
 84 | 
 85 |     def peptide_synthesis_difficulty_score_tuple(
 86 |             self,
 87 |             max_c_terminal_hydropathy=1.5,
 88 |             min_kmer_hydropathy=0,
 89 |             max_kmer_hydropathy_low_priority=1.5,
 90 |             max_kmer_hydropathy_high_priority=2.5):
 91 |         """
 92 |         Generates a tuple of scores used for lexicographic sorting of vaccine
 93 |         peptides.
 94 | 
 95 |         The most important criterion for choosing a vaccine peptide is to
 96 |         minimize the number of cysteines in the sequence (to prevent the
 97 |         formation of disulfide bonds).
 98 | 
 99 |         It is also important to keep the mean hydropathy of the C-terminal
100 |         residues below 1.5 and also to ensure that no window of amino acids
101 |         within the sequence has a mean hydropathy score > 2.5 (using
102 |         AA values from Table 2 of Kyte & Doolittle 1982).
103 | 
104 |         If there are multiple vaccine peptides all of whose subsequence
105 |         windows satisfy the GRAVY (mean hydropathy) < 2.5 constraint then
106 |         let's optimize the terminal amino acids to exclude ones known to
107 |         make solid phase synthesis difficult.
108 | 
109 |         If there are multiple vaccine peptides without difficult terminal
110 |         residues then try to eliminate N-terminal asparagine residues
111 |         (not as harmful) and asparagine-proline bonds
112 |         (known to dissociate easily). If all of these constraints
113 |         are satisfied, then attempt to keep the max k-mer hydropahy below
114 |         a lower constant (default GRAVY score 1.5) and above a minimum value
115 |         (default 0).
116 | 
117 |         (Sort criteria determined through conversations with manufacturer)
118 |         """
119 |         cterm_7mer_gravy = self.manufacturability_scores.cterm_7mer_gravy_score
120 |         max_7mer_gravy = self.manufacturability_scores.max_7mer_gravy_score
121 | 
122 |         # numbers we want to minimize, so a bigger number is worse
123 |         return (
124 |             # total number of Cys residues
125 |             self.manufacturability_scores.cysteine_count,
126 | 
127 |             # C-terminal 7mer GRAVY score < 1.5
128 |             # (or user specified max GRAVY score for C terminus of peptide)
129 |             max(0, cterm_7mer_gravy - max_c_terminal_hydropathy),
130 | 
131 |             # max 7mer GRAVY score < 2.5
132 |             # (or user specified higher priority maximum for GRAVY score)
133 |             max(0, max_7mer_gravy - max_kmer_hydropathy_high_priority),
134 | 
135 |             # avoid N-terminal Gln, Glu, Cys
136 |             self.manufacturability_scores.difficult_n_terminal_residue,
137 | 
138 |             #  avoid C-terminal Cys
139 |             self.manufacturability_scores.c_terminal_cysteine,
140 | 
141 |             # avoid C-terminal Pro
142 |             self.manufacturability_scores.c_terminal_proline,
143 | 
144 |             # avoid N-terminal Asn
145 |             self.manufacturability_scores.n_terminal_asparagine,
146 | 
147 |             # avoid Asp-Pro bonds
148 |             self.manufacturability_scores.asparagine_proline_bond_count,
149 | 
150 |             # max 7mer GRAVY score < 1.5
151 |             # (or user specified lower priority maximum for GRAVY score)
152 |             max(0, max_7mer_gravy - max_kmer_hydropathy_low_priority),
153 | 
154 |             # max 7mer GRAVY score > 0
155 |             # (or user specified min GRAVY for 7mer windows in peptide)
156 |             max(0, min_kmer_hydropathy - max_7mer_gravy),
157 |         )
158 | 
159 |     def lexicographic_sort_key(self):
160 |         """
161 |         Create tuple of scores so that candidates get sorted lexicographically
162 |         by multiple criteria. Make sure to make the wildtype epitope
163 |         score positive (since we want fewer wildtype epitopes) but the others
164 |         negative (since we want more of them).
165 |         """
166 |         # since we're sorting in decreasing order, numbers which we want
167 |         # to be larger must have their signs flipped
168 |         essential_score_tuple = (
169 |             # Sum of normalized MHC binding affinities of subsequences
170 |             # round to 5 digits to avoid floating point errors from
171 |             # serving as tie-breakers
172 |             -round(self.mutant_epitope_score, 6),
173 | 
174 |             # Number of reads supporting the variant
175 |             -self.mutant_protein_fragment.n_alt_reads
176 |         )
177 |         manufacturability_score_tuple = self.peptide_synthesis_difficulty_score_tuple()
178 |         extra_score_tuple = (
179 |             # Number of reads supporting the particular protein sequence
180 |             # sequence we're using for this vaccine peptide. Currently
181 |             # all vaccine peptides are drawn from the same larger sequence
182 |             # so this score shouldn't change.
183 |             -self.mutant_protein_fragment.n_alt_reads_supporting_protein_sequence,
184 | 
185 |             # Minimize the sum of non-mutant MHC binding scores,
186 |             # round to prevent floating point errors from serving as
187 |             # tie-breakers
188 |             round(self.wildtype_epitope_score, 6),
189 | 
190 |             # All else being equal, we prefer to maximize the number of
191 |             # mutant amino acids
192 |             -self.mutant_protein_fragment.n_mutant_amino_acids,
193 | 
194 |             # If nothing else can serve as a tie break then try to center
195 |             # the mutation in the vaccine peptide.
196 |             -self.mutant_protein_fragment.mutation_distance_from_edge
197 |         )
198 |         return (
199 |             essential_score_tuple +
200 |             manufacturability_score_tuple +
201 |             extra_score_tuple
202 |         )
203 | 
204 |     def contains_mutant_epitopes(self):
205 |         return len(self.mutant_epitope_predictions) > 0
206 | 
207 |     @property
208 |     def expression_score(self):
209 |         return np.sqrt(self.mutant_protein_fragment.n_alt_reads)
210 | 
211 |     @property
212 |     def combined_score(self):
213 |         return self.expression_score * self.mutant_epitope_score
214 | 
215 |     def to_dict(self):
216 |         epitope_predictions = self.mutant_epitope_predictions + self.wildtype_epitope_predictions
217 |         return {
218 |             "mutant_protein_fragment": self.mutant_protein_fragment,
219 |             "epitope_predictions": epitope_predictions,
220 |             "num_mutant_epitopes_to_keep": self.num_mutant_epitopes_to_keep,
221 |             "sort_predictions_by": self.sort_predictions_by,
222 |         }
223 | 
224 |     @classmethod
225 |     def from_dict(cls, d):
226 |         d = d.copy()
227 |         if "sort_predictions_by" not in d:
228 |             d["sort_predictions_by"] = "ic50"
229 |         return cls(**d)
230 | 


--------------------------------------------------------------------------------
/vaxrank/vaxrank_results.py:
--------------------------------------------------------------------------------
  1 | # Licensed under the Apache License, Version 2.0 (the "License");
  2 | # you may not use this file except in compliance with the License.
  3 | # You may obtain a copy of the License at
  4 | #
  5 | #       http://www.apache.org/licenses/LICENSE-2.0
  6 | #
  7 | # Unless required by applicable law or agreed to in writing, software
  8 | # distributed under the License is distributed on an "AS IS" BASIS,
  9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 10 | # See the License for the specific language governing permissions and
 11 | # limitations under the License.
 12 | 
 13 | from collections import OrderedDict
 14 | 
 15 | from serializable import Serializable
 16 | 
 17 | class VaxrankResults(Serializable):
 18 |     """
 19 |     Data class used to represent all results captured by running  Vaxrank.
 20 |     """
 21 |     def __init__(
 22 |             self,
 23 |             isovar_results,
 24 |             variant_to_vaccine_peptides_dict,
 25 |             ranked_vaccine_peptides):
 26 |         """
 27 |         Parameters
 28 |         ----------
 29 |         isovar_results : list of isovar.IsovarResult
 30 |             IsovarResult object for each variant without any filtering
 31 | 
 32 |         variant_to_vaccine_peptides_dict : dict
 33 |             Dictionary mapping variant to a list of possible vaccine peptides
 34 | 
 35 |         ranked_vaccine_peptides : list of VaccinePeptide
 36 |         """
 37 |         self.isovar_results = isovar_results
 38 |         self.variant_to_vaccine_peptides_dict = variant_to_vaccine_peptides_dict
 39 |         self.ranked_vaccine_peptides = ranked_vaccine_peptides
 40 | 
 41 | 
 42 |     @property
 43 |     def variants(self):
 44 |         """
 45 |         Unfiltered list of variants
 46 | 
 47 |         Returns
 48 |         -------
 49 |         list of varcode.Variant
 50 |         """
 51 |         return [
 52 |             isovar_result.variant
 53 |             for isovar_result
 54 |             in self.isovar_results
 55 |         ]
 56 | 
 57 |     def variant_counts(self):
 58 |         """
 59 |         Summarize Vaxrank counts for total variants, variants with coding effects,
 60 |         variants with RNA support, and variants with associated vaccine peptides.
 61 | 
 62 |         Returns
 63 |         -------
 64 |         dict
 65 |         """
 66 |         variant_properties = self.variant_properties()
 67 | 
 68 |         # dictionary which will contain some overall variant counts
 69 |         # for report display
 70 |         counts_dict = {}
 71 |         counts_dict['num_total_variants'] = len(self.isovar_results)
 72 |         counts_dict['num_coding_effect_variants'] = \
 73 |             sum([v['is_coding_nonsynonymous'] for v in variant_properties])
 74 |         counts_dict['num_variants_with_rna_support'] = \
 75 |             sum([v['rna_support'] for v in variant_properties])
 76 | 
 77 |         counts_dict['num_variants_with_vaccine_peptides'] =  \
 78 |             sum([v['has_vaccine_peptide'] for v in variant_properties])
 79 |         return counts_dict
 80 | 
 81 |     def variant_properties(self, gene_pathway_check=None):
 82 |         """
 83 |         Parameters
 84 |         ----------
 85 |         gene_pathway_check : GenePathwayCheck (optional)
 86 |             Used to look up whether a mutation or its affected gene are in some
 87 |             biologically important pathway.
 88 | 
 89 |         Returns
 90 |         -------
 91 |         list of dictionaries containing properties we want to analyze later,
 92 |         e.g. whether this variant is part of a pathway of interest,
 93 |         is a strong MHC binder, etc.
 94 |         """
 95 |         variant_properties_list = []
 96 |         for isovar_result in self.isovar_results:
 97 |             variant = isovar_result.variant
 98 | 
 99 |             variant_dict = OrderedDict((
100 |                 ('gene_name', isovar_result.top_gene_name),
101 |                 ('contig', variant.contig),
102 |                 ('start', variant.start),
103 |                 ('ref', variant.ref),
104 |                 ('alt', variant.alt),
105 |                 ('is_coding_nonsynonymous',
106 |                     isovar_result.predicted_effect_modifies_protein_sequence),
107 |                 ('rna_support',
108 |                     isovar_result.has_mutant_protein_sequence_from_rna),
109 |             ))
110 | 
111 |             # TODO:
112 |             #  compute MHC binder status for variants that don't have RNA support
113 |             variant_dict['mhc_binder'] = \
114 |                 variant_dict["has_vaccine_peptide"] = \
115 |                     variant in self.variant_to_vaccine_peptides_dict
116 | 
117 |             if gene_pathway_check is not None:
118 |                 pathway_dict = gene_pathway_check.make_variant_dict(variant)
119 |                 variant_dict.update(pathway_dict)
120 | 
121 |             variant_properties_list.append(variant_dict)
122 |         return variant_properties_list
123 | 


--------------------------------------------------------------------------------