├── .editorconfig
├── .github
├── release-drafter.yml
└── workflows
│ ├── after-master-commit.yml
│ ├── compare-annotation.yml
│ ├── compare-genomic-change-annotation.yml
│ ├── pytest.yml
│ └── release-management.yml
├── .gitignore
├── .version-level
├── AnnotatorCore.py
├── ClinicalDataAnnotator.py
├── CnaAnnotator.py
├── FusionAnnotator.py
├── GenerateReadMe.py
├── LICENSE
├── MafAnnotator.py
├── OncoKBPlots.py
├── README.md
├── StructuralVariantAnnotator.py
├── actionability_functions_msi_tmb_manuscript_R.r
├── data
├── example_atypical_alterations.txt
├── example_clinical.txt
├── example_cna.txt
├── example_fusions.txt
├── example_individual_cna.txt
├── example_maf.txt
├── example_maf_grch38.txt
└── example_sv.txt
├── example.sh
├── flake8.ini
├── requirements
├── common.txt
├── pip2.7.txt
└── pip3.txt
├── test_Annotation.py
└── test_AnnotatorCore.py
/.editorconfig:
--------------------------------------------------------------------------------
1 | # The EditorConfig project consists of a file format for defining coding styles
2 | # and a collection of text editor plugins that enable editors to read the file format
3 | # and adhere to defined styles.
4 |
5 | # EditorConfig files are read top to bottom and the closest EditorConfig files are read last.
6 | # Properties from matching EditorConfig sections are applied in the order they were read,
7 | # so properties in closer files take precedence.
8 |
9 | # Please only specify the formats you want to apply throughout the project in this file.
10 | # Otherwise, please create a new config file in the directory where you want to apply these styles.
11 |
12 | # More details about EditorConfig: http://EditorConfig.org
13 |
14 | # top-most EditorConfig file
15 | root = true
16 |
17 | [*]
18 | # Neither a final newline nor trailing-whitespace trimming is enforced
19 | insert_final_newline = false
20 | trim_trailing_whitespace = false
21 |
22 |
23 |
--------------------------------------------------------------------------------
/.github/release-drafter.yml:
--------------------------------------------------------------------------------
1 | name-template: 'v$NEXT_PATCH_VERSION'
2 | tag-template: 'v$NEXT_PATCH_VERSION'
3 | categories:
4 | - title: '🧬 Features'
5 | labels:
6 | - 'feature'
7 | - title: '🐛 Bug Fixes'
8 | labels:
9 | - 'fix'
10 | - title: '🏎 Performance Tweaks'
11 | labels:
12 | - 'performance'
13 | - title: '🎨 Style Tweaks'
14 | labels:
15 | - 'style tweak'
16 | - title: '📘 Documentation'
17 | labels:
18 | - 'documentation'
19 | - title: '🧹 Cleanup'
20 | labels:
21 | - 'cleanup'
22 | - title: '👷♀️ Testing, Configuration & Deployment'
23 | labels:
24 | - 'devops'
25 | - title: '🧰 Maintenance'
26 | labels:
27 | - 'chore'
28 | - 'dependencies'
29 | change-template: '- $TITLE @$AUTHOR (#$NUMBER)'
30 | template: |
31 | ## Changes
32 | $CHANGES
33 | ## 🕵️♀️ Full commit logs
34 | - https://github.com/oncokb/oncokb-annotator/compare/$PREVIOUS_TAG...v$NEXT_PATCH_VERSION
35 |
--------------------------------------------------------------------------------
/.github/workflows/after-master-commit.yml:
--------------------------------------------------------------------------------
1 | name: After master commit
2 |
3 | on:
4 | push:
5 | branches:
6 | - master
7 |
8 | jobs:
9 | check-version-level-and-update:
10 | if: github.repository == 'oncokb/oncokb-annotator'
11 | runs-on: ubuntu-latest
12 | steps:
13 | - uses: actions/checkout@v2
14 | with:
15 | fetch-depth: 0
16 | - name: 'Update Version Level'
17 | run: |
18 | git pull
19 | VERSION_LEVEL=$(cat .version-level | tr "[:upper:]" "[:lower:]")
20 |
21 | RELEASE_DRAFTER_MINOR='NEXT_MINOR_VERSION'
22 | RELEASE_DRAFTER_PATCH='NEXT_PATCH_VERSION'
23 |
24 | if [[ $VERSION_LEVEL == 'minor' ]]; then
25 | sed -i "s/$RELEASE_DRAFTER_PATCH/$RELEASE_DRAFTER_MINOR/gi" .github/release-drafter.yml
26 | fi
27 |
28 | if [[ $VERSION_LEVEL == 'patch' ]]; then
29 | sed -i "s/$RELEASE_DRAFTER_MINOR/$RELEASE_DRAFTER_PATCH/gi" .github/release-drafter.yml
30 | fi
31 |
32 | CHANGED=$(git diff --name-only HEAD --)
33 | if [ -n "$CHANGED" ]
34 | then
35 | git config user.name oncokb-bot
36 | git config user.email dev.oncokb@gmail.com
37 | git add .
38 | git commit -m "Update action files to align the version level to $VERSION_LEVEL"
39 | git push
40 | fi
41 |
--------------------------------------------------------------------------------
/.github/workflows/compare-annotation.yml:
--------------------------------------------------------------------------------
1 | # This workflow installs Python dependencies, runs the annotators, and compares the output against the master annotation
2 |
3 | name: Compare Annotation
4 |
5 | on:
6 | push:
7 | branches:
8 | - master
9 | - next-minor-release
10 | pull_request:
11 | branches:
12 | - master
13 | - next-minor-release
14 | jobs:
15 | build:
16 | if: github.repository == 'oncokb/oncokb-annotator'
17 | runs-on: macos-latest
18 | steps:
19 | - uses: actions/checkout@v2
20 | - name: Set up Python 3.8
21 | uses: actions/setup-python@v2
22 | with:
23 | python-version: 3.8
24 | - name: Install dependencies
25 | run: |
26 | python -m pip install --upgrade pip
27 | pip install flake8
28 | pip install -r requirements/common.txt -r requirements/pip3.txt
29 | - name: Lint with flake8
30 | run: |
31 | # stop the build if there are Python syntax errors or undefined names
32 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
33 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
34 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
35 | - name: Annotate
36 | id: annotate
37 | env:
38 | ONCOKB_API_TOKEN: ${{ secrets.ONCOKB_BOT_API_TOKEN }}
39 | ONCOKB_OAUTH_TOKEN: ${{ secrets.ONCOKB_OAUTH_TOKEN }}
40 | run: |
41 | git checkout -b compare
42 |
43 | MUTATION_DATA_NAME=data_mutations_mskcc.txt
44 | CLINICAL_DATA_NAME=data_clinical_sample.txt
45 | FUSION_DATA_NAME=data_fusions.txt
46 | INDIVIDUAL_CNA_DATA_NAME=data_individual_CNA.txt
47 | # download only the four input files that are annotated below
48 | cd data || exit
49 | curl -s -H "Authorization: token ${ONCOKB_OAUTH_TOKEN}" https://api.github.com/repos/knowledgesystems/oncokb-data/contents/annotation/annotator-test/data | jq -r '.[] | .download_url + " " + .name' | while IFS=' ' read -r downloadurl name; do
50 | if [[ "$name" == "$MUTATION_DATA_NAME" || "$name" == "$CLINICAL_DATA_NAME" || "$name" == "$FUSION_DATA_NAME" || "$name" == "$INDIVIDUAL_CNA_DATA_NAME" ]]; then
51 | curl -s "$downloadurl" -o "$name"
52 | fi
53 | done
54 | cd ..
55 |
56 | # create compare folder to add all annotated files
57 | mkdir compare
58 |
59 | PREFIX=oncokb
60 | IMAF=data/"$MUTATION_DATA_NAME"
61 | OMAF=compare/"$PREFIX"_"$MUTATION_DATA_NAME"
62 |
63 | IC=data/"$CLINICAL_DATA_NAME"
64 | OC=compare/"$PREFIX"_"$CLINICAL_DATA_NAME"
65 |
66 | IF=data/"$FUSION_DATA_NAME"
67 | OF=compare/"$PREFIX"_"$FUSION_DATA_NAME"
68 |
69 | IICNA=data/"$INDIVIDUAL_CNA_DATA_NAME"
70 | OICNA=compare/"$PREFIX"_"$INDIVIDUAL_CNA_DATA_NAME"
71 |
72 | python MafAnnotator.py -i "$IMAF" -o "$OMAF" -c "$IC" -b "$ONCOKB_API_TOKEN"
73 | python FusionAnnotator.py -i "$IF" -o "$OF" -c "$IC" -b "$ONCOKB_API_TOKEN"
74 | python CnaAnnotator.py -i "$IICNA" -o "$OICNA" -c "$IC" -b "$ONCOKB_API_TOKEN" -f "individual"
75 | python ClinicalDataAnnotator.py -i "$IC" -o "$OC" -a "$OMAF,$OICNA,$OF"
76 |
77 | git config user.name oncokb-bot
78 | git config user.email dev.oncokb@gmail.com
79 | # commit the fresh annotations so the next step can diff against them
80 | git add .
81 | git commit -m 'add analysis'
82 |
83 | - name: Compare annotation result with the ones from master
84 | id: compare
85 | env:
86 | ONCOKB_OAUTH_TOKEN: ${{ secrets.ONCOKB_OAUTH_TOKEN }}
87 | FIEL_NAME_PREFIX: 'oncokb_data'
88 | run: |
89 | # remove everything under the compare folder and replace it with the files from oncokb-data
90 | rm -f compare/*.txt
91 |
92 | cd compare || exit
93 | curl -s -H "Authorization: token ${ONCOKB_OAUTH_TOKEN}" https://api.github.com/repos/knowledgesystems/oncokb-data/contents/annotation/annotator-test/annotation | jq -r '.[] | .download_url + " " + .name' | while IFS=' ' read -r downloadurl name; do
94 | if [[ "$name" == "$FIEL_NAME_PREFIX"* ]]; then
95 | curl -s "$downloadurl" -o "$name"
96 | fi
97 | done
98 | cd ..
99 |
100 | # fail the job when the downloaded files differ from the annotations committed by the previous step
101 | CHANGED=$(git diff --name-only HEAD --)
102 |
103 | if [ -n "$CHANGED" ]
104 | then
105 | git diff
106 | exit 1
107 | fi
108 |
109 |
--------------------------------------------------------------------------------
/.github/workflows/compare-genomic-change-annotation.yml:
--------------------------------------------------------------------------------
1 | # This workflow installs Python dependencies, runs the genomic change annotation, and compares it against the master annotation for a particular study
2 |
3 | name: Compare Genomic Change Annotation
4 |
5 | on:
6 | push:
7 | branches:
8 | - master
9 | - next-minor-release
10 | pull_request:
11 | branches:
12 | - master
13 | - next-minor-release
14 | jobs:
15 | build:
16 | if: github.repository == 'oncokb/oncokb-annotator'
17 | runs-on: macos-latest
18 | steps:
19 | - uses: actions/checkout@v2
20 | - name: Set up Python 3.8
21 | uses: actions/setup-python@v2
22 | with:
23 | python-version: 3.8
24 | - name: Install dependencies
25 | run: |
26 | python -m pip install --upgrade pip
27 | pip install flake8
28 | pip install -r requirements/common.txt -r requirements/pip3.txt
29 | - name: Lint with flake8
30 | run: |
31 | # stop the build if there are Python syntax errors or undefined names
32 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
33 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
34 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
35 | - name: Annotate
36 | id: annotate
37 | env:
38 | ONCOKB_API_TOKEN: ${{ secrets.ONCOKB_BOT_API_TOKEN }}
39 | ONCOKB_OAUTH_TOKEN: ${{ secrets.ONCOKB_OAUTH_TOKEN }}
40 | run: |
41 | git checkout -b compare
42 |
43 | MUTATION_DATA_NAME=data_mutations_mskcc.txt
44 | CLINICAL_DATA_NAME=data_clinical_sample.txt
45 |
46 | cd data || exit
47 | curl -s -H "Authorization: token ${ONCOKB_OAUTH_TOKEN}" https://api.github.com/repos/knowledgesystems/oncokb-data/contents/annotation/annotator-test/data | jq -r '.[] | .download_url + " " + .name' | while IFS=' ' read -r downloadurl name; do
48 | if [[ "$name" == "$MUTATION_DATA_NAME" || "$name" == "$CLINICAL_DATA_NAME" ]]; then
49 | curl -s "$downloadurl" -o "$name"
50 | fi
51 | done
52 | cd ..
53 |
54 | # create compare folder to add all annotated files
55 | mkdir compare
56 |
57 | OGCMAF=oncokb_genomic_change_$MUTATION_DATA_NAME
58 |
59 | python MafAnnotator.py -i "data/$MUTATION_DATA_NAME" -o "compare/$OGCMAF" -c "data/$CLINICAL_DATA_NAME" -b "$ONCOKB_API_TOKEN" -q Genomic_Change
60 |
61 | git config user.name oncokb-bot
62 | git config user.email dev.oncokb@gmail.com
63 |
64 | git add .
65 | git commit -m 'add analysis'
66 | # expose the annotated file name to later steps via GITHUB_OUTPUT (the set-output command is deprecated)
67 | echo "FILE_NAME=$OGCMAF" >> "$GITHUB_OUTPUT"
68 |
69 | - name: Compare annotation result with the ones from master
70 | id: compare
71 | env:
72 | FILE_NAME: ${{steps.annotate.outputs.FILE_NAME}}
73 | ONCOKB_OAUTH_TOKEN: ${{ secrets.ONCOKB_OAUTH_TOKEN }}
74 | run: |
75 | # remove everything under the compare folder and replace it with the file from oncokb-data
76 | rm -f compare/*.txt
77 |
78 | cd compare || exit
79 | curl -s -H "Authorization: token ${ONCOKB_OAUTH_TOKEN}" https://api.github.com/repos/knowledgesystems/oncokb-data/contents/annotation/annotator-test/annotation | jq -r '.[] | .download_url + " " + .name' | while IFS=' ' read -r downloadurl name; do
80 | if [[ "$name" == "$FILE_NAME" ]]; then
81 | curl -s "$downloadurl" -o "$name"
82 | fi
83 | done
84 | cd ..
85 |
86 | # fail the job when the downloaded file differs from the annotation committed by the previous step
87 | CHANGED=$(git diff --name-only HEAD --)
88 |
89 | if [ -n "$CHANGED" ]
90 | then
91 | git diff
92 | exit 1
93 | fi
94 |
--------------------------------------------------------------------------------
/.github/workflows/pytest.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
3 |
4 | name: Run all python tests
5 |
6 | on:
7 | push:
8 | branches: [ master, next-minor-release ]
9 | pull_request:
10 | branches: [ master, next-minor-release ]
11 |
12 | jobs:
13 | lint:
14 | name: Linting using flake8
15 | runs-on: ubuntu-latest
16 | steps:
17 | - uses: actions/checkout@v2
18 | - uses: actions/setup-python@v2
19 | with:
20 | python-version: "3.9"
21 | - name: Run flake8
22 | uses: julianwachholz/flake8-action@v2
23 | with:
24 | checkName: "Python Lint"
25 | path: .
26 | config: flake8.ini
27 | env:
28 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
29 | pytest:
30 | needs: lint
31 | runs-on: ${{ matrix.os }}
32 | strategy:
33 | matrix:
34 | os: [ ubuntu-latest, macos-latest ]
35 | python-version: [ '3.8','3.9','3.10','3.11' ]
36 | steps:
37 | - uses: actions/checkout@v2
38 | - name: Set up Python ${{ matrix.python-version }}
39 | uses: actions/setup-python@v4
40 | with:
41 | python-version: ${{ matrix.python-version }}
42 | - name: Install dependencies
43 | env:
44 | PYTHON_VERSION: ${{ matrix.python-version }}
45 | run: |
46 | python -m pip install --upgrade pip
47 | pip install pytest
48 | if [[ $PYTHON_VERSION =~ ^2\.[0-9]+$ ]]; then pip install -r requirements/common.txt -r requirements/pip2.7.txt; fi
49 | if [[ $PYTHON_VERSION =~ ^3\.[0-9]+$ ]]; then pip install -r requirements/common.txt -r requirements/pip3.txt; fi
50 | - name: Test with pytest
51 | env:
52 | ONCOKB_API_TOKEN: ${{ secrets.ONCOKB_BOT_API_TOKEN }}
53 | run: |
54 | pytest
55 |
56 | build-in-windows:
57 | needs: lint
58 | runs-on: windows-latest
59 | strategy:
60 | matrix:
61 | python-version: [ '3.8','3.9','3.10','3.11' ]
62 | steps:
63 | - uses: actions/checkout@v2
64 | - name: Set up Python ${{ matrix.python-version }}
65 | uses: actions/setup-python@v4
66 | with:
67 | python-version: ${{ matrix.python-version }}
68 | - name: Install dependencies
69 | env:
70 | PYTHON_VERSION: ${{ matrix.python-version }}
71 | run: |
72 | python -m pip install --upgrade pip
73 | pip install pytest
74 | if ( $env:PYTHON_VERSION -match '^2\.[0-9]+$' )
75 | {
76 | pip install -r requirements/common.txt -r requirements/pip2.7.txt
77 | }
78 | if ( $env:PYTHON_VERSION -match '^3\.[0-9]+$' )
79 | {
80 | pip install -r requirements/common.txt -r requirements/pip3.txt
81 | }
82 | - name: Test with pytest
83 | env:
84 | ONCOKB_API_TOKEN: ${{ secrets.ONCOKB_BOT_API_TOKEN }}
85 | run: |
86 | pytest
87 |
--------------------------------------------------------------------------------
/.github/workflows/release-management.yml:
--------------------------------------------------------------------------------
1 | name: Release Management
2 |
3 | on:
4 | push:
5 | # branches to consider in the event; optional, defaults to all
6 | branches:
7 | - master
8 |
9 | jobs:
10 | update_draft_release:
11 | runs-on: ubuntu-latest
12 | steps:
13 | # Drafts your next Release notes as Pull Requests are merged into "master"
14 | - uses: release-drafter/release-drafter@v5
15 | env:
16 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
17 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # oncokb output data
2 | data/*.oncokb.*
3 | data/example_README.txt
4 | process
5 |
6 | # Byte-compiled / optimized / DLL files
7 | __pycache__/
8 | *.py[cod]
9 | *$py.class
10 |
11 | # C extensions
12 | *.so
13 |
14 | # Distribution / packaging
15 | .Python
16 | env/
17 | build/
18 | develop-eggs/
19 | dist/
20 | downloads/
21 | eggs/
22 | .eggs/
23 | lib/
24 | lib64/
25 | parts/
26 | sdist/
27 | var/
28 | *.egg-info/
29 | .installed.cfg
30 | *.egg
31 |
32 | # PyInstaller
33 | # Usually these files are written by a python script from a template
34 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
35 | *.manifest
36 | *.spec
37 |
38 | # Installer logs
39 | pip-log.txt
40 | pip-delete-this-directory.txt
41 |
42 | # Unit test / coverage reports
43 | htmlcov/
44 | .tox/
45 | .coverage
46 | .coverage.*
47 | .cache
48 | nosetests.xml
49 | coverage.xml
50 | *,cover
51 | .hypothesis/
52 |
53 | # Translations
54 | *.mo
55 | *.pot
56 |
57 | # Django stuff:
58 | *.log
59 | local_settings.py
60 |
61 | # Flask stuff:
62 | instance/
63 | .webassets-cache
64 |
65 | # Scrapy stuff:
66 | .scrapy
67 |
68 | # Sphinx documentation
69 | docs/_build/
70 |
71 | # PyBuilder
72 | target/
73 |
74 | # IPython Notebook
75 | .ipynb_checkpoints
76 |
77 | # pyenv
78 | .python-version
79 |
80 | # celery beat schedule file
81 | celerybeat-schedule
82 |
83 | # dotenv
84 | .env
85 |
86 | # virtualenv
87 | venv/
88 | ENV/
89 |
90 | # Spyder project settings
91 | .spyderproject
92 |
93 | # Rope project settings
94 | .ropeproject
95 |
96 | # PyCharm
97 | .idea/
98 |
99 | # MAC OS
100 | .DS_Store
101 |
--------------------------------------------------------------------------------
/.version-level:
--------------------------------------------------------------------------------
1 | patch
2 |
--------------------------------------------------------------------------------
/ClinicalDataAnnotator.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import sys
4 | import re
5 | import argparse
6 | import logging
7 |
8 | from AnnotatorCore import setsampleidsfileterfile
9 | from AnnotatorCore import process_clinical_data
10 |
11 | logging.basicConfig(level=logging.INFO)
12 | log = logging.getLogger('ClinicalDataAnnotator')
13 |
14 |
15 | def main(argv):
16 | if argv.help:
17 | log.info(
18 | '\n'
19 | 'ClinicalDataAnnotator.py -i -o