├── .github └── workflows │ ├── publish.yml │ └── test.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yml ├── CONTRIBUTING.rst ├── Dockerfile ├── LICENSE ├── MANIFEST.in ├── README.md ├── docs └── source │ ├── LICENSE.txt │ ├── _static │ ├── css │ │ └── custom.css │ ├── img │ │ ├── gui-example-xtandem-advanced.png │ │ ├── gui-example-xtandem-finished.png │ │ ├── gui-example-xtandem-modifications-before.png │ │ ├── gui-example-xtandem-modifications-filled.png │ │ ├── gui-example-xtandem-output-files.png │ │ ├── gui-example-xtandem-processes.png │ │ ├── gui-example-xtandem-progress.png │ │ ├── gui-example-xtandem-psm-file.png │ │ ├── gui-example-xtandem-psm-filetype.png │ │ ├── gui-example-xtandem-spectra.png │ │ ├── gui-example-xtandem-start.png │ │ ├── gui-fixed-modifications.png │ │ ├── gui-modification-mapping.png │ │ ├── gui-overview.png │ │ ├── gui-screenshot-old.png │ │ ├── gui-screenshot.png │ │ ├── ms2rescore-overview.png │ │ ├── ms2rescore_logo.png │ │ ├── percolator-install-path.png │ │ └── qc-reports.png │ └── js │ │ └── badge.min.js │ ├── api │ ├── ms2rescore.feature_generators.rst │ ├── ms2rescore.report.rst │ ├── ms2rescore.rescoring_engines.rst │ └── ms2rescore.rst │ ├── cli.rst │ ├── conf.py │ ├── config_schema.md │ ├── contributing.rst │ ├── gui.rst │ ├── index.rst │ ├── installation.rst │ ├── tutorials │ └── in-depth-python-api.ipynb │ └── userguide │ ├── configuration.rst │ ├── input-files.rst │ ├── output-files.rst │ ├── search-engine-notes.rst │ └── tims2Rescore.rst ├── examples ├── mascot-ms2rescore.json ├── mascot-ms2rescore.toml ├── maxquant-ms2rescore.json ├── maxquant-ms2rescore.toml ├── msgfplus-ms2rescore.json ├── msgfplus-ms2rescore.toml ├── peptideshaker-ms2rescore.json ├── peptideshaker-ms2rescore.toml ├── sage-ms2rescore.json ├── sage-ms2rescore.toml ├── xtandem-ms2rescore.json └── xtandem-ms2rescore.toml ├── img ├── gui-screenshot.png ├── ms2rescore.ico ├── ms2rescore_logo.png └── ms2rescore_logo.svg ├── 
ms2rescore.spec ├── ms2rescore ├── __init__.py ├── __main__.py ├── config_parser.py ├── core.py ├── exceptions.py ├── feature_generators │ ├── __init__.py │ ├── base.py │ ├── basic.py │ ├── deeplc.py │ ├── im2deep.py │ ├── ionmob.py │ ├── maxquant.py │ └── ms2pip.py ├── gui │ ├── __init__.py │ ├── __main__.py │ ├── app.py │ ├── function2ctk.py │ └── widgets.py ├── package_data │ ├── __init__.py │ ├── config_default.json │ ├── config_default_tims.json │ ├── config_schema.json │ └── img │ │ ├── __init__.py │ │ ├── comments_icon_black.png │ │ ├── comments_icon_white.png │ │ ├── config_icon.png │ │ ├── docs_icon_black.png │ │ ├── docs_icon_white.png │ │ ├── github_icon_black.png │ │ ├── github_icon_white.png │ │ ├── ms2rescore_logo.png │ │ └── program_icon.ico ├── parse_psms.py ├── parse_spectra.py ├── report │ ├── __init__.py │ ├── __main__.py │ ├── charts.py │ ├── generate.py │ ├── templates │ │ ├── __init__.py │ │ ├── about.html │ │ ├── base.html │ │ ├── config.html │ │ ├── features.html │ │ ├── log.html │ │ ├── metadata.html │ │ ├── overview.html │ │ ├── stats-card.html │ │ ├── style.html │ │ ├── target-decoy.html │ │ └── texts.toml │ └── utils.py ├── rescoring_engines │ ├── __init__.py │ ├── mokapot.py │ └── percolator.py └── utils.py ├── ms2rescore_innosetup.iss ├── pyproject.toml └── tests ├── __init__.py ├── test_config_parser.py ├── test_data ├── msms-psms.tsv └── test.mgf └── test_parse_spectra.py /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish 2 | 3 | on: 4 | release: 5 | types: [created] 6 | workflow_dispatch: 7 | 8 | jobs: 9 | python-package: 10 | runs-on: ubuntu-latest 11 | permissions: 12 | id-token: write # IMPORTANT: this permission is mandatory for trusted publishing 13 | steps: 14 | - uses: actions/checkout@v4 15 | 16 | - name: Set up Python 17 | uses: actions/setup-python@v5 18 | with: 19 | python-version: "3.11" 20 | 21 | - name: Install dependencies 22 | run: | 23 | 
python -m pip install --upgrade pip 24 | python -m pip install --upgrade build pytest 25 | 26 | - name: Build source and wheel 27 | run: | 28 | python -m build --sdist --wheel --outdir dist/ 29 | 30 | - name: Test built package 31 | run: | 32 | pip install --only-binary :all: dist/ms2rescore-*.whl 33 | # pytest 34 | ms2rescore --help 35 | 36 | - name: Upload build artifacts 37 | uses: actions/upload-artifact@v4 38 | with: 39 | name: python-package 40 | path: dist/* 41 | 42 | - uses: pypa/gh-action-pypi-publish@release/v1 43 | 44 | windows-installer: 45 | runs-on: windows-latest 46 | needs: python-package 47 | steps: 48 | - uses: actions/checkout@v4 49 | 50 | - uses: actions/setup-python@v5 51 | with: 52 | python-version: "3.11" 53 | 54 | - name: Install package and dependencies 55 | run: | 56 | python -m pip install --upgrade pip 57 | pip install --only-binary :all: .[ionmob] pyinstaller 58 | 59 | - name: Install Inno Setup 60 | uses: crazy-max/ghaction-chocolatey@v3 61 | with: 62 | args: install innosetup -y --allow-unofficial --force 63 | 64 | - name: Run pyinstaller 65 | run: pyinstaller ./ms2rescore.spec --clean --noconfirm 66 | 67 | - name: Test built exe 68 | run: dist/ms2rescore/ms2rescore.exe 69 | 70 | - name: Run Inno Setup 71 | run: ISCC.exe ./ms2rescore_innosetup.iss /DAppVersion=${{ github.ref_name }} 72 | 73 | - name: Upload artifact 74 | uses: actions/upload-artifact@v4 75 | with: 76 | name: dist 77 | path: dist/*.exe 78 | 79 | - name: Upload installer to release 80 | uses: svenstaro/upload-release-action@v2 81 | with: 82 | repo_token: ${{ secrets.GITHUB_TOKEN }} 83 | tag: ${{ github.ref }} 84 | file_glob: true 85 | file: dist/*.exe 86 | 87 | docker-image: 88 | runs-on: ubuntu-latest 89 | permissions: 90 | packages: write 91 | contents: read 92 | attestations: write 93 | id-token: write 94 | steps: 95 | - name: Check out the repo 96 | uses: actions/checkout@v4 97 | 98 | - name: Log in to the Container registry 99 | uses: 
docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1 100 | with: 101 | registry: ghcr.io 102 | username: ${{ github.actor }} 103 | password: ${{ secrets.GITHUB_TOKEN }} 104 | 105 | - name: Extract metadata (tags, labels) for Docker 106 | id: meta 107 | uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7 108 | with: 109 | images: ghcr.io/${{ github.repository }} 110 | 111 | - name: Build and push Docker images 112 | id: push 113 | uses: docker/build-push-action@3b5e8027fcad23fda98b2e3ac259d8d67585f671 114 | with: 115 | context: . 116 | push: true 117 | tags: ${{ steps.meta.outputs.tags }} 118 | labels: ${{ steps.meta.outputs.labels }} 119 | 120 | - name: Generate artifact attestation 121 | uses: actions/attest-build-provenance@v1 122 | with: 123 | subject-name: ghcr.io/${{ github.repository }} 124 | subject-digest: ${{ steps.push.outputs.digest }} 125 | push-to-registry: true 126 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | workflow_dispatch: 9 | 10 | jobs: 11 | test-python-package: 12 | runs-on: ubuntu-latest 13 | strategy: 14 | matrix: 15 | python-version: ["3.9", "3.10", "3.11"] 16 | steps: 17 | - uses: actions/checkout@v4 18 | 19 | - name: Set up Python ${{ matrix.python-version }} 20 | uses: actions/setup-python@v5 21 | with: 22 | python-version: ${{ matrix.python-version }} 23 | 24 | - name: Install dependencies 25 | run: | 26 | python -m pip install --upgrade pip 27 | pip install ruff 28 | 29 | - name: Run Ruff 30 | run: ruff check --output-format=github . 
31 | 32 | - name: Build and install ms2rescore package 33 | run: | 34 | pip install --only-binary :all: .[dev] 35 | 36 | - name: Test with pytest 37 | run: | 38 | pytest 39 | 40 | - name: Test installation 41 | run: | 42 | ms2rescore --help 43 | 44 | test-windows-installer: 45 | # Only run on push to main (e.g., after PR merge) 46 | if: ${{ github.ref == 'refs/heads/main' }} 47 | runs-on: windows-latest 48 | steps: 49 | - uses: actions/checkout@v4 50 | 51 | - uses: actions/setup-python@v5 52 | with: 53 | python-version: "3.11" 54 | 55 | - name: Install package and dependencies 56 | run: | 57 | python -m pip install --upgrade pip 58 | pip install --only-binary :all: .[ionmob] pyinstaller 59 | 60 | - name: Install Inno Setup 61 | uses: crazy-max/ghaction-chocolatey@v3 # same major version as the installer job in publish.yml 62 | with: 63 | args: install innosetup -y --allow-unofficial --force 64 | 65 | - name: Run pyinstaller 66 | run: pyinstaller ./ms2rescore.spec --clean --noconfirm 67 | 68 | - name: Test built exe 69 | run: dist/ms2rescore/ms2rescore.exe 70 | 71 | - name: Run Inno Setup 72 | run: ISCC.exe ./ms2rescore_innosetup.iss /DAppVersion=${{ github.ref_name }} 73 | 74 | - name: Upload artifact 75 | uses: actions/upload-artifact@v4 76 | with: 77 | name: dist 78 | path: dist/*.exe 79 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Others 2 | nbs/ 3 | .prettierrc 4 | data/ 5 | steps.txt 6 | old_files/ 7 | prepare_pin_files.py 8 | *.jar 9 | *.tar 10 | 11 | # Ruff 12 | .ruff_cache/ 13 | 14 | # Atom remote-sync config 15 | .remote-sync.json 16 | 17 | # Byte-compiled / optimized / DLL files 18 | __pycache__/ 19 | *.py[cod] 20 | *$py.class 21 | 22 | # C extensions 23 | *.so 24 | 25 | # Distribution / packaging 26 | .Python 27 | env/ 28 | build/ 29 | develop-eggs/ 30 | dist/ 31 | downloads/ 32 | eggs/ 33 | .eggs/ 34 | lib/ 35 | lib64/ 36 | parts/ 37 | sdist/ 38 | var/ 39 | *.egg-info/ 40 | 
.installed.cfg 41 | *.egg 42 | 43 | # PyInstaller 44 | # Usually these files are written by a python script from a template 45 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 46 | *.manifest 47 | 48 | # Installer logs 49 | pip-log.txt 50 | pip-delete-this-directory.txt 51 | 52 | # Unit test / coverage reports 53 | htmlcov/ 54 | .tox/ 55 | .coverage 56 | .coverage.* 57 | .cache 58 | nosetests.xml 59 | coverage.xml 60 | *,cover 61 | .hypothesis/ 62 | 63 | # Translations 64 | *.mo 65 | *.pot 66 | 67 | # Django stuff: 68 | *.log 69 | local_settings.py 70 | 71 | # Flask stuff: 72 | instance/ 73 | .webassets-cache 74 | 75 | # Scrapy stuff: 76 | .scrapy 77 | 78 | # Sphinx documentation 79 | docs/_build/ 80 | 81 | # PyBuilder 82 | target/ 83 | 84 | # IPython Notebook 85 | .ipynb_checkpoints 86 | 87 | # pyenv 88 | .python-version 89 | 90 | # celery beat schedule file 91 | celerybeat-schedule 92 | 93 | # dotenv 94 | .env 95 | 96 | # virtualenv 97 | venv/ 98 | ENV/ 99 | .venv*/ 100 | 101 | # Spyder project settings 102 | .spyderproject 103 | 104 | # Rope project settings 105 | .ropeproject 106 | 107 | # vscode 108 | .vscode/ 109 | .pytest_cache/ 110 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/sbrunner/jsonschema2md 3 | rev: 0.9.0 4 | hooks: 5 | - id: jsonschema2md 6 | files: ms2rescore/package_data/config_schema.json 7 | args: 8 | - ms2rescore/package_data/config_schema.json 9 | - docs/source/config_schema.md 10 | 11 | - repo: https://github.com/ralfg/convert-config-hook 12 | rev: 0.1.6 13 | hooks: 14 | - id: convert-config 15 | files: "examples\\/.*-ms2rescore\\.toml" 16 | args: ["--output-format", "json"] 17 | 18 | - repo: https://github.com/pre-commit/pre-commit-hooks 19 | rev: v2.3.0 20 | hooks: 21 | - id: check-yaml 22 | - id: trailing-whitespace 23 | # 
- id: end-of-file-fixer 24 | 25 | # - repo: https://github.com/pycqa/isort 26 | # rev: 5.11.2 27 | # hooks: 28 | # - id: isort 29 | # name: isort (python) 30 | 31 | - repo: https://github.com/psf/black 32 | rev: 22.10.0 33 | hooks: 34 | - id: black 35 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: ubuntu-22.04 5 | tools: 6 | python: "3.11" 7 | 8 | sphinx: 9 | configuration: docs/source/conf.py 10 | builder: dirhtml 11 | 12 | python: 13 | install: 14 | - method: pip 15 | path: . 16 | extra_requirements: 17 | - docs 18 | -------------------------------------------------------------------------------- /CONTRIBUTING.rst: -------------------------------------------------------------------------------- 1 | ############ 2 | Contributing 3 | ############ 4 | 5 | This document briefly describes how to contribute to 6 | `ms2rescore `_. 7 | 8 | 9 | 10 | Before you begin 11 | ################ 12 | 13 | If you have an idea for a feature, use case to add or an approach for a bugfix, 14 | you are welcome to communicate it with the community by opening a 15 | thread in 16 | `GitHub Discussions `_ 17 | or in `GitHub Issues `_. 18 | 19 | Welcome contributions include: 20 | 21 | - New features, such as the addition of new feature generators 22 | - Improvements of existing functionality 23 | - Bugfixes 24 | 25 | 26 | 27 | Development setup 28 | ################# 29 | 30 | Local install 31 | ************* 32 | 33 | #. Setup Python 3, and preferably create a virtual environment. 34 | #. Clone the `ms2rescore repository `_. 35 | #. Use pip in editable mode to setup the development environment: 36 | 37 | .. code-block:: sh 38 | 39 | pip install --editable .[dev,docs] 40 | 41 | 42 | Pre-commit hooks 43 | **************** 44 | 45 | Pre-commit hooks ensure that certain checks are performed before making a new commit. 
For instance, 46 | the ``black`` pre-commit hook is used to format all Python code, and ``jsonschema2md`` is used to 47 | automatically generate Markdown documentation for the configuration file. Set up the pre-commit 48 | hooks with: 49 | 50 | .. code-block:: sh 51 | 52 | pre-commit install 53 | 54 | 55 | Unit tests 56 | ********** 57 | 58 | Run tests with ``pytest``: 59 | 60 | .. code-block:: sh 61 | 62 | pytest ./tests 63 | 64 | 65 | Documentation 66 | ************* 67 | 68 | To work on the documentation and get a live preview, install the requirements 69 | and run ``sphinx-autobuild``: 70 | 71 | .. code-block:: sh 72 | 73 | pip install .[docs] 74 | sphinx-autobuild --watch ./ms2rescore ./docs/source/ ./docs/_build/html/ 75 | 76 | Then browse to http://localhost:8000 to watch the live preview. 77 | 78 | 79 | How to contribute 80 | ################# 81 | 82 | - Fork `ms2rescore `_ on GitHub to 83 | make your changes. 84 | - Commit and push your changes to your 85 | `fork `_. 86 | - Ensure that the tests and documentation (both Python docstrings and files in 87 | ``/docs/source/``) have been updated according to your changes. Python 88 | docstrings are formatted in the 89 | `numpydoc style `_. 90 | - Open a 91 | `pull request `_ 92 | with these changes. Your pull request message ideally should include: 93 | 94 | - A description of why the changes should be made. 95 | - A description of the implementation of the changes. 96 | - A description of how to test the changes. 97 | 98 | - The pull request should pass all the continuous integration tests which are 99 | automatically run by 100 | `GitHub Actions `_. 101 | 102 | 103 | 104 | Release workflow 105 | ################ 106 | 107 | - When a new version is ready to be published: 108 | 109 | #. Change the ``__version__`` in ``ms2rescore/__init__.py`` following 110 | `semantic versioning `_. 111 | #. Update the changelog (if not already done) in ``CHANGELOG.md`` according to 112 | `Keep a Changelog `_. 113 | #. 
Merge all final changes with the ``main`` branch. 114 | #. On GitHub, draft a new release with the new version number and the 115 | changes that are listed in ``CHANGELOG.md``. 116 | 117 | - When a new release is published on GitHub, the following GitHub Actions are triggered: 118 | 119 | #. The Python package is built and published to PyPI. 120 | #. The Windows installer is built with PyInstaller and Inno Setup and published to the GitHub 121 | release. 122 | 123 | - A webhook triggers a new build of the documentation on Read the Docs. 124 | 125 | - The Bioconda recipe is automatically updated by the Bioconda bot, and subsequently both the Conda 126 | Python package and the Docker image are built. 127 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11 2 | 3 | # ARG DEBIAN_FRONTEND=noninteractive 4 | 5 | LABEL name="ms2rescore" 6 | 7 | # ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/ms2rescore 8 | 9 | ADD pyproject.toml /ms2rescore/pyproject.toml 10 | ADD LICENSE /ms2rescore/LICENSE 11 | ADD README.md /ms2rescore/README.md 12 | ADD MANIFEST.in /ms2rescore/MANIFEST.in 13 | ADD ms2rescore /ms2rescore/ms2rescore 14 | 15 | RUN apt-get update \ 16 | && apt install -y procps \ 17 | && pip install /ms2rescore --only-binary :all: 18 | 19 | ENTRYPOINT [""] 20 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include ms2rescore/package_data/**/* 2 | include ms2rescore/package_data/* 3 | include ms2rescore/report/templates/* 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | MS²Rescore 2 |

3 | 4 | [![GitHub release](https://img.shields.io/github/release-pre/compomics/ms2rescore.svg?style=flat-square)](https://github.com/compomics/ms2rescore/releases) 5 | [![PyPI](https://flat.badgen.net/pypi/v/ms2rescore)](https://pypi.org/project/ms2rescore/) 6 | [![GitHub Workflow Status](https://flat.badgen.net/github/checks/compomics/ms2rescore/main)](https://github.com/compomics/ms2rescore/actions/) 7 | [![GitHub issues](https://img.shields.io/github/issues/compomics/ms2rescore?style=flat-square)](https://github.com/compomics/ms2rescore/issues) 8 | [![GitHub](https://img.shields.io/github/license/compomics/ms2rescore.svg?style=flat-square)](https://www.apache.org/licenses/LICENSE-2.0) 9 | [![Last commit](https://flat.badgen.net/github/last-commit/compomics/ms2rescore)](https://github.com/compomics/ms2rescore/commits/) 10 | 11 | Modular and user-friendly platform for AI-assisted rescoring of peptide identifications 12 | 13 | ## About MS²Rescore 14 | 15 | MS²Rescore performs ultra-sensitive peptide identification rescoring with LC-MS predictors such as 16 | [MS²PIP][ms2pip] and [DeepLC][deeplc], and with ML-driven rescoring engines 17 | [Percolator][percolator] or [Mokapot][mokapot]. This results in more confident peptide 18 | identifications, which allows you to get **more peptide IDs** at the same false discovery rate 19 | (FDR) threshold, or to set a **more stringent FDR threshold** while still retaining a similar 20 | number of peptide IDs. MS²Rescore is **ideal for challenging proteomics identification workflows**, 21 | such as proteogenomics, metaproteomics, or immunopeptidomics. 
22 | 23 | ![MS²Rescore overview](https://raw.githubusercontent.com/compomics/ms2rescore/main/docs/source/_static/img/ms2rescore-overview.png) 24 | 25 | MS²Rescore can read peptide identifications in any format supported by [psm_utils][psm_utils] 26 | (see [Supported file formats][file-formats]) and has been tested with various search engine output 27 | files: 28 | 29 | - [MS Amanda](http://ms.imp.ac.at/?goto=msamanda) `.csv` 30 | - [Sage](https://github.com/lazear/sage) `.sage.tsv` 31 | - [PeptideShaker](https://compomics.github.io/projects/peptide-shaker.html) `.mzid` 32 | - [ProteomeDiscoverer](#) `.msf` 33 | - [MSGFPlus](https://omics.pnl.gov/software/ms-gf) `.mzid` 34 | - [Mascot](https://www.matrixscience.com/) `.mzid` 35 | - [MaxQuant](https://www.maxquant.org/) `msms.txt` 36 | - [X!Tandem](https://www.thegpm.org/tandem/) `.xml` 37 | - [PEAKS](https://www.bioinfor.com/peaksdb/) `.mzid` 38 | 39 | MS²Rescore is available as a [desktop application][desktop], a [command line tool][cli], and a 40 | [modular Python API][python-package]. 41 | 42 | ## TIMS²Rescore: Direct support for DDA-PASEF data 43 | 44 | MS²Rescore v3.1+ includes TIMS²Rescore, a usage mode with specialized default configurations for 45 | DDA-PASEF data from timsTOF instruments. TIMS²Rescore makes use of new MS²PIP prediction models for 46 | timsTOF fragmentation and IM2Deep for ion mobility separation. Bruker .d and miniTDF spectrum 47 | files are directly supported through the [timsrust](https://github.com/MannLabs/timsrust) library. 48 | 49 | Check out our [paper](https://doi.org/10.1021/acs.jproteome.4c00609) for more information and the 50 | [TIMS²Rescore documentation][tims2rescore] to get started. 
51 | 52 | ## Citing 53 | 54 | **Latest MS²Rescore publication:** 55 | 56 | > **MS²Rescore 3.0 is a modular, flexible, and user-friendly platform to boost peptide identifications, as showcased with MS Amanda 3.0.** 57 | > Louise Marie Buur*, Arthur Declercq*, Marina Strobl, Robbin Bouwmeester, Sven Degroeve, Lennart Martens, Viktoria Dorfer*, and Ralf Gabriels*. 58 | > _Journal of Proteome Research_ (2024) [doi:10.1021/acs.jproteome.3c00785](https://doi.org/10.1021/acs.jproteome.3c00785)
\*contributed equally 59 | 60 | **MS²Rescore for immunopeptidomics:** 61 | 62 | > **MS²Rescore: Data-driven rescoring dramatically boosts immunopeptide identification rates.** 63 | > Arthur Declercq, Robbin Bouwmeester, Aurélie Hirschler, Christine Carapito, Sven Degroeve, Lennart Martens, and Ralf Gabriels. 64 | > _Molecular & Cellular Proteomics_ (2021) [doi:10.1016/j.mcpro.2022.100266](https://doi.org/10.1016/j.mcpro.2022.100266) 65 | 66 | **MS²Rescore for timsTOF DDA-PASEF data:** 67 | 68 | > **TIMS²Rescore: A DDA-PASEF optimized data-driven rescoring pipeline based on MS²Rescore.** 69 | > Arthur Declercq*, Robbe Devreese*, Jonas Scheid, Caroline Jachmann, Tim Van Den Bossche, Annica Preikschat, David Gomez-Zepeda, Jeewan Babu Rijal, Aurélie Hirschler, Jonathan R Krieger, Tharan Srikumar, George Rosenberger, Dennis Trede, Christine Carapito, Stefan Tenzer, Juliane S Walz, Sven Degroeve, Robbin Bouwmeester, Lennart Martens, and Ralf Gabriels. 70 | > _Journal of Proteome Research_ (2025) [doi:10.1021/acs.jproteome.4c00609](https://doi.org/10.1021/acs.jproteome.4c00609) 71 | 72 | **Original publication describing the concept of rescoring with predicted spectra:** 73 | 74 | > **Accurate peptide fragmentation predictions allow data driven approaches to replace and improve upon proteomics search engine scoring functions.** 75 | > Ana S C Silva, Robbin Bouwmeester, Lennart Martens, and Sven Degroeve. 76 | > _Bioinformatics_ (2019) [doi:10.1093/bioinformatics/btz383](https://doi.org/10.1093/bioinformatics/btz383) 77 | 78 | To replicate the experiments described in this article, check out the 79 | [publication branch][publication-branch] of the repository. 80 | 81 | ## Getting started 82 | 83 | The desktop application can be installed on Windows with a [one-click installer][desktop-installer]. 84 | The Python package and command line interface can be installed with `pip`, `conda`, or `docker`. 85 | Check out the [full documentation][docs] to get started. 
86 | 87 | ## Questions or issues? 88 | 89 | Have questions on how to apply MS²Rescore on your data? Or ran into issues while using MS²Rescore? 90 | Post your questions on the [GitHub Discussions][discussions] forum and we are happy to help! 91 | 92 | ## How to contribute 93 | 94 | Bugs, questions or suggestions? Feel free to post an issue in the [issue tracker][issues] or to 95 | make a [pull request][pr]! 96 | 97 | [docs]: https://ms2rescore.readthedocs.io/ 98 | [issues]: https://github.com/compomics/ms2rescore/issues/ 99 | [discussions]: https://github.com/compomics/ms2rescore/discussions/ 100 | [pr]: https://github.com/compomics/ms2rescore/pulls/ 101 | [desktop]: https://ms2rescore.readthedocs.io/en/stable/gui/ 102 | [desktop-installer]: https://github.com/compomics/ms2rescore/releases/latest 103 | [cli]: https://ms2rescore.readthedocs.io/en/stable/cli/ 104 | [python-package]: https://ms2rescore.readthedocs.io/en/stable/api/ms2rescore/ 105 | [docker]: https://ms2rescore.readthedocs.io/en/stable/installation#docker-container 106 | [publication-branch]: https://github.com/compomics/ms2rescore/tree/pub 107 | [ms2pip]: https://github.com/compomics/ms2pip 108 | [deeplc]: https://github.com/compomics/deeplc 109 | [percolator]: https://github.com/percolator/percolator/ 110 | [mokapot]: https://mokapot.readthedocs.io/ 111 | [psm_utils]: https://github.com/compomics/psm_utils 112 | [file-formats]: https://psm-utils.readthedocs.io/en/stable/#supported-file-formats 113 | [tims2rescore]: https://ms2rescore.readthedocs.io/en/stable/userguide/tims2Rescore 114 | -------------------------------------------------------------------------------- /docs/source/_static/css/custom.css: -------------------------------------------------------------------------------- 1 | /* replace the copyright to eliminate the copyright symbol enforced by 2 | the ReadTheDocs theme but eschewed by our legal team */ 3 | div[role=contentinfo] { 4 | visibility: hidden; 5 | position: relative; 6 | } 7 | 8 
| div[role=contentinfo]:after { 9 | visibility: visible; 10 | position: absolute; 11 | top: 0; 12 | left: 0; 13 | content: "Creative Commons CC-BY-SA 4.0"; 14 | } 15 | 16 | :not(dt) > strong, :not(dt) > b, 17 | .rst-content .viewcode-back, 18 | .rst-content .viewcode-link { 19 | font-weight: 600; /* numeric semi-bold; the keyword "semi-bold" is not valid CSS and was ignored */ 20 | color: #2c3e50; 21 | } 22 | 23 | .wy-menu-vertical header, 24 | .wy-menu-vertical p.caption { 25 | color: #80b4e8; 26 | } 27 | 28 | /** Mobile nav-bar **/ 29 | .wy-nav-top, 30 | .wy-side-nav-search{ 31 | background: #2c3e50; 32 | } 33 | 34 | /** Signature text **/ 35 | html.writer-html4 .rst-content dl:not(.docutils)>dt, 36 | html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.glossary):not(.simple)>dt { 37 | color: #555; 38 | } 39 | 40 | /** Signature bg **/ 41 | html.writer-html4 .rst-content dl:not(.docutils)>dt, 42 | html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.glossary):not(.simple)>dt { 43 | background: #C9F4ED; 44 | border-top: 3px solid #80b4e8; 45 | } 46 | 47 | /** Figure bottom margin **/ 48 | .rst-content figure, 49 | .rst-content .tab-content > figure:last-child { 50 | margin-bottom: 24px; 51 | } 52 | -------------------------------------------------------------------------------- /docs/source/_static/img/gui-example-xtandem-advanced.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/docs/source/_static/img/gui-example-xtandem-advanced.png -------------------------------------------------------------------------------- /docs/source/_static/img/gui-example-xtandem-finished.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/docs/source/_static/img/gui-example-xtandem-finished.png 
-------------------------------------------------------------------------------- /docs/source/_static/img/gui-example-xtandem-modifications-before.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/docs/source/_static/img/gui-example-xtandem-modifications-before.png -------------------------------------------------------------------------------- /docs/source/_static/img/gui-example-xtandem-modifications-filled.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/docs/source/_static/img/gui-example-xtandem-modifications-filled.png -------------------------------------------------------------------------------- /docs/source/_static/img/gui-example-xtandem-output-files.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/docs/source/_static/img/gui-example-xtandem-output-files.png -------------------------------------------------------------------------------- /docs/source/_static/img/gui-example-xtandem-processes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/docs/source/_static/img/gui-example-xtandem-processes.png -------------------------------------------------------------------------------- /docs/source/_static/img/gui-example-xtandem-progress.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/docs/source/_static/img/gui-example-xtandem-progress.png 
-------------------------------------------------------------------------------- /docs/source/_static/img/gui-example-xtandem-psm-file.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/docs/source/_static/img/gui-example-xtandem-psm-file.png -------------------------------------------------------------------------------- /docs/source/_static/img/gui-example-xtandem-psm-filetype.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/docs/source/_static/img/gui-example-xtandem-psm-filetype.png -------------------------------------------------------------------------------- /docs/source/_static/img/gui-example-xtandem-spectra.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/docs/source/_static/img/gui-example-xtandem-spectra.png -------------------------------------------------------------------------------- /docs/source/_static/img/gui-example-xtandem-start.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/docs/source/_static/img/gui-example-xtandem-start.png -------------------------------------------------------------------------------- /docs/source/_static/img/gui-fixed-modifications.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/docs/source/_static/img/gui-fixed-modifications.png -------------------------------------------------------------------------------- /docs/source/_static/img/gui-modification-mapping.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/docs/source/_static/img/gui-modification-mapping.png -------------------------------------------------------------------------------- /docs/source/_static/img/gui-overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/docs/source/_static/img/gui-overview.png -------------------------------------------------------------------------------- /docs/source/_static/img/gui-screenshot-old.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/docs/source/_static/img/gui-screenshot-old.png -------------------------------------------------------------------------------- /docs/source/_static/img/gui-screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/docs/source/_static/img/gui-screenshot.png -------------------------------------------------------------------------------- /docs/source/_static/img/ms2rescore-overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/docs/source/_static/img/ms2rescore-overview.png -------------------------------------------------------------------------------- /docs/source/_static/img/ms2rescore_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/docs/source/_static/img/ms2rescore_logo.png 
-------------------------------------------------------------------------------- /docs/source/_static/img/percolator-install-path.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/docs/source/_static/img/percolator-install-path.png -------------------------------------------------------------------------------- /docs/source/_static/img/qc-reports.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/docs/source/_static/img/qc-reports.png -------------------------------------------------------------------------------- /docs/source/api/ms2rescore.feature_generators.rst: -------------------------------------------------------------------------------- 1 | ***************************** 2 | ms2rescore.feature_generators 3 | ***************************** 4 | 5 | .. automodule:: ms2rescore.feature_generators 6 | :members: 7 | 8 | .. py:data:: ms2rescore.feature_generators.FEATURE_GENERATORS 9 | :type: dict 10 | 11 | Implemented feature generator classes by name. 12 | 13 | 14 | ms2rescore.feature_generators.base 15 | ################################## 16 | 17 | .. automodule:: ms2rescore.feature_generators.base 18 | :members: 19 | 20 | 21 | 22 | ms2rescore.feature_generators.basic 23 | #################################### 24 | 25 | .. automodule:: ms2rescore.feature_generators.basic 26 | :members: 27 | 28 | 29 | 30 | ms2rescore.feature_generators.deeplc 31 | #################################### 32 | 33 | .. automodule:: ms2rescore.feature_generators.deeplc 34 | :members: 35 | 36 | 37 | 38 | ms2rescore.feature_generators.ionmob 39 | #################################### 40 | 41 | .. 
automodule:: ms2rescore.feature_generators.ionmob 42 | :members: 43 | 44 | 45 | 46 | ms2rescore.feature_generators.maxquant 47 | ###################################### 48 | 49 | .. automodule:: ms2rescore.feature_generators.maxquant 50 | :members: 51 | 52 | 53 | 54 | ms2rescore.feature_generators.ms2pip 55 | #################################### 56 | 57 | .. automodule:: ms2rescore.feature_generators.ms2pip 58 | :members: 59 | -------------------------------------------------------------------------------- /docs/source/api/ms2rescore.report.rst: -------------------------------------------------------------------------------- 1 | ***************** 2 | ms2rescore.report 3 | ***************** 4 | 5 | .. automodule:: ms2rescore.report 6 | :members: 7 | 8 | 9 | 10 | Generate report 11 | ############### 12 | 13 | .. automodule:: ms2rescore.report.generate 14 | :members: 15 | 16 | 17 | Charts 18 | ###### 19 | 20 | .. automodule:: ms2rescore.report.charts 21 | :members: 22 | -------------------------------------------------------------------------------- /docs/source/api/ms2rescore.rescoring_engines.rst: -------------------------------------------------------------------------------- 1 | **************************** 2 | ms2rescore.rescoring_engines 3 | **************************** 4 | 5 | .. automodule:: ms2rescore.rescoring_engines 6 | :members: 7 | 8 | 9 | 10 | Mokapot 11 | ####### 12 | 13 | .. automodule:: ms2rescore.rescoring_engines.mokapot 14 | :members: 15 | 16 | 17 | 18 | Percolator 19 | ########## 20 | 21 | .. automodule:: ms2rescore.rescoring_engines.percolator 22 | :members: 23 | -------------------------------------------------------------------------------- /docs/source/api/ms2rescore.rst: -------------------------------------------------------------------------------- 1 | ********** 2 | ms2rescore 3 | ********** 4 | 5 | .. 
automodule:: ms2rescore 6 | :members: 7 | :imported-members: 8 | :exclude-members: filterwarnings 9 | -------------------------------------------------------------------------------- /docs/source/cli.rst: -------------------------------------------------------------------------------- 1 | ********************** 2 | Command line interface 3 | ********************** 4 | 5 | Run MS²Rescore 6 | ============== 7 | 8 | .. argparse:: 9 | :module: ms2rescore.__main__ 10 | :func: _argument_parser 11 | :prog: ms2rescore 12 | 13 | 14 | Other commands 15 | ============== 16 | 17 | Generate HTML report 18 | -------------------- 19 | Generate a report from MS²Rescore result file(s): 20 | 21 | .. code-block:: console 22 | 23 | ms2rescore-report [OPTIONS] OUTPUT_PREFIX 24 | 25 | or 26 | 27 | .. code-block:: console 28 | 29 | python -m ms2rescore.report [OPTIONS] OUTPUT_PREFIX 30 | 31 | 32 | 33 | Start graphical user interface 34 | ------------------------------ 35 | Start the graphical user interface. For more info, see :ref:`Graphical user interface`. 36 | 37 | .. code-block:: console 38 | 39 | ms2rescore-gui 40 | 41 | or 42 | 43 | .. 
code-block:: console 44 | 45 | python -m ms2rescore.gui 46 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | """Configuration file for the Sphinx documentation builder.""" 2 | 3 | import os 4 | import sys 5 | 6 | sys.path.insert(0, os.path.abspath("../../")) 7 | 8 | from ms2rescore import __version__ # noqa: E402 9 | 10 | # Project information 11 | project = "ms2rescore" 12 | author = "CompOmics" 13 | github_project_url = "https://github.com/compomics/ms2rescore/" 14 | github_doc_root = "https://github.com/compomics/ms2rescore/tree/main/docs/" 15 | release = __version__ 16 | 17 | # General configuration 18 | extensions = [ 19 | "nbsphinx", 20 | "sphinx.ext.autodoc", 21 | "sphinx.ext.autosectionlabel", 22 | "sphinx.ext.autosummary", 23 | "sphinx.ext.napoleon", 24 | "sphinx.ext.intersphinx", 25 | "sphinxarg.ext", 26 | "sphinx_inline_tabs", 27 | "sphinx_rtd_theme", 28 | "myst_parser", 29 | ] 30 | source_suffix = [".rst"] 31 | master_doc = "index" 32 | exclude_patterns = ["_build"] 33 | 34 | # Options for HTML output 35 | html_theme = "sphinx_rtd_theme" 36 | html_static_path = ["_static"] 37 | html_css_files = ["css/custom.css"] 38 | html_js_files = ["js/badge.min.js"] 39 | 40 | # Autodoc options 41 | autodoc_default_options = {"members": True, "show-inheritance": True} 42 | autodoc_member_order = "bysource" 43 | autodoc_typehints = "description" 44 | autoclass_content = "init" 45 | 46 | # Intersphinx options 47 | intersphinx_mapping = { 48 | "python": ("https://docs.python.org/3", None), 49 | "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), 50 | "numpy": ("https://numpy.org/doc/stable/", None), 51 | "plotly": ("https://plotly.com/python-api-reference/", None), 52 | "psm_utils": ("https://psm-utils.readthedocs.io/en/stable/", None), 53 | "mokapot": ("https://mokapot.readthedocs.io/en/stable/", None), 54 | } 55 | 
56 | # nbsphinx options 57 | nbsphinx_execute = "never" 58 | 59 | 60 | def setup(app): 61 | config = { # noqa: F841 62 | "enable_eval_rst": True, 63 | } 64 | -------------------------------------------------------------------------------- /docs/source/config_schema.md: -------------------------------------------------------------------------------- 1 | # MS²Rescore configuration 2 | 3 | ## Properties 4 | 5 | - **`ms2rescore`** *(object)*: General MS²Rescore settings. Cannot contain additional properties. 6 | - **`feature_generators`** *(object)*: Feature generators and their configurations. Default: `{"basic": {}, "ms2pip": {"model": "HCD", "ms2_tolerance": 0.02}, "deeplc": {}, "maxquant": {}}`. 7 | - **`.*`**: Refer to *[#/definitions/feature_generator](#definitions/feature_generator)*. 8 | - **`basic`**: Refer to *[#/definitions/basic](#definitions/basic)*. 9 | - **`ms2pip`**: Refer to *[#/definitions/ms2pip](#definitions/ms2pip)*. 10 | - **`deeplc`**: Refer to *[#/definitions/deeplc](#definitions/deeplc)*. 11 | - **`maxquant`**: Refer to *[#/definitions/maxquant](#definitions/maxquant)*. 12 | - **`ionmob`**: Refer to *[#/definitions/ionmob](#definitions/ionmob)*. 13 | - **`im2deep`**: Refer to *[#/definitions/im2deep](#definitions/im2deep)*. 14 | - **`rescoring_engine`** *(object)*: Rescoring engine to use and its configuration. Leave empty to skip rescoring and write features to file. Default: `{"mokapot": {}}`. 15 | - **`.*`**: Refer to *[#/definitions/rescoring_engine](#definitions/rescoring_engine)*. 16 | - **`percolator`**: Refer to *[#/definitions/percolator](#definitions/percolator)*. 17 | - **`mokapot`**: Refer to *[#/definitions/mokapot](#definitions/mokapot)*. 18 | - **`config_file`**: Path to configuration file. 19 | - **One of** 20 | - *string* 21 | - *null* 22 | - **`psm_file`**: Path to file with peptide-spectrum matches. 
23 | - **One of** 24 | - *string* 25 | - *null* 26 | - *array* 27 | - **Items** *(string)* 28 | - **`psm_file_type`** *(string)*: PSM file type. By default inferred from file extension. Default: `"infer"`. 29 | - **`psm_reader_kwargs`** *(object)*: Keyword arguments passed to the PSM reader. Default: `{}`. 30 | - **`spectrum_path`**: Path to spectrum file or directory with spectrum files. 31 | - **One of** 32 | - *string* 33 | - *null* 34 | - **`output_path`**: Path and root name for output files. 35 | - **One of** 36 | - *string* 37 | - *null* 38 | - **`log_level`** *(string)*: Logging level. Must be one of: `["debug", "info", "warning", "error", "critical"]`. 39 | - **`id_decoy_pattern`**: Regex pattern used to identify the decoy PSMs in identification file. Default: `null`. 40 | - **One of** 41 | - *string* 42 | - *null* 43 | - **`spectrum_id_pattern`**: Regex pattern to extract index or scan number from spectrum file. Requires at least one capturing group. Default: `"(.*)"`. 44 | - **One of** 45 | - *string* 46 | - *null* 47 | - **`psm_id_pattern`**: Regex pattern to extract index or scan number from PSM file. Requires at least one capturing group. Default: `"(.*)"`. 48 | - **One of** 49 | - *string* 50 | - *null* 51 | - **`psm_id_rt_pattern`**: Regex pattern to extract retention time from PSM identifier. Requires at least one capturing group. Default: `null`. 52 | - **One of** 53 | - *string* 54 | - *null* 55 | - **`psm_id_im_pattern`**: Regex pattern to extract ion mobility from PSM identifier. Requires at least one capturing group. Default: `null`. 56 | - **One of** 57 | - *string* 58 | - *null* 59 | - **`lower_score_is_better`** *(boolean)*: Bool indicating if lower score is better. Default: `false`. 60 | - **`max_psm_rank_input`** *(number)*: Maximum rank of PSMs to use as input for rescoring. Minimum: `1`. Default: `10`. 61 | - **`max_psm_rank_output`** *(number)*: Maximum rank of PSMs to return after rescoring, before final FDR calculation. Minimum: `1`. 
Default: `1`. 62 | - **`modification_mapping`** *(object)*: Mapping of modification labels to each replacement label. Default: `{}`. 63 | - **`fixed_modifications`** *(object)*: Mapping of amino acids with fixed modifications to the modification name. Can contain additional properties. Default: `{}`. 64 | - **`processes`** *(number)*: Number of parallel processes to use; -1 for all available. Minimum: `-1`. Default: `-1`. 65 | - **`rename_to_usi`** *(boolean)*: Convert spectrum IDs to their universal spectrum identifier. 66 | - **`fasta_file`**: Path to FASTA file with protein sequences to use for protein inference. 67 | - **One of** 68 | - *string* 69 | - *null* 70 | - **`write_flashlfq`** *(boolean)*: Write results to a FlashLFQ-compatible file. Default: `false`. 71 | - **`write_report`** *(boolean)*: Write an HTML report with various QC metrics and charts. Default: `false`. 72 | - **`profile`** *(boolean)*: Write a txt report using cProfile for profiling. Default: `false`. 73 | ## Definitions 74 | 75 | - **`feature_generator`** *(object)*: Feature generator configuration. Can contain additional properties. 76 | - **`rescoring_engine`** *(object)*: Rescoring engine configuration. Can contain additional properties. 77 | - **`basic`** *(object)*: Basic feature generator configuration. Can contain additional properties. Refer to *[#/definitions/feature_generator](#definitions/feature_generator)*. 78 | - **`ms2pip`** *(object)*: MS²PIP feature generator configuration. Can contain additional properties. Refer to *[#/definitions/feature_generator](#definitions/feature_generator)*. 79 | - **`model`** *(string)*: MS²PIP model to use (see MS²PIP documentation). Default: `"HCD"`. 80 | - **`ms2_tolerance`** *(number)*: MS2 error tolerance in Da. Minimum: `0`. Default: `0.02`. 81 | - **`deeplc`** *(object)*: DeepLC feature generator configuration. Can contain additional properties. Refer to *[#/definitions/feature_generator](#definitions/feature_generator)*. 
82 | - **`calibration_set_size`**: Calibration set size. Default: `0.15`. 83 | - **One of** 84 | - *integer* 85 | - *number* 86 | - **`maxquant`** *(object)*: MaxQuant feature generator configuration. Can contain additional properties. Refer to *[#/definitions/feature_generator](#definitions/feature_generator)*. 87 | - **`ionmob`** *(object)*: Ion mobility feature generator configuration using Ionmob. Can contain additional properties. Refer to *[#/definitions/feature_generator](#definitions/feature_generator)*. 88 | - **`ionmob_model`** *(string)*: Path to Ionmob model directory. Default: `"GRUPredictor"`. 89 | - **`reference_dataset`** *(string)*: Path to Ionmob reference dataset file. Default: `"Meier_unimod.parquet"`. 90 | - **`tokenizer`** *(string)*: Path to tokenizer json file. Default: `"tokenizer.json"`. 91 | - **`im2deep`** *(object)*: Ion mobility feature generator configuration using IM2Deep. Can contain additional properties. Refer to *[#/definitions/feature_generator](#definitions/feature_generator)*. 92 | - **`reference_dataset`** *(string)*: Path to IM2Deep reference dataset file. Default: `"Meier_unimod.parquet"`. 93 | - **`mokapot`** *(object)*: Mokapot rescoring engine configuration. Additional properties are passed to the Mokapot brew function. Can contain additional properties. Refer to *[#/definitions/rescoring_engine](#definitions/rescoring_engine)*. 94 | - **`train_fdr`** *(number)*: FDR threshold for training Mokapot. Minimum: `0`. Maximum: `1`. Default: `0.01`. 95 | - **`write_weights`** *(boolean)*: Write Mokapot weights to a text file. Default: `false`. 96 | - **`write_txt`** *(boolean)*: Write Mokapot results to a text file. Default: `false`. 97 | - **`percolator`** *(object)*: Percolator rescoring engine configuration. Can contain additional properties. Refer to *[#/definitions/rescoring_engine](#definitions/rescoring_engine)*. 98 | - **`init-weights`**: Weights file for scoring function. Default: `false`. 
99 | - **One of** 100 | - *string* 101 | - *null* 102 | -------------------------------------------------------------------------------- /docs/source/contributing.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../../CONTRIBUTING.rst 2 | -------------------------------------------------------------------------------- /docs/source/gui.rst: -------------------------------------------------------------------------------- 1 | ************************ 2 | Graphical user interface 3 | ************************ 4 | 5 | 6 | Installation 7 | ============ 8 | 9 | The MS²Rescore desktop application can be installed on Windows with a 10 | :ref:`one-click installer `. Alternatively, or on other platforms, follow the 11 | :ref:`Python package installation instructions `. 12 | 13 | 14 | Starting the application 15 | ======================== 16 | 17 | If installed with the one-click installer, simply start MS²Rescore from the start menu or with the 18 | desktop shortcut. Otherwise, start the application from the 19 | :ref:`command line ` with the command ``ms2rescore-gui`` or with 20 | ``python -m ms2rescore.gui``. 21 | 22 | 23 | Application overview 24 | ==================== 25 | 26 | The MS²Rescore graphical user interface is divided into three main sections: 27 | 28 | 1. A side bar with references, window controls, and the current version number. 29 | 2. The configuration pane with input file selection, and parameter configuration. 30 | 3. The application log pane with the status output. 31 | 32 | On the bottom of the window, the application log level can be selected. The log level determines 33 | which messages are shown in the application log pane. On the bottom right, the application can be 34 | started with the "Start" button. The "Stop" button can be used to stop the application at any time 35 | during the execution. 36 | 37 | .. 
figure:: ../_static/img/gui-overview.png 38 | :width: 100% 39 | :alt: MS²Rescore graphical user interface 40 | 41 | Overview of the MS²Rescore desktop application. 42 | 43 | 44 | Configuring MS²Rescore 45 | ====================== 46 | 47 | Input file selection 48 | ^^^^^^^^^^^^^^^^^^^^ 49 | 50 | The main inputs for MS²Rescore are the PSM file(s) (search engine output) and the spectrum file(s). 51 | See :ref:`Input files` for more information. 52 | 53 | One or more PSM files can be selected from the file system with the "Browse files" button. 54 | To ensure correct reading of the file, specify the file type from the drop-down menu. 55 | 56 | .. figure:: ../_static/img/gui-example-xtandem-psm-file.png 57 | :width: 60% 58 | :alt: PSM file selection 59 | 60 | PSM file selection 61 | 62 | 63 | .. figure:: ../_static/img/gui-example-xtandem-psm-filetype.png 64 | :width: 60% 65 | :alt: PSM file type selection 66 | 67 | PSM file type selection 68 | 69 | 70 | To select a single spectrum file (mzML or MGF), click the "Browse files" button. To select a 71 | folder with spectrum files, click the "Browse directories" button. 72 | 73 | .. figure:: ../_static/img/gui-example-xtandem-spectra.png 74 | :width: 60% 75 | :alt: Spectrum file selection 76 | 77 | Spectrum file selection 78 | 79 | 80 | Optionally, for protein inference information, a FASTA file can also be provided. Ensure that 81 | this file contains the same protein sequences as the search database used for the search engine. 82 | If a FASTA file is provided, protein digestion settings may need to be configured in the rescoring 83 | engine configuration. 84 | 85 | 86 | Number of processes 87 | ^^^^^^^^^^^^^^^^^^^ 88 | 89 | The number of processes can be configured to run the application in parallel. The default is to 90 | use all available CPU cores. The number of processes can be reduced to avoid overloading the 91 | system or to avoid memory issues. A number under 16 is recommended.
92 | 93 | 94 | Modification mapping 95 | ^^^^^^^^^^^^^^^^^^^^ 96 | 97 | Depending on the search engine, the peptide modification labels will have to be mapped 98 | to labels that can be understood by MS²Rescore. For example, X!Tandem uses mass shift labels, such 99 | as ``+57.02146`` for carbamidomethylation. However, tools such as DeepLC require the atomic 100 | composition for all modifications. As this cannot be derived from the mass shift (or other labels 101 | that are not known to MS²Rescore), a mapping has to be provided. 102 | 103 | .. figure:: ../_static/img/gui-example-xtandem-modifications-before.png 104 | :width: 70% 105 | :alt: Modification mapping 106 | 107 | Modification mapping configuration. Click the plus sign to add more rows. 108 | 109 | 110 | In modification mapping, click the plus sign to add more rows to the table, or click the minus sign 111 | to remove rows. In the first column "Search engine label", enter the modification label as it 112 | appears in the PSM file. In the second column "ProForma label", enter a ProForma-compatible 113 | modification label. More information on accepted labels can be found in :ref:`Parsing modification 114 | labels`. 115 | 116 | .. figure:: ../_static/img/gui-example-xtandem-modifications-filled.png 117 | :width: 70% 118 | :alt: Modification mapping 119 | 120 | Modification mapping configuration for the X!Tandem example. Mass shift labels from X!Tandem 121 | are mapped to ProForma UniMod labels. 122 | 123 | 124 | Fixed modifications 125 | ^^^^^^^^^^^^^^^^^^^ 126 | 127 | If the search engine PSM file does not contain information on which fixed modifications were used, 128 | this must be specified in the MS²Rescore configuration. At the time of writing, only MaxQuant 129 | ``msms.txt`` files do not contain this information. For all other search engines, this information 130 | is contained in the PSM file and the following field can be left empty.
131 | 132 | 133 | Advanced options 134 | ^^^^^^^^^^^^^^^^ 135 | 136 | Most advanced options are only required for specific use cases or with specific search engine PSM 137 | files. All options are listed in the :doc:`userguide/configuration` section of the user guide. 138 | 139 | In the X!Tandem example, only the `PSM ID regex pattern` option is required. This option is used 140 | to extract the spectrum ID from the PSM file. The spectrum ID is used to match the PSM to the 141 | spectrum file. See :ref:`Mapping PSMs to spectra` for more information. 142 | 143 | .. figure:: ../_static/img/gui-example-xtandem-advanced.png 144 | :width: 70% 145 | :alt: Advanced options 146 | 147 | Advanced options 148 | 149 | 150 | For reference, all parameters for the X!Tandem example are also listed in the example 151 | configuration file on 152 | `GitHub `_. 153 | 154 | 155 | Starting the rescoring process 156 | ============================== 157 | 158 | After the configuration is complete, click the "Start" button to start the rescoring process. 159 | The application will show the progress in the application log pane. The log level can be changed 160 | before the run to show more or less information. 161 | 162 | .. figure:: ../_static/img/gui-example-xtandem-progress.png 163 | :width: 100% 164 | :alt: Running application 165 | 166 | Running application with log output 167 | 168 | 169 | A pop up will appear when the application is finished, or when an error occurred. If an error 170 | has occurred, the error message in the pop up should provide some insight into what went wrong. 171 | If the error message is not clear, please report the issue on the 172 | `GitHub issue tracker `_ or post your question on 173 | the `Discussion forum `_. 174 | 175 | .. 
figure:: ../_static/img/gui-example-xtandem-finished.png 176 | :width: 40% 177 | :alt: Pop up when MS²Rescore is finished 178 | 179 | Pop up when MS²Rescore is finished 180 | 181 | 182 | Viewing the results 183 | =================== 184 | 185 | After a successful run, the output files can be found in the directory of the input PSM file, or 186 | in the specified output directory. The most important files are the ``*.ms2rescore.psms.tsv`` file, 187 | which contains all PSMs with their new scores, and the ``*.ms2rescore.report.html`` file, which 188 | contains interactive charts that visualize the results and various quality control metrics. See 189 | :ref:`Output files` for more information. 190 | 191 | .. figure:: ../_static/img/gui-example-xtandem-output-files.png 192 | :width: 100% 193 | :alt: Output files 194 | 195 | Overview of the output files after rescoring the X!Tandem example. 196 | 197 | Double click the ``*.ms2rescore.report.html`` file to open it in the default web browser: 198 | 199 | .. figure:: ../_static/img/qc-reports.png 200 | :width: 100% 201 | :alt: Rescoring report 202 | 203 | Rescoring QC report with interactive charts. 204 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../../README.md 2 | :parser: myst_parser.sphinx_ 3 | 4 | .. toctree:: 5 | :caption: About 6 | :hidden: 7 | :includehidden: 8 | 9 | About 10 | installation 11 | contributing 12 | 13 | .. toctree:: 14 | :caption: User guide 15 | :hidden: 16 | :includehidden: 17 | :glob: 18 | 19 | userguide/* 20 | 21 | 22 | .. toctree:: 23 | :caption: Tutorials 24 | :hidden: 25 | :includehidden: 26 | :glob: 27 | 28 | tutorials/* 29 | 30 | 31 | .. toctree:: 32 | :caption: Python API reference 33 | :hidden: 34 | :includehidden: 35 | :glob: 36 | 37 | api/* 38 | 39 | 40 | .. 
toctree:: 41 | :caption: Command line interface 42 | :hidden: 43 | :includehidden: 44 | 45 | cli 46 | 47 | 48 | 49 | .. toctree:: 50 | :caption: Graphical user interface 51 | :hidden: 52 | :includehidden: 53 | 54 | gui 55 | -------------------------------------------------------------------------------- /docs/source/installation.rst: -------------------------------------------------------------------------------- 1 | ************ 2 | Installation 3 | ************ 4 | 5 | Python package 6 | ============== 7 | 8 | .. image:: https://flat.badgen.net/badge/install%20with/pip/green?icon=pypi 9 | :alt: Install with pip 10 | :target: https://pypi.org/project/ms2rescore/ 11 | 12 | .. image:: https://flat.badgen.net/badge/install%20with/conda/green?icon=conda 13 | :alt: Install with conda 14 | :target: https://anaconda.org/bioconda/ms2rescore 15 | 16 | MS²Rescore is installable as a Python package on Windows, macOS and Linux. 17 | 18 | In a fresh `virtual environment `_, run:: 19 | 20 | pip install ms2rescore 21 | 22 | 23 | Or, in a fresh `conda environment `_, run:: 24 | 25 | conda install -c bioconda ms2rescore 26 | 27 | Bioconda packages are only available for Linux and macOS. 28 | 29 | 30 | Windows installer 31 | ================= 32 | 33 | .. image:: https://flat.badgen.net/badge/install%20for/windows/blue?icon=windows 34 | :alt: Get for Windows 35 | :target: https://github.com/compomics/ms2rescore/releases/latest 36 | 37 | Download the ``.exe`` file from the 38 | `latest release `_ 39 | and go through the installation steps. If Microsoft Defender SmartScreen displays a warning, click 40 | "More info" and then click "Run anyway". 41 | 42 | 43 | Docker container 44 | ================ 45 | 46 | .. image:: https://flat.badgen.net/badge/pull/biocontainer/blue?icon=docker 47 | :alt: Pull with Docker 48 | :target: https://quay.io/repository/biocontainers/ms2rescore 49 | 50 | First check the latest version tag on 51 | `biocontainers/ms2rescore/tags `_. 
52 | Then pull and run the container with: 53 | 54 | .. code-block:: bash 55 | 56 | docker container run -v :/data -w /data quay.io/biocontainers/ms2rescore: ms2rescore 57 | 58 | where ```` is the absolute path to the directory with your MS²Rescore input 59 | files, ```` is the container version tag, and ```` are the ms2rescore 60 | command line options (see :ref:`Command line interface`). 61 | 62 | 63 | Installing Percolator 64 | ===================== 65 | 66 | To use :ref:`percolator` as rescoring engine, it must be installed separately. Percolator is 67 | available for most platforms and can be downloaded from the 68 | `GitHub releases page `_. Ensure that 69 | the ``percolator`` executable is in your ``PATH``. On Windows, this can be done by checking the 70 | ``Add percolator to the system PATH for current user`` option during installation: 71 | 72 | .. figure:: ../_static/img/percolator-install-path.png 73 | :width: 60% 74 | :alt: Percolator installation on Windows 75 | 76 | .. note:: 77 | Alternatively, :ref:`mokapot` can be used as rescoring engine, which does not require a separate 78 | installation. 79 | 80 | For development 81 | =============== 82 | 83 | Clone this repository and use pip to install an editable version: 84 | 85 | .. code-block:: bash 86 | 87 | pip install --editable . 88 | -------------------------------------------------------------------------------- /docs/source/userguide/input-files.rst: -------------------------------------------------------------------------------- 1 | ########### 2 | Input files 3 | ########### 4 | 5 | PSM file(s) 6 | =========== 7 | 8 | The **peptide-spectrum match (PSM) file** is generally the output from a proteomics search engine. 9 | This file serves as the main input to MS²Rescore. 10 | 11 | The PSM file should contain **all putative identifications** made by the search engine, including 12 | both target and decoy PSMs. 
Ensure that the search engine was configured to include decoy entries 13 | in the search database and was operated with **target-decoy competition** enabled (i.e., 14 | considering both target and decoy sequences simultaneously during the search). 15 | 16 | .. attention:: 17 | As a general rule, MS²Rescore always needs access to **all target and decoy PSMs, without any 18 | FDR-filtering**. For some search engines, this means that the FDR-filter should be disabled or 19 | set to 100%. 20 | 21 | 22 | One or multiple PSM files can be provided at once. Note that merging PSMs from different MS runs 23 | could have an impact on the correctness of the FDR control. Combining multiple PSM files should 24 | generally only be done for LC-fractionated mass spectrometry runs. 25 | 26 | Various PSM file types are supported. The type can be specified with the ``psm_file_type`` option. 27 | Check the list of :py:mod:`psm_utils` tags in the 28 | :external+psm_utils:ref:`supported file formats ` section. Depending on the 29 | file extension, the file type can also be inferred from the file name. In that case, 30 | the ``psm_file_type`` option can be set to ``infer``. 31 | 32 | 33 | Spectrum file(s) 34 | ================ 35 | 36 | Spectrum files are required for some feature generators. Both ``mzML`` and ``mgf`` formats are 37 | supported. The ``spectrum_path`` option can be either a single file or a folder. If the 38 | ``spectrum_path`` is a folder, MS²Rescore will search for spectrum files in the directory according 39 | to the run names in the PSM file. 40 | -------------------------------------------------------------------------------- /docs/source/userguide/output-files.rst: -------------------------------------------------------------------------------- 1 | ############ 2 | Output files 3 | ############ 4 | 5 | Depending on the options you choose, the following files will be created.
All PSMs, peptides, and 6 | proteins are not yet filtered at any false discovery rate (FDR) level. 7 | 8 | Main output files: 9 | 10 | +-----------------------------------+----------------------------------------------------------------------------------+ 11 | | File | Description | 12 | +===================================+==================================================================================+ 13 | | ``.psms.tsv`` | Main output file with rescored PSMs and their new scores | 14 | +-----------------------------------+----------------------------------------------------------------------------------+ 15 | | ``.report.html`` | HTML report with interactive plots showing the results and some quality control | 16 | | | metrics. | 17 | +-----------------------------------+----------------------------------------------------------------------------------+ 18 | 19 | Log and configuration files: 20 | 21 | +--------------------------------------+--------------------------------------------------------------------------------------+ 22 | | File | Description | 23 | +======================================+======================================================================================+ 24 | | ``.log.txt`` | Log file with information about the run | 25 | +--------------------------------------+--------------------------------------------------------------------------------------+ 26 | | ``.log.html`` | HTML version of the log file | 27 | +--------------------------------------+--------------------------------------------------------------------------------------+ 28 | | ``.full-config.json`` | Full configuration file with all the parameters used | 29 | | | as configured in the user-provided configuration file, the command line or graphical | 30 | | | interface, and the default values. 
| 31 | +--------------------------------------+--------------------------------------------------------------------------------------+ 32 | | ``.feature_names.tsv`` | List of the features and their descriptions | 33 | +--------------------------------------+--------------------------------------------------------------------------------------+ 34 | 35 | Rescoring engine files: 36 | 37 | +-------------------------------------------------------------+-------------------------------------------------------------+ 38 | | File | Description | 39 | +=============================================================+=============================================================+ 40 | | ``..psms.txt`` | PSMs and their new scores at PSM-level FDR. | 41 | +-------------------------------------------------------------+-------------------------------------------------------------+ 42 | | ``..peptides.txt`` | Peptides and their new scores at peptide-level FDR. | 43 | +-------------------------------------------------------------+-------------------------------------------------------------+ 44 | | ``..proteins.txt`` | Proteins and their new scores at protein-level FDR. | 45 | +-------------------------------------------------------------+-------------------------------------------------------------+ 46 | | ``..decoy.psms.txt`` | Decoy PSMs and their new scores at PSM-level FDR. | 47 | +-------------------------------------------------------------+-------------------------------------------------------------+ 48 | | ``..decoy.peptides.txt`` | Decoy peptides and their new scores at peptide-level FDR. | 49 | +-------------------------------------------------------------+-------------------------------------------------------------+ 50 | | ``..decoy.proteins.txt`` | Decoy proteins and their new scores at protein-level FDR. 
| 51 | +-------------------------------------------------------------+-------------------------------------------------------------+ 52 | | ``..weights.txt`` | Feature weights, showing feature usage in the rescoring run | 53 | +-------------------------------------------------------------+-------------------------------------------------------------+ 54 | 55 | If no rescoring engine is selected, if Percolator was selected, or in DEBUG mode, the following 56 | files will also be written: 57 | 58 | +-------------------------------------------------------------+-----------------------------------------------------------+ 59 | | File | Description | 60 | +=============================================================+===========================================================+ 61 | | ``.pin`` | PSMs with all features for rescoring | 62 | +-------------------------------------------------------------+-----------------------------------------------------------+ 63 | -------------------------------------------------------------------------------- /docs/source/userguide/search-engine-notes.rst: -------------------------------------------------------------------------------- 1 | ################################# 2 | Notes for specific search engines 3 | ################################# 4 | 5 | MSGFPlus 6 | ======== 7 | 8 | - Run MSGFPlus in a concatenated target-decoy search, with the ``-addFeatures 1`` flag. 9 | 10 | 11 | MaxQuant 12 | ======== 13 | 14 | - Run MaxQuant without FDR filtering (set to 1) 15 | - Make sure to correctly configure both ``modification_mapping`` and ``fixed_modifications``. 16 | See :ref:`Parsing modification labels` for more information. 17 | -------------------------------------------------------------------------------- /docs/source/userguide/tims2Rescore.rst: -------------------------------------------------------------------------------- 1 | .. 
_tims2rescore: 2 | 3 | TIMS²Rescore 4 | ============ 5 | 6 | Introduction 7 | ------------ 8 | 9 | `TIMS²Rescore` is a specialized version of `MS²Rescore` for timsTOF DDA-PASEF data. This guide 10 | provides an overview of how to use TIMS²Rescore effectively. 11 | 12 | Installing TIMS²Rescore 13 | ----------------------- 14 | 15 | TIMS²Rescore is part of the ``ms2rescore`` package. Check out the :ref:`installation` instructions 16 | to get started. 17 | 18 | Usage 19 | ----- 20 | 21 | To use TIMS²Rescore, follow these steps: 22 | 23 | 1. Prepare your input files: 24 | - To boost DDA-PASEF peptide identifications, TIMS²Rescore requires the spectrum files from 25 | the timsTOF instrument and the PSM files with identifications from a supported search engine. 26 | - Make sure that the PSM file format comes from a supported search engine or is a standard 27 | format such as mzIdentML (See 28 | :external+psm_utils:ref:`supported file formats `). 29 | - Spectrum files can directly be passed as ``.d`` or `miniTDF` raw data or can optionally be 30 | first converted to mzML or MGF. We recommend using the format that was passed to the search 31 | engine. 32 | 33 | 2. Run ``tims2rescore``: 34 | - Open a terminal or command prompt. 35 | - Navigate to the directory where your input files are located. 36 | - Execute the following command: 37 | 38 | .. code-block:: bash 39 | 40 | tims2rescore -p -s 41 | 42 | Replace ``, ``, and `` with the 43 | actual paths to your input and output files. 44 | 45 | .. admonition:: note 46 | 47 | By default, specialized timsTOF models will be used for predictions. Optionally you can 48 | further configure TIMS²Rescore through a configuration file. For more information, refer 49 | to the :ref:`configuration` tab in the user guide. 50 | 51 | 3. Review the results: 52 | - Once the ``tims2rescore`` process completes, you will find the rescoring results in the 53 | same directory as the input files. 
54 | - If you want a detailed report of the rescoring performance, you can either set 55 | `write_report` to `True` in the configuration file or use the `--write-report` option on the 56 | ``tims2rescore`` command line. Alternatively, run the following command after rescoring: 57 | 58 | .. code-block:: bash 59 | 60 | ms2rescore-report <output-prefix> 61 | 62 | Replace ``<output-prefix>`` with the actual output prefix of the result 63 | files. For instance, if the output file is ``identifications.psms.tsv``, then the output 64 | prefix is ``identifications``. 65 | 66 | Additional options 67 | ------------------ 68 | 69 | `tims2rescore` provides additional options to customize rescoring. You can explore these options 70 | by running the following command: 71 | 72 | .. code-block:: bash 73 | 74 | tims2rescore --help 75 | 76 | 77 | -------------------------------------------------------------------------------- /examples/mascot-ms2rescore.json: -------------------------------------------------------------------------------- 1 | { 2 | "ms2rescore": { 3 | "psm_file": "examples/data/search/mascot/F010956.mzid", 4 | "psm_file_type": "mzid", 5 | "spectrum_path": "examples/data/spectra/F010956.mgf", 6 | "psm_id_pattern": "(.*)", 7 | "id_decoy_pattern": "^rev_", 8 | "fasta_file": "examples/data/fasta/uniprot-proteome-human-contaminants.fasta" 9 | } 10 | } -------------------------------------------------------------------------------- /examples/mascot-ms2rescore.toml: -------------------------------------------------------------------------------- 1 | [ms2rescore] 2 | psm_file = "examples/data/search/mascot/F010956.mzid" 3 | psm_file_type = "mzid" 4 | spectrum_path = "examples/data/spectra/F010956.mgf" 5 | psm_id_pattern = "(.*)" 6 | id_decoy_pattern = '^rev_' 7 | fasta_file = "examples/data/fasta/uniprot-proteome-human-contaminants.fasta" 8 | -------------------------------------------------------------------------------- /examples/maxquant-ms2rescore.json:
-------------------------------------------------------------------------------- 1 | { 2 | "ms2rescore": { 3 | "psm_file": "examples/data/search/maxquant/msms.txt", 4 | "psm_file_type": "msms", 5 | "spectrum_path": "examples/data/spectra", 6 | "spectrum_id_pattern": ".*scan=(\\d+)$", 7 | "fasta_file": "examples/data/fasta/uniprot-proteome-human-contaminants.fasta", 8 | "modification_mapping": { 9 | "gl": "Gln->pyro-Glu", 10 | "ox": "Oxidation", 11 | "ac": "Acetylation", 12 | "de": "Deamidation" 13 | }, 14 | "fixed_modifications": { 15 | "Carbamidomethyl": [ 16 | "C" 17 | ] 18 | } 19 | } 20 | } -------------------------------------------------------------------------------- /examples/maxquant-ms2rescore.toml: -------------------------------------------------------------------------------- 1 | [ms2rescore] 2 | psm_file = "examples/data/search/maxquant/msms.txt" 3 | psm_file_type = "msms" 4 | spectrum_path = "examples/data/spectra" 5 | spectrum_id_pattern = '.*scan=(\d+)$' # Single quotes for literal regex string 6 | fasta_file = "examples/data/fasta/uniprot-proteome-human-contaminants.fasta" 7 | 8 | [ms2rescore.modification_mapping] 9 | "gl" = "Gln->pyro-Glu" 10 | "ox" = "Oxidation" 11 | "ac" = "Acetylation" 12 | "de" = "Deamidation" 13 | 14 | [ms2rescore.fixed_modifications] 15 | "Carbamidomethyl" = ["C"] 16 | -------------------------------------------------------------------------------- /examples/msgfplus-ms2rescore.json: -------------------------------------------------------------------------------- 1 | { 2 | "ms2rescore": { 3 | "psm_file": "examples/id/msgfplus.pin", 4 | "psm_file_type": "percolator", 5 | "psm_reader_kwargs": { 6 | "score_column": "PSMScore" 7 | }, 8 | "log_level": "debug", 9 | "processes": 16, 10 | "rescoring_engine": { 11 | "mokapot": { 12 | "fasta_file": "examples/proteins/uniprot-proteome-human-contaminants.fasta", 13 | "write_weights": true, 14 | "write_txt": true 15 | } 16 | } 17 | } 18 | } 
-------------------------------------------------------------------------------- /examples/msgfplus-ms2rescore.toml: -------------------------------------------------------------------------------- 1 | [ms2rescore] 2 | psm_file = "examples/id/msgfplus.pin" 3 | psm_file_type = "percolator" 4 | psm_reader_kwargs = { "score_column" = "PSMScore" } 5 | log_level = "debug" 6 | processes = 16 7 | 8 | [ms2rescore.rescoring_engine.mokapot] 9 | fasta_file = "examples/proteins/uniprot-proteome-human-contaminants.fasta" 10 | write_weights = true 11 | write_txt = true 12 | -------------------------------------------------------------------------------- /examples/peptideshaker-ms2rescore.json: -------------------------------------------------------------------------------- 1 | { 2 | "ms2rescore": { 3 | "psm_file": "examples/data/search/peptideshaker/peptideshaker-example.mzid", 4 | "spectrum_path": "examples/data/spectra/qExactive01819.mzML" 5 | } 6 | } -------------------------------------------------------------------------------- /examples/peptideshaker-ms2rescore.toml: -------------------------------------------------------------------------------- 1 | [ms2rescore] 2 | psm_file = "examples/data/search/peptideshaker/peptideshaker-example.mzid" 3 | spectrum_path = "examples/data/spectra/qExactive01819.mzML" 4 | -------------------------------------------------------------------------------- /examples/sage-ms2rescore.json: -------------------------------------------------------------------------------- 1 | { 2 | "ms2rescore": { 3 | "psm_file": "examples/data/search/sage/results.sage.tsv", 4 | "psm_file_type": "sage", 5 | "spectrum_path": "examples/data/spectra/qExactive01819.mzML", 6 | "fasta_file": "examples/data/fasta/uniprot-human-reviewed-trypsin-june-2021_concatenated_target_decoy.fasta" 7 | } 8 | } -------------------------------------------------------------------------------- /examples/sage-ms2rescore.toml: 
-------------------------------------------------------------------------------- 1 | [ms2rescore] 2 | psm_file = "examples/data/search/sage/results.sage.tsv" 3 | psm_file_type = "sage" 4 | spectrum_path = "examples/data/spectra/qExactive01819.mzML" 5 | fasta_file = "examples/data/fasta/uniprot-human-reviewed-trypsin-june-2021_concatenated_target_decoy.fasta" 6 | -------------------------------------------------------------------------------- /examples/xtandem-ms2rescore.json: -------------------------------------------------------------------------------- 1 | { 2 | "ms2rescore": { 3 | "psm_file": "examples/data/search/xtandem/pyro.t.xml", 4 | "psm_file_type": "xtandem", 5 | "spectrum_path": "examples/data/spectra/Velos005137.mgf", 6 | "psm_id_pattern": "(\\S+).*", 7 | "modification_mapping": { 8 | "+57.022": "U:Carbamidomethyl", 9 | "+15.994": "U:Oxidation", 10 | "+39.9954": "U:Pyro-carbamidomethyl", 11 | "+42.0106": "U:Acetyl", 12 | "-17.0266": "U:Gln->pyro-Glu", 13 | "-18.0106": "U:Glu->pyro-Glu" 14 | } 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /examples/xtandem-ms2rescore.toml: -------------------------------------------------------------------------------- 1 | [ms2rescore] 2 | psm_file = "examples/data/search/xtandem/pyro.t.xml" 3 | psm_file_type = "xtandem" 4 | spectrum_path = "examples/data/spectra/Velos005137.mgf" 5 | psm_id_pattern = '(\S+).*' 6 | 7 | [ms2rescore.modification_mapping] 8 | "+57.022" = "U:Carbamidomethyl" 9 | "+15.994" = "U:Oxidation" 10 | "+39.9954" = "U:Pyro-carbamidomethyl" 11 | "+42.0106" = "U:Acetyl" 12 | "-17.0266" = "U:Gln->pyro-Glu" 13 | "-18.0106" = "U:Glu->pyro-Glu" 14 | -------------------------------------------------------------------------------- /img/gui-screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/img/gui-screenshot.png 
# PyInstaller spec file for the MS²Rescore GUI bundle.
#
# The script (1) resolves the non-optional requirements of the `ms2rescore` distribution,
# (2) collects data files and hidden imports for each of them with `collect_all`, and
# (3) builds a one-folder, windowed executable from `ms2rescore/gui/__main__.py`.

import importlib.metadata
import os
import re

from PyInstaller.building.build_main import COLLECT, EXE, PYZ, Analysis
from PyInstaller.utils.hooks import collect_all

from ms2rescore import __version__

# Package info
exe_name = "ms2rescore"
script_name = "ms2rescore/gui/__main__.py"
icon = "./img/ms2rescore.ico"
location = os.getcwd()
project = "ms2rescore"
bundle_name = "ms2rescore"
bundle_identifier = f"{bundle_name}.{__version__}"

extra_requirements = {"ionmob"}

# Requirements config: skip every name that contains a dot (i.e. submodules)
skip_requirements_regex = r"^(?:.*\..*)"


# Collect hidden imports and data for all requirements
requirements = importlib.metadata.requires(project)
requirements = {
    re.match(r"^[\w\-]+", req)[0]  # Remove version specifiers
    for req in requirements
    if "; extra ==" not in req  # Exclude optional dependencies
}
requirements.update([project, "xgboost"])
requirements.update(extra_requirements)

hidden_imports = set()
datas = []
binaries = []
checked = set()
while requirements:
    requirement = requirements.pop()
    if re.match(skip_requirements_regex, requirement):
        continue
    if requirement in ["tomli"]:
        continue
    checked.add(requirement)
    # NOTE(review): the value is unused; the lookup only has the effect of raising when the
    # distribution metadata cannot be found -- confirm whether that is intended.
    module_version = importlib.metadata.version(re.match(r"^[\w\-]+", requirement)[0])
    try:
        # NOTE(review): `binaries_` is never added to `binaries` -- confirm this is deliberate.
        datas_, binaries_, hidden_imports_ = collect_all(requirement, include_py_files=True)
    except ImportError:
        continue
    datas += datas_
    hidden_imports_ = set(hidden_imports_)
    hidden_imports_.discard("")
    hidden_imports_.discard(None)
    # Newly discovered names are queued so that they are processed as well
    requirements |= hidden_imports_ - checked
    hidden_imports |= hidden_imports_

hidden_imports = sorted([h for h in hidden_imports if "tests" not in h.split(".")])
hidden_imports = [h for h in hidden_imports if "__pycache__" not in h]
datas = [
    d
    for d in datas
    if ("__pycache__" not in d[0]) and (d[1] not in [".", "build", "dist", "Output"])
]
# Built with os.path.join instead of the literal "ms2rescore\package_data": `\p` is an
# invalid escape sequence and the backslash is only a path separator on Windows.
datas += [(os.path.join("ms2rescore", "package_data"), "package_data")]

block_cipher = None
# Build package
a = Analysis(
    [script_name],
    pathex=[location],
    binaries=binaries,
    datas=datas,
    hiddenimports=hidden_imports,
    hookspath=[],
    hooksconfig={},
    runtime_hooks=[],
    excludes=[],
    win_no_prefer_redirects=False,
    win_private_assemblies=False,
    cipher=block_cipher,
    noarchive=False,
)

pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)

exe = EXE(
    pyz,
    a.scripts,
    [],
    exclude_binaries=True,
    name=exe_name,
    debug=False,
    bootloader_ignore_signals=False,
    strip=False,
    upx=True,
    console=False,
    windowed=True,
    disable_windowed_traceback=False,
    target_arch=None,
    codesign_identity=None,
    entitlements_file=None,
    icon=icon,  # Same path as before, now taken from the `icon` setting above
)

coll = COLLECT(
    exe, a.binaries, a.zipfiles, a.datas, strip=False, upx=True, upx_exclude=[], name=exe_name
)
plt.set_loglevel("warning") 25 | except ImportError: 26 | pass 27 | 28 | LOG_MAPPING = { 29 | "critical": logging.CRITICAL, 30 | "error": logging.ERROR, 31 | "warning": logging.WARNING, 32 | "info": logging.INFO, 33 | "debug": logging.DEBUG, 34 | } 35 | LOGGER = logging.getLogger(__name__) 36 | CONSOLE = Console(record=True) 37 | 38 | 39 | def _print_credits(tims=False): 40 | """Print software credits to terminal.""" 41 | text = Text() 42 | text.append("\n") 43 | if tims: 44 | text.append("TIMS²Rescore", style="bold link https://github.com/compomics/tims2rescore") 45 | else: 46 | text.append("MS²Rescore", style="bold link https://github.com/compomics/ms2rescore") 47 | text.append(f" (v{__version__})\n", style="bold") 48 | if tims: 49 | text.append("MS²Rescore tuned for timsTOF DDA-PASEF data.\n", style="italic") 50 | text.append("Developed at CompOmics, VIB / Ghent University, Belgium.\n") 51 | text.append("Please cite: ") 52 | if tims: 53 | text.append( 54 | "Declercq & Devreese et al. bioRxiv (2024)", 55 | style="link https://doi.org/10.1101/2024.05.29.596400", 56 | ) 57 | else: 58 | text.append( 59 | "Buur & Declercq et al. 
def _argument_parser() -> argparse.ArgumentParser:
    """
    Build the command-line argument parser for MS²Rescore.

    Returns
    -------
    argparse.ArgumentParser
        Parser with all supported command-line options. Options without an explicit default
        resolve to ``None`` when omitted; the two ``store_true`` flags resolve to ``False``.

    """
    parser = argparse.ArgumentParser(
        description="MS²Rescore: Sensitive PSM rescoring with predicted features.",
        # Wider option column so that long option strings stay on one line in `--help`
        formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=42),
    )
    parser.add_argument("-v", "--version", action="version", version=__version__)
    parser.add_argument(
        "-p",
        "--psm-file",
        metavar="FILE",
        action="store",
        type=str,
        nargs="*",
        dest="psm_file",
        help="path to PSM file (PIN, mzIdentML, MaxQuant msms, X!Tandem XML...)",
    )
    parser.add_argument(
        "-t",
        "--psm-file-type",
        metavar="STR",
        action="store",
        type=str,
        dest="psm_file_type",
        help="PSM file type (default: 'infer')",
    )
    parser.add_argument(
        "-s",
        "--spectrum-path",
        metavar="FILE/DIR",
        action="store",
        type=str,
        dest="spectrum_path",
        help=(
            "path to MGF/mzML spectrum file or directory with spectrum files (default: derived "
            "from identification file)"
        ),
    )
    parser.add_argument(
        "-c",
        "--config-file",
        metavar="FILE",
        action="store",
        type=str,
        dest="config_file",
        help="path to MS²Rescore configuration file (see README.md)",
    )
    parser.add_argument(
        "-o",
        "--output-path",
        metavar="FILE",
        action="store",
        type=str,
        dest="output_path",
        help="Path and stem for output file names (default: derive from identification file)",
    )
    parser.add_argument(
        "-l",
        "--log-level",
        metavar="STR",
        action="store",
        type=str,
        dest="log_level",
        help="logging level (default: `info`)",
    )
    parser.add_argument(
        "-n",
        "--processes",
        metavar="INT",
        action="store",
        type=int,
        dest="processes",
        help="number of parallel processes available to MS²Rescore",
    )
    parser.add_argument(
        "-f",
        "--fasta-file",
        metavar="FILE",
        action="store",
        type=str,
        dest="fasta_file",
        help="path to FASTA file",
    )
    parser.add_argument(
        "--write-report",
        action="store_true",
        dest="write_report",
        # Previously this option carried the help text of `--profile`
        help="boolean to enable writing of the HTML report with rescoring results",
    )
    parser.add_argument(
        "--profile",
        action="store_true",
        help="boolean to enable profiling with cProfile",
    )

    return parser
" 173 | f"Valid levels are: {', '.join(LOG_MAPPING.keys())}" 174 | ) 175 | logging.basicConfig( 176 | format="%(name)s // %(message)s", 177 | datefmt="%Y-%m-%d %H:%M:%S", 178 | level=LOG_MAPPING[passed_level], 179 | handlers=[ 180 | logging.FileHandler(log_file, mode="w", encoding="utf-8"), 181 | RichHandler(rich_tracebacks=True, console=CONSOLE, show_path=False), 182 | ], 183 | ) 184 | 185 | 186 | def profile(fnc, filepath): 187 | """A decorator that uses cProfile to profile a function""" 188 | 189 | def inner(*args, **kwargs): 190 | with cProfile.Profile() as profiler: 191 | return_value = fnc(*args, **kwargs) 192 | profiler.dump_stats(filepath + ".profile.prof") 193 | return return_value 194 | 195 | return inner 196 | 197 | 198 | def main_tims(): 199 | """Run MS²Rescore command-line interface in TIMS²Rescore mode.""" 200 | main(tims=True) 201 | 202 | 203 | def main(tims=False): 204 | """Run MS²Rescore command-line interface.""" 205 | _print_credits(tims) 206 | 207 | # Parse CLI arguments and configuration file 208 | parser = _argument_parser() 209 | cli_args = parser.parse_args() 210 | 211 | configurations = [] 212 | if tims: 213 | configurations.append( 214 | json.load(importlib.resources.open_text(package_data, "config_default_tims.json")) 215 | ) 216 | if cli_args.config_file: 217 | configurations.append(cli_args.config_file) 218 | configurations.append(cli_args) 219 | 220 | try: 221 | config = parse_configurations(configurations) 222 | except MS2RescoreConfigurationError as e: 223 | LOGGER.critical(e) 224 | sys.exit(1) 225 | 226 | # Setup logging 227 | _setup_logging( 228 | config["ms2rescore"]["log_level"], config["ms2rescore"]["output_path"] + ".log.txt" 229 | ) 230 | 231 | # Run MS²Rescore 232 | try: 233 | if cli_args.profile: 234 | profiled_rescore = profile(rescore, config["ms2rescore"]["output_path"]) 235 | profiled_rescore(configuration=config) 236 | else: 237 | rescore(configuration=config) 238 | except Exception as e: 239 | LOGGER.exception(e) 240 | 
sys.exit(1) 241 | finally: 242 | CONSOLE.save_html(config["ms2rescore"]["output_path"] + ".log.html") 243 | 244 | 245 | if __name__ == "__main__": 246 | main() 247 | -------------------------------------------------------------------------------- /ms2rescore/config_parser.py: -------------------------------------------------------------------------------- 1 | """Parse configuration from command line arguments and configuration files.""" 2 | 3 | import importlib.resources 4 | import json 5 | import multiprocessing as mp 6 | from argparse import Namespace 7 | from pathlib import Path 8 | from typing import Dict, List, Union 9 | 10 | try: 11 | import tomllib 12 | except ImportError: 13 | import tomli as tomllib 14 | 15 | from cascade_config import CascadeConfig 16 | 17 | from ms2rescore import package_data 18 | from ms2rescore.exceptions import MS2RescoreConfigurationError 19 | 20 | 21 | def _parse_output_path(configured_path, psm_file_path): 22 | """Parse output path and make parent dirs if required.""" 23 | psm_file_stem = Path(psm_file_path).stem + ".ms2rescore" 24 | if configured_path: 25 | configured_path = Path(configured_path) 26 | # If existing dir, add psm_file stem 27 | if configured_path.is_dir(): 28 | return (configured_path / psm_file_stem).as_posix() 29 | # If parent is existing dir, use as is (user intended as path + stem) 30 | elif configured_path.parent.is_dir(): 31 | return configured_path.as_posix() 32 | # If none-existing dir, create dirs and add psm_file stem 33 | else: 34 | configured_path.mkdir(parents=True, exist_ok=True) 35 | return (configured_path / psm_file_stem).as_posix() 36 | else: 37 | # If none, use psm_file path and stem 38 | return (Path(psm_file_path).parent / psm_file_stem).as_posix() 39 | 40 | 41 | def _validate_filenames(config: Dict) -> Dict: 42 | """Validate and infer input/output filenames.""" 43 | # psm_file should be provided 44 | if not config["ms2rescore"]["psm_file"]: 45 | raise MS2RescoreConfigurationError("PSM file 
def parse_configurations(configurations: List[Union[dict, str, Path, Namespace]]) -> Dict:
    """
    Parse and validate MS²Rescore configuration files and CLI arguments.

    Default configuration, user configuration files, and CLI/class arguments are parsed
    in cascading order, with each successive configuration taking priority over the
    previous.

    Parameters
    ----------
    configurations: Dict, str, Path, Namespace, List[Dict, str, Path, Namespace]
        configuration dictionary, path to a ``.json`` or ``.toml`` configuration file, argparse
        Namespace, or a list of the above. Empty entries (e.g. ``None``) are skipped.

    Returns
    -------
    Dict
        Parsed configuration, with validated file names and number of processes, and with
        lowercase ``feature_generators`` and ``rescoring_engine`` names.

    Raises
    ------
    MS2RescoreConfigurationError
        If a configuration file has an extension other than ``.json`` or ``.toml``.
    ValueError
        If a configuration entry is of an unsupported type.

    """
    if not isinstance(configurations, list):
        configurations = [configurations]

    # Read the packaged schema and defaults; context managers close the resource handles
    with importlib.resources.open_text(package_data, "config_schema.json") as schema_file:
        validation_schema = json.load(schema_file)
    with importlib.resources.open_text(package_data, "config_default.json") as default_file:
        default_config = json.load(default_file)

    # Initialize CascadeConfig with validation schema and defaults
    cascade_conf = CascadeConfig(
        validation_schema=validation_schema,
        none_overrides_value=False,
        max_recursion_depth=1,
    )
    cascade_conf.add_dict(default_config)

    # Add configurations in order; later entries take priority over earlier ones
    for config in configurations:
        if not config:
            continue
        if isinstance(config, dict):
            cascade_conf.add_dict(config)
        elif isinstance(config, (str, Path)):
            suffix = Path(config).suffix.lower()
            if suffix == ".json":
                cascade_conf.add_json(config)
            elif suffix == ".toml":
                with Path(config).open("rb") as toml_file:
                    cascade_conf.add_dict(dict(tomllib.load(toml_file)))
            else:
                raise MS2RescoreConfigurationError(
                    "Unknown file extension for configuration file. Should be `json` or `toml`."
                )
        elif isinstance(config, Namespace):
            # CLI arguments all belong to the top-level `ms2rescore` section
            cascade_conf.add_namespace(config, subkey="ms2rescore")
        else:
            raise ValueError(
                "Configuration should be a dictionary, argparse Namespace, or path to a "
                "configuration file."
            )

    # Parse configurations
    config = cascade_conf.parse()

    # Validate and infer filenames and number of parallel processes
    config = _validate_filenames(config)
    config = _validate_processes(config)

    # Convert feature_generators and rescoring_engine names to lowercase
    config["ms2rescore"]["feature_generators"] = {
        k.lower(): v for k, v in config["ms2rescore"]["feature_generators"].items()
    }
    config["ms2rescore"]["rescoring_engine"] = {
        k.lower(): v for k, v in config["ms2rescore"]["rescoring_engine"].items()
    }

    return config
class FeatureGeneratorBase(ABC):
    """
    Base class from which all feature generators must inherit.

    Subclasses must implement the ``feature_names`` property and the ``add_features``
    method.
    """

    # Set of required MS data types for feature generation
    required_ms_data: Set[MSDataType] = set()

    def __init__(self, *args, **kwargs) -> None:
        # Positional and keyword arguments are accepted but not used by the base class
        super().__init__()

    @property
    @abstractmethod
    def feature_names(self):
        """Names of the features that the generator adds to the PSMs."""
        pass

    @abstractmethod
    def add_features(self, psm_list: PSMList):
        """
        Add the generator's features to the PSMs in ``psm_list``.

        Declared as an instance method (with ``self``), matching the concrete generators.
        """
        pass
class BasicFeatureGenerator(FeatureGeneratorBase):
    def __init__(self, *args, **kwargs) -> None:
        """
        Generate basic features that can be extracted from any PSM list, including search engine
        score, charge state, and MS1 error.

        Parameters
        ----------
        *args
            Positional arguments passed to the base class.
        **kwargs
            Keyword arguments passed to the base class.

        Attributes
        ----------
        feature_names: list[str]
            Names of the features that will be added to the PSMs.

        """
        super().__init__(*args, **kwargs)
        self._feature_names = None

    @property
    def feature_names(self) -> List[str]:
        """Names of the added features; only available after ``add_features`` has run."""
        if self._feature_names is None:
            raise ValueError("Feature names have not been set yet. First run `add_features`.")
        return self._feature_names

    def add_features(self, psm_list: PSMList) -> None:
        """
        Add basic features to a PSM list.

        Each feature group (charge, MS1 error, search engine score) is only added if the
        required values are present for all PSMs.

        Parameters
        ----------
        psm_list
            PSM list to add features to.

        """
        logger.info("Adding basic features to PSMs.")

        self._feature_names = []  # Reset feature names

        # Nothing to compute for an empty PSM list (min/max of the charges would fail)
        if len(psm_list) == 0:
            return

        charge_states = np.array([psm.peptidoform.precursor_charge for psm in psm_list])
        precursor_mzs = psm_list["precursor_mz"]
        scores = psm_list["score"]

        has_charge = None not in charge_states
        has_mz = None not in precursor_mzs and has_charge
        has_score = None not in scores

        if has_charge:
            charge_n = charge_states
            charge_one_hot, one_hot_names = _one_hot_encode_charge(charge_states)
            self._feature_names.extend(["charge_n"] + one_hot_names)

        if has_mz:  # Charge also required for theoretical m/z
            theo_mz = np.array([psm.peptidoform.theoretical_mz for psm in psm_list])
            abs_ms1_error_ppm = np.abs((precursor_mzs - theo_mz) / theo_mz * 10**6)
            self._feature_names.append("abs_ms1_error_ppm")

        if has_score:
            self._feature_names.append("search_engine_score")

        for i, psm in enumerate(psm_list):
            features = {}
            if has_charge:
                features["charge_n"] = charge_n[i]
                features.update(charge_one_hot[i])
            if has_mz:
                features["abs_ms1_error_ppm"] = abs_ms1_error_ppm[i]
            if has_score:
                features["search_engine_score"] = scores[i]
            psm.rescoring_features.update(features)


def _one_hot_encode_charge(
    charge_states: np.ndarray,
) -> Tuple[Iterable[Dict[str, int]], List[str]]:
    """
    One-hot encode charge states.

    Returns a list with one ``{"charge_<z>": 0 or 1}`` dictionary per entry, and the list of
    ``charge_<z>`` names covering every charge between the observed minimum and maximum.
    Empty input yields two empty lists.
    """
    n_entries = len(charge_states)
    if n_entries == 0:
        return [], []

    min_charge = np.min(charge_states)
    max_charge = np.max(charge_states)

    # One column per charge in [min_charge, max_charge]; set the column matching each entry
    mask = np.zeros((n_entries, max_charge - min_charge + 1), dtype=bool)
    mask[np.arange(n_entries), charge_states - min_charge] = 1
    one_hot = mask.view("i1")

    heading = [f"charge_{i}" for i in range(min_charge, max_charge + 1)]

    return [dict(zip(heading, row)) for row in one_hot], heading
class DeepLCFeatureGenerator(FeatureGeneratorBase):
    """DeepLC retention time-based feature generator."""

    required_ms_data = {MSDataType.retention_time}

    def __init__(
        self,
        *args,
        lower_score_is_better: bool = False,
        calibration_set_size: Union[int, float, None] = None,
        processes: int = 1,
        **kwargs,
    ) -> None:
        """
        Generate DeepLC-based features for rescoring.

        DeepLC retraining is off unless ``deeplc_retrain`` is passed as a keyword argument.

        Parameters
        ----------
        lower_score_is_better
            Whether a lower PSM score denotes a better matching PSM. Default: False
        calibration_set_size: int, float or None
            Amount of best scoring target PSMs to use for DeepLC calibration: an absolute
            number (int) or a fraction of the target PSMs (float). If an int is higher than the
            number of available PSMs, all PSMs are used. If None (default), all target PSMs
            with a q-value <= 0.01 are used.
        processes: int
            Number of processes to use in DeepLC. Defaults to 1.
        kwargs: dict
            Additional keyword arguments are passed to DeepLC. Keys that are not DeepLC
            arguments are dropped.

        Attributes
        ----------
        feature_names: list[str]
            Names of the features that will be added to the PSMs.

        """
        super().__init__(*args, **kwargs)

        self.lower_psm_score_better = lower_score_is_better
        self.calibration_set_size = calibration_set_size
        self.processes = processes
        self.deeplc_kwargs = kwargs or {}

        self._verbose = logger.getEffectiveLevel() <= logging.DEBUG

        # Lazy-load DeepLC
        from deeplc import DeepLC

        self.DeepLC = DeepLC

        # Remove any kwargs that are not DeepLC arguments
        deeplc_arg_names = getfullargspec(DeepLC).args
        self.deeplc_kwargs = {
            k: v for k, v in self.deeplc_kwargs.items() if k in deeplc_arg_names
        }
        self.deeplc_kwargs.update({"config_file": None})

        # Set default DeepLC arguments
        if "deeplc_retrain" not in self.deeplc_kwargs:
            self.deeplc_kwargs["deeplc_retrain"] = False

        self.deeplc_predictor = None
        self.selected_model = None
        if "path_model" in self.deeplc_kwargs:
            self.user_model = self.deeplc_kwargs.pop("path_model")
            logger.debug(f"Using user-provided DeepLC model {self.user_model}.")
        else:
            self.user_model = None

    @property
    def feature_names(self) -> List[str]:
        """Names of the features added to each PSM by ``add_features``."""
        return [
            "observed_retention_time",
            "predicted_retention_time",
            "rt_diff",
            "observed_retention_time_best",
            "predicted_retention_time_best",
            "rt_diff_best",
        ]

    def add_features(self, psm_list: PSMList) -> None:
        """
        Add DeepLC-derived features to PSMs.

        DeepLC is calibrated and run separately for each run. The ``*_best`` features hold,
        per peptidoform (charge state ignored) within a run, the values of the PSM with the
        smallest retention time difference.
        """

        logger.info("Adding DeepLC-derived features to PSMs.")

        # Get easy-access nested version of PSMList
        psm_dict = psm_list.get_psm_dict()

        # Run DeepLC for each spectrum file
        current_run = 1
        total_runs = sum(len(runs) for runs in psm_dict.values())

        for runs in psm_dict.values():
            # Reset DeepLC predictor for each collection of runs
            self.deeplc_predictor = None
            self.selected_model = None
            for run, psms in runs.items():
                peptide_rt_diff_dict = defaultdict(
                    lambda: {
                        "observed_retention_time_best": np.inf,
                        "predicted_retention_time_best": np.inf,
                        "rt_diff_best": np.inf,
                    }
                )
                logger.info(
                    f"Running DeepLC for PSMs from run ({current_run}/{total_runs}): `{run}`..."
                )

                # Disable wild logging to stdout by Tensorflow, unless in debug mode. The
                # ExitStack closes the devnull handle again once the run is processed.
                with contextlib.ExitStack() as stack:
                    if not self._verbose:
                        devnull = stack.enter_context(open(os.devnull, "w", encoding="utf-8"))
                        stack.enter_context(contextlib.redirect_stdout(devnull))

                    # Make new PSM list for this run (chain PSMs per spectrum to flat list)
                    psm_list_run = PSMList(psm_list=list(chain.from_iterable(psms.values())))

                    psm_list_calibration = self._get_calibration_psms(psm_list_run)
                    logger.debug(f"Calibrating DeepLC with {len(psm_list_calibration)} PSMs...")
                    self.deeplc_predictor = self.DeepLC(
                        n_jobs=self.processes,
                        verbose=self._verbose,
                        path_model=self.selected_model or self.user_model,
                        **self.deeplc_kwargs,
                    )
                    self.deeplc_predictor.calibrate_preds(psm_list_calibration)
                    # Still calibrate for each run, but do not try out all model options.
                    # Just use model that was selected based on first run
                    if not self.selected_model:
                        self.selected_model = list(self.deeplc_predictor.model.keys())
                        self.deeplc_kwargs["deeplc_retrain"] = False
                        logger.debug(
                            f"Selected DeepLC model {self.selected_model} based on "
                            "calibration of first run. Using this model (after new "
                            "calibrations) for the remaining runs."
                        )

                    logger.debug("Predicting retention times...")
                    predictions = np.array(self.deeplc_predictor.make_preds(psm_list_run))
                    observations = psm_list_run["retention_time"]
                    rt_diffs_run = np.abs(predictions - observations)

                    logger.debug("Adding features to PSMs...")
                    for i, psm in enumerate(psm_list_run):
                        psm["rescoring_features"].update(
                            {
                                "observed_retention_time": observations[i],
                                "predicted_retention_time": predictions[i],
                                "rt_diff": rt_diffs_run[i],
                            }
                        )
                        # ProForma appends the charge after a forward slash; remove it
                        peptide = psm.peptidoform.proforma.split("/")[0]
                        if peptide_rt_diff_dict[peptide]["rt_diff_best"] > rt_diffs_run[i]:
                            peptide_rt_diff_dict[peptide] = {
                                "observed_retention_time_best": observations[i],
                                "predicted_retention_time_best": predictions[i],
                                "rt_diff_best": rt_diffs_run[i],
                            }
                    for psm in psm_list_run:
                        psm["rescoring_features"].update(
                            peptide_rt_diff_dict[psm.peptidoform.proforma.split("/")[0]]
                        )
                current_run += 1

    def _get_calibration_psms(self, psm_list: PSMList):
        """
        Get target PSMs for calibration.

        With ``calibration_set_size`` set, the N best scoring target PSMs are returned.
        Otherwise all target PSMs with a q-value <= 0.01 are returned.

        Raises
        ------
        ValueError
            If no calibration set size is configured and no target PSM passes the q-value
            threshold.

        """
        psm_list_targets = psm_list[~psm_list["is_decoy"]]
        if self.calibration_set_size:
            n_psms = self._get_number_of_calibration_psms(psm_list_targets)
            indices = np.argsort(psm_list_targets["score"])
            indices = indices[:n_psms] if self.lower_psm_score_better else indices[-n_psms:]
            return psm_list_targets[indices]
        else:
            identified_psms = psm_list_targets[psm_list_targets["qvalue"] <= 0.01]
            if len(identified_psms) == 0:
                raise ValueError(
                    "No target PSMs with q-value <= 0.01 found. Please set calibration set size for calibrating deeplc."
                )
            elif len(identified_psms) < 500 and self.deeplc_kwargs["deeplc_retrain"]:
                logger.warning(
                    " Less than 500 target PSMs with q-value <= 0.01 found for retraining. Consider turning of deeplc_retrain, as this is likely not enough data for retraining."
                )
            return identified_psms

    def _get_number_of_calibration_psms(self, psm_list):
        """
        Get number of calibration PSMs given `calibration_set_size` and total number of PSMs.

        Raises
        ------
        ValueError
            If `calibration_set_size` is a float outside of the interval (0, 1].
        TypeError
            If `calibration_set_size` is neither a float nor an int.

        """
        if isinstance(self.calibration_set_size, float):
            if not 0 < self.calibration_set_size <= 1:
                raise ValueError(
                    "If `calibration_set_size` is a float, it cannot be smaller than "
                    "or equal to 0 or larger than 1."
                )
            else:
                num_calibration_psms = round(len(psm_list) * self.calibration_set_size)
        elif isinstance(self.calibration_set_size, int):
            if self.calibration_set_size > len(psm_list):
                logger.warning(
                    f"Requested number of calibration PSMs ({self.calibration_set_size}"
                    f") is larger than total number of PSMs ({len(psm_list)}). Using "
                    "all PSMs for calibration."
                )
                num_calibration_psms = len(psm_list)
            else:
                num_calibration_psms = self.calibration_set_size
        else:
            raise TypeError(
                "Expected float or int for `calibration_set_size`. Got "
                f"{type(self.calibration_set_size)} instead. "
            )
        logger.debug(f"Using {num_calibration_psms} PSMs for calibration")
        return num_calibration_psms
class IM2DeepFeatureGenerator(FeatureGeneratorBase):
    """IM2Deep collision cross section feature generator."""

    required_ms_data = {MSDataType.ion_mobility}

    def __init__(
        self,
        *args,
        processes: int = 1,
        **kwargs,
    ):
        """
        Initialize the IM2DeepFeatureGenerator.

        Parameters
        ----------
        processes : int, optional
            Number of parallel processes to use for IM2Deep predictions. Default is 1.
        **kwargs : dict, optional
            Additional keyword arguments to `im2deep.predict_ccs`. Keys that are not
            arguments of `predict_ccs` are dropped.

        """
        super().__init__(*args, **kwargs)

        self._verbose = logger.getEffectiveLevel() <= logging.DEBUG

        # Remove any kwargs that are not IM2Deep arguments
        predict_ccs_arg_names = getfullargspec(predict_ccs).args
        self.im2deep_kwargs = {
            k: v for k, v in (kwargs or {}).items() if k in predict_ccs_arg_names
        }
        self.im2deep_kwargs["n_jobs"] = processes

    @property
    def feature_names(self) -> List[str]:
        """Names of the features added to each PSM by ``add_features``."""
        return [
            "ccs_observed_im2deep",
            "ccs_predicted_im2deep",
            "ccs_error_im2deep",
            "abs_ccs_error_im2deep",
            "perc_ccs_error_im2deep",
        ]

    def add_features(self, psm_list: PSMList) -> None:
        """
        Add IM2Deep-derived features to PSMs.

        For each run, observed ion mobilities are converted to CCS values, IM2Deep predictions
        are made with a calibration set of confident target PSMs, and the observed and
        predicted CCS plus their signed, absolute and percentage errors are stored.
        """
        logger.info("Adding IM2Deep-derived features to PSMs")

        # Get easy-access nested version of PSMlist
        psm_dict = psm_list.get_psm_dict()

        # Run IM2Deep for each spectrum file
        current_run = 1
        total_runs = sum(len(runs) for runs in psm_dict.values())

        for runs in psm_dict.values():
            for run, psms in runs.items():
                logger.info(
                    f"Running IM2Deep for PSMs from run ({current_run}/{total_runs}): `{run}`..."
                )

                # Disable wild logging to stdout by TensorFlow, unless in debug mode. The
                # ExitStack closes the devnull handle again once the run is processed.
                with contextlib.ExitStack() as stack:
                    if not self._verbose:
                        devnull = stack.enter_context(open(os.devnull, "w", encoding="utf-8"))
                        stack.enter_context(contextlib.redirect_stdout(devnull))

                    # Make new PSM list for this run (chain PSMs per spectrum to flat list)
                    psm_list_run = PSMList(psm_list=list(chain.from_iterable(psms.values())))

                    logger.debug("Calibrating IM2Deep...")

                    # Convert ion mobility to CCS and calibrate CCS values
                    psm_list_run_df = psm_list_run.to_dataframe()
                    psm_list_run_df["charge"] = [
                        pep.precursor_charge for pep in psm_list_run_df["peptidoform"]
                    ]
                    psm_list_run_df["ccs_observed"] = im2ccs(
                        psm_list_run_df["ion_mobility"],
                        psm_list_run_df["precursor_mz"],
                        psm_list_run_df["charge"],
                    )

                    # Create dataframe with high confidence hits for calibration
                    cal_psm_df = self.make_calibration_df(psm_list_run_df)

                    # Make predictions with IM2Deep
                    logger.debug("Predicting CCS values...")
                    predictions = predict_ccs(
                        psm_list_run, cal_psm_df, write_output=False, **self.im2deep_kwargs
                    )

                    # Add features to PSMs
                    logger.debug("Adding features to PSMs...")
                    observations = psm_list_run_df["ccs_observed"]
                    # Keep the sign here; the absolute value is a separate feature
                    ccs_errors_run = predictions - observations
                    for i, psm in enumerate(psm_list_run):
                        psm["rescoring_features"].update(
                            {
                                "ccs_observed_im2deep": observations[i],
                                "ccs_predicted_im2deep": predictions[i],
                                "ccs_error_im2deep": ccs_errors_run[i],
                                "abs_ccs_error_im2deep": np.abs(ccs_errors_run[i]),
                                "perc_ccs_error_im2deep": np.abs(ccs_errors_run[i])
                                / observations[i]
                                * 100,
                            }
                        )

                current_run += 1

    @staticmethod
    def make_calibration_df(psm_list_df: pd.DataFrame, threshold: float = 0.25) -> pd.DataFrame:
        """
        Make dataframe for calibration of IM2Deep predictions.

        Target PSMs with a q-value below 0.01 and a charge below 5 are selected; of those,
        only PSMs with a q-value strictly below the ``1 - threshold`` quantile of the selected
        q-values are kept.

        Parameters
        ----------
        psm_list_df
            DataFrame with PSMs. Must contain the ``qvalue``, ``is_decoy`` and ``charge``
            columns.
        threshold
            Sets the q-value quantile cutoff (``1 - threshold``) applied to the identified
            target PSMs, default 0.25.

        Returns
        -------
        pd.DataFrame
            DataFrame with high confidence hits for calibration.

        """
        identified_psms = psm_list_df[
            (psm_list_df["qvalue"] < 0.01)
            & (~psm_list_df["is_decoy"])
            & (psm_list_df["charge"] < 5)  # predictions do not go higher for IM2Deep
        ]
        calibration_psms = identified_psms[
            identified_psms["qvalue"] < identified_psms["qvalue"].quantile(1 - threshold)
        ]
        logger.debug(
            f"Number of high confidence hits for calculating shift: {len(calibration_psms)}"
        )
        return calibration_psms
class MaxQuantFeatureGenerator(FeatureGeneratorBase):
    """Generate MaxQuant-derived features."""

    available_features = [
        "mean_error_top7",
        "sq_mean_error_top7",
        "stdev_error_top7",
        "ln_explained_ion_current",
        "ln_nterm_ion_current_ratio",
        "ln_cterm_ion_current_ratio",
        "ln_ms2_ion_current",
    ]

    def __init__(self, *args, **kwargs) -> None:
        """
        Generate MaxQuant-derived features.

        Attributes
        ----------
        feature_names: list[str]
            Names of the features that will be added to the PSMs.

        Raises
        ------
        MissingMetadataError
            If the required metadata entries are not present in the PSMs.

        """
        super().__init__(*args, **kwargs)
        self._feature_names = self.available_features.copy()

    @property
    def feature_names(self) -> List[str]:
        """Names of the added features; empty if the last PSM list was not from MaxQuant."""
        return self._feature_names

    def add_features(self, psm_list: PSMList):
        """
        Add MaxQuant-derived features to PSMs.

        Parameters
        ----------
        psm_list
            PSMs to add features to.

        Raises
        ------
        MissingMetadataError
            If the first PSM lacks one of the required metadata entries.

        """
        # Check if all PSMs are from MaxQuant
        if not self._all_psms_from_maxquant(psm_list):
            self._feature_names = []  # Set feature names to empty list to indicate none added
            logger.warning("Not all PSMs are from MaxQuant. Skipping MaxQuant feature generation.")
            return
        else:
            # Reset feature names; copy so the shared class-level list is never aliased
            self._feature_names = self.available_features.copy()
            logger.info("Adding MaxQuant-derived features to PSMs.")

        # Infer mass deviations column name
        for column_name in [
            "Mass deviations [Da]",
            "Mass Deviations [Da]",
            "Mass deviations [ppm]",
            "Mass Deviations [ppm]",
        ]:
            if column_name in psm_list[0]["metadata"].keys():
                self._mass_deviations_key = column_name
                break
        else:
            raise MissingMetadataError(
                "No mass deviations entry in PSM metadata. Cannot compute MaxQuant features."
            )

        # Check other columns
        for column_name in ["Intensities", "Matches", "Intensity coverage"]:
            if column_name not in psm_list[0]["metadata"].keys():
                raise MissingMetadataError(
                    f"Missing {column_name} entry in PSM metadata. Cannot compute MaxQuant features."
                )

        # Add features to PSMs
        for psm in psm_list:
            psm["rescoring_features"].update(self._compute_features(psm["metadata"]))

    @staticmethod
    def _all_psms_from_maxquant(psm_list):
        """Check if the PSMs are from MaxQuant."""
        return (psm_list["source"] == "msms").all()

    def _compute_features(self, psm_metadata):
        """Compute features derived from intensities and mass errors."""
        features = {}
        if all(k in psm_metadata.keys() for k in ["Intensities", self._mass_deviations_key]):
            (
                features["mean_error_top7"],
                features["sq_mean_error_top7"],
                features["stdev_error_top7"],
            ) = self._calculate_top7_peak_features(
                psm_metadata["Intensities"], psm_metadata[self._mass_deviations_key]
            )

        if all(k in psm_metadata.keys() for k in ["Intensities", "Matches", "Intensity coverage"]):
            (
                features["ln_explained_ion_current"],
                features["ln_nterm_ion_current_ratio"],
                features["ln_cterm_ion_current_ratio"],
                features["ln_ms2_ion_current"],
            ) = self._calculate_ion_current_features(
                psm_metadata["Matches"],
                psm_metadata["Intensities"],
                psm_metadata["Intensity coverage"],
            )

        return features

    @staticmethod
    def _calculate_top7_peak_features(
        intensities: str, mass_errors: str
    ) -> Tuple[float, float, float]:
        """
        Calculate "top 7 peak"-related search engine features.
        The following features are calculated:
            - mean_error_top7: Mean of mass errors of the seven fragment ion peaks with the
              highest intensities
            - sq_mean_error_top7: Squared MeanErrorTop7
            - stdev_error_top7: Standard deviation of mass errors of the seven fragment ion
              peaks with the highest intensities

        Both arguments are semicolon-separated strings of numbers. If either cannot be
        parsed, all three features are returned as 0.0.
        """
        try:
            intensities = [float(i) for i in intensities.split(";")]
            mass_errors = [float(i) for i in mass_errors.split(";")]
        except ValueError:
            return 0.0, 0.0, 0.0

        # Indices of (at most) the seven highest intensities, highest first
        indices_most_intens = np.array(intensities).argsort()[-1:-8:-1]
        mass_errors_top7 = [(mass_errors[i]) for i in indices_most_intens]
        mean_error_top7 = np.mean(mass_errors_top7)
        sq_mean_error_top7 = mean_error_top7**2
        stdev_error_top7 = np.std(mass_errors_top7)

        return mean_error_top7, sq_mean_error_top7, stdev_error_top7

    @staticmethod
    def _calculate_ion_current_features(
        matches: str, intensities: str, intensity_coverage: str
    ) -> Tuple[float, float, float, float]:
        """
        Calculate ion current related search engine features.
        The following features are calculated:
            - ln_explained_ion_current: Summed intensity of identified fragment ions,
              divided by that of all fragment ions, logged
            - ln_nterm_ion_current_ratio: Summed intensity of identified N-terminal
              fragments, divided by that of all identified fragments, logged
            - ln_cterm_ion_current_ratio: Summed intensity of identified C-terminal
              fragments, divided by that of all identified fragments, logged
            - ln_ms2_ion_current: Summed intensity of all observed fragment ions, logged

        ``matches`` and ``intensities`` are semicolon-separated strings. If the numbers cannot
        be parsed, or the intensities sum to zero, all four features are returned as 0.0.
        """
        pseudo_count = 0.00001
        try:
            ln_explained_ion_current = float(intensity_coverage) + pseudo_count
            intensity_values = [float(i) for i in intensities.split(";")]
        except ValueError:
            return 0.0, 0.0, 0.0, 0.0

        summed_intensities = sum(intensity_values)
        if summed_intensities == 0:
            # No signal to build a ratio from; same fallback as for unparsable input
            return 0.0, 0.0, 0.0, 0.0

        # Calculate ratio between matched b- and y-ion intensities
        y_ion_int = sum(
            intensity
            for match, intensity in zip(matches.split(";"), intensity_values)
            if match.startswith("y")
        )
        y_int_ratio = y_ion_int / summed_intensities

        # NOTE(review): the y-ion ratio feeds the "nterm" feature and its complement the
        # "cterm" feature; y-ions are usually regarded as C-terminal fragments - confirm
        # whether this naming is intended before changing it.
        ln_nterm_ion_current_ratio = (y_int_ratio + pseudo_count) * ln_explained_ion_current
        ln_cterm_ion_current_ratio = (1 - y_int_ratio + pseudo_count) * ln_explained_ion_current
        ln_ms2_ion_current = summed_intensities / ln_explained_ion_current

        out = [
            ln_explained_ion_current,
            ln_nterm_ion_current_ratio,
            ln_cterm_ion_current_ratio,
            ln_ms2_ion_current,
        ]

        return tuple([np.log(x) for x in out])


class MissingMetadataError(MS2RescoreError):
    """Exception raised when a required metadata entry is missing."""

    pass
def main():
    """
    Entrypoint for MS²Rescore GUI.

    Runs the GUI application with stdout redirected to the null device.
    """
    multiprocessing.freeze_support()
    # Redirect stdout when running GUI (packaged app might not have console attached).
    # Opening devnull in the same `with` statement closes the handle when the app exits.
    with open(os.devnull, "w") as devnull, contextlib.redirect_stdout(devnull):
        app()


if __name__ == "__main__":
    main()
"spectrum_id_pattern": null, 32 | "psm_id_rt_pattern": null, 33 | "psm_id_im_pattern": null, 34 | "lower_score_is_better": false, 35 | "max_psm_rank_input": 10, 36 | "max_psm_rank_output": 1, 37 | "modification_mapping": {}, 38 | "fixed_modifications": {}, 39 | "processes": -1, 40 | "rename_to_usi": false, 41 | "fasta_file": null, 42 | "write_flashlfq": false, 43 | "write_report": false 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /ms2rescore/package_data/config_default_tims.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "./config_schema.json", 3 | "ms2rescore": { 4 | "feature_generators": { 5 | "basic": {}, 6 | "ms2pip": { 7 | "model": "timsTOF", 8 | "ms2_tolerance": 0.02 9 | }, 10 | "deeplc": { 11 | "deeplc_retrain": false 12 | }, 13 | "im2deep": {}, 14 | "maxquant": {} 15 | }, 16 | "rescoring_engine": { 17 | "mokapot": { 18 | "write_weights": true, 19 | "write_txt": true 20 | } 21 | }, 22 | "psm_file": null 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /ms2rescore/package_data/img/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/ms2rescore/package_data/img/__init__.py -------------------------------------------------------------------------------- /ms2rescore/package_data/img/comments_icon_black.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/ms2rescore/package_data/img/comments_icon_black.png -------------------------------------------------------------------------------- /ms2rescore/package_data/img/comments_icon_white.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/ms2rescore/package_data/img/comments_icon_white.png -------------------------------------------------------------------------------- /ms2rescore/package_data/img/config_icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/ms2rescore/package_data/img/config_icon.png -------------------------------------------------------------------------------- /ms2rescore/package_data/img/docs_icon_black.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/ms2rescore/package_data/img/docs_icon_black.png -------------------------------------------------------------------------------- /ms2rescore/package_data/img/docs_icon_white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/ms2rescore/package_data/img/docs_icon_white.png -------------------------------------------------------------------------------- /ms2rescore/package_data/img/github_icon_black.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/ms2rescore/package_data/img/github_icon_black.png -------------------------------------------------------------------------------- /ms2rescore/package_data/img/github_icon_white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/ms2rescore/package_data/img/github_icon_white.png 
-------------------------------------------------------------------------------- /ms2rescore/package_data/img/ms2rescore_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/ms2rescore/package_data/img/ms2rescore_logo.png -------------------------------------------------------------------------------- /ms2rescore/package_data/img/program_icon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/ms2rescore/package_data/img/program_icon.ico -------------------------------------------------------------------------------- /ms2rescore/parse_psms.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import re 3 | from typing import Dict, Optional, Union 4 | 5 | import numpy as np 6 | import psm_utils.io 7 | from psm_utils import PSMList 8 | 9 | from ms2rescore.exceptions import MS2RescoreConfigurationError 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | def parse_psms(config: Dict, psm_list: Union[PSMList, None]) -> PSMList: 15 | """ 16 | Parse PSMs and prepare for rescoring. 17 | 18 | Parameters 19 | ---------- 20 | config 21 | Dictionary containing general ms2rescore configuration (everything under ``ms2rescore`` 22 | top-level key). 23 | psm_list 24 | PSMList object containing PSMs. If None, PSMs will be read from ``psm_file``. 25 | 26 | """ 27 | # Read PSMs 28 | try: 29 | psm_list = _read_psms(config, psm_list) 30 | except psm_utils.io.PSMUtilsIOException: 31 | raise MS2RescoreConfigurationError( 32 | "Error occurred while reading PSMs. Please check the 'psm_file' and " 33 | "'psm_file_type' settings. See " 34 | "https://ms2rescore.readthedocs.io/en/latest/userguide/input-files/" 35 | " for more information." 
36 | ) 37 | 38 | # Filter by PSM rank 39 | psm_list.set_ranks(config["lower_score_is_better"]) 40 | rank_filter = psm_list["rank"] <= config["max_psm_rank_input"] 41 | psm_list = psm_list[rank_filter] 42 | logger.info(f"Removed {sum(~rank_filter)} PSMs with rank >= {config['max_psm_rank_input']}.") 43 | 44 | # Remove invalid AAs, find decoys, calculate q-values 45 | psm_list = _remove_invalid_aa(psm_list) 46 | _find_decoys(psm_list, config["id_decoy_pattern"]) 47 | _calculate_qvalues(psm_list, config["lower_score_is_better"]) 48 | if config["psm_id_rt_pattern"] or config["psm_id_im_pattern"]: 49 | logger.debug("Parsing retention time and/or ion mobility from PSM identifier...") 50 | _parse_values_from_spectrum_id( 51 | psm_list, config["psm_id_rt_pattern"], config["psm_id_im_pattern"] 52 | ) 53 | 54 | # Store scoring values for comparison later 55 | for psm in psm_list: 56 | psm.provenance_data.update( 57 | { 58 | "before_rescoring_score": psm.score, 59 | "before_rescoring_qvalue": psm.qvalue, 60 | "before_rescoring_pep": psm.pep, 61 | "before_rescoring_rank": psm.rank, 62 | } 63 | ) 64 | 65 | logger.debug("Parsing modifications...") 66 | modifications_found = set( 67 | [ 68 | re.search(r"\[([^\[\]]*)\]", x.proforma).group(1) 69 | for x in psm_list["peptidoform"] 70 | if "[" in x.proforma 71 | ] 72 | ) 73 | logger.debug(f"Found modifications: {modifications_found}") 74 | non_mapped_modifications = modifications_found - set(config["modification_mapping"].keys()) 75 | if non_mapped_modifications: 76 | logger.warning( 77 | f"Non-mapped modifications found: {non_mapped_modifications}\n" 78 | "This can be ignored if they are Unimod modification labels." 
79 | ) 80 | psm_list.rename_modifications(config["modification_mapping"]) 81 | psm_list.add_fixed_modifications(config["fixed_modifications"]) 82 | psm_list.apply_fixed_modifications() 83 | 84 | if config["psm_id_pattern"]: 85 | pattern = re.compile(config["psm_id_pattern"]) 86 | logger.debug("Applying 'psm_id_pattern'...") 87 | logger.debug( 88 | f"Parsing '{psm_list[0].spectrum_id}' to '{_match_psm_ids(psm_list[0].spectrum_id, pattern)}'" 89 | ) 90 | new_ids = [_match_psm_ids(old_id, pattern) for old_id in psm_list["spectrum_id"]] 91 | psm_list["spectrum_id"] = new_ids 92 | 93 | return psm_list 94 | 95 | 96 | def _read_psms(config, psm_list): 97 | if isinstance(psm_list, PSMList): 98 | return psm_list 99 | else: 100 | total_files = len(config["psm_file"]) 101 | psm_list = [] 102 | for current_file, psm_file in enumerate(config["psm_file"]): 103 | logger.info( 104 | f"Reading PSMs from PSM file ({current_file+1}/{total_files}): '{psm_file}'..." 105 | ) 106 | psm_list.extend( 107 | psm_utils.io.read_file( 108 | psm_file, 109 | filetype=config["psm_file_type"], 110 | show_progressbar=True, 111 | **config["psm_reader_kwargs"], 112 | ) 113 | ) 114 | logger.debug(f"Read {len(psm_list)} PSMs from '{psm_file}'.") 115 | 116 | return PSMList(psm_list=psm_list) 117 | 118 | 119 | def _find_decoys(psm_list: PSMList, id_decoy_pattern: Optional[str] = None): 120 | """Find decoys in PSMs, log amount, and raise error if none found.""" 121 | logger.debug("Finding decoys...") 122 | if id_decoy_pattern: 123 | psm_list.find_decoys(id_decoy_pattern) 124 | 125 | n_psms = len(psm_list) 126 | percent_decoys = sum(psm_list["is_decoy"]) / n_psms * 100 127 | logger.info(f"Found {n_psms} PSMs, of which {percent_decoys:.2f}% are decoys.") 128 | 129 | if not any(psm_list["is_decoy"]): 130 | raise MS2RescoreConfigurationError( 131 | "No decoy PSMs found. Please check if decoys are present in the PSM file and that " 132 | "the 'id_decoy_pattern' option is correct. 
See " 133 | "https://ms2rescore.readthedocs.io/en/latest/userguide/configuration/#selecting-decoy-psms" 134 | " for more information." 135 | ) 136 | 137 | 138 | def _calculate_qvalues(psm_list: PSMList, lower_score_is_better: bool): 139 | """Calculate q-values for PSMs if not present.""" 140 | # Calculate q-values if not present 141 | if None in psm_list["qvalue"]: 142 | logger.debug("Recalculating q-values...") 143 | psm_list.calculate_qvalues(reverse=not lower_score_is_better) 144 | 145 | 146 | def _match_psm_ids(old_id, regex_pattern): 147 | """Match PSM IDs to regex pattern or raise Exception if no match present.""" 148 | match = re.search(regex_pattern, str(old_id)) 149 | try: 150 | return match[1] 151 | except (TypeError, IndexError): 152 | raise MS2RescoreConfigurationError( 153 | f"'psm_id_pattern' could not be extracted from PSM spectrum IDs (i.e. {old_id})." 154 | " Ensure that the regex contains a capturing group?" 155 | ) 156 | 157 | 158 | def _parse_values_from_spectrum_id( 159 | psm_list: PSMList, 160 | psm_id_rt_pattern: Optional[str] = None, 161 | psm_id_im_pattern: Optional[str] = None, 162 | ): 163 | """Parse retention time and or ion mobility values from the spectrum_id.""" 164 | for pattern, label, key in zip( 165 | [psm_id_rt_pattern, psm_id_im_pattern], 166 | ["retention time", "ion mobility"], 167 | ["retention_time", "ion_mobility"], 168 | ): 169 | if pattern: 170 | logger.debug(f"Parsing {label} from spectrum_id with regex pattern " f"{pattern}") 171 | try: 172 | pattern = re.compile(pattern) 173 | psm_list[key] = [ 174 | float(pattern.search(psm.spectrum_id).group(1)) for psm in psm_list 175 | ] 176 | except AttributeError: 177 | raise MS2RescoreConfigurationError( 178 | f"Could not parse {label} from spectrum_id with the " 179 | f"{pattern} regex pattern. " 180 | f"Example spectrum_id: '{psm_list[0].spectrum_id}'\n. 
" 181 | f"Please make sure the {label} key is present in the spectrum_id " 182 | "and the value is in a capturing group or disable the relevant feature generator." 183 | ) 184 | 185 | 186 | def _remove_invalid_aa(psm_list: PSMList) -> PSMList: 187 | """Remove PSMs with invalid amino acids.""" 188 | invalid_psms = np.array( 189 | [any(aa in "BJOUXZ" for aa in psm.peptidoform.sequence) for psm in psm_list] 190 | ) 191 | 192 | if any(invalid_psms): 193 | logger.warning(f"Removed {sum(invalid_psms)} PSMs with invalid amino acids.") 194 | return psm_list[~invalid_psms] 195 | else: 196 | logger.debug("No PSMs with invalid amino acids found.") 197 | return psm_list 198 | -------------------------------------------------------------------------------- /ms2rescore/parse_spectra.py: -------------------------------------------------------------------------------- 1 | """Parse MGF files.""" 2 | 3 | import logging 4 | import re 5 | from enum import Enum 6 | from itertools import chain 7 | from typing import Optional, Set, Tuple 8 | 9 | import numpy as np 10 | from ms2rescore_rs import get_precursor_info 11 | from psm_utils import PSMList 12 | 13 | from ms2rescore.exceptions import MS2RescoreError 14 | from ms2rescore.utils import infer_spectrum_path 15 | 16 | LOGGER = logging.getLogger(__name__) 17 | 18 | 19 | class MSDataType(str, Enum): 20 | """Enum for MS data types required for feature generation.""" 21 | 22 | retention_time = "retention time" 23 | ion_mobility = "ion mobility" 24 | precursor_mz = "precursor m/z" 25 | ms2_spectra = "MS2 spectra" 26 | 27 | # Mimic behavior of StrEnum (Python >=3.11) 28 | def __str__(self): 29 | return self.value 30 | 31 | 32 | def add_precursor_values( 33 | psm_list: PSMList, spectrum_path: str, spectrum_id_pattern: Optional[str] = None 34 | ) -> Set[MSDataType]: 35 | """ 36 | Add precursor m/z, retention time, and ion mobility values to a PSM list. 37 | 38 | Parameters 39 | ---------- 40 | psm_list 41 | PSM list to add precursor values to. 
42 | spectrum_path 43 | Path to the spectrum files. 44 | spectrum_id_pattern 45 | Regular expression pattern to extract spectrum IDs from file names. If provided, the 46 | pattern must contain a single capturing group that matches the spectrum ID. Default is 47 | None. 48 | 49 | Returns 50 | ------- 51 | available_ms_data 52 | Set of available MS data types in the PSM list. 53 | 54 | """ 55 | # Check if precursor values are missing in PSM list 56 | rt_missing = any(v is None or v == 0 or np.isnan(v) for v in psm_list["retention_time"]) 57 | im_missing = any(v is None or v == 0 or np.isnan(v) for v in psm_list["ion_mobility"]) 58 | mz_missing = any(v is None or v == 0 or np.isnan(v) for v in psm_list["precursor_mz"]) 59 | 60 | # Get precursor values from spectrum files 61 | LOGGER.info("Parsing precursor info from spectrum files...") 62 | mz, rt, im = _get_precursor_values(psm_list, spectrum_path, spectrum_id_pattern) 63 | mz_found, rt_found, im_found = np.all(mz != 0.0), np.all(rt != 0.0), np.all(im != 0.0) 64 | # ms2rescore_rs always returns 0.0 for missing values 65 | 66 | # Update PSM list with missing precursor values 67 | if rt_missing and rt_found: 68 | LOGGER.debug("Missing retention time values in PSM list. Updating from spectrum files.") 69 | psm_list["retention_time"] = rt 70 | if im_missing and im_found: 71 | LOGGER.debug("Missing ion mobility values in PSM list. Updating from spectrum files.") 72 | psm_list["ion_mobility"] = im 73 | if mz_missing and mz_found: 74 | LOGGER.debug("Missing precursor m/z values in PSM list. Updating from spectrum files.") 75 | psm_list["precursor_mz"] = mz 76 | else: 77 | # Check if precursor m/z values are consistent between PSMs and spectrum files 78 | mz_diff = np.abs(psm_list["precursor_mz"] - mz) 79 | if np.mean(mz_diff) > 1e-2: 80 | LOGGER.warning( 81 | "Mismatch between precursor m/z values in PSM list and spectrum files (mean " 82 | "difference exceeds 0.01 Da). 
Please ensure that the correct spectrum files are " 83 | "provided and that the `spectrum_id_pattern` and `psm_id_pattern` options are " 84 | "configured correctly. See " 85 | "https://ms2rescore.readthedocs.io/en/stable/userguide/configuration/#mapping-psms-to-spectra " 86 | "for more information." 87 | ) 88 | 89 | # Return available MS data types 90 | available_ms_data = { 91 | MSDataType.ms2_spectra, # Assume MS2 spectra are always present 92 | MSDataType.retention_time if not rt_missing or rt_found else None, 93 | MSDataType.ion_mobility if not im_missing or im_found else None, 94 | MSDataType.precursor_mz if not mz_missing or mz_found else None, 95 | } 96 | available_ms_data.discard(None) 97 | 98 | return available_ms_data 99 | 100 | 101 | def _get_precursor_values( 102 | psm_list: PSMList, spectrum_path: str, spectrum_id_pattern: str 103 | ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: 104 | """Get precursor m/z, RT, and IM from spectrum files.""" 105 | # Iterate over different runs in PSM list 106 | precursor_dict = dict() 107 | psm_dict = psm_list.get_psm_dict() 108 | for runs in psm_dict.values(): 109 | for run_name, psms in runs.items(): 110 | psm_list_run = PSMList(psm_list=list(chain.from_iterable(psms.values()))) 111 | spectrum_file = infer_spectrum_path(spectrum_path, run_name) 112 | 113 | LOGGER.debug("Reading spectrum file: '%s'", spectrum_file) 114 | precursors = get_precursor_info(str(spectrum_file)) 115 | 116 | # Parse spectrum IDs with regex pattern if provided 117 | if spectrum_id_pattern: 118 | compiled_pattern = re.compile(spectrum_id_pattern) 119 | precursors = { 120 | compiled_pattern.search(spectrum_id).group(1): precursor 121 | for spectrum_id, precursor in precursors.items() 122 | } 123 | 124 | # Ensure all PSMs have a precursor values 125 | for psm in psm_list_run: 126 | if psm.spectrum_id not in precursors: 127 | raise SpectrumParsingError( 128 | "Mismatch between PSM and spectrum file IDs. 
Could find precursor values " 129 | f"for PSM with ID {psm.spectrum_id} in run {run_name}.\n" 130 | "Please check that the `spectrum_id_pattern` and `psm_id_pattern` options " 131 | "are configured correctly. See " 132 | "https://ms2rescore.readthedocs.io/en/stable/userguide/configuration/#mapping-psms-to-spectra" 133 | " for more information.\n" 134 | f"Example ID from PSM file: {psm.spectrum_id}\n" 135 | f"Example ID from spectrum file: {list(precursors.keys())[0]}" 136 | ) 137 | 138 | # Store precursor values in dictionary 139 | precursor_dict[run_name] = precursors 140 | 141 | # Reshape precursor values into arrays matching PSM list 142 | mzs = np.fromiter((precursor_dict[psm.run][psm.spectrum_id].mz for psm in psm_list), float) 143 | rts = np.fromiter((precursor_dict[psm.run][psm.spectrum_id].rt for psm in psm_list), float) 144 | ims = np.fromiter((precursor_dict[psm.run][psm.spectrum_id].im for psm in psm_list), float) 145 | 146 | return mzs, rts, ims 147 | 148 | 149 | class SpectrumParsingError(MS2RescoreError): 150 | """Error parsing retention time from spectrum file.""" 151 | 152 | pass 153 | -------------------------------------------------------------------------------- /ms2rescore/report/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Functionality for analyzing and reporting MS²Rescore results, including reusable Plotly-based 3 | charts and HTML-report generation. 
4 | """ 5 | -------------------------------------------------------------------------------- /ms2rescore/report/__main__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import click 4 | from rich.logging import RichHandler 5 | 6 | from ms2rescore.report.generate import generate_report 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | @click.command() 12 | @click.argument("output_prefix", type=str) 13 | def main(**kwargs): 14 | logging.getLogger("mokapot").setLevel(logging.WARNING) 15 | logging.basicConfig( 16 | level=logging.INFO, 17 | handlers=[RichHandler(rich_tracebacks=True)], 18 | format="%(message)s", 19 | ) 20 | 21 | try: 22 | generate_report(kwargs["output_prefix"]) 23 | except Exception as e: 24 | logger.exception(e) 25 | exit(1) 26 | 27 | 28 | if __name__ == "__main__": 29 | main() 30 | -------------------------------------------------------------------------------- /ms2rescore/report/templates/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/ms2rescore/report/templates/__init__.py -------------------------------------------------------------------------------- /ms2rescore/report/templates/about.html: -------------------------------------------------------------------------------- 1 |
2 |
About MS²Rescore
3 |
4 |
5 |
6 | 7 |
8 |
9 |

MS²Rescore performs sensitive rescoring of peptide-spectrum matches (PSMs) using features from predictors of peptide behavior in LC-MS, such as MS²PIP for spectrum prediction, and DeepLC for retention time prediction. Rescoring is performed with Percolator or Mokapot.

10 |

11 | Please cite:
12 | MS²Rescore: Data-driven rescoring dramatically boosts immunopeptide identification rates.
13 | Arthur Declercq, Robbin Bouwmeester, Aurélie Hirschler, Christine Carapito, Sven Degroeve, Lennart Martens, Ralf Gabriels
14 | Molecular & Cellular Proteomics (2022) doi:10.1016/j.mcpro.2022.100266 15 |

16 | GitHub repo 17 | Documentation 18 | Discussion forum 19 |
20 |
21 |
22 |
23 | -------------------------------------------------------------------------------- /ms2rescore/report/templates/base.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | MS²Rescore report 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | {% include 'style.html' %} 16 | 17 | 18 |
19 | 20 |

MS²Rescore QC report

21 | 22 | 23 | {% include 'metadata.html' %} 24 | 25 | 26 | {% include 'about.html' %} 27 | 28 | 29 |
30 |
31 | 38 |
39 |
40 |
41 | {% for tab in main_tabs %} 42 |
{% include tab.template %}
43 | {% endfor %} 44 |
45 |
46 |
47 |
48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /ms2rescore/report/templates/config.html: -------------------------------------------------------------------------------- 1 |

{{ tab.context.description}}

2 |
{{ tab.context.config }}
3 | -------------------------------------------------------------------------------- /ms2rescore/report/templates/features.html: -------------------------------------------------------------------------------- 1 | {% for chart in tab.context.charts %} 2 |

{{ chart.title }}

3 |

{{ chart.description }}

4 |
{{ chart.chart }}
5 | {% endfor %} 6 | -------------------------------------------------------------------------------- /ms2rescore/report/templates/log.html: -------------------------------------------------------------------------------- 1 | {{ tab.context.log }} 2 | -------------------------------------------------------------------------------- /ms2rescore/report/templates/metadata.html: -------------------------------------------------------------------------------- 1 |
2 |
3 |

4 | Generated on
5 | {{ metadata.generated_on }} 6 |

7 |
8 |
9 |

10 | MS²Rescore version
11 | v{{ metadata.ms2rescore_version }} 12 |

13 |
14 |
15 |

16 | PSM filename
17 | {{ metadata.psm_filename }} 18 |

19 |
20 |
21 | -------------------------------------------------------------------------------- /ms2rescore/report/templates/overview.html: -------------------------------------------------------------------------------- 1 |

General statistics

2 |
3 |
{% for stats in tab.context.stats %} {% include 'stats-card.html' %} {% endfor %}
4 |
5 | 6 |

Identification charts

7 | {% for chart in tab.context.charts %} 8 |

{{ chart.title }}

9 |

{{ chart.description }}

10 |
{{ chart.chart }}
11 | {% endfor %} 12 | -------------------------------------------------------------------------------- /ms2rescore/report/templates/stats-card.html: -------------------------------------------------------------------------------- 1 |
2 |
3 |
4 |
{{ stats.item }}
5 |
6 |
{{ stats.number }} {{ stats.diff }}
7 |
8 |
{{ stats.percentage }}
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 | -------------------------------------------------------------------------------- /ms2rescore/report/templates/style.html: -------------------------------------------------------------------------------- 1 | 96 | -------------------------------------------------------------------------------- /ms2rescore/report/templates/target-decoy.html: -------------------------------------------------------------------------------- 1 | {% for chart in tab.context.charts %} 2 |

{{ chart.title }}

3 |

{{ chart.description }}

4 |
{{ chart.chart }}
5 | {% endfor %} 6 | -------------------------------------------------------------------------------- /ms2rescore/report/templates/texts.toml: -------------------------------------------------------------------------------- 1 | [configuration] 2 | description = """ 3 | This is the full configuration that was used, as rendered from the 4 | default configuration, the user-provided configuration file, and the 5 | command line arguments, in cascading order. 6 | """ 7 | 8 | [charts.score_comparison] 9 | title = "Score comparison" 10 | description = """ 11 | This scatter plot shows the score for both target (blue) and decoy 12 | (red) PSMs before rescoring (x-axis) and after rescoring (y-axis). Dashed 13 | lines indicate the 1% FDR threshold for scores before and after rescoring. 14 | PSMs in the upper-left quadrant are only identified after rescoring. 15 | """ 16 | 17 | [charts.fdr_comparison] 18 | title = "False discovery rate comparison" 19 | description = """ 20 | This plot shows the number of identified target PSMs 21 | as a function of the FDR threshold. The plot starts at the top-right corner 22 | with the total number of PSMs in the dataset (no FDR filtering). As the 23 | FDR threshold becomes more stringent (towards the left of the x-axis), 24 | the number of identified target PSMs goes down. The dashed line 25 | indicates the 1% FDR threshold. 26 | """ 27 | 28 | [charts.identification_overlap] 29 | title = "Identification overlap" 30 | description = """ 31 | This plot shows the unique identified PSMs, peptides, and (optionally) 32 | protein groups that were removed, retained, and gained by rescoring. 33 | """ 34 | 35 | [charts.score_histogram] 36 | title = "Score histogram" 37 | description = """ 38 | The score histogram shows the score distribution for both target PSMs (blue) 39 | and decoy PSMs (red). 
The target score distribution is expected to show 40 | two modes, corresponding to high-scoring (presumably correct) PSMs 41 | and low-scoring (presumably incorrect) PSMs. The decoy score 42 | distribution is expected to show a single mode, which should 43 | approximate the low-scoring part of the target score distribution 44 | as closely as possible. This approximation can be more easily assessed 45 | in the percentile-percentile plot (see below). The dashed line indicates 46 | the 1% FDR threshold. 47 | """ 48 | 49 | [charts.pp_plot] 50 | title = "Percentile-percentile plot" 51 | description = """ 52 | The percentile-percentile (PP) plot shows the empirical cumulative 53 | distribution function (ECDF) of the target distribution as a function of 54 | the ECDF of the decoy distribution. In the context of peptide 55 | identification, it can be used to assess the quality of decoy PSMs and 56 | their capacity to help in correctly estimating the false discovery rate. 57 | 58 | Ideally, the PP-plot should follow a straight diagonal line up until the 59 | end of the decoy distribution (right-hand side of the plot), where the 60 | line turns vertically upwards. This means that the decoy distribution 61 | perfectly aligns with the first part of the target distribution (the 62 | low-scoring and presumably bad target PSMs) and therefore correctly 63 | models the bad target PSMs. This diagonal line matches the ratio of 64 | the number of decoy to the number of target PSMs. 65 | 66 | More information on this type of diagnostic plot can be found at 67 | statomics.github.io/TargetDecoy. 68 | """ 69 | 70 | [charts.feature_usage] 71 | title = "Feature usage in rescoring model" 72 | description = """ 73 | This plot shows the usage of each feature in the 74 | rescoring model. The higher the usage, the more the feature contributes to separating target and 75 | decoy PSMs. 
Note that the usage is not necessarily correlated with the individual 76 | performance of the feature, as some features may be redundant with others. 77 | """ 78 | 79 | [charts.feature_performance] 80 | title = "Individual feature performance" 81 | description = """ 82 | The following plot shows the performance of individual features. For each 83 | feature, q-values are calculated as if that feature was individually used for scoring PSMs without 84 | any other information. Then, the area under curve (AUC) is calculated for the empirical cumulative 85 | distribution function (ECDF) of the q-values. The higher the AUC, the better the feature is at 86 | discriminating between target and decoy PSMs without any other information. 87 | """ 88 | 89 | [charts.ms2pip_pearson] 90 | title = "MS²PIP model performance" 91 | description = """ 92 | MS²PIP model performance can be estimated by calculating the Pearson correlation coefficient 93 | between the predicted and observed fragment ion intensities. Fragment intensities are first 94 | normalized to the total ion current and then log2-transformed. The following histogram shows the 95 | distribution of Pearson correlation coefficients for all target PSMs that passed the 1% FDR 96 | threshold. The red dashed line indicates the median correlation. 97 | """ 98 | 99 | [charts.deeplc_performance] 100 | title = "DeepLC model performance" 101 | description = """ 102 | DeepLC model performance can be visualized by plotting the predicted retention times against the 103 | observed retention times (top chart), or by calculating the relative mean absolute error (RMAE). The 104 | bottom chart shows the distribution of RMAE values of DeepLC predictions on 460 different benchmark 105 | datasets. The red line indicates the RMAE value for all target PSMs that passed the 1% FDR threshold 106 | of the current dataset. A lower RMAE value indicates better performance. 
107 | """ 108 | 109 | [charts.im2deep_performance] 110 | title = "IM2Deep model performance" 111 | description = """ 112 | IM2Deep model performance can be visualized by plotting the predicted CCS against the observed CCS. 113 | """ 114 | 115 | [charts.ionmob_performance] 116 | title = "ionmob model performance" 117 | description = """ 118 | ionmob model performance can be visualized by plotting the predicted CCS against the observed CCS. 119 | """ 120 | -------------------------------------------------------------------------------- /ms2rescore/report/utils.py: -------------------------------------------------------------------------------- 1 | """Utility functions for MS²Rescore report generation.""" 2 | 3 | import logging 4 | from collections import defaultdict 5 | from csv import DictReader 6 | from pathlib import Path 7 | from typing import Optional, Tuple 8 | 9 | import pandas as pd 10 | import psm_utils 11 | from mokapot import LinearConfidence, LinearPsmDataset, read_fasta 12 | 13 | from ms2rescore.exceptions import ReportGenerationError 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | def read_feature_names(feature_names_path: Path) -> dict: 19 | """Read feature names and mapping with feature generator from file.""" 20 | feature_names = defaultdict(list) 21 | with open(feature_names_path) as f: 22 | reader = DictReader(f, delimiter="\t") 23 | for line in reader: 24 | feature_names[line["feature_generator"]].append(line["feature_name"]) 25 | return feature_names 26 | 27 | 28 | def get_feature_values( 29 | psm_list: psm_utils.PSMList, feature_names: Optional[list] = None 30 | ) -> pd.DataFrame: 31 | """Get feature values for all PSMs in a PSM list.""" 32 | if not feature_names: 33 | feature_names = list(psm_list[0].rescoring_features.keys()) 34 | features = pd.DataFrame( 35 | {fname: psm.rescoring_features[fname] for fname in feature_names} for psm in psm_list 36 | ).astype("float32") 37 | return features 38 | 39 | 40 | def 
get_confidence_estimates( 41 | psm_list: psm_utils.PSMList, fasta_file: Optional[str] = None 42 | ) -> Tuple[LinearConfidence, LinearConfidence]: 43 | """Return identification confidence before and after rescoring.""" 44 | try: 45 | score_before = pd.DataFrame.from_records(psm_list["provenance_data"])[ 46 | "before_rescoring_score" 47 | ].astype(float) 48 | except KeyError as e: 49 | raise ReportGenerationError( 50 | "No `before_rescoring_score` in PSM list provenance data. Ensure that the PSM list " 51 | "was generated by MS²Rescore. Could not generate report." 52 | ) from e 53 | 54 | score_after = psm_list["score"] 55 | peptide = ( 56 | pd.Series(psm_list["peptidoform"]).astype(str).str.replace(r"(/\d+$)", "", n=1, regex=True) 57 | ) 58 | psms = pd.DataFrame({"peptide": peptide, "is_target": ~psm_list["is_decoy"]}).reset_index() 59 | lin_psm_dataset = LinearPsmDataset( 60 | psms=psms, 61 | target_column="is_target", 62 | spectrum_columns="index", 63 | peptide_column="peptide", 64 | ) 65 | if fasta_file: 66 | fasta = read_fasta(fasta_file) 67 | lin_psm_dataset.add_proteins(fasta) 68 | 69 | confidence = dict() 70 | for when, scores in [("before", score_before), ("after", score_after)]: 71 | try: 72 | confidence[when] = lin_psm_dataset.assign_confidence(scores=scores) 73 | except (RuntimeError, IndexError): 74 | confidence[when] = None 75 | logger.warning("Could not assign confidence estimates for %s rescoring.", when) 76 | 77 | return confidence["before"], confidence["after"] 78 | -------------------------------------------------------------------------------- /ms2rescore/rescoring_engines/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Rescoring engines integrated in MS²Rescore. 
3 | 4 | Each integrated rescoring engine typically includes a :py:func:`rescore` function that takes a 5 | :py:class:`~psm_utils.psm_list.PSMList` as input and writes the new scores, q-values, and PEPs to 6 | the original :py:class:`~psm_utils.psm_list.PSMList`. 7 | 8 | """ 9 | -------------------------------------------------------------------------------- /ms2rescore/rescoring_engines/mokapot.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mokapot integration for MS²Rescore. 3 | 4 | :py:mod:`mokapot` is a full-Python implementation of the semi-supervised learning algorithms 5 | introduced with Percolator. It builds upon the flexible scikit-learn package, which makes it 6 | highly efficient for routine applications, but also customizable for experimental research 7 | settings. Using Mokapot through MS²Rescore brings several advantages over Percolator: It can be 8 | easily installed in the same Python environment, and it is generally faster as the communication 9 | between the tools happens completely within Python, without the need to write and read files 10 | or communicate through the command line. See 11 | `mokapot.readthedocs.io `_ for more information. 12 | 13 | If you use Mokapot through MS²Rescore, please cite: 14 | 15 | .. epigraph:: 16 | Fondrie W. E. & Noble W. S. mokapot: Fast and Flexible Semisupervised 17 | Learning for Peptide Detection. *J Proteome Res* (2021). 
def rescore(
    psm_list: psm_utils.PSMList,
    output_file_root: str = "ms2rescore",
    fasta_file: Optional[str] = None,
    train_fdr: float = 0.01,
    write_weights: bool = False,
    write_txt: bool = False,
    protein_kwargs: Optional[Dict[str, Any]] = None,
    **kwargs: Any,
) -> None:
    """
    Rescore PSMs with Mokapot.

    The function provides a high-level interface to use Mokapot within MS²Rescore. It first
    converts the :py:class:`~psm_utils.psm_list.PSMList` to a
    :py:class:`~mokapot.dataset.LinearPsmDataset`, and then optionally adds protein information
    from a FASTA file. The dataset is then passed to the :py:func:`~mokapot.brew` function, which
    returns the new scores, q-values, and PEPs. These are then written back to the original
    :py:class:`~psm_utils.psm_list.PSMList`.

    Parameters
    ----------
    psm_list
        PSMs to be rescored. Updated in place.
    output_file_root
        Root of output file names. Defaults to ``"ms2rescore"``.
    fasta_file
        Path to FASTA file with protein sequences to use for protein inference. Defaults to
        ``None``, in which case no protein information is added.
    train_fdr
        FDR to use for training the Mokapot model. Defaults to ``0.01``.
    write_weights
        Write model weights to a text file. Defaults to ``False``.
    write_txt
        Write Mokapot results to a text file. Defaults to ``False``.
    protein_kwargs
        Keyword arguments to pass to the :py:meth:`~mokapot.dataset.LinearPsmDataset.add_proteins`
        method. Defaults to ``None``, which is treated as "no extra arguments".
    **kwargs
        Additional keyword arguments are passed to the Mokapot :py:func:`~mokapot.brew` function.

    Raises
    ------
    RescoringError
        If Mokapot raises a ``RuntimeError`` while brewing.

    """
    _set_log_levels()

    # Option was moved to the MS²Rescore level; drop it so it is not forwarded to `brew`.
    if "write_flashlfq" in kwargs:
        kwargs.pop("write_flashlfq")
        logger.warning(
            "The `write_flashlfq` argument has moved. To write FlashLFQ generic TSV, use the "
            "MS²Rescore-level `write_flashlfq` option instead."
        )

    # Convert PSMList to Mokapot dataset
    lin_psm_data = convert_psm_list(psm_list)
    feature_names = list(lin_psm_data.features.columns)

    # Add proteins
    if fasta_file:
        # `protein_kwargs` defaults to None, which cannot be unpacked with `**`.
        protein_kwargs = protein_kwargs or {}
        logger.debug(f"Adding protein info from {fasta_file} with options: `{protein_kwargs}`")
        lin_psm_data.add_proteins(fasta_file, **protein_kwargs)

    # Rescore
    logger.debug(f"Mokapot brew options: `{kwargs}`")
    try:
        confidence_results, models = brew(
            lin_psm_data, model=PercolatorModel(train_fdr=train_fdr), rng=8, **kwargs
        )
    except RuntimeError as e:
        raise RescoringError("Mokapot could not be run. Please check the input data.") from e

    add_psm_confidence(psm_list, confidence_results)
    add_peptide_confidence(psm_list, confidence_results)

    # Write results
    if write_weights:
        try:
            save_model_weights(models, feature_names, output_file_root)
        except AttributeError:
            # Writing weights is best-effort: non-linear models have no `coef_`.
            logger.warning(
                "Could not extract Mokapot model weights with the `coef_` attribute. Most likely, "
                "a model type different from the default (linear SVM) was used. No weights will "
                "be saved."
            )
    if write_txt:
        confidence_results.to_txt(file_root=output_file_root, decoys=True)
def save_model_weights(
    models: Tuple[mokapot.model.Model], feature_names: List[str], output_file_root: str
):
    """
    Write the per-fold model coefficients to ``{output_file_root}.mokapot.weights.tsv``.

    Parameters
    ----------
    models
        Tuple of Mokapot models (one for each fold) to save.
    feature_names
        List of feature names that were used to train the models; used as column headers.
    output_file_root
        Root of output file names.

    Raises
    ------
    AttributeError
        If a model's estimator does not expose a ``coef_`` attribute.

    """
    weights_path = output_file_root + ".mokapot.weights.tsv"

    try:
        # One row per fold; `coef_[0]` is the first row of each estimator's coefficients.
        per_fold = [model.estimator.coef_[0] for model in models]
        coefficients = np.stack(per_fold)
    except AttributeError as e:
        raise AttributeError(
            "Could not extract Mokapot model weights with the `coef_` attribute. Most likely, "
            "a model type different from the default (linear SVM) was used."
        ) from e

    weights_table = pd.DataFrame(coefficients, columns=list(feature_names))
    weights_table.to_csv(weights_path, sep="\t", index=False)
def add_peptide_confidence(
    psm_list: psm_utils.PSMList, confidence_results: mokapot.confidence.Confidence
) -> None:
    """
    Add Mokapot peptide-level confidence estimates to PSM list.

    Target and decoy peptide-level results are combined into one lookup keyed by peptide. For
    each PSM, the peptidoform string with its trailing ``/<charge>`` removed is used as key, and
    the score, q-value, and PEP are stored in the PSM ``metadata`` as ``peptide_score``,
    ``peptide_qvalue``, and ``peptide_pep``.
    """
    columns = ["mokapot score", "mokapot q-value", "mokapot PEP"]
    target_peptides = confidence_results.confidence_estimates["peptides"]
    decoy_peptides = confidence_results.decoy_confidence_estimates["peptides"]
    scores_by_peptide = pd.concat(
        [
            target_peptides.set_index("peptide")[columns],
            decoy_peptides.set_index("peptide")[columns],
        ],
        axis=0,
    ).to_dict(orient="index")

    # Strip the trailing charge state to obtain the peptide-level key
    charge_suffix = re.compile(r"(/\d+$)")
    for psm in psm_list:
        peptide_key = charge_suffix.sub("", str(psm.peptidoform), 1)
        entry = scores_by_peptide[peptide_key]
        psm.metadata.update(
            peptide_score=entry["mokapot score"],
            peptide_qvalue=entry["mokapot q-value"],
            peptide_pep=entry["mokapot PEP"],
        )
def rescore(
    psm_list: psm_utils.PSMList,
    output_file_root: str = "ms2rescore",
    log_level: str = "info",
    processes: int = 1,
    fasta_file: Optional[str] = None,
    percolator_kwargs: Optional[Dict[str, Any]] = None,
) -> None:
    """
    Rescore PSMs with Percolator.

    Aside from updating the PSM ``score``, ``qvalue``, and ``pep`` values, the following output
    files are written:

    - Target PSMs: ``{output_file_root}.percolator.psms.pout``
    - Target peptides: ``{output_file_root}.percolator.peptides.pout``
    - Target proteins: ``{output_file_root}.percolator.proteins.pout``
    - Decoy PSMs: ``{output_file_root}.percolator.decoy.psms.pout``
    - Decoy peptides: ``{output_file_root}.percolator.decoy.peptides.pout``
    - Decoy proteins: ``{output_file_root}.percolator.decoy.proteins.pout``
    - Feature weights: ``{output_file_root}.percolator.weights.tsv``

    Percolator is run through its command line interface. Percolator must be installed separately
    and the ``percolator`` command must be available in the PATH for this module to work.

    Parameters
    ----------
    psm_list
        PSMs to be rescored. Updated in place.
    output_file_root
        Root of output file names. Defaults to ``ms2rescore``.
    log_level
        Log level for Percolator. Must be a key of ``LOG_LEVEL_MAP``. Defaults to ``info``.
    processes
        Number of processes to use. Defaults to 1.
    fasta_file
        Path to FASTA file for protein inference. Defaults to ``None``.
    percolator_kwargs
        Additional keyword arguments for Percolator. These take precedence over the defaults
        set by this function. Defaults to ``None``.

    Raises
    ------
    MS2RescoreError
        If Percolator cannot be started or exits with a non-zero status.

    """
    # Keep the user-supplied options under a separate name: the defaults must be built first and
    # then overridden, instead of the defaults replacing the argument altogether.
    user_kwargs = percolator_kwargs
    percolator_kwargs = {
        "results-psms": output_file_root + ".percolator.psms.pout",
        "decoy-results-psms": output_file_root + ".percolator.decoy.psms.pout",
        "results-peptides": output_file_root + ".percolator.peptides.pout",
        "decoy-results-peptides": output_file_root + ".percolator.decoy.peptides.pout",
        "results-proteins": output_file_root + ".percolator.proteins.pout",
        "decoy-results-proteins": output_file_root + ".percolator.decoy.proteins.pout",
        "weights": output_file_root + ".percolator.weights.tsv",
        "verbose": LOG_LEVEL_MAP[log_level],
        "num-threads": min(processes, 128),  # Higher values not supported by Percolator
        "post-processing-tdc": True,
    }
    if user_kwargs:
        percolator_kwargs.update(user_kwargs)

    if fasta_file:
        percolator_kwargs["picked-protein"] = fasta_file

    pin_filepath = f"{output_file_root}.pin"
    percolator_cmd = _construct_percolator_command(percolator_kwargs, pin_filepath)

    # Need to be able to link back to original PSMs, so reindex spectrum IDs, but copy PSM list
    # to avoid modifying original...
    # TODO: Better approach for this?
    psm_list_reindexed = deepcopy(psm_list)
    psm_list_reindexed.set_ranks()
    psm_list_reindexed["spectrum_id"] = [
        f"{psm.get_usi(as_url=False)}_{psm.rank}" for psm in psm_list_reindexed
    ]
    # Maps each reindexed spectrum ID to its position in the original PSM list
    spectrum_id_index = {
        spectrum_id: index for index, spectrum_id in enumerate(psm_list_reindexed["spectrum_id"])
    }

    _write_pin_file(psm_list_reindexed, pin_filepath)

    logger.debug(f"Running percolator command {' '.join(percolator_cmd)}")
    try:
        # `check=True` turns a non-zero exit status into CalledProcessError, so that a failed
        # run is reported here instead of surfacing later while reading the output files.
        output = subprocess.run(percolator_cmd, capture_output=True, check=True)
    except FileNotFoundError as e:
        if subprocess.getstatusoutput("percolator")[0] != 0:
            raise MS2RescoreError(
                "Could not run Percolator. Please ensure that the program is installed and "
                "available in your PATH. See "
                "https://ms2rescore.readthedocs.io/en/latest/installation/#installing-percolator "
                "for more information."
            ) from e
        # No process output exists at this point; report the exception itself.
        logger.warning(f"Running Percolator resulted in an error:\n{e}")
        raise MS2RescoreError("Percolator error") from e
    except subprocess.CalledProcessError as e:
        # The success path below logs Percolator's stderr, so report stderr here as well.
        logger.warning(
            f"Running Percolator resulted in an error:\n{_decode_string(e.stderr or b'')}"
        )
        raise MS2RescoreError("Percolator error") from e

    logger.info(
        "Percolator output: \n" + _decode_string(output.stderr), extra={"highlighter": None}
    )

    _update_psm_scores(
        psm_list,
        percolator_kwargs["results-psms"],
        percolator_kwargs["decoy-results-psms"],
        spectrum_id_index,
    )
def _write_pin_file(psm_list: psm_utils.PSMList, filepath: str):
    """
    Write the PSM list to ``filepath`` as a Percolator PIN file.

    The feature columns are taken from the ``rescoring_features`` keys of the first PSM.
    """
    logger.debug(f"Writing PIN file to {filepath}")

    first_psm = psm_list[0]
    pin_features = first_psm.rescoring_features.keys()

    psm_utils.io.write_file(
        psm_list,
        filename=filepath,
        filetype="percolator",
        style="pin",
        feature_names=pin_features,
    )
def infer_spectrum_path(
    configured_path: Union[str, Path, None],
    run_name: Optional[str] = None,
) -> Union[str, Path]:
    """
    Infer spectrum path from passed path and expected filename (e.g. from PSM file).

    Parameters
    ----------
    configured_path: str, Path, None
        User-defined path to spectrum file or directory containing spectrum file
    run_name : str, optional
        MS run name (stem of spectrum filename), e.g., as expected from PSM file.

    Returns
    -------
    pathlib.Path
        Resolved path to the spectrum file (or Bruker directory).

    Raises
    ------
    MS2RescoreConfigurationError
        If no path can be resolved from the given arguments, if ``configured_path`` does not
        exist, or if no file with a supported extension matches the resolved path.

    """
    # If no spectrum path configured, use expected run_name in default dir
    if not configured_path:
        if run_name:
            resolved_path = os.path.join(".", run_name)
        else:
            raise MS2RescoreConfigurationError(
                "Could not resolve spectrum file name: No spectrum path configured "
                "and no run name in PSM file found."
            )

    else:
        # Normalize to `str`: the string operations below (`endswith`, `+ "*"`) are not
        # available on `pathlib.Path` objects, which the signature explicitly allows.
        configured_path = os.fspath(configured_path)
        is_bruker_dir = configured_path.endswith(".d") or _is_minitdf(configured_path)

        # If passed path is directory (that is not Bruker raw), join with run name
        if os.path.isdir(configured_path) and not is_bruker_dir:
            if run_name:
                resolved_path = os.path.join(configured_path, run_name)
            else:
                raise MS2RescoreConfigurationError(
                    "Could not resolve spectrum file name: Spectrum path is directory "
                    "but no run name in PSM file found."
                )

        # If passed path is file, use that, but warn if basename doesn't match expected
        elif os.path.isfile(configured_path) or (os.path.isdir(configured_path) and is_bruker_dir):
            if run_name and Path(configured_path).stem != Path(run_name).stem:
                logger.warning(
                    "Passed spectrum path (`%s`) does not match run name found in PSM "
                    "file (`%s`). Continuing with passed spectrum path.",
                    configured_path,
                    run_name,
                )
            resolved_path = configured_path
        else:
            raise MS2RescoreConfigurationError(
                "Configured `spectrum_path` must be `None` or a path to an existing file "
                "or directory. If `None` or path to directory, spectrum run information "
                "should be present in the PSM file."
            )

    # Match with file extension if not in resolved_path yet
    if not is_supported_file_type(resolved_path) or not os.path.exists(resolved_path):
        for filename in glob(resolved_path + "*"):
            if is_supported_file_type(filename):
                resolved_path = filename
                break
        else:
            # Loop finished without `break`: no candidate had a supported extension
            raise MS2RescoreConfigurationError(
                f"Resolved spectrum filename ('{resolved_path}') does not contain a supported "
                "file extension (mzML, MGF, or .d) and could not find any matching existing "
                "files."
            )

    return Path(resolved_path)
87 | 88 | A Bruker miniTDF folder has no fixed name, but contains files matching the patterns 89 | ``*ms2spectrum.bin`` and ``*ms2spectrum.parquet``. 90 | """ 91 | files = set(Path(spectrum_file).glob("*ms2spectrum.bin")) 92 | files.update(Path(spectrum_file).glob("*ms2spectrum.parquet")) 93 | return len(files) >= 2 94 | -------------------------------------------------------------------------------- /ms2rescore_innosetup.iss: -------------------------------------------------------------------------------- 1 | #define AppName "MS2Rescore" 2 | #define AppPublisher "CompOmics" 3 | #define AppURL "https://github.com/compomics/ms2rescore" 4 | #define AppExeName "ms2rescore.exe" 5 | 6 | [Setup] 7 | AppId={{2D3D12BD-3AE2-426E-8DE8-092148C12071} 8 | AppName={#AppName} 9 | AppVersion={#AppVersion} 10 | AppPublisher={#AppPublisher} 11 | AppPublisherURL={#AppURL} 12 | AppSupportURL={#AppURL} 13 | AppUpdatesURL={#AppURL} 14 | DefaultDirName={autopf}\{#AppName} 15 | DisableProgramGroupPage=yes 16 | LicenseFile=.\LICENSE 17 | PrivilegesRequired=lowest 18 | PrivilegesRequiredOverridesAllowed=dialog 19 | OutputDir="dist" 20 | OutputBaseFilename="{#AppName}-{#AppVersion}-Windows64bit" 21 | Compression=lzma 22 | SolidCompression=yes 23 | WizardStyle=modern 24 | 25 | [Languages] 26 | Name: "english"; MessagesFile: "compiler:Default.isl" 27 | 28 | [Tasks] 29 | Name: "desktopicon"; Description: "{cm:CreateDesktopIcon}"; GroupDescription: "{cm:AdditionalIcons}"; Flags: unchecked 30 | 31 | [Files] 32 | Source: "dist\ms2rescore\*"; DestDir: "{app}"; Flags: ignoreversion recursesubdirs createallsubdirs 33 | 34 | [Icons] 35 | Name: "{autoprograms}\{#AppName}"; Filename: "{app}\{#AppExeName}" 36 | Name: "{autodesktop}\{#AppName}"; Filename: "{app}\{#AppExeName}"; Tasks: desktopicon 37 | 38 | [Run] 39 | Filename: "{app}\{#AppExeName}"; Description: "{cm:LaunchProgram,{#StringChange(AppName, '&', '&&')}}"; Flags: nowait postinstall skipifsilent 40 | 
-------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "ms2rescore" 3 | description = " Modular and user-friendly platform for AI-assisted rescoring of peptide identifications." 4 | readme = "README.md" 5 | license = { file = "LICENSE" } 6 | keywords = [ 7 | "MS2Rescore", 8 | "MS2PIP", 9 | "DeepLC", 10 | "Percolator", 11 | "proteomics", 12 | "mass spectrometry", 13 | "peptide identification", 14 | "rescoring", 15 | "machine learning", 16 | ] 17 | authors = [ 18 | { name = "Ralf Gabriels", email = "ralf@gabriels.dev" }, 19 | { name = "Arthur Declercq", email = "arthur.declercq@ugent.be" }, 20 | { name = "Ana Sílvia C. Silva" }, 21 | { name = "Robbin Bouwmeester" }, 22 | { name = "Louise Buur" }, 23 | ] 24 | classifiers = [ 25 | "Intended Audience :: Science/Research", 26 | "License :: OSI Approved :: Apache Software License", 27 | "Operating System :: OS Independent", 28 | "Programming Language :: Python :: 3 :: Only", 29 | "Topic :: Scientific/Engineering :: Bio-Informatics", 30 | "Development Status :: 5 - Production/Stable", 31 | ] 32 | dynamic = ["version"] 33 | requires-python = ">=3.9" 34 | dependencies = [ 35 | "cascade-config>=0.4.0", 36 | "click>=7", 37 | "customtkinter>=5,<6", 38 | "deeplc>=3.0,<3.1", 39 | "deeplcretrainer", 40 | "im2deep>=0.3.1", 41 | "jinja2>=3", 42 | "lxml>=4.5", 43 | "mokapot==0.10", # 0.11.0 will introduce API changes 44 | "ms2pip>=4.0.0", 45 | "ms2rescore_rs>=0.4.0", 46 | "numpy>=1.25", 47 | "pandas>=1", 48 | "plotly>=5", 49 | "psm_utils>=1.1", 50 | "pyteomics>=4.7.2", 51 | "rich>=12", 52 | "tomli>=2; python_version < '3.11'", 53 | ] 54 | 55 | [project.optional-dependencies] 56 | ionmob = ["ionmob>=0.2", "tensorflow"] 57 | dev = ["ruff", "black", "pytest", "pytest-cov", "pre-commit"] 58 | docs = [ 59 | "sphinx", 60 | "myst-parser", 61 | "nbsphinx", 62 | "numpydoc>=1,<2", 63 | 
"semver>=2", 64 | "sphinx_inline_tabs", 65 | "sphinx_rtd_theme", 66 | "sphinx-argparse", 67 | "sphinx-autobuild", 68 | "toml", 69 | ] 70 | 71 | [project.urls] 72 | GitHub = "https://github.com/compomics/ms2rescore" 73 | ReadTheDocs = "https://ms2rescore.readthedocs.io" 74 | PyPi = "https://pypi.org/project/ms2rescore/" 75 | CompOmics = "https://www.compomics.com" 76 | 77 | [project.scripts] 78 | ms2rescore = "ms2rescore.__main__:main" 79 | ms2rescore-gui = "ms2rescore.gui.__main__:main" 80 | ms2rescore-report = "ms2rescore.report.__main__:main" 81 | tims2rescore = "ms2rescore.__main__:main_tims" 82 | 83 | [build-system] 84 | requires = ["flit_core >=3.2,<4"] 85 | build-backend = "flit_core.buildapi" 86 | 87 | [tool.isort] 88 | profile = "black" 89 | 90 | [tool.black] 91 | line-length = 99 92 | target-version = ['py39'] 93 | 94 | [tool.ruff] 95 | line-length = 99 96 | target-version = 'py39' 97 | 98 | [tool.ruff.lint] 99 | extend-select = ["T201", "T203"] 100 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_config_parser.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from ms2rescore.config_parser import _parse_output_path 4 | 5 | 6 | def test__parse_output_path(): 7 | # Ensure that test dir exists 8 | Path("examples/id").mkdir(parents=True, exist_ok=True) 9 | test_psm_file = "some/dir/psm_file.mzid" 10 | 11 | test_cases = [ 12 | ("examples/id", "examples/id/psm_file.ms2rescore"), # Existing dir 13 | ("examples/id/custom_stem", "examples/id/custom_stem"), # Parent is existing dir 14 | ("some/other_dir", "some/other_dir/psm_file.ms2rescore"), # None-existing 
@pytest.fixture
def mock_psm_list():
    """PSM list with two PSMs (``spectrum1``, ``spectrum2``) that lack all precursor values."""

    def _psm_without_precursor_values(spectrum_id):
        # Same peptidoform and run for every PSM; only the spectrum ID differs.
        return PSM(
            peptidoform="PEPTIDE/2",
            run="run1",
            spectrum_id=spectrum_id,
            retention_time=None,
            ion_mobility=None,
            precursor_mz=None,
        )

    return PSMList(
        psm_list=[
            _psm_without_precursor_values("spectrum1"),
            _psm_without_precursor_values("spectrum2"),
        ]
    )
@patch("ms2rescore.parse_spectra.get_precursor_info")
@patch("ms2rescore.parse_spectra.infer_spectrum_path")
def test_add_precursor_values(
    mock_infer_spectrum_path, mock_get_precursor_info, mock_psm_list, mock_precursor_info
):
    """All three MS data types are reported and filled in when the spectrum file has them."""
    # Patch out file resolution and spectrum reading
    mock_get_precursor_info.return_value = mock_precursor_info
    mock_infer_spectrum_path.return_value = "test_data/test_spectrum_file.mgf"

    available_ms_data = add_precursor_values(mock_psm_list, "test_data")

    for data_type in (
        MSDataType.retention_time,
        MSDataType.ion_mobility,
        MSDataType.precursor_mz,
    ):
        assert data_type in available_ms_data

    for psm in mock_psm_list:
        assert psm.retention_time is not None
        assert psm.ion_mobility is not None
        assert psm.precursor_mz is not None
@patch("ms2rescore.parse_spectra.get_precursor_info")
@patch("ms2rescore.parse_spectra.infer_spectrum_path")
def test_get_precursor_values(
    mock_infer_spectrum_path, mock_get_precursor_info, mock_psm_list, mock_precursor_info
):
    """Precursor m/z, retention time, and ion mobility are returned in PSM order."""
    # Patch out file resolution and spectrum reading
    mock_get_precursor_info.return_value = mock_precursor_info
    mock_infer_spectrum_path.return_value = "test_data/test_spectrum_file.mgf"

    mzs, rts, ims = _get_precursor_values(mock_psm_list, "test_data", None)

    expected_by_array = (
        (mzs, np.array([529.7935187324, 651.83])),
        (rts, np.array([10.5, 12.3])),
        (ims, np.array([1.0, 1.2])),
    )
    for observed, expected in expected_by_array:
        np.testing.assert_array_equal(observed, expected)