├── .github
    └── workflows
    │   ├── publish.yml
    │   └── test.yml
├── .gitignore
├── .pre-commit-config.yaml
├── .readthedocs.yml
├── CONTRIBUTING.rst
├── Dockerfile
├── LICENSE
├── MANIFEST.in
├── README.md
├── docs
    └── source
    │   ├── LICENSE.txt
    │   ├── _static
    │       ├── css
    │       │   └── custom.css
    │       ├── img
    │       │   ├── gui-example-xtandem-advanced.png
    │       │   ├── gui-example-xtandem-finished.png
    │       │   ├── gui-example-xtandem-modifications-before.png
    │       │   ├── gui-example-xtandem-modifications-filled.png
    │       │   ├── gui-example-xtandem-output-files.png
    │       │   ├── gui-example-xtandem-processes.png
    │       │   ├── gui-example-xtandem-progress.png
    │       │   ├── gui-example-xtandem-psm-file.png
    │       │   ├── gui-example-xtandem-psm-filetype.png
    │       │   ├── gui-example-xtandem-spectra.png
    │       │   ├── gui-example-xtandem-start.png
    │       │   ├── gui-fixed-modifications.png
    │       │   ├── gui-modification-mapping.png
    │       │   ├── gui-overview.png
    │       │   ├── gui-screenshot-old.png
    │       │   ├── gui-screenshot.png
    │       │   ├── ms2rescore-overview.png
    │       │   ├── ms2rescore_logo.png
    │       │   ├── percolator-install-path.png
    │       │   └── qc-reports.png
    │       └── js
    │       │   └── badge.min.js
    │   ├── api
    │       ├── ms2rescore.feature_generators.rst
    │       ├── ms2rescore.report.rst
    │       ├── ms2rescore.rescoring_engines.rst
    │       └── ms2rescore.rst
    │   ├── cli.rst
    │   ├── conf.py
    │   ├── config_schema.md
    │   ├── contributing.rst
    │   ├── gui.rst
    │   ├── index.rst
    │   ├── installation.rst
    │   ├── tutorials
    │       └── in-depth-python-api.ipynb
    │   └── userguide
    │       ├── configuration.rst
    │       ├── input-files.rst
    │       ├── output-files.rst
    │       ├── search-engine-notes.rst
    │       └── tims2Rescore.rst
├── examples
    ├── mascot-ms2rescore.json
    ├── mascot-ms2rescore.toml
    ├── maxquant-ms2rescore.json
    ├── maxquant-ms2rescore.toml
    ├── msgfplus-ms2rescore.json
    ├── msgfplus-ms2rescore.toml
    ├── peptideshaker-ms2rescore.json
    ├── peptideshaker-ms2rescore.toml
    ├── sage-ms2rescore.json
    ├── sage-ms2rescore.toml
    ├── xtandem-ms2rescore.json
    └── xtandem-ms2rescore.toml
├── img
    ├── gui-screenshot.png
    ├── ms2rescore.ico
    ├── ms2rescore_logo.png
    └── ms2rescore_logo.svg
├── ms2rescore.spec
├── ms2rescore
    ├── __init__.py
    ├── __main__.py
    ├── config_parser.py
    ├── core.py
    ├── exceptions.py
    ├── feature_generators
    │   ├── __init__.py
    │   ├── base.py
    │   ├── basic.py
    │   ├── deeplc.py
    │   ├── im2deep.py
    │   ├── ionmob.py
    │   ├── maxquant.py
    │   └── ms2pip.py
    ├── gui
    │   ├── __init__.py
    │   ├── __main__.py
    │   ├── app.py
    │   ├── function2ctk.py
    │   └── widgets.py
    ├── package_data
    │   ├── __init__.py
    │   ├── config_default.json
    │   ├── config_default_tims.json
    │   ├── config_schema.json
    │   └── img
    │   │   ├── __init__.py
    │   │   ├── comments_icon_black.png
    │   │   ├── comments_icon_white.png
    │   │   ├── config_icon.png
    │   │   ├── docs_icon_black.png
    │   │   ├── docs_icon_white.png
    │   │   ├── github_icon_black.png
    │   │   ├── github_icon_white.png
    │   │   ├── ms2rescore_logo.png
    │   │   └── program_icon.ico
    ├── parse_psms.py
    ├── parse_spectra.py
    ├── report
    │   ├── __init__.py
    │   ├── __main__.py
    │   ├── charts.py
    │   ├── generate.py
    │   ├── templates
    │   │   ├── __init__.py
    │   │   ├── about.html
    │   │   ├── base.html
    │   │   ├── config.html
    │   │   ├── features.html
    │   │   ├── log.html
    │   │   ├── metadata.html
    │   │   ├── overview.html
    │   │   ├── stats-card.html
    │   │   ├── style.html
    │   │   ├── target-decoy.html
    │   │   └── texts.toml
    │   └── utils.py
    ├── rescoring_engines
    │   ├── __init__.py
    │   ├── mokapot.py
    │   └── percolator.py
    └── utils.py
├── ms2rescore_innosetup.iss
├── pyproject.toml
└── tests
    ├── __init__.py
    ├── test_config_parser.py
    ├── test_data
        ├── msms-psms.tsv
        └── test.mgf
    └── test_parse_spectra.py


/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
  1 | name: Publish
  2 | 
  3 | on:
  4 |   release:
  5 |     types: [created]
  6 |   workflow_dispatch:
  7 | 
  8 | jobs:
  9 |   python-package:
 10 |     runs-on: ubuntu-latest
 11 |     permissions:
 12 |       id-token: write  # IMPORTANT: this permission is mandatory for trusted publishing
 13 |     steps:
 14 |       - uses: actions/checkout@v4
 15 | 
 16 |       - name: Set up Python
 17 |         uses: actions/setup-python@v5
 18 |         with:
 19 |           python-version: "3.11"
 20 | 
 21 |       - name: Install dependencies
 22 |         run: |
 23 |           python -m pip install --upgrade pip
 24 |           python -m pip install --upgrade build pytest
 25 | 
 26 |       - name: Build source and wheel
 27 |         run: |
 28 |           python -m build --sdist --wheel --outdir dist/
 29 | 
 30 |       - name: Test built package
 31 |         run: |
 32 |           pip install --only-binary :all: dist/ms2rescore-*.whl
 33 |           # pytest
 34 |           ms2rescore --help
 35 | 
 36 |       - name: Upload build artifacts
 37 |         uses: actions/upload-artifact@v4
 38 |         with:
 39 |           name: python-package
 40 |           path: dist/*
 41 | 
 42 |       - uses: pypa/gh-action-pypi-publish@release/v1
 43 | 
 44 |   windows-installer:
 45 |     runs-on: windows-latest
 46 |     needs: python-package
 47 |     steps:
 48 |       - uses: actions/checkout@v4
 49 | 
 50 |       - uses: actions/setup-python@v5
 51 |         with:
 52 |           python-version: "3.11"
 53 | 
 54 |       - name: Install package and dependencies
 55 |         run: |
 56 |           python -m pip install --upgrade pip
 57 |           pip install --only-binary :all: .[ionmob] pyinstaller
 58 | 
 59 |       - name: Install Inno Setup
 60 |         uses: crazy-max/ghaction-chocolatey@v3
 61 |         with:
 62 |           args: install innosetup -y --allow-unofficial --force
 63 | 
 64 |       - name: Run pyinstaller
 65 |         run: pyinstaller ./ms2rescore.spec --clean --noconfirm
 66 | 
 67 |       - name: Test built exe
 68 |         run: dist/ms2rescore/ms2rescore.exe
 69 | 
 70 |       - name: Run Inno Setup
 71 |         run: ISCC.exe ./ms2rescore_innosetup.iss /DAppVersion=${{  github.ref_name }}
 72 | 
 73 |       - name: Upload artifact
 74 |         uses: actions/upload-artifact@v4
 75 |         with:
 76 |           name: dist
 77 |           path: dist/*.exe
 78 | 
 79 |       - name: Upload installer to release
 80 |         uses: svenstaro/upload-release-action@v2
 81 |         with:
 82 |           repo_token: ${{ secrets.GITHUB_TOKEN }}
 83 |           tag: ${{ github.ref }}
 84 |           file_glob: true
 85 |           file: dist/*.exe
 86 | 
 87 |   docker-image:
 88 |     runs-on: ubuntu-latest
 89 |     permissions:
 90 |       packages: write
 91 |       contents: read
 92 |       attestations: write
 93 |       id-token: write
 94 |     steps:
 95 |       - name: Check out the repo
 96 |         uses: actions/checkout@v4
 97 | 
 98 |       - name: Log in to the Container registry
 99 |         uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
100 |         with:
101 |           registry: ghcr.io
102 |           username: ${{ github.actor }}
103 |           password: ${{ secrets.GITHUB_TOKEN }}
104 | 
105 |       - name: Extract metadata (tags, labels) for Docker
106 |         id: meta
107 |         uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
108 |         with:
109 |           images: ghcr.io/${{ github.repository }}
110 | 
111 |       - name: Build and push Docker images
112 |         id: push
113 |         uses: docker/build-push-action@3b5e8027fcad23fda98b2e3ac259d8d67585f671
114 |         with:
115 |           context: .
116 |           push: true
117 |           tags: ${{ steps.meta.outputs.tags }}
118 |           labels: ${{ steps.meta.outputs.labels }}
119 | 
120 |       - name: Generate artifact attestation
121 |         uses: actions/attest-build-provenance@v1
122 |         with:
123 |           subject-name: ghcr.io/${{ github.repository }}
124 |           subject-digest: ${{ steps.push.outputs.digest }}
125 |           push-to-registry: true
126 | 


--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
 1 | name: Test
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - main
 7 |   pull_request:
 8 |   workflow_dispatch:
 9 | 
10 | jobs:
11 |   test-python-package:
12 |     runs-on: ubuntu-latest
13 |     strategy:
14 |       matrix:
15 |         python-version: ["3.9", "3.10", "3.11"]
16 |     steps:
17 |       - uses: actions/checkout@v4
18 | 
19 |       - name: Set up Python ${{ matrix.python-version }}
20 |         uses: actions/setup-python@v5
21 |         with:
22 |           python-version: ${{ matrix.python-version }}
23 | 
24 |       - name: Install dependencies
25 |         run: |
26 |           python -m pip install --upgrade pip
27 |           pip install ruff
28 | 
29 |       - name: Run Ruff
30 |         run: ruff check --output-format=github .
31 | 
32 |       - name: Build and install ms2rescore package
33 |         run: |
34 |           pip install --only-binary :all: .[dev]
35 | 
36 |       - name: Test with pytest
37 |         run: |
38 |           pytest
39 | 
40 |       - name: Test installation
41 |         run: |
42 |           ms2rescore --help
43 | 
44 |   test-windows-installer:
45 |     # Only run on push to main (e.g., after PR merge)
46 |     if: ${{ github.ref == 'refs/heads/main' }}
47 |     runs-on: windows-latest
48 |     steps:
49 |       - uses: actions/checkout@v4
50 | 
51 |       - uses: actions/setup-python@v5
52 |         with:
53 |           python-version: "3.11"
54 | 
55 |       - name: Install package and dependencies
56 |         run: |
57 |           python -m pip install --upgrade pip
58 |           pip install --only-binary :all: .[ionmob] pyinstaller
59 | 
60 |       - name: Install Inno Setup
61 |         uses: crazy-max/ghaction-chocolatey@v3  # same major version as the publish workflow
62 |         with:
63 |           args: install innosetup -y --allow-unofficial --force
64 | 
65 |       - name: Run pyinstaller
66 |         run: pyinstaller ./ms2rescore.spec --clean --noconfirm
67 | 
68 |       - name: Test built exe
69 |         run: dist/ms2rescore/ms2rescore.exe
70 | 
71 |       - name: Run Inno Setup
72 |         run: ISCC.exe ./ms2rescore_innosetup.iss /DAppVersion=${{  github.ref_name }}
73 | 
74 |       - name: Upload artifact
75 |         uses: actions/upload-artifact@v4
76 |         with:
77 |           name: dist
78 |           path: dist/*.exe
79 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Others
  2 | nbs/
  3 | .prettierrc
  4 | data/
  5 | steps.txt
  6 | old_files/
  7 | prepare_pin_files.py
  8 | *.jar
  9 | *.tar
 10 | 
 11 | # Ruff
 12 | .ruff_cache/
 13 | 
 14 | # Atom remote-sync config
 15 | .remote-sync.json
 16 | 
 17 | # Byte-compiled / optimized / DLL files
 18 | __pycache__/
 19 | *.py[cod]
 20 | *$py.class
 21 | 
 22 | # C extensions
 23 | *.so
 24 | 
 25 | # Distribution / packaging
 26 | .Python
 27 | env/
 28 | build/
 29 | develop-eggs/
 30 | dist/
 31 | downloads/
 32 | eggs/
 33 | .eggs/
 34 | lib/
 35 | lib64/
 36 | parts/
 37 | sdist/
 38 | var/
 39 | *.egg-info/
 40 | .installed.cfg
 41 | *.egg
 42 | 
 43 | # PyInstaller
 44 | #  Usually these files are written by a python script from a template
 45 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 46 | *.manifest
 47 | 
 48 | # Installer logs
 49 | pip-log.txt
 50 | pip-delete-this-directory.txt
 51 | 
 52 | # Unit test / coverage reports
 53 | htmlcov/
 54 | .tox/
 55 | .coverage
 56 | .coverage.*
 57 | .cache
 58 | nosetests.xml
 59 | coverage.xml
 60 | *,cover
 61 | .hypothesis/
 62 | 
 63 | # Translations
 64 | *.mo
 65 | *.pot
 66 | 
 67 | # Django stuff:
 68 | *.log
 69 | local_settings.py
 70 | 
 71 | # Flask stuff:
 72 | instance/
 73 | .webassets-cache
 74 | 
 75 | # Scrapy stuff:
 76 | .scrapy
 77 | 
 78 | # Sphinx documentation
 79 | docs/_build/
 80 | 
 81 | # PyBuilder
 82 | target/
 83 | 
 84 | # IPython Notebook
 85 | .ipynb_checkpoints
 86 | 
 87 | # pyenv
 88 | .python-version
 89 | 
 90 | # celery beat schedule file
 91 | celerybeat-schedule
 92 | 
 93 | # dotenv
 94 | .env
 95 | 
 96 | # virtualenv
 97 | venv/
 98 | ENV/
 99 | .venv*/
100 | 
101 | # Spyder project settings
102 | .spyderproject
103 | 
104 | # Rope project settings
105 | .ropeproject
106 | 
107 | # vscode
108 | .vscode/
109 | .pytest_cache/
110 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 |   - repo: https://github.com/sbrunner/jsonschema2md
 3 |     rev: 0.9.0
 4 |     hooks:
 5 |       - id: jsonschema2md
 6 |         files: ms2rescore/package_data/config_schema.json
 7 |         args:
 8 |           - ms2rescore/package_data/config_schema.json
 9 |           - docs/source/config_schema.md
10 | 
11 |   - repo: https://github.com/ralfg/convert-config-hook
12 |     rev: 0.1.6
13 |     hooks:
14 |       - id: convert-config
15 |         files: "examples\\/.*-ms2rescore\\.toml"
16 |         args: ["--output-format", "json"]
17 | 
18 |   - repo: https://github.com/pre-commit/pre-commit-hooks
19 |     rev: v2.3.0
20 |     hooks:
21 |       - id: check-yaml
22 |       - id: trailing-whitespace
23 |       # - id: end-of-file-fixer
24 | 
25 |   # - repo: https://github.com/pycqa/isort
26 |   #   rev: 5.11.2
27 |   #   hooks:
28 |   #     - id: isort
29 |   #       name: isort (python)
30 | 
31 |   - repo: https://github.com/psf/black
32 |     rev: 22.10.0
33 |     hooks:
34 |       - id: black
35 | 


--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | 
 3 | build:
 4 |   os: ubuntu-22.04
 5 |   tools:
 6 |     python: "3.11"
 7 | 
 8 | sphinx:
 9 |   configuration: docs/source/conf.py
10 |   builder: dirhtml
11 | 
12 | python:
13 |   install:
14 |     - method: pip
15 |       path: .
16 |       extra_requirements:
17 |         - docs
18 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.rst:
--------------------------------------------------------------------------------
  1 | ############
  2 | Contributing
  3 | ############
  4 | 
  5 | This document briefly describes how to contribute to
  6 | `ms2rescore <https://github.com/compomics/ms2rescore>`_.
  7 | 
  8 | 
  9 | 
 10 | Before you begin
 11 | ################
 12 | 
 13 | If you have an idea for a feature, use case to add or an approach for a bugfix,
 14 | you are welcome to communicate it with the community by opening a
 15 | thread in
 16 | `GitHub Discussions <https://github.com/compomics/ms2rescore/discussions>`_
 17 | or in `GitHub Issues <https://github.com/compomics/ms2rescore/issues>`_.
 18 | 
 19 | Welcome contributions include:
 20 | 
 21 | - New features, such as the addition of new feature generators
 22 | - Improvements of existing functionality
 23 | - Bugfixes
 24 | 
 25 | 
 26 | 
 27 | Development setup
 28 | #################
 29 | 
 30 | Local install
 31 | *************
 32 | 
 33 | #. Set up Python 3, and preferably create a virtual environment.
 34 | #. Clone the `ms2rescore repository <https://github.com/compomics/ms2rescore>`_.
 35 | #. Use pip in editable mode to set up the development environment:
 36 | 
 37 | .. code-block:: sh
 38 | 
 39 |     pip install --editable .[dev,docs]
 40 | 
 41 | 
 42 | Pre-commit hooks
 43 | ****************
 44 | 
 45 | Pre-commit hooks ensure that certain checks are performed before making a new commit. For instance,
 46 | the ``black`` pre-commit hook is used to format all Python code, and ``jsonschema2md`` is used to
 47 | automatically generate Markdown documentation for the configuration file. Set up the pre-commit
 48 | hooks with:
 49 | 
 50 | .. code-block:: sh
 51 | 
 52 |     pre-commit install
 53 | 
 54 | 
 55 | Unit tests
 56 | **********
 57 | 
 58 | Run tests with ``pytest``:
 59 | 
 60 | .. code-block:: sh
 61 | 
 62 |     pytest ./tests
 63 | 
 64 | 
 65 | Documentation
 66 | *************
 67 | 
 68 | To work on the documentation and get a live preview, install the requirements
 69 | and run ``sphinx-autobuild``:
 70 | 
 71 | .. code-block:: sh
 72 | 
 73 |     pip install .[docs]
 74 |     sphinx-autobuild  --watch ./ms2rescore ./docs/source/ ./docs/_build/html/
 75 | 
 76 | Then browse to http://localhost:8000 to watch the live preview.
 77 | 
 78 | 
 79 | How to contribute
 80 | #################
 81 | 
 82 | - Fork `ms2rescore <https://github.com/compomics/ms2rescore>`_ on GitHub to
 83 |   make your changes.
 84 | - Commit and push your changes to your
 85 |   `fork <https://help.github.com/articles/pushing-to-a-remote/>`_.
 86 | - Ensure that the tests and documentation (both Python docstrings and files in
 87 |   ``/docs/source/``) have been updated according to your changes. Python
 88 |   docstrings are formatted in the
 89 |   `numpydoc style <https://numpydoc.readthedocs.io/en/latest/format.html>`_.
 90 | - Open a
 91 |   `pull request <https://help.github.com/articles/creating-a-pull-request/>`_
 92 |   with these changes. Your pull request message should ideally include:
 93 | 
 94 |     - A description of why the changes should be made.
 95 |     - A description of the implementation of the changes.
 96 |     - A description of how to test the changes.
 97 | 
 98 | - The pull request should pass all the continuous integration tests which are
 99 |   automatically run by
100 |   `GitHub Actions <https://github.com/compomics/ms2rescore/actions>`_.
101 | 
102 | 
103 | 
104 | Release workflow
105 | ################
106 | 
107 | - When a new version is ready to be published:
108 | 
109 |     #. Change the ``__version__`` in ``ms2rescore/__init__.py`` following
110 |        `semantic versioning <https://semver.org/>`_.
111 |     #. Update the changelog (if not already done) in ``CHANGELOG.md`` according to
112 |        `Keep a Changelog <https://keepachangelog.com/en/1.0.0/>`_.
113 |     #. Merge all final changes with the ``main`` branch.
114 |     #. On GitHub, draft a new release with the new version number and the
115 |        changes that are listed in ``CHANGELOG.md``.
116 | 
117 | - When a new release is published on GitHub, the following GitHub Actions are triggered:
118 | 
119 |     #. The Python package is built and published to PyPI.
120 |     #. The Windows installer is built with PyInstaller and Inno Setup and published to the GitHub
121 |        release.
122 | 
123 | - A webhook triggers a new build of the documentation on Read the Docs.
124 | 
125 | - The Bioconda recipe is automatically updated by the Bioconda bot, and subsequently both the Conda
126 |   Python package and the Docker image are built.
127 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM python:3.11
 2 | 
 3 | # ARG DEBIAN_FRONTEND=noninteractive
 4 | 
 5 | LABEL name="ms2rescore"
 6 | 
 7 | # ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/ms2rescore
 8 | # Copy only the files needed to build the package; COPY is preferred over ADD for local files.
 9 | COPY pyproject.toml /ms2rescore/pyproject.toml
10 | COPY LICENSE /ms2rescore/LICENSE
11 | COPY README.md /ms2rescore/README.md
12 | COPY MANIFEST.in /ms2rescore/MANIFEST.in
13 | COPY ms2rescore /ms2rescore/ms2rescore
14 | # Install procps and the package; apt-get is used because apt has no stable CLI for scripts.
15 | RUN apt-get update \
16 |     && apt-get install -y --no-install-recommends procps \
17 |     && pip install /ms2rescore --only-binary :all:
18 | 
19 | ENTRYPOINT [""]
20 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include ms2rescore/package_data/**/*
2 | include ms2rescore/package_data/*
3 | include ms2rescore/report/templates/*
4 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | <img src="https://github.com/compomics/ms2rescore/raw/main/img/ms2rescore_logo.png" width="150" height="150" alt="MS²Rescore"/>
  2 | <br/><br/>
  3 | 
  4 | [![GitHub release](https://img.shields.io/github/release-pre/compomics/ms2rescore.svg?style=flat-square)](https://github.com/compomics/ms2rescore/releases)
  5 | [![PyPI](https://flat.badgen.net/pypi/v/ms2rescore)](https://pypi.org/project/ms2rescore/)
  6 | [![GitHub Workflow Status](https://flat.badgen.net/github/checks/compomics/ms2rescore/main)](https://github.com/compomics/ms2rescore/actions/)
  7 | [![GitHub issues](https://img.shields.io/github/issues/compomics/ms2rescore?style=flat-square)](https://github.com/compomics/ms2rescore/issues)
  8 | [![GitHub](https://img.shields.io/github/license/compomics/ms2rescore.svg?style=flat-square)](https://www.apache.org/licenses/LICENSE-2.0)
  9 | [![Last commit](https://flat.badgen.net/github/last-commit/compomics/ms2rescore)](https://github.com/compomics/ms2rescore/commits/)
 10 | 
 11 | Modular and user-friendly platform for AI-assisted rescoring of peptide identifications
 12 | 
 13 | ## About MS²Rescore
 14 | 
 15 | MS²Rescore performs ultra-sensitive peptide identification rescoring with LC-MS predictors such as
 16 | [MS²PIP][ms2pip] and [DeepLC][deeplc], and with ML-driven rescoring engines
 17 | [Percolator][percolator] or [Mokapot][mokapot]. This results in more confident peptide
 18 | identifications, which allows you to get **more peptide IDs** at the same false discovery rate
 19 | (FDR) threshold, or to set a **more stringent FDR threshold** while still retaining a similar
 20 | number of peptide IDs. MS²Rescore is **ideal for challenging proteomics identification workflows**,
 21 | such as proteogenomics, metaproteomics, or immunopeptidomics.
 22 | 
 23 | ![MS²Rescore overview](https://raw.githubusercontent.com/compomics/ms2rescore/main/docs/source/_static/img/ms2rescore-overview.png)
 24 | 
 25 | MS²Rescore can read peptide identifications in any format supported by [psm_utils][psm_utils]
 26 | (see [Supported file formats][file-formats]) and has been tested with output files from various
 27 | search engines:
 28 | 
 29 | - [MS Amanda](http://ms.imp.ac.at/?goto=msamanda) `.csv`
 30 | - [Sage](https://github.com/lazear/sage) `.sage.tsv`
 31 | - [PeptideShaker](https://compomics.github.io/projects/peptide-shaker.html) `.mzid`
 32 | - [ProteomeDiscoverer](#) `.msf`
 33 | - [MSGFPlus](https://omics.pnl.gov/software/ms-gf) `.mzid`
 34 | - [Mascot](https://www.matrixscience.com/) `.mzid`
 35 | - [MaxQuant](https://www.maxquant.org/) `msms.txt`
 36 | - [X!Tandem](https://www.thegpm.org/tandem/) `.xml`
 37 | - [PEAKS](https://www.bioinfor.com/peaksdb/) `.mzid`
 38 | 
 39 | MS²Rescore is available as a [desktop application][desktop], a [command line tool][cli], and a
 40 | [modular Python API][python-package].
 41 | 
 42 | ## TIMS²Rescore: Direct support for DDA-PASEF data
 43 | 
 44 | MS²Rescore v3.1+ includes TIMS²Rescore, a usage mode with specialized default configurations for
 45 | DDA-PASEF data from timsTOF instruments. TIMS²Rescore makes use of new MS²PIP prediction models for
 46 | timsTOF fragmentation and IM2Deep for ion mobility separation. Bruker .d and miniTDF spectrum
 47 | files are directly supported through the [timsrust](https://github.com/MannLabs/timsrust) library.
 48 | 
 49 | Check out our [paper](https://doi.org/10.1021/acs.jproteome.4c00609) for more information and the
 50 | [TIMS²Rescore documentation][tims2rescore] to get started.
 51 | 
 52 | ## Citing
 53 | 
 54 | **Latest MS²Rescore publication:**
 55 | 
 56 | > **MS²Rescore 3.0 is a modular, flexible, and user-friendly platform to boost peptide identifications, as showcased with MS Amanda 3.0.**
 57 | > Louise Marie Buur*, Arthur Declercq*, Marina Strobl, Robbin Bouwmeester, Sven Degroeve, Lennart Martens, Viktoria Dorfer*, and Ralf Gabriels*.
 58 | > _Journal of Proteome Research_ (2024) [doi:10.1021/acs.jproteome.3c00785](https://doi.org/10.1021/acs.jproteome.3c00785) <br/> \*contributed equally <span class="__dimensions_badge_embed__" data-doi="10.1021/acs.jproteome.3c00785" data-hide-zero-citations="true" data-style="small_rectangle"></span>
 59 | 
 60 | **MS²Rescore for immunopeptidomics:**
 61 | 
 62 | > **MS²Rescore: Data-driven rescoring dramatically boosts immunopeptide identification rates.**
 63 | > Arthur Declercq, Robbin Bouwmeester, Aurélie Hirschler, Christine Carapito, Sven Degroeve, Lennart Martens, and Ralf Gabriels.
 64 | > _Molecular & Cellular Proteomics_ (2022) [doi:10.1016/j.mcpro.2022.100266](https://doi.org/10.1016/j.mcpro.2022.100266) <span class="__dimensions_badge_embed__" data-doi="10.1016/j.mcpro.2022.100266" data-hide-zero-citations="true" data-style="small_rectangle"></span>
 65 | 
 66 | **MS²Rescore for timsTOF DDA-PASEF data:**
 67 | 
 68 | > **TIMS²Rescore: A DDA-PASEF optimized data-driven rescoring pipeline based on MS²Rescore.**
 69 | > Arthur Declercq*, Robbe Devreese*, Jonas Scheid, Caroline Jachmann, Tim Van Den Bossche, Annica Preikschat, David Gomez-Zepeda, Jeewan Babu Rijal, Aurélie Hirschler, Jonathan R Krieger, Tharan Srikumar, George Rosenberger, Dennis Trede, Christine Carapito, Stefan Tenzer, Juliane S Walz, Sven Degroeve, Robbin Bouwmeester, Lennart Martens, and Ralf Gabriels.
 70 | > _Journal of Proteome Research_ (2025) [doi:10.1021/acs.jproteome.4c00609](https://doi.org/10.1021/acs.jproteome.4c00609) <span class="__dimensions_badge_embed__" data-doi="10.1021/acs.jproteome.4c00609" data-hide-zero-citations="true" data-style="small_rectangle"></span>
 71 | 
 72 | **Original publication describing the concept of rescoring with predicted spectra:**
 73 | 
 74 | > **Accurate peptide fragmentation predictions allow data driven approaches to replace and improve upon proteomics search engine scoring functions.**
 75 | > Ana S C Silva, Robbin Bouwmeester, Lennart Martens, and Sven Degroeve.
 76 | > _Bioinformatics_ (2019) [doi:10.1093/bioinformatics/btz383](https://doi.org/10.1093/bioinformatics/btz383) <span class="__dimensions_badge_embed__" data-doi="10.1093/bioinformatics/btz383" data-hide-zero-citations="true" data-style="small_rectangle"></span>
 77 | 
 78 | To replicate the experiments described in this article, check out the
 79 | [publication branch][publication-branch] of the repository.
 80 | 
 81 | ## Getting started
 82 | 
 83 | The desktop application can be installed on Windows with a [one-click installer][desktop-installer].
 84 | The Python package and command line interface can be installed with `pip`, `conda`, or `docker`.
 85 | Check out the [full documentation][docs] to get started.
 86 | 
 87 | ## Questions or issues?
 88 | 
 89 | Have questions on how to apply MS²Rescore on your data? Or did you run into issues while using MS²Rescore?
 90 | Post your questions on the [GitHub Discussions][discussions] forum and we are happy to help!
 91 | 
 92 | ## How to contribute
 93 | 
 94 | Bugs, questions or suggestions? Feel free to post an issue in the [issue tracker][issues] or to
 95 | make a [pull request][pr]!
 96 | 
 97 | [docs]: https://ms2rescore.readthedocs.io/
 98 | [issues]: https://github.com/compomics/ms2rescore/issues/
 99 | [discussions]: https://github.com/compomics/ms2rescore/discussions/
100 | [pr]: https://github.com/compomics/ms2rescore/pulls/
101 | [desktop]: https://ms2rescore.readthedocs.io/en/stable/gui/
102 | [desktop-installer]: https://github.com/compomics/ms2rescore/releases/latest
103 | [cli]: https://ms2rescore.readthedocs.io/en/stable/cli/
104 | [python-package]: https://ms2rescore.readthedocs.io/en/stable/api/ms2rescore/
105 | [docker]: https://ms2rescore.readthedocs.io/en/stable/installation#docker-container
106 | [publication-branch]: https://github.com/compomics/ms2rescore/tree/pub
107 | [ms2pip]: https://github.com/compomics/ms2pip
108 | [deeplc]: https://github.com/compomics/deeplc
109 | [percolator]: https://github.com/percolator/percolator/
110 | [mokapot]: https://mokapot.readthedocs.io/
111 | [psm_utils]: https://github.com/compomics/psm_utils
112 | [file-formats]: https://psm-utils.readthedocs.io/en/stable/#supported-file-formats
113 | [tims2rescore]: https://ms2rescore.readthedocs.io/en/stable/userguide/tims2Rescore
114 | 


--------------------------------------------------------------------------------
/docs/source/_static/css/custom.css:
--------------------------------------------------------------------------------
 1 | /* replace the copyright to eliminate the copyright symbol enforced by
 2 |    the ReadTheDocs theme but eschewed by our legal team */
 3 |    div[role=contentinfo] {
 4 |     visibility: hidden;
 5 |     position: relative;
 6 | }
 7 | 
 8 | div[role=contentinfo]:after {
 9 |     visibility: visible;
10 |     position: absolute;
11 |     top: 0;
12 |     left: 0;
13 |     content: "Creative Commons CC-BY-SA 4.0";
14 | }
15 | 
16 | :not(dt) > strong, :not(dt) > b,
17 | .rst-content .viewcode-back,
18 | .rst-content .viewcode-link {
19 |     font-weight: 600; /* semi-bold; the keyword "semi-bold" is not a valid CSS value */
20 |     color: #2c3e50;
21 | }
22 | 
23 | .wy-menu-vertical header,
24 | .wy-menu-vertical p.caption {
25 |     color: #80b4e8;
26 | }
27 | 
28 | /** Mobile nav-bar **/
29 | .wy-nav-top,
30 | .wy-side-nav-search{
31 |     background: #2c3e50;
32 | }
33 | 
34 | /** Signature text **/
35 | html.writer-html4 .rst-content dl:not(.docutils)>dt,
36 | html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.glossary):not(.simple)>dt {
37 |     color: #555;
38 | }
39 | 
40 | /** Signature bg **/
41 | html.writer-html4 .rst-content dl:not(.docutils)>dt,
42 | html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.glossary):not(.simple)>dt {
43 |     background: #C9F4ED;
44 |     border-top: 3px solid #80b4e8;
45 | }
46 | 
47 | /** Figure bottom margin **/
48 | .rst-content figure,
49 | .rst-content .tab-content > figure:last-child {
50 |     margin-bottom: 24px;
51 | }
52 | 


--------------------------------------------------------------------------------
/docs/source/_static/img/gui-example-xtandem-advanced.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/docs/source/_static/img/gui-example-xtandem-advanced.png


--------------------------------------------------------------------------------
/docs/source/_static/img/gui-example-xtandem-finished.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/docs/source/_static/img/gui-example-xtandem-finished.png


--------------------------------------------------------------------------------
/docs/source/_static/img/gui-example-xtandem-modifications-before.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/docs/source/_static/img/gui-example-xtandem-modifications-before.png


--------------------------------------------------------------------------------
/docs/source/_static/img/gui-example-xtandem-modifications-filled.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/docs/source/_static/img/gui-example-xtandem-modifications-filled.png


--------------------------------------------------------------------------------
/docs/source/_static/img/gui-example-xtandem-output-files.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/docs/source/_static/img/gui-example-xtandem-output-files.png


--------------------------------------------------------------------------------
/docs/source/_static/img/gui-example-xtandem-processes.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/docs/source/_static/img/gui-example-xtandem-processes.png


--------------------------------------------------------------------------------
/docs/source/_static/img/gui-example-xtandem-progress.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/docs/source/_static/img/gui-example-xtandem-progress.png


--------------------------------------------------------------------------------
/docs/source/_static/img/gui-example-xtandem-psm-file.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/docs/source/_static/img/gui-example-xtandem-psm-file.png


--------------------------------------------------------------------------------
/docs/source/_static/img/gui-example-xtandem-psm-filetype.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/docs/source/_static/img/gui-example-xtandem-psm-filetype.png


--------------------------------------------------------------------------------
/docs/source/_static/img/gui-example-xtandem-spectra.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/docs/source/_static/img/gui-example-xtandem-spectra.png


--------------------------------------------------------------------------------
/docs/source/_static/img/gui-example-xtandem-start.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/docs/source/_static/img/gui-example-xtandem-start.png


--------------------------------------------------------------------------------
/docs/source/_static/img/gui-fixed-modifications.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/docs/source/_static/img/gui-fixed-modifications.png


--------------------------------------------------------------------------------
/docs/source/_static/img/gui-modification-mapping.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/docs/source/_static/img/gui-modification-mapping.png


--------------------------------------------------------------------------------
/docs/source/_static/img/gui-overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/docs/source/_static/img/gui-overview.png


--------------------------------------------------------------------------------
/docs/source/_static/img/gui-screenshot-old.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/docs/source/_static/img/gui-screenshot-old.png


--------------------------------------------------------------------------------
/docs/source/_static/img/gui-screenshot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/docs/source/_static/img/gui-screenshot.png


--------------------------------------------------------------------------------
/docs/source/_static/img/ms2rescore-overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/docs/source/_static/img/ms2rescore-overview.png


--------------------------------------------------------------------------------
/docs/source/_static/img/ms2rescore_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/docs/source/_static/img/ms2rescore_logo.png


--------------------------------------------------------------------------------
/docs/source/_static/img/percolator-install-path.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/docs/source/_static/img/percolator-install-path.png


--------------------------------------------------------------------------------
/docs/source/_static/img/qc-reports.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/docs/source/_static/img/qc-reports.png


--------------------------------------------------------------------------------
/docs/source/api/ms2rescore.feature_generators.rst:
--------------------------------------------------------------------------------
 1 | *****************************
 2 | ms2rescore.feature_generators
 3 | *****************************
 4 | 
 5 | .. automodule:: ms2rescore.feature_generators
 6 |    :members:
 7 | 
 8 | .. py:data:: ms2rescore.feature_generators.FEATURE_GENERATORS
 9 |    :type: dict
10 | 
11 |    Implemented feature generator classes by name.
12 | 
13 | 
14 | ms2rescore.feature_generators.base
15 | ##################################
16 | 
17 | .. automodule:: ms2rescore.feature_generators.base
18 |    :members:
19 | 
20 | 
21 | 
22 | ms2rescore.feature_generators.basic
23 | ####################################
24 | 
25 | .. automodule:: ms2rescore.feature_generators.basic
26 |    :members:
27 | 
28 | 
29 | 
30 | ms2rescore.feature_generators.deeplc
31 | ####################################
32 | 
33 | .. automodule:: ms2rescore.feature_generators.deeplc
34 |    :members:
35 | 
36 | 
37 | 
38 | ms2rescore.feature_generators.ionmob
39 | ####################################
40 | 
41 | .. automodule:: ms2rescore.feature_generators.ionmob
42 |    :members:
43 | 
44 | 
45 | 
46 | ms2rescore.feature_generators.maxquant
47 | ######################################
48 | 
49 | .. automodule:: ms2rescore.feature_generators.maxquant
50 |    :members:
51 | 
52 | 
53 | 
54 | ms2rescore.feature_generators.ms2pip
55 | ####################################
56 | 
57 | .. automodule:: ms2rescore.feature_generators.ms2pip
58 |    :members:
59 | 


--------------------------------------------------------------------------------
/docs/source/api/ms2rescore.report.rst:
--------------------------------------------------------------------------------
 1 | *****************
 2 | ms2rescore.report
 3 | *****************
 4 | 
 5 | .. automodule:: ms2rescore.report
 6 |    :members:
 7 | 
 8 | 
 9 | 
10 | Generate report
11 | ###############
12 | 
13 | .. automodule:: ms2rescore.report.generate
14 |    :members:
15 | 
16 | 
17 | Charts
18 | ######
19 | 
20 | .. automodule:: ms2rescore.report.charts
21 |    :members:
22 | 


--------------------------------------------------------------------------------
/docs/source/api/ms2rescore.rescoring_engines.rst:
--------------------------------------------------------------------------------
 1 | ****************************
 2 | ms2rescore.rescoring_engines
 3 | ****************************
 4 | 
 5 | .. automodule:: ms2rescore.rescoring_engines
 6 |    :members:
 7 | 
 8 | 
 9 | 
10 | Mokapot
11 | #######
12 | 
13 | .. automodule:: ms2rescore.rescoring_engines.mokapot
14 |    :members:
15 | 
16 | 
17 | 
18 | Percolator
19 | ##########
20 | 
21 | .. automodule:: ms2rescore.rescoring_engines.percolator
22 |    :members:
23 | 


--------------------------------------------------------------------------------
/docs/source/api/ms2rescore.rst:
--------------------------------------------------------------------------------
1 | **********
2 | ms2rescore
3 | **********
4 | 
5 | .. automodule:: ms2rescore
6 |    :members:
7 |    :imported-members:
8 |    :exclude-members: filterwarnings
9 | 


--------------------------------------------------------------------------------
/docs/source/cli.rst:
--------------------------------------------------------------------------------
 1 | **********************
 2 | Command line interface
 3 | **********************
 4 | 
 5 | Run MS²Rescore
 6 | ==============
 7 | 
 8 | .. argparse::
 9 |    :module: ms2rescore.__main__
10 |    :func: _argument_parser
11 |    :prog: ms2rescore
12 | 
13 | 
14 | Other commands
15 | ==============
16 | 
17 | Generate HTML report
18 | --------------------
19 | Generate a report from MS²Rescore result file(s):
20 | 
21 | .. code-block:: console
22 | 
23 |     ms2rescore-report [OPTIONS] OUTPUT_PREFIX
24 | 
25 | or
26 | 
27 | .. code-block:: console
28 | 
29 |     python -m ms2rescore.report [OPTIONS] OUTPUT_PREFIX
30 | 
31 | 
32 | 
33 | Start graphical user interface
34 | ------------------------------
35 | Start the graphical user interface. For more info, see :ref:`Graphical user interface`.
36 | 
37 | .. code-block:: console
38 | 
39 |     ms2rescore-gui
40 | 
41 | or
42 | 
43 | .. code-block:: console
44 | 
45 |     python -m ms2rescore.gui
46 | 


--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
 1 | """Configuration file for the Sphinx documentation builder."""
 2 | 
 3 | import os
 4 | import sys
 5 | 
 6 | sys.path.insert(0, os.path.abspath("../../"))
 7 | 
 8 | from ms2rescore import __version__  # noqa: E402
 9 | 
10 | # Project information
11 | project = "ms2rescore"
12 | author = "CompOmics"
13 | github_project_url = "https://github.com/compomics/ms2rescore/"
14 | github_doc_root = "https://github.com/compomics/ms2rescore/tree/main/docs/"
15 | release = __version__
16 | 
17 | # General configuration
18 | extensions = [
19 |     "nbsphinx",
20 |     "sphinx.ext.autodoc",
21 |     "sphinx.ext.autosectionlabel",
22 |     "sphinx.ext.autosummary",
23 |     "sphinx.ext.napoleon",
24 |     "sphinx.ext.intersphinx",
25 |     "sphinxarg.ext",
26 |     "sphinx_inline_tabs",
27 |     "sphinx_rtd_theme",
28 |     "myst_parser",
29 | ]
30 | source_suffix = [".rst"]
31 | master_doc = "index"
32 | exclude_patterns = ["_build"]
33 | 
34 | # Options for HTML output
35 | html_theme = "sphinx_rtd_theme"
36 | html_static_path = ["_static"]
37 | html_css_files = ["css/custom.css"]
38 | html_js_files = ["js/badge.min.js"]
39 | 
40 | # Autodoc options
41 | autodoc_default_options = {"members": True, "show-inheritance": True}
42 | autodoc_member_order = "bysource"
43 | autodoc_typehints = "description"
44 | autoclass_content = "init"
45 | 
46 | # Intersphinx options
47 | intersphinx_mapping = {
48 |     "python": ("https://docs.python.org/3", None),
49 |     "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None),
50 |     "numpy": ("https://numpy.org/doc/stable/", None),
51 |     "plotly": ("https://plotly.com/python-api-reference/", None),
52 |     "psm_utils": ("https://psm-utils.readthedocs.io/en/stable/", None),
53 |     "mokapot": ("https://mokapot.readthedocs.io/en/stable/", None),
54 | }
55 | 
56 | # nbsphinx options
57 | nbsphinx_execute = "never"
58 | 
59 | 
60 | def setup(app):  # NOTE(review): ``app`` is not referenced anywhere in this body
61 |     config = {  # noqa: F841
62 |         "enable_eval_rst": True,
63 |     }  # NOTE(review): ``config`` is never read, returned, or passed to ``app`` -- looks like a leftover; confirm before removing
64 | 


--------------------------------------------------------------------------------
/docs/source/config_schema.md:
--------------------------------------------------------------------------------
  1 | # MS²Rescore configuration
  2 | 
  3 | ## Properties
  4 | 
  5 | - **`ms2rescore`** *(object)*: General MS²Rescore settings. Cannot contain additional properties.
  6 |   - **`feature_generators`** *(object)*: Feature generators and their configurations. Default: `{"basic": {}, "ms2pip": {"model": "HCD", "ms2_tolerance": 0.02}, "deeplc": {}, "maxquant": {}}`.
  7 |     - **`.*`**: Refer to *[#/definitions/feature_generator](#definitions/feature_generator)*.
  8 |     - **`basic`**: Refer to *[#/definitions/basic](#definitions/basic)*.
  9 |     - **`ms2pip`**: Refer to *[#/definitions/ms2pip](#definitions/ms2pip)*.
 10 |     - **`deeplc`**: Refer to *[#/definitions/deeplc](#definitions/deeplc)*.
 11 |     - **`maxquant`**: Refer to *[#/definitions/maxquant](#definitions/maxquant)*.
 12 |     - **`ionmob`**: Refer to *[#/definitions/ionmob](#definitions/ionmob)*.
 13 |     - **`im2deep`**: Refer to *[#/definitions/im2deep](#definitions/im2deep)*.
 14 |   - **`rescoring_engine`** *(object)*: Rescoring engine to use and its configuration. Leave empty to skip rescoring and write features to file. Default: `{"mokapot": {}}`.
 15 |     - **`.*`**: Refer to *[#/definitions/rescoring_engine](#definitions/rescoring_engine)*.
 16 |     - **`percolator`**: Refer to *[#/definitions/percolator](#definitions/percolator)*.
 17 |     - **`mokapot`**: Refer to *[#/definitions/mokapot](#definitions/mokapot)*.
 18 |   - **`config_file`**: Path to configuration file.
 19 |     - **One of**
 20 |       - *string*
 21 |       - *null*
 22 |   - **`psm_file`**: Path to file with peptide-spectrum matches.
 23 |     - **One of**
 24 |       - *string*
 25 |       - *null*
 26 |       - *array*
 27 |         - **Items** *(string)*
 28 |   - **`psm_file_type`** *(string)*: PSM file type. By default inferred from file extension. Default: `"infer"`.
 29 |   - **`psm_reader_kwargs`** *(object)*: Keyword arguments passed to the PSM reader. Default: `{}`.
 30 |   - **`spectrum_path`**: Path to spectrum file or directory with spectrum files.
 31 |     - **One of**
 32 |       - *string*
 33 |       - *null*
 34 |   - **`output_path`**: Path and root name for output files.
 35 |     - **One of**
 36 |       - *string*
 37 |       - *null*
 38 |   - **`log_level`** *(string)*: Logging level. Must be one of: `["debug", "info", "warning", "error", "critical"]`.
 39 |   - **`id_decoy_pattern`**: Regex pattern used to identify the decoy PSMs in identification file. Default: `null`.
 40 |     - **One of**
 41 |       - *string*
 42 |       - *null*
 43 |   - **`spectrum_id_pattern`**: Regex pattern to extract index or scan number from spectrum file. Requires at least one capturing group. Default: `"(.*)"`.
 44 |     - **One of**
 45 |       - *string*
 46 |       - *null*
 47 |   - **`psm_id_pattern`**: Regex pattern to extract index or scan number from PSM file. Requires at least one capturing group. Default: `"(.*)"`.
 48 |     - **One of**
 49 |       - *string*
 50 |       - *null*
 51 |   - **`psm_id_rt_pattern`**: Regex pattern to extract retention time from PSM identifier. Requires at least one capturing group. Default: `null`.
 52 |     - **One of**
 53 |       - *string*
 54 |       - *null*
 55 |   - **`psm_id_im_pattern`**: Regex pattern to extract ion mobility from PSM identifier. Requires at least one capturing group. Default: `null`.
 56 |     - **One of**
 57 |       - *string*
 58 |       - *null*
 59 |   - **`lower_score_is_better`** *(boolean)*: Bool indicating if lower score is better. Default: `false`.
 60 |   - **`max_psm_rank_input`** *(number)*: Maximum rank of PSMs to use as input for rescoring. Minimum: `1`. Default: `10`.
 61 |   - **`max_psm_rank_output`** *(number)*: Maximum rank of PSMs to return after rescoring, before final FDR calculation. Minimum: `1`. Default: `1`.
 62 |   - **`modification_mapping`** *(object)*: Mapping of modification labels to each replacement label. Default: `{}`.
 63 |   - **`fixed_modifications`** *(object)*: Mapping of amino acids with fixed modifications to the modification name. Can contain additional properties. Default: `{}`.
 64 |   - **`processes`** *(number)*: Number of parallel processes to use; -1 for all available. Minimum: `-1`. Default: `-1`.
 65 |   - **`rename_to_usi`** *(boolean)*: Convert spectrum IDs to their universal spectrum identifier.
 66 |   - **`fasta_file`**: Path to FASTA file with protein sequences to use for protein inference.
 67 |     - **One of**
 68 |       - *string*
 69 |       - *null*
 70 |   - **`write_flashlfq`** *(boolean)*: Write results to a FlashLFQ-compatible file. Default: `false`.
 71 |   - **`write_report`** *(boolean)*: Write an HTML report with various QC metrics and charts. Default: `false`.
 72 |   - **`profile`** *(boolean)*: Write a txt report using cProfile for profiling. Default: `false`.
 73 | ## Definitions
 74 | 
 75 | - <a id="definitions/feature_generator"></a>**`feature_generator`** *(object)*: Feature generator configuration. Can contain additional properties.
 76 | - <a id="definitions/rescoring_engine"></a>**`rescoring_engine`** *(object)*: Rescoring engine configuration. Can contain additional properties.
 77 | - <a id="definitions/basic"></a>**`basic`** *(object)*: Basic feature generator configuration. Can contain additional properties. Refer to *[#/definitions/feature_generator](#definitions/feature_generator)*.
 78 | - <a id="definitions/ms2pip"></a>**`ms2pip`** *(object)*: MS²PIP feature generator configuration. Can contain additional properties. Refer to *[#/definitions/feature_generator](#definitions/feature_generator)*.
 79 |   - **`model`** *(string)*: MS²PIP model to use (see MS²PIP documentation). Default: `"HCD"`.
 80 |   - **`ms2_tolerance`** *(number)*: MS2 error tolerance in Da. Minimum: `0`. Default: `0.02`.
 81 | - <a id="definitions/deeplc"></a>**`deeplc`** *(object)*: DeepLC feature generator configuration. Can contain additional properties. Refer to *[#/definitions/feature_generator](#definitions/feature_generator)*.
 82 |   - **`calibration_set_size`**: Calibration set size. Default: `0.15`.
 83 |     - **One of**
 84 |       - *integer*
 85 |       - *number*
 86 | - <a id="definitions/maxquant"></a>**`maxquant`** *(object)*: MaxQuant feature generator configuration. Can contain additional properties. Refer to *[#/definitions/feature_generator](#definitions/feature_generator)*.
 87 | - <a id="definitions/ionmob"></a>**`ionmob`** *(object)*: Ion mobility feature generator configuration using Ionmob. Can contain additional properties. Refer to *[#/definitions/feature_generator](#definitions/feature_generator)*.
 88 |   - **`ionmob_model`** *(string)*: Path to Ionmob model directory. Default: `"GRUPredictor"`.
 89 |   - **`reference_dataset`** *(string)*: Path to Ionmob reference dataset file. Default: `"Meier_unimod.parquet"`.
 90 |   - **`tokenizer`** *(string)*: Path to tokenizer json file. Default: `"tokenizer.json"`.
 91 | - <a id="definitions/im2deep"></a>**`im2deep`** *(object)*: Ion mobility feature generator configuration using IM2Deep. Can contain additional properties. Refer to *[#/definitions/feature_generator](#definitions/feature_generator)*.
 92 |   - **`reference_dataset`** *(string)*: Path to IM2Deep reference dataset file. Default: `"Meier_unimod.parquet"`.
 93 | - <a id="definitions/mokapot"></a>**`mokapot`** *(object)*: Mokapot rescoring engine configuration. Additional properties are passed to the Mokapot brew function. Can contain additional properties. Refer to *[#/definitions/rescoring_engine](#definitions/rescoring_engine)*.
 94 |   - **`train_fdr`** *(number)*: FDR threshold for training Mokapot. Minimum: `0`. Maximum: `1`. Default: `0.01`.
 95 |   - **`write_weights`** *(boolean)*: Write Mokapot weights to a text file. Default: `false`.
 96 |   - **`write_txt`** *(boolean)*: Write Mokapot results to a text file. Default: `false`.
 97 | - <a id="definitions/percolator"></a>**`percolator`** *(object)*: Percolator rescoring engine configuration. Can contain additional properties. Refer to *[#/definitions/rescoring_engine](#definitions/rescoring_engine)*.
 98 |   - **`init-weights`**: Weights file for scoring function. Default: `false`.
 99 |     - **One of**
100 |       - *string*
101 |       - *null*
102 | 


--------------------------------------------------------------------------------
/docs/source/contributing.rst:
--------------------------------------------------------------------------------
1 | .. include:: ../../CONTRIBUTING.rst
2 | 


--------------------------------------------------------------------------------
/docs/source/gui.rst:
--------------------------------------------------------------------------------
  1 | ************************
  2 | Graphical user interface
  3 | ************************
  4 | 
  5 | 
  6 | Installation
  7 | ============
  8 | 
  9 | The MS²Rescore desktop application can be installed on Windows with a
 10 | :ref:`one-click installer <Windows installer>`. Alternatively, or on other platforms, follow the
 11 | :ref:`Python package installation instructions <Python package>`.
 12 | 
 13 | 
 14 | Starting the application
 15 | ========================
 16 | 
 17 | If installed with the one-click installer, simply start MS²Rescore from the start menu or with the
 18 | desktop shortcut. Otherwise, start the application from the
 19 | :ref:`command line <command line interface>` with the command ``ms2rescore-gui`` or with
 20 | ``python -m ms2rescore.gui``.
 21 | 
 22 | 
 23 | Application overview
 24 | ====================
 25 | 
 26 | The MS²Rescore graphical user interface is divided into three main sections:
 27 | 
 28 | 1. A side bar with references, window controls, and the current version number.
 29 | 2. The configuration pane with input file selection, and parameter configuration.
 30 | 3. The application log pane with the status output.
 31 | 
 32 | On the bottom of the window, the application log level can be selected. The log level determines
 33 | which messages are shown in the application log pane. On the bottom right, the application can be
 34 | started with the "Start" button. The "Stop" button can be used to stop the application at any time
 35 | during the execution.
 36 | 
 37 | .. figure:: ../_static/img/gui-overview.png
 38 |    :width: 100%
 39 |    :alt: MS²Rescore graphical user interface
 40 | 
 41 |    Overview of the MS²Rescore desktop application.
 42 | 
 43 | 
 44 | Configuring MS²Rescore
 45 | ======================
 46 | 
 47 | Input file selection
 48 | ^^^^^^^^^^^^^^^^^^^^
 49 | 
 50 | The main inputs for MS²Rescore are the PSM file(s) (search engine output) and the spectrum file(s).
 51 | See :ref:`Input files` for more information.
 52 | 
 53 | One or more PSM files can be selected from the file system with the "Browse files" button.
 54 | To ensure correct reading of the file, specify the file type from the drop-down menu.
 55 | 
 56 | .. figure:: ../_static/img/gui-example-xtandem-psm-file.png
 57 |    :width: 60%
 58 |    :alt: PSM file selection
 59 | 
 60 |    PSM file selection
 61 | 
 62 | 
 63 | .. figure:: ../_static/img/gui-example-xtandem-psm-filetype.png
 64 |    :width: 60%
 65 |    :alt: PSM file type selection
 66 | 
 67 |    PSM file type selection
 68 | 
 69 | 
 70 | To select a single spectrum file (mzML or MGF), click the "Browse files" button. To select a
 71 | folder with spectrum files, click the "Browse directories" button.
 72 | 
 73 | .. figure:: ../_static/img/gui-example-xtandem-spectra.png
 74 |    :width: 60%
 75 |    :alt: Spectrum file selection
 76 | 
 77 |    Spectrum file selection
 78 | 
 79 | 
 80 | Optionally, for protein inference information, a FASTA file can also be provided. Ensure that
 81 | this file contains the same protein sequences as the search database used for the search engine.
 82 | If a FASTA file is provided, protein digestion settings may need to be configured in the rescoring
 83 | engine configuration.
 84 | 
 85 | 
 86 | Number of processes
 87 | ^^^^^^^^^^^^^^^^^^^
 88 | 
 89 | The number of processes can be configured to run the application in parallel. The default is to
 90 | use all available CPU cores. The number of processes can be reduced to avoid overloading the
 91 | system or to avoid memory issues. A number under 16 is recommended.
 92 | 
 93 | 
 94 | Modification mapping
 95 | ^^^^^^^^^^^^^^^^^^^^
 96 | 
 97 | Depending on the search engine, the peptide modification labels will have to be mapped
 98 | to labels that can be understood by MS²Rescore. For example, X!Tandem uses mass shift labels, such
 99 | as ``+57.02146`` for carbamidomethylation. However, tools such as DeepLC require the atomic
100 | composition for all modifications. As this cannot be derived from the mass shift (or other labels
101 | that are not known to MS²Rescore), a mapping has to be provided.
102 | 
103 | .. figure:: ../_static/img/gui-example-xtandem-modifications-before.png
104 |    :width: 70%
105 |    :alt: Modification mapping
106 | 
107 |    Modification mapping configuration. Click the plus sign to add more rows.
108 | 
109 | 
110 | In modification mapping, click the plus sign to add more rows to the table, or click the minus sign
111 | to remove rows. In the first column "Search engine label", enter the modification label as it
112 | appears in the PSM file. In the second column "ProForma label", enter a ProForma-compatible
113 | modification label. More information on accepted labels can be found in :ref:`Parsing modification
114 | labels`.
115 | 
116 | .. figure:: ../_static/img/gui-example-xtandem-modifications-filled.png
117 |    :width: 70%
118 |    :alt: Modification mapping
119 | 
120 |    Modification mapping configuration for the X!Tandem example. Mass shift labels from X!Tandem
121 |    are mapped to ProForma UniMod labels.
122 | 
123 | 
124 | Fixed modifications
125 | ^^^^^^^^^^^^^^^^^^^
126 | 
127 | If the search engine PSM file does not contain information on which fixed modifications were used,
128 | this must be specified in the MS²Rescore configuration. At the time of writing, only MaxQuant
129 | ``msms.txt`` files do not contain this information. For all other search engines, this information
130 | is contained in the PSM file and the following field can be left empty.
131 | 
132 | 
133 | Advanced options
134 | ^^^^^^^^^^^^^^^^
135 | 
136 | Most advanced options are only required for specific use cases or with specific search engine PSM
137 | files. All options are listed in the :doc:`userguide/configuration` section of the user guide.
138 | 
139 | In the X!Tandem example, only the `PSM ID regex pattern` option is required. This option is used
140 | to extract the spectrum ID from the PSM file. The spectrum ID is used to match the PSM to the
141 | spectrum file. See :ref:`Mapping PSMs to spectra` for more information.
142 | 
143 | .. figure:: ../_static/img/gui-example-xtandem-advanced.png
144 |    :width: 70%
145 |    :alt: Advanced options
146 | 
147 |    Advanced options
148 | 
149 | 
150 | For reference, all parameters for the X!Tandem example are also listed in the example
151 | configuration file on
152 | `GitHub <https://github.com/compomics/ms2rescore/blob/main/examples/xtandem-ms2rescore.toml>`_.
153 | 
154 | 
155 | Starting the rescoring process
156 | ==============================
157 | 
158 | After the configuration is complete, click the "Start" button to start the rescoring process.
159 | The application will show the progress in the application log pane. The log level can be changed
160 | before the run to show more or less information.
161 | 
162 | .. figure:: ../_static/img/gui-example-xtandem-progress.png
163 |    :width: 100%
164 |    :alt: Running application
165 | 
166 |    Running application with log output
167 | 
168 | 
169 | A pop up will appear when the application is finished, or when an error occurred. If an error
170 | has occurred, the error message in the pop up should provide some insight into what went wrong.
171 | If the error message is not clear, please report the issue on the
172 | `GitHub issue tracker <https://github.com/compomics/ms2rescore/issues>`_ or post your question on
173 | the `Discussion forum <https://github.com/compomics/ms2rescore/discussions>`_.
174 | 
175 | .. figure:: ../_static/img/gui-example-xtandem-finished.png
176 |    :width: 40%
177 |    :alt: Pop up when MS²Rescore is finished
178 | 
179 |    Pop up when MS²Rescore is finished
180 | 
181 | 
182 | Viewing the results
183 | ===================
184 | 
185 | After a successful run, the output files can be found in the directory of the input PSM file, or
186 | in the specified output directory. The most important files are the ``*.ms2rescore.psms.tsv`` file,
187 | which contains all PSMs with their new scores, and the ``*.ms2rescore.report.html`` file, which
188 | contains interactive charts that visualize the results and various quality control metrics. See
189 | :ref:`Output files` for more information.
190 | 
191 | .. figure:: ../_static/img/gui-example-xtandem-output-files.png
192 |    :width: 100%
193 |    :alt: Output files
194 | 
195 |    Overview of the output files after rescoring the X!Tandem example.
196 | 
197 | Double click the ``*.ms2rescore.report.html`` file to open it in the default web browser:
198 | 
199 | .. figure:: ../_static/img/qc-reports.png
200 |    :width: 100%
201 |    :alt: Rescoring report
202 | 
203 |    Rescoring QC report with interactive charts.
204 | 


--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
 1 | .. include::  ../../README.md
 2 |    :parser: myst_parser.sphinx_
 3 | 
 4 | .. toctree::
 5 |    :caption: About
 6 |    :hidden:
 7 |    :includehidden:
 8 | 
 9 |    About <self>
10 |    installation
11 |    contributing
12 | 
13 | .. toctree::
14 |    :caption: User guide
15 |    :hidden:
16 |    :includehidden:
17 |    :glob:
18 | 
19 |    userguide/*
20 | 
21 | 
22 | .. toctree::
23 |    :caption: Tutorials
24 |    :hidden:
25 |    :includehidden:
26 |    :glob:
27 | 
28 |    tutorials/*
29 | 
30 | 
31 | .. toctree::
32 |    :caption: Python API reference
33 |    :hidden:
34 |    :includehidden:
35 |    :glob:
36 | 
37 |    api/*
38 | 
39 | 
40 | .. toctree::
41 |    :caption: Command line interface
42 |    :hidden:
43 |    :includehidden:
44 | 
45 |    cli
46 | 
47 | 
48 | 
49 | .. toctree::
50 |    :caption: Graphical user interface
51 |    :hidden:
52 |    :includehidden:
53 | 
54 |    gui
55 | 


--------------------------------------------------------------------------------
/docs/source/installation.rst:
--------------------------------------------------------------------------------
 1 | ************
 2 | Installation
 3 | ************
 4 | 
 5 | Python package
 6 | ==============
 7 | 
 8 | .. image:: https://flat.badgen.net/badge/install%20with/pip/green?icon=pypi
 9 |     :alt: Install with pip
10 |     :target: https://pypi.org/project/ms2rescore/
11 | 
12 | .. image:: https://flat.badgen.net/badge/install%20with/conda/green?icon=conda
13 |     :alt: Install with conda
14 |     :target: https://anaconda.org/bioconda/ms2rescore
15 | 
16 | MS²Rescore is installable as a Python package on Windows, macOS and Linux.
17 | 
18 | In a fresh `virtual environment <https://docs.python.org/3/library/venv.html>`_, run::
19 | 
20 |     pip install ms2rescore
21 | 
22 | 
23 | Or, in a fresh `conda environment <https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html>`_, run::
24 | 
25 |     conda install -c bioconda ms2rescore
26 | 
27 | Bioconda packages are only available for Linux and macOS.
28 | 
29 | 
30 | Windows installer
31 | =================
32 | 
33 | .. image:: https://flat.badgen.net/badge/install%20for/windows/blue?icon=windows
34 |     :alt: Get for Windows
35 |     :target: https://github.com/compomics/ms2rescore/releases/latest
36 | 
37 | Download the ``.exe`` file from the
38 | `latest release <https://github.com/compomics/ms2rescore/releases/latest>`_
39 | and go through the installation steps. If Microsoft Defender SmartScreen displays a warning, click
40 | "More info" and then click "Run anyway".
41 | 
42 | 
43 | Docker container
44 | ================
45 | 
46 | .. image:: https://flat.badgen.net/badge/pull/biocontainer/blue?icon=docker
47 |     :alt: Pull with Docker
48 |     :target: https://quay.io/repository/biocontainers/ms2rescore
49 | 
50 | First check the latest version tag on
51 | `biocontainers/ms2rescore/tags <https://quay.io/repository/biocontainers/ms2rescore?tab=tags>`_.
52 | Then pull and run the container with:
53 | 
54 | .. code-block:: bash
55 | 
56 |    docker container run -v <working-directory>:/data -w /data quay.io/biocontainers/ms2rescore:<tag> ms2rescore <ms2rescore-arguments>
57 | 
58 | where ``<working-directory>`` is the absolute path to the directory with your MS²Rescore input
59 | files, ``<tag>`` is the container version tag, and ``<ms2rescore-arguments>`` are the ms2rescore
60 | command line options (see :ref:`Command line interface`).
61 | 
62 | 
63 | Installing Percolator
64 | =====================
65 | 
66 | To use :ref:`percolator` as rescoring engine, it must be installed separately. Percolator is
67 | available for most platforms and can be downloaded from the
68 | `GitHub releases page <https://github.com/percolator/percolator/releases/latest>`_. Ensure that
69 | the ``percolator`` executable is in your ``PATH``. On Windows, this can be done by checking the
70 | ``Add percolator to the system PATH for current user`` option during installation:
71 | 
72 | .. figure:: ../_static/img/percolator-install-path.png
73 |    :width: 60%
74 |    :alt: Percolator installation on Windows
75 | 
76 | .. note::
77 |    Alternatively, :ref:`mokapot` can be used as rescoring engine, which does not require a separate
78 |    installation.
79 | 
80 | For development
81 | ===============
82 | 
83 | Clone this repository and use pip to install an editable version:
84 | 
85 | .. code-block:: bash
86 | 
87 |    pip install --editable .
88 | 


--------------------------------------------------------------------------------
/docs/source/userguide/input-files.rst:
--------------------------------------------------------------------------------
 1 | ###########
 2 | Input files
 3 | ###########
 4 | 
 5 | PSM file(s)
 6 | ===========
 7 | 
 8 | The **peptide-spectrum match (PSM) file** is generally the output from a proteomics search engine.
 9 | This file serves as the main input to MS²Rescore.
10 | 
11 | The PSM file should contain **all putative identifications** made by the search engine, including
12 | both target and decoy PSMs. Ensure that the search engine was configured to include decoy entries
13 | in the search database and was operated with **target-decoy competition** enabled (i.e.,
14 | considering both target and decoy sequences simultaneously during the search).
15 | 
16 | .. attention::
17 |    As a general rule, MS²Rescore always needs access to **all target and decoy PSMs, without any
18 |    FDR-filtering**. For some search engines, this means that the FDR-filter should be disabled or
19 |    set to 100%.
20 | 
21 | 
22 | One or multiple PSM files can be provided at once. Note that merging PSMs from different MS runs
23 | could have an impact on the correctness of the FDR control. Combining multiple PSM files should
24 | generally only be done for LC-fractionated mass spectrometry runs.
25 | 
26 | Various PSM file types are supported. The type can be specified with the ``psm_file_type`` option.
27 | Check the list of :py:mod:`psm_utils` tags in the
28 | :external+psm_utils:ref:`supported file formats <supported file formats>` section. Depending on the
29 | file extension, the file type can also be inferred from the file name. In that case, the
30 | ``psm_file_type`` option can be set to ``infer``.
31 | 
32 | 
33 | Spectrum file(s)
34 | ================
35 | 
36 | Spectrum files are required for some feature generators. Both ``mzML`` and ``mgf`` formats are
37 | supported. The ``spectrum_path`` option can be either a single file or a folder. If the
38 | ``spectrum_path`` is a folder, MS²Rescore will search for spectrum files in the directory according
39 | to the run names in the PSM file.
40 | 


--------------------------------------------------------------------------------
/docs/source/userguide/output-files.rst:
--------------------------------------------------------------------------------
 1 | ############
 2 | Output files
 3 | ############
 4 | 
 5 | Depending on the options you choose, the following files will be created. The PSMs, peptides, and
 6 | proteins in these files are not yet filtered at any false discovery rate (FDR) level.
 7 | 
 8 | Main output files:
 9 | 
10 | +-----------------------------------+----------------------------------------------------------------------------------+
11 | | File                              | Description                                                                      |
12 | +===================================+==================================================================================+
13 | | ``<prefix>.psms.tsv``             | Main output file with rescored PSMs and their new scores                         |
14 | +-----------------------------------+----------------------------------------------------------------------------------+
15 | | ``<prefix>.report.html``          | HTML report with interactive plots showing the results and some quality control  |
16 | |                                   | metrics.                                                                         |
17 | +-----------------------------------+----------------------------------------------------------------------------------+
18 | 
19 | Log and configuration files:
20 | 
21 | +--------------------------------------+--------------------------------------------------------------------------------------+
22 | | File                                 | Description                                                                          |
23 | +======================================+======================================================================================+
24 | | ``<prefix>.log.txt``                 | Log file with information about the run                                              |
25 | +--------------------------------------+--------------------------------------------------------------------------------------+
26 | | ``<prefix>.log.html``                | HTML version of the log file                                                         |
27 | +--------------------------------------+--------------------------------------------------------------------------------------+
28 | | ``<prefix>.full-config.json``        | Full configuration file with all the parameters used                                 |
29 | |                                      | as configured in the user-provided configuration file, the command line or graphical |
30 | |                                      | interface, and the default values.                                                   |
31 | +--------------------------------------+--------------------------------------------------------------------------------------+
32 | | ``<prefix>.feature_names.tsv``       | List of the features and their descriptions                                          |
33 | +--------------------------------------+--------------------------------------------------------------------------------------+
34 | 
35 | Rescoring engine files:
36 | 
37 | +-------------------------------------------------------------+-------------------------------------------------------------+
38 | | File                                                        | Description                                                 |
39 | +=============================================================+=============================================================+
40 | | ``<prefix>.<mokapot/percolator>.psms.txt``                  | PSMs and their new scores at PSM-level FDR.                 |
41 | +-------------------------------------------------------------+-------------------------------------------------------------+
42 | | ``<prefix>.<mokapot/percolator>.peptides.txt``              | Peptides and their new scores at peptide-level FDR.         |
43 | +-------------------------------------------------------------+-------------------------------------------------------------+
44 | | ``<prefix>.<mokapot/percolator>.proteins.txt``              | Proteins and their new scores at protein-level FDR.         |
45 | +-------------------------------------------------------------+-------------------------------------------------------------+
46 | | ``<prefix>.<mokapot/percolator>.decoy.psms.txt``            | Decoy PSMs and their new scores at PSM-level FDR.           |
47 | +-------------------------------------------------------------+-------------------------------------------------------------+
48 | | ``<prefix>.<mokapot/percolator>.decoy.peptides.txt``        | Decoy peptides and their new scores at peptide-level FDR.   |
49 | +-------------------------------------------------------------+-------------------------------------------------------------+
50 | | ``<prefix>.<mokapot/percolator>.decoy.proteins.txt``        | Decoy proteins and their new scores at protein-level FDR.   |
51 | +-------------------------------------------------------------+-------------------------------------------------------------+
52 | | ``<prefix>.<mokapot/percolator>.weights.txt``               | Feature weights, showing feature usage in the rescoring run |
53 | +-------------------------------------------------------------+-------------------------------------------------------------+
54 | 
55 | If no rescoring engine is selected, if Percolator is selected, or in DEBUG mode, the following
56 | files will also be written:
57 | 
58 | +-------------------------------------------------------------+-----------------------------------------------------------+
59 | | File                                                        | Description                                               |
60 | +=============================================================+===========================================================+
61 | | ``<prefix>.pin``                                            | PSMs with all features for rescoring                      |
62 | +-------------------------------------------------------------+-----------------------------------------------------------+
63 | 


--------------------------------------------------------------------------------
/docs/source/userguide/search-engine-notes.rst:
--------------------------------------------------------------------------------
 1 | #################################
 2 | Notes for specific search engines
 3 | #################################
 4 | 
 5 | MSGFPlus
 6 | ========
 7 | 
 8 | - Run MSGFPlus in a concatenated target-decoy search, with the ``-addFeatures 1`` flag.
 9 | 
10 | 
11 | MaxQuant
12 | ========
13 | 
14 | - Run MaxQuant without FDR filtering (set the FDR thresholds to 1, i.e., 100%).
15 | - Make sure to correctly configure both ``modification_mapping`` and ``fixed_modifications``.
16 |   See :ref:`Parsing modification labels` for more information.
17 | 


--------------------------------------------------------------------------------
/docs/source/userguide/tims2Rescore.rst:
--------------------------------------------------------------------------------
 1 | .. _tims2rescore:
 2 | 
 3 | TIMS²Rescore
 4 | ============
 5 | 
 6 | Introduction
 7 | ------------
 8 | 
 9 | `TIMS²Rescore` is a specialized version of `MS²Rescore` for timsTOF DDA-PASEF data. This guide
10 | provides an overview of how to use TIMS²Rescore effectively.
11 | 
12 | Installing TIMS²Rescore
13 | -----------------------
14 | 
15 | TIMS²Rescore is part of the ``ms2rescore`` package. Check out the :ref:`installation` instructions
16 | to get started.
17 | 
18 | Usage
19 | -----
20 | 
21 | To use TIMS²Rescore, follow these steps:
22 | 
23 | 1. Prepare your input files:
24 |     - To boost DDA-PASEF peptide identifications, TIMS²Rescore requires the spectrum files from
25 |       the timsTOF instrument and the PSM files with identifications from a supported search engine.
26 |     - Make sure that the PSM file format comes from a supported search engine or is a standard
27 |       format such as mzIdentML (See
28 |       :external+psm_utils:ref:`supported file formats <supported file formats>`).
29 |     - Spectrum files can directly be passed as ``.d`` or `miniTDF` raw data or can optionally be
30 |       first converted to mzML or MGF. We recommend using the format that was passed to the search
31 |       engine.
32 | 
33 | 2. Run ``tims2rescore``:
34 |     - Open a terminal or command prompt.
35 |     - Navigate to the directory where your input files are located.
36 |     - Execute the following command:
37 | 
38 |       .. code-block:: bash
39 | 
40 |           tims2rescore -p <path_to_psm_file> -s <path_to_spectrum_file>
41 | 
42 |     Replace `<path_to_psm_file>` and `<path_to_spectrum_file>` with the actual paths to your
43 |     PSM and spectrum files.
44 | 
45 |     .. admonition:: note
46 | 
47 |         By default, specialized timsTOF models will be used for predictions. Optionally you can
48 |         further configure TIMS²Rescore through a configuration file. For more information, refer
49 |         to the :ref:`configuration` tab in the user guide.
50 | 
51 | 3. Review the results:
52 |     - Once the ``tims2rescore`` process completes, you will find the rescoring results in the
53 |       same directory as the input files.
54 |     - If you want a detailed report of the rescoring performance, you can either set
55 |       `write_report` to `True` in the configuration file or use the `--write_report` option in the
56 |       ``tims2rescore`` command line. Alternatively, run the following command after rescoring:
57 | 
58 |       .. code-block:: bash
59 | 
60 |           ms2rescore-report <output_prefix>
61 | 
62 |       Replace `<output_prefix>` with the actual output prefix of the result files. For instance,
63 |       if the output file is ``identifications.psms.tsv``, then the output prefix is
64 |       ``identifications``.
65 | 
66 | Additional options
67 | ------------------
68 | 
69 | `tims2rescore` provides additional options to customize rescoring. You can explore these options
70 | by running the following command:
71 | 
72 | .. code-block:: bash
73 | 
74 |     tims2rescore --help
75 | 
76 | 
77 | 


--------------------------------------------------------------------------------
/examples/mascot-ms2rescore.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "ms2rescore": {
 3 |         "psm_file": "examples/data/search/mascot/F010956.mzid",
 4 |         "psm_file_type": "mzid",
 5 |         "spectrum_path": "examples/data/spectra/F010956.mgf",
 6 |         "psm_id_pattern": "(.*)",
 7 |         "id_decoy_pattern": "^rev_",
 8 |         "fasta_file": "examples/data/fasta/uniprot-proteome-human-contaminants.fasta"
 9 |     }
10 | }


--------------------------------------------------------------------------------
/examples/mascot-ms2rescore.toml:
--------------------------------------------------------------------------------
1 | [ms2rescore]
2 | psm_file = "examples/data/search/mascot/F010956.mzid"
3 | psm_file_type = "mzid"
4 | spectrum_path = "examples/data/spectra/F010956.mgf"
5 | psm_id_pattern = "(.*)"
6 | id_decoy_pattern = '^rev_'
7 | fasta_file = "examples/data/fasta/uniprot-proteome-human-contaminants.fasta"
8 | 


--------------------------------------------------------------------------------
/examples/maxquant-ms2rescore.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "ms2rescore": {
 3 |         "psm_file": "examples/data/search/maxquant/msms.txt",
 4 |         "psm_file_type": "msms",
 5 |         "spectrum_path": "examples/data/spectra",
 6 |         "spectrum_id_pattern": ".*scan=(\\d+)$",
 7 |         "fasta_file": "examples/data/fasta/uniprot-proteome-human-contaminants.fasta",
 8 |         "modification_mapping": {
 9 |             "gl": "Gln->pyro-Glu",
10 |             "ox": "Oxidation",
11 |             "ac": "Acetylation",
12 |             "de": "Deamidation"
13 |         },
14 |         "fixed_modifications": {
15 |             "Carbamidomethyl": [
16 |                 "C"
17 |             ]
18 |         }
19 |     }
20 | }


--------------------------------------------------------------------------------
/examples/maxquant-ms2rescore.toml:
--------------------------------------------------------------------------------
 1 | [ms2rescore]
 2 | psm_file = "examples/data/search/maxquant/msms.txt"
 3 | psm_file_type = "msms"
 4 | spectrum_path = "examples/data/spectra"
 5 | spectrum_id_pattern = '.*scan=(\d+)$'                                        # Single quotes for literal regex string
 6 | fasta_file = "examples/data/fasta/uniprot-proteome-human-contaminants.fasta"
 7 | 
 8 | [ms2rescore.modification_mapping]
 9 | "gl" = "Gln->pyro-Glu"
10 | "ox" = "Oxidation"
11 | "ac" = "Acetylation"
12 | "de" = "Deamidation"
13 | 
14 | [ms2rescore.fixed_modifications]
15 | "Carbamidomethyl" = ["C"]
16 | 


--------------------------------------------------------------------------------
/examples/msgfplus-ms2rescore.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "ms2rescore": {
 3 |         "psm_file": "examples/id/msgfplus.pin",
 4 |         "psm_file_type": "percolator",
 5 |         "psm_reader_kwargs": {
 6 |             "score_column": "PSMScore"
 7 |         },
 8 |         "log_level": "debug",
 9 |         "processes": 16,
10 |         "rescoring_engine": {
11 |             "mokapot": {
12 |                 "fasta_file": "examples/proteins/uniprot-proteome-human-contaminants.fasta",
13 |                 "write_weights": true,
14 |                 "write_txt": true
15 |             }
16 |         }
17 |     }
18 | }


--------------------------------------------------------------------------------
/examples/msgfplus-ms2rescore.toml:
--------------------------------------------------------------------------------
 1 | [ms2rescore]
 2 | psm_file = "examples/id/msgfplus.pin"
 3 | psm_file_type = "percolator"
 4 | psm_reader_kwargs = { "score_column" = "PSMScore" }
 5 | log_level = "debug"
 6 | processes = 16
 7 | 
 8 | [ms2rescore.rescoring_engine.mokapot]
 9 | fasta_file = "examples/proteins/uniprot-proteome-human-contaminants.fasta"
10 | write_weights = true
11 | write_txt = true
12 | 


--------------------------------------------------------------------------------
/examples/peptideshaker-ms2rescore.json:
--------------------------------------------------------------------------------
1 | {
2 |     "ms2rescore": {
3 |         "psm_file": "examples/data/search/peptideshaker/peptideshaker-example.mzid",
4 |         "spectrum_path": "examples/data/spectra/qExactive01819.mzML"
5 |     }
6 | }


--------------------------------------------------------------------------------
/examples/peptideshaker-ms2rescore.toml:
--------------------------------------------------------------------------------
1 | [ms2rescore]
2 | psm_file = "examples/data/search/peptideshaker/peptideshaker-example.mzid"
3 | spectrum_path = "examples/data/spectra/qExactive01819.mzML"
4 | 


--------------------------------------------------------------------------------
/examples/sage-ms2rescore.json:
--------------------------------------------------------------------------------
1 | {
2 |     "ms2rescore": {
3 |         "psm_file": "examples/data/search/sage/results.sage.tsv",
4 |         "psm_file_type": "sage",
5 |         "spectrum_path": "examples/data/spectra/qExactive01819.mzML",
6 |         "fasta_file": "examples/data/fasta/uniprot-human-reviewed-trypsin-june-2021_concatenated_target_decoy.fasta"
7 |     }
8 | }


--------------------------------------------------------------------------------
/examples/sage-ms2rescore.toml:
--------------------------------------------------------------------------------
1 | [ms2rescore]
2 | psm_file = "examples/data/search/sage/results.sage.tsv"
3 | psm_file_type = "sage"
4 | spectrum_path = "examples/data/spectra/qExactive01819.mzML"
5 | fasta_file = "examples/data/fasta/uniprot-human-reviewed-trypsin-june-2021_concatenated_target_decoy.fasta"
6 | 


--------------------------------------------------------------------------------
/examples/xtandem-ms2rescore.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "ms2rescore": {
 3 |         "psm_file": "examples/data/search/xtandem/pyro.t.xml",
 4 |         "psm_file_type": "xtandem",
 5 |         "spectrum_path": "examples/data/spectra/Velos005137.mgf",
 6 |         "psm_id_pattern": "(\\S+).*",
 7 |         "modification_mapping": {
 8 |             "+57.022": "U:Carbamidomethyl",
 9 |             "+15.994": "U:Oxidation",
10 |             "+39.9954": "U:Pyro-carbamidomethyl",
11 |             "+42.0106": "U:Acetyl",
12 |             "-17.0266": "U:Gln->pyro-Glu",
13 |             "-18.0106": "U:Glu->pyro-Glu"
14 |         }
15 |     }
16 | }
17 | 


--------------------------------------------------------------------------------
/examples/xtandem-ms2rescore.toml:
--------------------------------------------------------------------------------
 1 | [ms2rescore]
 2 | psm_file = "examples/data/search/xtandem/pyro.t.xml"
 3 | psm_file_type = "xtandem"
 4 | spectrum_path = "examples/data/spectra/Velos005137.mgf"
 5 | psm_id_pattern = '(\S+).*'
 6 | 
 7 | [ms2rescore.modification_mapping]
 8 | "+57.022" = "U:Carbamidomethyl"
 9 | "+15.994" = "U:Oxidation"
10 | "+39.9954" = "U:Pyro-carbamidomethyl"
11 | "+42.0106" = "U:Acetyl"
12 | "-17.0266" = "U:Gln->pyro-Glu"
13 | "-18.0106" = "U:Glu->pyro-Glu"
14 | 


--------------------------------------------------------------------------------
/img/gui-screenshot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/img/gui-screenshot.png


--------------------------------------------------------------------------------
/img/ms2rescore.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/img/ms2rescore.ico


--------------------------------------------------------------------------------
/img/ms2rescore_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/img/ms2rescore_logo.png


--------------------------------------------------------------------------------
/img/ms2rescore_logo.svg:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8" standalone="no"?>
 2 | <!-- Created with Inkscape (http://www.inkscape.org/) -->
 3 | 
 4 | <svg
 5 |    width="99.958801mm"
 6 |    height="99.958801mm"
 7 |    viewBox="0 0 99.958801 99.958801"
 8 |    version="1.1"
 9 |    id="svg8"
10 |    xmlns="http://www.w3.org/2000/svg"
11 |    xmlns:svg="http://www.w3.org/2000/svg"
12 |    xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
13 |    xmlns:cc="http://creativecommons.org/ns#"
14 |    xmlns:dc="http://purl.org/dc/elements/1.1/">
15 |   <defs
16 |      id="defs2" />
17 |   <metadata
18 |      id="metadata5">
19 |     <rdf:RDF>
20 |       <cc:Work
21 |          rdf:about="">
22 |         <dc:format>image/svg+xml</dc:format>
23 |         <dc:type
24 |            rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
25 |       </cc:Work>
26 |     </rdf:RDF>
27 |   </metadata>
28 |   <g
29 |      id="layer1"
30 |      transform="translate(-55.020599,-98.520607)">
31 |     <path
32 |        id="rect4528"
33 |        style="fill:#ffffff;fill-opacity:0;stroke-width:0.217005"
34 |        d="M 55.020599,98.520607 H 154.9794 V 198.47941 H 55.020599 Z" />
35 |     <path
36 |        id="path4532"
37 |        style="fill:#2c3e50;stroke-width:0.264583"
38 |        d="M 154.9794,148.5 A 49.979401,49.979397 0 0 1 105,198.4794 49.979401,49.979397 0 0 1 55.020599,148.5 49.979401,49.979397 0 0 1 105,98.520603 49.979401,49.979397 0 0 1 154.9794,148.5 Z" />
39 |     <g
40 |        aria-label="MS²Rescore"
41 |        transform="matrix(0.26458333,0,0,0.26458333,-10.959949,42.545818)"
42 |        id="flowRoot3713"
43 |        style="font-weight:300;font-size:24px;line-height:1.25;font-family:Bahnschrift;-inkscape-font-specification:'Bahnschrift, Light';text-align:center;letter-spacing:0px;word-spacing:0px;text-anchor:middle;fill:#fafafa">
44 |       <path
45 |          d="m 306.55638,420.11009 -0.10667,-19.78666 -9.59999,16.10666 h -4.69333 l -9.54666,-15.57332 v 19.25332 h -9.76 v -37.33331 h 8.69333 l 13.11999,21.54665 12.8,-21.54665 h 8.69332 l 0.10667,37.33331 z"
46 |          style="font-weight:normal;font-size:53.3333px;font-family:'Montserrat ExtraBold';-inkscape-font-specification:'Montserrat ExtraBold, '"
47 |          id="path2422" />
48 |       <path
49 |          d="m 336.84962,420.85675 q -4.53333,0 -8.79999,-1.11999 -4.26667,-1.12 -6.93333,-2.98667 l 3.46666,-7.78666 q 2.50667,1.65333 5.81333,2.66666 3.30667,1.01334 6.50666,1.01334 6.08,0 6.08,-3.04 0,-1.6 -1.76,-2.34667 -1.70666,-0.8 -5.54666,-1.65333 -4.21333,-0.90667 -7.04,-1.92 -2.82666,-1.06666 -4.85333,-3.36 -2.02666,-2.29333 -2.02666,-6.18666 0,-3.41333 1.86666,-6.13333 1.86667,-2.77333 5.54666,-4.37333 3.73334,-1.6 9.12,-1.6 3.68,0 7.25333,0.85333 3.57333,0.8 6.29333,2.4 l -3.25333,7.84 q -5.33333,-2.88 -10.34666,-2.88 -3.14667,0 -4.58667,0.96 -1.44,0.90667 -1.44,2.4 0,1.49333 1.70667,2.24 1.70666,0.74666 5.49333,1.54666 4.26666,0.90667 7.03999,1.97333 2.82667,1.01334 4.85333,3.30667 2.08,2.24 2.08,6.13333 0,3.36 -1.86666,6.08 -1.86667,2.71999 -5.6,4.37333 -3.73333,1.59999 -9.06666,1.59999 z"
50 |          style="font-weight:normal;font-size:53.3333px;font-family:'Montserrat ExtraBold';-inkscape-font-specification:'Montserrat ExtraBold, '"
51 |          id="path2424" />
52 |       <path
53 |          d="m 375.56956,397.3901 v 5.28 h -19.03999 v -4.21333 l 8.85333,-7.62666 q 1.33333,-1.12 1.81333,-1.86667 0.48,-0.8 0.48,-1.49333 0,-0.90667 -0.74666,-1.44 -0.74667,-0.58667 -2.29333,-0.58667 -1.44,0 -2.56,0.58667 -1.06667,0.53333 -1.70667,1.6 l -5.17333,-2.77333 q 1.28,-2.24 3.84,-3.52 2.56,-1.28 6.23999,-1.28 4.32,0 6.93333,1.92 2.61334,1.86666 2.61334,4.96 0,1.70666 -0.90667,3.25333 -0.90667,1.54666 -3.36,3.73333 l -4.05333,3.46666 z"
54 |          style="font-weight:normal;font-size:53.3333px;font-family:'Montserrat ExtraBold';-inkscape-font-specification:'Montserrat ExtraBold, '"
55 |          id="path2426" />
56 |       <path
57 |          d="m 397.48946,410.19009 h -5.75999 v 9.92 h -10.56 v -37.33331 h 17.06666 q 5.06666,0 8.8,1.70666 3.73333,1.65334 5.75999,4.8 2.02667,3.09333 2.02667,7.30666 0,4.05333 -1.92,7.09333 -1.86667,2.98667 -5.38666,4.69333 l 8.05332,11.73333 h -11.30666 z m 6.66667,-13.59999 q 0,-2.61333 -1.65334,-4.05333 -1.65333,-1.44 -4.90666,-1.44 h -5.86666 v 10.93333 h 5.86666 q 3.25333,0 4.90666,-1.38667 1.65334,-1.44 1.65334,-4.05333 z"
58 |          style="font-weight:normal;font-size:53.3333px;font-family:'Montserrat ExtraBold';-inkscape-font-specification:'Montserrat ExtraBold, '"
59 |          id="path2428" />
60 |       <path
61 |          d="m 449.70276,405.7101 q 0,0.10666 -0.16,2.50666 h -21.01332 q 0.64,2.24 2.45333,3.46667 1.86667,1.17333 4.64,1.17333 2.08,0 3.57333,-0.58667 1.54667,-0.58666 3.09333,-1.92 l 5.33333,5.54667 q -4.21333,4.69333 -12.31999,4.69333 -5.06667,0 -8.90666,-1.92 -3.84,-1.92 -5.97333,-5.33333 -2.08,-3.41333 -2.08,-7.73333 0,-4.26666 2.02666,-7.62666 2.08,-3.41333 5.70667,-5.33333 3.62666,-1.92 8.15999,-1.92 4.32,0 7.84,1.81333 3.52,1.76 5.54666,5.17333 2.08,3.36 2.08,8 z m -15.41332,-7.78666 q -2.34667,0 -3.94667,1.33333 -1.54666,1.33333 -1.97333,3.62666 h 11.83999 q -0.42666,-2.29333 -2.02666,-3.62666 -1.54667,-1.33333 -3.89333,-1.33333 z"
62 |          style="font-weight:normal;font-size:53.3333px;font-family:'Montserrat ExtraBold';-inkscape-font-specification:'Montserrat ExtraBold, '"
63 |          id="path2430" />
64 |       <path
65 |          d="m 464.90265,420.59009 q -3.62666,0 -7.14666,-0.8 -3.52,-0.85333 -5.65333,-2.18667 l 3.04,-6.93333 q 1.97333,1.22667 4.69333,1.97334 2.72,0.69333 5.38666,0.69333 2.45334,0 3.46667,-0.48 1.06667,-0.53333 1.06667,-1.49333 0,-0.96 -1.28,-1.33334 -1.22667,-0.42666 -3.94667,-0.79999 -3.46666,-0.42667 -5.91999,-1.12 -2.4,-0.69334 -4.16,-2.56 -1.76,-1.86667 -1.76,-5.22667 0,-2.77333 1.65333,-4.90666 1.65333,-2.18667 4.8,-3.41333 3.2,-1.28 7.62666,-1.28 3.14667,0 6.24,0.64 3.09333,0.64 5.17333,1.81333 l -3.04,6.88 q -3.84,-2.13333 -8.32,-2.13333 -2.39999,0 -3.51999,0.58666 -1.12,0.53333 -1.12,1.44 0,1.01333 1.22666,1.44 1.22667,0.37333 4.05333,0.8 3.57333,0.53333 5.92,1.22667 2.34667,0.69333 4.05333,2.55999 1.76,1.81334 1.76,5.12 0,2.72 -1.65333,4.90666 -1.65334,2.13334 -4.90667,3.36 -3.19999,1.22667 -7.73333,1.22667 z"
66 |          style="font-weight:normal;font-size:53.3333px;font-family:'Montserrat ExtraBold';-inkscape-font-specification:'Montserrat ExtraBold, '"
67 |          id="path2432" />
68 |       <path
69 |          d="m 498.34258,420.59009 q -4.79999,0 -8.58666,-1.92 -3.78666,-1.92 -5.92,-5.33333 -2.07999,-3.41333 -2.07999,-7.73333 0,-4.32 2.07999,-7.67999 2.13334,-3.41334 5.92,-5.28 3.78667,-1.92 8.58666,-1.92 4.90667,0 8.48,2.13333 3.57333,2.08 5.06666,5.81333 l -7.83999,4 q -1.97334,-3.89333 -5.76,-3.89333 -2.72,0 -4.53333,1.81333 -1.76,1.81333 -1.76,5.01333 0,3.25333 1.76,5.12 1.81333,1.81333 4.53333,1.81333 3.78666,0 5.76,-3.89333 l 7.83999,4 q -1.49333,3.73333 -5.06666,5.86666 -3.57333,2.08 -8.48,2.08 z"
70 |          style="font-weight:normal;font-size:53.3333px;font-family:'Montserrat ExtraBold';-inkscape-font-specification:'Montserrat ExtraBold, '"
71 |          id="path2434" />
72 |       <path
73 |          d="m 529.91581,420.59009 q -4.64,0 -8.37333,-1.92 -3.73333,-1.92 -5.86667,-5.33333 -2.07999,-3.41333 -2.07999,-7.73333 0,-4.26666 2.07999,-7.67999 2.13334,-3.41334 5.81333,-5.28 3.73333,-1.92 8.42667,-1.92 4.69333,0 8.42666,1.92 3.73333,1.86666 5.81333,5.28 2.08,3.35999 2.08,7.67999 0,4.32 -2.08,7.73333 -2.08,3.41333 -5.81333,5.33333 -3.73333,1.92 -8.42666,1.92 z m 0,-8.05333 q 2.66666,0 4.37333,-1.81333 1.70666,-1.86667 1.70666,-5.12 0,-3.2 -1.70666,-5.01333 -1.70667,-1.81333 -4.37333,-1.81333 -2.66667,0 -4.37334,1.81333 -1.70666,1.81333 -1.70666,5.01333 0,3.25333 1.70666,5.12 1.70667,1.81333 4.37334,1.81333 z"
74 |          style="font-weight:normal;font-size:53.3333px;font-family:'Montserrat ExtraBold';-inkscape-font-specification:'Montserrat ExtraBold, '"
75 |          id="path2436" />
76 |       <path
77 |          d="m 560.58236,394.6701 q 1.54666,-1.97333 3.99999,-2.93333 2.50667,-1.01333 5.70667,-1.01333 v 9.11999 q -1.38667,-0.16 -2.29333,-0.16 -3.25334,0 -5.12,1.76 -1.81333,1.76 -1.81333,5.38667 v 13.27999 h -10.13333 v -28.90665 h 9.65333 z"
78 |          style="font-weight:normal;font-size:53.3333px;font-family:'Montserrat ExtraBold';-inkscape-font-specification:'Montserrat ExtraBold, '"
79 |          id="path2438" />
80 |       <path
81 |          d="m 603.72885,405.7101 q 0,0.10666 -0.16,2.50666 h -21.01332 q 0.64,2.24 2.45333,3.46667 1.86667,1.17333 4.64,1.17333 2.08,0 3.57333,-0.58667 1.54667,-0.58666 3.09333,-1.92 l 5.33333,5.54667 q -4.21333,4.69333 -12.31999,4.69333 -5.06666,0 -8.90666,-1.92 -3.84,-1.92 -5.97333,-5.33333 -2.08,-3.41333 -2.08,-7.73333 0,-4.26666 2.02667,-7.62666 2.08,-3.41333 5.70666,-5.33333 3.62666,-1.92 8.16,-1.92 4.31999,0 7.83999,1.81333 3.52,1.76 5.54666,5.17333 2.08,3.36 2.08,8 z m -15.41332,-7.78666 q -2.34667,0 -3.94667,1.33333 -1.54666,1.33333 -1.97333,3.62666 h 11.83999 q -0.42666,-2.29333 -2.02666,-3.62666 -1.54667,-1.33333 -3.89333,-1.33333 z"
82 |          style="font-weight:normal;font-size:53.3333px;font-family:'Montserrat ExtraBold';-inkscape-font-specification:'Montserrat ExtraBold, '"
83 |          id="path2440" />
84 |     </g>
85 |   </g>
86 | </svg>
87 | 


--------------------------------------------------------------------------------
/ms2rescore.spec:
--------------------------------------------------------------------------------
  1 | import importlib.metadata
  2 | import os
  3 | import re
  4 | 
  5 | from PyInstaller.building.build_main import COLLECT, EXE, PYZ, Analysis
  6 | from PyInstaller.utils.hooks import collect_all
  7 | 
  8 | from ms2rescore import __version__
  9 | 
# Package info
# Name given to the executable and to the collected output folder.
exe_name = "ms2rescore"
# Entry-point script that PyInstaller analyses (the GUI launcher).
script_name = "ms2rescore/gui/__main__.py"
icon = "./img/ms2rescore.ico"
# Directory the build is started from; added to the analysis search path.
location = os.getcwd()
# Distribution whose declared requirements seed the dependency collection.
project = "ms2rescore"
bundle_name = "ms2rescore"
bundle_identifier = f"{bundle_name}.{__version__}"

# Packages to collect in addition to the project's non-optional requirements.
extra_requirements = {"ionmob"}

# Requirements config
# Matches every name that contains a dot; such names are skipped when walking requirements.
skip_requirements_regex = r"^(?:.*\..*)"
 23 | 
 24 | 
 25 | # Collect hidden imports and data for all requirements
 26 | requirements = importlib.metadata.requires(project)
 27 | requirements = {
 28 |     re.match(r"^[\w\-]+", req)[0]  # Remove version specifiers
 29 |     for req in requirements
 30 |     if "; extra ==" not in req  # Exclude optional dependencies
 31 | }
 32 | requirements.update([project, "xgboost"])
 33 | requirements.update(extra_requirements)
 34 | 
 35 | hidden_imports = set()
 36 | datas = []
 37 | binaries = []
 38 | checked = set()
 39 | while requirements:
 40 |     requirement = requirements.pop()
 41 |     if re.match(skip_requirements_regex, requirement):
 42 |         continue
 43 |     if requirement in ["tomli"]:
 44 |         continue
 45 |     checked.add(requirement)
 46 |     module_version = importlib.metadata.version(re.match(r"^[\w\-]+", requirement)[0])
 47 |     try:
 48 |         datas_, binaries_, hidden_imports_ = collect_all(requirement, include_py_files=True)
 49 |     except ImportError:
 50 |         continue
 51 |     datas += datas_
 52 |     hidden_imports_ = set(hidden_imports_)
 53 |     if "" in hidden_imports_:
 54 |         hidden_imports_.remove("")
 55 |     if None in hidden_imports_:
 56 |         hidden_imports_.remove(None)
 57 |     requirements |= hidden_imports_ - checked
 58 |     hidden_imports |= hidden_imports_
 59 | 
 60 | hidden_imports = sorted([h for h in hidden_imports if "tests" not in h.split(".")])
 61 | hidden_imports = [h for h in hidden_imports if "__pycache__" not in h]
 62 | datas = [
 63 |     d
 64 |     for d in datas
 65 |     if ("__pycache__" not in d[0]) and (d[1] not in [".", "build", "dist", "Output"])
 66 | ]
 67 | datas += [("ms2rescore\package_data", "package_data")]
 68 | 
 69 | block_cipher = None
 70 | # Build package
 71 | a = Analysis(
 72 |     [script_name],
 73 |     pathex=[location],
 74 |     binaries=binaries,
 75 |     datas=datas,
 76 |     hiddenimports=hidden_imports,
 77 |     hookspath=[],
 78 |     hooksconfig={},
 79 |     runtime_hooks=[],
 80 |     excludes=[],
 81 |     win_no_prefer_redirects=False,
 82 |     win_private_assemblies=False,
 83 |     cipher=block_cipher,
 84 |     noarchive=False,
 85 | )
 86 | 
 87 | pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
 88 | 
 89 | exe = EXE(
 90 |     pyz,
 91 |     a.scripts,
 92 |     [],
 93 |     exclude_binaries=True,
 94 |     name=exe_name,
 95 |     debug=False,
 96 |     bootloader_ignore_signals=False,
 97 |     strip=False,
 98 |     upx=True,
 99 |     console=False,
100 |     windowed=True,
101 |     disable_windowed_traceback=False,
102 |     target_arch=None,
103 |     codesign_identity=None,
104 |     entitlements_file=None,
105 |     icon="./img/ms2rescore.ico",
106 | )
107 | 
108 | coll = COLLECT(
109 |     exe, a.binaries, a.zipfiles, a.datas, strip=False, upx=True, upx_exclude=[], name=exe_name
110 | )
111 | 


--------------------------------------------------------------------------------
/ms2rescore/__init__.py:
--------------------------------------------------------------------------------
"""Modular and user-friendly platform for AI-assisted rescoring of peptide identifications."""

__version__ = "3.2.0.dev2"
__all__ = [
    "parse_configurations",
    "rescore",
]

from warnings import filterwarnings

# mzmlb is not used, so hdf5plugin is not needed
# The filter is installed before the package imports below so that it is already active
# when they run.
filterwarnings(
    "ignore",
    message="hdf5plugin is missing",
    category=UserWarning,
    module="psims.mzmlb",
)
18 | 
19 | from ms2rescore.config_parser import parse_configurations  # noqa: E402
20 | from ms2rescore.core import rescore  # noqa: E402
21 | 


--------------------------------------------------------------------------------
/ms2rescore/__main__.py:
--------------------------------------------------------------------------------
  1 | """MS²Rescore: Sensitive PSM rescoring with predicted MS² peak intensities and RTs."""
  2 | 
  3 | import argparse
  4 | import cProfile
  5 | import importlib.resources
  6 | import json
  7 | import logging
  8 | import sys
  9 | from pathlib import Path
 10 | from typing import Union
 11 | 
 12 | from rich.console import Console
 13 | from rich.logging import RichHandler
 14 | from rich.text import Text
 15 | 
 16 | from ms2rescore import __version__, package_data
 17 | from ms2rescore.config_parser import parse_configurations
 18 | from ms2rescore.core import rescore
 19 | from ms2rescore.exceptions import MS2RescoreConfigurationError
 20 | 
# Matplotlib is optional here: when it can be imported, raise its log level to "warning";
# when it cannot, carry on without it.
try:
    import matplotlib.pyplot as plt

    plt.set_loglevel("warning")
except ImportError:
    pass

# Log level names accepted in the configuration, mapped to `logging` constants.
LOG_MAPPING = {
    "critical": logging.CRITICAL,
    "error": logging.ERROR,
    "warning": logging.WARNING,
    "info": logging.INFO,
    "debug": logging.DEBUG,
}
LOGGER = logging.getLogger(__name__)
# Recording console: `main` exports everything printed to it as an HTML log file.
CONSOLE = Console(record=True)
 37 | 
 38 | 
 39 | def _print_credits(tims=False):
 40 |     """Print software credits to terminal."""
 41 |     text = Text()
 42 |     text.append("\n")
 43 |     if tims:
 44 |         text.append("TIMS²Rescore", style="bold link https://github.com/compomics/tims2rescore")
 45 |     else:
 46 |         text.append("MS²Rescore", style="bold link https://github.com/compomics/ms2rescore")
 47 |     text.append(f" (v{__version__})\n", style="bold")
 48 |     if tims:
 49 |         text.append("MS²Rescore tuned for timsTOF DDA-PASEF data.\n", style="italic")
 50 |     text.append("Developed at CompOmics, VIB / Ghent University, Belgium.\n")
 51 |     text.append("Please cite: ")
 52 |     if tims:
 53 |         text.append(
 54 |             "Declercq & Devreese et al. bioRxiv (2024)",
 55 |             style="link https://doi.org/10.1101/2024.05.29.596400",
 56 |         )
 57 |     else:
 58 |         text.append(
 59 |             "Buur & Declercq et al. JPR (2024)",
 60 |             style="link https://doi.org/10.1021/acs.jproteome.3c00785",
 61 |         )
 62 |     text.append("\n")
 63 |     if tims:
 64 |         text.stylize("#006cb5")
 65 |     CONSOLE.print(text)
 66 | 
 67 | 
 68 | def _argument_parser() -> argparse.ArgumentParser:
 69 |     """Parse CLI arguments."""
 70 |     parser = argparse.ArgumentParser(
 71 |         description="MS²Rescore: Sensitive PSM rescoring with predicted features.",
 72 |         formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=42),
 73 |     )
 74 |     parser.add_argument("-v", "--version", action="version", version=__version__)
 75 |     parser.add_argument(
 76 |         "-p",
 77 |         "--psm-file",
 78 |         metavar="FILE",
 79 |         action="store",
 80 |         type=str,
 81 |         nargs="*",
 82 |         dest="psm_file",
 83 |         help="path to PSM file (PIN, mzIdentML, MaxQuant msms, X!Tandem XML...)",
 84 |     )
 85 |     parser.add_argument(
 86 |         "-t",
 87 |         "--psm-file-type",
 88 |         metavar="STR",
 89 |         action="store",
 90 |         type=str,
 91 |         dest="psm_file_type",
 92 |         help="PSM file type (default: 'infer')",
 93 |     )
 94 |     parser.add_argument(
 95 |         "-s",
 96 |         "--spectrum-path",
 97 |         metavar="FILE/DIR",
 98 |         action="store",
 99 |         type=str,
100 |         dest="spectrum_path",
101 |         help="path to MGF/mzML spectrum file or directory with spectrum files (default: derived\
102 |             from identification file)",
103 |     )
104 |     parser.add_argument(
105 |         "-c",
106 |         "--config-file",
107 |         metavar="FILE",
108 |         action="store",
109 |         type=str,
110 |         dest="config_file",
111 |         help="path to MS²Rescore configuration file (see README.md)",
112 |     )
113 |     parser.add_argument(
114 |         "-o",
115 |         "--output-path",
116 |         metavar="FILE",
117 |         action="store",
118 |         type=str,
119 |         dest="output_path",
120 |         help="Path and stem for output file names (default: derive from identification file)",
121 |     )
122 |     parser.add_argument(
123 |         "-l",
124 |         "--log-level",
125 |         metavar="STR",
126 |         action="store",
127 |         type=str,
128 |         dest="log_level",
129 |         help="logging level (default: `info`)",
130 |     )
131 |     parser.add_argument(
132 |         "-n",
133 |         "--processes",
134 |         metavar="INT",
135 |         action="store",
136 |         type=int,
137 |         dest="processes",
138 |         help="number of parallel processes available to MS²Rescore",
139 |     )
140 |     parser.add_argument(
141 |         "-f",
142 |         "--fasta-file",
143 |         metavar="FILE",
144 |         action="store",
145 |         type=str,
146 |         dest="fasta_file",
147 |         help="path to FASTA file",
148 |     )
149 |     parser.add_argument(
150 |         "--write-report",
151 |         # metavar="BOOL",
152 |         action="store_true",
153 |         dest="write_report",
154 |         help="boolean to enable profiling with cProfile",
155 |     )
156 |     parser.add_argument(
157 |         "--profile",
158 |         # metavar="BOOL",
159 |         action="store_true",
160 |         # type=bool,
161 |         # dest="profile",
162 |         help="boolean to enable profiling with cProfile",
163 |     )
164 | 
165 |     return parser
166 | 
167 | 
def _setup_logging(passed_level: str, log_file: Union[str, Path]):
    """
    Configure root logging to write to both a log file and the Rich console.

    Parameters
    ----------
    passed_level
        Name of the log level; must be one of the keys of ``LOG_MAPPING``.
    log_file
        Path of the log file. The file is opened in write mode, replacing existing content.

    Raises
    ------
    MS2RescoreConfigurationError
        If ``passed_level`` is not a known log level name.

    """
    level_is_known = passed_level in LOG_MAPPING
    if not level_is_known:
        raise MS2RescoreConfigurationError(
            f"Invalid log level '{passed_level}'. "
            f"Valid levels are: {', '.join(LOG_MAPPING.keys())}"
        )

    # One handler per destination: plain-text file and the shared recording console
    file_handler = logging.FileHandler(log_file, mode="w", encoding="utf-8")
    console_handler = RichHandler(rich_tracebacks=True, console=CONSOLE, show_path=False)

    logging.basicConfig(
        format="%(name)s // %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        level=LOG_MAPPING[passed_level],
        handlers=[file_handler, console_handler],
    )
184 | 
185 | 
def profile(fnc, filepath):
    """
    Wrap a function so that each call is profiled with cProfile.

    Parameters
    ----------
    fnc
        Callable to profile.
    filepath
        Path stem (``str`` or path-like) for the statistics file; ``.profile.prof`` is
        appended to it.

    Returns
    -------
    callable
        Wrapper that calls ``fnc`` with the given arguments, writes the profiling statistics
        to ``<filepath>.profile.prof``, and returns the result of ``fnc``.

    """
    import functools

    # Accept path-like objects as well as plain strings for the output stem
    stats_path = str(filepath) + ".profile.prof"

    @functools.wraps(fnc)
    def inner(*args, **kwargs):
        profiler = cProfile.Profile()
        profiler.enable()
        try:
            return fnc(*args, **kwargs)
        finally:
            # Write the statistics even if the profiled call raises, so that a failing run
            # can still be inspected.
            profiler.disable()
            profiler.dump_stats(stats_path)

    return inner
196 | 
197 | 
def main_tims():
    """
    Run MS²Rescore command-line interface in TIMS²Rescore mode.

    Thin wrapper that calls :func:`main` with ``tims=True``.
    """
    main(tims=True)
201 | 
202 | 
def main(tims=False):
    """
    Run MS²Rescore command-line interface.

    Prints the credits, parses the command-line arguments, cascades the configurations
    (optional TIMS defaults, optional configuration file, command-line arguments), sets up
    logging, and runs rescoring. The console output is always saved as an HTML log next to
    the other output files.

    Parameters
    ----------
    tims
        If truthy, run in TIMS²Rescore mode: print the TIMS²Rescore credits and load the
        packaged ``config_default_tims.json`` as the first configuration.

    Notes
    -----
    Exits the process with status 1 if the configuration is invalid or if rescoring raises
    an exception.

    """
    _print_credits(tims)

    # Parse CLI arguments and configuration file
    parser = _argument_parser()
    cli_args = parser.parse_args()

    configurations = []
    if tims:
        # Close the packaged resource deterministically instead of leaking the file handle
        with importlib.resources.open_text(
            package_data, "config_default_tims.json"
        ) as tims_defaults:
            configurations.append(json.load(tims_defaults))
    if cli_args.config_file:
        configurations.append(cli_args.config_file)
    # CLI arguments are added last, so they take priority over the other configurations
    configurations.append(cli_args)

    try:
        config = parse_configurations(configurations)
    except MS2RescoreConfigurationError as e:
        LOGGER.critical(e)
        sys.exit(1)

    # Setup logging
    _setup_logging(
        config["ms2rescore"]["log_level"], config["ms2rescore"]["output_path"] + ".log.txt"
    )

    # Run MS²Rescore
    try:
        if cli_args.profile:
            profiled_rescore = profile(rescore, config["ms2rescore"]["output_path"])
            profiled_rescore(configuration=config)
        else:
            rescore(configuration=config)
    except Exception as e:
        LOGGER.exception(e)
        sys.exit(1)
    finally:
        # Runs on success and on failure (including the ``sys.exit`` above)
        CONSOLE.save_html(config["ms2rescore"]["output_path"] + ".log.html")
243 | 
244 | 
# Run the command-line interface when this module is executed as a script.
if __name__ == "__main__":
    main()
247 | 


--------------------------------------------------------------------------------
/ms2rescore/config_parser.py:
--------------------------------------------------------------------------------
  1 | """Parse configuration from command line arguments and configuration files."""
  2 | 
  3 | import importlib.resources
  4 | import json
  5 | import multiprocessing as mp
  6 | from argparse import Namespace
  7 | from pathlib import Path
  8 | from typing import Dict, List, Union
  9 | 
 10 | try:
 11 |     import tomllib
 12 | except ImportError:
 13 |     import tomli as tomllib
 14 | 
 15 | from cascade_config import CascadeConfig
 16 | 
 17 | from ms2rescore import package_data
 18 | from ms2rescore.exceptions import MS2RescoreConfigurationError
 19 | 
 20 | 
 21 | def _parse_output_path(configured_path, psm_file_path):
 22 |     """Parse output path and make parent dirs if required."""
 23 |     psm_file_stem = Path(psm_file_path).stem + ".ms2rescore"
 24 |     if configured_path:
 25 |         configured_path = Path(configured_path)
 26 |         # If existing dir, add psm_file stem
 27 |         if configured_path.is_dir():
 28 |             return (configured_path / psm_file_stem).as_posix()
 29 |         # If parent is existing dir, use as is (user intended as path + stem)
 30 |         elif configured_path.parent.is_dir():
 31 |             return configured_path.as_posix()
 32 |         # If none-existing dir, create dirs and add psm_file stem
 33 |         else:
 34 |             configured_path.mkdir(parents=True, exist_ok=True)
 35 |             return (configured_path / psm_file_stem).as_posix()
 36 |     else:
 37 |         # If none, use psm_file path and stem
 38 |         return (Path(psm_file_path).parent / psm_file_stem).as_posix()
 39 | 
 40 | 
 41 | def _validate_filenames(config: Dict) -> Dict:
 42 |     """Validate and infer input/output filenames."""
 43 |     # psm_file should be provided
 44 |     if not config["ms2rescore"]["psm_file"]:
 45 |         raise MS2RescoreConfigurationError("PSM file should be provided.")
 46 | 
 47 |     # if psm_file is a string turn into a list else leave as is
 48 |     if isinstance(config["ms2rescore"]["psm_file"], str):
 49 |         config["ms2rescore"]["psm_file"] = [config["ms2rescore"]["psm_file"]]
 50 | 
 51 |     # all provided psm_file(s) should exist
 52 |     psm_files = []
 53 |     for psm_file in config["ms2rescore"]["psm_file"]:
 54 |         id_file = Path(psm_file)
 55 |         if not id_file.is_file():
 56 |             raise FileNotFoundError(id_file)
 57 |         psm_files.append(id_file.as_posix())
 58 |     config["ms2rescore"]["psm_file"] = psm_files
 59 | 
 60 |     # spectrum_path should either be None, or existing path to file or dir
 61 |     if config["ms2rescore"]["spectrum_path"]:
 62 |         spectrum_path = Path(config["ms2rescore"]["spectrum_path"])
 63 |         if not spectrum_path.exists():
 64 |             raise FileNotFoundError(spectrum_path)
 65 |         config["ms2rescore"]["spectrum_path"] = spectrum_path.as_posix()
 66 | 
 67 |     # Parse output_path
 68 |     config["ms2rescore"]["output_path"] = _parse_output_path(
 69 |         config["ms2rescore"]["output_path"], config["ms2rescore"]["psm_file"][0]
 70 |     )
 71 | 
 72 |     # Parse config_file as posix path to avoid combination of forward and backward slashes
 73 |     if config["ms2rescore"]["config_file"]:
 74 |         config["ms2rescore"]["config_file"] = Path(config["ms2rescore"]["config_file"]).as_posix()
 75 | 
 76 |     return config
 77 | 
 78 | 
 79 | def _validate_processes(config: Dict) -> Dict:
 80 |     """Validate requested processes with available cpu count."""
 81 |     n_available = mp.cpu_count()
 82 |     if (config["ms2rescore"]["processes"] == -1) or (
 83 |         config["ms2rescore"]["processes"] > n_available
 84 |     ):
 85 |         config["ms2rescore"]["processes"] = n_available
 86 |     return config
 87 | 
 88 | 
 89 | def parse_configurations(configurations: List[Union[dict, str, Path, Namespace]]) -> Dict:
 90 |     """
 91 |     Parse and validate MS²Rescore configuration files and CLI arguments.
 92 | 
 93 |     Default configuration, user configuration files, and CLI/class arguments are parsed
 94 |     in cascading order, with each successive configuration taking priority over the
 95 |     previous.
 96 | 
 97 |     Parameters
 98 |     ----------
 99 |     configurations: Dict, str, Path, Namespace, List[Dict, str, Path, Namespace]
100 |         configuration dictionary, path to configuration files, argparse Namespace, or a list of the
101 |         above.
102 |     """
103 |     if not isinstance(configurations, list):
104 |         configurations = [configurations]
105 | 
106 |     # Initialize CascadeConfig with validation schema and defaults
107 |     config_schema = importlib.resources.open_text(package_data, "config_schema.json")
108 |     config_default = importlib.resources.open_text(package_data, "config_default.json")
109 |     cascade_conf = CascadeConfig(
110 |         validation_schema=json.load(config_schema),
111 |         none_overrides_value=False,
112 |         max_recursion_depth=1,
113 |     )
114 |     cascade_conf.add_dict(json.load(config_default))
115 | 
116 |     # Add configurations
117 |     for config in configurations:
118 |         if not config:
119 |             continue
120 |         if isinstance(config, dict):
121 |             cascade_conf.add_dict(config)
122 |         elif isinstance(config, str) or isinstance(config, Path):
123 |             if Path(config).suffix.lower() == ".json":
124 |                 cascade_conf.add_json(config)
125 |             elif Path(config).suffix.lower() == ".toml":
126 |                 cascade_conf.add_dict(dict(tomllib.load(Path(config).open("rb"))))
127 |             else:
128 |                 raise MS2RescoreConfigurationError(
129 |                     "Unknown file extension for configuration file. Should be `json` or " "`toml`."
130 |                 )
131 |         elif isinstance(config, Namespace):
132 |             cascade_conf.add_namespace(config, subkey="ms2rescore")
133 |         else:
134 |             raise ValueError(
135 |                 "Configuration should be a dictionary, argparse Namespace, or path to a "
136 |                 "configuration file."
137 |             )
138 | 
139 |     # Parse configurations
140 |     config = cascade_conf.parse()
141 | 
142 |     # Validate and infer filenames and number of parallel processes
143 |     config = _validate_filenames(config)
144 |     config = _validate_processes(config)
145 | 
146 |     # Convert feature_generators and rescoring_engine names to lowercase
147 |     config["ms2rescore"]["feature_generators"] = {
148 |         k.lower(): v for k, v in config["ms2rescore"]["feature_generators"].items()
149 |     }
150 |     config["ms2rescore"]["rescoring_engine"] = {
151 |         k.lower(): v for k, v in config["ms2rescore"]["rescoring_engine"].items()
152 |     }
153 | 
154 |     return config
155 | 


--------------------------------------------------------------------------------
/ms2rescore/exceptions.py:
--------------------------------------------------------------------------------
 1 | """MS²Rescore exceptions."""
 2 | 
 3 | 
 4 | class MS2RescoreError(Exception):
 5 |     """Generic MS2Rescore error."""
 6 | 
 7 |     pass
 8 | 
 9 | 
10 | class MS2RescoreConfigurationError(MS2RescoreError):
11 |     """Invalid MS2Rescore configuration."""
12 | 
13 |     pass
14 | 
15 | 
16 | class IDFileParsingError(MS2RescoreError):
17 |     """Identification file parsing error."""
18 | 
19 |     pass
20 | 
21 | 
22 | class ModificationParsingError(IDFileParsingError):
23 |     """Identification file parsing error."""
24 | 
25 |     pass
26 | 
27 | 
28 | class MissingValuesError(MS2RescoreError):
29 |     """Missing values in PSMs and/or spectra."""
30 | 
31 |     pass
32 | 
33 | 
34 | class ReportGenerationError(MS2RescoreError):
35 |     """Error while generating report."""
36 | 
37 |     pass
38 | 
39 | 
40 | class RescoringError(MS2RescoreError):
41 |     """Error while rescoring PSMs."""
42 | 
43 |     pass
44 | 


--------------------------------------------------------------------------------
/ms2rescore/feature_generators/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Feature generators to add rescoring features to PSMs from various (re)sources and prediction tools.
 3 | """
 4 | 
 5 | from ms2rescore.feature_generators.basic import BasicFeatureGenerator
 6 | from ms2rescore.feature_generators.deeplc import DeepLCFeatureGenerator
 7 | from ms2rescore.feature_generators.ionmob import IonMobFeatureGenerator
 8 | from ms2rescore.feature_generators.maxquant import MaxQuantFeatureGenerator
 9 | from ms2rescore.feature_generators.ms2pip import MS2PIPFeatureGenerator
10 | from ms2rescore.feature_generators.im2deep import IM2DeepFeatureGenerator
11 | 
# Registry of the available feature generator classes, keyed by their lowercase name.
FEATURE_GENERATORS = {
    "basic": BasicFeatureGenerator,
    "ms2pip": MS2PIPFeatureGenerator,
    "deeplc": DeepLCFeatureGenerator,
    "maxquant": MaxQuantFeatureGenerator,
    "ionmob": IonMobFeatureGenerator,
    "im2deep": IM2DeepFeatureGenerator,
}
20 | 


--------------------------------------------------------------------------------
/ms2rescore/feature_generators/base.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC, abstractmethod
 2 | from typing import Set
 3 | 
 4 | from psm_utils import PSMList
 5 | 
 6 | from ms2rescore.parse_spectra import MSDataType
 7 | 
 8 | 
 9 | class FeatureGeneratorBase(ABC):
10 |     """Base class from which all feature generators must inherit."""
11 | 
12 |     # List of required MS data types for feature generation
13 |     required_ms_data: Set[MSDataType] = set()
14 | 
15 |     def __init__(self, *args, **kwargs) -> None:
16 |         super().__init__()
17 | 
18 |     @property
19 |     @abstractmethod
20 |     def feature_names(self):
21 |         pass
22 | 
23 |     @abstractmethod
24 |     def add_features(psm_list: PSMList):
25 |         pass
26 | 
27 | 
class FeatureGeneratorException(Exception):
    """Root of the exception hierarchy for errors raised by feature generators."""
32 | 


--------------------------------------------------------------------------------
/ms2rescore/feature_generators/basic.py:
--------------------------------------------------------------------------------
  1 | """Generate basic features that can be extracted from any PSM list."""
  2 | 
  3 | import logging
  4 | from typing import Dict, Iterable, List, Tuple
  5 | 
  6 | import numpy as np
  7 | from psm_utils import PSMList
  8 | 
  9 | from ms2rescore.feature_generators.base import FeatureGeneratorBase
 10 | 
 11 | logger = logging.getLogger(__name__)
 12 | 
 13 | 
 14 | class BasicFeatureGenerator(FeatureGeneratorBase):
 15 |     def __init__(self, *args, **kwargs) -> None:
 16 |         """
 17 |         Generate basic features that can be extracted from any PSM list, including search engine
 18 |         score, charge state, and MS1 error.
 19 | 
 20 |         Parameters
 21 |         ----------
 22 |         *args
 23 |             Positional arguments passed to the base class.
 24 |         **kwargs
 25 |             Keyword arguments passed to the base class.
 26 | 
 27 |         Attributes
 28 |         ----------
 29 |         feature_names: list[str]
 30 |             Names of the features that will be added to the PSMs.
 31 | 
 32 |         """
 33 |         super().__init__(*args, **kwargs)
 34 |         self._feature_names = None
 35 | 
 36 |     @property
 37 |     def feature_names(self) -> List[str]:
 38 |         if self._feature_names is None:
 39 |             raise ValueError("Feature names have not been set yet. First run `add_features`.")
 40 |         return self._feature_names
 41 | 
 42 |     def add_features(self, psm_list: PSMList) -> None:
 43 |         """
 44 |         Add basic features to a PSM list.
 45 | 
 46 |         Parameters
 47 |         ----------
 48 |         psm_list
 49 |             PSM list to add features to.
 50 | 
 51 |         """
 52 |         logger.info("Adding basic features to PSMs.")
 53 | 
 54 |         self._feature_names = []  # Reset feature names
 55 | 
 56 |         charge_states = np.array([psm.peptidoform.precursor_charge for psm in psm_list])
 57 |         precursor_mzs = psm_list["precursor_mz"]
 58 |         scores = psm_list["score"]
 59 | 
 60 |         has_charge = None not in charge_states
 61 |         has_mz = None not in precursor_mzs and has_charge
 62 |         has_score = None not in scores
 63 | 
 64 |         if has_charge:
 65 |             charge_n = charge_states
 66 |             charge_one_hot, one_hot_names = _one_hot_encode_charge(charge_states)
 67 |             self._feature_names.extend(["charge_n"] + one_hot_names)
 68 | 
 69 |         if has_mz:  # Charge also required for theoretical m/z
 70 |             theo_mz = np.array([psm.peptidoform.theoretical_mz for psm in psm_list])
 71 |             abs_ms1_error_ppm = np.abs((precursor_mzs - theo_mz) / theo_mz * 10**6)
 72 |             self._feature_names.append("abs_ms1_error_ppm")
 73 | 
 74 |         if has_score:
 75 |             self._feature_names.append("search_engine_score")
 76 | 
 77 |         for i, psm in enumerate(psm_list):
 78 |             psm.rescoring_features.update(
 79 |                 dict(
 80 |                     **{"charge_n": charge_n[i]} if has_charge else {},
 81 |                     **charge_one_hot[i] if has_charge else {},
 82 |                     **{"abs_ms1_error_ppm": abs_ms1_error_ppm[i]} if has_mz else {},
 83 |                     **{"search_engine_score": scores[i]} if has_score else {},
 84 |                 )
 85 |             )
 86 | 
 87 | 
 88 | def _one_hot_encode_charge(
 89 |     charge_states: np.ndarray,
 90 | ) -> Tuple[Iterable[Dict[str, int]], List[str]]:
 91 |     """One-hot encode charge states."""
 92 |     n_entries = len(charge_states)
 93 |     min_charge = np.min(charge_states)
 94 |     max_charge = np.max(charge_states)
 95 | 
 96 |     mask = np.zeros((n_entries, max_charge - min_charge + 1), dtype=bool)
 97 |     mask[np.arange(n_entries), charge_states - min_charge] = 1
 98 |     one_hot = mask.view("i1")
 99 | 
100 |     heading = [f"charge_{i}" for i in range(min_charge, max_charge + 1)]
101 | 
102 |     return [dict(zip(heading, row)) for row in one_hot], heading
103 | 


--------------------------------------------------------------------------------
/ms2rescore/feature_generators/deeplc.py:
--------------------------------------------------------------------------------
  1 | """
  2 | DeepLC retention time-based feature generator.
  3 | 
  4 | DeepLC is a fully modification-aware peptide retention time predictor. It uses a deep convolutional
  5 | neural network to predict retention times based on the atomic composition of the (modified) amino
  6 | acid residues in the peptide. See
  7 | `github.com/compomics/deeplc <https://github.com/compomics/deeplc>`_ for more information.
  8 | 
  9 | If you use DeepLC through MS²Rescore, please cite:
 10 | 
 11 | .. epigraph::
 12 |     Bouwmeester, R., Gabriels, R., Hulstaert, N. et al. DeepLC can predict retention times for
 13 |     peptides that carry unknown modifications. *Nat Methods* 18, 1363-1369 (2021).
 14 |     `doi:10.1038/s41592-021-01301-5 <https://doi.org/10.1038/s41592-021-01301-5>`_
 15 | 
 16 | """
 17 | 
 18 | import contextlib
 19 | import logging
 20 | import os
 21 | from collections import defaultdict
 22 | from inspect import getfullargspec
 23 | from itertools import chain
 24 | from typing import List, Union
 25 | 
 26 | import numpy as np
 27 | from psm_utils import PSMList
 28 | 
 29 | from ms2rescore.feature_generators.base import FeatureGeneratorBase
 30 | from ms2rescore.parse_spectra import MSDataType
 31 | 
# Set before DeepLC is lazily imported further down.
# NOTE(review): presumably this reduces TensorFlow's C++ log output -- confirm.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
logger = logging.getLogger(__name__)
 34 | 
 35 | 
 36 | class DeepLCFeatureGenerator(FeatureGeneratorBase):
 37 |     """DeepLC retention time-based feature generator."""
 38 | 
 39 |     required_ms_data = {MSDataType.retention_time}
 40 | 
 41 |     def __init__(
 42 |         self,
 43 |         *args,
 44 |         lower_score_is_better: bool = False,
 45 |         calibration_set_size: Union[int, float, None] = None,
 46 |         processes: int = 1,
 47 |         **kwargs,
 48 |     ) -> None:
 49 |         """
 50 |         Generate DeepLC-based features for rescoring.
 51 | 
 52 |         DeepLC retraining is on by default. Add ``deeplc_retrain: False`` as a keyword argument to
 53 |         disable retraining.
 54 | 
 55 |         Parameters
 56 |         ----------
 57 |         lower_score_is_better
 58 |             Whether a lower PSM score denotes a better matching PSM. Default: False
 59 |         calibration_set_size: int or float
 60 |             Amount of best PSMs to use for DeepLC calibration. If this value is lower
 61 |             than the number of available PSMs, all PSMs will be used. (default: 0.15)
 62 |         processes: {int, None}
 63 |             Number of processes to use in DeepLC. Defaults to 1.
 64 |         kwargs: dict
 65 |             Additional keyword arguments are passed to DeepLC.
 66 | 
 67 |         Attributes
 68 |         ----------
 69 |         feature_names: list[str]
 70 |             Names of the features that will be added to the PSMs.
 71 | 
 72 |         """
 73 |         super().__init__(*args, **kwargs)
 74 | 
 75 |         self.lower_psm_score_better = lower_score_is_better
 76 |         self.calibration_set_size = calibration_set_size
 77 |         self.processes = processes
 78 |         self.deeplc_kwargs = kwargs or {}
 79 | 
 80 |         self._verbose = logger.getEffectiveLevel() <= logging.DEBUG
 81 | 
 82 |         # Lazy-load DeepLC
 83 |         from deeplc import DeepLC
 84 | 
 85 |         self.DeepLC = DeepLC
 86 | 
 87 |         # Remove any kwargs that are not DeepLC arguments
 88 |         self.deeplc_kwargs = {
 89 |             k: v for k, v in self.deeplc_kwargs.items() if k in getfullargspec(DeepLC).args
 90 |         }
 91 |         self.deeplc_kwargs.update({"config_file": None})
 92 | 
 93 |         # Set default DeepLC arguments
 94 |         if "deeplc_retrain" not in self.deeplc_kwargs:
 95 |             self.deeplc_kwargs["deeplc_retrain"] = False
 96 | 
 97 |         self.deeplc_predictor = None
 98 |         if "path_model" in self.deeplc_kwargs:
 99 |             self.user_model = self.deeplc_kwargs.pop("path_model")
100 |             logging.debug(f"Using user-provided DeepLC model {self.user_model}.")
101 |         else:
102 |             self.user_model = None
103 | 
104 |     @property
105 |     def feature_names(self) -> List[str]:
106 |         return [
107 |             "observed_retention_time",
108 |             "predicted_retention_time",
109 |             "rt_diff",
110 |             "observed_retention_time_best",
111 |             "predicted_retention_time_best",
112 |             "rt_diff_best",
113 |         ]
114 | 
115 |     def add_features(self, psm_list: PSMList) -> None:
116 |         """Add DeepLC-derived features to PSMs."""
117 | 
118 |         logger.info("Adding DeepLC-derived features to PSMs.")
119 | 
120 |         # Get easy-access nested version of PSMList
121 |         psm_dict = psm_list.get_psm_dict()
122 | 
123 |         # Run DeepLC for each spectrum file
124 |         current_run = 1
125 |         total_runs = sum(len(runs) for runs in psm_dict.values())
126 | 
127 |         for runs in psm_dict.values():
128 |             # Reset DeepLC predictor for each collection of runs
129 |             self.deeplc_predictor = None
130 |             self.selected_model = None
131 |             for run, psms in runs.items():
132 |                 peptide_rt_diff_dict = defaultdict(
133 |                     lambda: {
134 |                         "observed_retention_time_best": np.inf,
135 |                         "predicted_retention_time_best": np.inf,
136 |                         "rt_diff_best": np.inf,
137 |                     }
138 |                 )
139 |                 logger.info(
140 |                     f"Running DeepLC for PSMs from run ({current_run}/{total_runs}): `{run}`..."
141 |                 )
142 | 
143 |                 # Disable wild logging to stdout by Tensorflow, unless in debug mode
144 | 
145 |                 with contextlib.redirect_stdout(
146 |                     open(os.devnull, "w", encoding="utf-8")
147 |                 ) if not self._verbose else contextlib.nullcontext():
148 |                     # Make new PSM list for this run (chain PSMs per spectrum to flat list)
149 |                     psm_list_run = PSMList(psm_list=list(chain.from_iterable(psms.values())))
150 | 
151 |                     psm_list_calibration = self._get_calibration_psms(psm_list_run)
152 |                     logger.debug(f"Calibrating DeepLC with {len(psm_list_calibration)} PSMs...")
153 |                     self.deeplc_predictor = self.DeepLC(
154 |                         n_jobs=self.processes,
155 |                         verbose=self._verbose,
156 |                         path_model=self.selected_model or self.user_model,
157 |                         **self.deeplc_kwargs,
158 |                     )
159 |                     self.deeplc_predictor.calibrate_preds(psm_list_calibration)
160 |                     # Still calibrate for each run, but do not try out all model options.
161 |                     # Just use model that was selected based on first run
162 |                     if not self.selected_model:
163 |                         self.selected_model = list(self.deeplc_predictor.model.keys())
164 |                         self.deeplc_kwargs["deeplc_retrain"] = False
165 |                         logger.debug(
166 |                             f"Selected DeepLC model {self.selected_model} based on "
167 |                             "calibration of first run. Using this model (after new "
168 |                             "calibrations) for the remaining runs."
169 |                         )
170 | 
171 |                     logger.debug("Predicting retention times...")
172 |                     predictions = np.array(self.deeplc_predictor.make_preds(psm_list_run))
173 |                     observations = psm_list_run["retention_time"]
174 |                     rt_diffs_run = np.abs(predictions - observations)
175 | 
176 |                     logger.debug("Adding features to PSMs...")
177 |                     for i, psm in enumerate(psm_list_run):
178 |                         psm["rescoring_features"].update(
179 |                             {
180 |                                 "observed_retention_time": observations[i],
181 |                                 "predicted_retention_time": predictions[i],
182 |                                 "rt_diff": rt_diffs_run[i],
183 |                             }
184 |                         )
185 |                         peptide = psm.peptidoform.proforma.split("\\")[0]  # remove charge
186 |                         if peptide_rt_diff_dict[peptide]["rt_diff_best"] > rt_diffs_run[i]:
187 |                             peptide_rt_diff_dict[peptide] = {
188 |                                 "observed_retention_time_best": observations[i],
189 |                                 "predicted_retention_time_best": predictions[i],
190 |                                 "rt_diff_best": rt_diffs_run[i],
191 |                             }
192 |                     for psm in psm_list_run:
193 |                         psm["rescoring_features"].update(
194 |                             peptide_rt_diff_dict[psm.peptidoform.proforma.split("\\")[0]]
195 |                         )
196 |                 current_run += 1
197 | 
198 |     def _get_calibration_psms(self, psm_list: PSMList):
199 |         """Get N best scoring target PSMs for calibration."""
200 |         psm_list_targets = psm_list[~psm_list["is_decoy"]]
201 |         if self.calibration_set_size:
202 |             n_psms = self._get_number_of_calibration_psms(psm_list_targets)
203 |             indices = np.argsort(psm_list_targets["score"])
204 |             indices = indices[:n_psms] if self.lower_psm_score_better else indices[-n_psms:]
205 |             return psm_list_targets[indices]
206 |         else:
207 |             identified_psms = psm_list_targets[psm_list_targets["qvalue"] <= 0.01]
208 |             if len(identified_psms) == 0:
209 |                 raise ValueError(
210 |                     "No target PSMs with q-value <= 0.01 found. Please set calibration set size for calibrating deeplc."
211 |                 )
212 |             elif (len(identified_psms) < 500) & (self.deeplc_kwargs["deeplc_retrain"]):
213 |                 logger.warning(
214 |                     " Less than 500 target PSMs with q-value <= 0.01 found for retraining. Consider turning of deeplc_retrain, as this is likely not enough data for retraining."
215 |                 )
216 |             return identified_psms
217 | 
218 |     def _get_number_of_calibration_psms(self, psm_list):
219 |         """Get number of calibration PSMs given `calibration_set_size` and total number of PSMs."""
220 |         if isinstance(self.calibration_set_size, float):
221 |             if not 0 < self.calibration_set_size <= 1:
222 |                 raise ValueError(
223 |                     "If `calibration_set_size` is a float, it cannot be smaller than "
224 |                     "or equal to 0 or larger than 1."
225 |                 )
226 |             else:
227 |                 num_calibration_psms = round(len(psm_list) * self.calibration_set_size)
228 |         elif isinstance(self.calibration_set_size, int):
229 |             if self.calibration_set_size > len(psm_list):
230 |                 logger.warning(
231 |                     f"Requested number of calibration PSMs ({self.calibration_set_size}"
232 |                     f") is larger than total number of PSMs ({len(psm_list)}). Using "
233 |                     "all PSMs for calibration."
234 |                 )
235 |                 num_calibration_psms = len(psm_list)
236 |             else:
237 |                 num_calibration_psms = self.calibration_set_size
238 |         else:
239 |             raise TypeError(
240 |                 "Expected float or int for `calibration_set_size`. Got "
241 |                 f"{type(self.calibration_set_size)} instead. "
242 |             )
243 |         logger.debug(f"Using {num_calibration_psms} PSMs for calibration")
244 |         return num_calibration_psms
245 | 


--------------------------------------------------------------------------------
/ms2rescore/feature_generators/im2deep.py:
--------------------------------------------------------------------------------
  1 | """
  2 | IM2Deep ion mobility-based feature generator.
  3 | 
  4 | IM2Deep is a fully modification-aware peptide ion mobility predictor. It uses a deep convolutional
  5 | neural network to predict retention times based on the atomic composition of the (modified) amino
  6 | acid residues in the peptide. See
  7 | `github.com/compomics/IM2Deep <https://github.com/compomics/IM2Deep>`_ for more information.
  8 | 
  9 | """
 10 | 
 11 | import contextlib
 12 | import logging
 13 | import os
 14 | from inspect import getfullargspec
 15 | from itertools import chain
 16 | from typing import List
 17 | 
 18 | import numpy as np
 19 | import pandas as pd
 20 | from im2deep.utils import im2ccs
 21 | from im2deep.im2deep import predict_ccs
 22 | from psm_utils import PSMList
 23 | 
 24 | from ms2rescore.feature_generators.base import FeatureGeneratorBase
 25 | from ms2rescore.parse_spectra import MSDataType
 26 | 
 27 | os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
 28 | logger = logging.getLogger(__name__)
 29 | 
 30 | 
 31 | class IM2DeepFeatureGenerator(FeatureGeneratorBase):
 32 |     """IM2Deep collision cross section feature generator."""
 33 | 
 34 |     required_ms_data = {MSDataType.ion_mobility}
 35 | 
 36 |     def __init__(
 37 |         self,
 38 |         *args,
 39 |         processes: int = 1,
 40 |         **kwargs,
 41 |     ):
 42 |         """
 43 |         Initialize the IM2DeepFeatureGenerator.
 44 | 
 45 |         Parameters
 46 |         ----------
 47 |         processes : int, optional
 48 |             Number of parallel processes to use for IM2Deep predictions. Default is 1.
 49 |         **kwargs : dict, optional
 50 |             Additional keyword arguments to `im2deep.predict_ccs`.
 51 | 
 52 |         """
 53 |         super().__init__(*args, **kwargs)
 54 | 
 55 |         self._verbose = logger.getEffectiveLevel() <= logging.DEBUG
 56 | 
 57 |         # Remove any kwargs that are not IM2Deep arguments
 58 |         self.im2deep_kwargs = kwargs or {}
 59 |         self.im2deep_kwargs = {
 60 |             k: v for k, v in self.im2deep_kwargs.items() if k in getfullargspec(predict_ccs).args
 61 |         }
 62 |         self.im2deep_kwargs["n_jobs"] = processes
 63 | 
 64 |     @property
 65 |     def feature_names(self) -> List[str]:
 66 |         return [
 67 |             "ccs_observed_im2deep",
 68 |             "ccs_predicted_im2deep",
 69 |             "ccs_error_im2deep",
 70 |             "abs_ccs_error_im2deep",
 71 |             "perc_ccs_error_im2deep",
 72 |         ]
 73 | 
 74 |     def add_features(self, psm_list: PSMList) -> None:
 75 |         """Add IM2Deep-derived features to PSMs"""
 76 |         logger.info("Adding IM2Deep-derived features to PSMs")
 77 | 
 78 |         # Get easy-access nested version of PSMlist
 79 |         psm_dict = psm_list.get_psm_dict()
 80 | 
 81 |         # Run IM2Deep for each spectrum file
 82 |         current_run = 1
 83 |         total_runs = sum(len(runs) for runs in psm_dict.values())
 84 | 
 85 |         for runs in psm_dict.values():
 86 |             # Reset IM2Deep predictor for each collection of runs
 87 |             for run, psms in runs.items():
 88 |                 logger.info(
 89 |                     f"Running IM2Deep for PSMs from run ({current_run}/{total_runs}): `{run}`..."
 90 |                 )
 91 | 
 92 |                 # Disable wild logging to stdout by TensorFlow, unless in debug mode
 93 |                 with (
 94 |                     contextlib.redirect_stdout(open(os.devnull, "w", encoding="utf-8"))
 95 |                     if not self._verbose
 96 |                     else contextlib.nullcontext()
 97 |                 ):
 98 |                     # Make new PSM list for this run (chain PSMs per spectrum to flat list)
 99 |                     psm_list_run = PSMList(psm_list=list(chain.from_iterable(psms.values())))
100 | 
101 |                     logger.debug("Calibrating IM2Deep...")
102 | 
103 |                     # Convert ion mobility to CCS and calibrate CCS values
104 |                     psm_list_run_df = psm_list_run.to_dataframe()
105 |                     psm_list_run_df["charge"] = [
106 |                         pep.precursor_charge for pep in psm_list_run_df["peptidoform"]
107 |                     ]
108 |                     psm_list_run_df["ccs_observed"] = im2ccs(
109 |                         psm_list_run_df["ion_mobility"],
110 |                         psm_list_run_df["precursor_mz"],
111 |                         psm_list_run_df["charge"],
112 |                     )
113 | 
114 |                     # Create dataframe with high confidence hits for calibration
115 |                     cal_psm_df = self.make_calibration_df(psm_list_run_df)
116 | 
117 |                     # Make predictions with IM2Deep
118 |                     logger.debug("Predicting CCS values...")
119 |                     predictions = predict_ccs(
120 |                         psm_list_run, cal_psm_df, write_output=False, **self.im2deep_kwargs
121 |                     )
122 | 
123 |                     # Add features to PSMs
124 |                     logger.debug("Adding features to PSMs...")
125 |                     observations = psm_list_run_df["ccs_observed"]
126 |                     ccs_diffs_run = np.abs(predictions - observations)
127 |                     for i, psm in enumerate(psm_list_run):
128 |                         psm["rescoring_features"].update(
129 |                             {
130 |                                 "ccs_observed_im2deep": observations[i],
131 |                                 "ccs_predicted_im2deep": predictions[i],
132 |                                 "ccs_error_im2deep": ccs_diffs_run[i],
133 |                                 "abs_ccs_error_im2deep": np.abs(ccs_diffs_run[i]),
134 |                                 "perc_ccs_error_im2deep": np.abs(ccs_diffs_run[i])
135 |                                 / observations[i]
136 |                                 * 100,
137 |                             }
138 |                         )
139 | 
140 |                 current_run += 1
141 | 
142 |     @staticmethod
143 |     def make_calibration_df(psm_list_df: pd.DataFrame, threshold: float = 0.25) -> pd.DataFrame:
144 |         """
145 |         Make dataframe for calibration of IM2Deep predictions.
146 | 
147 |         Parameters
148 |         ----------
149 |         psm_list_df
150 |             DataFrame with PSMs.
151 |         threshold
152 |             Percentage of highest scoring identified target PSMs to use for calibration,
153 |             default 0.95.
154 | 
155 |         Returns
156 |         -------
157 |         pd.DataFrame
158 |             DataFrame with high confidence hits for calibration.
159 | 
160 |         """
161 |         identified_psms = psm_list_df[
162 |             (psm_list_df["qvalue"] < 0.01)
163 |             & (~psm_list_df["is_decoy"])
164 |             & (psm_list_df["charge"] < 5)  # predictions do not go higher for IM2Deep
165 |         ]
166 |         calibration_psms = identified_psms[
167 |             identified_psms["qvalue"] < identified_psms["qvalue"].quantile(1 - threshold)
168 |         ]
169 |         logger.debug(
170 |             f"Number of high confidence hits for calculating shift: {len(calibration_psms)}"
171 |         )
172 |         return calibration_psms
173 | 


--------------------------------------------------------------------------------
/ms2rescore/feature_generators/maxquant.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Feature generator for PSMs from the MaxQuant search engine.
  3 | 
  4 | MaxQuant msms.txt files contain various metrics from peptide-spectrum matching that can be used
  5 | to generate rescoring features. These include features related to the mass errors of the seven
  6 | fragment ions with the highest intensities, and features related to the ion current of the
  7 | identified fragment ions.
  8 | 
  9 | """
 10 | 
 11 | import logging
 12 | from typing import List, Tuple
 13 | 
 14 | import numpy as np
 15 | from psm_utils import PSMList
 16 | 
 17 | from ms2rescore.exceptions import MS2RescoreError
 18 | from ms2rescore.feature_generators.base import FeatureGeneratorBase
 19 | 
 20 | logger = logging.getLogger(__name__)
 21 | 
 22 | 
 23 | class MaxQuantFeatureGenerator(FeatureGeneratorBase):
 24 |     """Generate MaxQuant-derived features."""
 25 | 
 26 |     available_features = [
 27 |         "mean_error_top7",
 28 |         "sq_mean_error_top7",
 29 |         "stdev_error_top7",
 30 |         "ln_explained_ion_current",
 31 |         "ln_nterm_ion_current_ratio",
 32 |         "ln_cterm_ion_current_ratio",
 33 |         "ln_ms2_ion_current",
 34 |     ]
 35 | 
 36 |     def __init__(self, *args, **kwargs) -> None:
 37 |         """
 38 |         Generate MaxQuant-derived features.
 39 | 
 40 |         Attributes
 41 |         ----------
 42 |         feature_names: list[str]
 43 |             Names of the features that will be added to the PSMs.
 44 | 
 45 |         Raises
 46 |         ------
 47 |         MissingMetadataError
 48 |             If the required metadata entries are not present in the PSMs.
 49 | 
 50 |         """
 51 |         super().__init__(*args, **kwargs)
 52 |         self._feature_names = self.available_features.copy()
 53 | 
 54 |     @property
 55 |     def feature_names(self) -> List[str]:
 56 |         return self._feature_names
 57 | 
 58 |     def add_features(self, psm_list: PSMList):
 59 |         """
 60 |         Add MaxQuant-derived features to PSMs.
 61 | 
 62 |         Parameters
 63 |         ----------
 64 |         psm_list
 65 |             PSMs to add features to.
 66 | 
 67 |         """
 68 |         # Check if all PSMs are from MaxQuant
 69 |         if not self._all_psms_from_maxquant(psm_list):
 70 |             self._feature_names = []  # Set feature names to empty list to indicate none added
 71 |             logger.warning("Not all PSMs are from MaxQuant. Skipping MaxQuant feature generation.")
 72 |             return
 73 |         else:
 74 |             self._feature_names = self.available_features  # Reset feature names
 75 |             logger.info("Adding MaxQuant-derived features to PSMs.")
 76 | 
 77 |         # Infer mass deviations column name
 78 |         for column_name in [
 79 |             "Mass deviations [Da]",
 80 |             "Mass Deviations [Da]",
 81 |             "Mass deviations [ppm]",
 82 |             "Mass Deviations [ppm]",
 83 |         ]:
 84 |             if column_name in psm_list[0]["metadata"].keys():
 85 |                 self._mass_deviations_key = column_name
 86 |                 break
 87 |         else:
 88 |             raise MissingMetadataError(
 89 |                 "No mass deviations entry in PSM metadata. Cannot compute MaxQuant features."
 90 |             )
 91 | 
 92 |         # Check other columns
 93 |         for column_name in ["Intensities", "Matches", "Intensity coverage"]:
 94 |             if column_name not in psm_list[0]["metadata"].keys():
 95 |                 raise MissingMetadataError(
 96 |                     f"Missing {column_name} entry in PSM metadata. Cannot compute MaxQuant features."
 97 |                 )
 98 | 
 99 |         # Add features to PSMs
100 |         for psm in psm_list:
101 |             psm["rescoring_features"].update(self._compute_features(psm["metadata"]))
102 | 
103 |     @staticmethod
104 |     def _all_psms_from_maxquant(psm_list):
105 |         """Check if the PSMs are from MaxQuant."""
106 |         return (psm_list["source"] == "msms").all()
107 | 
108 |     def _compute_features(self, psm_metadata):
109 |         """Compute features from derived from intensities and mass errors."""
110 |         features = {}
111 |         if all(k in psm_metadata.keys() for k in ["Intensities", self._mass_deviations_key]):
112 |             (
113 |                 features["mean_error_top7"],
114 |                 features["sq_mean_error_top7"],
115 |                 features["stdev_error_top7"],
116 |             ) = self._calculate_top7_peak_features(
117 |                 psm_metadata["Intensities"], psm_metadata[self._mass_deviations_key]
118 |             )
119 | 
120 |         if all(k in psm_metadata.keys() for k in ["Intensities", "Matches", "Intensity coverage"]):
121 |             (
122 |                 features["ln_explained_ion_current"],
123 |                 features["ln_nterm_ion_current_ratio"],
124 |                 features["ln_cterm_ion_current_ratio"],
125 |                 features["ln_ms2_ion_current"],
126 |             ) = self._calculate_ion_current_features(
127 |                 psm_metadata["Matches"],
128 |                 psm_metadata["Intensities"],
129 |                 psm_metadata["Intensity coverage"],
130 |             )
131 | 
132 |         return features
133 | 
134 |     @staticmethod
135 |     def _calculate_top7_peak_features(intensities: str, mass_errors: str) -> Tuple[np.ndarray]:
136 |         """
137 |         Calculate "top 7 peak"-related search engine features.
138 |         The following features are calculated:
139 |         - mean_error_top7: Mean of mass errors of the seven fragment ion peaks with the
140 |           highest intensities
141 |         - sq_mean_error_top7: Squared MeanErrorTop7
142 |         - stdev_error_top7: Standard deviation of mass errors of the seven fragment ion
143 |           peaks with the highest intensities
144 |         """
145 |         try:
146 |             intensities = [float(i) for i in intensities.split(";")]
147 |             mass_errors = [float(i) for i in mass_errors.split(";")]
148 |         except ValueError:
149 |             return 0.0, 0.0, 0.0
150 | 
151 |         indices_most_intens = np.array(intensities).argsort()[-1:-8:-1]
152 |         mass_errors_top7 = [(mass_errors[i]) for i in indices_most_intens]
153 |         mean_error_top7 = np.mean(mass_errors_top7)
154 |         sq_mean_error_top7 = mean_error_top7**2
155 |         stdev_error_top7 = np.std(mass_errors_top7)
156 | 
157 |         return mean_error_top7, sq_mean_error_top7, stdev_error_top7
158 | 
159 |     @staticmethod
160 |     def _calculate_ion_current_features(
161 |         matches: str, intensities: str, intensity_coverage: str
162 |     ) -> Tuple[np.ndarray]:
163 |         """
164 |         Calculate ion current related search engine features.
165 |         The following features are calculated:
166 |         - ln_explained_ion_current: Summed intensity of identified fragment ions,
167 |           divided by that of all fragment ions, logged
168 |         - ln_nterm_ion_current_ratio: Summed intensity of identified N-terminal
169 |           fragments, divided by that of all identified fragments, logged
170 |         - ln_cterm_ion_current_ratio: Summed intensity of identified N-terminal
171 |           fragments, divided by that of all identified fragments, logged
172 |         - ln_ms2_ion_current: Summed intensity of all observed fragment ions, logged
173 |         """
174 |         pseudo_count = 0.00001
175 |         try:
176 |             ln_explained_ion_current = float(intensity_coverage) + pseudo_count
177 |             summed_intensities = sum([float(i) for i in intensities.split(";")])
178 |         except ValueError:
179 |             return 0.0, 0.0, 0.0, 0.0
180 | 
181 |         # Calculate ratio between matched b- and y-ion intensities
182 |         y_ion_int = sum(
183 |             [
184 |                 float(intensities.split(";")[i])
185 |                 for i, m in enumerate(matches.split(";"))
186 |                 if m.startswith("y")
187 |             ]
188 |         )
189 |         y_int_ratio = y_ion_int / summed_intensities
190 | 
191 |         ln_nterm_ion_current_ratio = (y_int_ratio + pseudo_count) * ln_explained_ion_current
192 |         ln_cterm_ion_current_ratio = (1 - y_int_ratio + pseudo_count) * ln_explained_ion_current
193 |         ln_ms2_ion_current = summed_intensities / ln_explained_ion_current
194 | 
195 |         out = [
196 |             ln_explained_ion_current,
197 |             ln_nterm_ion_current_ratio,
198 |             ln_cterm_ion_current_ratio,
199 |             ln_ms2_ion_current,
200 |         ]
201 | 
202 |         return tuple([np.log(x) for x in out])
203 | 
204 | 
class MissingMetadataError(MS2RescoreError):
    """Raised when PSM metadata lacks an entry that is required to compute features."""
209 | 


--------------------------------------------------------------------------------
/ms2rescore/gui/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/ms2rescore/gui/__init__.py


--------------------------------------------------------------------------------
/ms2rescore/gui/__main__.py:
--------------------------------------------------------------------------------
 1 | """Entrypoint for MS²Rescore GUI."""
 2 | 
 3 | import multiprocessing
 4 | import os
 5 | import contextlib
 6 | 
 7 | from ms2rescore.gui.app import app
 8 | 
 9 | 
def main():
    """
    Entrypoint for MS²Rescore GUI.

    Runs the GUI application with stdout redirected to the null device.
    """
    multiprocessing.freeze_support()
    # Redirect stdout when running GUI (packaged app might not have console attached).
    # Opening devnull in the `with` statement ensures the handle is closed when the app exits.
    with open(os.devnull, "w", encoding="utf-8") as devnull:
        with contextlib.redirect_stdout(devnull):
            app()
16 | 
17 | 
18 | if __name__ == "__main__":
19 |     main()
20 | 


--------------------------------------------------------------------------------
/ms2rescore/package_data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/ms2rescore/package_data/__init__.py


--------------------------------------------------------------------------------
/ms2rescore/package_data/config_default.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "$schema": "./config_schema.json",
 3 |     "ms2rescore": {
 4 |         "feature_generators": {
 5 |             "basic": {},
 6 |             "ms2pip": {
 7 |                 "model": "HCD",
 8 |                 "ms2_tolerance": 0.02
 9 |             },
10 |             "deeplc": {
11 |                 "deeplc_retrain": false
12 |             },
13 |             "maxquant": {}
14 |         },
15 |         "rescoring_engine": {
16 |             "mokapot": {
17 |                 "train_fdr": 0.01,
18 |                 "write_weights": true,
19 |                 "write_txt": true
20 |             }
21 |         },
22 |         "config_file": null,
23 |         "psm_file": null,
24 |         "psm_file_type": "infer",
25 |         "psm_reader_kwargs": {},
26 |         "spectrum_path": null,
27 |         "output_path": null,
28 |         "log_level": "info",
29 |         "id_decoy_pattern": null,
30 |         "psm_id_pattern": null,
31 |         "spectrum_id_pattern": null,
32 |         "psm_id_rt_pattern": null,
33 |         "psm_id_im_pattern": null,
34 |         "lower_score_is_better": false,
35 |         "max_psm_rank_input": 10,
36 |         "max_psm_rank_output": 1,
37 |         "modification_mapping": {},
38 |         "fixed_modifications": {},
39 |         "processes": -1,
40 |         "rename_to_usi": false,
41 |         "fasta_file": null,
42 |         "write_flashlfq": false,
43 |         "write_report": false
44 |     }
45 | }
46 | 


--------------------------------------------------------------------------------
/ms2rescore/package_data/config_default_tims.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "$schema": "./config_schema.json",
 3 |     "ms2rescore": {
 4 |         "feature_generators": {
 5 |             "basic": {},
 6 |             "ms2pip": {
 7 |                 "model": "timsTOF",
 8 |                 "ms2_tolerance": 0.02
 9 |             },
10 |             "deeplc": {
11 |                 "deeplc_retrain": false
12 |             },
13 |             "im2deep": {},
14 |             "maxquant": {}
15 |         },
16 |         "rescoring_engine": {
17 |             "mokapot": {
18 |                 "write_weights": true,
19 |                 "write_txt": true
20 |             }
21 |         },
22 |         "psm_file": null
23 |     }
24 | }
25 | 


--------------------------------------------------------------------------------
/ms2rescore/package_data/img/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/ms2rescore/package_data/img/__init__.py


--------------------------------------------------------------------------------
/ms2rescore/package_data/img/comments_icon_black.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/ms2rescore/package_data/img/comments_icon_black.png


--------------------------------------------------------------------------------
/ms2rescore/package_data/img/comments_icon_white.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/ms2rescore/package_data/img/comments_icon_white.png


--------------------------------------------------------------------------------
/ms2rescore/package_data/img/config_icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/ms2rescore/package_data/img/config_icon.png


--------------------------------------------------------------------------------
/ms2rescore/package_data/img/docs_icon_black.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/ms2rescore/package_data/img/docs_icon_black.png


--------------------------------------------------------------------------------
/ms2rescore/package_data/img/docs_icon_white.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/ms2rescore/package_data/img/docs_icon_white.png


--------------------------------------------------------------------------------
/ms2rescore/package_data/img/github_icon_black.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/ms2rescore/package_data/img/github_icon_black.png


--------------------------------------------------------------------------------
/ms2rescore/package_data/img/github_icon_white.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/ms2rescore/package_data/img/github_icon_white.png


--------------------------------------------------------------------------------
/ms2rescore/package_data/img/ms2rescore_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/ms2rescore/package_data/img/ms2rescore_logo.png


--------------------------------------------------------------------------------
/ms2rescore/package_data/img/program_icon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/ms2rescore/package_data/img/program_icon.ico


--------------------------------------------------------------------------------
/ms2rescore/parse_psms.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import re
  3 | from typing import Dict, Optional, Union
  4 | 
  5 | import numpy as np
  6 | import psm_utils.io
  7 | from psm_utils import PSMList
  8 | 
  9 | from ms2rescore.exceptions import MS2RescoreConfigurationError
 10 | 
 11 | logger = logging.getLogger(__name__)
 12 | 
 13 | 
 14 | def parse_psms(config: Dict, psm_list: Union[PSMList, None]) -> PSMList:
 15 |     """
 16 |     Parse PSMs and prepare for rescoring.
 17 | 
 18 |     Parameters
 19 |     ----------
 20 |     config
 21 |         Dictionary containing general ms2rescore configuration (everything under ``ms2rescore``
 22 |         top-level key).
 23 |     psm_list
 24 |         PSMList object containing PSMs. If None, PSMs will be read from ``psm_file``.
 25 | 
 26 |     """
 27 |     # Read PSMs
 28 |     try:
 29 |         psm_list = _read_psms(config, psm_list)
 30 |     except psm_utils.io.PSMUtilsIOException:
 31 |         raise MS2RescoreConfigurationError(
 32 |             "Error occurred while reading PSMs. Please check the 'psm_file' and "
 33 |             "'psm_file_type' settings. See "
 34 |             "https://ms2rescore.readthedocs.io/en/latest/userguide/input-files/"
 35 |             " for more information."
 36 |         )
 37 | 
 38 |     # Filter by PSM rank
 39 |     psm_list.set_ranks(config["lower_score_is_better"])
 40 |     rank_filter = psm_list["rank"] <= config["max_psm_rank_input"]
 41 |     psm_list = psm_list[rank_filter]
 42 |     logger.info(f"Removed {sum(~rank_filter)} PSMs with rank >= {config['max_psm_rank_input']}.")
 43 | 
 44 |     # Remove invalid AAs, find decoys, calculate q-values
 45 |     psm_list = _remove_invalid_aa(psm_list)
 46 |     _find_decoys(psm_list, config["id_decoy_pattern"])
 47 |     _calculate_qvalues(psm_list, config["lower_score_is_better"])
 48 |     if config["psm_id_rt_pattern"] or config["psm_id_im_pattern"]:
 49 |         logger.debug("Parsing retention time and/or ion mobility from PSM identifier...")
 50 |         _parse_values_from_spectrum_id(
 51 |             psm_list, config["psm_id_rt_pattern"], config["psm_id_im_pattern"]
 52 |         )
 53 | 
 54 |     # Store scoring values for comparison later
 55 |     for psm in psm_list:
 56 |         psm.provenance_data.update(
 57 |             {
 58 |                 "before_rescoring_score": psm.score,
 59 |                 "before_rescoring_qvalue": psm.qvalue,
 60 |                 "before_rescoring_pep": psm.pep,
 61 |                 "before_rescoring_rank": psm.rank,
 62 |             }
 63 |         )
 64 | 
 65 |     logger.debug("Parsing modifications...")
 66 |     modifications_found = set(
 67 |         [
 68 |             re.search(r"\[([^\[\]]*)\]", x.proforma).group(1)
 69 |             for x in psm_list["peptidoform"]
 70 |             if "[" in x.proforma
 71 |         ]
 72 |     )
 73 |     logger.debug(f"Found modifications: {modifications_found}")
 74 |     non_mapped_modifications = modifications_found - set(config["modification_mapping"].keys())
 75 |     if non_mapped_modifications:
 76 |         logger.warning(
 77 |             f"Non-mapped modifications found: {non_mapped_modifications}\n"
 78 |             "This can be ignored if they are Unimod modification labels."
 79 |         )
 80 |     psm_list.rename_modifications(config["modification_mapping"])
 81 |     psm_list.add_fixed_modifications(config["fixed_modifications"])
 82 |     psm_list.apply_fixed_modifications()
 83 | 
 84 |     if config["psm_id_pattern"]:
 85 |         pattern = re.compile(config["psm_id_pattern"])
 86 |         logger.debug("Applying 'psm_id_pattern'...")
 87 |         logger.debug(
 88 |             f"Parsing '{psm_list[0].spectrum_id}' to '{_match_psm_ids(psm_list[0].spectrum_id, pattern)}'"
 89 |         )
 90 |         new_ids = [_match_psm_ids(old_id, pattern) for old_id in psm_list["spectrum_id"]]
 91 |         psm_list["spectrum_id"] = new_ids
 92 | 
 93 |     return psm_list
 94 | 
 95 | 
 96 | def _read_psms(config, psm_list):
 97 |     if isinstance(psm_list, PSMList):
 98 |         return psm_list
 99 |     else:
100 |         total_files = len(config["psm_file"])
101 |         psm_list = []
102 |         for current_file, psm_file in enumerate(config["psm_file"]):
103 |             logger.info(
104 |                 f"Reading PSMs from PSM file ({current_file+1}/{total_files}): '{psm_file}'..."
105 |             )
106 |             psm_list.extend(
107 |                 psm_utils.io.read_file(
108 |                     psm_file,
109 |                     filetype=config["psm_file_type"],
110 |                     show_progressbar=True,
111 |                     **config["psm_reader_kwargs"],
112 |                 )
113 |             )
114 |             logger.debug(f"Read {len(psm_list)} PSMs from '{psm_file}'.")
115 | 
116 |         return PSMList(psm_list=psm_list)
117 | 
118 | 
def _find_decoys(psm_list: PSMList, id_decoy_pattern: Optional[str] = None):
    """
    Find decoys in PSMs, log amount, and raise error if none found.

    Parameters
    ----------
    psm_list
        PSM list to annotate; ``find_decoys`` is called on it when a pattern is given.
    id_decoy_pattern
        Pattern passed to ``PSMList.find_decoys``. If empty or None, the existing ``is_decoy``
        values are used as-is.

    Raises
    ------
    MS2RescoreConfigurationError
        If no PSM is marked as decoy (this includes an empty PSM list).

    """
    logger.debug("Finding decoys...")
    if id_decoy_pattern:
        psm_list.find_decoys(id_decoy_pattern)

    n_psms = len(psm_list)
    n_decoys = sum(psm_list["is_decoy"])
    # Guard against an empty PSM list, which would otherwise raise ZeroDivisionError before the
    # more informative configuration error below can be raised
    percent_decoys = n_decoys / n_psms * 100 if n_psms else 0.0
    logger.info(f"Found {n_psms} PSMs, of which {percent_decoys:.2f}% are decoys.")

    if not n_decoys:
        raise MS2RescoreConfigurationError(
            "No decoy PSMs found. Please check if decoys are present in the PSM file and that "
            "the 'id_decoy_pattern' option is correct. See "
            "https://ms2rescore.readthedocs.io/en/latest/userguide/configuration/#selecting-decoy-psms"
            " for more information."
        )
136 | 
137 | 
def _calculate_qvalues(psm_list: PSMList, lower_score_is_better: bool):
    """
    Fill in q-values when at least one PSM lacks one.

    Nothing happens if every PSM already has a q-value. Otherwise the q-values are recalculated
    for the whole list, with ``reverse`` set to the opposite of ``lower_score_is_better``.
    """
    all_qvalues_present = None not in psm_list["qvalue"]
    if all_qvalues_present:
        return

    logger.debug("Recalculating q-values...")
    psm_list.calculate_qvalues(reverse=not lower_score_is_better)
144 | 
145 | 
146 | def _match_psm_ids(old_id, regex_pattern):
147 |     """Match PSM IDs to regex pattern or raise Exception if no match present."""
148 |     match = re.search(regex_pattern, str(old_id))
149 |     try:
150 |         return match[1]
151 |     except (TypeError, IndexError):
152 |         raise MS2RescoreConfigurationError(
153 |             f"'psm_id_pattern' could not be extracted from PSM spectrum IDs (i.e. {old_id})."
154 |             " Ensure that the regex contains a capturing group?"
155 |         )
156 | 
157 | 
def _parse_values_from_spectrum_id(
    psm_list: PSMList,
    psm_id_rt_pattern: Optional[str] = None,
    psm_id_im_pattern: Optional[str] = None,
):
    """
    Parse retention time and or ion mobility values from the spectrum_id.

    Parameters
    ----------
    psm_list
        PSM list whose ``retention_time`` and/or ``ion_mobility`` values are overwritten.
    psm_id_rt_pattern
        Regex with one capturing group matching the retention time in the spectrum ID. Skipped
        if empty or None.
    psm_id_im_pattern
        Regex with one capturing group matching the ion mobility in the spectrum ID. Skipped if
        empty or None.

    Raises
    ------
    MS2RescoreConfigurationError
        If a pattern does not match a spectrum ID, has no capturing group, or captures a value
        that cannot be converted to a float.

    """
    for pattern, label, key in zip(
        [psm_id_rt_pattern, psm_id_im_pattern],
        ["retention time", "ion mobility"],
        ["retention_time", "ion_mobility"],
    ):
        if not pattern:
            continue

        logger.debug(f"Parsing {label} from spectrum_id with regex pattern " f"{pattern}")
        # Keep the compiled regex in its own name so that the error message below shows the
        # pattern as configured by the user instead of the ``re.compile(...)`` repr
        compiled_pattern = re.compile(pattern)
        try:
            psm_list[key] = [
                float(compiled_pattern.search(str(psm.spectrum_id)).group(1))
                for psm in psm_list
            ]
        # AttributeError: no match; IndexError: no capturing group; ValueError: not a number
        except (AttributeError, IndexError, ValueError) as e:
            raise MS2RescoreConfigurationError(
                f"Could not parse {label} from spectrum_id with the "
                f"{pattern} regex pattern. "
                f"Example spectrum_id: '{psm_list[0].spectrum_id}'\n. "
                f"Please make sure the {label} key is present in the spectrum_id "
                "and the value is in a capturing group or disable the relevant feature generator."
            ) from e
184 | 
185 | 
def _remove_invalid_aa(psm_list: PSMList) -> PSMList:
    """
    Drop PSMs whose peptide sequence contains any of the residues B, J, O, U, X, or Z.

    Returns the input list unchanged if no such PSM is present; otherwise returns the subset of
    PSMs without these residues and logs how many were removed.
    """
    invalid_residues = "BJOUXZ"
    has_invalid_aa = np.array(
        [
            any(residue in invalid_residues for residue in psm.peptidoform.sequence)
            for psm in psm_list
        ]
    )

    if not any(has_invalid_aa):
        logger.debug("No PSMs with invalid amino acids found.")
        return psm_list

    logger.warning(f"Removed {sum(has_invalid_aa)} PSMs with invalid amino acids.")
    return psm_list[~has_invalid_aa]
198 | 


--------------------------------------------------------------------------------
/ms2rescore/parse_spectra.py:
--------------------------------------------------------------------------------
  1 | """Parse MGF files."""
  2 | 
  3 | import logging
  4 | import re
  5 | from enum import Enum
  6 | from itertools import chain
  7 | from typing import Optional, Set, Tuple
  8 | 
  9 | import numpy as np
 10 | from ms2rescore_rs import get_precursor_info
 11 | from psm_utils import PSMList
 12 | 
 13 | from ms2rescore.exceptions import MS2RescoreError
 14 | from ms2rescore.utils import infer_spectrum_path
 15 | 
 16 | LOGGER = logging.getLogger(__name__)
 17 | 
 18 | 
 19 | class MSDataType(str, Enum):
 20 |     """Enum for MS data types required for feature generation."""
 21 | 
 22 |     retention_time = "retention time"
 23 |     ion_mobility = "ion mobility"
 24 |     precursor_mz = "precursor m/z"
 25 |     ms2_spectra = "MS2 spectra"
 26 | 
 27 |     # Mimic behavior of StrEnum (Python >=3.11)
 28 |     def __str__(self):
 29 |         return self.value
 30 | 
 31 | 
 32 | def add_precursor_values(
 33 |     psm_list: PSMList, spectrum_path: str, spectrum_id_pattern: Optional[str] = None
 34 | ) -> Set[MSDataType]:
 35 |     """
 36 |     Add precursor m/z, retention time, and ion mobility values to a PSM list.
 37 | 
 38 |     Parameters
 39 |     ----------
 40 |     psm_list
 41 |         PSM list to add precursor values to.
 42 |     spectrum_path
 43 |         Path to the spectrum files.
 44 |     spectrum_id_pattern
 45 |         Regular expression pattern to extract spectrum IDs from file names. If provided, the
 46 |         pattern must contain a single capturing group that matches the spectrum ID. Default is
 47 |         None.
 48 | 
 49 |     Returns
 50 |     -------
 51 |     available_ms_data
 52 |         Set of available MS data types in the PSM list.
 53 | 
 54 |     """
 55 |     # Check if precursor values are missing in PSM list
 56 |     rt_missing = any(v is None or v == 0 or np.isnan(v) for v in psm_list["retention_time"])
 57 |     im_missing = any(v is None or v == 0 or np.isnan(v) for v in psm_list["ion_mobility"])
 58 |     mz_missing = any(v is None or v == 0 or np.isnan(v) for v in psm_list["precursor_mz"])
 59 | 
 60 |     # Get precursor values from spectrum files
 61 |     LOGGER.info("Parsing precursor info from spectrum files...")
 62 |     mz, rt, im = _get_precursor_values(psm_list, spectrum_path, spectrum_id_pattern)
 63 |     mz_found, rt_found, im_found = np.all(mz != 0.0), np.all(rt != 0.0), np.all(im != 0.0)
 64 |     # ms2rescore_rs always returns 0.0 for missing values
 65 | 
 66 |     # Update PSM list with missing precursor values
 67 |     if rt_missing and rt_found:
 68 |         LOGGER.debug("Missing retention time values in PSM list. Updating from spectrum files.")
 69 |         psm_list["retention_time"] = rt
 70 |     if im_missing and im_found:
 71 |         LOGGER.debug("Missing ion mobility values in PSM list. Updating from spectrum files.")
 72 |         psm_list["ion_mobility"] = im
 73 |     if mz_missing and mz_found:
 74 |         LOGGER.debug("Missing precursor m/z values in PSM list. Updating from spectrum files.")
 75 |         psm_list["precursor_mz"] = mz
 76 |     else:
 77 |         # Check if precursor m/z values are consistent between PSMs and spectrum files
 78 |         mz_diff = np.abs(psm_list["precursor_mz"] - mz)
 79 |         if np.mean(mz_diff) > 1e-2:
 80 |             LOGGER.warning(
 81 |                 "Mismatch between precursor m/z values in PSM list and spectrum files (mean "
 82 |                 "difference exceeds 0.01 Da). Please ensure that the correct spectrum files are "
 83 |                 "provided and that the `spectrum_id_pattern` and `psm_id_pattern` options are "
 84 |                 "configured correctly. See "
 85 |                 "https://ms2rescore.readthedocs.io/en/stable/userguide/configuration/#mapping-psms-to-spectra "
 86 |                 "for more information."
 87 |             )
 88 | 
 89 |     # Return available MS data types
 90 |     available_ms_data = {
 91 |         MSDataType.ms2_spectra,  # Assume MS2 spectra are always present
 92 |         MSDataType.retention_time if not rt_missing or rt_found else None,
 93 |         MSDataType.ion_mobility if not im_missing or im_found else None,
 94 |         MSDataType.precursor_mz if not mz_missing or mz_found else None,
 95 |     }
 96 |     available_ms_data.discard(None)
 97 | 
 98 |     return available_ms_data
 99 | 
100 | 
def _get_precursor_values(
    psm_list: PSMList, spectrum_path: str, spectrum_id_pattern: Optional[str]
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Get precursor m/z, RT, and IM from spectrum files.

    Parameters
    ----------
    psm_list
        PSM list for which to look up precursor values.
    spectrum_path
        Path passed to ``infer_spectrum_path`` together with each run name.
    spectrum_id_pattern
        Optional regex applied to the spectrum IDs from the spectrum file; the first capturing
        group is used as the ID to match against the PSM spectrum IDs.

    Returns
    -------
    mzs, rts, ims
        Arrays with precursor m/z, retention time, and ion mobility, in the order of the PSMs
        in ``psm_list``.

    Raises
    ------
    SpectrumParsingError
        If ``spectrum_id_pattern`` cannot be applied to a spectrum ID, or if a PSM spectrum ID
        is not present in the matching spectrum file.

    """
    # Compile once instead of once per run
    compiled_pattern = re.compile(spectrum_id_pattern) if spectrum_id_pattern else None

    # Iterate over different runs in PSM list
    precursor_dict = dict()
    psm_dict = psm_list.get_psm_dict()
    for runs in psm_dict.values():
        for run_name, psms in runs.items():
            spectrum_file = infer_spectrum_path(spectrum_path, run_name)

            LOGGER.debug("Reading spectrum file: '%s'", spectrum_file)
            precursors = get_precursor_info(str(spectrum_file))

            # Parse spectrum IDs with regex pattern if provided
            if compiled_pattern is not None:
                parsed_precursors = dict()
                for spectrum_id, precursor in precursors.items():
                    match = compiled_pattern.search(spectrum_id)
                    if match is None or compiled_pattern.groups < 1:
                        raise SpectrumParsingError(
                            "`spectrum_id_pattern` could not be applied to spectrum ID "
                            f"'{spectrum_id}' from spectrum file '{spectrum_file}'. Ensure that "
                            "the regex matches all spectrum IDs and contains a capturing group."
                        )
                    parsed_precursors[match.group(1)] = precursor
                precursors = parsed_precursors

            # Ensure all PSMs have a precursor values
            for psm in chain.from_iterable(psms.values()):
                if psm.spectrum_id not in precursors:
                    # Fall back to a placeholder if the spectrum file yielded no spectra at all
                    example_spectrum_id = next(iter(precursors), "<no spectra found>")
                    raise SpectrumParsingError(
                        "Mismatch between PSM and spectrum file IDs. Could not find precursor "
                        f"values for PSM with ID {psm.spectrum_id} in run {run_name}.\n"
                        "Please check that the `spectrum_id_pattern` and `psm_id_pattern` options "
                        "are configured correctly. See "
                        "https://ms2rescore.readthedocs.io/en/stable/userguide/configuration/#mapping-psms-to-spectra"
                        " for more information.\n"
                        f"Example ID from PSM file: {psm.spectrum_id}\n"
                        f"Example ID from spectrum file: {example_spectrum_id}"
                    )

            # Store precursor values in dictionary
            precursor_dict[run_name] = precursors

    # Reshape precursor values into arrays matching PSM list
    mzs = np.fromiter((precursor_dict[psm.run][psm.spectrum_id].mz for psm in psm_list), float)
    rts = np.fromiter((precursor_dict[psm.run][psm.spectrum_id].rt for psm in psm_list), float)
    ims = np.fromiter((precursor_dict[psm.run][psm.spectrum_id].im for psm in psm_list), float)

    return mzs, rts, ims
147 | 
148 | 
class SpectrumParsingError(MS2RescoreError):
    """Raised when precursor values for a PSM cannot be found in the spectrum file."""
153 | 


--------------------------------------------------------------------------------
/ms2rescore/report/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Functionality for analyzing and reporting MS²Rescore results, including reusable Plotly-based
3 | charts and HTML-report generation.
4 | """
5 | 


--------------------------------------------------------------------------------
/ms2rescore/report/__main__.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | import click
 4 | from rich.logging import RichHandler
 5 | 
 6 | from ms2rescore.report.generate import generate_report
 7 | 
 8 | logger = logging.getLogger(__name__)
 9 | 
10 | 
# Command-line entry point: configures logging and calls ``generate_report`` with the given
# ``output_prefix``. Documented with comments rather than a docstring, because click would use
# a docstring as the command's ``--help`` text.
@click.command()
@click.argument("output_prefix", type=str)
def main(**kwargs):
    # Silence mokapot's informational logging
    logging.getLogger("mokapot").setLevel(logging.WARNING)
    logging.basicConfig(
        level=logging.INFO,
        handlers=[RichHandler(rich_tracebacks=True)],
        format="%(message)s",
    )

    try:
        generate_report(kwargs["output_prefix"])
    except Exception as e:
        logger.exception(e)
        # Exit with a non-zero status. ``SystemExit`` is raised directly because the ``exit()``
        # helper is injected by the ``site`` module and is not guaranteed to exist (e.g. with
        # ``python -S`` or in frozen applications).
        raise SystemExit(1)
26 | 
27 | 
28 | if __name__ == "__main__":
29 |     main()
30 | 


--------------------------------------------------------------------------------
/ms2rescore/report/templates/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/ms2rescore/report/templates/__init__.py


--------------------------------------------------------------------------------
/ms2rescore/report/templates/about.html:
--------------------------------------------------------------------------------
 1 | <div class="card mb-4">
 2 |   <div class="card-header px-4">About MS²Rescore</div>
 3 |   <div class="card-body p-4">
 4 |     <div class="row">
 5 |       <div class="col-3 col-md-2 px-3">
 6 |         <img class="logo" src="https://raw.githubusercontent.com/compomics/ms2rescore/main/img/ms2rescore_logo.svg" alt="MS2Rescore logo" />
 7 |       </div>
 8 |       <div class="col-9 col-md-10">
 9 |         <p class="lead">MS²Rescore performs sensitive rescoring of peptide-spectrum matches (PSMs) using features from predictors of peptide behavior in LC-MS, such as MS²PIP for spectrum prediction, and DeepLC for retention time prediction. Rescoring is performed with Percolator or Mokapot.</p>
10 |         <p class="text-muted">
11 |           Please cite:<br />
12 |           <strong>MS²Rescore: Data-driven rescoring dramatically boosts immunopeptide identification rates.</strong><br />
13 |           Arthur Declercq, Robbin Bouwmeester, Aurélie Hirschler, Christine Carapito, Sven Degroeve, Lennart Martens, Ralf Gabriels<br />
14 |           <em>Molecular &amp; Cellular Proteomics</em> (2022) <a href="https://doi.org/10.1016/j.mcpro.2022.100266" target="_blank">doi:10.1016/j.mcpro.2022.100266</a>
15 |         </p>
16 |         <a class="btn btn-outline-dark me-3" href="https://github.com/compomics/ms2rescore" target="_blank"><i class="fa fa-github"></i> GitHub repo</a>
17 |         <a class="btn btn-outline-dark me-3" href="https://ms2rescore.readthedocs.io/en/latest/" target="_blank"><i class="fa fa-book"></i> Documentation</a>
18 |         <a class="btn btn-outline-dark me-3" href="https://github.com/compomics/ms2rescore/discussions" target="_blank"><i class="fa fa-comments"></i> Discussion forum</a>
19 |       </div>
20 |     </div>
21 |   </div>
22 | </div>
23 | 


--------------------------------------------------------------------------------
/ms2rescore/report/templates/base.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 |   <head>
 4 |     <meta charset="utf-8" />
 5 |     <meta name="viewport" content="width=device-width, initial-scale=1" />
 6 |     <title>MS&sup2;Rescore report</title>
 7 |     <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.1/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-4bw+/aepP/YC94hEpVNVgiZdgIC5+VKNBQNGCHeKRQN+PtmoHDEXuppvnDJzQIu9" crossorigin="anonymous" />
 8 |     <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.8.0/build/styles/default.min.css" />
 9 |     <link rel="preconnect" href="https://fonts.googleapis.com" />
10 |     <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
11 |     <link href="https://fonts.googleapis.com/css2?family=Lato:wght@300;400&family=Oswald:wght@400;500;600;700&display=swap" rel="stylesheet" />
12 |     <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/font-awesome@4.7.0/css/font-awesome.min.css" />
13 |     <script type="text/javascript" id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
14 |     <script type="text/javascript" src="https://cdn.plot.ly/plotly-2.25.2.min.js" charset="utf-8"></script>
15 |     {% include 'style.html' %}
16 |   </head>
17 |   <body>
18 |     <div class="container">
19 |       <!-- TITLE -->
20 |       <h1 class="mt-4 mb-4">MS<sup>2</sup>Rescore QC report</h1>
21 | 
22 |       <!-- REPORT METADATA -->
23 |       {% include 'metadata.html' %}
24 | 
25 |       <!-- ABOUT CARD -->
26 |       {% include 'about.html' %}
27 | 
28 |       <!-- CONTENT CARD -->
29 |       <div class="card mb-4">
30 |         <div class="card-header px-4">
31 |           <ul class="nav nav-tabs card-header-tabs" id="cardTabNav">
32 |             {% for tab in main_tabs %}
33 |             <li class="nav-item">
34 |               <a href="#{{ tab.id }}" class="nav-link {% if loop.index == 1 %}active{% endif %}" data-bs-toggle="tab">{{ tab.title }}</a>
35 |             </li>
36 |             {% endfor %}
37 |           </ul>
38 |         </div>
39 |         <div class="card-body p-4">
40 |           <div class="tab-content">
41 |             {% for tab in main_tabs %}
42 |             <div class="tab-pane fade {% if loop.index == 1 %}show active{% endif %}" id="{{ tab.id }}">{% include tab.template %}</div>
43 |             {% endfor %}
44 |           </div>
45 |         </div>
46 |       </div>
47 |     </div>
48 | 
49 |     <!-- JS dependencies -->
50 |     <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.1/dist/js/bootstrap.bundle.min.js" integrity="sha384-HwwvtgBNo3bZJJLYd8oVXjrBZt8cqVSpeBNS5n7C8IVInixGAoxmnlMuBnhbgrkm" crossorigin="anonymous"></script>
51 |     <script src="https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.8.0/build/highlight.min.js"></script>
52 |     <script src="https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.8.0/build/languages/json.min.js"></script>
53 |     <script src="https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.8.0/build/languages/python.min.js"></script>
54 | 
55 |     <!-- Highlight all code snippets except under the log tab -->
56 |     <script type="text/javascript">
57 |       const no_highlight_elements = document.querySelectorAll('#main_tab_log code');
58 |       no_highlight_elements.forEach((element) => {
59 |         element.classList.add('nohighlight');
60 |       });
61 |       hljs.highlightAll();
62 |     </script>
63 |   </body>
64 | </html>
65 | 


--------------------------------------------------------------------------------
/ms2rescore/report/templates/config.html:
--------------------------------------------------------------------------------
1 | <p>{{ tab.context.description}}</p>
2 | <pre><code class="language-json">{{ tab.context.config }}</code></pre>
3 | 


--------------------------------------------------------------------------------
/ms2rescore/report/templates/features.html:
--------------------------------------------------------------------------------
1 | {% for chart in tab.context.charts %}
2 | <h3>{{ chart.title }}</h3>
3 | <p>{{ chart.description }}</p>
4 | <div>{{ chart.chart }}</div>
5 | {% endfor %}
6 | 


--------------------------------------------------------------------------------
/ms2rescore/report/templates/log.html:
--------------------------------------------------------------------------------
1 | {{ tab.context.log }}
2 | 


--------------------------------------------------------------------------------
/ms2rescore/report/templates/metadata.html:
--------------------------------------------------------------------------------
 1 | <div class="row mb-4">
 2 |   <div class="col">
 3 |     <p>
 4 |       <span class="text-nowrap text-uppercase text-muted">Generated on</span><br />
 5 |       {{ metadata.generated_on }}
 6 |     </p>
 7 |   </div>
 8 |   <div class="col">
 9 |     <p>
10 |       <span class="text-nowrap text-uppercase text-muted">MS<sup>2</sup>Rescore version</span><br />
11 |       v{{ metadata.ms2rescore_version }}
12 |     </p>
13 |   </div>
14 |   <div class="col">
15 |     <p>
16 |       <span class="text-nowrap text-uppercase text-muted">PSM filename</span><br />
17 |       <code>{{ metadata.psm_filename }}</code>
18 |     </p>
19 |   </div>
20 | </div>
21 | 


--------------------------------------------------------------------------------
/ms2rescore/report/templates/overview.html:
--------------------------------------------------------------------------------
 1 | <h3>General statistics</h3>
 2 | <div class="mt-2 mb-4">
 3 |   <div class="row gy-4">{% for stats in tab.context.stats %} {% include 'stats-card.html' %} {% endfor %}</div>
 4 | </div>
 5 | 
 6 | <h3>Identification charts</h3>
 7 | {% for chart in tab.context.charts %}
 8 | <h4>{{ chart.title }}</h4>
 9 | <p>{{ chart.description }}</p>
10 | <div>{{ chart.chart }}</div>
11 | {% endfor %}
12 | 


--------------------------------------------------------------------------------
/ms2rescore/report/templates/stats-card.html:
--------------------------------------------------------------------------------
 1 | <div class="col">
 2 |   <div class="card {{ stats.card_color }}">
 3 |     <div class="card-statistic-3 p-4">
 4 |       <h5 class="card-title mb-4">{{ stats.item }}</h5>
 5 |       <div class="d-flex align-items-bottom justify-content-between text-nowrap mb-2">
 6 |         <div class="pe-1"><span class="h2">{{ stats.number }} </span><span>{{ stats.diff }}</span></div>
 7 |         <div class="d-flex flex-column align-items-end">
 8 |           <div class="mt-auto">{{ stats.percentage }} <i class="fa {% if stats.is_increase %} fa-arrow-up {% else %} fa-arrow-down {% endif %}"></i></div>
 9 |         </div>
10 |       </div>
11 |       <div class="progress" style="height: 12px; background-color: {{ stats.bar_color }}">
12 |         <div class="progress-bar" style="width: {{ stats.bar_percentage }}%; background-color: #c6c6c6"></div>
13 |       </div>
14 |     </div>
15 |   </div>
16 | </div>
17 | 


--------------------------------------------------------------------------------
/ms2rescore/report/templates/style.html:
--------------------------------------------------------------------------------
 1 | <style>
 2 |   @media (min-width: 1400px) {
 3 |     .container,
 4 |     .container-lg,
 5 |     .container-md,
 6 |     .container-sm,
 7 |     .container-xl,
 8 |     .container-xxl {
 9 |       max-width: 1140px;
10 |     }
11 |   }
12 | 
13 |   body {
14 |     font-family: "Lato", sans-serif;
15 |     font-weight: 400;
16 |     line-height: 1.6;
17 |   }
18 | 
19 |   h1,
20 |   h2,
21 |   h3,
22 |   h4,
23 |   h5,
24 |   h6,
25 |   .h1,
26 |   .h2,
27 |   .h3,
28 |   .h4,
29 |   .h5,
30 |   .h6 {
31 |     font-family: "Oswald", sans-serif;
32 |     font-weight: 500;
33 |   }
34 | 
35 |   .lead {
36 |     font-weight: 400;
37 |   }
38 | 
39 |   .logo {
40 |     width: 100%;
41 |   }
42 | 
43 |   .card {
44 |     background-color: #fff;
45 |     border-radius: 10px;
46 |     box-shadow: 0 0.46875rem 2.1875rem rgba(90, 97, 105, 0.1), 0 0.9375rem 1.40625rem rgba(90, 97, 105, 0.1), 0 0.25rem 0.53125rem rgba(90, 97, 105, 0.12), 0 0.125rem 0.1875rem rgba(90, 97, 105, 0.1);
47 |   }
48 | 
49 |   .card-bg-blue {
50 |     background: rgb(2, 0, 36);
51 |     background: linear-gradient(-45deg, rgba(2, 0, 36, 1) 0%, rgb(48, 60, 95) 33%, rgb(34, 147, 170) 100%);
52 |     color: #fff;
53 |     border: none;
54 |   }
55 | 
56 |   .card-bg-green {
57 |     background: rgb(0, 21, 36);
58 |     background: linear-gradient(-45deg, rgb(0 4 36) 0%, rgb(5 65 81) 33%, rgba(0, 173, 127, 1) 100%);
59 |     color: #fff;
60 |     border: none;
61 |   }
62 | 
63 |   .card-bg-red {
64 |     background: rgb(36, 0, 14);
65 |     background: linear-gradient(-45deg, rgba(36, 0, 14, 1) 0%, rgb(117, 47, 66) 33%, rgba(173, 0, 24, 1) 100%);
66 |     color: #fff;
67 |     border: none;
68 |   }
69 | 
70 |   .card .card-statistic-3 .card-icon-large .fas,
71 |   .card .card-statistic-3 .card-icon-large .far,
72 |   .card .card-statistic-3 .card-icon-large .fab,
73 |   .card .card-statistic-3 .card-icon-large .fal {
74 |     font-size: 110px;
75 |   }
76 | 
77 |   .card .card-statistic-3 .card-icon {
78 |     text-align: center;
79 |     line-height: 50px;
80 |     margin-left: 15px;
81 |     color: #000;
82 |     position: absolute;
83 |     right: -5px;
84 |     top: 20px;
85 |     opacity: 0.1;
86 |   }
87 | 
88 |   pre code {
89 |     background-color: #f3f3f3;
90 |     display: block;
91 |     overflow-x: auto;
92 |     padding: 1em;
93 |   }
94 | 
95 | </style>
96 | 


--------------------------------------------------------------------------------
/ms2rescore/report/templates/target-decoy.html:
--------------------------------------------------------------------------------
{# Render every chart in `tab.context.charts`: title heading, description paragraph, chart body. #}
{% for chart in tab.context.charts %}
<h3>{{ chart.title }}</h3>
<p>{{ chart.description }}</p>
<div>{{ chart.chart }}</div>
{% endfor %}
6 | 


--------------------------------------------------------------------------------
/ms2rescore/report/templates/texts.toml:
--------------------------------------------------------------------------------
  1 | [configuration]
  2 | description = """
  3 | This is the full configuration that was used, as rendered from the
  4 | default configuration, the user-provided configuration file, and the
  5 | command line arguments, in cascading order.
  6 | """
  7 | 
  8 | [charts.score_comparison]
  9 | title = "Score comparison"
 10 | description = """
 11 | This scatter plot shows the score for both target (blue) and decoy
 12 | (red) PSMs before rescoring (x-axis) and after rescoring (y-axis). Dashed
 13 | lines indicate the 1% FDR threshold for scores before and after rescoring.
 14 | PSMs in the upper-left quadrant are only identified after rescoring.
 15 | """
 16 | 
 17 | [charts.fdr_comparison]
 18 | title = "False discovery rate comparison"
 19 | description = """
 20 | This plot shows the number of identified target PSMs
 21 | as a function of the FDR threshold. The plot starts at the top-right corner
 22 | with the total number of PSMs in the dataset (no FDR filtering). As the
 23 | FDR threshold becomes more stringent (towards the left of the x-axis),
 24 | the number of identified target PSMs goes down. The dashed line
 25 | indicates the 1% FDR threshold.
 26 | """
 27 | 
 28 | [charts.identification_overlap]
 29 | title = "Identification overlap"
 30 | description = """
 31 | This plot shows the unique identified PSMs, peptides, and (optionally)
 32 | protein groups that were removed, retained, and gained by rescoring.
 33 | """
 34 | 
 35 | [charts.score_histogram]
 36 | title = "Score histogram"
 37 | description = """
 38 | The score histogram shows the score distribution for both target PSMs (blue)
 39 | and decoy PSMs (red). The target score distribution is expected to show
 40 | two modes, corresponding to high-scoring (presumably correct) PSMs
 41 | and low-scoring (presumably incorrect) PSMs. The decoy score
 42 | distribution is expected to show a single mode, which should
 43 | approximate the low-scoring part of the target score distribution
 44 | as closely as possible. This approximation can be more easily assessed
 45 | in the percentile-percentile plot (see below). The dashed line indicates
 46 | the 1% FDR threshold.
 47 | """
 48 | 
 49 | [charts.pp_plot]
 50 | title = "Percentile-percentile plot"
 51 | description = """
 52 | The percentile-percentile (PP) plot shows the empirical cumulative
 53 | distribution function (ECDF) of the target distribution as a function of
 54 | the ECDF of the decoy distribution. In the context of peptide
 55 | identification, it can be used to assess the quality of decoy PSMs and
 56 | their capacity to help in correctly estimating the false discovery rate.
 57 | 
 58 | Ideally, the PP-plot should follow a straight diagonal line up until the
 59 | end of the decoy distribution (right-hand side of the plot), where the
 60 | line turns vertically upwards. This means that the decoy distribution
 61 | perfectly aligns with the first part of the target distribution (the
 62 | low-scoring and presumably bad target PSMs) and therefore correctly
 63 | models the bad target PSMs. This diagonal line matches the ratio of
 64 | the number of decoy to the number of target PSMs.
 65 | 
 66 | More information on this type of diagnostic plot can be found at
 67 | <a href="https://statomics.github.io/TargetDecoy/articles/TargetDecoy.html" target="_blank">statomics.github.io/TargetDecoy</a>.
 68 | """
 69 | 
 70 | [charts.feature_usage]
 71 | title = "Feature usage in rescoring model"
 72 | description = """
 73 | This plot shows the usage of each feature in the
 74 | rescoring model. The higher the usage, the more the feature contributes to separating target and
 75 | decoy PSMs. Note that the usage is not necessarily correlated with the individual
 76 | performance of the feature, as some features may be redundant with others.
 77 | """
 78 | 
 79 | [charts.feature_performance]
 80 | title = "Individual feature performance"
 81 | description = """
 82 | The following plot shows the performance of individual features. For each
 83 | feature, q-values are calculated as if that feature was individually used for scoring PSMs without
 84 | any other information. Then, the area under curve (AUC) is calculated for the empirical cumulative
 85 | distribution function (ECDF) of the q-values. The higher the AUC, the better the feature is at
 86 | discriminating between target and decoy PSMs without any other information.
 87 | """
 88 | 
 89 | [charts.ms2pip_pearson]
 90 | title = "MS²PIP model performance"
 91 | description = """
 92 | MS²PIP model performance can be estimated by calculating the Pearson correlation coefficient
 93 | between the predicted and observed fragment ion intensities. Fragment intensities are first
 94 | normalized to the total ion current and then log2-transformed. The following histogram shows the
 95 | distribution of Pearson correlation coefficients for all target PSMs that passed the 1% FDR
 96 | threshold. The red dashed line indicates the median correlation.
 97 | """
 98 | 
 99 | [charts.deeplc_performance]
100 | title = "DeepLC model performance"
101 | description = """
102 | DeepLC model performance can be visualized by plotting the predicted retention times against the
103 | observed retention times (top chart), or by calculating the relative mean absolute error (RMAE). The
104 | bottom chart shows the distribution of RMAE values of DeepLC predictions on 460 different benchmark
105 | datasets. The red line indicates the RMAE value for all target PSMs that passed the 1% FDR threshold
106 | of the current dataset. A lower RMAE value indicates better performance.
107 | """
108 | 
109 | [charts.im2deep_performance]
110 | title = "IM2Deep model performance"
111 | description = """
112 | IM2Deep model performance can be visualized by plotting the predicted CCS against the observed CCS.
113 | """
114 | 
115 | [charts.ionmob_performance]
116 | title = "ionmob model performance"
117 | description = """
118 | ionmob model performance can be visualized by plotting the predicted CCS against the observed CCS.
119 | """
120 | 


--------------------------------------------------------------------------------
/ms2rescore/report/utils.py:
--------------------------------------------------------------------------------
 1 | """Utility functions for MS²Rescore report generation."""
 2 | 
 3 | import logging
 4 | from collections import defaultdict
 5 | from csv import DictReader
 6 | from pathlib import Path
 7 | from typing import Optional, Tuple
 8 | 
 9 | import pandas as pd
10 | import psm_utils
11 | from mokapot import LinearConfidence, LinearPsmDataset, read_fasta
12 | 
13 | from ms2rescore.exceptions import ReportGenerationError
14 | 
15 | logger = logging.getLogger(__name__)
16 | 
17 | 
def read_feature_names(feature_names_path: Path) -> dict:
    """
    Load the feature generator to feature name mapping from a tab-separated file.

    Parameters
    ----------
    feature_names_path
        Path to a tab-delimited file with a header row that contains at least the columns
        ``feature_generator`` and ``feature_name``.

    Returns
    -------
    dict
        A ``defaultdict(list)`` that maps each feature generator to the list of its feature
        names, in the order in which they occur in the file.

    """
    names_by_generator = defaultdict(list)
    with open(feature_names_path) as handle:
        for row in DictReader(handle, delimiter="\t"):
            generator, name = row["feature_generator"], row["feature_name"]
            names_by_generator[generator].append(name)
    return names_by_generator
26 | 
27 | 
def get_feature_values(
    psm_list: psm_utils.PSMList, feature_names: Optional[list] = None
) -> pd.DataFrame:
    """
    Collect the rescoring feature values of all PSMs into a ``float32`` data frame.

    Parameters
    ----------
    psm_list
        PSMs whose ``rescoring_features`` are read.
    feature_names
        Names of the features to extract. If empty or ``None``, the feature names of the first
        PSM in the list are used.

    Returns
    -------
    pandas.DataFrame
        One row per PSM and one column per requested feature.

    """
    selected = feature_names if feature_names else list(psm_list[0].rescoring_features.keys())
    rows = ({name: psm.rescoring_features[name] for name in selected} for psm in psm_list)
    return pd.DataFrame(rows).astype("float32")
38 | 
39 | 
def get_confidence_estimates(
    psm_list: psm_utils.PSMList, fasta_file: Optional[str] = None
) -> Tuple[LinearConfidence, LinearConfidence]:
    """
    Compute confidence estimates for the scores before and after rescoring.

    Parameters
    ----------
    psm_list
        Rescored PSMs. The ``provenance_data`` of the PSMs must hold a
        ``before_rescoring_score`` entry.
    fasta_file
        Optional path to a FASTA file; if given, its proteins are added to the dataset before
        confidence is assigned.

    Returns
    -------
    tuple
        Confidence estimates ``(before, after)``. An element is ``None`` if assigning
        confidence for that set of scores raised a ``RuntimeError`` or ``IndexError``.

    Raises
    ------
    ReportGenerationError
        If no ``before_rescoring_score`` is present in the provenance data.

    """
    try:
        provenance = pd.DataFrame.from_records(psm_list["provenance_data"])
        score_before = provenance["before_rescoring_score"].astype(float)
    except KeyError as e:
        raise ReportGenerationError(
            "No `before_rescoring_score` in PSM list provenance data. Ensure that the PSM list "
            "was generated by MS²Rescore. Could not generate report."
        ) from e

    score_after = psm_list["score"]

    # Peptide identifier: peptidoform string with the trailing charge suffix removed
    peptidoform_strings = pd.Series(psm_list["peptidoform"]).astype(str)
    peptide = peptidoform_strings.str.replace(r"(/\d+$)", "", n=1, regex=True)
    is_target = ~psm_list["is_decoy"]
    psms = pd.DataFrame({"peptide": peptide, "is_target": is_target}).reset_index()

    dataset = LinearPsmDataset(
        psms=psms,
        target_column="is_target",
        spectrum_columns="index",
        peptide_column="peptide",
    )
    if fasta_file:
        dataset.add_proteins(read_fasta(fasta_file))

    estimates = {}
    for label, scores in (("before", score_before), ("after", score_after)):
        try:
            estimates[label] = dataset.assign_confidence(scores=scores)
        except (RuntimeError, IndexError):
            estimates[label] = None
            logger.warning("Could not assign confidence estimates for %s rescoring.", label)

    return estimates["before"], estimates["after"]
78 | 


--------------------------------------------------------------------------------
/ms2rescore/rescoring_engines/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Rescoring engines integrated in MS²Rescore.
3 | 
4 | Each integrated rescoring engine typically includes a :py:func:`rescore` function that takes a
5 | :py:class:`~psm_utils.psm_list.PSMList` as input and writes the new scores, q-values, and PEPs to
6 | the original :py:class:`~psm_utils.psm_list.PSMList`.
7 | 
8 | """
9 | 


--------------------------------------------------------------------------------
/ms2rescore/rescoring_engines/mokapot.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Mokapot integration for MS²Rescore.
  3 | 
  4 | :py:mod:`mokapot` is a full-Python implementation of the semi-supervised learning algorithms
  5 | introduced with Percolator. It builds upon the flexible scikit-learn package, which makes it
  6 | highly efficient for routine applications, but also customizable for experimental research
  7 | settings. Using Mokapot through MS²Rescore brings several advantages over Percolator: It can be
  8 | easily installed in the same Python environment, and it is generally faster as the communication
  9 | between the tools happens completely within Python, without the need to write and read files
 10 | or communicate through the command line. See
 11 | `mokapot.readthedocs.io <https://mokapot.readthedocs.io/>`_ for more information.
 12 | 
 13 | If you use Mokapot through MS²Rescore, please cite:
 14 | 
 15 | .. epigraph::
 16 |    Fondrie W. E. & Noble W. S. mokapot: Fast and Flexible Semisupervised
 17 |    Learning for Peptide Detection. *J Proteome Res* (2021).
 18 |    `doi:10.1021/acs.jproteome.0c01010 <https://doi.org/10.1021/acs.jproteome.0c01010>`_
 19 | 
 20 | """
 21 | 
 22 | import logging
 23 | import re
 24 | from typing import Any, Dict, List, Optional, Tuple
 25 | 
 26 | import mokapot
 27 | import numpy as np
 28 | import pandas as pd
 29 | import psm_utils
 30 | from mokapot.brew import brew
 31 | from mokapot.dataset import LinearPsmDataset
 32 | from mokapot.model import PercolatorModel
 33 | from pyteomics.mass import nist_mass
 34 | 
 35 | from ms2rescore.exceptions import RescoringError
 36 | 
 37 | logger = logging.getLogger(__name__)
 38 | logging.getLogger("numba").setLevel(logging.WARNING)
 39 | 
 40 | 
 41 | def rescore(
 42 |     psm_list: psm_utils.PSMList,
 43 |     output_file_root: str = "ms2rescore",
 44 |     fasta_file: Optional[str] = None,
 45 |     train_fdr: float = 0.01,
 46 |     write_weights: bool = False,
 47 |     write_txt: bool = False,
 48 |     protein_kwargs: Optional[Dict[str, Any]] = None,
 49 |     **kwargs: Any,
 50 | ) -> None:
 51 |     """
 52 |     Rescore PSMs with Mokapot.
 53 | 
 54 |     The function provides a high-level interface to use Mokapot within MS²Rescore. It first
 55 |     converts the :py:class:`~psm_utils.psm_list.PSMList` to a
 56 |     :py:class:`~mokapot.dataset.LinearPsmDataset`, and then optionally adds protein information
 57 |     from a FASTA file. The dataset is then passed to the :py:func:`~mokapot.brew` function, which
 58 |     returns the new scores, q-values, and PEPs. These are then written back to the original
 59 |     :py:class:`~psm_utils.psm_list.PSMList`.
 60 | 
 61 |     Parameters
 62 |     ----------
 63 |     psm_list
 64 |         PSMs to be rescored.
 65 |     output_file_root
 66 |         Root of output file names. Defaults to ``"ms2rescore"``.
 67 |     fasta_file
 68 |         Path to FASTA file with protein sequences to use for protein inference. Defaults to
 69 |         ``None``.
 70 |     train_fdr
 71 |         FDR to use for training the Mokapot model. Defaults to ``0.01``.
 72 |     write_weights
 73 |         Write model weights to a text file. Defaults to ``False``.
 74 |     write_txt
 75 |         Write Mokapot results to a text file. Defaults to ``False``.
 76 |     protein_kwargs
 77 |         Keyword arguments to pass to the :py:meth:`~mokapot.dataset.LinearPsmDataset.add_proteins`
 78 |         method.
 79 |     **kwargs
 80 |         Additional keyword arguments are passed to the Mokapot :py:func:`~mokapot.brew` function.
 81 | 
 82 |     """
 83 |     _set_log_levels()
 84 | 
 85 |     if "write_flashlfq" in kwargs:
 86 |         _ = kwargs.pop("write_flashlfq")
 87 |         logger.warning(
 88 |             "The `write_flashlfq` argument has moved. To write FlashLFQ generic TSV, use the "
 89 |             "MS²Rescore-level `write_flashlfq` option instead."
 90 |         )
 91 | 
 92 |     # Convert PSMList to Mokapot dataset
 93 |     lin_psm_data = convert_psm_list(psm_list)
 94 |     feature_names = list(lin_psm_data.features.columns)
 95 | 
 96 |     # Add proteins
 97 |     if fasta_file:
 98 |         logger.debug(f"Adding protein info from {fasta_file} with options: `{protein_kwargs}`")
 99 |         lin_psm_data.add_proteins(fasta_file, **protein_kwargs)
100 | 
101 |     # Rescore
102 |     logger.debug(f"Mokapot brew options: `{kwargs}`")
103 |     try:
104 |         confidence_results, models = brew(
105 |             lin_psm_data, model=PercolatorModel(train_fdr=train_fdr), rng=8, **kwargs
106 |         )
107 |     except RuntimeError as e:
108 |         raise RescoringError("Mokapot could not be run. Please check the input data.") from e
109 | 
110 |     add_psm_confidence(psm_list, confidence_results)
111 |     add_peptide_confidence(psm_list, confidence_results)
112 | 
113 |     # Write results
114 |     if write_weights:
115 |         try:
116 |             save_model_weights(models, feature_names, output_file_root)
117 |         except AttributeError:
118 |             logger.warning(
119 |                 "Could not extract Mokapot model weights with the `coef_` attribute. Most likely, "
120 |                 "a model type different from the default (linear SVM) was used. No weights will "
121 |                 "be saved."
122 |             )
123 |     if write_txt:
124 |         confidence_results.to_txt(file_root=output_file_root, decoys=True)
125 | 
126 | 
def convert_psm_list(
    psm_list: psm_utils.PSMList,
    feature_names: Optional[List[str]] = None,
) -> LinearPsmDataset:
    """
    Build a Mokapot :py:class:`~mokapot.dataset.LinearPsmDataset` from a PSM list.

    Parameters
    ----------
    psm_list
        PSMList to rescore.
    feature_names
        List of feature names to use. Items must be keys in the PSM `rescoring_features` dict.
        If empty or ``None``, all features found in the PSMs are used.

    Returns
    -------
    LinearPsmDataset
        Dataset with one row per PSM, the PSM metadata columns, and the rescoring features as
        ``feature:``-prefixed columns.

    """
    # A fresh 0..n-1 `index` column serves as artificial spectrum identifier
    psm_df = psm_list.to_dataframe().reset_index(drop=True).reset_index()

    peptidoforms = psm_df["peptidoform"]
    psm_df["peptide"] = peptidoforms.astype(str).str.replace(r"(/\d+$)", "", n=1, regex=True)
    psm_df["is_target"] = ~psm_df["is_decoy"]
    psm_df["charge"] = peptidoforms.apply(lambda pf: pf.precursor_charge)
    psm_df["calcmass"] = peptidoforms.apply(lambda pf: pf.theoretical_mass)
    psm_df["expmass"] = _mz_to_mass(psm_df["precursor_mz"], psm_df["charge"])

    metadata_columns = [
        "index",
        "spectrum_id",
        "peptide",
        "is_target",
        "protein_list",
        "run",
        "calcmass",
        "expmass",
        "retention_time",
        "charge",
    ]

    # Missing feature values are replaced by zero after casting to float
    feature_df = pd.DataFrame(list(psm_df["rescoring_features"])).astype(float).fillna(0.0)
    feature_df.columns = [f"feature:{name}" for name in feature_df.columns]
    combined_df = pd.concat([psm_df[metadata_columns], feature_df], axis=1)

    if feature_names:
        feature_columns = [f"feature:{name}" for name in feature_names]
    else:
        feature_columns = list(feature_df.columns)

    return LinearPsmDataset(
        psms=combined_df,
        target_column="is_target",
        spectrum_columns="index",  # Use artificial index to allow multi-rank rescoring
        peptide_column="peptide",
        protein_column="protein_list",
        feature_columns=feature_columns,
        filename_column="run",
        scan_column="spectrum_id",  # Keep as spectrum_id?
        calcmass_column="calcmass",
        expmass_column="expmass",
        rt_column="retention_time",
        charge_column="charge",
    )
187 | 
188 | 
def save_model_weights(
    models: Tuple[mokapot.model.Model], feature_names: List[str], output_file_root: str
):
    """
    Write the coefficients of the Mokapot models to a tab-separated file.

    The file ``{output_file_root}.mokapot.weights.tsv`` holds one row per model and one column
    per feature.

    Parameters
    ----------
    models
        Tuple of Mokapot models (one for each fold) to save.
    feature_names
        List of feature names that were used to train the models.
    output_file_root
        Root of output file names.

    Raises
    ------
    AttributeError
        If the coefficients cannot be read through the ``coef_`` attribute of a model estimator.

    """
    try:
        per_model_weights = [model.estimator.coef_[0] for model in models]
        coefficients = np.stack(per_model_weights)
    except AttributeError as e:
        raise AttributeError(
            "Could not extract Mokapot model weights with the `coef_` attribute. Most likely, "
            "a model type different from the default (linear SVM) was used."
        ) from e

    weights_path = output_file_root + ".mokapot.weights.tsv"
    weights_table = pd.DataFrame(coefficients, columns=list(feature_names))
    weights_table.to_csv(weights_path, sep="\t", index=False)
216 | 
217 | 
def add_psm_confidence(
    psm_list: psm_utils.PSMList, confidence_results: mokapot.confidence.Confidence
) -> None:
    """
    Write Mokapot PSM-level results back to the PSM list.

    The ``score``, ``qvalue``, and ``pep`` of each PSM are overwritten with the Mokapot score,
    q-value, and PEP, after which the ranks are recomputed from the new scores. PSMs without an
    entry in the target or decoy results receive ``NaN`` values.
    """
    columns = ["mokapot score", "mokapot q-value", "mokapot PEP"]

    # Rows are placed by the `index` column of the results, so they line up with `psm_list`
    values = np.full((len(psm_list), len(columns)), np.nan)
    for estimates in (
        confidence_results.confidence_estimates,
        confidence_results.decoy_confidence_estimates,
    ):
        table = estimates["psms"].set_index("index").sort_index()[columns]
        values[table.index] = table.values

    # Add Mokapot results to PSMList
    psm_list["score"] = values[:, 0]
    psm_list["qvalue"] = values[:, 1]
    psm_list["pep"] = values[:, 2]

    # Higher Mokapot scores are better, so ranks are recomputed accordingly
    psm_list.set_ranks(lower_score_better=False)
241 | 
242 | 
def add_peptide_confidence(
    psm_list: psm_utils.PSMList, confidence_results: mokapot.confidence.Confidence
) -> None:
    """
    Store Mokapot peptide-level results in the metadata of each PSM.

    For every PSM, the peptide-level score, q-value, and PEP are looked up by the peptidoform
    string without its trailing charge suffix and written to the metadata keys
    ``peptide_score``, ``peptide_qvalue``, and ``peptide_pep``.
    """
    columns = ["mokapot score", "mokapot q-value", "mokapot PEP"]
    target_peptides = confidence_results.confidence_estimates["peptides"]
    decoy_peptides = confidence_results.decoy_confidence_estimates["peptides"]
    peptide_info = pd.concat(
        [
            target_peptides.set_index("peptide")[columns],
            decoy_peptides.set_index("peptide")[columns],
        ],
        axis=0,
    ).to_dict(orient="index")

    # Peptide keys carry no charge, so the trailing `/<charge>` is removed before the lookup
    charge_suffix = re.compile(r"(/\d+$)")
    for psm in psm_list:
        peptide = charge_suffix.sub("", str(psm.peptidoform), 1)
        scores = peptide_info[peptide]
        psm.metadata.update(
            {
                "peptide_score": scores["mokapot score"],
                "peptide_qvalue": scores["mokapot q-value"],
                "peptide_pep": scores["mokapot PEP"],
            }
        )
268 | 
269 | 
def _mz_to_mass(mz: float, charge: int) -> float:
    """
    Convert m/z to mass by subtracting one hydrogen mass per charge.

    NOTE(review): ``nist_mass["H"][1][0]`` appears to be the atomic mass of the ¹H isotope,
    not the mass of a bare proton; confirm that this is the intended charge carrier mass.
    """
    hydrogen_mass = nist_mass["H"][1][0]
    return mz * charge - charge * hydrogen_mass
273 | 
274 | 
def _set_log_levels() -> None:
    """Limit the verbosity of the Mokapot and Numba loggers based on this module's log level."""
    effective_level = logger.getEffectiveLevel()

    # Outside of debug mode, only show Mokapot warnings and errors
    if effective_level > logging.DEBUG:
        logging.getLogger("mokapot").setLevel(logging.WARNING)

    # In debug mode, keep Numba at INFO or higher
    if effective_level < logging.INFO:
        logging.getLogger("numba").setLevel(logging.INFO)
284 | 


--------------------------------------------------------------------------------
/ms2rescore/rescoring_engines/percolator.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Percolator integration for MS²Rescore
  3 | 
  4 | Percolator was the first tool to introduce semi-supervised learning for PSM rescoring. It is
  5 | still widely used and has been integrated in many proteomics data analysis pipelines. This module
  6 | integrates with Percolator through its command line interface. Percolator must be installed
  7 | separately and the ``percolator`` command must be available in the PATH for this module to work.
  8 | See `github.com/percolator/percolator <https://github.com/percolator/percolator>`_ for
  9 | more information.
 10 | 
 11 | If you use Percolator through MS²Rescore, please cite:
 12 | 
 13 | .. epigraph::
 14 |     The M, MacCoss MJ, Noble WS, Käll L. Fast and Accurate Protein False Discovery Rates on
 15 |     Large-Scale Proteomics Data Sets with Percolator 3.0. *J Am Soc Mass Spectrom* (2016).
 16 |     `doi:10.1007/s13361-016-1460-7 <https://doi.org/10.1007/s13361-016-1460-7>`_
 17 | 
 18 | """
 19 | 
 20 | import logging
 21 | import subprocess
 22 | from typing import Any, Dict, Optional
 23 | from copy import deepcopy
 24 | 
 25 | import psm_utils
 26 | 
 27 | from ms2rescore.exceptions import MS2RescoreError
 28 | 
 29 | logger = logging.getLogger(__name__)
 30 | 
 31 | 
# Maps a lowercase log level name to the integer passed as Percolator's ``verbose`` option.
# Levels from "warning" upwards all map to 0.
LOG_LEVEL_MAP = {
    "critical": 0,
    "error": 0,
    "warning": 0,
    "info": 1,
    "debug": 2,
}
 39 | 
 40 | 
 41 | def rescore(
 42 |     psm_list: psm_utils.PSMList,
 43 |     output_file_root: str = "ms2rescore",
 44 |     log_level: str = "info",
 45 |     processes: int = 1,
 46 |     fasta_file: Optional[str] = None,
 47 |     percolator_kwargs: Optional[Dict[str, Any]] = None,
 48 | ) -> None:
 49 |     """
 50 |     Rescore PSMs with Percolator.
 51 | 
 52 |     Aside from updating the PSM ``score``, ``qvalue``, and ``pep`` values, the following output
 53 |     files are written:
 54 | 
 55 |         - Target PSMs: ``{output_file_root}.percolator.psms.pout``
 56 |         - Target peptides: ``{output_file_root}.percolator.peptides.pout``
 57 |         - Target proteins: ``{output_file_root}.percolator.proteins.pout``
 58 |         - Decoy PSMs: ``{output_file_root}.percolator.decoy.psms.pout``
 59 |         - Decoy peptides: ``{output_file_root}.percolator.decoy.peptides.pout``
 60 |         - Decoy proteins: ``{output_file_root}.percolator.decoy.proteins.pout``
 61 |         - Feature weights: ``{output_file_root}.percolator.weights.tsv``
 62 | 
 63 |     Percolator is run through its command line interface. Percolator must be installed separately
 64 |     and the ``percolator`` command must be available in the PATH for this module to work.
 65 | 
 66 |     Parameters
 67 |     ----------
 68 |     psm_list
 69 |         PSMs to be rescored.
 70 |     output_file_root
 71 |         Root of output file names. Defaults to ``ms2rescore``.
 72 |     log_level
 73 |         Log level for Percolator. Defaults to ``info``.
 74 |     processes
 75 |         Number of processes to use. Defaults to 1.
 76 |     fasta_file
 77 |         Path to FASTA file for protein inference. Defaults to ``None``.
 78 |     percolator_kwargs
 79 |         Additional keyword arguments for Percolator. Defaults to ``None``.
 80 | 
 81 |     """
 82 |     percolator_kwargs = {
 83 |         "results-psms": output_file_root + ".percolator.psms.pout",
 84 |         "decoy-results-psms": output_file_root + ".percolator.decoy.psms.pout",
 85 |         "results-peptides": output_file_root + ".percolator.peptides.pout",
 86 |         "decoy-results-peptides": output_file_root + ".percolator.decoy.peptides.pout",
 87 |         "results-proteins": output_file_root + ".percolator.proteins.pout",
 88 |         "decoy-results-proteins": output_file_root + ".percolator.decoy.proteins.pout",
 89 |         "weights": output_file_root + ".percolator.weights.tsv",
 90 |         "verbose": LOG_LEVEL_MAP[log_level],
 91 |         "num-threads": min(processes, 128),  # Higher values not supported by Percolator
 92 |         "post-processing-tdc": True,
 93 |     }
 94 |     if percolator_kwargs:
 95 |         percolator_kwargs.update(percolator_kwargs)
 96 | 
 97 |     if fasta_file:
 98 |         percolator_kwargs["picked-protein"] = fasta_file
 99 | 
100 |     pin_filepath = f"{output_file_root}.pin"
101 |     percolator_cmd = _construct_percolator_command(percolator_kwargs, pin_filepath)
102 | 
103 |     # Need to be able to link back to original PSMs, so reindex spectrum IDs, but copy PSM list
104 |     # to avoid modifying original...
105 |     # TODO: Better approach for this?
106 | 
107 |     psm_list_reindexed = deepcopy(psm_list)
108 |     psm_list_reindexed.set_ranks()
109 |     psm_list_reindexed["spectrum_id"] = [
110 |         f"{psm.get_usi(as_url=False)}_{psm.rank}" for psm in psm_list_reindexed
111 |     ]
112 |     spectrum_id_index = {
113 |         spectrum_id: index for index, spectrum_id in enumerate(psm_list_reindexed["spectrum_id"])
114 |     }
115 | 
116 |     _write_pin_file(psm_list_reindexed, pin_filepath)
117 | 
118 |     logger.debug(f"Running percolator command {' '.join(percolator_cmd)}")
119 |     try:
120 |         output = subprocess.run(percolator_cmd, capture_output=True)
121 |     except FileNotFoundError as e:
122 |         if subprocess.getstatusoutput("percolator")[0] != 0:
123 |             raise MS2RescoreError(
124 |                 "Could not run Percolator. Please ensure that the program is installed and "
125 |                 "available in your PATH. See "
126 |                 "https://ms2rescore.readthedocs.io/en/latest/installation/#installing-percolator "
127 |                 "for more information."
128 |             ) from e
129 |         else:
130 |             logger.warn(f"Running Percolator resulted in an error:\n{output.stdout}")
131 |             raise MS2RescoreError("Percolator error") from e
132 |     except subprocess.CalledProcessError as e:
133 |         logger.warn(f"Running Percolator resulted in an error:\n{output.stdout}")
134 |         raise MS2RescoreError("Percolator error") from e
135 | 
136 |     logger.info(
137 |         "Percolator output: \n" + _decode_string(output.stderr), extra={"highlighter": None}
138 |     )
139 | 
140 |     _update_psm_scores(
141 |         psm_list,
142 |         percolator_kwargs["results-psms"],
143 |         percolator_kwargs["decoy-results-psms"],
144 |         spectrum_id_index,
145 |     )
146 | 
147 | 
def _update_psm_scores(
    psm_list: psm_utils.PSMList, target_pout: str, decoy_pout: str, spectrum_id_index: Dict[str, int]
):
    """
    Update PSM scores with Percolator results.

    PSMs are read from the target and decoy pout files, concatenated, and sorted by the position
    that ``spectrum_id_index`` holds for their ``spectrum_id``. The ``score``, ``qvalue``, and
    ``pep`` values are then copied pairwise onto ``psm_list``, which is modified in place, and
    the ranks of ``psm_list`` are recomputed.

    Parameters
    ----------
    psm_list
        Original PSM list to update.
    target_pout
        Path to the Percolator target PSM results file.
    decoy_pout
        Path to the Percolator decoy PSM results file.
    spectrum_id_index
        Mapping of the spectrum ID as written to the PIN file to the position of that PSM in
        ``psm_list``.

    Raises
    ------
    MS2RescoreError
        If the number of PSMs read from Percolator differs from the number of PSMs in
        ``psm_list``.

    """
    target_psms = psm_utils.io.read_file(target_pout, filetype="percolator")
    decoy_psms = psm_utils.io.read_file(decoy_pout, filetype="percolator")
    psm_list_percolator = psm_utils.PSMList(psm_list=target_psms.psm_list + decoy_psms.psm_list)

    # Sort by reindexed spectrum_id so order matches original PSM list
    # (`sorted` returns a plain list of PSMs; a spectrum_id missing from the index is a lookup
    # error at this point, before the length check below)
    psm_list_percolator = sorted(
        psm_list_percolator, key=lambda psm: spectrum_id_index[psm["spectrum_id"]]
    )

    if not len(psm_list) == len(psm_list_percolator):
        raise MS2RescoreError(
            f"Number of PSMs in original list ({len(psm_list)}) does not match number of PSMs in "
            f"Percolator output ({len(psm_list_percolator)})"
        )

    # Both sequences are now in the same order, so values can be copied pairwise
    for original_psm, new_psm in zip(psm_list, psm_list_percolator):
        original_psm["score"] = new_psm["score"]
        original_psm["qvalue"] = new_psm["qvalue"]
        original_psm["pep"] = new_psm["pep"]

    psm_list.set_ranks(lower_score_better=False)
179 | 
180 | 
def _write_pin_file(psm_list: psm_utils.PSMList, filepath: str):
    """
    Write the PSM list to a Percolator PIN file for rescoring.

    The rescoring feature names of the first PSM in the list determine which features are
    written.
    """
    logger.debug(f"Writing PIN file to {filepath}")
    feature_names = psm_list[0].rescoring_features.keys()
    psm_utils.io.write_file(
        psm_list,
        filename=filepath,
        filetype="percolator",
        style="pin",
        feature_names=feature_names,
    )
191 | 
192 | 
def _construct_percolator_command(percolator_kwargs: Dict, pin_filepath: str):
    """
    Create Percolator command for given set of arguments and path to PIN file.

    Parameters
    ----------
    percolator_kwargs
        Percolator options, keyed by option name without the leading ``--``. Boolean values are
        treated as flags: ``True`` adds the bare flag and ``False`` omits the option. Any other
        value is added as ``--key`` followed by ``str(value)``.
    pin_filepath
        Path to the PIN file, added as the last argument.

    Returns
    -------
    list of str
        Command line arguments, starting with ``percolator``.

    """
    percolator_cmd = ["percolator"]
    for key, value in percolator_kwargs.items():
        if isinstance(value, bool):
            # Flags carry no value: emit when enabled, leave out when disabled
            if value:
                percolator_cmd.append(f"--{key}")
            continue
        percolator_cmd.append(f"--{key}")
        percolator_cmd.append(str(value))
        if key == "init-weights":
            # Initial weights are always combined with Percolator's `--static` flag
            percolator_cmd.append("--static")
    percolator_cmd.append(pin_filepath)
    return percolator_cmd
208 | 
209 | 
def _decode_string(encoded_string):
    """
    Decode bytes by trying a fixed list of encodings in order.

    Returns the result of the first encoding that decodes without a ``UnicodeDecodeError``.
    Raises ``MS2RescoreError`` if none of the encodings succeeds.
    """
    candidate_encodings = ("utf-8", "latin-1", "ascii", "iso-8859-15")
    for encoding in candidate_encodings:
        try:
            decoded_string = encoded_string.decode(encoding)
        except UnicodeDecodeError:
            continue
        logger.debug(f"Decoded stderr with {encoding}")
        return decoded_string
    raise MS2RescoreError("Could not infer encoding of Percolator logs.")
220 | 


--------------------------------------------------------------------------------
/ms2rescore/utils.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import os
 3 | from glob import glob
 4 | from pathlib import Path
 5 | from typing import Optional, Union
 6 | 
 7 | from ms2rescore.exceptions import MS2RescoreConfigurationError
 8 | from ms2rescore_rs import is_supported_file_type
 9 | 
10 | logger = logging.getLogger(__name__)
11 | 
12 | 
def infer_spectrum_path(
    configured_path: Union[str, Path, None],
    run_name: Optional[str] = None,
) -> Union[str, Path]:
    """
    Infer spectrum path from passed path and expected filename (e.g. from PSM file).

    Parameters
    ----------
    configured_path: str, Path, None
        User-defined path to spectrum file or directory containing spectrum file
    run_name : str, optional
        MS run name (stem of spectrum filename), e.g., as expected from PSM file.

    Returns
    -------
    pathlib.Path
        Resolved path to the spectrum file or Bruker raw directory.

    Raises
    ------
    MS2RescoreConfigurationError
        If no path can be resolved from the passed arguments, if ``configured_path`` is
        neither an existing file nor an existing directory, or if no existing file with
        a supported file type matches the resolved path.

    """
    # Normalize path-like input to ``str``: the string operations used below
    # (``endswith`` and ``+ "*"``) are not available on ``pathlib.Path`` objects.
    if configured_path:
        configured_path = os.fspath(configured_path)

    # If no spectrum path configured, use expected run_name in default dir
    if not configured_path:
        if run_name:
            resolved_path = os.path.join(".", run_name)
        else:
            raise MS2RescoreConfigurationError(
                "Could not resolve spectrum file name: No spectrum path configured "
                "and no run name in PSM file found."
            )

    else:
        is_bruker_dir = configured_path.endswith(".d") or _is_minitdf(configured_path)

        # If passed path is directory (that is not Bruker raw), join with run name
        if os.path.isdir(configured_path) and not is_bruker_dir:
            if run_name:
                resolved_path = os.path.join(configured_path, run_name)
            else:
                raise MS2RescoreConfigurationError(
                    "Could not resolve spectrum file name: Spectrum path is directory "
                    "but no run name in PSM file found."
                )

        # If passed path is file, use that, but warn if basename doesn't match expected
        elif os.path.isfile(configured_path) or (os.path.isdir(configured_path) and is_bruker_dir):
            if run_name and Path(configured_path).stem != Path(run_name).stem:
                logger.warning(
                    "Passed spectrum path (`%s`) does not match run name found in PSM "
                    "file (`%s`). Continuing with passed spectrum path.",
                    configured_path,
                    run_name,
                )
            resolved_path = configured_path
        else:
            raise MS2RescoreConfigurationError(
                "Configured `spectrum_path` must be `None` or a path to an existing file "
                "or directory. If `None` or path to directory, spectrum run information "
                "should be present in the PSM file."
            )

    # Match with file extension if not in resolved_path yet
    if not is_supported_file_type(resolved_path) or not os.path.exists(resolved_path):
        # Take the first existing path that starts with the resolved path and has a
        # supported file type; the `else` clause runs only if the loop finds none.
        for filename in glob(resolved_path + "*"):
            if is_supported_file_type(filename):
                resolved_path = filename
                break
        else:
            raise MS2RescoreConfigurationError(
                f"Resolved spectrum filename ('{resolved_path}') does not contain a supported "
                "file extension (mzML, MGF, or .d) and could not find any matching existing "
                "files."
            )

    return Path(resolved_path)
82 | 
83 | 
84 | def _is_minitdf(spectrum_file: str) -> bool:
85 |     """
86 |     Check if the spectrum file is a Bruker miniTDF folder.
87 | 
88 |     A Bruker miniTDF folder has no fixed name, but contains files matching the patterns
89 |     ``*ms2spectrum.bin`` and ``*ms2spectrum.parquet``.
90 |     """
91 |     files = set(Path(spectrum_file).glob("*ms2spectrum.bin"))
92 |     files.update(Path(spectrum_file).glob("*ms2spectrum.parquet"))
93 |     return len(files) >= 2
94 | 


--------------------------------------------------------------------------------
/ms2rescore_innosetup.iss:
--------------------------------------------------------------------------------
 1 | #define AppName "MS2Rescore"
 2 | #define AppPublisher "CompOmics"
 3 | #define AppURL "https://github.com/compomics/ms2rescore"
 4 | #define AppExeName "ms2rescore.exe"
 5 | 
 6 | [Setup]
 7 | AppId={{2D3D12BD-3AE2-426E-8DE8-092148C12071}
 8 | AppName={#AppName}
 9 | AppVersion={#AppVersion}
10 | AppPublisher={#AppPublisher}
11 | AppPublisherURL={#AppURL}
12 | AppSupportURL={#AppURL}
13 | AppUpdatesURL={#AppURL}
14 | DefaultDirName={autopf}\{#AppName}
15 | DisableProgramGroupPage=yes
16 | LicenseFile=.\LICENSE
17 | PrivilegesRequired=lowest
18 | PrivilegesRequiredOverridesAllowed=dialog
19 | OutputDir="dist"
20 | OutputBaseFilename="{#AppName}-{#AppVersion}-Windows64bit"
21 | Compression=lzma
22 | SolidCompression=yes
23 | WizardStyle=modern
24 | 
25 | [Languages]
26 | Name: "english"; MessagesFile: "compiler:Default.isl"
27 | 
28 | [Tasks]
29 | Name: "desktopicon"; Description: "{cm:CreateDesktopIcon}"; GroupDescription: "{cm:AdditionalIcons}"; Flags: unchecked
30 | 
31 | [Files]
32 | Source: "dist\ms2rescore\*"; DestDir: "{app}"; Flags: ignoreversion recursesubdirs createallsubdirs
33 | 
34 | [Icons]
35 | Name: "{autoprograms}\{#AppName}"; Filename: "{app}\{#AppExeName}"
36 | Name: "{autodesktop}\{#AppName}"; Filename: "{app}\{#AppExeName}"; Tasks: desktopicon
37 | 
38 | [Run]
39 | Filename: "{app}\{#AppExeName}"; Description: "{cm:LaunchProgram,{#StringChange(AppName, '&', '&&')}}"; Flags: nowait postinstall skipifsilent
40 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
  1 | [project]
  2 | name = "ms2rescore"
  3 | description = " Modular and user-friendly platform for AI-assisted rescoring of peptide identifications."
  4 | readme = "README.md"
  5 | license = { file = "LICENSE" }
  6 | keywords = [
  7 |     "MS2Rescore",
  8 |     "MS2PIP",
  9 |     "DeepLC",
 10 |     "Percolator",
 11 |     "proteomics",
 12 |     "mass spectrometry",
 13 |     "peptide identification",
 14 |     "rescoring",
 15 |     "machine learning",
 16 | ]
 17 | authors = [
 18 |     { name = "Ralf Gabriels", email = "ralf@gabriels.dev" },
 19 |     { name = "Arthur Declercq", email = "arthur.declercq@ugent.be" },
 20 |     { name = "Ana Sílvia C. Silva" },
 21 |     { name = "Robbin Bouwmeester" },
 22 |     { name = "Louise Buur" },
 23 | ]
 24 | classifiers = [
 25 |     "Intended Audience :: Science/Research",
 26 |     "License :: OSI Approved :: Apache Software License",
 27 |     "Operating System :: OS Independent",
 28 |     "Programming Language :: Python :: 3 :: Only",
 29 |     "Topic :: Scientific/Engineering :: Bio-Informatics",
 30 |     "Development Status :: 5 - Production/Stable",
 31 | ]
 32 | dynamic = ["version"]
 33 | requires-python = ">=3.9"
 34 | dependencies = [
 35 |     "cascade-config>=0.4.0",
 36 |     "click>=7",
 37 |     "customtkinter>=5,<6",
 38 |     "deeplc>=3.0,<3.1",
 39 |     "deeplcretrainer",
 40 |     "im2deep>=0.3.1",
 41 |     "jinja2>=3",
 42 |     "lxml>=4.5",
 43 |     "mokapot==0.10",  # 0.11.0 will introduce API changes
 44 |     "ms2pip>=4.0.0",
 45 |     "ms2rescore_rs>=0.4.0",
 46 |     "numpy>=1.25",
 47 |     "pandas>=1",
 48 |     "plotly>=5",
 49 |     "psm_utils>=1.1",
 50 |     "pyteomics>=4.7.2",
 51 |     "rich>=12",
 52 |     "tomli>=2; python_version < '3.11'",
 53 | ]
 54 | 
 55 | [project.optional-dependencies]
 56 | ionmob = ["ionmob>=0.2", "tensorflow"]
 57 | dev = ["ruff", "black", "pytest", "pytest-cov", "pre-commit"]
 58 | docs = [
 59 |     "sphinx",
 60 |     "myst-parser",
 61 |     "nbsphinx",
 62 |     "numpydoc>=1,<2",
 63 |     "semver>=2",
 64 |     "sphinx_inline_tabs",
 65 |     "sphinx_rtd_theme",
 66 |     "sphinx-argparse",
 67 |     "sphinx-autobuild",
 68 |     "toml",
 69 | ]
 70 | 
 71 | [project.urls]
 72 | GitHub = "https://github.com/compomics/ms2rescore"
 73 | ReadTheDocs = "https://ms2rescore.readthedocs.io"
 74 | PyPi = "https://pypi.org/project/ms2rescore/"
 75 | CompOmics = "https://www.compomics.com"
 76 | 
 77 | [project.scripts]
 78 | ms2rescore = "ms2rescore.__main__:main"
 79 | ms2rescore-gui = "ms2rescore.gui.__main__:main"
 80 | ms2rescore-report = "ms2rescore.report.__main__:main"
 81 | tims2rescore = "ms2rescore.__main__:main_tims"
 82 | 
 83 | [build-system]
 84 | requires = ["flit_core >=3.2,<4"]
 85 | build-backend = "flit_core.buildapi"
 86 | 
 87 | [tool.isort]
 88 | profile = "black"
 89 | 
 90 | [tool.black]
 91 | line-length = 99
 92 | target-version = ['py39']
 93 | 
 94 | [tool.ruff]
 95 | line-length = 99
 96 | target-version = 'py39'
 97 | 
 98 | [tool.ruff.lint]
 99 | extend-select = ["T201", "T203"]
100 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompOmics/ms2rescore/e7bd07ee16029f6f7928b0646ea5b4cac8f8a148/tests/__init__.py


--------------------------------------------------------------------------------
/tests/test_config_parser.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | from ms2rescore.config_parser import _parse_output_path
 4 | 
 5 | 
 6 | def test__parse_output_path():
 7 |     # Ensure that test dir exists
 8 |     Path("examples/id").mkdir(parents=True, exist_ok=True)
 9 |     test_psm_file = "some/dir/psm_file.mzid"
10 | 
11 |     test_cases = [
12 |         ("examples/id", "examples/id/psm_file.ms2rescore"),  # Existing dir
13 |         ("examples/id/custom_stem", "examples/id/custom_stem"),  # Parent is existing dir
14 |         ("some/other_dir", "some/other_dir/psm_file.ms2rescore"),  # None-existing dir
15 |         (
16 |             "some/other_dir/",
17 |             "some/other_dir/psm_file.ms2rescore",
18 |         ),  # None-existing dir, with trailing slash
19 |         (None, "some/dir/psm_file.ms2rescore"),
20 |     ]
21 | 
22 |     for output_path, expected in test_cases:
23 |         assert _parse_output_path(output_path, test_psm_file) == expected
24 | 


--------------------------------------------------------------------------------
/tests/test_data/test.mgf:
--------------------------------------------------------------------------------
 1 | BEGIN IONS
 2 | TITLE=peptide: peptide1
 3 | CHARGE=2+
 4 | PEPMASS=475.137295
 5 | ION_MOBILITY=42.42
 6 | RTINSECONDS=51.2
 7 | 72.04439 100
 8 | 148.06043 600
 9 | 232.07504 300
10 | 263.08737 400
11 | 347.10198 500
12 | 423.11802 200
13 | END IONS
14 | 


--------------------------------------------------------------------------------
/tests/test_parse_spectra.py:
--------------------------------------------------------------------------------
  1 | from unittest.mock import MagicMock, patch
  2 | 
  3 | import numpy as np
  4 | import pytest
  5 | from psm_utils import PSM, PSMList
  6 | 
  7 | from ms2rescore.feature_generators.base import MSDataType
  8 | from ms2rescore.parse_spectra import (
  9 |     SpectrumParsingError,
 10 |     _get_precursor_values,
 11 |     add_precursor_values,
 12 | )
 13 | 
 14 | 
 15 | @pytest.fixture
 16 | def mock_psm_list():
 17 |     psm_list = PSMList(
 18 |         psm_list=[
 19 |             PSM(
 20 |                 peptidoform="PEPTIDE/2",
 21 |                 run="run1",
 22 |                 spectrum_id="spectrum1",
 23 |                 retention_time=None,
 24 |                 ion_mobility=None,
 25 |                 precursor_mz=None,
 26 |             ),
 27 |             PSM(
 28 |                 peptidoform="PEPTIDE/2",
 29 |                 run="run1",
 30 |                 spectrum_id="spectrum2",
 31 |                 retention_time=None,
 32 |                 ion_mobility=None,
 33 |                 precursor_mz=None,
 34 |             ),
 35 |         ]
 36 |     )
 37 |     return psm_list
 38 | 
 39 | 
 40 | @pytest.fixture
 41 | def mock_precursor_info():
 42 |     return {
 43 |         "spectrum1": MagicMock(mz=529.7935187324, rt=10.5, im=1.0),
 44 |         "spectrum2": MagicMock(mz=651.83, rt=12.3, im=1.2),
 45 |     }
 46 | 
 47 | 
 48 | @pytest.fixture
 49 | def mock_precursor_info_missing_im():
 50 |     return {
 51 |         "spectrum1": MagicMock(mz=529.7935187324, rt=10.5, im=0.0),
 52 |         "spectrum2": MagicMock(mz=651.83, rt=12.3, im=0.0),
 53 |     }
 54 | 
 55 | 
 56 | @pytest.fixture
 57 | def mock_precursor_info_incomplete():
 58 |     return {
 59 |         "spectrum1": MagicMock(mz=529.7935187324, rt=10.5, im=1.0),
 60 |         # "spectrum2" is missing
 61 |     }
 62 | 
 63 | 
 64 | @patch("ms2rescore.parse_spectra.get_precursor_info")
 65 | @patch("ms2rescore.parse_spectra.infer_spectrum_path")
 66 | def test_add_precursor_values(
 67 |     mock_infer_spectrum_path, mock_get_precursor_info, mock_psm_list, mock_precursor_info
 68 | ):
 69 |     mock_infer_spectrum_path.return_value = "test_data/test_spectrum_file.mgf"
 70 |     mock_get_precursor_info.return_value = mock_precursor_info
 71 | 
 72 |     available_ms_data = add_precursor_values(mock_psm_list, "test_data")
 73 | 
 74 |     assert MSDataType.retention_time in available_ms_data
 75 |     assert MSDataType.ion_mobility in available_ms_data
 76 |     assert MSDataType.precursor_mz in available_ms_data
 77 | 
 78 |     for psm in mock_psm_list:
 79 |         assert psm.retention_time is not None
 80 |         assert psm.ion_mobility is not None
 81 |         assert psm.precursor_mz is not None
 82 | 
 83 | 
 84 | @patch("ms2rescore.parse_spectra.get_precursor_info")
 85 | @patch("ms2rescore.parse_spectra.infer_spectrum_path")
 86 | def test_add_precursor_values_missing_im(
 87 |     mock_infer_spectrum_path,
 88 |     mock_get_precursor_info,
 89 |     mock_psm_list,
 90 |     mock_precursor_info_missing_im,
 91 | ):
 92 |     mock_infer_spectrum_path.return_value = "test_data/test_spectrum_file.mgf"
 93 |     mock_get_precursor_info.return_value = mock_precursor_info_missing_im
 94 | 
 95 |     available_ms_data = add_precursor_values(mock_psm_list, "test_data")
 96 | 
 97 |     assert MSDataType.retention_time in available_ms_data
 98 |     assert MSDataType.ion_mobility not in available_ms_data
 99 |     assert MSDataType.precursor_mz in available_ms_data
100 | 
101 |     for psm in mock_psm_list:
102 |         assert psm.retention_time is not None
103 |         assert psm.ion_mobility is None
104 |         assert psm.precursor_mz is not None
105 | 
106 | 
@patch("ms2rescore.parse_spectra.get_precursor_info")
@patch("ms2rescore.parse_spectra.infer_spectrum_path")
def test_get_precursor_values(
    mock_infer_spectrum_path, mock_get_precursor_info, mock_psm_list, mock_precursor_info
):
    """Returned m/z, retention time, and ion mobility arrays match the mocked values."""
    # Replace path inference and spectrum file parsing with canned results
    mock_get_precursor_info.return_value = mock_precursor_info
    mock_infer_spectrum_path.return_value = "test_data/test_spectrum_file.mgf"

    mzs, rts, ims = _get_precursor_values(mock_psm_list, "test_data", None)

    # Pairs of (observed array, expected values), in the order m/z, RT, IM
    comparisons = (
        (mzs, [529.7935187324, 651.83]),
        (rts, [10.5, 12.3]),
        (ims, [1.0, 1.2]),
    )
    for observed, expected_values in comparisons:
        np.testing.assert_array_equal(observed, np.array(expected_values))
124 | 
125 | 
@patch("ms2rescore.parse_spectra.get_precursor_info")
@patch("ms2rescore.parse_spectra.infer_spectrum_path")
def test_get_precursor_values_missing_spectrum_id(
    mock_infer_spectrum_path,
    mock_get_precursor_info,
    mock_psm_list,
    mock_precursor_info_incomplete,
):
    """A PSM whose spectrum ID is absent from the precursor info raises a parsing error."""
    # Replace path inference and spectrum file parsing with canned results
    mock_get_precursor_info.return_value = mock_precursor_info_incomplete
    mock_infer_spectrum_path.return_value = "test_data/test_spectrum_file.mgf"

    psm_list, spectrum_path, spectrum_id_pattern = mock_psm_list, "test_data", None
    with pytest.raises(SpectrumParsingError):
        _get_precursor_values(psm_list, spectrum_path, spectrum_id_pattern)
139 | 
140 | 
def test_spectrum_parsing_error():
    """``SpectrumParsingError`` can be raised with a message and caught by its own type."""
    message = "Test error message"
    with pytest.raises(SpectrumParsingError):
        raise SpectrumParsingError(message)
144 | 


--------------------------------------------------------------------------------