├── .github └── workflows │ ├── lint.yml │ ├── publish.yml │ └── tests.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE.txt ├── MANIFEST.in ├── README.md ├── codecov.yml ├── docs ├── Makefile ├── make.bat └── src │ ├── annot_fmt.png │ ├── annotating.md │ ├── api.md │ ├── conf.py │ ├── contact.md │ ├── facet.png │ ├── index.md │ ├── install.md │ ├── ion_types.png │ ├── iplot_mirror.json │ ├── iplot_spectrum.json │ ├── mass_errors.png │ ├── mirror.png │ ├── neutral_losses_1.png │ ├── neutral_losses_2.png │ ├── plotting.md │ ├── proforma_ast.png │ ├── proforma_ex1.png │ ├── proforma_ex2.png │ ├── proforma_ex3.png │ ├── quickstart.md │ ├── quickstart.png │ ├── runtime.md │ └── runtime.png ├── environment.yml ├── pyproject.toml ├── setup.cfg ├── setup.py ├── spectrum_utils.png ├── spectrum_utils ├── __init__.py ├── fragment_annotation.py ├── iplot.py ├── monosaccharide.lark ├── plot.py ├── proforma.ebnf ├── proforma.py ├── spectrum.py └── utils.py └── tests ├── __init__.py ├── fragment_annotation_test.py ├── proforma_test.py └── spectrum_test.py /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: Lint 2 | 3 | on: 4 | push: 5 | branches: [ main, dev ] 6 | pull_request: 7 | branches: [ main, dev ] 8 | 9 | jobs: 10 | lint: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v4 14 | - name: Setup Python 3.12 15 | uses: actions/setup-python@v5 16 | with: 17 | python-version: "3.12" 18 | 19 | - name: Install Ruff 20 | run: | 21 | python -m pip install --upgrade pip 22 | pip install ruff 23 | 24 | - name: Lint with Ruff 25 | run: | 26 | ruff check . --output-format=github 27 | 28 | - name: Check formatting with Ruff 29 | run: | 30 | ruff format --check . 
31 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish to PyPI 2 | 3 | on: 4 | release: 5 | types: [ created ] 6 | 7 | jobs: 8 | deploy: 9 | runs-on: ubuntu-latest 10 | 11 | steps: 12 | - uses: actions/checkout@v4 13 | - name: Set up Python 14 | uses: actions/setup-python@v5 15 | with: 16 | python-version: "3.x" 17 | - name: Install dependencies 18 | run: | 19 | python -m pip install --upgrade pip 20 | pip install setuptools wheel twine build 21 | - name: Build and publish 22 | env: 23 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} 24 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 25 | run: | 26 | python -m build --sdist --wheel . 27 | twine upload dist/* 28 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Run tests 2 | 3 | on: 4 | push: 5 | branches: [ main, dev ] 6 | pull_request: 7 | branches: [ main, dev ] 8 | schedule: 9 | - cron: "0 0 1 1/1 *" # Run monthly. 10 | 11 | jobs: 12 | build: 13 | runs-on: ${{ matrix.os }} 14 | strategy: 15 | matrix: 16 | os: [ ubuntu-latest, windows-latest, macos-latest ] 17 | python-version: [ "3.10", "3.11", "3.12" ] 18 | 19 | steps: 20 | - uses: actions/checkout@v4 21 | - name: Set up Python ${{ matrix.python-version }} 22 | uses: actions/setup-python@v5 23 | with: 24 | python-version: ${{ matrix.python-version }} 25 | 26 | - name: Install dependencies 27 | run: | 28 | pip install uv 29 | uv pip install pytest pytest-cov wheel --system 30 | uv pip install -e . 
--system 31 | - name: Run unit and system tests 32 | run: | 33 | pytest --cov=spectrum_utils --verbose tests/ 34 | - name: Upload coverage to codecov 35 | uses: codecov/codecov-action@v4 36 | with: 37 | token: ${{ secrets.CODECOV_TOKEN }} 38 | fail_ci_if_error: true 39 | verbose: true 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # https://gist.github.com/octocat/9257657 2 | 3 | # Compiled source # 4 | ################### 5 | *.com 6 | *.class 7 | *.dll 8 | *.exe 9 | *.o 10 | *.so 11 | 12 | # Packages # 13 | ############ 14 | # it's better to unpack these files and commit the raw source 15 | # git has its own built in compression methods 16 | *.7z 17 | *.dmg 18 | *.gz 19 | *.iso 20 | *.jar 21 | *.rar 22 | *.tar 23 | *.zip 24 | 25 | # Logs and databases # 26 | ###################### 27 | *.log 28 | *.sql 29 | *.sqlite 30 | 31 | # OS generated files # 32 | ###################### 33 | .DS_Store 34 | .DS_Store? 
35 | ._* 36 | .Spotlight-V100 37 | .Trashes 38 | ehthumbs.db 39 | Thumbs.db 40 | 41 | # https://github.com/github/gitignore/blob/master/ArchLinuxPackages.gitignore 42 | 43 | *.tar 44 | *.tar.* 45 | *.jar 46 | *.exe 47 | *.msi 48 | *.zip 49 | *.tgz 50 | *.log 51 | *.log.* 52 | *.sig 53 | 54 | pkg/ 55 | #src/ 56 | 57 | # https://github.com/github/gitignore/blob/master/C%2B%2B.gitignore 58 | 59 | # Prerequisites 60 | *.d 61 | 62 | # Compiled Object files 63 | *.slo 64 | *.lo 65 | *.o 66 | *.obj 67 | 68 | # Precompiled Headers 69 | *.gch 70 | *.pch 71 | 72 | # Compiled Dynamic libraries 73 | *.so 74 | *.dylib 75 | *.dll 76 | 77 | # Fortran module files 78 | *.mod 79 | *.smod 80 | 81 | # Compiled Static libraries 82 | *.lai 83 | *.la 84 | *.a 85 | *.lib 86 | 87 | # Executables 88 | *.exe 89 | *.out 90 | *.app 91 | 92 | # https://github.com/github/gitignore/blob/master/C.gitignore 93 | 94 | # Prerequisites 95 | *.d 96 | 97 | # Object files 98 | *.o 99 | *.ko 100 | *.obj 101 | *.elf 102 | 103 | # Linker output 104 | *.ilk 105 | *.map 106 | *.exp 107 | 108 | # Precompiled Headers 109 | *.gch 110 | *.pch 111 | 112 | # Libraries 113 | *.lib 114 | *.a 115 | *.la 116 | *.lo 117 | 118 | # Shared objects (inc. 
Windows DLLs) 119 | *.dll 120 | *.so 121 | *.so.* 122 | *.dylib 123 | 124 | # Executables 125 | *.exe 126 | *.out 127 | *.app 128 | *.i*86 129 | *.x86_64 130 | *.hex 131 | 132 | # Debug files 133 | *.dSYM/ 134 | *.su 135 | *.idb 136 | *.pdb 137 | 138 | # Kernel Module Compile Results 139 | *.mod* 140 | *.cmd 141 | .tmp_versions/ 142 | modules.order 143 | Module.symvers 144 | Mkfile.old 145 | dkms.conf 146 | 147 | # https://github.com/github/gitignore/blob/master/CMake.gitignore 148 | 149 | CMakeLists.txt.user 150 | CMakeCache.txt 151 | CMakeFiles 152 | CMakeScripts 153 | Testing 154 | Makefile 155 | cmake_install.cmake 156 | install_manifest.txt 157 | compile_commands.json 158 | CTestTestfile.cmake 159 | 160 | # https://github.com/github/gitignore/blob/master/CUDA.gitignore 161 | 162 | *.i 163 | *.ii 164 | *.gpu 165 | *.ptx 166 | *.cubin 167 | *.fatbin 168 | 169 | # https://github.com/github/gitignore/blob/master/Java.gitignore 170 | 171 | # Compiled class file 172 | *.class 173 | 174 | # Log file 175 | *.log 176 | 177 | # BlueJ files 178 | *.ctxt 179 | 180 | # Mobile Tools for Java (J2ME) 181 | .mtj.tmp/ 182 | 183 | # Package Files # 184 | *.jar 185 | *.war 186 | *.nar 187 | *.ear 188 | *.zip 189 | *.tar.gz 190 | *.rar 191 | 192 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 193 | hs_err_pid* 194 | 195 | # https://github.com/github/gitignore/blob/master/Maven.gitignore 196 | 197 | target/ 198 | pom.xml.tag 199 | pom.xml.releaseBackup 200 | pom.xml.versionsBackup 201 | pom.xml.next 202 | release.properties 203 | dependency-reduced-pom.xml 204 | buildNumber.properties 205 | .mvn/timing.properties 206 | .mvn/wrapper/maven-wrapper.jar 207 | 208 | # https://github.com/github/gitignore/blob/master/Python.gitignore 209 | 210 | # Byte-compiled / optimized / DLL files 211 | __pycache__/ 212 | *.py[cod] 213 | *$py.class 214 | 215 | # C extensions 216 | *.so 217 | 218 | # Distribution / packaging 219 | .Python 220 | build/ 221 | 
develop-eggs/ 222 | dist/ 223 | downloads/ 224 | eggs/ 225 | .eggs/ 226 | lib/ 227 | lib64/ 228 | parts/ 229 | sdist/ 230 | var/ 231 | wheels/ 232 | share/python-wheels/ 233 | *.egg-info/ 234 | .installed.cfg 235 | *.egg 236 | MANIFEST 237 | 238 | # PyInstaller 239 | # Usually these files are written by a python script from a template 240 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 241 | *.manifest 242 | *.spec 243 | 244 | # Installer logs 245 | pip-log.txt 246 | pip-delete-this-directory.txt 247 | 248 | # Unit test / coverage reports 249 | htmlcov/ 250 | .tox/ 251 | .nox/ 252 | .coverage 253 | .coverage.* 254 | .cache 255 | nosetests.xml 256 | coverage.xml 257 | *.cover 258 | .hypothesis/ 259 | .pytest_cache/ 260 | 261 | # Translations 262 | *.mo 263 | *.pot 264 | 265 | # Django stuff: 266 | *.log 267 | local_settings.py 268 | db.sqlite3 269 | 270 | # Flask stuff: 271 | instance/ 272 | .webassets-cache 273 | 274 | # Scrapy stuff: 275 | .scrapy 276 | 277 | # Sphinx documentation 278 | docs/_build/ 279 | 280 | # PyBuilder 281 | target/ 282 | 283 | # Jupyter Notebook 284 | .ipynb_checkpoints 285 | 286 | # IPython 287 | profile_default/ 288 | ipython_config.py 289 | 290 | # pyenv 291 | .python-version 292 | 293 | # celery beat schedule file 294 | celerybeat-schedule 295 | 296 | # SageMath parsed files 297 | *.sage.py 298 | 299 | # Environments 300 | .env 301 | .venv 302 | env/ 303 | venv/ 304 | ENV/ 305 | env.bak/ 306 | venv.bak/ 307 | 308 | # Spyder project settings 309 | .spyderproject 310 | .spyproject 311 | 312 | # Rope project settings 313 | .ropeproject 314 | 315 | # mkdocs documentation 316 | /site 317 | 318 | # mypy 319 | .mypy_cache/ 320 | .dmypy.json 321 | dmypy.json 322 | 323 | # Pyre type checker 324 | .pyre/ 325 | 326 | # https://github.com/github/gitignore/blob/master/R.gitignore 327 | 328 | # History files 329 | .Rhistory 330 | .Rapp.history 331 | 332 | # Session Data files 333 | .RData 334 | 335 | # Example code in 
package build process 336 | *-Ex.R 337 | 338 | # Output files from R CMD build 339 | /*.tar.gz 340 | 341 | # Output files from R CMD check 342 | /*.Rcheck/ 343 | 344 | # RStudio files 345 | .Rproj.user/ 346 | 347 | # produced vignettes 348 | vignettes/*.html 349 | vignettes/*.pdf 350 | 351 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 352 | .httr-oauth 353 | 354 | # knitr and R markdown default cache directories 355 | /*_cache/ 356 | /cache/ 357 | 358 | # Temporary files created by R markdown 359 | *.utf8.md 360 | *.knit.md 361 | 362 | # https://github.com/github/gitignore/blob/master/TeX.gitignore 363 | 364 | ## Core latex/pdflatex auxiliary files: 365 | *.aux 366 | *.lof 367 | *.log 368 | *.lot 369 | *.fls 370 | *.out 371 | *.toc 372 | *.fmt 373 | *.fot 374 | *.cb 375 | *.cb2 376 | .*.lb 377 | 378 | ## Intermediate documents: 379 | *.dvi 380 | *.xdv 381 | *-converted-to.* 382 | # these rules might exclude image files for figures etc. 383 | # *.ps 384 | # *.eps 385 | # *.pdf 386 | 387 | ## Generated if empty string is given at "Please type another file name for output:" 388 | .pdf 389 | 390 | ## Bibliography auxiliary files (bibtex/biblatex/biber): 391 | *.bbl 392 | *.bcf 393 | *.blg 394 | *-blx.aux 395 | *-blx.bib 396 | *.run.xml 397 | 398 | ## Build tool auxiliary files: 399 | *.fdb_latexmk 400 | *.synctex 401 | *.synctex(busy) 402 | *.synctex.gz 403 | *.synctex.gz(busy) 404 | *.pdfsync 405 | 406 | ## Build tool directories for auxiliary files 407 | # latexrun 408 | latex.out/ 409 | 410 | ## Auxiliary and intermediate files from other packages: 411 | # algorithms 412 | *.alg 413 | *.loa 414 | 415 | # achemso 416 | acs-*.bib 417 | 418 | # amsthm 419 | *.thm 420 | 421 | # beamer 422 | *.nav 423 | *.pre 424 | *.snm 425 | *.vrb 426 | 427 | # changes 428 | *.soc 429 | 430 | # comment 431 | *.cut 432 | 433 | # cprotect 434 | *.cpt 435 | 436 | # elsarticle (documentclass of Elsevier journals) 437 | *.spl 438 | 439 | # endnotes 440 | *.ent 441 | 
442 | # fixme 443 | *.lox 444 | 445 | # feynmf/feynmp 446 | *.mf 447 | *.mp 448 | *.t[1-9] 449 | *.t[1-9][0-9] 450 | *.tfm 451 | 452 | #(r)(e)ledmac/(r)(e)ledpar 453 | *.end 454 | *.?end 455 | *.[1-9] 456 | *.[1-9][0-9] 457 | *.[1-9][0-9][0-9] 458 | *.[1-9]R 459 | *.[1-9][0-9]R 460 | *.[1-9][0-9][0-9]R 461 | *.eledsec[1-9] 462 | *.eledsec[1-9]R 463 | *.eledsec[1-9][0-9] 464 | *.eledsec[1-9][0-9]R 465 | *.eledsec[1-9][0-9][0-9] 466 | *.eledsec[1-9][0-9][0-9]R 467 | 468 | # glossaries 469 | *.acn 470 | *.acr 471 | *.glg 472 | *.glo 473 | *.gls 474 | *.glsdefs 475 | 476 | # gnuplottex 477 | *-gnuplottex-* 478 | 479 | # gregoriotex 480 | *.gaux 481 | *.gtex 482 | 483 | # htlatex 484 | *.4ct 485 | *.4tc 486 | *.idv 487 | *.lg 488 | *.trc 489 | *.xref 490 | 491 | # hyperref 492 | *.brf 493 | 494 | # knitr 495 | *-concordance.tex 496 | # TODO Comment the next line if you want to keep your tikz graphics files 497 | *.tikz 498 | *-tikzDictionary 499 | 500 | # listings 501 | *.lol 502 | 503 | # makeidx 504 | *.idx 505 | *.ilg 506 | *.ind 507 | *.ist 508 | 509 | # minitoc 510 | *.maf 511 | *.mlf 512 | *.mlt 513 | *.mtc[0-9]* 514 | *.slf[0-9]* 515 | *.slt[0-9]* 516 | *.stc[0-9]* 517 | 518 | # minted 519 | _minted* 520 | *.pyg 521 | 522 | # morewrites 523 | *.mw 524 | 525 | # nomencl 526 | *.nlg 527 | *.nlo 528 | *.nls 529 | 530 | # pax 531 | *.pax 532 | 533 | # pdfpcnotes 534 | *.pdfpc 535 | 536 | # sagetex 537 | *.sagetex.sage 538 | *.sagetex.py 539 | *.sagetex.scmd 540 | 541 | # scrwfile 542 | *.wrt 543 | 544 | # sympy 545 | *.sout 546 | *.sympy 547 | sympy-plots-for-*.tex/ 548 | 549 | # pdfcomment 550 | *.upa 551 | *.upb 552 | 553 | # pythontex 554 | *.pytxcode 555 | pythontex-files-*/ 556 | 557 | # tcolorbox 558 | *.listing 559 | 560 | # thmtools 561 | *.loe 562 | 563 | # TikZ & PGF 564 | *.dpth 565 | *.md5 566 | *.auxlock 567 | 568 | # todonotes 569 | *.tdo 570 | 571 | # vhistory 572 | *.hst 573 | *.ver 574 | 575 | # easy-todo 576 | *.lod 577 | 578 | # xcolor 579 | *.xcp 
580 | 581 | # xmpincl 582 | *.xmpi 583 | 584 | # xindy 585 | *.xdy 586 | 587 | # xypic precompiled matrices 588 | *.xyc 589 | 590 | # endfloat 591 | *.ttt 592 | *.fff 593 | 594 | # Latexian 595 | TSWLatexianTemp* 596 | 597 | ## Editors: 598 | # WinEdt 599 | *.bak 600 | *.sav 601 | 602 | # Texpad 603 | .texpadtmp 604 | 605 | # LyX 606 | *.lyx~ 607 | 608 | # Kile 609 | *.backup 610 | 611 | # KBibTeX 612 | *~[0-9]* 613 | 614 | # auto folder when using emacs and auctex 615 | ./auto/* 616 | *.el 617 | 618 | # expex forward references with \gathertags 619 | *-tags.tex 620 | 621 | # standalone packages 622 | *.sta 623 | 624 | # https://github.com/github/gitignore/blob/master/Global/JetBrains.gitignore 625 | 626 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 627 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 628 | 629 | .idea/ 630 | 631 | # User-specific stuff 632 | .idea/**/workspace.xml 633 | .idea/**/tasks.xml 634 | .idea/**/usage.statistics.xml 635 | .idea/**/dictionaries 636 | .idea/**/shelf 637 | 638 | # Generated files 639 | .idea/**/contentModel.xml 640 | 641 | # Sensitive or high-churn files 642 | .idea/**/dataSources/ 643 | .idea/**/dataSources.ids 644 | .idea/**/dataSources.local.xml 645 | .idea/**/sqlDataSources.xml 646 | .idea/**/dynamic.xml 647 | .idea/**/uiDesigner.xml 648 | .idea/**/dbnavigator.xml 649 | 650 | # Gradle 651 | .idea/**/gradle.xml 652 | .idea/**/libraries 653 | 654 | # Gradle and Maven with auto-import 655 | # When using Gradle or Maven with auto-import, you should exclude module files, 656 | # since they will be recreated, and may cause churn. Uncomment if using 657 | # auto-import. 
658 | # .idea/modules.xml 659 | # .idea/*.iml 660 | # .idea/modules 661 | 662 | # CMake 663 | cmake-build-*/ 664 | 665 | # Mongo Explorer plugin 666 | .idea/**/mongoSettings.xml 667 | 668 | # File-based project format 669 | *.iws 670 | 671 | # IntelliJ 672 | out/ 673 | 674 | # mpeltonen/sbt-idea plugin 675 | .idea_modules/ 676 | 677 | # JIRA plugin 678 | atlassian-ide-plugin.xml 679 | 680 | # Cursive Clojure plugin 681 | .idea/replstate.xml 682 | 683 | # Crashlytics plugin (for Android Studio and IntelliJ) 684 | com_crashlytics_export_strings.xml 685 | crashlytics.properties 686 | crashlytics-build.properties 687 | fabric.properties 688 | 689 | # Editor-based Rest Client 690 | .idea/httpRequests 691 | 692 | # Android studio 3.1+ serialized cache file 693 | .idea/caches/build_file_checksums.ser 694 | 695 | # https://github.com/github/gitignore/blob/master/Global/Kate.gitignore 696 | 697 | # Swap Files # 698 | .*.kate-swp 699 | .swp.* 700 | 701 | # https://github.com/github/gitignore/blob/master/Global/LibreOffice.gitignore 702 | 703 | # LibreOffice locks 704 | .~lock.*# 705 | 706 | # https://github.com/github/gitignore/blob/master/Global/Linux.gitignore 707 | 708 | *~ 709 | 710 | # temporary files which can be created if a process still has a handle open of a deleted file 711 | .fuse_hidden* 712 | 713 | # KDE directory preferences 714 | .directory 715 | 716 | # Linux trash folder which might appear on any partition or disk 717 | .Trash-* 718 | 719 | # .nfs files are created when an open file is removed but is still being accessed 720 | .nfs* 721 | 722 | # https://github.com/github/gitignore/blob/master/Global/SublimeText.gitignore 723 | 724 | # Cache files for Sublime Text 725 | *.tmlanguage.cache 726 | *.tmPreferences.cache 727 | *.stTheme.cache 728 | 729 | # Workspace files are user-specific 730 | *.sublime-workspace 731 | 732 | # Project files should be checked into the repository, unless a significant 733 | # proportion of contributors will probably not be using 
Sublime Text 734 | # *.sublime-project 735 | 736 | # SFTP configuration file 737 | sftp-config.json 738 | 739 | # Package control specific files 740 | Package Control.last-run 741 | Package Control.ca-list 742 | Package Control.ca-bundle 743 | Package Control.system-ca-bundle 744 | Package Control.cache/ 745 | Package Control.ca-certs/ 746 | Package Control.merged-ca-bundle 747 | Package Control.user-ca-bundle 748 | oscrypto-ca-bundle.crt 749 | bh_unicode_properties.cache 750 | 751 | # Sublime-github package stores a github token in this file 752 | # https://packagecontrol.io/packages/sublime-github 753 | GitHub.sublime-settings 754 | 755 | # https://github.com/github/gitignore/blob/master/Global/VisualStudioCode.gitignore 756 | 757 | .vscode/* 758 | !.vscode/settings.json 759 | !.vscode/tasks.json 760 | !.vscode/launch.json 761 | !.vscode/extensions.json 762 | 763 | # https://github.com/github/gitignore/blob/master/Global/macOS.gitignore 764 | 765 | # General 766 | .DS_Store 767 | .AppleDouble 768 | .LSOverride 769 | 770 | # Icon must end with two \r 771 | Icon 772 | 773 | 774 | # Thumbnails 775 | ._* 776 | 777 | # Files that might appear in the root of a volume 778 | .DocumentRevisions-V100 779 | .fseventsd 780 | .Spotlight-V100 781 | .TemporaryItems 782 | .Trashes 783 | .VolumeIcon.icns 784 | .com.apple.timemachine.donotpresent 785 | 786 | # Directories potentially created on remote AFP share 787 | .AppleDB 788 | .AppleDesktop 789 | Network Trash Folder 790 | Temporary Items 791 | .apdisk 792 | 793 | # https://github.com/github/gitignore/blob/master/community/Python/JupyterNotebooks.gitignore 794 | 795 | # gitignore template for Jupyter Notebooks 796 | # website: http://jupyter.org/ 797 | 798 | .ipynb_checkpoints 799 | */.ipynb_checkpoints/* 800 | 801 | # Remove previous ipynb_checkpoints 802 | # git rm -r .ipynb_checkpoints/ 803 | # 804 | 805 | # https://github.com/github/gitignore/blob/master/community/Python/Nikola.gitignore 806 | 807 | # gitignore template 
for Nikola static site generator 808 | # website: https://getnikola.com/ 809 | 810 | .doit.db 811 | *.py[cod] 812 | cache/ 813 | output/ 814 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/astral-sh/ruff-pre-commit 3 | # Ruff version. 4 | rev: v0.4.1 5 | hooks: 6 | # Run the linter. 7 | - id: ruff 8 | types_or: [ python, pyi, jupyter ] 9 | args: [ --fix ] 10 | # Run the formatter. 11 | - id: ruff-format 12 | types_or: [ python, pyi, jupyter ] 13 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | python: 4 | version: 3.10 5 | install: 6 | - method: pip 7 | path: . 8 | extra_requirements: 9 | - docs, iplot 10 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | 2 | # spectrum_utils Code of Conduct 3 | 4 | ## Our Pledge 5 | 6 | We as members, contributors, and leaders pledge to make participation in our 7 | community a harassment-free experience for everyone, regardless of age, body 8 | size, visible or invisible disability, ethnicity, sex characteristics, gender 9 | identity and expression, level of experience, education, socio-economic status, 10 | nationality, personal appearance, race, religion, or sexual identity 11 | and orientation. 12 | 13 | We pledge to act and interact in ways that contribute to an open, welcoming, diverse, inclusive, and healthy community. 14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our community include: 18 | 19 | * Demonstrating empathy and kindness toward other people. 
20 | * Being respectful of differing opinions, viewpoints, and experiences. 21 | * Giving and gracefully accepting constructive feedback. 22 | * Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience. 23 | * Focusing on what is best not just for us as individuals, but for the overall community. 24 | 25 | Examples of unacceptable behavior include: 26 | 27 | * The use of sexualized language or imagery, and sexual attention or advances of any kind. 28 | * Trolling, insulting or derogatory comments, and personal or political attacks. 29 | * Public or private harassment. 30 | * Publishing others' private information, such as a physical or email address, without their explicit permission. 31 | * Other conduct which could reasonably be considered inappropriate in a professional setting. 32 | 33 | ## Enforcement Responsibilities 34 | 35 | Community leaders are responsible for clarifying and enforcing our standards of acceptable behavior and will take appropriate and fair corrective action in response to any behavior that they deem inappropriate, threatening, offensive, or harmful. 36 | 37 | Community leaders have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, and will communicate reasons for moderation decisions when appropriate. 38 | 39 | ## Scope 40 | 41 | This Code of Conduct applies within all community spaces, and also applies when an individual is officially representing the community in public spaces. 42 | Examples of representing our community include using an official e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. 43 | 44 | ## Enforcement 45 | 46 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at . 
47 | All complaints will be reviewed and investigated promptly and fairly. 48 | 49 | All community leaders are obligated to respect the privacy and security of the reporter of any incident. 50 | 51 | ## Enforcement Guidelines 52 | 53 | Community leaders will follow these Community Impact Guidelines in determining the consequences for any action they deem in violation of this Code of Conduct: 54 | 55 | ### 1. Correction 56 | 57 | **Community Impact**: Use of inappropriate language or other behavior deemed unprofessional or unwelcome in the community. 58 | 59 | **Consequence**: A private, written warning from community leaders, providing clarity around the nature of the violation and an explanation of why the behavior was inappropriate. 60 | A public apology may be requested. 61 | 62 | ### 2. Warning 63 | 64 | **Community Impact**: A violation through a single incident or series of actions. 65 | 66 | **Consequence**: A warning with consequences for continued behavior. 67 | No interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, for a specified period of time. 68 | This includes avoiding interactions in community spaces as well as external channels like social media. 69 | Violating these terms may lead to a temporary or permanent ban. 70 | 71 | ### 3. Temporary Ban 72 | 73 | **Community Impact**: A serious violation of community standards, including sustained inappropriate behavior. 74 | 75 | **Consequence**: A temporary ban from any sort of interaction or public communication with the community for a specified period of time. 76 | No public or private interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, is allowed during this period. 77 | Violating these terms may lead to a permanent ban. 78 | 79 | ### 4. 
Permanent Ban 80 | 81 | **Community Impact**: Demonstrating a pattern of violation of community standards, including sustained inappropriate behavior, harassment of an individual, or aggression toward or disparagement of classes of individuals. 82 | 83 | **Consequence**: A permanent ban from any sort of public interaction within the community. 84 | 85 | ## Attribution 86 | 87 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 2.0, available at [https://www.contributor-covenant.org/version/2/0/code_of_conduct.html][v2.0]. 88 | 89 | [homepage]: https://www.contributor-covenant.org 90 | [v2.0]: https://www.contributor-covenant.org/version/2/0/code_of_conduct.html 91 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to spectrum_utils 2 | 3 | :+1::tada: First off, thanks for taking the time to contribute! :tada::+1: 4 | 5 | The following document provides guidelines for contributing to the documentation and the code of spectrum_utils. These are mostly guidelines, not rules. Use your best judgment, and feel free to propose changes to this document in a pull request. 6 | **No contribution is too small!** 7 | Even fixing a simple typo in the documentation is immensely helpful. 8 | 9 | ## Contributing to the documentation 10 | 11 | We use [sphinx](https://www.sphinx-doc.org/en/master/) to generate our documentation and deploy it to this site. 12 | Most of the pages on the site are created from simple text files written in markdown. 13 | There are two exceptions to this: 14 | 15 | 1. The API documentation is automatically generated from the documentation contained in the code. 16 | 17 | 2. The vignettes are created from Jupyter notebooks. 
18 | 19 | ### Editing most documents 20 | 21 | The easiest way to edit a document is by clicking the "Edit on GitHub" link in the top right-hand corner of each page. 22 | You'll be taken to GitHub where you can click on the pencil to edit the document. 23 | 24 | You can then make your changes directly on GitHub. 25 | Once you're finished, fill in a description of what you changed and click the "Propose Changes" button. 26 | 27 | Alternatively, these documents live in the `docs/src` directory of the repository and can be edited like code. 28 | See [Contributing to the code](#contributing-to-the-code) below for more details on contributing this way. 29 | 30 | 31 | ## Contributing to the code 32 | 33 | We welcome contributions to the source code of spectrum_utils---particularly ones that address discussed [issues](https://github.com/bittremieux/spectrum_utils/issues). 34 | 35 | Contributions to spectrum_utils follow a standard GitHub contribution workflow: 36 | 37 | 1. Create your own fork of the spectrum_utils repository on GitHub. 38 | 39 | 2. Clone your forked spectrum_utils repository to work on locally. 40 | 41 | 3. Create a new branch with a descriptive name for your changes: 42 | 43 | ```bash 44 | git checkout -b fix_x 45 | ``` 46 | 47 | 4. Make your changes (make sure to read below first). 48 | 49 | 5. Add, commit, and push your changes to your forked repository. 50 | 51 | 6. On the GitHub page for your forked repository, click "Pull request" to propose adding your changes to spectrum_utils. 52 | 53 | 7. We'll review, discuss, and help you make any revisions that are required. 54 | If all goes well, your changes will be added to spectrum_utils in the next release! 55 | 56 | 57 | ### Python code style 58 | 59 | The spectrum_utils project follows the [PEP 8 guidelines](https://www.python.org/dev/peps/pep-0008/) for Python code style. 60 | More specifically, we use [black](https://black.readthedocs.io/en/stable/) to format and lint Python code in spectrum_utils. 
61 | 62 | We highly recommend setting up a pre-commit hook for black. 63 | This will run black on all of the Python source files before the changes can be committed. 64 | Because we run black for code linting as part of our tests, setting up this hook can save you from having to revise code formatting. 65 | Take the following steps to set up the pre-commit hook: 66 | 67 | 1. Verify that black and pre-commit are installed. 68 | If not, you can install them with pip or conda: 69 | 70 | ```bash 71 | # Using pip 72 | pip install black pre-commit 73 | 74 | # Using conda 75 | conda install -c conda-forge black pre-commit 76 | ``` 77 | 78 | 2. Navigate to your local copy of the spectrum_utils repository and activate the hook: 79 | 80 | ```bash 81 | pre-commit install 82 | ``` 83 | 84 | Once the hook is installed, black will be run before any commit is made. 85 | If a file is changed by black, then you need to `git add` the file again before finishing the commit. 86 | 87 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include spectrum_utils/monosaccharide.lark 2 | include spectrum_utils/proforma.ebnf 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # spectrum_utils 2 | 3 | [![conda](https://img.shields.io/conda/vn/bioconda/spectrum_utils?color=green)](http://bioconda.github.io/recipes/spectrum_utils/README.html) 4 | [![PyPI](https://img.shields.io/pypi/v/spectrum_utils?color=green)](https://pypi.org/project/spectrum_utils/) 5 | [![Build status](https://github.com/bittremieux/spectrum_utils/workflows/tests/badge.svg)](https://github.com/bittremieux/spectrum_utils/actions?query=workflow:tests) 6 | [![docs](https://readthedocs.org/projects/spectrum-utils/badge/?version=latest)](https://spectrum-utils.readthedocs.io/en/latest/?badge=latest) 7 | 8 | spectrum_utils is a Python package for efficient mass spectrometry data processing and 
visualization. 9 | 10 | spectrum_utils contains the following features: 11 | 12 | - Spectrum loading from online proteomics and metabolomics data resources using the [Universal Spectrum Identifier (USI)](https://www.psidev.info/usi) mechanism. 13 | - Common spectrum processing operations (precursor & noise peak removal, intensity filtering, intensity scaling) optimized for computational efficiency. 14 | - Annotating observed spectrum fragments using the [ProForma 2.0 specification](https://www.psidev.info/proforma) for (modified) peptidoforms. 15 | - Publication-quality, fully customizable spectrum plotting and interactive spectrum plotting. 16 | 17 | ![spectrum_utils logo](spectrum_utils.png) 18 | 19 | ## Installation 20 | 21 | spectrum_utils requires Python version 3.10+ and can be installed with pip or conda. 22 | 23 | Using pip: 24 | 25 | pip install spectrum_utils[iplot] 26 | 27 | Using conda: 28 | 29 | conda install -c bioconda spectrum_utils 30 | 31 | ## Documentation 32 | 33 | Please see the [documentation](https://spectrum-utils.readthedocs.io/) for detailed installation instructions, usage examples, the API reference, and more information. 34 | 35 | ## Citation 36 | 37 | spectrum_utils is freely available as open source under the [Apache 2.0 license](http://opensource.org/licenses/Apache-2.0). 38 | 39 | When using spectrum_utils, please cite the following manuscripts: 40 | 41 | - Wout Bittremieux. "spectrum_utils: A Python package for mass spectrometry data processing and visualization." _Analytical Chemistry_ **92**, 659--661 (2020) doi:[10.1021/acs.analchem.9b04884](https://doi.org/10.1021/acs.analchem.9b04884). 42 | - Wout Bittremieux, Lev Levitsky, Matteo Pilz, Timo Sachsenberg, Florian Huber, Mingxun Wang, Pieter C. Dorrestein. 
"Unified and standardized mass spectrometry data processing in Python using spectrum_utils" _Journal of Proteome Research_ **22**, 625--631 (2023) doi:[10.1021/acs.jproteome.2c00632](https://doi.org/10.1021/acs.jproteome.2c00632). 43 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | # Only allow commits that increase or maintain test coverage. 2 | coverage: 3 | status: 4 | project: 5 | default: 6 | target: auto # auto compares coverage to the previous base commit. 7 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SOURCEDIR = src 8 | BUILDDIR = build 9 | 10 | # Put it first so that "make" without argument is like "make help". 11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | buildapi: 15 | sphinx-apidoc -feT ../spectrum_utils -o $(SOURCEDIR)/api 16 | @echo "Auto-generation of API documentation finished. The generated files are in '$(SOURCEDIR)/api/'" 17 | 18 | .PHONY: help Makefile 19 | 20 | # Catch-all target: route all unknown targets to Sphinx using the new 21 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 22 | %: Makefile 23 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 24 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 
11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/src/annot_fmt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bittremieux-lab/spectrum_utils/a12f453040bb9bd36a261ae46063dfe828d4fe5e/docs/src/annot_fmt.png -------------------------------------------------------------------------------- /docs/src/annotating.md: -------------------------------------------------------------------------------- 1 | # Spectrum annotating 2 | 3 | See the [quickstart](quickstart.md) for a brief introduction on how to start using spectrum_utils. 4 | Here we will describe the spectrum annotation functionality provided by spectrum_utils in more detail. 5 | 6 | ## Fragment ion annotation 7 | 8 | As demonstrated in the [quickstart](quickstart.md), fragment ions can be annotated based on the [ProForma 2.0](https://www.psidev.info/proforma) specification. 9 | 10 | The ProForma specification makes it possible to unambiguously represent peptide sequences and peptidoforms, which are specific forms of peptides that result from the combination of the amino acid sequences and modification(s) at specific amino acid positions.
11 | Modifications are defined by controlled vocabularies (CVs), including [Unimod](https://www.unimod.org/), [PSI-MOD](https://github.com/HUPO-PSI/psi-mod-CV/), and others, and can be specified by their name or CV accession. 12 | ProForma also supports special modification use cases, including support for modifications from cross-linking (using [XL-MOD](https://arxiv.org/abs/2003.00329)), glycans (using the [Glycan Naming Ontology](https://gnome.glyomics.org/)), and top-down extensions. 13 | 14 | The following are (non-exhaustive) examples to demonstrate how ProForma can be used to annotate peaks in spectra: 15 | 16 | - Specify modifications by their name: `EM[Oxidation]EVEES[Phospho]PEK`. 17 | ```python 18 | import matplotlib.pyplot as plt 19 | import spectrum_utils.plot as sup 20 | import spectrum_utils.spectrum as sus 21 | 22 | 23 | # Retrieve the spectrum by its USI. 24 | usi = "mzspec:MSV000082283:f07074:scan:5475" 25 | spectrum = sus.MsmsSpectrum.from_usi(usi) 26 | 27 | # Annotate the spectrum with its ProForma string. 28 | peptide = "EM[Oxidation]EVEES[Phospho]PEK" 29 | spectrum = spectrum.annotate_proforma(peptide, 10, "ppm") 30 | 31 | # Plot the spectrum. 32 | fig, ax = plt.subplots(figsize=(12, 6)) 33 | sup.spectrum(spectrum, grid=False, ax=ax) 34 | ax.set_title(peptide, fontdict={"fontsize": "xx-large"}) 35 | ax.spines["right"].set_visible(False) 36 | ax.spines["top"].set_visible(False) 37 | plt.savefig("proforma_ex1.png", bbox_inches="tight", dpi=300, transparent=True) 38 | plt.close() 39 | ``` 40 | ![ProForma example spectrum plot](proforma_ex1.png) 41 | 42 | - Specify modifications by their CV accession: `EM[MOD:00719]EVEES[MOD:00046]PEK`. 43 | ```python 44 | import matplotlib.pyplot as plt 45 | import spectrum_utils.plot as sup 46 | import spectrum_utils.spectrum as sus 47 | 48 | 49 | # Retrieve the spectrum by its USI. 
50 | usi = "mzspec:MSV000082283:f07074:scan:5475" 51 | spectrum = sus.MsmsSpectrum.from_usi(usi) 52 | 53 | # Annotate the spectrum with its ProForma string. 54 | peptide = "EM[MOD:00719]EVEES[MOD:00046]PEK" 55 | spectrum = spectrum.annotate_proforma(peptide, 10, "ppm") 56 | 57 | # Plot the spectrum. 58 | fig, ax = plt.subplots(figsize=(12, 6)) 59 | sup.spectrum(spectrum, grid=False, ax=ax) 60 | ax.set_title(peptide, fontdict={"fontsize": "xx-large"}) 61 | ax.spines["right"].set_visible(False) 62 | ax.spines["top"].set_visible(False) 63 | plt.savefig("proforma_ex2.png", bbox_inches="tight", dpi=300, transparent=True) 64 | plt.close() 65 | ``` 66 | ![ProForma example spectrum plot](proforma_ex2.png) 67 | 68 | - Specify modifications by their delta mass: `EM[+15.9949]EVEES[+79.9663]PEK`. 69 | ```python 70 | import matplotlib.pyplot as plt 71 | import spectrum_utils.plot as sup 72 | import spectrum_utils.spectrum as sus 73 | 74 | 75 | # Retrieve the spectrum by its USI. 76 | usi = "mzspec:MSV000082283:f07074:scan:5475" 77 | spectrum = sus.MsmsSpectrum.from_usi(usi) 78 | 79 | # Annotate the spectrum with its ProForma string. 80 | peptide = "EM[+15.9949]EVEES[+79.9663]PEK" 81 | spectrum = spectrum.annotate_proforma(peptide, 10, "ppm") 82 | 83 | # Plot the spectrum. 84 | fig, ax = plt.subplots(figsize=(12, 6)) 85 | sup.spectrum(spectrum, grid=False, ax=ax) 86 | ax.set_title(peptide, fontdict={"fontsize": "xx-large"}) 87 | ax.spines["right"].set_visible(False) 88 | ax.spines["top"].set_visible(False) 89 | plt.savefig("proforma_ex3.png", bbox_inches="tight", dpi=300, transparent=True) 90 | plt.close() 91 | ``` 92 | ![ProForma example spectrum plot](proforma_ex3.png) 93 | 94 | For full details and advanced use cases, please consult the [ProForma 2.0](https://www.psidev.info/proforma) specification. 95 | 96 | ### Supported ProForma 2.0 features 97 | 98 | As described in the [ProForma 2.0 specification](https://www.psidev.info/proforma), there are several levels of compliance. 
99 | spectrum_utils uniquely supports the _full_ ProForma 2.0 specification, including the following features: 100 | 101 | 1. Base Level Support. 102 | Represents the lowest level of compliance, this level involves providing support for: 103 | - Amino acid sequences. 104 | - Protein modifications using two of the supported CVs/ontologies: Unimod and PSI-MOD. 105 | - Protein modifications using delta masses (without prefixes). 106 | - N-terminal, C-terminal, and labile modifications. 107 | - Ambiguity in the modification position, including support for localization scores. 108 | - `INFO` tag. 109 | 110 | 2. Additional Separate Support. 111 | These features are independent from each other: 112 | - Unusual amino acids (O and U). 113 | - Ambiguous amino acids (e.g. X, B, Z). 114 | This would include support for sequence tags of known mass (using the character X). 115 | - Protein modifications using delta masses (using prefixes for the different CVs/ontologies). 116 | - Use of prefixes for Unimod (`U:`) and PSI-MOD (`M:`) names. 117 | - Support for the joint representation of experimental data and its interpretation. 118 | 119 | 3. Top Down Extensions. 120 | - Additional CV/ontologies for protein modifications: RESID (the prefix `R` MUST be used for RESID CV/ontology term names). 121 | - Chemical formulas (this feature occurs in two places in this list). 122 | 123 | 4. Cross-Linking Extensions 124 | - Cross-linked peptides (using the XL-MOD CV/ontology, the prefix `X` MUST be used for XL-MOD CV/ontology term names). 125 | 126 | 5. Glycan Extensions. 127 | - Additional CV/ontologies for protein modifications: GNO (the prefix `G` MUST be used for GNO CV/ontology term names). 128 | - Glycan composition. 129 | - Chemical formulas (this feature occurs in two places in this list). 130 | 131 | 6. Spectral Support. 132 | - Charge and chimeric spectra are special cases (see Appendix II). 133 | - Global modifications (e.g., every C is C13). 
134 | 135 | ### Implementation details 136 | 137 | Internally, spectrum_utils represents the ProForma 2.0 specification as a formal grammar which is used to create an abstract syntax tree when parsing a ProForma string. 138 | This approach is similar to how compilers interpret complex source code instructions, and the formal grammar is the only existing codified representation for ProForma 2.0 that is machine-readable. 139 | This is an extremely robust and scalable solution to cover the full ProForma 2.0 specification, including optional extensions and edge cases, compared to alternative approaches, such as combinations of regular expressions. 140 | 141 | Example abstract syntax tree for `{Glycan:HexNAcHex2}[Acetyl]-EM[UNIMOD:35]EVNES[Obs:+79.966|Phospho|Sulfo]PEK`, which demonstrates several functionalities of the ProForma specification: 142 | 143 | - Multiple labile glycan modifications (1 HexNAc and 2 Hex). 144 | - An N-terminal acetylation specified by its [modification name (in Unimod)](https://www.unimod.org/modifications_view.php?editid1=1). 145 | - Oxidation of methionine specified by its [Unimod accession (`UNIMOD:35`)](https://www.unimod.org/modifications_view.php?editid1=35). 146 | - An observed mass difference of 79.966 Da that can be interpreted as a [phosphorylation](https://www.unimod.org/modifications_view.php?editid1=21) or [sulfation](https://www.unimod.org/modifications_view.php?editid1=40). 147 | 148 | ![ProForma abstract syntax tree](proforma_ast.png) 149 | 150 | (ion_types)= 151 | ## Ion types 152 | 153 | During fragment ion annotation, by default peptide b and y ions will be annotated. 154 | Additionally, spectrum_utils supports several other ion types: 155 | 156 | - Primary `"a"`, `"b"`, `"c"`, `"x"`, `"y"`, and `"z"` peptide fragments. 157 | - Internal fragment ions `"m"`, which result from two amide bond cleavages and thus do not contain either terminus.
158 | - Immonium ions `"I"`, which are internal fragments for individual amino acids formed by a b/y cleavage on the N-terminal side and an a/x cleavage on the C-terminal side. 159 | - Intact precursor ions `"p"`. 160 | - Reporter ions from isobaric labeling `"r"`. 161 | 162 | Specify the desired ion types when annotating a spectrum using its ProForma string. 163 | For example, `MsmsSpectrum.annotate_proforma(..., ion_types="abyIm")` will find matching peaks for the a, b, and y peptide fragments, immonium ions, and internal fragment ions. 164 | 165 | ```python 166 | import matplotlib.pyplot as plt 167 | import spectrum_utils.plot as sup 168 | import spectrum_utils.spectrum as sus 169 | 170 | 171 | usi = "mzspec:PXD000561:Adult_Frontalcortex_bRP_Elite_85_f09:scan:17555" 172 | peptide = "VLHPLEGAVVIIFK" 173 | spectrum = sus.MsmsSpectrum.from_usi(usi) 174 | spectrum.annotate_proforma(peptide, 10, "ppm", ion_types="abyIm") 175 | 176 | fig, ax = plt.subplots(figsize=(12, 6)) 177 | sup.spectrum(spectrum, grid=False, ax=ax) 178 | ax.set_title(peptide, fontdict={"fontsize": "xx-large"}) 179 | ax.spines["right"].set_visible(False) 180 | ax.spines["top"].set_visible(False) 181 | plt.savefig("ion_types.png", dpi=300, bbox_inches="tight", transparent=True) 182 | plt.close() 183 | ``` 184 | ![Ion types example spectrum plot](ion_types.png) 185 | 186 | Besides the canonical peptide fragments, we can also observe immonium ions (dark gray) and several internal fragment ions (yellow). 187 | 188 | ## Neutral losses 189 | 190 | Each of the above ions can also be automatically considered with a neutral loss (or gain).
191 | Neutral losses need to be specified by a concise label (preferably their molecular formula) and mass difference: 192 | 193 | The following example demonstrates how the number of observed peaks that can be interpreted increases by considering fragments with an optional ammonia (NH3) or water (H2O) neutral loss: 194 | 195 | ```python 196 | import matplotlib.pyplot as plt 197 | import spectrum_utils.plot as sup 198 | import spectrum_utils.spectrum as sus 199 | 200 | 201 | usi = "mzspec:PXD014834:TCGA-AA-3518-01A-11_W_VU_20120915_A0218_3F_R_FR01:scan:8370" 202 | peptide = "WNQLQAFWGTGK" 203 | spectrum = sus.MsmsSpectrum.from_usi(usi) 204 | spectrum.annotate_proforma( 205 | peptide, 206 | fragment_tol_mass=0.05, 207 | fragment_tol_mode="Da", 208 | ion_types="aby", 209 | neutral_losses={"NH3": -17.026549, "H2O": -18.010565}, 210 | ) 211 | 212 | fig, ax = plt.subplots(figsize=(12, 6)) 213 | sup.spectrum(spectrum, grid=False, ax=ax) 214 | ax.set_title(peptide, fontdict={"fontsize": "xx-large"}) 215 | ax.spines["right"].set_visible(False) 216 | ax.spines["top"].set_visible(False) 217 | plt.savefig("neutral_losses_1.png", dpi=300, bbox_inches="tight", transparent=True) 218 | plt.close() 219 | ``` 220 | 221 | ![Neutral losses example spectrum plot](neutral_losses_1.png) 222 | 223 | Peaks that correspond to peptide fragments with a neutral loss are highlighted in the matching color. 
224 | 225 | In contrast, the same peptide--spectrum match without considering neutral losses is able to explain far fewer peaks: 226 | 227 | ```python 228 | import matplotlib.pyplot as plt 229 | import spectrum_utils.plot as sup 230 | import spectrum_utils.spectrum as sus 231 | 232 | 233 | usi = "mzspec:PXD014834:TCGA-AA-3518-01A-11_W_VU_20120915_A0218_3F_R_FR01:scan:8370" 234 | peptide = "WNQLQAFWGTGK" 235 | spectrum = sus.MsmsSpectrum.from_usi(usi) 236 | spectrum.annotate_proforma( 237 | peptide, fragment_tol_mass=0.05, fragment_tol_mode="Da", ion_types="aby", 238 | ) 239 | 240 | fig, ax = plt.subplots(figsize=(12, 6)) 241 | sup.spectrum(spectrum, grid=False, ax=ax) 242 | ax.set_title(peptide, fontdict={"fontsize": "xx-large"}) 243 | ax.spines["right"].set_visible(False) 244 | ax.spines["top"].set_visible(False) 245 | plt.savefig("neutral_losses_2.png", dpi=300, bbox_inches="tight", transparent=True) 246 | plt.close() 247 | ``` 248 | 249 | ![Neutral losses example spectrum plot](neutral_losses_2.png) 250 | 251 | ### Common neutral losses 252 | 253 | Overview of common neutral losses: 254 | 255 | | Neutral loss/gain | Molecular formula | Mass difference | 256 | | --- | --- | --- | 257 | | Hydrogen | H | 1.007825 | 258 | | Ammonia | NH3 | 17.026549 | 259 | | Water | H2O | 18.010565 | 260 | | Carbon monoxide | CO | 27.994915 | 261 | | Carbon dioxide | CO2 | 43.989829 | 262 | | Formamide | HCONH2 | 45.021464 | 263 | | Formic acid | HCOOH | 46.005479 | 264 | | Methanesulfenic acid | CH4OS | 63.998301 | 265 | | Sulfur trioxide | SO3 | 79.956818 | 266 | | Metaphosphoric acid | HPO3 | 79.966331 | 267 | | Mercaptoacetamide | C2H5NOS | 91.009195 | 268 | | Mercaptoacetic acid | C2H4O2S | 91.993211 | 269 | | Phosphoric acid | H3PO4 | 97.976896 | 270 | 271 | Note that typically the neutral _loss_ mass difference should be negative. 272 | 273 | By default, no neutral losses are considered. 
274 | If the `neutral_losses` argument of `MsmsSpectrum.annotate_proforma(...)` is set to `True`, all above mass differences will be considered as neutral losses (negative). 275 | -------------------------------------------------------------------------------- /docs/src/api.md: -------------------------------------------------------------------------------- 1 | # Python API 2 | 3 | ## spectrum_utils.spectrum module 4 | 5 | ```{eval-rst} 6 | .. autoclass:: spectrum_utils.spectrum.MsmsSpectrum 7 | :members: 8 | :undoc-members: 9 | :private-members: 10 | :show-inheritance: 11 | ``` 12 | 13 | ## spectrum_utils.proforma module 14 | 15 | ```{eval-rst} 16 | .. automodule:: spectrum_utils.proforma 17 | :members: 18 | :undoc-members: 19 | :private-members: 20 | :show-inheritance: 21 | ``` 22 | 23 | ## spectrum_utils.fragment_annotation module 24 | 25 | ```{eval-rst} 26 | .. automodule:: spectrum_utils.fragment_annotation 27 | :members: 28 | :undoc-members: 29 | :private-members: 30 | :show-inheritance: 31 | ``` 32 | 33 | ## spectrum_utils.plot module 34 | 35 | ```{eval-rst} 36 | .. automodule:: spectrum_utils.plot 37 | :members: 38 | :undoc-members: 39 | :private-members: 40 | :show-inheritance: 41 | ``` 42 | 43 | ## spectrum_utils.iplot module 44 | 45 | ```{eval-rst} 46 | .. automodule:: spectrum_utils.iplot 47 | :members: 48 | :undoc-members: 49 | :private-members: 50 | :show-inheritance: 51 | ``` 52 | 53 | ## spectrum_utils.utils module 54 | 55 | ```{eval-rst} 56 | .. automodule:: spectrum_utils.utils 57 | :members: 58 | :undoc-members: 59 | :private-members: 60 | :show-inheritance: 61 | ``` 62 | -------------------------------------------------------------------------------- /docs/src/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Configuration file for the Sphinx documentation builder. 4 | # 5 | # This file does only contain a selection of the most common options. 
For a 6 | # full list see the documentation: 7 | # http://www.sphinx-doc.org/en/master/config 8 | 9 | # -- Path setup -------------------------------------------------------------- 10 | 11 | # If extensions (or modules to document with autodoc) are in another directory, 12 | # add these directories to sys.path here. If the directory is relative to the 13 | # documentation root, use os.path.abspath to make it absolute, like shown here. 14 | import os 15 | import sys 16 | 17 | sys.path.insert(0, os.path.abspath(os.path.join("..", ".."))) 18 | 19 | import spectrum_utils 20 | 21 | 22 | # -- Project information ----------------------------------------------------- 23 | 24 | project = "spectrum_utils" 25 | copyright = "2019–2022, Wout Bittremieux" 26 | author = "Wout Bittremieux" 27 | 28 | # The short X.Y version 29 | version = spectrum_utils.__version__ 30 | # The full version, including alpha/beta/rc tags 31 | release = spectrum_utils.__version__ 32 | 33 | 34 | # -- General configuration --------------------------------------------------- 35 | 36 | # If your documentation needs a minimal Sphinx version, state it here. 37 | # needs_sphinx = "1.0" 38 | 39 | # Add any Sphinx extension module names here, as strings. They can be 40 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 41 | # ones. 42 | extensions = [ 43 | "myst_parser", # Use Markdown instead of reStructuredText. 44 | "sphinx_markdown_tables", # Support tables in Markdown. 45 | "sphinx.ext.autodoc", # Include documentation from docstrings. 46 | # "sphinx.ext.autosummary", # Generate documentation summary one-liners. 47 | # "sphinx.ext.doctest", # Test code in the documentation. 48 | # "sphinx.ext.coverage", # Collect documentation coverage statistics. 49 | "sphinx.ext.napoleon", # Support NumPy and Google style docstrings. 50 | "sphinx.ext.viewcode", # Add links to the source code. 51 | "sphinx_rtd_theme", # Read-the-docs theme. 
52 | ] 53 | 54 | # Generate documentation from all docstrings. 55 | autodoc_default_options = { 56 | "member-order": "bysource", # Sort by order in the source. 57 | "special-members": "__init__", # Include __init__ methods. 58 | "undoc-members": True, # Include methods without a docstring. 59 | } 60 | # Prevent import errors from these modules. 61 | # autodoc_mock_imports = [] 62 | # Scan all found documents for autosummary directives and generate stub pages 63 | # for each. 64 | autosummary_generate = True 65 | 66 | # Add any paths that contain templates here, relative to this directory. 67 | # templates_path = ["_templates"] 68 | 69 | # The suffix(es) of source filenames. 70 | # You can specify multiple suffix as a list of string: 71 | source_suffix = [".rst", ".md"] 72 | 73 | # The master toctree document. 74 | master_doc = "index" 75 | 76 | # The language for content autogenerated by Sphinx. Refer to documentation 77 | # for a list of supported languages. 78 | # This is also used if you do content translation via gettext catalogs. 79 | # Usually you set "language" from the command line for these cases. 80 | language = "en" 81 | 82 | # List of patterns, relative to source directory, that match files and 83 | # directories to ignore when looking for source files. 84 | # This pattern also affects html_static_path and html_extra_path. 85 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 86 | 87 | # The name of the Pygments (syntax highlighting) style to use. 88 | pygments_style = None 89 | 90 | 91 | # -- Options for HTML output ------------------------------------------------- 92 | 93 | # The theme to use for HTML and HTML Help pages. See the documentation for 94 | # a list of builtin themes. 95 | html_theme = "sphinx_rtd_theme" 96 | 97 | # Theme options are theme-specific and customize the look and feel of a theme 98 | # further. For a list of options available for each theme, see the 99 | # documentation. 
100 | # html_theme_options = {} 101 | 102 | # Add any paths that contain custom static files (such as style sheets) here, 103 | # relative to this directory. They are copied after the builtin static files, 104 | # so a file named "default.css" will overwrite the builtin "default.css". 105 | # html_static_path = ['_static'] 106 | 107 | # Custom sidebar templates, must be a dictionary that maps document names 108 | # to template names. 109 | # The default sidebars (for documents that don't match any pattern) are 110 | # defined by theme itself. Builtin themes are using these templates by 111 | # default: ``["localtoc.html", "relations.html", "sourcelink.html", 112 | # "searchbox.html"]``. 113 | # html_sidebars = {} 114 | 115 | 116 | # -- Options for HTMLHelp output --------------------------------------------- 117 | 118 | # Output file base name for HTML help builder. 119 | htmlhelp_basename = "spectrum_utilsdoc" 120 | 121 | 122 | # -- Options for LaTeX output ------------------------------------------------ 123 | 124 | latex_elements = { 125 | # The paper size ("letterpaper" or "a4paper"). 126 | # "papersize": "letterpaper", 127 | # The font size ("10pt", "11pt" or "12pt"). 128 | # "pointsize": "10pt", 129 | # Additional stuff for the LaTeX preamble. 130 | # "preamble": "", 131 | # Latex figure (float) alignment 132 | # "figure_align": "htbp", 133 | } 134 | 135 | # Grouping the document tree into LaTeX files. List of tuples 136 | # (source start file, target name, title, 137 | # author, documentclass [howto, manual, or own class]). 138 | latex_documents = [ 139 | ( 140 | master_doc, 141 | "spectrum_utils.tex", 142 | "spectrum\\_utils Documentation", 143 | "Wout Bittremieux", 144 | "manual", 145 | ), 146 | ] 147 | 148 | 149 | # -- Options for manual page output ------------------------------------------ 150 | 151 | # One entry per manual page. List of tuples 152 | # (source start file, name, description, authors, manual section). 
153 | man_pages = [ 154 | (master_doc, "spectrum_utils", "spectrum_utils Documentation", [author], 1) 155 | ] 156 | 157 | 158 | # -- Options for Texinfo output ---------------------------------------------- 159 | 160 | # Grouping the document tree into Texinfo files. List of tuples 161 | # (source start file, target name, title, author, 162 | # dir menu entry, description, category) 163 | texinfo_documents = [ 164 | ( 165 | master_doc, 166 | "spectrum_utils", 167 | "spectrum_utils Documentation", 168 | author, 169 | "spectrum_utils", 170 | " Python package for efficient MS/MS spectrum processing and " 171 | "visualization.", 172 | "Miscellaneous", 173 | ), 174 | ] 175 | 176 | 177 | # -- Options for Epub output ------------------------------------------------- 178 | 179 | # Bibliographic Dublin Core info. 180 | epub_title = project 181 | 182 | # The unique identifier of the text. This can be a ISBN number 183 | # or the project homepage. 184 | # epub_identifier = "" 185 | 186 | # A unique identification for the text. 187 | # epub_uid = "" 188 | 189 | # A list of files that should not be packed into the epub file. 190 | epub_exclude_files = ["search.html"] 191 | 192 | 193 | # -- Extension configuration ------------------------------------------------- 194 | 195 | autodoc_mock_imports = [ 196 | "fastobo", 197 | "lark", 198 | "matplotlib", 199 | "numba", 200 | "numpy", 201 | "pandas", 202 | "pyteomics", 203 | ] 204 | -------------------------------------------------------------------------------- /docs/src/contact.md: -------------------------------------------------------------------------------- 1 | # Contact 2 | 3 | For more information you can visit the [official GitHub repository](https://github.com/bittremieux/spectrum_utils/). 4 | 5 | ## Citation 6 | 7 | When using spectrum_utils, please cite the following manuscripts: 8 | 9 | - Wout Bittremieux. "spectrum_utils: A Python package for mass spectrometry data processing and visualization." 
_Analytical Chemistry_ **92**, 659--661 (2020) doi:[10.1021/acs.analchem.9b04884](https://doi.org/10.1021/acs.analchem.9b04884). 10 | - Wout Bittremieux, Lev Levitsky, Matteo Pilz, Timo Sachsenberg, Florian Huber, Mingxun Wang, Pieter C. Dorrestein. "Unified and standardized mass spectrometry data processing in Python using spectrum_utils." _Journal of Proteome Research_ **22**, 625--631 (2023) doi:[10.1021/acs.jproteome.2c00632](https://doi.org/10.1021/acs.jproteome.2c00632). 11 | -------------------------------------------------------------------------------- /docs/src/facet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bittremieux-lab/spectrum_utils/a12f453040bb9bd36a261ae46063dfe828d4fe5e/docs/src/facet.png -------------------------------------------------------------------------------- /docs/src/index.md: -------------------------------------------------------------------------------- 1 | # spectrum_utils 2 | 3 | [![conda](https://img.shields.io/conda/vn/bioconda/spectrum_utils?color=green)](http://bioconda.github.io/recipes/spectrum_utils/README.html) 4 | [![PyPI](https://img.shields.io/pypi/v/spectrum_utils?color=green)](https://pypi.org/project/spectrum_utils/) 5 | [![Build status](https://github.com/bittremieux/spectrum_utils/workflows/tests/badge.svg)](https://github.com/bittremieux/spectrum_utils/actions?query=workflow:tests) 6 | [![docs](https://readthedocs.org/projects/spectrum-utils/badge/?version=latest)](https://spectrum-utils.readthedocs.io/en/latest/?badge=latest) 7 | 8 | ## About spectrum_utils 9 | 10 | spectrum_utils is a Python package for efficient mass spectrometry data processing and visualization. 11 | 12 | spectrum_utils contains the following features: 13 | 14 | - Spectrum loading from online proteomics and metabolomics data resources using the [Universal Spectrum Identifier (USI)](https://www.psidev.info/usi) mechanism. 15 | - Common spectrum processing operations (precursor & noise peak removal, intensity filtering, intensity scaling) optimized for computational efficiency.
16 | - Annotating observed spectrum fragments using the [ProForma 2.0 specification](https://www.psidev.info/proforma) for (modified) peptidoforms. 17 | - Publication-quality, fully customizable spectrum plotting and interactive spectrum plotting. 18 | 19 | See the documentation for more information and detailed examples on how to get started with spectrum_utils for versatile mass spectrometry data manipulation in Python. 20 | 21 | ## Citation 22 | 23 | spectrum_utils is freely available as open source under the [Apache 2.0 license](http://opensource.org/licenses/Apache-2.0). 24 | 25 | When using spectrum_utils, please cite the following manuscripts: 26 | 27 | - Wout Bittremieux. "spectrum_utils: A Python package for mass spectrometry data processing and visualization." _Analytical Chemistry_ **92**, 659--661 (2020) doi:[10.1021/acs.analchem.9b04884](https://doi.org/10.1021/acs.analchem.9b04884). 28 | - Wout Bittremieux, Lev Levitsky, Matteo Pilz, Timo Sachsenberg, Florian Huber, Mingxun Wang, Pieter C. Dorrestein. "Unified and standardized mass spectrometry data processing in Python using spectrum_utils" _Journal of Proteome Research_ **22**, 625--631 (2023) doi:[10.1021/acs.jproteome.2c00632](https://doi.org/10.1021/acs.jproteome.2c00632). 29 | 30 | ```{toctree} 31 | --- 32 | caption: Contents 33 | maxdepth: 1 34 | --- 35 | 36 | install 37 | quickstart 38 | annotating 39 | plotting 40 | runtime 41 | api 42 | contact 43 | ``` 44 | -------------------------------------------------------------------------------- /docs/src/install.md: -------------------------------------------------------------------------------- 1 | # Install 2 | 3 | spectrum_utils requires Python version 3.8+ and can be installed with pip or conda. 4 | 5 | Using pip: 6 | 7 | pip install spectrum_utils[iplot] 8 | 9 | Using conda: 10 | 11 | conda install -c bioconda spectrum_utils 12 | 13 | ## Supported Python versions 14 | 15 | spectrum_utils supports Python version 3.8 and above. 
16 | 17 | ## Dependencies 18 | 19 | spectrum_utils has the following third-party dependencies: 20 | 21 | - [fastobo](https://fastobo.readthedocs.io/) 22 | - [Lark](https://lark-parser.readthedocs.io/) 23 | - [Matplotlib](https://matplotlib.org/) 24 | - [Numba](http://numba.pydata.org/) 25 | - [NumPy](https://www.numpy.org/) 26 | - [Pandas](https://pandas.pydata.org/) 27 | - [platformdirs](https://github.com/platformdirs/platformdirs) 28 | - [Pyteomics](https://pyteomics.readthedocs.io/) 29 | - [Vega-Altair](https://altair-viz.github.io/) 30 | 31 | Missing dependencies will be automatically installed when you install spectrum_utils using pip or conda. 32 | 33 | Additionally, we recommend manually installing [pyteomics.cythonize](https://pypi.org/project/pyteomics.cythonize/) as a drop-in replacement for faster fragment ion mass calculations. 34 | 35 | ## Advanced installation instructions 36 | 37 | spectrum_utils provides modular installation capabilities to minimize the number of third-party dependencies that will be installed when only a subset of the spectrum_utils functionality is required. 38 | The previous pip and conda commands will install all optional spectrum_utils extensions (excluding developer and documentation dependencies). 39 | Power users can customize their spectrum_utils installation by specifying one or more of the following sets of dependencies: 40 | 41 | - `dev`: Developer dependencies for automatic linting and testing. 42 | - `docs`: Dependencies to generate these documentation pages. 43 | - `iplot`: Interactive spectrum plotting using [Vega-Altair](https://altair-viz.github.io/).
44 | -------------------------------------------------------------------------------- /docs/src/ion_types.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bittremieux-lab/spectrum_utils/a12f453040bb9bd36a261ae46063dfe828d4fe5e/docs/src/ion_types.png -------------------------------------------------------------------------------- /docs/src/iplot_spectrum.json: -------------------------------------------------------------------------------- 1 | {"config": {"view": {"continuousWidth": 400, "continuousHeight": 300}}, "layer": [{"data": {"name": "data-a64c5702074096e6cd965645a52da966"}, "mark": {"type": "rule", "size": 2}, "encoding": {"color": {"type": "nominal", "field": "color", "legend": null, "scale": null}, "tooltip": [{"type": "quantitative", "field": "mz", "format": ".4f", "title": "m/z"}, {"type": "quantitative", "field": "intensity", "format": ".1%", "title": "Intensity"}], "x": {"type": "quantitative", "axis": {"grid": true, "title": "m/z", "titleFontStyle": "italic"}, "field": "mz", "scale": {"nice": true, "padding": 5}}, "y": {"type": "quantitative", "axis": {"format": "%", "grid": true, "title": "Intensity"}, "field": "intensity", "scale": {"nice": true}}}}, {"data": {"name": "data-74878dd49dd20ad3d314195fc2ba5f7b"}, "mark": {"type": "rule", "size": 2}, "encoding": {"color": {"type": "nominal", "field": "color", "legend": null, "scale": null}, "tooltip": [{"type": "quantitative", "field": "mz", "format": ".4f", "title": "m/z"}, {"type": "quantitative", "field": "intensity", "format": ".1%", "title": "Intensity"}, {"type": "nominal", "field": "fragment", "title": "Fragment"}, {"type": "nominal", "field": "mz_delta", "title": "m/z deviation"}], "x": {"type": "quantitative", "axis": {"grid": true, "title": "m/z", "titleFontStyle": "italic"}, "field": "mz", "scale": {"nice": true, "padding": 5}}, "y": {"type": "quantitative", "axis": {"format": "%", "grid": true, "title": "Intensity"}, 
"field": "intensity", "scale": {"nice": true}}}}, {"data": {"name": "data-74878dd49dd20ad3d314195fc2ba5f7b"}, "mark": {"type": "text", "align": "left", "angle": 270, "baseline": "middle", "dx": 5}, "encoding": {"color": {"type": "nominal", "field": "color", "legend": null, "scale": null}, "text": {"type": "nominal", "field": "fragment"}, "tooltip": [{"type": "quantitative", "field": "mz", "format": ".4f", "title": "m/z"}, {"type": "quantitative", "field": "intensity", "format": ".1%", "title": "Intensity"}, {"type": "nominal", "field": "fragment", "title": "Fragment"}, {"type": "nominal", "field": "mz_delta", "title": "m/z deviation"}], "x": {"type": "quantitative", "axis": {"grid": true, "title": "m/z", "titleFontStyle": "italic"}, "field": "mz", "scale": {"nice": true, "padding": 5}}, "y": {"type": "quantitative", "axis": {"format": "%", "grid": true, "title": "Intensity"}, "field": "intensity", "scale": {"nice": true}}}}], "height": 400, "width": 640, "$schema": "https://vega.github.io/schema/vega-lite/v4.8.1.json", "datasets": {"data-a64c5702074096e6cd965645a52da966": [{"mz": 101.07122039794922, "intensity": 0.148338183760643, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 109.68924713134766, "intensity": 0.007550157606601715, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 115.86998748779297, "intensity": 0.007336989510804415, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 120.08110046386719, "intensity": 0.12255959212779999, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 129.06594848632812, "intensity": 0.020143359899520874, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 129.102294921875, "intensity": 0.12308508157730103, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 130.06533813476562, "intensity": 0.1629226803779602, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 130.08633422851562, "intensity": 0.04563209041953087, "fragment": "", "mz_delta": null, 
"color": "#212121"}, {"mz": 130.95578002929688, "intensity": 0.008904634043574333, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 131.11859130859375, "intensity": 0.010367393493652344, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 132.07968139648438, "intensity": 0.07641462981700897, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 136.07566833496094, "intensity": 0.05691216513514519, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 141.3184814453125, "intensity": 0.008756570518016815, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 142.21034240722656, "intensity": 0.009229789488017559, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 155.08184814453125, "intensity": 0.011481806635856628, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 159.0760498046875, "intensity": 0.03314025327563286, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 159.09169006347656, "intensity": 1.0, "fragment": "", "mz_delta": "0.1ppm", "color": "#388E3C"}, {"mz": 160.07516479492188, "intensity": 0.011401713825762272, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 160.09506225585938, "intensity": 0.054223813116550446, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 170.06008911132812, "intensity": 0.32688644528388977, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 171.0631866455078, "intensity": 0.02041677013039589, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 175.11871337890625, "intensity": 0.028740430250763893, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 186.12335205078125, "intensity": 0.056916072964668274, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 197.12814331054688, "intensity": 0.0751049593091011, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 199.109130859375, "intensity": 0.0103872399777174, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 
199.17962646484375, "intensity": 0.023118557408452034, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 200.1028594970703, "intensity": 0.039687901735305786, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 201.10281372070312, "intensity": 0.041108112782239914, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 201.1234130859375, "intensity": 0.03132535144686699, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 204.1133575439453, "intensity": 0.02866506576538086, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 211.14495849609375, "intensity": 0.020317386835813522, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 213.1597900390625, "intensity": 0.021088870242238045, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 214.09768676757812, "intensity": 0.22772477567195892, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 214.15350341796875, "intensity": 0.0830322653055191, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 215.10003662109375, "intensity": 0.02158365398645401, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 216.6425323486328, "intensity": 0.01774018071591854, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 217.13331604003906, "intensity": 0.011508912779390812, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 224.1397705078125, "intensity": 0.018519965931773186, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 225.1234893798828, "intensity": 0.031006278470158577, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 226.08267211914062, "intensity": 0.027994472533464432, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 228.13485717773438, "intensity": 0.01959451474249363, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 229.12010192871094, "intensity": 0.03323378413915634, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 233.16494750976562, 
"intensity": 0.07654907554388046, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 239.082275390625, "intensity": 0.03774549812078476, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 242.14993286132812, "intensity": 0.11195865273475647, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 243.10874938964844, "intensity": 0.13127824664115906, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 244.10833740234375, "intensity": 0.025648383423686028, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 245.3516845703125, "intensity": 0.020252369344234467, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 253.09628295898438, "intensity": 0.032357994467020035, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 256.10833740234375, "intensity": 0.033013418316841125, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 261.15960693359375, "intensity": 0.06451421976089478, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 273.1348571777344, "intensity": 0.03534402325749397, "fragment": "", "mz_delta": "0.9ppm", "color": "#388E3C"}, {"mz": 282.1797790527344, "intensity": 0.019065726548433304, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 284.1026611328125, "intensity": 0.6415199041366577, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 285.1063232421875, "intensity": 0.07752740383148193, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 287.1734313964844, "intensity": 0.026046147570014, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 289.11083984375, "intensity": 0.017870688810944557, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 302.1326904296875, "intensity": 0.02836090512573719, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 310.1726989746094, "intensity": 0.027750393375754356, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 313.1876220703125, "intensity": 0.04817057028412819, 
"fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 318.1550598144531, "intensity": 0.061667412519454956, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 321.15631103515625, "intensity": 0.028431694954633713, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 326.1781311035156, "intensity": 0.04074738919734955, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 328.2327880859375, "intensity": 0.017787983641028404, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 334.1546630859375, "intensity": 0.019314678385853767, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 344.0024719238281, "intensity": 0.010485530830919743, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 344.1928405761719, "intensity": 0.10169167816638947, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 345.1581115722656, "intensity": 0.03624998405575752, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 347.1709289550781, "intensity": 0.06590598076581955, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 352.1987609863281, "intensity": 0.026266280561685562, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 353.18170166015625, "intensity": 0.14427943527698517, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 356.19281005859375, "intensity": 0.07075531035661697, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 361.2311096191406, "intensity": 0.03169279918074608, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 363.2070007324219, "intensity": 0.09083585441112518, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 367.14434814453125, "intensity": 0.05584902688860893, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 370.21044921875, "intensity": 0.05039053410291672, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 384.1676330566406, "intensity": 0.043628327548503876, "fragment": "", "mz_delta": null, 
"color": "#212121"}, {"mz": 385.1712341308594, "intensity": 0.04909650981426239, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 395.1347961425781, "intensity": 0.19516165554523468, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 402.1809387207031, "intensity": 0.020460274070501328, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 409.2093505859375, "intensity": 0.029131930321455002, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 412.1607360839844, "intensity": 0.2674732506275177, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 413.1644592285156, "intensity": 0.04392719641327858, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 417.177734375, "intensity": 0.034838926047086716, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 424.217529296875, "intensity": 0.08377550542354584, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 425.22711181640625, "intensity": 0.020911267027258873, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 430.19342041015625, "intensity": 0.04080050066113472, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 441.2466735839844, "intensity": 0.030935177579522133, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 454.23114013671875, "intensity": 0.03229103237390518, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 462.21368408203125, "intensity": 0.034385476261377335, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 463.1971130371094, "intensity": 0.07402148842811584, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 480.2240905761719, "intensity": 0.13736052811145782, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 481.226318359375, "intensity": 0.02832726202905178, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 484.24505615234375, "intensity": 0.022815125063061714, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 
488.21954345703125, "intensity": 0.02879846654832363, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 497.2502746582031, "intensity": 0.09249623119831085, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 498.25396728515625, "intensity": 0.06746581196784973, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 506.25054931640625, "intensity": 0.020595932379364967, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 508.2207336425781, "intensity": 0.03447815775871277, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 512.2582397460938, "intensity": 0.02941327542066574, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 516.2634887695312, "intensity": 0.02493799850344658, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 517.2271728515625, "intensity": 0.02447432652115822, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 521.2412719726562, "intensity": 0.01960829272866249, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 523.7442626953125, "intensity": 0.01984090358018875, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 525.2426147460938, "intensity": 0.0960967093706131, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 526.2479248046875, "intensity": 0.035229749977588654, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 530.2679443359375, "intensity": 0.05706870183348656, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 538.2637939453125, "intensity": 0.02259320579469204, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 545.2481689453125, "intensity": 0.028604505583643913, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 549.2872924804688, "intensity": 0.14675451815128326, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 550.78271484375, "intensity": 0.031602609902620316, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 555.285888671875, "intensity": 
0.026801517233252525, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 559.2943115234375, "intensity": 0.12255251407623291, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 559.7947998046875, "intensity": 0.06386437267065048, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 571.28662109375, "intensity": 0.047733355313539505, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 587.294189453125, "intensity": 0.022753508761525154, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 599.2872314453125, "intensity": 0.035877007991075516, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 599.785400390625, "intensity": 0.034990787506103516, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 600.4091186523438, "intensity": 0.02041812427341938, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 608.2901611328125, "intensity": 0.047849226742982864, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 629.7884521484375, "intensity": 0.024940097704529762, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 636.2815551757812, "intensity": 0.03865744173526764, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 639.3478393554688, "intensity": 0.018782714381814003, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 646.3280029296875, "intensity": 0.030414801090955734, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 647.328369140625, "intensity": 0.02265116572380066, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 653.3023071289062, "intensity": 0.031094927340745926, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 670.3162231445312, "intensity": 0.02026170864701271, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 677.3325805664062, "intensity": 0.03700347617268562, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 685.322998046875, "intensity": 0.031203679740428925, "fragment": "", 
"mz_delta": null, "color": "#212121"}, {"mz": 692.3241577148438, "intensity": 0.021440720185637474, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 696.351318359375, "intensity": 0.13134905695915222, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 702.3402709960938, "intensity": 0.019092896953225136, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 703.3506469726562, "intensity": 0.024013128131628036, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 707.3213500976562, "intensity": 0.024482762441039085, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 709.8466186523438, "intensity": 0.03129369020462036, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 710.3462524414062, "intensity": 0.06588925421237946, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 717.8883666992188, "intensity": 0.04304360970854759, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 718.3870849609375, "intensity": 0.32829058170318604, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 718.8858642578125, "intensity": 0.03321429714560509, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 724.34130859375, "intensity": 0.04471525177359581, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 729.3600463867188, "intensity": 0.020301733165979385, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 730.3282470703125, "intensity": 0.02373546175658703, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 748.375244140625, "intensity": 0.04757232218980789, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 757.3715209960938, "intensity": 0.03368370234966278, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 763.1437377929688, "intensity": 0.020129267126321793, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 766.2276000976562, "intensity": 0.03150264918804169, "fragment": "", "mz_delta": null, "color": "#212121"}, 
{"mz": 767.3910522460938, "intensity": 0.19662556052207947, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 767.736572265625, "intensity": 0.019396141171455383, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 783.81298828125, "intensity": 0.020094377920031548, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 831.4067993164062, "intensity": 0.02632063627243042, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 835.418701171875, "intensity": 0.026114407926797867, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 858.4285278320312, "intensity": 0.03353990986943245, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 859.4189453125, "intensity": 0.045826975256204605, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 869.4483642578125, "intensity": 0.019475221633911133, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 871.410400390625, "intensity": 0.028745388612151146, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 876.4351806640625, "intensity": 0.13731563091278076, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 877.419677734375, "intensity": 0.31163832545280457, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 878.4134521484375, "intensity": 0.11510690301656723, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 886.4219360351562, "intensity": 0.02274133637547493, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 887.4178466796875, "intensity": 0.02371060661971569, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 895.4476318359375, "intensity": 0.21141238510608673, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 904.451904296875, "intensity": 0.025122331455349922, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 905.44287109375, "intensity": 0.023934079334139824, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 906.4541015625, "intensity": 
0.019386500120162964, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 972.4820556640625, "intensity": 0.031104160472750664, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 989.505859375, "intensity": 0.030449291691184044, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 990.4984130859375, "intensity": 0.0242347102612257, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 1008.5327758789062, "intensity": 0.24949823319911957, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 1017.518798828125, "intensity": 0.023513633757829666, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 1018.51953125, "intensity": 0.021530823782086372, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 1037.5977783203125, "intensity": 0.021331775933504105, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 1046.4642333984375, "intensity": 0.021640582010149956, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 1082.492919921875, "intensity": 0.0222757738083601, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 1100.57470703125, "intensity": 0.022380664944648743, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 1117.570556640625, "intensity": 0.02564471773803234, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 1118.573974609375, "intensity": 0.1540607213973999, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 1119.55908203125, "intensity": 0.07648862898349762, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 1136.58984375, "intensity": 0.14887091517448425, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 1145.5716552734375, "intensity": 0.021669859066605568, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 1214.585693359375, "intensity": 0.02321801893413067, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 1215.5576171875, "intensity": 0.02200160175561905, "fragment": "", "mz_delta": 
null, "color": "#212121"}, {"mz": 1227.5980224609375, "intensity": 0.022091779857873917, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 1233.6329345703125, "intensity": 0.030069278553128242, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 1250.6297607421875, "intensity": 0.11248490959405899, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 1260.6072998046875, "intensity": 0.040360987186431885, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 1261.614013671875, "intensity": 0.033135995268821716, "fragment": "", "mz_delta": "2.2ppm", "color": "#388E3C"}, {"mz": 1272.6572265625, "intensity": 0.02319318614900112, "fragment": "", "mz_delta": null, "color": "#212121"}], "data-74878dd49dd20ad3d314195fc2ba5f7b": [{"mz": 147.11273193359375, "intensity": 0.09118045121431351, "fragment": "y1", "mz_delta": "-0.5ppm", "color": "#D32F2F"}, {"mz": 187.086669921875, "intensity": 0.11504451185464859, "fragment": "b1", "mz_delta": "0.4ppm", "color": "#1976D2"}, {"mz": 204.13414001464844, "intensity": 0.22047029435634613, "fragment": "y2", "mz_delta": "-0.6ppm", "color": "#D32F2F"}, {"mz": 301.1289978027344, "intensity": 0.4070082902908325, "fragment": "b2", "mz_delta": "-1.7ppm", "color": "#1976D2"}, {"mz": 305.18109130859375, "intensity": 0.2134922742843628, "fragment": "y3", "mz_delta": "-2.8ppm", "color": "#D32F2F"}, {"mz": 362.2037353515625, "intensity": 0.5880805253982544, "fragment": "y4", "mz_delta": "0.9ppm", "color": "#D32F2F"}, {"mz": 429.1875915527344, "intensity": 0.30558738112449646, "fragment": "b3", "mz_delta": "-1.2ppm", "color": "#1976D2"}, {"mz": 542.272216796875, "intensity": 0.07725690305233002, "fragment": "b4", "mz_delta": "0.1ppm", "color": "#1976D2"}, {"mz": 548.280517578125, "intensity": 0.6340454816818237, "fragment": "y5", "mz_delta": "-4.0ppm", "color": "#D32F2F"}, {"mz": 695.351318359375, "intensity": 0.4454374611377716, "fragment": "y6", "mz_delta": "0.3ppm", "color": "#D32F2F"}, {"mz": 
766.387451171875, "intensity": 0.585404098033905, "fragment": "y7", "mz_delta": "-1.0ppm", "color": "#D32F2F"}, {"mz": 894.4446411132812, "intensity": 0.44065672159194946, "fragment": "y8", "mz_delta": "-2.4ppm", "color": "#D32F2F"}, {"mz": 1007.5315551757812, "intensity": 0.4444051682949066, "fragment": "y9", "mz_delta": "0.7ppm", "color": "#D32F2F"}, {"mz": 1135.5819091796875, "intensity": 0.24221596121788025, "fragment": "y10", "mz_delta": "-6.7ppm", "color": "#D32F2F"}, {"mz": 1232.5897216796875, "intensity": 0.04881492629647255, "fragment": "b10", "mz_delta": "4.1ppm", "color": "#1976D2"}, {"mz": 1249.630615234375, "intensity": 0.1376694291830063, "fragment": "y11", "mz_delta": "-1.4ppm", "color": "#D32F2F"}]}} -------------------------------------------------------------------------------- /docs/src/mass_errors.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bittremieux-lab/spectrum_utils/a12f453040bb9bd36a261ae46063dfe828d4fe5e/docs/src/mass_errors.png -------------------------------------------------------------------------------- /docs/src/mirror.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bittremieux-lab/spectrum_utils/a12f453040bb9bd36a261ae46063dfe828d4fe5e/docs/src/mirror.png -------------------------------------------------------------------------------- /docs/src/neutral_losses_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bittremieux-lab/spectrum_utils/a12f453040bb9bd36a261ae46063dfe828d4fe5e/docs/src/neutral_losses_1.png -------------------------------------------------------------------------------- /docs/src/neutral_losses_2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bittremieux-lab/spectrum_utils/a12f453040bb9bd36a261ae46063dfe828d4fe5e/docs/src/neutral_losses_2.png -------------------------------------------------------------------------------- /docs/src/plotting.md: -------------------------------------------------------------------------------- 1 | # Spectrum visualization 2 | 3 | The [quickstart](quickstart.md) briefly introduced the spectrum_utils plotting functionality. 4 | Often, nice spectrum graphics can be created with only a single line of code; it's as easy as using `spectrum_utils.plot.spectrum(...)` to visualize a single spectrum. 5 | 6 | Here we will briefly describe some advanced functionality to customize your spectrum plots. 7 | Some of the arguments that can be provided to `spectrum_utils.plot.spectrum(...)` are: 8 | 9 | - `color_ions`: Boolean flag indicating whether the annotated peaks should be colored. 10 | - `annot_fmt`: A function that converts a `FragmentAnnotation` to a label to annotate the corresponding peak (see below). 11 | - `annot_kws`: A dictionary with options to customize peak label texts. 12 | See the [`matplotlib.text.Text` documentation](https://matplotlib.org/3.1.1/api/text_api.html#matplotlib.text.Text) for available options. 13 | - `grid`: Enable/disable the grid. 14 | 15 | See the [API reference](api.md) for full details on how to use these settings. 16 | 17 | ## Peak annotations 18 | 19 | By default, singly-charged b and y peptide fragment ions are annotated with a label in the spectrum plots. 20 | To avoid overcrowding the spectrum plots, other peaks will be highlighted in the matching color, but will not receive an annotation label. 21 | However, which peaks to annotate and the format of the label can be fully customized by providing a callable that implements your desired behavior to `annot_fmt`. 22 | 23 | Here we will show example code that can guide you in implementing your custom peak labeling functionality. 
24 | As mentioned previously, the default implementation labels singly-charged b and y peptide ions that have not undergone a neutral loss (slightly adapted for conciseness): 25 | 26 | ```python 27 | def annotate_ion_type(annotation, ion_types="by"): 28 | if ( 29 | annotation.ion_type[0] in ion_types 30 | and annotation.neutral_loss is None 31 | and annotation.isotope == 0 32 | and annotation.charge == 1 33 | ): 34 | return annotation.ion_type 35 | else: 36 | return "" 37 | ``` 38 | 39 | To annotate additional [ion types](ion_types), you can reuse the default implementation in combination with `functools.partial`: 40 | 41 | ```python 42 | import functools 43 | import spectrum_utils.plot 44 | 45 | spectrum_utils.plot.spectrum(..., annot_fmt=functools.partial(spectrum_utils.plot.annotate_ion_type, ion_types="abyIm")) 46 | ``` 47 | 48 | Peak labels that contain full information according to the [PSI peak interpretation specification](https://docs.google.com/document/d/1yEUNG4Ump6vnbMDs4iV4s3XISflmOkRAyqUuutcCG2w/edit?usp=sharing) can be achieved by using the `str` function: `spectrum_utils.plot.spectrum(..., annot_fmt=str)`. 49 | 50 | Finally, we'll show an example of custom peak labeling functionality that indicates the charge state by repeated `+` symbols, neutral loss of ammonia by the `*` symbol, and neutral loss of water by the `o` symbol. This behavior is similar to the [Lorikeet spectrum viewer](https://uwpr.github.io/Lorikeet/). 
51 | 52 | ```python 53 | import matplotlib.pyplot as plt 54 | import spectrum_utils.plot as sup 55 | import spectrum_utils.spectrum as sus 56 | 57 | 58 | def annotate_ion_type(annotation, ion_types="aby"): 59 | if annotation.ion_type[0] in ion_types: 60 | if abs(annotation.isotope) == 1: 61 | iso = "+i" if annotation.isotope > 0 else "-i" 62 | elif annotation.isotope != 0: 63 | iso = f"{annotation.isotope:+}i" 64 | else: 65 | iso = "" 66 | nl = {"-NH3": "*", "-H2O": "o"}.get(annotation.neutral_loss, "") 67 | return f"{annotation.ion_type}{iso}{'+' * annotation.charge}{nl}" 68 | else: 69 | return "" 70 | 71 | 72 | usi = "mzspec:PXD014834:TCGA-AA-3518-01A-11_W_VU_20120915_A0218_3F_R_FR01:scan:8370" 73 | peptide = "WNQLQAFWGTGK" 74 | spectrum = sus.MsmsSpectrum.from_usi(usi) 75 | spectrum.annotate_proforma( 76 | peptide, 77 | fragment_tol_mass=0.05, 78 | fragment_tol_mode="Da", 79 | ion_types="aby", 80 | max_ion_charge=2, 81 | neutral_losses={"NH3": -17.026549, "H2O": -18.010565}, 82 | ) 83 | 84 | fig, ax = plt.subplots(figsize=(12, 6)) 85 | sup.spectrum(spectrum, annot_fmt=annotate_ion_type, grid=False, ax=ax) 86 | ax.set_title(peptide, fontdict={"fontsize": "xx-large"}) 87 | ax.spines["right"].set_visible(False) 88 | ax.spines["top"].set_visible(False) 89 | plt.savefig("annot_fmt.png", dpi=300, bbox_inches="tight", transparent=True) 90 | plt.close() 91 | ``` 92 | 93 | ![Spectrum plot with neutral losses labeled](annot_fmt.png) 94 | 95 | ## Mirror plot 96 | 97 | A mirror plot can be used to visualize matching spectra, for example, to plot identifications from spectral library searching. 
98 | Again, only a single line of code is required to do the actual plotting: `spectrum_utils.plot.mirror(...)` 99 | 100 | ```python 101 | import matplotlib.pyplot as plt 102 | import spectrum_utils.plot as sup 103 | import spectrum_utils.spectrum as sus 104 | 105 | 106 | peptide = "DLTDYLM[Oxidation]K" 107 | usi_top = "mzspec:MSV000079960:DY_HS_Exp7-Ad1:scan:30372" 108 | spectrum_top = sus.MsmsSpectrum.from_usi(usi_top) 109 | spectrum_top.annotate_proforma(peptide, 0.5, "Da", ion_types="aby") 110 | usi_bottom = "mzspec:MSV000080679:j11962_C1orf144:scan:10671" 111 | spectrum_bottom = sus.MsmsSpectrum.from_usi(usi_bottom) 112 | spectrum_bottom.annotate_proforma(peptide, 0.5, "Da", ion_types="aby") 113 | 114 | fig, ax = plt.subplots(figsize=(12, 6)) 115 | sup.mirror(spectrum_top, spectrum_bottom, ax=ax) 116 | plt.savefig("mirror.png", dpi=300, bbox_inches="tight", transparent=True) 117 | plt.close() 118 | ``` 119 | 120 | ![Mirror spectrum plot](mirror.png) 121 | 122 | All of the advanced plotting arguments described above can be provided for the mirror plot as well using the `spectrum_kws` argument. 123 | 124 | ## Mass error plot 125 | 126 | The difference between the observed and the theoretical mass of annotated fragment ions can be visualized in a mass error plot. In these bubble plots, the size of the bubbles corresponds to the intensity of the fragment ions, the x-axis shows the observed _m/z_, and the y-axis shows the mass error either in ppm or in Dalton. 
Use `spectrum_utils.plot.mass_errors(...)` to plot mass errors: 127 | 128 | ```python 129 | import matplotlib.pyplot as plt 130 | import spectrum_utils.plot as sup 131 | import spectrum_utils.spectrum as sus 132 | 133 | usi = "mzspec:PXD022531:j12541_C5orf38:scan:12368" 134 | peptide = "VAATLEILTLK/2" 135 | spectrum = sus.MsmsSpectrum.from_usi(usi) 136 | spectrum.annotate_proforma( 137 | peptide, 138 | fragment_tol_mass=0.05, 139 | fragment_tol_mode="Da", 140 | ion_types="aby", 141 | max_ion_charge=2, 142 | neutral_losses={"NH3": -17.026549, "H2O": -18.010565}, 143 | ) 144 | 145 | fig, ax = plt.subplots(figsize=(10.5, 3)) 146 | sup.mass_errors(spectrum, plot_unknown=False, ax=ax) 147 | plt.savefig("mass_errors.png", dpi=300, bbox_inches="tight", transparent=True) 148 | plt.close() 149 | ``` 150 | 151 | ![Mass error plot](mass_errors.png) 152 | 153 | ## Figure-level facet plot 154 | 155 | The figure-level `spectrum_utils.plot.facet` function combines the `spectrum_utils.plot.mirror` and `spectrum_utils.plot.mass_errors` functionality: 156 | 157 | ```python 158 | import matplotlib.pyplot as plt 159 | import spectrum_utils.plot as sup 160 | import spectrum_utils.spectrum as sus 161 | 162 | peptide = "VAATLEILTLK/2" 163 | annotation_settings = { 164 | "fragment_tol_mass": 0.05, 165 | "fragment_tol_mode": "Da", 166 | "ion_types": "aby", 167 | "max_ion_charge": 2, 168 | "neutral_losses": {"NH3": -17.026549, "H2O": -18.010565}, 169 | } 170 | 171 | usi_top = "mzspec:PXD022531:j12541_C5orf38:scan:12368" 172 | spectrum_top = sus.MsmsSpectrum.from_usi(usi_top) 173 | spectrum_top.annotate_proforma(peptide, **annotation_settings) 174 | 175 | usi_bottom = "mzspec:PXD022531:b11156_PRAMEF17:scan:22140" 176 | spectrum_bottom = sus.MsmsSpectrum.from_usi(usi_bottom) 177 | spectrum_bottom.annotate_proforma(peptide, **annotation_settings) 178 | 179 | fig = sup.facet( 180 | spec_top=spectrum_top, 181 | spec_mass_errors=spectrum_top, 182 | spec_bottom=spectrum_bottom, 183 | 
mass_errors_kws={"plot_unknown": False}, 184 | height=7, 185 | width=10.5, 186 | ) 187 | plt.savefig("facet.png", dpi=300, bbox_inches="tight", transparent=True) 188 | plt.close() 189 | ``` 190 | 191 | ![Facet plot](facet.png) 192 | 193 | ## Interactive plotting 194 | 195 | Besides the standard plotting functionality in `spectrum_utils.plot`, spectrum_utils also contains interactive plotting functionality in `spectrum_utils.iplot`. 196 | `iplot` is a drop-in replacement for `plot`; only the import statement needs to be changed to produce interactive plots. 197 | 198 | Interactive plot of an individual spectrum: 199 | 200 | 201 | 202 | 203 | 204 | ```python 205 | import spectrum_utils.iplot as sup 206 | import spectrum_utils.spectrum as sus 207 | 208 | 209 | usi = "mzspec:PXD004732:01650b_BC2-TUM_first_pool_53_01_01-3xHCD-1h-R2:scan:41840" 210 | spectrum = sus.MsmsSpectrum.from_usi(usi) 211 | spectrum.annotate_proforma("WNQLQAFWGTGK", 10, "ppm", ion_types="aby") 212 | 213 | chart = sup.spectrum(spectrum) 214 | chart.properties(width=640, height=400).save("iplot_spectrum.json") 215 | ``` 216 | 217 |
218 | 219 | Interactive mirror plot of two spectra: 220 | 221 | ```python 222 | import spectrum_utils.iplot as sup 223 | import spectrum_utils.spectrum as sus 224 | 225 | 226 | peptide = "DLTDYLM[Oxidation]K" 227 | usi_top = "mzspec:MSV000079960:DY_HS_Exp7-Ad1:scan:30372" 228 | spectrum_top = sus.MsmsSpectrum.from_usi(usi_top) 229 | spectrum_top.annotate_proforma(peptide, 0.5, "Da", ion_types="aby") 230 | usi_bottom = "mzspec:MSV000080679:j11962_C1orf144:scan:10671" 231 | spectrum_bottom = sus.MsmsSpectrum.from_usi(usi_bottom) 232 | spectrum_bottom.annotate_proforma(peptide, 0.5, "Da", ion_types="aby") 233 | 234 | chart = sup.mirror(spectrum_top, spectrum_bottom) 235 | chart.properties(width=640, height=400).save("iplot_mirror.json") 236 | ``` 237 | 238 |
239 | 240 | 248 | 249 | For more information on how to manipulate these interactive plots, see the [Vega-Altair documentation](https://altair-viz.github.io/index.html). 250 | 251 | Interactive plots can be [saved](https://altair-viz.github.io/user_guide/saving_charts.html) as html files or other output formats, and can be embedded as JSON into web pages using [Vega-Embed](https://github.com/vega/vega-embed). 252 | 253 | ## Miscellaneous 254 | 255 | ### Peak colors 256 | 257 | By default, peaks are colored based on their [ion type](ion_types) as follows: 258 | 259 | - a peptide fragments (`"a"`) 260 | - b peptide fragments (`"b"`) 261 | - c peptide fragments (`"c"`) 262 | - x peptide fragments (`"x"`) 263 | - y peptide fragments (`"y"`) 264 | - z peptide fragments (`"z"`) 265 | - internal fragment ions (`"m"`) 266 | - immonium ions (`"I"`) 267 | - intact precursor ions (`"p"`) 268 | - unknown and unannotated ions 269 | 270 | To change these colors, overwrite values in the `spectrum_utils.plot.colors` dictionary with your preferred colors: 271 | 272 | ```python 273 | import spectrum_utils.plot as sup 274 | 275 | 276 | sup.colors["y"] = "#FF1493" 277 | ``` 278 | -------------------------------------------------------------------------------- /docs/src/proforma_ast.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bittremieux-lab/spectrum_utils/a12f453040bb9bd36a261ae46063dfe828d4fe5e/docs/src/proforma_ast.png -------------------------------------------------------------------------------- /docs/src/proforma_ex1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bittremieux-lab/spectrum_utils/a12f453040bb9bd36a261ae46063dfe828d4fe5e/docs/src/proforma_ex1.png -------------------------------------------------------------------------------- /docs/src/proforma_ex2.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/bittremieux-lab/spectrum_utils/a12f453040bb9bd36a261ae46063dfe828d4fe5e/docs/src/proforma_ex2.png -------------------------------------------------------------------------------- /docs/src/proforma_ex3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bittremieux-lab/spectrum_utils/a12f453040bb9bd36a261ae46063dfe828d4fe5e/docs/src/proforma_ex3.png -------------------------------------------------------------------------------- /docs/src/quickstart.md: -------------------------------------------------------------------------------- 1 | # Quickstart 2 | 3 | Here we briefly introduce spectrum_utils' spectrum processing and visualization functionality: 4 | 5 | - Load a spectrum from an online data resource by its [Universal Spectrum Identifier (USI)](https://www.psidev.info/usi). 6 | - Restrict the mass range to 100–1400 _m_/_z_ to filter out irrelevant peaks. 7 | - Remove the precursor peak. 8 | - Remove low-intensity noise peaks by only retaining peaks that are at least 5% of the base peak intensity and restrict the total number of peaks to the 50 most intense peaks. 9 | - Scale the peak intensities by their square root to de-emphasize overly intense peaks. 10 | - Annotate peaks corresponding to a, b, and y peptide fragments in the spectrum based on a [ProForma 2.0](https://www.psidev.info/proforma) peptide string. 11 | - Visualize the spectrum with the annotated peaks highlighted. 12 | 13 | IO functionality to read spectra from MS data files is not directly included in spectrum_utils. 14 | Instead you can use excellent libraries to read a variety of mass spectrometry data formats such as [Pyteomics](https://pyteomics.readthedocs.io/) or [pymzML](https://pymzml.readthedocs.io/). 
15 | 16 | ```python 17 | import matplotlib.pyplot as plt 18 | import spectrum_utils.plot as sup 19 | import spectrum_utils.spectrum as sus 20 | 21 | 22 | # Retrieve the spectrum by its USI. 23 | usi = "mzspec:PXD004732:01650b_BC2-TUM_first_pool_53_01_01-3xHCD-1h-R2:scan:41840" 24 | peptide = "WNQLQAFWGTGK" 25 | spectrum = sus.MsmsSpectrum.from_usi(usi) 26 | 27 | # Process the spectrum. 28 | fragment_tol_mass, fragment_tol_mode = 10, "ppm" 29 | spectrum = ( 30 | spectrum.set_mz_range(min_mz=100, max_mz=1400) 31 | .remove_precursor_peak(fragment_tol_mass, fragment_tol_mode) 32 | .filter_intensity(min_intensity=0.05, max_num_peaks=50) 33 | .scale_intensity("root") 34 | .annotate_proforma( 35 | peptide, fragment_tol_mass, fragment_tol_mode, ion_types="aby" 36 | ) 37 | ) 38 | 39 | # Plot the spectrum. 40 | fig, ax = plt.subplots(figsize=(12, 6)) 41 | sup.spectrum(spectrum, grid=False, ax=ax) 42 | ax.spines["right"].set_visible(False) 43 | ax.spines["top"].set_visible(False) 44 | plt.savefig("quickstart.png", bbox_inches="tight", dpi=300, transparent=True) 45 | plt.close() 46 | ``` 47 | 48 | As demonstrated, each of the processing steps can be achieved using a single, high-level function call. 49 | These calls can be chained together to easily perform multiple processing steps. 50 | 51 | Spectrum plotting can similarly be achieved using a high-level function call, resulting in the following figure: 52 | 53 | ![](quickstart.png) 54 | 55 | Note that several processing steps modify the peak _m_/_z_ and intensity values and are thus not idempotent. 56 | It is recommended to make a copy of the `MsmsSpectrum` object prior to any processing if the raw peak values need to remain available as well. 
57 | -------------------------------------------------------------------------------- /docs/src/quickstart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bittremieux-lab/spectrum_utils/a12f453040bb9bd36a261ae46063dfe828d4fe5e/docs/src/quickstart.png -------------------------------------------------------------------------------- /docs/src/runtime.md: -------------------------------------------------------------------------------- 1 | # Computational efficiency 2 | 3 | Spectrum processing in spectrum_utils has been optimized for computational efficiency using [NumPy](https://www.numpy.org/) and [Numba](http://numba.pydata.org/) to be able to process thousands of spectra per second. 4 | 5 | As shown below, spectrum_utils (version 0.4.0) is faster than alternative libraries, such as [pymzML](https://github.com/pymzml/pymzML/) (version 2.5.2) and [pyOpenMS](https://pyopenms.readthedocs.io/) (version 2.7.0), when performing typical spectrum processing tasks, including the following steps: 6 | 7 | - The _m_/_z_ range is set to 100–1400 _m_/_z_. 8 | - The precursor peak is removed. 9 | - Low-intensity noise peaks are removed. 10 | - Peak intensities are scaled by their square root. 11 | 12 | ```python 13 | import time 14 | 15 | import matplotlib.pyplot as plt 16 | import numpy as np 17 | import pyopenms 18 | import pyteomics.mgf 19 | import seaborn as sns 20 | import spectrum_utils.spectrum as sus 21 | from pymzml.spec import Spectrum 22 | 23 | 24 | min_peaks = 10 25 | min_mz, max_mz = 100, 1400 26 | fragment_tol_mass, fragment_tol_mode = 0.02, "Da" 27 | min_intensity = 0.05 28 | max_num_peaks = 150 29 | 30 | 31 | def time_spectrum_utils(mgf_filename): 32 | runtimes = [] 33 | for spec_dict in pyteomics.mgf.read(mgf_filename): 34 | # Omit invalid spectra. 
35 | if ( 36 | len(spec_dict["m/z array"]) < min_peaks 37 | or "charge" not in spec_dict["params"] 38 | ): 39 | continue 40 | 41 | spectrum = sus.MsmsSpectrum( 42 | spec_dict["params"]["title"], 43 | spec_dict["params"]["pepmass"][0], 44 | spec_dict["params"]["charge"][0], 45 | spec_dict["m/z array"], 46 | spec_dict["intensity array"], 47 | float(spec_dict["params"]["rtinseconds"]), 48 | )._inner 49 | 50 | start_time = time.time() 51 | 52 | spectrum.set_mz_range(min_mz, max_mz) 53 | spectrum.remove_precursor_peak(fragment_tol_mass, fragment_tol_mode) 54 | spectrum.filter_intensity(min_intensity, max_num_peaks) 55 | spectrum.scale_intensity("root", 1) 56 | 57 | runtimes.append(time.time() - start_time) 58 | 59 | return runtimes 60 | 61 | 62 | def time_pymzml(mgf_filename): 63 | runtimes = [] 64 | for spec_dict in pyteomics.mgf.read(mgf_filename): 65 | # Omit invalid spectra. 66 | if ( 67 | len(spec_dict["m/z array"]) < min_peaks 68 | or "charge" not in spec_dict["params"] 69 | ): 70 | continue 71 | 72 | spec = Spectrum() 73 | spec.set_peaks( 74 | [*zip(spec_dict["m/z array"], spec_dict["intensity array"])], "raw" 75 | ) 76 | 77 | start_time = time.time() 78 | 79 | spec.reduce("raw", (min_mz, max_mz)) 80 | spec.remove_precursor_peak() 81 | spec.remove_noise(noise_level=min_intensity) 82 | spec /= np.amax(spec.i) 83 | spec.i = np.sqrt(spec.i) 84 | 85 | runtimes.append(time.time() - start_time) 86 | 87 | return runtimes 88 | 89 | 90 | def time_pyopenms(mgf_filename): 91 | experiment = pyopenms.MSExperiment() 92 | pyopenms.MascotGenericFile().load(mgf_filename, experiment) 93 | 94 | runtimes = [] 95 | for spectrum in experiment: 96 | # Omit invalid spectra. 97 | if ( 98 | len(spectrum.get_peaks()[0]) < min_peaks 99 | or spectrum.getPrecursors()[0].getCharge() == 0 100 | ): 101 | continue 102 | 103 | start_time = time.time() 104 | 105 | # Set the m/z range. 
106 | filtered_mz, filtered_intensity = [], [] 107 | for mz, intensity in zip(*spectrum.get_peaks()): 108 | if min_mz <= mz <= max_mz: 109 | filtered_mz.append(mz) 110 | filtered_intensity.append(intensity) 111 | spectrum.set_peaks((filtered_mz, filtered_intensity)) 112 | # Remove the precursor peak. 113 | parent_peak_mower = pyopenms.ParentPeakMower() 114 | parent_peak_mower_params = parent_peak_mower.getDefaults() 115 | parent_peak_mower_params.setValue( 116 | b"window_size", fragment_tol_mass, b"" 117 | ) 118 | parent_peak_mower.setParameters(parent_peak_mower_params) 119 | parent_peak_mower.filterSpectrum(spectrum) 120 | # Filter by base peak intensity percentage. 121 | pyopenms.Normalizer().filterSpectrum(spectrum) 122 | threshold_mower = pyopenms.ThresholdMower() 123 | threshold_mower_params = threshold_mower.getDefaults() 124 | threshold_mower_params.setValue(b"threshold", min_intensity, b"") 125 | threshold_mower.setParameters(threshold_mower_params) 126 | threshold_mower.filterSpectrum(spectrum) 127 | # Restrict to the most intense peaks. 128 | n_largest = pyopenms.NLargest() 129 | n_largest_params = n_largest.getDefaults() 130 | n_largest_params.setValue(b"n", max_num_peaks, b"") 131 | n_largest.setParameters(n_largest_params) 132 | n_largest.filterSpectrum(spectrum) 133 | # Scale the peak intensities by their square root and normalize. 
134 | pyopenms.SqrtMower().filterSpectrum(spectrum) 135 | pyopenms.Normalizer().filterSpectrum(spectrum) 136 | 137 | runtimes.append(time.time() - start_time) 138 | 139 | return runtimes 140 | 141 | 142 | mgf_filename = "iPRG2012.mgf" 143 | runtimes_spectrum_utils = time_spectrum_utils(mgf_filename) 144 | runtimes_pyopenms = time_pyopenms(mgf_filename) 145 | runtimes_pymzml = time_pymzml(mgf_filename) 146 | 147 | fig, ax = plt.subplots() 148 | sns.boxplot( 149 | data=[runtimes_spectrum_utils, runtimes_pymzml, runtimes_pyopenms], 150 | flierprops={"markersize": 2}, 151 | ax=ax, 152 | ) 153 | ax.set_yscale("log") 154 | ax.xaxis.set_ticklabels(("spectrum_utils", "pymzML", "pyOpenMS")) 155 | ax.set_ylabel("Processing time per spectrum (s)") 156 | sns.despine() 157 | plt.savefig("runtime.png", bbox_inches="tight", dpi=300, transparent=True) 158 | plt.close() 159 | ``` 160 | 161 | ![](runtime.png) 162 | 163 | 164 | ## JIT compilation 165 | 166 | Note that the significant outlier for spectrum_utils is caused by Numba's JIT compilation of the first method call, allowing subsequent calls to be made very efficiently. 167 | 168 | If the user knows in advance that only a single method call needs to be made, Numba's JIT compilation can be disabled to avoid this overhead by setting the `NUMBA_DISABLE_JIT` environment variable to `1`. 169 | See the [Numba documentation](https://numba.pydata.org/numba-doc/latest/user/troubleshoot.html#disabling-jit-compilation) for more information. 
170 | -------------------------------------------------------------------------------- /docs/src/runtime.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bittremieux-lab/spectrum_utils/a12f453040bb9bd36a261ae46063dfe828d4fe5e/docs/src/runtime.png -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: spectrum_utils 2 | channels: 3 | - defaults 4 | - bioconda 5 | - conda-forge 6 | dependencies: 7 | - altair 8 | - fastobo 9 | - lark>=1.0 10 | - matplotlib 11 | - numba>=0.57 12 | - numpy 13 | - pandas 14 | - platformdirs 15 | - pyteomics>=4.5 16 | - python>=3.10 17 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=45", "wheel", "setuptools_scm[toml]>=6.0"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [tool.setuptools_scm] 6 | 7 | [tool.ruff] 8 | line-length = 79 9 | target-version = "py310" 10 | include = ["*.py"] 11 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = spectrum_utils 3 | author = Wout Bittremieux 4 | author_email = wout.bittremieux@uantwerpen.be 5 | description = Mass spectrometry utility functions 6 | long_description = file: README.md 7 | long_description_content_type = text/markdown 8 | url = https://github.com/bittremieux/spectrum_utils 9 | project_urls = 10 | Documentation = https://spectrum-utils.readthedocs.io/ 11 | Bug Tracker = https://github.com/bittremieux/spectrum_utils/issues 12 | license = Apache 2.0 13 | classifiers = 14 | Intended Audience :: Science/Research 15 | License :: OSI Approved :: Apache 
Software License 16 | Operating System :: MacOS 17 | Operating System :: Microsoft :: Windows 18 | Operating System :: Unix 19 | Programming Language :: Python :: 3 20 | Topic :: Scientific/Engineering :: Bio-Informatics 21 | 22 | [options] 23 | packages = find: 24 | include_package_data = True 25 | python_requires = >=3.10 26 | install_requires = 27 | fastobo 28 | lark>=1.0 29 | matplotlib>=3.5 30 | numba>=0.57 31 | numpy 32 | platformdirs 33 | pyteomics>=4.5 34 | 35 | [options.extras_require] 36 | dev = 37 | pytest 38 | pytest-cov 39 | ruff 40 | docs = 41 | myst-parser 42 | numpydoc>=1.1.0 43 | sphinx>=3.5.3 44 | sphinx-rtd-theme>=0.5.1 45 | sphinx_markdown_tables 46 | iplot = 47 | altair 48 | pandas 49 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | 4 | if __name__ == "__main__": 5 | setuptools.setup() 6 | -------------------------------------------------------------------------------- /spectrum_utils.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bittremieux-lab/spectrum_utils/a12f453040bb9bd36a261ae46063dfe828d4fe5e/spectrum_utils.png -------------------------------------------------------------------------------- /spectrum_utils/__init__.py: -------------------------------------------------------------------------------- 1 | try: 2 | from importlib.metadata import version, PackageNotFoundError 3 | 4 | try: 5 | __version__ = version("spectrum_utils") 6 | except PackageNotFoundError: 7 | pass 8 | except ImportError: 9 | from pkg_resources import get_distribution, DistributionNotFound 10 | 11 | try: 12 | __version__ = get_distribution("spectrum_utils").version 13 | except DistributionNotFound: 14 | pass 15 | 16 | 17 | __all__ = [ 18 | "fragment_annotation", 19 | "iplot", 20 | "plot", 21 | "proforma", 22 | "spectrum", 23 | "utils", 24 | 
] 25 | -------------------------------------------------------------------------------- /spectrum_utils/fragment_annotation.py: -------------------------------------------------------------------------------- 1 | import operator 2 | import re 3 | from typing import Any, Dict, List, Optional, Tuple 4 | 5 | try: 6 | import pyteomics.cmass as pmass 7 | except ImportError: 8 | import pyteomics.mass as pmass 9 | 10 | from spectrum_utils import proforma 11 | 12 | 13 | # Amino acid and special amino acid masses. 14 | AA_MASS = { 15 | **pmass.std_aa_mass, 16 | # Aspartic acid / asparagine (ambiguous mass). 17 | # "B": 0, 18 | # Glutamic acid / glutamine (ambiguous mass). 19 | # "Z": 0, 20 | # Leucine / isoleucine. 21 | "J": 113.084_064, 22 | # Selenocysteine (in Pyteomics). 23 | # "U": 150.95363, 24 | # Pyrrolysine (in Pyteomics). 25 | # "O": 237.14772, 26 | # Any amino acid, gaps (zero mass). 27 | "X": 0, 28 | } 29 | 30 | # Offset for isotopic peaks. 31 | C13_MASS_DIFF = 1.003_354 32 | 33 | # Common neutral losses. 34 | NEUTRAL_LOSS = { 35 | # No neutral loss. 36 | None: 0, 37 | # Hydrogen. 38 | "H": -1.007_825, 39 | # Ammonia. 40 | "NH3": -17.026_549, 41 | # Water. 42 | "H2O": -18.010_565, 43 | # Carbon monoxide. 44 | "CO": -27.994_915, 45 | # Carbon dioxide. 46 | "CO2": -43.989_829, 47 | # Formamide. 48 | "HCONH2": -45.021_464, 49 | # Formic acid. 50 | "HCOOH": -46.005_479, 51 | # Methanesulfenic acid. 52 | "CH4OS": -63.998_301, 53 | # Sulfur trioxide. 54 | "SO3": -79.956_818, 55 | # Metaphosphoric acid. 56 | "HPO3": -79.966_331, 57 | # Mercaptoacetamide. 58 | "C2H5NOS": -91.009_195, 59 | # Mercaptoacetic acid. 60 | "C2H4O2S": -91.993_211, 61 | # Phosphoric acid. 
# First characters of the ion types that `FragmentAnnotation` accepts.
SUPPORTED_IONS = "?abcxyzIm_prf"


class FragmentAnnotation:
    def __init__(
        self,
        ion_type: str,
        neutral_loss: Optional[str] = None,
        isotope: int = 0,
        charge: Optional[int] = None,
        adduct: Optional[str] = None,
        analyte_number: Optional[int] = None,
        mz_delta: Optional[Tuple[float, str]] = None,
    ) -> None:
        """
        Individual fragment ion annotation.

        This fragment annotation format is derived from the PSI peak
        interpretation specification:
        https://docs.google.com/document/d/1yEUNG4Ump6vnbMDs4iV4s3XISflmOkRAyqUuutcCG2w/edit?usp=sharing

        Fragment notations have the following format:

        (analyte_number)[ion_type](neutral_loss)(isotope)(charge)(adduct)(mz_delta)

        Examples:

        - "y4-H2O+2i^2[M+H+Na]" : Fragment annotation for a y4 ion, with a
          water neutral loss, the second isotopic peak, charge 2, adduct
          [M+H+Na].

        Parameters
        ----------
        ion_type : str
            Specifies the basic type of ion being described.
            Possible prefixes are:

            - "?": unknown ion
            - "a", "b", "c", "x", "y", "z": corresponding peptide fragments
            - "I": immonium ion
            - "m": internal fragment ion
            - "_": named compound
            - "p": precursor ion
            - "r": reporter ion (isobaric label)
            - "f": chemical formula
        neutral_loss : Optional[str]
            A string of neutral loss(es), described by their molecular formula.
            The default is no neutral loss. Note that the neutral loss string
            must include the sign (typically "-" for a neutral loss).
        isotope : int
            The isotope number above or below the monoisotope. The default is
            the monoisotopic peak (0).
        charge : Optional[int]
            The charge of the fragment. The default is an unknown charge (only
            valid for unknown ions).
        adduct : Optional[str]
            The adduct that ionized the fragment. The default is a hydrogen
            adduct matching the charge ([M+xH]).
        analyte_number : Optional[int]
            Number of the analyte the fragment belongs to. When given, it is
            rendered as an "<analyte_number>@" prefix in the string form.
        mz_delta : Optional[Tuple[float, str]]
            The m/z delta representing the observed m/z minus the theoretical
            m/z and its unit ("Da" or "ppm").

        Raises
        ------
        NotImplementedError
            If the ion type starts with one of the not yet supported
            prefixes "G", "L", "X", or "S".
        ValueError
            If the ion type is empty or unknown, if an unknown ion ("?")
            carries any additional information, if the charge is missing or
            not strictly positive for a known ion type, or if the m/z delta
            unit is neither "Da" nor "ppm".
        """
        # Reject an empty ion type explicitly instead of failing with an
        # incidental IndexError on `ion_type[0]`.
        if not ion_type:
            raise ValueError("Unknown ion type")
        if ion_type[0] in "GLXS":
            raise NotImplementedError(
                "Advanced ion types are not yet supported"
            )
        elif ion_type[0] not in SUPPORTED_IONS:
            raise ValueError("Unknown ion type")
        if ion_type == "?" and (
            neutral_loss is not None
            or isotope != 0
            or charge is not None
            or adduct is not None
            or analyte_number is not None
            or mz_delta is not None
        ):
            raise ValueError(
                "Unknown ions should not contain additional information"
            )
        # `ion_type` must be assigned before `charge`: the charge setter
        # reads it to decide which charges are valid.
        self.ion_type = ion_type
        self.neutral_loss = neutral_loss
        self.isotope = isotope
        self.charge = charge
        self.adduct = f"[M+{self.charge}H]" if adduct is None else adduct
        self.analyte_number = analyte_number
        self.mz_delta = mz_delta

    @property
    def mz_delta(self) -> Optional[Tuple[float, str]]:
        """The (value, unit) m/z delta, or None if not specified."""
        return self._mz_delta

    @mz_delta.setter
    def mz_delta(self, mz_delta: Optional[Tuple[float, str]]):
        if mz_delta is not None and mz_delta[1] not in ("Da", "ppm"):
            raise ValueError(
                "The m/z delta must be specified in Dalton or ppm units"
            )
        self._mz_delta = mz_delta

    @property
    def charge(self) -> Optional[int]:
        """The fragment charge, or None for unknown ions."""
        return self._charge

    @charge.setter
    def charge(self, charge: Optional[int]):
        if self.ion_type == "?" and charge is not None:
            raise ValueError("Invalid charge for unknown ions")
        elif self.ion_type != "?" and (charge is None or charge <= 0):
            raise ValueError(
                "The charge must be specified and strictly positive for known "
                "ion types"
            )
        self._charge = charge

    def __repr__(self) -> str:
        return str(self)

    def __str__(self) -> str:
        """Render the annotation in the fragment notation format."""
        if self.ion_type == "?":
            return "?"
        else:
            annot_str = []
            if self.analyte_number is not None:
                annot_str.append(f"{self.analyte_number}@")
            annot_str.append(self.ion_type)
            if self.neutral_loss is not None:
                annot_str.append(self.neutral_loss)
            # A single isotope step is written as "+i" / "-i", larger steps
            # include the signed count (e.g. "+2i").
            if abs(self.isotope) == 1:
                annot_str.append("+i" if self.isotope > 0 else "-i")
            elif self.isotope != 0:
                annot_str.append(f"{self.isotope:+}i")
            # Charge 1 is implicit.
            if self.charge is not None and self.charge > 1:
                annot_str.append(f"^{self.charge}")
            # Plain protonation adducts ([M+xH]) are implicit.
            if re.match(r"\[M\+\d+H\]", self.adduct) is None:
                annot_str.append(self.adduct)
            if self.mz_delta is not None:
                annot_str.append(
                    f"/{self.mz_delta[0]}"
                    f"{'ppm' if self.mz_delta[1] == 'ppm' else ''}"
                )
            return "".join(annot_str)

    # NOTE: defining `__eq__` without `__hash__` makes instances unhashable.
    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, FragmentAnnotation):
            return False
        return (
            self.ion_type == other.ion_type
            and self.neutral_loss == other.neutral_loss
            and self.isotope == other.isotope
            and self.charge == other.charge
            and self.adduct == other.adduct
            and self.analyte_number == other.analyte_number
            and self.mz_delta == other.mz_delta
        )


class PeakInterpretation:
    # Shared placeholder returned when no fragment annotation is available.
    _unknown = FragmentAnnotation("?")

    def __init__(self):
        """
        Fragment annotation(s) to interpret a specific peak.
        """
        self.fragment_annotations = []

    def __repr__(self) -> str:
        return str(self)

    def __str__(self) -> str:
        # If no fragment annotations have been specified, interpret as an
        # unknown ion.
        if len(self.fragment_annotations) > 0:
            return ",".join([str(a) for a in self.fragment_annotations])
        else:
            return str(self._unknown)

    def __eq__(self, other: Any) -> bool:
        # Two interpretations are equal when they render to the same string.
        return isinstance(other, PeakInterpretation) and str(self) == str(
            other
        )

    def __getitem__(self, key) -> FragmentAnnotation:
        """
        Get the fragment annotation(s) at the given index.

        An interpretation without fragment annotations behaves like a
        sequence holding only the unknown ion annotation.
        """
        if len(self.fragment_annotations) > 0:
            return self.fragment_annotations[key]
        # Index into a one-element list rather than returning the unknown
        # annotation for every key: out-of-range keys must raise IndexError,
        # otherwise iterating an empty interpretation never terminates.
        return [self._unknown][key]
286 | 287 | Returns 288 | ------- 289 | List[Tuple[FragmentAnnotation, float]] 290 | All possible fragment annotations and their theoretical m/z in 291 | ascending m/z order. 292 | """ 293 | for ion_type in ion_types: 294 | if ion_type not in SUPPORTED_IONS: 295 | raise ValueError( 296 | f"{ion_type} is not a supported ion type ({SUPPORTED_IONS})" 297 | ) 298 | if "B" in proteoform.sequence: 299 | raise ValueError( 300 | "Explicitly specify aspartic acid (D) or asparagine (N) instead of" 301 | " the ambiguous B to compute the fragment annotations" 302 | ) 303 | if "Z" in proteoform.sequence: 304 | raise ValueError( 305 | "Explicitly specify glutamic acid (E) or glutamine (Q) instead of " 306 | "the ambiguous Z to compute the fragment annotations" 307 | ) 308 | 309 | neutral_losses = {None: 0} if neutral_losses is None else neutral_losses 310 | 311 | base_fragments = [] 312 | 313 | # Generate all peptide fragments ('a', 'b', 'c', 'x', 'y', 'z') and 314 | # calculate their theoretical masses. 315 | # Generate all N-terminal peptide fragments. 316 | for ion_type in set("abc") & set(ion_types): 317 | mod_i, mod_mass = 0, 0 318 | for fragment_i in range(1, len(proteoform.sequence)): 319 | fragment_sequence = proteoform.sequence[:fragment_i] 320 | # Ignore unlocalized modifications. 321 | while ( 322 | proteoform.modifications is not None 323 | and mod_i < len(proteoform.modifications) 324 | and isinstance(proteoform.modifications[mod_i].position, str) 325 | and proteoform.modifications[mod_i].position != "N-term" 326 | ): 327 | mod_i += 1 328 | # Include prefix modifications. 
329 | while ( 330 | proteoform.modifications is not None 331 | and mod_i < len(proteoform.modifications) 332 | and ( 333 | proteoform.modifications[mod_i].position == "N-term" 334 | or ( 335 | isinstance( 336 | proteoform.modifications[mod_i].position, int 337 | ) 338 | and proteoform.modifications[mod_i].position 339 | < fragment_i 340 | ) 341 | ) 342 | ): 343 | mod_mass += proteoform.modifications[mod_i].mass 344 | mod_i += 1 345 | base_fragments.append( 346 | (fragment_sequence, ion_type, fragment_i, mod_mass) 347 | ) 348 | # Generate all C-terminal peptide fragments. 349 | for ion_type in set("xyz") & set(ion_types): 350 | if proteoform.modifications is not None: 351 | mod_i, mod_mass = len(proteoform.modifications) - 1, 0 352 | else: 353 | mod_i, mod_mass = None, 0 354 | for fragment_i in range(len(proteoform.sequence) - 1, 0, -1): 355 | fragment_sequence = proteoform.sequence[fragment_i:] 356 | # Include suffix modifications. 357 | while ( 358 | proteoform.modifications is not None 359 | and mod_i >= 0 360 | and ( 361 | proteoform.modifications[mod_i].position == "C-term" 362 | or ( 363 | isinstance( 364 | proteoform.modifications[mod_i].position, int 365 | ) 366 | and proteoform.modifications[mod_i].position 367 | >= fragment_i 368 | ) 369 | ) 370 | ): 371 | mod_mass += proteoform.modifications[mod_i].mass 372 | mod_i -= 1 373 | base_fragments.append( 374 | ( 375 | fragment_sequence, 376 | ion_type, 377 | len(proteoform.sequence) - fragment_i, 378 | mod_mass, 379 | ) 380 | ) 381 | 382 | # Generate all internal fragment ions. 383 | if "m" in ion_types: 384 | # Skip internal fragments with start position 1, which are 385 | # actually b ions. 386 | for start_i in range(1, len(proteoform.sequence)): 387 | mod_i_start, mod_mass = 0, 0 388 | # Skip unlocalized and prefix modifications. 
389 | while ( 390 | proteoform.modifications is not None 391 | and mod_i_start < len(proteoform.modifications) 392 | and ( 393 | isinstance( 394 | proteoform.modifications[mod_i_start].position, str 395 | ) 396 | or proteoform.modifications[mod_i_start].position < start_i 397 | ) 398 | ): 399 | mod_i_start += 1 400 | mod_i_stop = mod_i_start 401 | # Internal fragments of only one residue are encoded as 402 | # immonium ions. 403 | for stop_i in range(start_i + 2, len(proteoform.sequence)): 404 | fragment_sequence = proteoform.sequence[start_i:stop_i] 405 | # Include internal modifications. 406 | while ( 407 | proteoform.modifications is not None 408 | and mod_i_stop < len(proteoform.modifications) 409 | and proteoform.modifications[mod_i_stop].position < stop_i 410 | ): 411 | mod_mass += proteoform.modifications[mod_i_stop].mass 412 | mod_i_stop += 1 413 | # Internal fragment mass calculation is equivalent to b 414 | # ion mass calculation. 415 | base_fragments.append( 416 | ( 417 | fragment_sequence, 418 | "b", 419 | f"{start_i + 1}:{stop_i + 1}", 420 | mod_mass, 421 | ) 422 | ) 423 | 424 | # Generate unfragmented precursor ion(s). 425 | if "p" in ion_types: 426 | if proteoform.modifications is not None: 427 | mod_mass = sum([mod.mass for mod in proteoform.modifications]) 428 | else: 429 | mod_mass = 0 430 | base_fragments.append((proteoform.sequence, "M", "p", mod_mass)) 431 | 432 | fragments_masses = [] 433 | # Compute the theoretical fragment masses (using Pyteomics) 434 | for fragment_sequence, ion_type, fragment_i, mod_mass in base_fragments: 435 | for charge in range(1, max_charge + 1): 436 | annot_type = "?" 
437 | if isinstance(fragment_i, str): 438 | if ":" in fragment_i: 439 | annot_type = f"m{fragment_i}" 440 | elif fragment_i == "p": 441 | annot_type = "p" 442 | else: 443 | annot_type = f"{ion_type}{fragment_i}" 444 | fragments_masses.append( 445 | ( 446 | FragmentAnnotation(ion_type=annot_type, charge=charge), 447 | pmass.fast_mass( 448 | sequence=fragment_sequence, 449 | ion_type=ion_type, 450 | charge=charge, 451 | aa_mass=AA_MASS, 452 | ) 453 | + mod_mass / charge, 454 | ) 455 | ) 456 | 457 | # Generate all immonium ions (internal single amino acid from the 458 | # combination of a type and y type cleavage). 459 | if "I" in ion_types: 460 | # Amino acid mass minus CO plus charge 1. 461 | mass_diff = pmass.calculate_mass(formula="CO") - pmass.calculate_mass( 462 | formula="H" 463 | ) 464 | for aa, mass in AA_MASS.items(): 465 | if aa != "X": 466 | fragments_masses.append( 467 | ( 468 | FragmentAnnotation(ion_type=f"I{aa}", charge=1), 469 | mass - mass_diff, 470 | ) 471 | ) 472 | 473 | # Generate isotopic peaks for all fragments. 474 | isotope_fragments = [] 475 | for isotope in range(1, max_isotope + 1): 476 | for fragment, mass in fragments_masses: 477 | isotope_fragments.append( 478 | ( 479 | FragmentAnnotation( 480 | ion_type=fragment.ion_type, 481 | isotope=isotope, 482 | charge=fragment.charge, 483 | ), 484 | mass + isotope * C13_MASS_DIFF / fragment.charge, 485 | ) 486 | ) 487 | fragments_masses.extend(isotope_fragments) 488 | 489 | # Generate all fragments that differ by a neutral loss from the base 490 | # fragments. 
def spectrum(
    spec: MsmsSpectrum,
    *_,
    color_ions: bool = True,
    annot_fmt: Optional[Callable] = functools.partial(
        annotate_ion_type, ion_types="by"
    ),
    annot_kws: Optional[Dict] = None,
    mirror_intensity: bool = False,
    grid: bool = True,
) -> altair.LayerChart:
    """
    Plot an MS/MS spectrum.

    Parameters
    ----------
    spec : MsmsSpectrum
        The spectrum to be plotted.
    *_
        Ignored positional arguments.
    color_ions : bool, optional
        Flag indicating whether or not to color annotated fragment ions. The
        default is True.
    annot_fmt : Optional[Callable]
        Function to format the peak annotations. See `FragmentAnnotation` for
        supported elements. By default, only canonical b and y peptide fragments
        are annotated. If `None`, no peaks are annotated.
    annot_kws : Optional[Dict], optional
        Keyword arguments for `altair.Chart.mark_text` to customize peak
        annotations.
    mirror_intensity : bool, optional
        Flag indicating whether to flip the intensity axis or not.
    grid : bool, optional
        Draw grid lines or not.

    Returns
    -------
    altair.LayerChart
        The Altair chart instance with the plotted spectrum.
    """
    # Intensities are plotted relative to the most intense peak.
    intensity = spec.intensity / spec.intensity.max()
    if mirror_intensity:
        intensity *= -1
    n_peaks = len(spec.mz)
    if spec.annotation is not None:
        # Use the first annotation in case there are multiple options.
        annotations = list(map(operator.itemgetter(0), spec.annotation))
        # Without a formatter every peak gets an empty label, i.e. no peak
        # is annotated (instead of failing on `map(None, ...)`).
        if annot_fmt is not None:
            peak_labels = [annot_fmt(a) for a in annotations]
        else:
            peak_labels = [""] * len(annotations)
        peak_colors = [
            colors.get(a.ion_type[0] if color_ions else None)
            for a in annotations
        ]
        mz_delta = [
            None if a.mz_delta is None else "".join(map(str, a.mz_delta))
            for a in annotations
        ]
    else:
        # Spectra without annotations still need the "fragment" and
        # "mz_delta" columns: the label mask and tooltips below refer to
        # them.
        peak_labels = [""] * n_peaks
        peak_colors = [colors[None]] * n_peaks
        mz_delta = [None] * n_peaks
    spec_df = pd.DataFrame(
        {
            "mz": spec.mz,
            "intensity": intensity,
            "fragment": peak_labels,
            "mz_delta": mz_delta,
            "color": peak_colors,
        }
    )

    x_axis = altair.X(
        "mz",
        axis=altair.Axis(title="m/z", titleFontStyle="italic", grid=grid),
        scale=altair.Scale(nice=True, padding=5),
    )
    y_axis = altair.Y(
        "intensity",
        axis=altair.Axis(title="Intensity", format="%", grid=grid),
        scale=altair.Scale(nice=True),
    )
    color = altair.Color("color", scale=None, legend=None)
    tooltip_not_annotated = [
        altair.Tooltip("mz", format=".4f", title="m/z"),
        altair.Tooltip("intensity", format=".1%", title="Intensity"),
    ]
    tooltip_annotated = [
        altair.Tooltip("mz", format=".4f", title="m/z"),
        altair.Tooltip("intensity", format=".1%", title="Intensity"),
        altair.Tooltip("fragment", title="Fragment"),
        altair.Tooltip("mz_delta", title="m/z deviation"),
    ]
    # Unannotated peaks (peaks whose label is empty).
    mask_unannotated = spec_df["fragment"] == ""
    spec_plot = (
        altair.Chart(spec_df[mask_unannotated])
        .mark_rule(size=2)
        .encode(x=x_axis, y=y_axis, color=color, tooltip=tooltip_not_annotated)
    )
    # Annotated peaks.
    annotation_kws = {
        "align": "left" if not mirror_intensity else "right",
        "angle": 270,
        "baseline": "middle",
    }
    if annot_kws is not None:
        annotation_kws.update(annot_kws)
    spec_plot += (
        altair.Chart(spec_df[~mask_unannotated])
        .mark_rule(size=2)
        .encode(x=x_axis, y=y_axis, color=color, tooltip=tooltip_annotated)
    )
    spec_plot += (
        altair.Chart(spec_df[~mask_unannotated])
        .mark_text(dx=-5 if mirror_intensity else 5, **annotation_kws)
        .encode(
            x=x_axis,
            y=y_axis,
            text="fragment",
            color=color,
            tooltip=tooltip_annotated,
        )
    )

    return spec_plot


def mirror(
    spec_top: MsmsSpectrum,
    spec_bottom: MsmsSpectrum,
    spectrum_kws: Optional[Dict] = None,
    *_,
) -> altair.LayerChart:
    """
    Mirror plot two MS/MS spectra.

    Parameters
    ----------
    spec_top : MsmsSpectrum
        The spectrum to be plotted on the top.
    spec_bottom : MsmsSpectrum
        The spectrum to be plotted on the bottom.
    spectrum_kws : Optional[Dict], optional
        Keyword arguments for `iplot.spectrum`. `mirror_intensity` is set by
        this function for both spectra and must not be included.
    *_
        Ignored, for consistency with the `plot.mirror` API.

    Returns
    -------
    altair.LayerChart
        The Altair chart instance with the plotted spectrum.
    """
    if spectrum_kws is None:
        spectrum_kws = {}
    # Top spectrum.
    spec_plot = spectrum(spec_top, mirror_intensity=False, **spectrum_kws)
    # Mirrored bottom spectrum.
    spec_plot += spectrum(spec_bottom, mirror_intensity=True, **spectrum_kws)

    # Horizontal separator between both spectra at zero intensity.
    spec_plot += (
        altair.Chart(pd.DataFrame({"sep": [0]}))
        .mark_rule(size=3)
        .encode(y="sep", color=altair.value("lightGray"))
    )

    return spec_plot
"RES\n1b:x-TET-x:x\nLIN\n" 43 | | "RES\n1b:x-SUG-x:x\nLIN\n" 44 | | "RES\n1b:x-PEN-x:x\nLIN\n" 45 | | "RES\n1b:x-OCT-x:x\nLIN\n" 46 | | "RES\n1b:x-HEX-x:x|-1:en|-1:a\nLIN\n" 47 | | "RES\n1b:x-HEX-x:x|-1:d\nLIN\n" 48 | | "RES\n1b:x-HEX-x:x|-1:a\nLIN\n" 49 | | "RES\n1b:x-HEX-x:x\nLIN\n" 50 | | "RES\n1b:x-HEX-x:x\n2s:sulfate\nLIN\n1:1o(-1+1)2n\n" 51 | | "RES\n1b:x-HEX-x:x\n2s:phosphate\nLIN\n1:1o(-1+1)2n\n" 52 | | "RES\n1b:x-HEX-x:x\n2s:n-sulfate\nLIN\n1:1d(-1+1)2n\n" 53 | | "RES\n1b:x-HEX-x:x\n2s:n-acetyl\nLIN\n1:1d(-1+1)2n\n" 54 | | "RES\n1b:x-HEX-x:x\n2s:n-acetyl\n3s:sulfate\nLIN\n1:1d(-1+1)2n\n2:1o(-1+1)3n\n" 55 | | "RES\n1b:x-HEX-x:x\n2s:amino\nLIN\n1:1d(-1+1)2n\n" 56 | | "RES\n1b:x-DEC-x:x\nLIN\n" 57 | | "Pen" 58 | | "P" 59 | | "Oct" 60 | | "Non" 61 | | "Neuraminic acid" 62 | | "NeuGc" 63 | | "NeuAc" 64 | | "Neu5Gc" 65 | | "Neu5Ac" 66 | | "Neu" 67 | | "HexS" 68 | | "HexP" 69 | | "HexNS" 70 | | "HexNAc(S)" 71 | | "HexNAc" 72 | | "HexN" 73 | | "HexA" 74 | | "Hex" 75 | | "Hep" 76 | | "Fucose" 77 | | "Fuc" 78 | | "Dec" 79 | | "Aud21122h_5*NCCO/3=O" 80 | | "Aud21122h_5*NCC/3=O" 81 | | "Aud21122h_5*N" 82 | | "?-L-Fucx" 83 | | "?-D-Neux5NGc" 84 | | "?-D-Neux5NAc" 85 | | "?-D-Neux" 86 | | "?-?-en,a-Hexx" 87 | | "?-?-d-Hexx" 88 | | "?-?-a-Hexx" 89 | | "?-?-Trix" 90 | | "?-?-Tetx" 91 | | "?-?-Sugx" 92 | | "?-?-Penx" 93 | | "?-?-Octx" 94 | | "?-?-Nonx" 95 | | "?-?-HexxS" 96 | | "?-?-HexxP" 97 | | "?-?-HexxNS" 98 | | "?-?-HexxNAc(S)" 99 | | "?-?-HexxNAc" 100 | | "?-?-HexxN" 101 | | "?-?-Hexx" 102 | | "?-?-Hepx" 103 | | "?-?-Decx" 104 | -------------------------------------------------------------------------------- /spectrum_utils/plot.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import itertools 3 | import math 4 | from typing import ( 5 | Any, 6 | Callable, 7 | Dict, 8 | Iterable, 9 | Mapping, 10 | Optional, 11 | Tuple, 12 | Union, 13 | ) 14 | 15 | import matplotlib.pyplot as plt 16 | import 
# Peak colors, keyed by the first character of the ion type.
colors = {
    "a": "#388E3C",
    "b": "#1976D2",
    "c": "#00796B",
    "x": "#7B1FA2",
    "y": "#D32F2F",
    "z": "#F57C00",
    "m": "#FBC02D",
    "I": "#455A64",
    "p": "#512DA8",
    "?": "#212121",
    "f": "#212121",
    None: "#212121",
}
# Drawing order, keyed by the first character of the ion type.
zorders = {
    "a": 3,
    "b": 4,
    "c": 3,
    "x": 3,
    "y": 4,
    "z": 3,
    "m": 2,
    "I": 3,
    "p": 3,
    "?": 2,
    "f": 5,
    None: 1,
}


def _format_ax(
    ax: plt.Axes,
    grid: Union[bool, str],
):
    """
    Set ax formatting options that are common to all plot types.

    Parameters
    ----------
    ax : plt.Axes
        Axes instance to format.
    grid : Union[bool, str]
        True or "both" to draw major and minor grid lines, "major" or
        "minor" to draw only those grid lines. Any other value draws no
        grid lines.
    """
    # The minor locators were previously first set to `AutoLocator` and
    # then immediately overwritten; only the effective assignment is kept.
    ax.xaxis.set_minor_locator(mticker.AutoMinorLocator())
    ax.yaxis.set_minor_locator(mticker.AutoMinorLocator())
    if grid in (True, "both", "major"):
        ax.grid(True, "major", color="#9E9E9E", linewidth=0.2)
    if grid in (True, "both", "minor"):
        ax.grid(True, "minor", color="#9E9E9E", linewidth=0.2)
    ax.set_axisbelow(True)
    ax.tick_params(axis="both", which="both", labelsize="small")
    ax.set_xlabel("m/z", style="italic")


def _get_xlim(spec: MsmsSpectrum) -> Tuple[float, float]:
    """
    Get plot x-axis limits for a given spectrum.

    The upper limit is derived from the last m/z value, padded and rounded
    up to a multiple of 50. The spectrum must contain at least one peak.
    """
    # NOTE(review): uses `spec.mz[-1]` as the largest m/z, which presumably
    # relies on the m/z values being sorted in ascending order.
    round_mz = 50
    max_mz = math.ceil(spec.mz[-1] / round_mz + 1) * round_mz
    return 0.0, max_mz


def _annotate_ion(
    mz: float,
    intensity: float,
    annotation: Optional[fa.FragmentAnnotation],
    color_ions: bool,
    annot_fmt: Optional[Callable],
    annot_kws: Dict[str, object],
    ax: plt.Axes,
) -> Tuple[str, int]:
    """
    Annotate a specific fragment peak.

    Parameters
    ----------
    mz : float
        The peak's m/z value (position of the annotation on the x axis).
    intensity : float
        The peak's intensity (position of the annotation on the y axis).
    annotation : Optional[fa.FragmentAnnotation]
        The annotation that will be plotted.
    color_ions : bool
        Flag whether to color the peak annotation or not.
    annot_fmt : Optional[Callable]
        Function to format the peak annotations. See `FragmentAnnotation` for
        supported elements. If `None`, no text is drawn.
    annot_kws : Dict[str, object]
        Keyword arguments for `ax.text` to customize peak annotations.
    ax : plt.Axes
        Axes instance on which to plot the annotation.

    Returns
    -------
    Tuple[str, int]
        A tuple of the annotation's color as a hex string and the annotation's
        zorder.
    """
    ion_type = annotation.ion_type[0] if annotation is not None else None
    color = colors.get(ion_type if color_ions else None)
    zorder = zorders.get(ion_type)
    if annot_fmt is not None and annotation is not None:
        # Lift the label slightly above peaks with a positive intensity;
        # peaks with a non-positive (mirrored) intensity get no offset.
        y = intensity + 0.02 * (intensity > 0)
        # Work on a copy so the caller's keyword arguments are not modified.
        kws = annot_kws.copy()
        kws.update(dict(color=color, zorder=zorder))
        ax.text(mz, y, annot_fmt(annotation), **kws)
    return color, zorder


def annotate_ion_type(
    annotation: fa.FragmentAnnotation, ion_types: Iterable[str]
) -> str:
    """
    Convert a `FragmentAnnotation` to a string for annotating peaks in a
    spectrum plot.

    This function will only annotate singly-charged, mono-isotopic canonical
    peaks with the given ion type(s).

    Parameters
    ----------
    annotation : fa.FragmentAnnotation
        The peak's fragment annotation.
    ion_types : Iterable[str]
        Accepted ion types to annotate.

    Returns
    -------
    str
        The peak's annotation string, or an empty string if the peak should
        not be annotated.
    """
    if (
        annotation.ion_type[0] in ion_types
        and annotation.neutral_loss is None
        and annotation.isotope == 0
        and annotation.charge == 1
    ):
        return str(annotation.ion_type)
    else:
        return ""
def mass_errors(
    spec: MsmsSpectrum,
    *,
    unit: Optional[str] = None,
    plot_unknown: bool = True,
    color_ions: bool = True,
    grid: Union[bool, str] = True,
    ax: Optional[plt.Axes] = None,
) -> plt.Axes:
    """
    Plot mass error bubble plot for a given spectrum.

    A mass error bubble plot shows the error between observed and theoretical
    mass (y-axis) in function of the **m/z** (x-axis) for each peak in the
    spectrum. The size of the bubble is proportional to the intensity of the
    peak.

    Parameters
    ----------
    spec : MsmsSpectrum
        The spectrum with mass errors to be plotted.
    unit : str, optional
        The unit of the mass errors, either 'ppm', 'Da', or None. If None,
        the unit that was used for spectrum annotation is used. The default is
        None.
    plot_unknown : bool, optional
        Flag indicating whether or not to plot mass errors for unknown peaks.
        Peaks without a mass error (unknown ions and annotations without an
        m/z delta) are plotted with a mass error of zero.
    color_ions : bool, optional
        Flag indicating whether or not to color dots for annotated fragment
        ions. The default is True.
    grid : Union[bool, str], optional
        Draw grid lines or not. Either a boolean to enable/disable both major
        and minor grid lines or 'major'/'minor' to enable major or minor grid
        lines respectively.
    ax : Optional[plt.Axes], optional
        Axes instance on which to plot the mass errors. If None the current
        Axes instance is used.

    Returns
    -------
    plt.Axes
        The matplotlib Axes instance on which the mass errors are plotted.

    Raises
    ------
    ValueError
        If `unit` is not 'ppm', 'Da', or None, or if the annotations use
        inconsistent or unknown mass units.

    Notes
    -----
    The mass error bubble plot was first introduced in [1]_.

    References
    ----------
    .. [1] Barsnes,H., Eidhammer,I. and Martens,L. (2010)
       FragmentationAnalyzer: An open-source tool to analyze MS/MS
       fragmentation data. PROTEOMICS, 10, 1087–1090.
       doi:10.1002/pmic.200900681

    """
    # Fail fast on unsupported units, which would otherwise yield an axis
    # labelled with that unit but showing unconverted values.
    if unit not in (None, "Da", "ppm"):
        raise ValueError("The mass error unit must be 'Da', 'ppm', or None")

    if ax is None:
        ax = plt.gca()

    _format_ax(ax, grid)

    if len(spec.mz) == 0:
        ax.set_ylabel("Mass error")
        ax.set_ylim(-1, 1)
        return ax

    annotations = (
        spec.annotation
        if spec.annotation is not None
        else itertools.repeat(None, len(spec.mz))
    )

    known_ions = []
    dot_colors = []
    mz_deltas = []
    mz_delta_units = []
    for ann in annotations:
        # Use the first annotation in case there are multiple options.
        first = ann[0] if ann is not None else None
        ion_type = first.ion_type[0] if first is not None else None
        # Only known ions that carry an m/z delta have a mass error to show;
        # an annotation without m/z delta is handled like an unknown peak.
        has_mass_error = (
            ion_type is not None
            and ion_type != "?"
            and first.mz_delta is not None
        )
        known_ions.append(has_mass_error)
        dot_colors.append(colors.get(ion_type if color_ions else None))
        mz_deltas.append(first.mz_delta[0] if has_mass_error else 0.0)
        mz_delta_units.append(first.mz_delta[1] if has_mass_error else None)

    dot_colors = np.array(dot_colors)
    mz_deltas = np.array(mz_deltas)
    # Bubble sizes are proportional to the relative peak intensity.
    intensity_scaled = 500 * (spec.intensity / np.max(spec.intensity))
    mask = (
        np.ones_like(spec.mz, dtype=bool)
        if plot_unknown
        else np.array(known_ions)
    )

    # Determine the single unit shared by all annotated mass errors.
    for known_unit in ["ppm", "Da"]:
        # Use `not any` instead of `all` to fail fast
        if not any(u and u != known_unit for u in mz_delta_units):
            annotation_unit = known_unit
            break
    else:
        raise ValueError("Inconsistent or unknown mass units in annotations.")
    # Convert to the requested unit if it differs from the annotation unit.
    if unit == "Da" and annotation_unit == "ppm":
        mz_deltas = ppm_to_da(mz_deltas, spec.mz)
    elif unit == "ppm" and annotation_unit == "Da":
        mz_deltas = da_to_ppm(mz_deltas, spec.mz)

    # Symmetric y axis around zero, unless all mass errors are zero.
    y_lim = 1.2 * np.max(np.abs(mz_deltas))
    if y_lim > 0.0:
        ax.set_ylim(-y_lim, y_lim)
    ax.set_xlim(*_get_xlim(spec))
    ax.set_ylabel(f"Mass error ({unit or annotation_unit})")

    ax.scatter(
        spec.mz[mask],
        mz_deltas[mask],
        s=intensity_scaled[mask],
        c=dot_colors[mask],
        alpha=0.5,
        edgecolors="none",
    )

    return ax
389 | spectrum_kws : Optional[Dict], optional 390 | Keyword arguments for `plot.spectrum`. 391 | ax : Optional[plt.Axes], optional 392 | Axes instance on which to plot the spectrum. If None the current Axes 393 | instance is used. 394 | 395 | Returns 396 | ------- 397 | plt.Axes 398 | The matplotlib Axes instance on which the spectra are plotted. 399 | """ 400 | if ax is None: 401 | ax = plt.gca() 402 | 403 | if spectrum_kws is None: 404 | spectrum_kws = {} 405 | # Top spectrum. 406 | spectrum(spec_top, mirror_intensity=False, ax=ax, **spectrum_kws) 407 | y_max = ax.get_ylim()[1] 408 | # Mirrored bottom spectrum. 409 | spectrum(spec_bottom, mirror_intensity=True, ax=ax, **spectrum_kws) 410 | y_min = ax.get_ylim()[0] 411 | ax.set_ylim(y_min, y_max) 412 | 413 | ax.axhline(0, color="#9E9E9E", zorder=10) 414 | 415 | max_mz_top = spec_top.mz[-1] if len(spec_top.mz) > 0 else 1 416 | max_mz_bottom = spec_bottom.mz[-1] if len(spec_bottom.mz) > 0 else 1 417 | # Update axes so that both spectra fit. 418 | round_mz = 50 419 | max_mz = max( 420 | [ 421 | math.ceil(max_mz_top / round_mz + 1) * round_mz, 422 | math.ceil(max_mz_bottom / round_mz + 1) * round_mz, 423 | ] 424 | ) 425 | ax.set_xlim(0, max_mz) 426 | ax.yaxis.set_major_locator(mticker.AutoLocator()) 427 | ax.yaxis.set_minor_locator(mticker.AutoMinorLocator()) 428 | ax.yaxis.set_major_formatter( 429 | mticker.FuncFormatter(lambda x, pos: f"{abs(x):.0%}") 430 | ) 431 | 432 | return ax 433 | 434 | 435 | def facet( 436 | spec_top: MsmsSpectrum, 437 | spec_mass_errors: Optional[MsmsSpectrum] = None, 438 | spec_bottom: Optional[MsmsSpectrum] = None, 439 | spectrum_kws: Optional[Mapping[str, Any]] = None, 440 | mass_errors_kws: Optional[Mapping[str, Any]] = None, 441 | height: Optional[float] = None, 442 | width: Optional[float] = None, 443 | ) -> plt.Figure: 444 | """ 445 | Plot a spectrum, and optionally mass errors, and a mirror spectrum. 
446 | 447 | Parameters 448 | ---------- 449 | spec_top : MsmsSpectrum 450 | The spectrum to be plotted on the top. 451 | spec_mass_errors : Optional[MsmsSpectrum], optional 452 | The spectrum for which mass errors are to be plotted in the middle. 453 | spec_bottom : Optional[MsmsSpectrum], optional 454 | The spectrum to be plotted on the bottom. 455 | spectrum_kws : Optional[Mapping[str, Any]], optional 456 | Keyword arguments for `plot.spectrum` for the top and bottom spectra. 457 | mass_errors_kws : Optional[Mapping[str, Any]], optional 458 | Keyword arguments for `plot.mass_errors`. 459 | height : Optional[float], optional 460 | The height of the figure in inches. 461 | width : Optional[float], optional 462 | The width of the figure in inches. 463 | 464 | Returns 465 | ------- 466 | plt.Figure 467 | The matplotlib Figure instance on which the spectra and mass errors 468 | are plotted. 469 | """ 470 | 471 | n_rows = 1 + (spec_mass_errors is not None) + (spec_bottom is not None) 472 | height_ratios = [1] 473 | if spec_mass_errors is not None: 474 | height_ratios.append(0.5) 475 | if spec_bottom is not None: 476 | height_ratios.append(1) 477 | 478 | fig, axes = plt.subplots( 479 | *(n_rows, 1), 480 | figsize=(width or 7.5, height or (3.75 if spec_bottom is None else 6)), 481 | sharex=True, 482 | gridspec_kw={"height_ratios": height_ratios}, 483 | ) 484 | axes = np.array(axes).flatten() 485 | 486 | spectrum(spec_top, ax=axes[0], **spectrum_kws or {}) 487 | 488 | if spec_mass_errors is not None: 489 | mass_errors(spec_mass_errors, ax=axes[1], **mass_errors_kws or {}) 490 | axes[0].get_xaxis().get_label().set_visible(False) 491 | 492 | if spec_bottom is not None: 493 | spectrum( 494 | spec_bottom, 495 | mirror_intensity=True, 496 | ax=axes[-1], 497 | **spectrum_kws or {}, 498 | ) 499 | for ax in axes[:-1]: 500 | ax.get_xaxis().get_label().set_visible(False) 501 | 502 | axes[-1].yaxis.set_major_formatter( 503 | mticker.FuncFormatter(lambda x, pos: f"{abs(x):.0%}") 504 
| ) 505 | 506 | fig.align_ylabels(axes) 507 | fig.tight_layout() 508 | 509 | return fig 510 | -------------------------------------------------------------------------------- /spectrum_utils/proforma.ebnf: -------------------------------------------------------------------------------- 1 | %import common.DIGIT 2 | %import common.INT 3 | %import common.LETTER 4 | %import common.NUMBER 5 | %import common.SIGNED_INT 6 | %import common.SIGNED_NUMBER 7 | %import common.WS 8 | %import .monosaccharide.MONOSACCHARIDE 9 | 10 | // ProForma specification: https://github.com/HUPO-PSI/ProForma/ 11 | // Version: June 29, 2021 12 | proforma: (proteoform (CROSSLINK | CHIMERIC))* proteoform 13 | CROSSLINK: "//" 14 | CHIMERIC: "+" 15 | 16 | proteoform: peptide ["/" charge] 17 | 18 | peptide: mod_global* mod_unknown_pos? mod_labile* mod_n_term? (aa | mod_range)+ mod_c_term? 19 | // TODO: Amino acid sequence ambiguity (section 4.7). 20 | 21 | aa.10: AA [mod+ | (_MOD_L mod_label _MOD_R)] 22 | AA: LETTER 23 | 24 | mod_global: _MOD_GLOBAL_L (ISOTOPE | (mod "@" (AA ",")* AA)) _MOD_GLOBAL_R 25 | ISOTOPE: INT? LETTER+ SIGNED_INT? 26 | 27 | mod_unknown_pos: (mod ["^" MOD_COUNT])+ "?" 28 | 29 | mod: _MOD_L ((mod_name | mod_accession | mod_mass | mod_formula | mod_glycan | info) mod_label? "|")* (mod_name | mod_accession | mod_mass | mod_formula | mod_glycan | info) mod_label? _MOD_R 30 | mod_labile: _MOD_LABILE_L ((mod_name | mod_accession | mod_mass | mod_formula | mod_glycan | info) "|")* (mod_name | mod_accession | mod_mass | mod_formula | mod_glycan | info) _MOD_LABILE_R 31 | MOD_COUNT: INT 32 | 33 | mod_n_term: (mod | (_MOD_L mod_label _MOD_R)) "-" 34 | mod_c_term: "-" (mod | (_MOD_L mod_label _MOD_R)) 35 | 36 | mod_range: MOD_RANGE_L mod_range_pos _MOD_RANGE_R mod+ 37 | mod_range_pos: (aa | mod_range)+ 38 | 39 | mod_name.2: ((CV_ABBREV ":") | (CV_ABBREV_OPT ":")?) 
TEXT 40 | CV_ABBREV_OPT: "U"i | "M"i 41 | CV_ABBREV: "R"i | "X"i | "G"i 42 | 43 | mod_accession.5: CV_NAME ":" TEXT 44 | CV_NAME: "UNIMOD"i | "MOD"i | "RESID"i | "XLMOD"i | "GNO"i 45 | 46 | mod_mass.5: [(CV_ABBREV_OPT | CV_ABBREV | MOD_MASS_OBS) ":"] MOD_MASS 47 | MOD_MASS_OBS: "Obs"i 48 | MOD_MASS: ("+" | "-") NUMBER 49 | 50 | mod_formula.5: "Formula:"i (_MOD_L ISOTOPE _MOD_R)* FORMULA 51 | FORMULA: (LETTER+ SIGNED_INT? WS?)+ 52 | 53 | mod_glycan.5: "Glycan:" (monosaccharide WS?)+ 54 | monosaccharide: MONOSACCHARIDE MONOSACCHARIDE_COUNT? 55 | MONOSACCHARIDE_COUNT: INT 56 | 57 | info.5: "Info:"i TEXT 58 | 59 | mod_label.3 : "#" (MOD_LABEL_XL | MOD_LABEL_BRANCH | MOD_LABEL) ["(" MOD_SCORE ")"] 60 | MOD_LABEL_XL: "XL" MOD_LABEL 61 | MOD_LABEL_BRANCH: "BRANCH" 62 | MOD_LABEL: (LETTER | DIGIT)+ 63 | MOD_SCORE: SIGNED_NUMBER 64 | 65 | charge: CHARGE [_MOD_L ion _MOD_R] 66 | CHARGE: SIGNED_INT 67 | ion: [TEXT ","] TEXT 68 | 69 | TEXT: /.+/ 70 | 71 | _MOD_L: "[" 72 | _MOD_R: "]" 73 | _MOD_LABILE_L: "{" 74 | _MOD_LABILE_R: "}" 75 | _MOD_GLOBAL_L: "<" 76 | _MOD_GLOBAL_R: ">" 77 | MOD_RANGE_L: "(" 78 | _MOD_RANGE_R: ")" 79 | -------------------------------------------------------------------------------- /spectrum_utils/utils.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | import numba as nb 4 | import numpy as np 5 | 6 | 7 | @nb.njit(fastmath=True, cache=True) 8 | def mass_diff(mz1, mz2, mode_is_da): 9 | """ 10 | Calculate the mass difference(s). 11 | 12 | Parameters 13 | ---------- 14 | mz1 15 | First m/z value(s). 16 | mz2 17 | Second m/z value(s). 18 | mode_is_da : bool 19 | Mass difference in Dalton (True) or in ppm (False). 20 | 21 | Returns 22 | ------- 23 | The mass difference(s) between the given m/z values. 
def da_to_ppm(
    delta_mz: Union[float, np.ndarray], mz: Union[float, np.ndarray]
) -> Union[float, np.ndarray]:
    """
    Convert a mass difference in Dalton to ppm.

    Parameters
    ----------
    delta_mz : float or np.ndarray
        Mass difference in Dalton.
    mz : float or np.ndarray
        m/z value of peak.

    Returns
    -------
    float or np.ndarray
        The mass difference relative to `mz`, in parts per million.
    """
    return delta_mz / mz * 1e6


def ppm_to_da(
    delta_mz: Union[float, np.ndarray], mz: Union[float, np.ndarray]
) -> Union[float, np.ndarray]:
    """
    Convert a mass difference in ppm to Dalton.

    Parameters
    ----------
    delta_mz : float or np.ndarray
        Mass difference in ppm.
    mz : float or np.ndarray
        m/z value of peak.

    Returns
    -------
    float or np.ndarray
        The absolute mass difference at `mz`, in Dalton.
    """
    return delta_mz / 1e6 * mz
# Expected b/y fragment m/z values for the unmodified peptide "HPYLEDR" at
# charges 1-3, keyed as "<ion type>^<charge>".
_HPYLEDR_FRAGMENTS = {
    "b1^1": 138.066147,
    "b2^1": 235.118912,
    "b3^1": 398.182220,
    "b4^1": 511.266266,
    "b5^1": 640.308899,
    "b6^1": 755.335815,
    "y1^1": 175.118912,
    "y2^1": 290.145844,
    "y3^1": 419.188446,
    "y4^1": 532.272522,
    "y5^1": 695.335815,
    "y6^1": 792.388550,
    "b1^2": 69.536731,
    "b2^2": 118.063111,
    "b3^2": 199.594776,
    "b4^2": 256.136806,
    "b5^2": 320.658101,
    "b6^2": 378.171571,
    "y1^2": 88.063114,
    "y2^2": 145.576584,
    "y3^2": 210.097879,
    "y4^2": 266.639909,
    "y5^2": 348.171574,
    "y6^2": 396.697954,
    "b1^3": 46.693580,
    "b2^3": 79.044500,
    "b3^3": 133.398943,
    "b4^3": 171.093630,
    "b5^3": 214.107826,
    "b6^3": 252.450140,
    "y1^3": 59.044501,
    "y2^3": 97.386815,
    "y3^3": 140.401011,
    "y4^3": 178.095698,
    "y5^3": 232.450141,
    "y6^3": 264.801061,
}

# Expected b/y fragment m/z values for "HPYLEDR" carrying +79.96633 on the
# tyrosine, at charges 1-3, keyed as "<ion type>^<charge>".
_HPYLEDR_MOD_Y_FRAGMENTS = {
    "b1^1": 138.066147,
    "b2^1": 235.118912,
    "b3^1": 478.148590,
    "b4^1": 591.232666,
    "b5^1": 720.275269,
    "b6^1": 835.302185,
    "y1^1": 175.118912,
    "y2^1": 290.145844,
    "y3^1": 419.188446,
    "y4^1": 532.272522,
    "y5^1": 775.302185,
    "y6^1": 872.354980,
    "b1^2": 69.536731,
    "b2^2": 118.063111,
    "b3^2": 239.577941,
    "b4^2": 296.119971,
    "b5^2": 360.641266,
    "b6^2": 418.154736,
    "y1^2": 88.063114,
    "y2^2": 145.576584,
    "y3^2": 210.097879,
    "y4^2": 266.639909,
    "y5^2": 388.154739,
    "y6^2": 436.681119,
    "b1^3": 46.693580,
    "b2^3": 79.044500,
    "b3^3": 160.054386,
    "b4^3": 197.749073,
    "b5^3": 240.763270,
    "b6^3": 279.105583,
    "y1^3": 59.044501,
    "y2^3": 97.386815,
    "y3^3": 140.401011,
    "y4^3": 178.095698,
    "y5^3": 259.105585,
    "y6^3": 291.456505,
}

# Neutral loss label and mass used by the neutral loss tests.
_WATER_LOSS = "H2O", 18.010565


def _ion_type_key(annotation):
    """Return the expected-value key for singly-charged lookups."""
    return f"{annotation.ion_type}"


def _charge_key(annotation):
    """Return the "<ion type>^<charge>" expected-value key."""
    return f"{annotation.ion_type}^{annotation.charge}"


def _neutral_loss_key(annotation):
    """Return the charge key followed by the neutral loss label, if any."""
    neutral_loss = (
        annotation.neutral_loss if annotation.neutral_loss is not None else ""
    )
    return f"{annotation.ion_type}^{annotation.charge}{neutral_loss}"


def _with_neutral_loss(fragments, neutral_loss):
    """Return `fragments` extended with their neutral loss counterparts."""
    combined = dict(fragments)
    for fragment, mz in fragments.items():
        charge = int(fragment.split("^")[1])
        combined[f"{fragment}-{neutral_loss[0]}"] = mz - (
            neutral_loss[1] / charge
        )
    return combined


def _assert_theoretical_fragments(
    sequence, expected, key=_charge_key, **kwargs
):
    """Check every generated fragment m/z against its expected value."""
    peptide = proforma.parse(sequence)[0]
    for (
        annotation,
        fragment_mz,
    ) in fragment_annotation.get_theoretical_fragments(peptide, **kwargs):
        assert fragment_mz == pytest.approx(expected[key(annotation)])


def test_fragment_annotation_primary():
    fragment_annotation.FragmentAnnotation(
        "b5", neutral_loss="-H2O", isotope=1, charge=1, adduct="[M+H]"
    )
    # Primary fragments must carry a strictly positive charge.
    for invalid_charge in (0, -2):
        with pytest.raises(ValueError):
            fragment_annotation.FragmentAnnotation(
                "b5", charge=invalid_charge
            )


def test_get_theoretical_fragments():
    _assert_theoretical_fragments(
        "HPYLEDR", _HPYLEDR_FRAGMENTS, max_charge=3
    )


def test_get_theoretical_fragments_static_mod():
    _assert_theoretical_fragments(
        "<[+79.96633]@Y>HPYLEDR", _HPYLEDR_MOD_Y_FRAGMENTS, max_charge=3
    )


def test_get_theoretical_fragments_mod():
    _assert_theoretical_fragments(
        "HPY[+79.96633]LEDR", _HPYLEDR_MOD_Y_FRAGMENTS, max_charge=3
    )


def test_get_theoretical_fragments_mod_term():
    fragments = {
        "b1": 180.076706,
        "b2": 277.129486,
        "b3": 440.192810,
        "b4": 553.276917,
        "b5": 682.319519,
        "b6": 797.346436,
        "y1": 175.118912,
        "y2": 290.145844,
        "y3": 419.188446,
        "y4": 532.272522,
        "y5": 695.335815,
        "y6": 792.388550,
    }
    _assert_theoretical_fragments(
        "[+42.01056]-HPYLEDR", fragments, key=_ion_type_key
    )


def test_get_theoretical_fragments_mod_multiple():
    fragments = {
        "b1": 180.076706,
        "b2": 277.129486,
        "b3": 520.159180,
        "b4": 633.243225,
        "b5": 762.285828,
        "b6": 877.312744,
        "y1": 175.118912,
        "y2": 290.145844,
        "y3": 419.188446,
        "y4": 532.272522,
        "y5": 775.302185,
        "y6": 872.354980,
    }
    _assert_theoretical_fragments(
        "[+42.01056]-HPY[+79.96633]LEDR", fragments, key=_ion_type_key
    )


def test_get_theoretical_fragments_isotope():
    peptide = proforma.parse("HPYLEDR")[0]
    # Only charges 1 and 2 are generated here; the fragment count assertion
    # below relies on the expected table holding exactly those entries.
    fragments = {
        fragment: mz
        for fragment, mz in _HPYLEDR_FRAGMENTS.items()
        if int(fragment.split("^")[1]) <= 2
    }
    for num_isotopes in range(0, 3):
        annotations = fragment_annotation.get_theoretical_fragments(
            peptide, max_charge=2, max_isotope=num_isotopes
        )
        assert len(annotations) == len(fragments) * (num_isotopes + 1)
        for annotation, fragment_mz in annotations:
            assert fragment_mz == pytest.approx(
                fragments[_charge_key(annotation)]
                + 1.003_354 * annotation.isotope / annotation.charge
            )


def test_get_theoretical_fragments_neutral_loss():
    _assert_theoretical_fragments(
        "HPYLEDR",
        _with_neutral_loss(_HPYLEDR_FRAGMENTS, _WATER_LOSS),
        key=_neutral_loss_key,
        max_charge=3,
        neutral_losses={None: 0, _WATER_LOSS[0]: -_WATER_LOSS[1]},
    )


def test_get_theoretical_fragments_mod_neutral_loss():
    _assert_theoretical_fragments(
        "HPY[+79.96633]LEDR",
        _with_neutral_loss(_HPYLEDR_MOD_Y_FRAGMENTS, _WATER_LOSS),
        key=_neutral_loss_key,
        max_charge=3,
        neutral_losses={None: 0, _WATER_LOSS[0]: -_WATER_LOSS[1]},
    )


def test_get_theoretical_fragments_ambiguous():
    # B and Z are ambiguous amino acid codes.
    for sequence in ("HPYLEBDR", "HPZYLEDR"):
        with pytest.raises(ValueError):
            fragment_annotation.get_theoretical_fragments(
                proforma.parse(sequence)[0]
            )


def test_get_theoretical_fragments_unsupported_ion_type():
    with pytest.raises(ValueError):
        fragment_annotation.get_theoretical_fragments(
            proforma.parse("HPYLEDR")[0], "l"
        )
def _random_peaks(num_peaks):
    """Draw random m/z values first, then random intensities."""
    peak_mz = np.random.uniform(100, 1400, num_peaks)
    peak_intensity = np.random.lognormal(0, 1, num_peaks)
    return peak_mz, peak_intensity


def _near_duplicate_mz(num_peaks):
    """Draw roughly unit-spaced m/z values with three near-duplicates."""
    peak_mz = np.arange(1, num_peaks + 1) + np.random.uniform(
        -0.2, 0.2, num_peaks
    )
    peak_mz[4] = peak_mz[3] + 0.0002
    peak_mz[5] = peak_mz[3] + 0.0005
    peak_mz[7] = peak_mz[8] - 0.00037
    return peak_mz


def _make_spectrum(peak_mz, peak_intensity):
    """Build a test spectrum with fixed identifier, precursor, and charge."""
    return spectrum.MsmsSpectrum(
        "test_spectrum", 500, 2, peak_mz, peak_intensity
    )


def test_init_mz_sorted():
    spec = _make_spectrum(*_random_peaks(150))
    for lower_mz, upper_mz in zip(spec.mz[:-1], spec.mz[1:]):
        assert lower_mz <= upper_mz


def test_init_intensity_order():
    mz, intensity = _random_peaks(150)
    # Capture the expected pairing before the spectrum sees the arrays.
    expected_peaks = sorted(zip(mz, intensity), key=operator.itemgetter(0))
    spec = _make_spectrum(mz, intensity)
    for observed_peak, expected_peak in zip(
        zip(spec.mz, spec.intensity), expected_peaks
    ):
        assert observed_peak == pytest.approx(expected_peak)


def test_mz_array():
    num_peaks = 150
    mz, intensity = _random_peaks(num_peaks)
    spec = _make_spectrum(mz.tolist(), intensity)
    assert isinstance(spec.mz, np.ndarray)
    with pytest.raises(AttributeError):
        spec.mz = np.random.uniform(100, 1400, num_peaks)


def test_intensity_array():
    num_peaks = 150
    mz, intensity = _random_peaks(num_peaks)
    spec = _make_spectrum(mz, intensity.tolist())
    assert isinstance(spec.intensity, np.ndarray)
    with pytest.raises(AttributeError):
        spec.intensity = np.random.lognormal(0, 1, num_peaks)


def test_from_usi():
    usis = (
        # USI from PRIDE/MassIVE/PeptideAtlas.
        "mzspec:PXD000561:Adult_Frontalcortex_bRP_Elite_85_f09:scan:17555",
        # USI from PRIDE/MassIVE/PeptideAtlas with ProForma annotation.
        "mzspec:PXD000561:Adult_Frontalcortex_bRP_Elite_85_f09:scan:17555:"
        "VLHPLEGAVVIIFK/2",
        # USI from PRIDE/MassIVE/PeptideAtlas.
        "mzspec:PXD000966:CPTAC_CompRef_00_iTRAQ_05_2Feb12_Cougar_11-10-09:"
        "scan:12298",
        # USI from PRIDE/MassIVE/PeptideAtlas with ProForma annotation.
        "mzspec:PXD000966:CPTAC_CompRef_00_iTRAQ_05_2Feb12_Cougar_11-10-09:"
        "scan:12298:[iTRAQ4plex]-LHFFM[Oxidation]PGFAPLTSR/3",
        # USI from MassIVE.
        "mzspec:PXD022531:j12541_C5orf38:scan:12368",
        # USI from MassIVE with ProForma annotation.
        "mzspec:PXD022531:j12541_C5orf38:scan:12368:VAATLEILTLK/2",
        # USI from MassIVE.
        "mzspec:PXD022531:b11156_PRAMEF17:scan:22140",
        # USI from MassIVE with ProForma annotation.
        "mzspec:PXD022531:b11156_PRAMEF17:scan:22140:VAATLEILTLK/2",
        # USI from PRIDE/MassIVE/PeptideAtlas.
        "mzspec:PXD000394:20130504_EXQ3_MiBa_SA_Fib-2:scan:4234",
        # USI from PRIDE/MassIVE/PeptideAtlas with ProForma annotation.
        "mzspec:PXD000394:20130504_EXQ3_MiBa_SA_Fib-2:scan:4234:SGVSRKPAPG/2",
        # USI from PRIDE.
        "mzspec:PXD010793:20170817_QEh1_LC1_HuPa_SplicingPep_10pmol_G2_R01:"
        "scan:8296",
        # USI from PRIDE with ProForma annotation.
        "mzspec:PXD010793:20170817_QEh1_LC1_HuPa_SplicingPep_10pmol_G2_R01:"
        "scan:8296:SGVSRKPAPG/2",
        # USI from PRIDE/MassIVE/PeptideAtlas.
        "mzspec:PXD010154:01284_E04_P013188_B00_N29_R1.mzML:scan:31291",
        # USI from PRIDE/MassIVE/PeptideAtlas with ProForma annotation.
        "mzspec:PXD010154:01284_E04_P013188_B00_N29_R1.mzML:scan:31291:"
        "DQNGTWEM[Oxidation]ESNENFEGYM[Oxidation]K/2",
        # USI from GNPS to a task spectrum.
        "mzspec:GNPS:TASK-c95481f0c53d42e78a61bf899e9f9adb-spectra/"
        "specs_ms.mgf:scan:1943",
        # USI from GNPS to a library spectrum.
        "mzspec:GNPS:GNPS-LIBRARY:accession:CCMSLIB00005436077",
        # USI to a GNPS/MassIVE spectrum.
        "mzspec:MSV000078547:120228_nbut_3610_it_it_take2:scan:389",
    )
    for usi in usis:
        assert spectrum.MsmsSpectrum.from_usi(usi).identifier == usi
    with pytest.raises(ValueError):
        spectrum.MsmsSpectrum.from_usi(
            "mzspec:PXD000561:Adult_Frontalcortex_bRP_Elite_85_f09:scan:17555",
            "massive",
        )


def test_round_no_merge():
    num_peaks = 150
    mz = np.arange(1, num_peaks + 1) + np.random.uniform(-0.49, 0.5, num_peaks)
    intensity = np.random.exponential(1, num_peaks)
    # Hand over copies so the originals remain available for comparison.
    spec = _make_spectrum(mz.copy(), intensity.copy())
    decimals = 0
    spec.round(decimals)
    for peak_values in (spec.mz, spec.intensity):
        assert len(peak_values) == num_peaks
    np.testing.assert_allclose(spec.mz, np.around(mz, decimals))
    np.testing.assert_allclose(spec.intensity, intensity)


def test_round_merge_len():
    num_peaks = 10
    mz = _near_duplicate_mz(num_peaks)
    intensity = np.random.exponential(1, num_peaks)
    spec = _make_spectrum(mz, intensity)
    spec.annotate_proforma(f"X[+{mz[3]}]", 10, "ppm")
    assert spec.annotation is not None
    spec.round(1)
    # Three of the peaks collapse into their neighbors.
    assert len(spec.mz) == len(mz) - 3
    assert len(spec.mz) == len(spec.intensity)
    assert spec.annotation is None


def test_round_merge_sum():
    num_peaks = 10
    mz = _near_duplicate_mz(num_peaks)
    intensity = np.random.exponential(1, num_peaks)
    spec = _make_spectrum(mz, intensity.copy())
    spec.round(1, "sum")
    assert np.sum(spec.intensity) == pytest.approx(np.sum(intensity))


def test_round_merge_max():
    mz = _near_duplicate_mz(10)
    intensity = np.arange(1, 11)
    spec = _make_spectrum(mz, intensity.copy())
    spec.round(1, "max")
    np.testing.assert_allclose(spec.intensity, [1, 2, 3, 6, 7, 9, 10])


def test_set_mz_range_keep_all():
    num_peaks = 150
    spec = _make_spectrum(*_random_peaks(num_peaks))
    spec.set_mz_range(0, 1500)
    assert len(spec.mz) == num_peaks
    assert len(spec.intensity) == num_peaks


def test_set_mz_range_truncate():
    num_peaks = 150
    mz, intensity = _random_peaks(num_peaks)
    spec = _make_spectrum(mz, intensity)
    spec.annotate_proforma(f"X[+{mz[75]}]", 10, "ppm")
    min_mz, max_mz = 400, 1200
    assert spec.annotation is not None
    assert spec.mz.min() < min_mz
    assert spec.mz.max() > max_mz
    spec.set_mz_range(min_mz, max_mz)
    assert len(spec.mz) < num_peaks
    assert len(spec.intensity) < num_peaks
    assert spec.annotation is None
    assert spec.mz.min() >= min_mz
    assert spec.mz.max() <= max_mz


def test_set_mz_range_truncate_left():
    num_peaks = 150
    spec = _make_spectrum(*_random_peaks(num_peaks))
    min_mz, max_mz = 400, 1500
    assert spec.mz.min() < min_mz
    spec.set_mz_range(min_mz, max_mz)
    assert len(spec.mz) < num_peaks
    assert len(spec.intensity) < num_peaks
    assert spec.mz.min() >= min_mz


def test_set_mz_range_truncate_right():
    num_peaks = 150
    spec = _make_spectrum(*_random_peaks(num_peaks))
    min_mz, max_mz = 0, 1200
    assert spec.mz.max() > max_mz
    spec.set_mz_range(min_mz, max_mz)
    assert len(spec.mz) < num_peaks
    assert len(spec.intensity) < num_peaks
    assert spec.mz.max() <= max_mz


def test_set_mz_range_none():
    num_peaks, min_mz, max_mz = 150, 400, 1200
    mz, intensity = _random_peaks(num_peaks)

    # No bounds: nothing is removed.
    spec = _make_spectrum(mz.copy(), intensity.copy())
    spec.set_mz_range(None, None)
    assert len(spec.mz) == num_peaks
    assert len(spec.intensity) == num_peaks
    assert spec.mz.min() == mz.min()
    assert spec.mz.max() == mz.max()

    # Upper bound only.
    spec = _make_spectrum(mz.copy(), intensity.copy())
    spec.set_mz_range(None, max_mz)
    assert len(spec.mz) < num_peaks
    assert len(spec.intensity) < num_peaks
    assert spec.mz.max() <= max_mz
    assert spec.mz.min() == mz.min()

    # Lower bound only.
    spec = _make_spectrum(mz.copy(), intensity.copy())
    spec.set_mz_range(min_mz, None)
    assert len(spec.mz) < num_peaks
    assert len(spec.intensity) < num_peaks
    assert spec.mz.min() >= min_mz
    assert spec.mz.max() == mz.max()
| assert spec.mz.max() > max_mz 272 | spec.set_mz_range(max_mz, min_mz) 273 | assert len(spec.mz) < num_peaks 274 | assert len(spec.intensity) < num_peaks 275 | assert spec.mz.min() >= min_mz 276 | assert spec.mz.max() <= max_mz 277 | 278 | 279 | def test_remove_precursor_peak(): 280 | num_peaks = 150 281 | mz = np.random.uniform(100, 1400, num_peaks) 282 | fragment_tol_mass = np.random.uniform(0, 0.5) 283 | fragment_tol_mode = "Da" 284 | precursor_mz = mz[np.random.randint(0, num_peaks)] + fragment_tol_mass / 2 285 | intensity = np.random.lognormal(0, 1, num_peaks) 286 | spec = spectrum.MsmsSpectrum( 287 | "test_spectrum", precursor_mz, 2, mz, intensity 288 | ) 289 | spec.annotate_proforma(f"X[+{mz[75]}]", 10, "ppm") 290 | assert spec.annotation is not None 291 | spec.remove_precursor_peak(fragment_tol_mass, fragment_tol_mode) 292 | assert np.abs(precursor_mz - spec.mz).all() > fragment_tol_mass 293 | assert len(spec.mz) <= num_peaks - 1 294 | assert len(spec.intensity) <= num_peaks - 1 295 | assert spec.annotation is None 296 | 297 | 298 | def test_remove_precursor_peak_none(): 299 | num_peaks = 150 300 | mz = np.random.uniform(100, 1400, num_peaks) 301 | fragment_tol_mass = np.random.uniform(0, 0.5) 302 | fragment_tol_mode = "Da" 303 | precursor_mz = mz[np.random.randint(0, num_peaks)] + fragment_tol_mass * 2 304 | intensity = np.random.lognormal(0, 1, num_peaks) 305 | spec = spectrum.MsmsSpectrum( 306 | "test_spectrum", precursor_mz, 2, mz, intensity 307 | ) 308 | spec.remove_precursor_peak(fragment_tol_mass, fragment_tol_mode) 309 | assert len(spec.mz) == num_peaks 310 | assert len(spec.intensity) == num_peaks 311 | assert np.abs(precursor_mz - spec.mz).all() > fragment_tol_mass 312 | 313 | 314 | def test_remove_precursor_peak_charge(): 315 | num_peaks = 150 316 | mz = np.random.uniform(100, 1400, num_peaks) 317 | fragment_tol_mass = np.random.uniform(0, 0.5) 318 | fragment_tol_mode = "Da" 319 | precursor_mz = mz[np.random.randint(0, num_peaks)] + 
fragment_tol_mass / 2 320 | precursor_charge = 3 321 | mz[-1] = ((precursor_mz - 1.0072766) * precursor_charge) / 2 + 1.0072766 322 | mz[-2] = ((precursor_mz - 1.0072766) * precursor_charge) + 1.0072766 323 | intensity = np.random.lognormal(0, 1, num_peaks) 324 | spec = spectrum.MsmsSpectrum( 325 | "test_spectrum", precursor_mz, precursor_charge, mz, intensity 326 | ) 327 | spec.remove_precursor_peak(fragment_tol_mass, fragment_tol_mode) 328 | assert np.abs(precursor_mz - spec.mz).all() > fragment_tol_mass 329 | assert len(spec.mz) <= num_peaks - 3 330 | assert len(spec.intensity) <= num_peaks - 3 331 | 332 | 333 | def test_remove_precursor_peak_isotope(): 334 | num_peaks = 150 335 | mz = np.random.uniform(100, 1400, num_peaks) 336 | fragment_tol_mass = np.random.uniform(0, 0.5) 337 | fragment_tol_mode = "Da" 338 | precursor_mz = mz[np.random.randint(0, num_peaks)] + fragment_tol_mass / 2 339 | precursor_charge = 3 340 | mz[-1] = precursor_mz + 1 / precursor_charge 341 | mz[-2] = precursor_mz + 2 / precursor_charge 342 | intensity = np.random.lognormal(0, 1, num_peaks) 343 | spec = spectrum.MsmsSpectrum( 344 | "test_spectrum", precursor_mz, precursor_charge, mz, intensity 345 | ) 346 | spec.remove_precursor_peak(fragment_tol_mass, fragment_tol_mode, 2) 347 | assert np.abs(precursor_mz - spec.mz).all() > fragment_tol_mass 348 | assert len(spec.mz) <= num_peaks - 3 349 | assert len(spec.intensity) <= num_peaks - 3 350 | 351 | 352 | def test_filter_intensity_keep_all(): 353 | num_peaks = 150 354 | mz = np.random.uniform(100, 1400, num_peaks) 355 | intensity = np.random.lognormal(0, 1, num_peaks) 356 | spec = spectrum.MsmsSpectrum("test_spectrum", 500, 2, mz, intensity) 357 | spec.filter_intensity() 358 | assert len(spec.mz) == num_peaks 359 | assert len(spec.intensity) == num_peaks 360 | 361 | 362 | def test_filter_intensity_remove_low_intensity(): 363 | num_peaks = 150 364 | mz = np.random.uniform(100, 1400, num_peaks) 365 | intensity = np.random.lognormal(0, 1, 
num_peaks) 366 | max_intensity = intensity.max() 367 | spec = spectrum.MsmsSpectrum("test_spectrum", 500, 2, mz, intensity) 368 | spec.annotate_proforma(f"X[+{mz[75]}]", 10, "ppm") 369 | min_intensity = 0.05 370 | assert spec.intensity.min() < min_intensity * spec.intensity.max() 371 | assert spec.annotation is not None 372 | spec.filter_intensity(min_intensity=min_intensity) 373 | assert len(spec.mz) < num_peaks 374 | assert len(spec.intensity) < num_peaks 375 | assert spec.annotation is None 376 | assert spec.intensity.max() == pytest.approx(max_intensity) 377 | assert spec.intensity.min() >= min_intensity * max_intensity 378 | 379 | 380 | def test_filter_intensity_max_num_peaks(): 381 | num_peaks = 150 382 | mz = np.random.uniform(100, 1400, num_peaks) 383 | intensity = np.random.lognormal(0, 1, num_peaks) 384 | max_intensity = intensity.max() 385 | spec = spectrum.MsmsSpectrum("test_spectrum", 500, 2, mz, intensity) 386 | max_num_peaks = 50 387 | spec.filter_intensity(max_num_peaks=max_num_peaks) 388 | assert len(spec.mz) == max_num_peaks 389 | assert len(spec.intensity) == max_num_peaks 390 | assert spec.intensity.max() == pytest.approx(max_intensity) 391 | 392 | 393 | def test_filter_intensity_remove_low_intensity_max_num_peaks(): 394 | num_peaks = 150 395 | mz = np.random.uniform(100, 1400, num_peaks) 396 | intensity = np.random.lognormal(0, 1, num_peaks) 397 | max_intensity = intensity.max() 398 | spec = spectrum.MsmsSpectrum("test_spectrum", 500, 2, mz, intensity) 399 | min_intensity = 0.05 400 | assert spec.intensity.min() < min_intensity * max_intensity 401 | max_num_peaks = 50 402 | spec.filter_intensity( 403 | min_intensity=min_intensity, max_num_peaks=max_num_peaks 404 | ) 405 | assert len(spec.mz) <= max_num_peaks 406 | assert len(spec.intensity) <= max_num_peaks 407 | assert spec.intensity.max() == pytest.approx(max_intensity) 408 | assert spec.intensity.min() >= min_intensity * max_intensity 409 | 410 | 411 | def test_scale_intensity_root(): 412 | 
num_peaks = 150 413 | mz = np.random.uniform(100, 1400, num_peaks) 414 | intensity = np.random.lognormal(0, 1, num_peaks) 415 | for degree in [2, 4, 10]: 416 | spec = spectrum.MsmsSpectrum( 417 | "test_spectrum", 500, 2, mz, intensity.copy() 418 | ) 419 | intensity_unscaled = spec.intensity.copy() 420 | spec.scale_intensity(scaling="root", degree=degree) 421 | np.testing.assert_allclose( 422 | spec.intensity**degree, intensity_unscaled, rtol=1e-5 423 | ) 424 | 425 | 426 | def test_scale_intensity_log(): 427 | num_peaks = 150 428 | mz = np.random.uniform(100, 1400, num_peaks) 429 | intensity = np.random.lognormal(0, 1, num_peaks) 430 | for base in [2, np.e, 10]: 431 | spec = spectrum.MsmsSpectrum( 432 | "test_spectrum", 500, 2, mz, intensity.copy() 433 | ) 434 | intensity_unscaled = spec.intensity.copy() 435 | spec.scale_intensity(scaling="log", base=base) 436 | np.testing.assert_allclose( 437 | base**spec.intensity - 1, intensity_unscaled, rtol=1e-5 438 | ) 439 | 440 | 441 | def test_scale_intensity_rank(): 442 | num_peaks = 150 443 | mz = np.random.uniform(100, 1400, num_peaks) 444 | intensity = np.random.lognormal(0, 1, num_peaks) 445 | spec = spectrum.MsmsSpectrum("test_spectrum", 500, 2, mz, intensity) 446 | spec.scale_intensity(scaling="rank") 447 | np.testing.assert_allclose( 448 | np.sort(spec.intensity), np.arange(1, num_peaks + 1) 449 | ) 450 | 451 | 452 | def test_scale_intensity_rank_less_peaks(): 453 | num_peaks = 50 454 | mz = np.random.uniform(100, 1400, num_peaks) 455 | intensity = np.random.lognormal(0, 1, num_peaks) 456 | spec = spectrum.MsmsSpectrum("test_spectrum", 500, 2, mz, intensity) 457 | max_rank = num_peaks + 50 458 | spec.scale_intensity(scaling="rank", max_rank=max_rank) 459 | np.testing.assert_allclose( 460 | np.sort(spec.intensity), np.arange(num_peaks + 1, max_rank + 1) 461 | ) 462 | 463 | 464 | def test_scale_intensity_rank_more_peaks(): 465 | num_peaks = 150 466 | mz = np.random.uniform(100, 1400, num_peaks) 467 | intensity = 
np.random.lognormal(0, 1, num_peaks) 468 | spec = spectrum.MsmsSpectrum("test_spectrum", 500, 2, mz, intensity) 469 | with pytest.raises(ValueError): 470 | spec.scale_intensity(scaling="rank", max_rank=num_peaks - 50) 471 | 472 | 473 | def test_scale_intensity_max(): 474 | num_peaks = 150 475 | mz = np.random.uniform(100, 1400, num_peaks) 476 | intensity = np.random.lognormal(0, 1, num_peaks) 477 | spec = spectrum.MsmsSpectrum("test_spectrum", 500, 2, mz, intensity) 478 | intensity_copy, max_intensity = spec.intensity.copy(), spec.intensity.max() 479 | spec.scale_intensity(max_intensity=1.0) 480 | assert spec.intensity.max() == pytest.approx(1.0) 481 | np.testing.assert_allclose( 482 | spec.intensity * max_intensity, intensity_copy, rtol=1e-5 483 | ) 484 | 485 | 486 | def test_pickle(): 487 | num_peaks = 150 488 | mz = np.random.uniform(100, 1400, num_peaks) 489 | intensity = np.random.lognormal(0, 1, num_peaks) 490 | spec = spectrum.MsmsSpectrum("test_spectrum", 500, 2, mz, intensity) 491 | spec.annotate_proforma(f"X[+{mz[75]}]", 10, "ppm") 492 | with open("temp.pkl", "wb") as f: 493 | pickle.dump(spec, f) 494 | with open("temp.pkl", "rb") as f: 495 | spec_pickled = pickle.load(f) 496 | assert spec.identifier == spec_pickled.identifier 497 | assert spec.precursor_mz == spec_pickled.precursor_mz 498 | assert spec.precursor_charge == spec_pickled.precursor_charge 499 | np.testing.assert_array_equal(spec.mz, spec_pickled.mz) 500 | np.testing.assert_array_equal(spec.intensity, spec_pickled.intensity) 501 | np.testing.assert_equal(spec.retention_time, spec_pickled.retention_time) 502 | assert spec.proforma == spec_pickled.proforma 503 | np.testing.assert_equal(spec.annotation, spec_pickled.annotation) 504 | os.remove("temp.pkl") 505 | 506 | 507 | def test_annotate_proforma(): 508 | fragment_tol_mass, fragment_tol_mode = 0.02, "Da" 509 | peptides = [ 510 | "SYELPDGQVITIGNER", 511 | "MFLSFPTTK", 512 | "DLYANTVLSGGTTMYPGIADR", 513 | "YLYEIAR", 514 | "VAPEEHPVLLTEAPLNPK", 
515 | ] 516 | for charge, peptide in enumerate(peptides, 2): 517 | fragment_mz = np.asarray( 518 | [ 519 | fragment_mz 520 | for fragment, fragment_mz in fa.get_theoretical_fragments( 521 | proforma.parse(peptide)[0], max_charge=2 522 | ) 523 | ] 524 | ) 525 | fragment_mz += np.random.uniform( 526 | -0.9 * fragment_tol_mass, 0.9 * fragment_tol_mass, len(fragment_mz) 527 | ) 528 | fragment_mz = np.random.choice( 529 | fragment_mz, min(50, len(fragment_mz)), False 530 | ) 531 | num_peaks = 150 532 | mz = np.random.uniform(100, 1400, num_peaks) 533 | mz[: len(fragment_mz)] = fragment_mz 534 | intensity = np.random.lognormal(0, 1, num_peaks) 535 | spec = spectrum.MsmsSpectrum( 536 | "test_spectrum", 537 | mass.calculate_mass(sequence=peptide, charge=charge), 538 | charge, 539 | mz, 540 | intensity, 541 | ) 542 | spec.annotate_proforma(peptide, fragment_tol_mass, fragment_tol_mode) 543 | assert np.count_nonzero(spec.annotation) >= len(fragment_mz) 544 | 545 | 546 | def test_annotate_proforma_neutral_loss(): 547 | fragment_tol_mass, fragment_tol_mode = 0.02, "Da" 548 | neutral_loss = "H2O", 18.010565 # water 549 | n_peaks = 150 550 | peptides = [ 551 | "SYELPDGQVITIGNER", 552 | "MFLSFPTTK", 553 | "DLYANTVLSGGTTMYPGIADR", 554 | "YLYEIAR", 555 | "VAPEEHPVLLTEAPLNPK", 556 | ] 557 | for charge, peptide in enumerate(peptides, 2): 558 | fragment_mz = np.asarray( 559 | [ 560 | fragment_mz 561 | for fragment, fragment_mz in fa.get_theoretical_fragments( 562 | proforma.parse(peptide)[0], 563 | max_charge=2, 564 | neutral_losses={ 565 | None: 0, 566 | neutral_loss[0]: -neutral_loss[1], 567 | }, 568 | ) 569 | ] 570 | ) 571 | fragment_mz += np.random.uniform( 572 | -0.9 * fragment_tol_mass, 0.9 * fragment_tol_mass, len(fragment_mz) 573 | ) 574 | fragment_mz = np.random.choice( 575 | fragment_mz, min(50, len(fragment_mz)), False 576 | ) 577 | mz = np.random.uniform(100, 1400, n_peaks) 578 | mz[: len(fragment_mz)] = fragment_mz 579 | intensity = np.random.lognormal(0, 1, n_peaks) 580 
| spec = spectrum.MsmsSpectrum( 581 | "test_spectrum", 582 | mass.calculate_mass(sequence=peptide, charge=charge), 583 | charge, 584 | mz, 585 | intensity, 586 | ) 587 | spec.annotate_proforma( 588 | peptide, 589 | fragment_tol_mass, 590 | fragment_tol_mode, 591 | neutral_losses={neutral_loss[0]: -neutral_loss[1]}, 592 | ) 593 | assert np.count_nonzero(spec.annotation) >= len(fragment_mz) 594 | --------------------------------------------------------------------------------