├── .github
    └── workflows
    │   ├── lint.yml
    │   ├── publish.yml
    │   └── tests.yml
├── .gitignore
├── .pre-commit-config.yaml
├── .readthedocs.yml
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE.txt
├── MANIFEST.in
├── README.md
├── codecov.yml
├── docs
    ├── Makefile
    ├── make.bat
    └── src
    │   ├── annot_fmt.png
    │   ├── annotating.md
    │   ├── api.md
    │   ├── conf.py
    │   ├── contact.md
    │   ├── facet.png
    │   ├── index.md
    │   ├── install.md
    │   ├── ion_types.png
    │   ├── iplot_mirror.json
    │   ├── iplot_spectrum.json
    │   ├── mass_errors.png
    │   ├── mirror.png
    │   ├── neutral_losses_1.png
    │   ├── neutral_losses_2.png
    │   ├── plotting.md
    │   ├── proforma_ast.png
    │   ├── proforma_ex1.png
    │   ├── proforma_ex2.png
    │   ├── proforma_ex3.png
    │   ├── quickstart.md
    │   ├── quickstart.png
    │   ├── runtime.md
    │   └── runtime.png
├── environment.yml
├── pyproject.toml
├── setup.cfg
├── setup.py
├── spectrum_utils.png
├── spectrum_utils
    ├── __init__.py
    ├── fragment_annotation.py
    ├── iplot.py
    ├── monosaccharide.lark
    ├── plot.py
    ├── proforma.ebnf
    ├── proforma.py
    ├── spectrum.py
    └── utils.py
└── tests
    ├── __init__.py
    ├── fragment_annotation_test.py
    ├── proforma_test.py
    └── spectrum_test.py


/.github/workflows/lint.yml:
--------------------------------------------------------------------------------
 1 | name: Lint
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ main, dev ]
 6 |   pull_request:
 7 |     branches: [ main, dev ]
 8 | 
 9 | jobs:
10 |   lint:
11 |     runs-on: ubuntu-latest
12 |     steps:
13 |       - uses: actions/checkout@v4
14 |       - name: Setup Python 3.12
15 |         uses: actions/setup-python@v5
16 |         with:
17 |           python-version: "3.12"
18 | 
19 |       - name: Install Ruff
20 |         run: |
21 |           python -m pip install --upgrade pip
22 |           pip install ruff
23 | 
24 |       - name: Lint with Ruff
25 |         run: |
26 |           ruff check . --output-format=github
27 | 
28 |       - name: Check formatting with Ruff
29 |         run: |
30 |           ruff format --check .
31 | 


--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
 1 | name: Publish to PyPI
 2 | 
 3 | on:
 4 |   release:
 5 |     types: [ created ]
 6 | 
 7 | jobs:
 8 |   deploy:
 9 |     runs-on: ubuntu-latest
10 | 
11 |     steps:
12 |     - uses: actions/checkout@v4
13 |     - name: Set up Python
14 |       uses: actions/setup-python@v5
15 |       with:
16 |         python-version: "3.x"
17 |     - name: Install dependencies
18 |       run: |
19 |         python -m pip install --upgrade pip
20 |         pip install setuptools wheel twine build
21 |     - name: Build and publish
22 |       env:
23 |         TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
24 |         TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
25 |       run: |
26 |         python -m build --sdist --wheel .
27 |         twine upload dist/*
28 | 


--------------------------------------------------------------------------------
/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
 1 | name: Run tests
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ main, dev ]
 6 |   pull_request:
 7 |     branches: [ main, dev ]
 8 |   schedule:
 9 |   - cron: "0 0 1 1/1 *" # Run monthly.
10 | 
11 | jobs:
12 |   build:
13 |     runs-on: ${{ matrix.os }}
14 |     strategy:
15 |       matrix:
16 |         os: [ ubuntu-latest, windows-latest, macos-latest ]
17 |         python-version: [ "3.10", "3.11", "3.12" ]
18 | 
19 |     steps:
20 |     - uses: actions/checkout@v4
21 |     - name: Set up Python ${{ matrix.python-version }}
22 |       uses: actions/setup-python@v5
23 |       with:
24 |         python-version: ${{ matrix.python-version }}
25 | 
26 |     - name: Install dependencies
27 |       run: |
28 |         pip install uv
29 |         uv pip install pytest pytest-cov wheel --system
30 |         uv pip install -e . --system
31 |     - name: Run unit and system tests
32 |       run: |
33 |         pytest --cov=spectrum_utils --verbose tests/
34 |     - name: Upload coverage to codecov
35 |       uses: codecov/codecov-action@v4
36 |       with:
37 |         token: ${{ secrets.CODECOV_TOKEN }}
38 |         fail_ci_if_error: true
39 |         verbose: true
40 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # https://gist.github.com/octocat/9257657
  2 | 
  3 | # Compiled source #
  4 | ###################
  5 | *.com
  6 | *.class
  7 | *.dll
  8 | *.exe
  9 | *.o
 10 | *.so
 11 | 
 12 | # Packages #
 13 | ############
 14 | # it's better to unpack these files and commit the raw source
 15 | # git has its own built in compression methods
 16 | *.7z
 17 | *.dmg
 18 | *.gz
 19 | *.iso
 20 | *.jar
 21 | *.rar
 22 | *.tar
 23 | *.zip
 24 | 
 25 | # Logs and databases #
 26 | ######################
 27 | *.log
 28 | *.sql
 29 | *.sqlite
 30 | 
 31 | # OS generated files #
 32 | ######################
 33 | .DS_Store
 34 | .DS_Store?
 35 | ._*
 36 | .Spotlight-V100
 37 | .Trashes
 38 | ehthumbs.db
 39 | Thumbs.db
 40 | 
 41 | # https://github.com/github/gitignore/blob/master/ArchLinuxPackages.gitignore
 42 | 
 43 | *.tar
 44 | *.tar.*
 45 | *.jar
 46 | *.exe
 47 | *.msi
 48 | *.zip
 49 | *.tgz
 50 | *.log
 51 | *.log.*
 52 | *.sig
 53 | 
 54 | pkg/
 55 | #src/
 56 | 
 57 | # https://github.com/github/gitignore/blob/master/C%2B%2B.gitignore
 58 | 
 59 | # Prerequisites
 60 | *.d
 61 | 
 62 | # Compiled Object files
 63 | *.slo
 64 | *.lo
 65 | *.o
 66 | *.obj
 67 | 
 68 | # Precompiled Headers
 69 | *.gch
 70 | *.pch
 71 | 
 72 | # Compiled Dynamic libraries
 73 | *.so
 74 | *.dylib
 75 | *.dll
 76 | 
 77 | # Fortran module files
 78 | *.mod
 79 | *.smod
 80 | 
 81 | # Compiled Static libraries
 82 | *.lai
 83 | *.la
 84 | *.a
 85 | *.lib
 86 | 
 87 | # Executables
 88 | *.exe
 89 | *.out
 90 | *.app
 91 | 
 92 | # https://github.com/github/gitignore/blob/master/C.gitignore
 93 | 
 94 | # Prerequisites
 95 | *.d
 96 | 
 97 | # Object files
 98 | *.o
 99 | *.ko
100 | *.obj
101 | *.elf
102 | 
103 | # Linker output
104 | *.ilk
105 | *.map
106 | *.exp
107 | 
108 | # Precompiled Headers
109 | *.gch
110 | *.pch
111 | 
112 | # Libraries
113 | *.lib
114 | *.a
115 | *.la
116 | *.lo
117 | 
118 | # Shared objects (inc. Windows DLLs)
119 | *.dll
120 | *.so
121 | *.so.*
122 | *.dylib
123 | 
124 | # Executables
125 | *.exe
126 | *.out
127 | *.app
128 | *.i*86
129 | *.x86_64
130 | *.hex
131 | 
132 | # Debug files
133 | *.dSYM/
134 | *.su
135 | *.idb
136 | *.pdb
137 | 
138 | # Kernel Module Compile Results
139 | *.mod*
140 | *.cmd
141 | .tmp_versions/
142 | modules.order
143 | Module.symvers
144 | Mkfile.old
145 | dkms.conf
146 | 
147 | # https://github.com/github/gitignore/blob/master/CMake.gitignore
148 | 
149 | CMakeLists.txt.user
150 | CMakeCache.txt
151 | CMakeFiles
152 | CMakeScripts
153 | Testing
154 | Makefile
155 | cmake_install.cmake
156 | install_manifest.txt
157 | compile_commands.json
158 | CTestTestfile.cmake
159 | 
160 | # https://github.com/github/gitignore/blob/master/CUDA.gitignore
161 | 
162 | *.i
163 | *.ii
164 | *.gpu
165 | *.ptx
166 | *.cubin
167 | *.fatbin
168 | 
169 | # https://github.com/github/gitignore/blob/master/Java.gitignore
170 | 
171 | # Compiled class file
172 | *.class
173 | 
174 | # Log file
175 | *.log
176 | 
177 | # BlueJ files
178 | *.ctxt
179 | 
180 | # Mobile Tools for Java (J2ME)
181 | .mtj.tmp/
182 | 
183 | # Package Files #
184 | *.jar
185 | *.war
186 | *.nar
187 | *.ear
188 | *.zip
189 | *.tar.gz
190 | *.rar
191 | 
192 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
193 | hs_err_pid*
194 | 
195 | # https://github.com/github/gitignore/blob/master/Maven.gitignore
196 | 
197 | target/
198 | pom.xml.tag
199 | pom.xml.releaseBackup
200 | pom.xml.versionsBackup
201 | pom.xml.next
202 | release.properties
203 | dependency-reduced-pom.xml
204 | buildNumber.properties
205 | .mvn/timing.properties
206 | .mvn/wrapper/maven-wrapper.jar
207 | 
208 | # https://github.com/github/gitignore/blob/master/Python.gitignore
209 | 
210 | # Byte-compiled / optimized / DLL files
211 | __pycache__/
212 | *.py[cod]
213 | *$py.class
214 | 
215 | # C extensions
216 | *.so
217 | 
218 | # Distribution / packaging
219 | .Python
220 | build/
221 | develop-eggs/
222 | dist/
223 | downloads/
224 | eggs/
225 | .eggs/
226 | lib/
227 | lib64/
228 | parts/
229 | sdist/
230 | var/
231 | wheels/
232 | share/python-wheels/
233 | *.egg-info/
234 | .installed.cfg
235 | *.egg
236 | MANIFEST
237 | 
238 | # PyInstaller
239 | #  Usually these files are written by a python script from a template
240 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
241 | *.manifest
242 | *.spec
243 | 
244 | # Installer logs
245 | pip-log.txt
246 | pip-delete-this-directory.txt
247 | 
248 | # Unit test / coverage reports
249 | htmlcov/
250 | .tox/
251 | .nox/
252 | .coverage
253 | .coverage.*
254 | .cache
255 | nosetests.xml
256 | coverage.xml
257 | *.cover
258 | .hypothesis/
259 | .pytest_cache/
260 | 
261 | # Translations
262 | *.mo
263 | *.pot
264 | 
265 | # Django stuff:
266 | *.log
267 | local_settings.py
268 | db.sqlite3
269 | 
270 | # Flask stuff:
271 | instance/
272 | .webassets-cache
273 | 
274 | # Scrapy stuff:
275 | .scrapy
276 | 
277 | # Sphinx documentation
278 | docs/_build/
279 | 
280 | # PyBuilder
281 | target/
282 | 
283 | # Jupyter Notebook
284 | .ipynb_checkpoints
285 | 
286 | # IPython
287 | profile_default/
288 | ipython_config.py
289 | 
290 | # pyenv
291 | .python-version
292 | 
293 | # celery beat schedule file
294 | celerybeat-schedule
295 | 
296 | # SageMath parsed files
297 | *.sage.py
298 | 
299 | # Environments
300 | .env
301 | .venv
302 | env/
303 | venv/
304 | ENV/
305 | env.bak/
306 | venv.bak/
307 | 
308 | # Spyder project settings
309 | .spyderproject
310 | .spyproject
311 | 
312 | # Rope project settings
313 | .ropeproject
314 | 
315 | # mkdocs documentation
316 | /site
317 | 
318 | # mypy
319 | .mypy_cache/
320 | .dmypy.json
321 | dmypy.json
322 | 
323 | # Pyre type checker
324 | .pyre/
325 | 
326 | # https://github.com/github/gitignore/blob/master/R.gitignore
327 | 
328 | # History files
329 | .Rhistory
330 | .Rapp.history
331 | 
332 | # Session Data files
333 | .RData
334 | 
335 | # Example code in package build process
336 | *-Ex.R
337 | 
338 | # Output files from R CMD build
339 | /*.tar.gz
340 | 
341 | # Output files from R CMD check
342 | /*.Rcheck/
343 | 
344 | # RStudio files
345 | .Rproj.user/
346 | 
347 | # produced vignettes
348 | vignettes/*.html
349 | vignettes/*.pdf
350 | 
351 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
352 | .httr-oauth
353 | 
354 | # knitr and R markdown default cache directories
355 | /*_cache/
356 | /cache/
357 | 
358 | # Temporary files created by R markdown
359 | *.utf8.md
360 | *.knit.md
361 | 
362 | # https://github.com/github/gitignore/blob/master/TeX.gitignore
363 | 
364 | ## Core latex/pdflatex auxiliary files:
365 | *.aux
366 | *.lof
367 | *.log
368 | *.lot
369 | *.fls
370 | *.out
371 | *.toc
372 | *.fmt
373 | *.fot
374 | *.cb
375 | *.cb2
376 | .*.lb
377 | 
378 | ## Intermediate documents:
379 | *.dvi
380 | *.xdv
381 | *-converted-to.*
382 | # these rules might exclude image files for figures etc.
383 | # *.ps
384 | # *.eps
385 | # *.pdf
386 | 
387 | ## Generated if empty string is given at "Please type another file name for output:"
388 | .pdf
389 | 
390 | ## Bibliography auxiliary files (bibtex/biblatex/biber):
391 | *.bbl
392 | *.bcf
393 | *.blg
394 | *-blx.aux
395 | *-blx.bib
396 | *.run.xml
397 | 
398 | ## Build tool auxiliary files:
399 | *.fdb_latexmk
400 | *.synctex
401 | *.synctex(busy)
402 | *.synctex.gz
403 | *.synctex.gz(busy)
404 | *.pdfsync
405 | 
406 | ## Build tool directories for auxiliary files
407 | # latexrun
408 | latex.out/
409 | 
410 | ## Auxiliary and intermediate files from other packages:
411 | # algorithms
412 | *.alg
413 | *.loa
414 | 
415 | # achemso
416 | acs-*.bib
417 | 
418 | # amsthm
419 | *.thm
420 | 
421 | # beamer
422 | *.nav
423 | *.pre
424 | *.snm
425 | *.vrb
426 | 
427 | # changes
428 | *.soc
429 | 
430 | # comment
431 | *.cut
432 | 
433 | # cprotect
434 | *.cpt
435 | 
436 | # elsarticle (documentclass of Elsevier journals)
437 | *.spl
438 | 
439 | # endnotes
440 | *.ent
441 | 
442 | # fixme
443 | *.lox
444 | 
445 | # feynmf/feynmp
446 | *.mf
447 | *.mp
448 | *.t[1-9]
449 | *.t[1-9][0-9]
450 | *.tfm
451 | 
452 | #(r)(e)ledmac/(r)(e)ledpar
453 | *.end
454 | *.?end
455 | *.[1-9]
456 | *.[1-9][0-9]
457 | *.[1-9][0-9][0-9]
458 | *.[1-9]R
459 | *.[1-9][0-9]R
460 | *.[1-9][0-9][0-9]R
461 | *.eledsec[1-9]
462 | *.eledsec[1-9]R
463 | *.eledsec[1-9][0-9]
464 | *.eledsec[1-9][0-9]R
465 | *.eledsec[1-9][0-9][0-9]
466 | *.eledsec[1-9][0-9][0-9]R
467 | 
468 | # glossaries
469 | *.acn
470 | *.acr
471 | *.glg
472 | *.glo
473 | *.gls
474 | *.glsdefs
475 | 
476 | # gnuplottex
477 | *-gnuplottex-*
478 | 
479 | # gregoriotex
480 | *.gaux
481 | *.gtex
482 | 
483 | # htlatex
484 | *.4ct
485 | *.4tc
486 | *.idv
487 | *.lg
488 | *.trc
489 | *.xref
490 | 
491 | # hyperref
492 | *.brf
493 | 
494 | # knitr
495 | *-concordance.tex
496 | # TODO Comment the next line if you want to keep your tikz graphics files
497 | *.tikz
498 | *-tikzDictionary
499 | 
500 | # listings
501 | *.lol
502 | 
503 | # makeidx
504 | *.idx
505 | *.ilg
506 | *.ind
507 | *.ist
508 | 
509 | # minitoc
510 | *.maf
511 | *.mlf
512 | *.mlt
513 | *.mtc[0-9]*
514 | *.slf[0-9]*
515 | *.slt[0-9]*
516 | *.stc[0-9]*
517 | 
518 | # minted
519 | _minted*
520 | *.pyg
521 | 
522 | # morewrites
523 | *.mw
524 | 
525 | # nomencl
526 | *.nlg
527 | *.nlo
528 | *.nls
529 | 
530 | # pax
531 | *.pax
532 | 
533 | # pdfpcnotes
534 | *.pdfpc
535 | 
536 | # sagetex
537 | *.sagetex.sage
538 | *.sagetex.py
539 | *.sagetex.scmd
540 | 
541 | # scrwfile
542 | *.wrt
543 | 
544 | # sympy
545 | *.sout
546 | *.sympy
547 | sympy-plots-for-*.tex/
548 | 
549 | # pdfcomment
550 | *.upa
551 | *.upb
552 | 
553 | # pythontex
554 | *.pytxcode
555 | pythontex-files-*/
556 | 
557 | # tcolorbox
558 | *.listing
559 | 
560 | # thmtools
561 | *.loe
562 | 
563 | # TikZ & PGF
564 | *.dpth
565 | *.md5
566 | *.auxlock
567 | 
568 | # todonotes
569 | *.tdo
570 | 
571 | # vhistory
572 | *.hst
573 | *.ver
574 | 
575 | # easy-todo
576 | *.lod
577 | 
578 | # xcolor
579 | *.xcp
580 | 
581 | # xmpincl
582 | *.xmpi
583 | 
584 | # xindy
585 | *.xdy
586 | 
587 | # xypic precompiled matrices
588 | *.xyc
589 | 
590 | # endfloat
591 | *.ttt
592 | *.fff
593 | 
594 | # Latexian
595 | TSWLatexianTemp*
596 | 
597 | ## Editors:
598 | # WinEdt
599 | *.bak
600 | *.sav
601 | 
602 | # Texpad
603 | .texpadtmp
604 | 
605 | # LyX
606 | *.lyx~
607 | 
608 | # Kile
609 | *.backup
610 | 
611 | # KBibTeX
612 | *~[0-9]*
613 | 
614 | # auto folder when using emacs and auctex
615 | ./auto/*
616 | *.el
617 | 
618 | # expex forward references with \gathertags
619 | *-tags.tex
620 | 
621 | # standalone packages
622 | *.sta
623 | 
624 | # https://github.com/github/gitignore/blob/master/Global/JetBrains.gitignore
625 | 
626 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
627 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
628 | 
629 | .idea/
630 | 
631 | # User-specific stuff
632 | .idea/**/workspace.xml
633 | .idea/**/tasks.xml
634 | .idea/**/usage.statistics.xml
635 | .idea/**/dictionaries
636 | .idea/**/shelf
637 | 
638 | # Generated files
639 | .idea/**/contentModel.xml
640 | 
641 | # Sensitive or high-churn files
642 | .idea/**/dataSources/
643 | .idea/**/dataSources.ids
644 | .idea/**/dataSources.local.xml
645 | .idea/**/sqlDataSources.xml
646 | .idea/**/dynamic.xml
647 | .idea/**/uiDesigner.xml
648 | .idea/**/dbnavigator.xml
649 | 
650 | # Gradle
651 | .idea/**/gradle.xml
652 | .idea/**/libraries
653 | 
654 | # Gradle and Maven with auto-import
655 | # When using Gradle or Maven with auto-import, you should exclude module files,
656 | # since they will be recreated, and may cause churn.  Uncomment if using
657 | # auto-import.
658 | # .idea/modules.xml
659 | # .idea/*.iml
660 | # .idea/modules
661 | 
662 | # CMake
663 | cmake-build-*/
664 | 
665 | # Mongo Explorer plugin
666 | .idea/**/mongoSettings.xml
667 | 
668 | # File-based project format
669 | *.iws
670 | 
671 | # IntelliJ
672 | out/
673 | 
674 | # mpeltonen/sbt-idea plugin
675 | .idea_modules/
676 | 
677 | # JIRA plugin
678 | atlassian-ide-plugin.xml
679 | 
680 | # Cursive Clojure plugin
681 | .idea/replstate.xml
682 | 
683 | # Crashlytics plugin (for Android Studio and IntelliJ)
684 | com_crashlytics_export_strings.xml
685 | crashlytics.properties
686 | crashlytics-build.properties
687 | fabric.properties
688 | 
689 | # Editor-based Rest Client
690 | .idea/httpRequests
691 | 
692 | # Android studio 3.1+ serialized cache file
693 | .idea/caches/build_file_checksums.ser
694 | 
695 | # https://github.com/github/gitignore/blob/master/Global/Kate.gitignore
696 | 
697 | # Swap Files #
698 | .*.kate-swp
699 | .swp.*
700 | 
701 | # https://github.com/github/gitignore/blob/master/Global/LibreOffice.gitignore
702 | 
703 | # LibreOffice locks
704 | .~lock.*#
705 | 
706 | # https://github.com/github/gitignore/blob/master/Global/Linux.gitignore
707 | 
708 | *~
709 | 
710 | # temporary files which can be created if a process still has a handle open of a deleted file
711 | .fuse_hidden*
712 | 
713 | # KDE directory preferences
714 | .directory
715 | 
716 | # Linux trash folder which might appear on any partition or disk
717 | .Trash-*
718 | 
719 | # .nfs files are created when an open file is removed but is still being accessed
720 | .nfs*
721 | 
722 | # https://github.com/github/gitignore/blob/master/Global/SublimeText.gitignore
723 | 
724 | # Cache files for Sublime Text
725 | *.tmlanguage.cache
726 | *.tmPreferences.cache
727 | *.stTheme.cache
728 | 
729 | # Workspace files are user-specific
730 | *.sublime-workspace
731 | 
732 | # Project files should be checked into the repository, unless a significant
733 | # proportion of contributors will probably not be using Sublime Text
734 | # *.sublime-project
735 | 
736 | # SFTP configuration file
737 | sftp-config.json
738 | 
739 | # Package control specific files
740 | Package Control.last-run
741 | Package Control.ca-list
742 | Package Control.ca-bundle
743 | Package Control.system-ca-bundle
744 | Package Control.cache/
745 | Package Control.ca-certs/
746 | Package Control.merged-ca-bundle
747 | Package Control.user-ca-bundle
748 | oscrypto-ca-bundle.crt
749 | bh_unicode_properties.cache
750 | 
751 | # Sublime-github package stores a github token in this file
752 | # https://packagecontrol.io/packages/sublime-github
753 | GitHub.sublime-settings
754 | 
755 | # https://github.com/github/gitignore/blob/master/Global/VisualStudioCode.gitignore
756 | 
757 | .vscode/*
758 | !.vscode/settings.json
759 | !.vscode/tasks.json
760 | !.vscode/launch.json
761 | !.vscode/extensions.json
762 | 
763 | # https://github.com/github/gitignore/blob/master/Global/macOS.gitignore
764 | 
765 | # General
766 | .DS_Store
767 | .AppleDouble
768 | .LSOverride
769 | 
770 | # Icon must end with two \r
771 | Icon
772 | 
773 | 
774 | # Thumbnails
775 | ._*
776 | 
777 | # Files that might appear in the root of a volume
778 | .DocumentRevisions-V100
779 | .fseventsd
780 | .Spotlight-V100
781 | .TemporaryItems
782 | .Trashes
783 | .VolumeIcon.icns
784 | .com.apple.timemachine.donotpresent
785 | 
786 | # Directories potentially created on remote AFP share
787 | .AppleDB
788 | .AppleDesktop
789 | Network Trash Folder
790 | Temporary Items
791 | .apdisk
792 | 
793 | # https://github.com/github/gitignore/blob/master/community/Python/JupyterNotebooks.gitignore
794 | 
795 | # gitignore template for Jupyter Notebooks
796 | # website: http://jupyter.org/
797 | 
798 | .ipynb_checkpoints
799 | */.ipynb_checkpoints/*
800 | 
801 | # Remove previous ipynb_checkpoints
802 | #   git rm -r .ipynb_checkpoints/
803 | #
804 | 
805 | # https://github.com/github/gitignore/blob/master/community/Python/Nikola.gitignore
806 | 
807 | # gitignore template for Nikola static site generator
808 | # website: https://getnikola.com/
809 | 
810 | .doit.db
811 | *.py[cod]
812 | cache/
813 | output/
814 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 | - repo: https://github.com/astral-sh/ruff-pre-commit
 3 |   # Ruff version.
 4 |   rev: v0.4.1
 5 |   hooks:
 6 |     # Run the linter.
 7 |     - id: ruff
 8 |       types_or: [ python, pyi, jupyter ]
 9 |       args: [ --fix ]
10 |     # Run the formatter.
11 |     - id: ruff-format
12 |       types_or: [ python, pyi, jupyter ]
13 | 


--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | 
 3 | python:
 4 |    version: "3.10"
 5 |    install:
 6 |       - method: pip
 7 |         path: .
 8 |         extra_requirements:
 9 |            - docs, iplot
10 | 


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # spectrum_utils Code of Conduct
 3 | 
 4 | ## Our Pledge
 5 | 
 6 | We as members, contributors, and leaders pledge to make participation in our
 7 | community a harassment-free experience for everyone, regardless of age, body
 8 | size, visible or invisible disability, ethnicity, sex characteristics, gender
 9 | identity and expression, level of experience, education, socio-economic status,
10 | nationality, personal appearance, race, religion, or sexual identity
11 | and orientation.
12 | 
13 | We pledge to act and interact in ways that contribute to an open, welcoming, diverse, inclusive, and healthy community.
14 | 
15 | ## Our Standards
16 | 
17 | Examples of behavior that contributes to a positive environment for our community include:
18 | 
19 | * Demonstrating empathy and kindness toward other people.
20 | * Being respectful of differing opinions, viewpoints, and experiences.
21 | * Giving and gracefully accepting constructive feedback.
22 | * Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience.
23 | * Focusing on what is best not just for us as individuals, but for the overall community.
24 | 
25 | Examples of unacceptable behavior include:
26 | 
27 | * The use of sexualized language or imagery, and sexual attention or advances of any kind.
28 | * Trolling, insulting or derogatory comments, and personal or political attacks.
29 | * Public or private harassment.
30 | * Publishing others' private information, such as a physical or email address, without their explicit permission.
31 | * Other conduct which could reasonably be considered inappropriate in a professional setting.
32 | 
33 | ## Enforcement Responsibilities
34 | 
35 | Community leaders are responsible for clarifying and enforcing our standards of acceptable behavior and will take appropriate and fair corrective action in response to any behavior that they deem inappropriate, threatening, offensive, or harmful.
36 | 
37 | Community leaders have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, and will communicate reasons for moderation decisions when appropriate.
38 | 
39 | ## Scope
40 | 
41 | This Code of Conduct applies within all community spaces, and also applies when an individual is officially representing the community in public spaces.
42 | Examples of representing our community include using an official e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event.
43 | 
44 | ## Enforcement
45 | 
46 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at <wout.bittremieux@uantwerpen.be>.
47 | All complaints will be reviewed and investigated promptly and fairly.
48 | 
49 | All community leaders are obligated to respect the privacy and security of the reporter of any incident.
50 | 
51 | ## Enforcement Guidelines
52 | 
53 | Community leaders will follow these Community Impact Guidelines in determining the consequences for any action they deem in violation of this Code of Conduct:
54 | 
55 | ### 1. Correction
56 | 
57 | **Community Impact**: Use of inappropriate language or other behavior deemed unprofessional or unwelcome in the community.
58 | 
59 | **Consequence**: A private, written warning from community leaders, providing clarity around the nature of the violation and an explanation of why the behavior was inappropriate.
60 | A public apology may be requested.
61 | 
62 | ### 2. Warning
63 | 
64 | **Community Impact**: A violation through a single incident or series of actions.
65 | 
66 | **Consequence**: A warning with consequences for continued behavior.
67 | No interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, for a specified period of time.
68 | This includes avoiding interactions in community spaces as well as external channels like social media.
69 | Violating these terms may lead to a temporary or permanent ban.
70 | 
71 | ### 3. Temporary Ban
72 | 
73 | **Community Impact**: A serious violation of community standards, including sustained inappropriate behavior.
74 | 
75 | **Consequence**: A temporary ban from any sort of interaction or public communication with the community for a specified period of time.
76 | No public or private interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, is allowed during this period.
77 | Violating these terms may lead to a permanent ban.
78 | 
79 | ### 4. Permanent Ban
80 | 
81 | **Community Impact**: Demonstrating a pattern of violation of community standards, including sustained inappropriate behavior, harassment of an individual, or aggression toward or disparagement of classes of individuals.
82 | 
83 | **Consequence**: A permanent ban from any sort of public interaction within the community.
84 | 
85 | ## Attribution
86 | 
87 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 2.0, available at [https://www.contributor-covenant.org/version/2/0/code_of_conduct.html][v2.0].
88 | 
89 | [homepage]: https://www.contributor-covenant.org
90 | [v2.0]: https://www.contributor-covenant.org/version/2/0/code_of_conduct.html
91 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing to spectrum_utils
 2 | 
 3 | :+1::tada: First off, thanks for taking the time to contribute! :tada::+1:
 4 | 
 5 | The following document provides guidelines for contributing to the documentation and the code of spectrum_utils. These are mostly guidelines, not rules. Use your best judgment, and feel free to propose changes to this document in a pull request.
 6 | **No contribution is too small!**
 7 | Even fixing a simple typo in the documentation is immensely helpful.
 8 | 
 9 | ## Contributing to the documentation
10 | 
11 | We use [sphinx](https://www.sphinx-doc.org/en/master/) to generate our documentation and deploy it to this site.
12 | Most of the pages on the site are created from simple text files written in markdown.
13 | There are two exceptions to this:
14 | 
15 | 1. The API documentation is automatically generated from the documentation contained in the code.
16 | 
17 | 2. The vignettes are created from Jupyter notebooks.
18 | 
19 | ### Editing most documents
20 | 
21 | The easiest way to edit a document is by clicking the "Edit on GitHub" link in the top right hand corner of each page.
22 | You'll be taken to GitHub where you can click on the pencil to edit the document.
23 | 
24 | You can then make your changes directly on GitHub.
25 | Once you're finished, fill in a description of what you changed and click the "Propose Changes" button.
26 | 
27 | Alternatively, these documents live in the `docs/src` directory of the repository and can be edited like code.
28 | See [Contributing to the code](#contributing-to-the-code) below for more details on contributing this way.
29 | 
30 | 
31 | ## Contributing to the code
32 | 
33 | We welcome contributions to the source code of spectrum_utils---particularly ones that address discussed [issues](https://github.com/bittremieux/spectrum_utils/issues).
34 | 
35 | Contributions to spectrum_utils follow a standard GitHub contribution workflow:
36 | 
37 | 1. Create your own fork of the spectrum_utils repository on GitHub.
38 | 
39 | 2. Clone your forked spectrum_utils repository to work on locally.
40 | 
41 | 3. Create a new branch with a descriptive name for your changes:
42 | 
43 |     ```bash
44 |     git checkout -b fix_x
45 |     ```
46 | 
47 | 4. Make your changes (make sure to read below first).
48 | 
49 | 5. Add, commit, and push your changes to your forked repository.
50 | 
51 | 6. On the GitHub page for your forked repository, click "Pull request" to propose adding your changes to spectrum_utils.
52 | 
53 | 7. We'll review, discuss, and help you make any revisions that are required.
54 | If all goes well, your changes will be added to spectrum_utils in the next release!
55 | 
56 | 
57 | ### Python code style
58 | 
59 | The spectrum_utils project follows the [PEP 8 guidelines](https://www.python.org/dev/peps/pep-0008/) for Python code style.
60 | More specifically, we use [black](https://black.readthedocs.io/en/stable/) to format and lint Python code in spectrum_utils.
61 | 
62 | We highly recommend setting up a pre-commit hook for black.
63 | This will run black on all of the Python source files before the changes can be committed.
64 | Because we run black for code linting as part of our tests, setting up this hook can save you from having to revise code formatting.
65 | Take the following steps to set up the pre-commit hook:
66 | 
67 | 1. Verify that black and pre-commit are installed.
68 | If not, you can install them with pip or conda:
69 | 
70 |     ```bash
71 |     # Using pip
72 |     pip install black pre-commit
73 |     
74 |     # Using conda
75 |     conda install -c conda-forge black pre-commit
76 |     ```
77 | 
78 | 2. Navigate to your local copy of the spectrum_utils repository and activate the hook:
79 | 
80 |     ```bash
81 |     pre-commit install
82 |     ```
83 | 
84 | Once the hook is installed, black will be run before any commit is made.
85 | If a file is changed by black, then you need to `git add` the file again before finishing the commit.
86 |  
87 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include spectrum_utils/monosaccharide.lark
2 | include spectrum_utils/proforma.ebnf
3 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # spectrum_utils
 2 | 
 3 | [![conda](https://img.shields.io/conda/vn/bioconda/spectrum_utils?color=green)](http://bioconda.github.io/recipes/spectrum_utils/README.html)
 4 | [![PyPI](https://img.shields.io/pypi/v/spectrum_utils?color=green)](https://pypi.org/project/spectrum_utils/)
 5 | [![Build status](https://github.com/bittremieux/spectrum_utils/workflows/tests/badge.svg)](https://github.com/bittremieux/spectrum_utils/actions?query=workflow:tests)
 6 | [![docs](https://readthedocs.org/projects/spectrum-utils/badge/?version=latest)](https://spectrum-utils.readthedocs.io/en/latest/?badge=latest)
 7 | 
 8 | spectrum_utils is a Python package for efficient mass spectrometry data processing and visualization.
 9 | 
10 | spectrum_utils contains the following features:
11 | 
12 | - Spectrum loading from online proteomics and metabolomics data resources using the [Universal Spectrum Identifier (USI)](https://www.psidev.info/usi) mechanism.
13 | - Common spectrum processing operations (precursor & noise peak removal, intensity filtering, intensity scaling) optimized for computational efficiency.
14 | - Annotating observed spectrum fragments using the [ProForma 2.0 specification](https://www.psidev.info/proforma) for (modified) peptidoforms.
15 | - Publication-quality, fully customizable spectrum plotting and interactive spectrum plotting.
16 | 
17 | ![spectrum_utils logo](spectrum_utils.png)
18 | 
19 | ## Installation
20 | 
21 | spectrum_utils requires Python version 3.10+ and can be installed with pip or conda.
22 | 
23 | Using pip:
24 | 
25 |     pip install spectrum_utils[iplot]
26 | 
27 | Using conda:
28 | 
29 |     conda install -c bioconda spectrum_utils
30 | 
31 | ## Documentation
32 | 
33 | Please see the [documentation](https://spectrum-utils.readthedocs.io/) for detailed installation instructions, usage examples, the API reference, and more information.
34 | 
35 | ## Citation
36 |  
37 | spectrum_utils is freely available as open source under the [Apache 2.0 license](http://opensource.org/licenses/Apache-2.0).
38 | 
39 | When using spectrum_utils, please cite the following manuscripts:
40 |  
41 | - Wout Bittremieux. "spectrum_utils: A Python package for mass spectrometry data processing and visualization." _Analytical Chemistry_ **92**, 659--661 (2020) doi:[10.1021/acs.analchem.9b04884](https://doi.org/10.1021/acs.analchem.9b04884).
42 | - Wout Bittremieux, Lev Levitsky, Matteo Pilz, Timo Sachsenberg, Florian Huber, Mingxun Wang, Pieter C. Dorrestein. "Unified and standardized mass spectrometry data processing in Python using spectrum_utils." _Journal of Proteome Research_ **22**, 625--631 (2023) doi:[10.1021/acs.jproteome.2c00632](https://doi.org/10.1021/acs.jproteome.2c00632).
43 | 


--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
1 | # Only allow commits that increase or maintain test coverage.
2 | coverage:
3 |   status:
4 |     project:
5 |       default:
6 |         target: auto  # auto compares coverage to the previous base commit.
7 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line.
 5 | SPHINXOPTS    =
 6 | SPHINXBUILD   = sphinx-build
 7 | SOURCEDIR     = src
 8 | BUILDDIR      = build
 9 | 
10 | # Put it first so that "make" without argument is like "make help".
11 | help:
12 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
13 | 
14 | buildapi:
15 | 	sphinx-apidoc -feT ../spectrum_utils -o $(SOURCEDIR)/api
16 | 	@echo "Auto-generation of API documentation finished. The generated files are in '$(SOURCEDIR)/api/'"
17 | 
18 | .PHONY: help Makefile
19 | 
20 | # Catch-all target: route all unknown targets to Sphinx using the new
21 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
22 | %: Makefile
23 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
24 | 


--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
 1 | @ECHO OFF
 2 | 
 3 | pushd %~dp0
 4 | 
 5 | REM Command file for Sphinx documentation
 6 | 
 7 | if "%SPHINXBUILD%" == "" (
 8 | 	set SPHINXBUILD=sphinx-build
 9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 | 
13 | if "%1" == "" goto help
14 | 
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | 	echo.
18 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | 	echo.installed, then set the SPHINXBUILD environment variable to point
20 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | 	echo.may add the Sphinx directory to PATH.
22 | 	echo.
23 | 	echo.If you don't have Sphinx installed, grab it from
24 | 	echo.http://sphinx-doc.org/
25 | 	exit /b 1
26 | )
27 | 
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
29 | goto end
30 | 
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
33 | 
34 | :end
35 | popd
36 | 


--------------------------------------------------------------------------------
/docs/src/annot_fmt.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bittremieux-lab/spectrum_utils/a12f453040bb9bd36a261ae46063dfe828d4fe5e/docs/src/annot_fmt.png


--------------------------------------------------------------------------------
/docs/src/annotating.md:
--------------------------------------------------------------------------------
  1 | # Spectrum annotating
  2 | 
  3 | See the [quickstart](quickstart.md) for a brief introduction on how to start using spectrum_utils.
  4 | Here we will describe the spectrum annotation functionality provided by spectrum_utils in more detail.
  5 | 
  6 | ## Fragment ion annotation
  7 | 
  8 | As demonstrated in the [quickstart](quickstart.md), fragment ions can be annotated based on the [ProForma 2.0](https://www.psidev.info/proforma) specification.
  9 | 
 10 | The ProForma specification makes it possible to unambiguously represent peptide sequences and peptidoforms, which are specific forms of peptides that result from the combination of the amino acid sequences and modification(s) at specific amino acid positions.
 11 | Modifications are defined by controlled vocabularies (CVs), including [Unimod](https://www.unimod.org/), [PSI-MOD](https://github.com/HUPO-PSI/psi-mod-CV/), and others, and can be specified by their name or CV accession.
 12 | ProForma also supports special modification use cases, including support for modifications from cross-linking (using [XL-MOD](https://arxiv.org/abs/2003.00329)), glycans (using the [Glycan Naming Ontology](https://gnome.glyomics.org/)), and top-down extensions.
 13 | 
 14 | The following are (non-exhaustive) examples to demonstrate how ProForma can be used to annotate peaks in spectra:
 15 | 
 16 | - Specify modifications by their name: `EM[Oxidation]EVEES[Phospho]PEK`.
 17 |     ```python
 18 |     import matplotlib.pyplot as plt
 19 |     import spectrum_utils.plot as sup
 20 |     import spectrum_utils.spectrum as sus
 21 | 
 22 | 
 23 |     # Retrieve the spectrum by its USI.
 24 |     usi = "mzspec:MSV000082283:f07074:scan:5475"
 25 |     spectrum = sus.MsmsSpectrum.from_usi(usi)
 26 | 
 27 |     # Annotate the spectrum with its ProForma string.
 28 |     peptide = "EM[Oxidation]EVEES[Phospho]PEK"
 29 |     spectrum = spectrum.annotate_proforma(peptide, 10, "ppm")
 30 | 
 31 |     # Plot the spectrum.
 32 |     fig, ax = plt.subplots(figsize=(12, 6))
 33 |     sup.spectrum(spectrum, grid=False, ax=ax)
 34 |     ax.set_title(peptide, fontdict={"fontsize": "xx-large"})
 35 |     ax.spines["right"].set_visible(False)
 36 |     ax.spines["top"].set_visible(False)
 37 |     plt.savefig("proforma_ex1.png", bbox_inches="tight", dpi=300, transparent=True)
 38 |     plt.close()
 39 |     ```
 40 |     ![ProForma example spectrum plot](proforma_ex1.png)
 41 | 
 42 | - Specify modifications by their CV accession: `EM[MOD:00719]EVEES[MOD:00046]PEK`.
 43 |     ```python
 44 |     import matplotlib.pyplot as plt
 45 |     import spectrum_utils.plot as sup
 46 |     import spectrum_utils.spectrum as sus
 47 | 
 48 | 
 49 |     # Retrieve the spectrum by its USI.
 50 |     usi = "mzspec:MSV000082283:f07074:scan:5475"
 51 |     spectrum = sus.MsmsSpectrum.from_usi(usi)
 52 | 
 53 |     # Annotate the spectrum with its ProForma string.
 54 |     peptide = "EM[MOD:00719]EVEES[MOD:00046]PEK"
 55 |     spectrum = spectrum.annotate_proforma(peptide, 10, "ppm")
 56 | 
 57 |     # Plot the spectrum.
 58 |     fig, ax = plt.subplots(figsize=(12, 6))
 59 |     sup.spectrum(spectrum, grid=False, ax=ax)
 60 |     ax.set_title(peptide, fontdict={"fontsize": "xx-large"})
 61 |     ax.spines["right"].set_visible(False)
 62 |     ax.spines["top"].set_visible(False)
 63 |     plt.savefig("proforma_ex2.png", bbox_inches="tight", dpi=300, transparent=True)
 64 |     plt.close()
 65 |     ```
 66 |     ![ProForma example spectrum plot](proforma_ex2.png)
 67 | 
 68 | - Specify modifications by their delta mass: `EM[+15.9949]EVEES[+79.9663]PEK`.
 69 |     ```python
 70 |     import matplotlib.pyplot as plt
 71 |     import spectrum_utils.plot as sup
 72 |     import spectrum_utils.spectrum as sus
 73 | 
 74 | 
 75 |     # Retrieve the spectrum by its USI.
 76 |     usi = "mzspec:MSV000082283:f07074:scan:5475"
 77 |     spectrum = sus.MsmsSpectrum.from_usi(usi)
 78 | 
 79 |     # Annotate the spectrum with its ProForma string.
 80 |     peptide = "EM[+15.9949]EVEES[+79.9663]PEK"
 81 |     spectrum = spectrum.annotate_proforma(peptide, 10, "ppm")
 82 | 
 83 |     # Plot the spectrum.
 84 |     fig, ax = plt.subplots(figsize=(12, 6))
 85 |     sup.spectrum(spectrum, grid=False, ax=ax)
 86 |     ax.set_title(peptide, fontdict={"fontsize": "xx-large"})
 87 |     ax.spines["right"].set_visible(False)
 88 |     ax.spines["top"].set_visible(False)
 89 |     plt.savefig("proforma_ex3.png", bbox_inches="tight", dpi=300, transparent=True)
 90 |     plt.close()
 91 |     ```
 92 |     ![ProForma example spectrum plot](proforma_ex3.png)
 93 | 
 94 | For full details and advanced use cases, please consult the [ProForma 2.0](https://www.psidev.info/proforma) specification.
 95 | 
 96 | ### Supported ProForma 2.0 features
 97 | 
 98 | As described in the [ProForma 2.0 specification](https://www.psidev.info/proforma), there are several levels of compliance.
 99 | spectrum_utils uniquely supports the _full_ ProForma 2.0 specification, including the following features:
100 | 
101 | 1. Base Level Support.
102 | Representing the lowest level of compliance, this level involves providing support for:
103 |     - Amino acid sequences.
104 |     - Protein modifications using two of the supported CVs/ontologies: Unimod and PSI-MOD.
105 |     - Protein modifications using delta masses (without prefixes).
106 |     - N-terminal, C-terminal, and labile modifications.
107 |     - Ambiguity in the modification position, including support for localization scores.
108 |     - `INFO` tag.
109 | 
110 | 2. Additional Separate Support.
111 | These features are independent from each other:
112 |     - Unusual amino acids (O and U).
113 |     - Ambiguous amino acids (e.g. X, B, Z).
114 |     This would include support for sequence tags of known mass (using the character X).
115 |     - Protein modifications using delta masses (using prefixes for the different CVs/ontologies).
116 |     - Use of prefixes for Unimod (`U:`) and PSI-MOD (`M:`) names.
117 |     - Support for the joint representation of experimental data and its interpretation.
118 | 
119 | 3. Top Down Extensions.
120 |     - Additional CV/ontologies for protein modifications: RESID (the prefix `R` MUST be used for RESID CV/ontology term names).
121 |     - Chemical formulas (this feature occurs in two places in this list).
122 | 
123 | 4. Cross-Linking Extensions.
124 |     - Cross-linked peptides (using the XL-MOD CV/ontology, the prefix `X` MUST be used for XL-MOD CV/ontology term names).
125 | 
126 | 5. Glycan Extensions.
127 |     - Additional CV/ontologies for protein modifications: GNO (the prefix `G` MUST be used for GNO CV/ontology term names).
128 |     - Glycan composition.
129 |     - Chemical formulas (this feature occurs in two places in this list).
130 | 
131 | 6. Spectral Support.
132 |     - Charge and chimeric spectra are special cases (see Appendix II).
133 |     - Global modifications (e.g., every C is C13).
134 | 
135 | ### Implementation details
136 | 
137 | Internally, spectrum_utils represents the ProForma 2.0 specification as a formal grammar which is used to create an abstract syntax tree when parsing a ProForma string.
138 | This approach is similar to how compilers interpret complex source code instructions, and the formal grammar is the only existing codified representation for ProForma 2.0 that is machine-readable.
139 | This is an extremely robust and scalable solution to cover the full ProForma 2.0 specification, including optional extensions and edge cases, compared to alternative approaches, such as combinations of regular expressions.
140 | 
141 | Example abstract syntax tree for `{Glycan:HexNAcHex2}[Acetyl]-EM[UNIMOD:35]EVNES[Obs:+79.966|Phospho|Sulfo]PEK`, which demonstrates several functionalities of the ProForma specification:
142 | 
143 | - Multiple labile glycan modifications (1 HexNAc and 2 Hex).
144 | - An N-terminal acetylation specified by its [modification name (in Unimod)](https://www.unimod.org/modifications_view.php?editid1=1).
145 | - Oxidation of methionine specified by its [Unimod accession (`UNIMOD:35`)](https://www.unimod.org/modifications_view.php?editid1=35).
146 | - An observed mass difference of 79.966 Da that can be interpreted as a [phosphorylation](https://www.unimod.org/modifications_view.php?editid1=21) or [sulfation](https://www.unimod.org/modifications_view.php?editid1=40).
147 | 
148 | ![ProForma abstract syntax tree](proforma_ast.png)
149 | 
150 | (ion_types)=
151 | ## Ion types
152 | 
153 | During fragment ion annotation, by default peptide b and y ions will be annotated.
154 | Additionally, spectrum_utils supports several other ion types:
155 | 
156 | - Primary `"a"`, `"b"`, `"c"`, `"x"`, `"y"`, and `"z"` peptide fragments.
157 | - Internal fragment ions `"m"`, which result from two amide bond cleavages and thus do not contain either terminus.
158 | - Immonium ions `"I"`, which are internal fragments for individual amino acids formed by a b/y cleavage on the N-terminal side and an a/x cleavage on the C-terminal side.
159 | - Intact precursor ions `"p"`.
160 | - Reporter ions from isobaric labeling `"r"`.
161 | 
162 | Specify the desired ion types when annotating a spectrum using its ProForma string.
163 | For example, `MsmsSpectrum.annotate_proforma(..., ion_types="abyIm")` will find matching peaks for the a, b, and y peptide fragments, immonium ions, and internal fragment ions.
164 | 
165 | ```python
166 | import matplotlib.pyplot as plt
167 | import spectrum_utils.plot as sup
168 | import spectrum_utils.spectrum as sus
169 | 
170 | 
171 | usi = "mzspec:PXD000561:Adult_Frontalcortex_bRP_Elite_85_f09:scan:17555"
172 | peptide = "VLHPLEGAVVIIFK"
173 | spectrum = sus.MsmsSpectrum.from_usi(usi)
174 | spectrum.annotate_proforma(peptide, 10, "ppm", ion_types="abyIm")
175 | 
176 | fig, ax = plt.subplots(figsize=(12, 6))
177 | sup.spectrum(spectrum, grid=False, ax=ax)
178 | ax.set_title(peptide, fontdict={"fontsize": "xx-large"})
179 | ax.spines["right"].set_visible(False)
180 | ax.spines["top"].set_visible(False)
181 | plt.savefig("ion_types.png", dpi=300, bbox_inches="tight", transparent=True)
182 | plt.close()
183 | ```
184 | ![Ion types example spectrum plot](ion_types.png)
185 | 
186 | Besides the canonical peptide fragments, we can also observe immonium ions (dark gray) and several internal fragment ions (yellow).
187 | 
188 | ## Neutral losses
189 | 
190 | Each of the above ions can also be automatically considered with a neutral loss (or gain).
191 | Neutral losses need to be specified by a concise label (preferably their molecular formula) and mass difference, for example `{"NH3": -17.026549, "H2O": -18.010565}`.
192 | 
193 | The following example demonstrates how the number of observed peaks that can be interpreted increases by considering fragments with an optional ammonia (NH3) or water (H2O) neutral loss:
194 | 
195 | ```python
196 | import matplotlib.pyplot as plt
197 | import spectrum_utils.plot as sup
198 | import spectrum_utils.spectrum as sus
199 | 
200 | 
201 | usi = "mzspec:PXD014834:TCGA-AA-3518-01A-11_W_VU_20120915_A0218_3F_R_FR01:scan:8370"
202 | peptide = "WNQLQAFWGTGK"
203 | spectrum = sus.MsmsSpectrum.from_usi(usi)
204 | spectrum.annotate_proforma(
205 |     peptide,
206 |     fragment_tol_mass=0.05,
207 |     fragment_tol_mode="Da",
208 |     ion_types="aby",
209 |     neutral_losses={"NH3": -17.026549, "H2O": -18.010565},
210 | )
211 | 
212 | fig, ax = plt.subplots(figsize=(12, 6))
213 | sup.spectrum(spectrum, grid=False, ax=ax)
214 | ax.set_title(peptide, fontdict={"fontsize": "xx-large"})
215 | ax.spines["right"].set_visible(False)
216 | ax.spines["top"].set_visible(False)
217 | plt.savefig("neutral_losses_1.png", dpi=300, bbox_inches="tight", transparent=True)
218 | plt.close()
219 | ```
220 | 
221 | ![Neutral losses example spectrum plot](neutral_losses_1.png)
222 | 
223 | Peaks that correspond to peptide fragments with a neutral loss are highlighted in the matching color.
224 | 
225 | In contrast, the same peptide--spectrum match without considering neutral losses is able to explain far fewer peaks:
226 | 
227 | ```python
228 | import matplotlib.pyplot as plt
229 | import spectrum_utils.plot as sup
230 | import spectrum_utils.spectrum as sus
231 | 
232 | 
233 | usi = "mzspec:PXD014834:TCGA-AA-3518-01A-11_W_VU_20120915_A0218_3F_R_FR01:scan:8370"
234 | peptide = "WNQLQAFWGTGK"
235 | spectrum = sus.MsmsSpectrum.from_usi(usi)
236 | spectrum.annotate_proforma(
237 |     peptide, fragment_tol_mass=0.05, fragment_tol_mode="Da", ion_types="aby",
238 | )
239 | 
240 | fig, ax = plt.subplots(figsize=(12, 6))
241 | sup.spectrum(spectrum, grid=False, ax=ax)
242 | ax.set_title(peptide, fontdict={"fontsize": "xx-large"})
243 | ax.spines["right"].set_visible(False)
244 | ax.spines["top"].set_visible(False)
245 | plt.savefig("neutral_losses_2.png", dpi=300, bbox_inches="tight", transparent=True)
246 | plt.close()
247 | ```
248 | 
249 | ![Neutral losses example spectrum plot](neutral_losses_2.png)
250 | 
251 | ### Common neutral losses
252 | 
253 | Overview of common neutral losses:
254 | 
255 | | Neutral loss/gain | Molecular formula | Mass difference |
256 | | --- | --- | --- |
257 | | Hydrogen | H | 1.007825 |
258 | | Ammonia | NH3 | 17.026549 |
259 | | Water | H2O | 18.010565 |
260 | | Carbon monoxide | CO | 27.994915 |
261 | | Carbon dioxide | CO2 | 43.989829 |
262 | | Formamide | HCONH2 | 45.021464 |
263 | | Formic acid | HCOOH | 46.005479 |
264 | | Methanesulfenic acid | CH4OS | 63.998301 |
265 | | Sulfur trioxide | SO3 | 79.956818 |
266 | | Metaphosphoric acid | HPO3 | 79.966331 |
267 | | Mercaptoacetamide | C2H5NOS | 91.009195 |
268 | | Mercaptoacetic acid | C2H4O2S | 91.993211 |
269 | | Phosphoric acid | H3PO4 | 97.976896 |
270 | 
271 | Note that the masses in this table are positive, whereas a neutral _loss_ should typically be specified as a negative mass difference.
272 | 
273 | By default, no neutral losses are considered.
274 | If the `neutral_losses` argument of `MsmsSpectrum.annotate_proforma(...)` is set to `True`, all above mass differences will be considered as neutral losses (negative).
275 | 


--------------------------------------------------------------------------------
/docs/src/api.md:
--------------------------------------------------------------------------------
 1 | # Python API
 2 | 
 3 | ## spectrum_utils.spectrum module
 4 | 
 5 | ```{eval-rst}
 6 | .. autoclass:: spectrum_utils.spectrum.MsmsSpectrum
 7 |     :members:
 8 |     :undoc-members:
 9 |     :private-members:
10 |     :show-inheritance:
11 | ```
12 | 
13 | ## spectrum_utils.proforma module
14 | 
15 | ```{eval-rst}
16 | .. automodule:: spectrum_utils.proforma
17 |     :members:
18 |     :undoc-members:
19 |     :private-members:
20 |     :show-inheritance:
21 | ```
22 | 
23 | ## spectrum_utils.fragment_annotation module
24 | 
25 | ```{eval-rst}
26 | .. automodule:: spectrum_utils.fragment_annotation
27 |     :members:
28 |     :undoc-members:
29 |     :private-members:
30 |     :show-inheritance:
31 | ```
32 | 
33 | ## spectrum_utils.plot module
34 | 
35 | ```{eval-rst}
36 | .. automodule:: spectrum_utils.plot
37 |     :members:
38 |     :undoc-members:
39 |     :private-members:
40 |     :show-inheritance:
41 | ```
42 | 
43 | ## spectrum_utils.iplot module
44 | 
45 | ```{eval-rst}
46 | .. automodule:: spectrum_utils.iplot
47 |     :members:
48 |     :undoc-members:
49 |     :private-members:
50 |     :show-inheritance:
51 | ```
52 | 
53 | ## spectrum_utils.utils module
54 | 
55 | ```{eval-rst}
56 | .. automodule:: spectrum_utils.utils
57 |     :members:
58 |     :undoc-members:
59 |     :private-members:
60 |     :show-inheritance:
61 | ```
62 | 


--------------------------------------------------------------------------------
/docs/src/conf.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | #
  3 | # Configuration file for the Sphinx documentation builder.
  4 | #
  5 | # This file does only contain a selection of the most common options. For a
  6 | # full list see the documentation:
  7 | # http://www.sphinx-doc.org/en/master/config
  8 | 
  9 | # -- Path setup --------------------------------------------------------------
 10 | 
 11 | # Put the directory two levels up (the repository root when resolved from
 12 | # docs/src) at the front of sys.path as an absolute path, so that the
 13 | # spectrum_utils package can be imported below and documented with autodoc.
 14 | import os
 15 | import sys
 16 | 
 17 | sys.path.insert(0, os.path.abspath(os.path.join("..", "..")))
 18 | 
 19 | import spectrum_utils  # Provides __version__ for the project information.
 20 | 
 21 | 
 22 | # -- Project information -----------------------------------------------------
 23 | 
 24 | project = "spectrum_utils"
 25 | copyright = "2019–2022, Wout Bittremieux"
 26 | author = "Wout Bittremieux"
 27 | 
 28 | # The short version (here identical to the full release string below).
 29 | version = spectrum_utils.__version__
 30 | # The full version, including alpha/beta/rc tags, as reported by the package.
 31 | release = spectrum_utils.__version__
 32 | 
 33 | 
 34 | # -- General configuration ---------------------------------------------------
 35 | 
 36 | # If your documentation needs a minimal Sphinx version, state it here.
 37 | # needs_sphinx = "1.0"
 38 | 
 39 | # Sphinx extension module names, as strings: extensions coming with Sphinx
 40 | # (named 'sphinx.ext.*') as well as third-party ones. Commented-out entries
 41 | # are disabled.
 42 | extensions = [
 43 |     "myst_parser",  # Use Markdown instead of reStructuredText.
 44 |     "sphinx_markdown_tables",  # Support tables in Markdown.
 45 |     "sphinx.ext.autodoc",  # Include documentation from docstrings.
 46 |     # "sphinx.ext.autosummary",  # Generate documentation summary one-liners.
 47 |     # "sphinx.ext.doctest",  # Test code in the documentation.
 48 |     # "sphinx.ext.coverage",  # Collect documentation coverage statistics.
 49 |     "sphinx.ext.napoleon",  # Support NumPy and Google style docstrings.
 50 |     "sphinx.ext.viewcode",  # Add links to the source code.
 51 |     "sphinx_rtd_theme",  # Read-the-docs theme.
 52 | ]
 53 | 
 54 | # Default options applied to the autodoc directives.
 55 | autodoc_default_options = {
 56 |     "member-order": "bysource",  # Sort by order in the source.
 57 |     "special-members": "__init__",  # Include __init__ methods.
 58 |     "undoc-members": True,  # Include members without a docstring.
 59 | }
 60 | # The modules to mock on import (autodoc_mock_imports) are listed under
 61 | # "Extension configuration" at the end of this file.
 62 | # Generate stub pages for autosummary directives. NOTE(review): the
 63 | # "sphinx.ext.autosummary" extension is commented out above; confirm intent.
 64 | autosummary_generate = True
 65 | 
 66 | # Add any paths that contain templates here, relative to this directory.
 67 | # templates_path = ["_templates"]
 68 | 
 69 | # The suffixes of source filenames, given as a list of strings: both
 70 | # reStructuredText (.rst) and Markdown (.md) files are accepted.
 71 | source_suffix = [".rst", ".md"]
 72 | 
 73 | # The master toctree document.
 74 | master_doc = "index"
 75 | 
 76 | # The language for content autogenerated by Sphinx. Refer to documentation
 77 | # for a list of supported languages.
 78 | # This is also used if you do content translation via gettext catalogs.
 79 | # Usually you set "language" from the command line for these cases.
 80 | language = "en"
 81 | 
 82 | # List of patterns, relative to source directory, that match files and
 83 | # directories to ignore when looking for source files.
 84 | # These patterns also affect html_static_path and html_extra_path.
 85 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
 86 | 
 87 | # The name of the Pygments (syntax highlighting) style (None: not set here).
 88 | pygments_style = None
 89 | 
 90 | 
 91 | # -- Options for HTML output -------------------------------------------------
 92 | 
 93 | # The theme to use for HTML and HTML Help pages: the Read the Docs theme,
 94 | # which is also listed in the extensions above.
 95 | html_theme = "sphinx_rtd_theme"
 96 | 
 97 | # Theme options are theme-specific and customize the look and feel of a theme
 98 | # further.  For a list of options available for each theme, see the
 99 | # documentation.
100 | # html_theme_options = {}
101 | 
102 | # Add any paths that contain custom static files (such as style sheets) here,
103 | # relative to this directory. They are copied after the builtin static files,
104 | # so a file named "default.css" will overwrite the builtin "default.css".
105 | # html_static_path = ['_static']
106 | 
107 | # Custom sidebar templates, must be a dictionary that maps document names
108 | # to template names.
109 | # The default sidebars (for documents that don't match any pattern) are
110 | # defined by theme itself.  Builtin themes are using these templates by
111 | # default: ``["localtoc.html", "relations.html", "sourcelink.html",
112 | # "searchbox.html"]``.
113 | # html_sidebars = {}
114 | 
115 | 
116 | # -- Options for HTMLHelp output ---------------------------------------------
117 | 
118 | # Output file base name for the HTML Help builder.
119 | htmlhelp_basename = "spectrum_utilsdoc"
120 | 
121 | 
122 | # -- Options for LaTeX output ------------------------------------------------
123 | 
124 | latex_elements = {  # Empty: every option below is commented out.
125 |     # The paper size ("letterpaper" or "a4paper").
126 |     # "papersize": "letterpaper",
127 |     # The font size ("10pt", "11pt" or "12pt").
128 |     # "pointsize": "10pt",
129 |     # Additional stuff for the LaTeX preamble.
130 |     # "preamble": "",
131 |     # LaTeX figure (float) alignment.
132 |     # "figure_align": "htbp",
133 | }
134 | 
135 | # Grouping the document tree into LaTeX files. List of tuples
136 | # (source start file, target name, title,
137 | #  author, documentclass [howto, manual, or own class]).
138 | latex_documents = [
139 |     (
140 |         master_doc,
141 |         "spectrum_utils.tex",
142 |         "spectrum\\_utils Documentation",
143 |         "Wout Bittremieux",
144 |         "manual",
145 |     ),
146 | ]
147 | 
148 | 
149 | # -- Options for manual page output ------------------------------------------
150 | 
151 | # One entry per manual page. List of tuples
152 | # (source start file, name, description, authors, manual section).
153 | man_pages = [  # A single page, built from the master document, in section 1.
154 |     (master_doc, "spectrum_utils", "spectrum_utils Documentation", [author], 1)
155 | ]
156 | 
157 | 
158 | # -- Options for Texinfo output ----------------------------------------------
159 | 
160 | # Grouping the document tree into Texinfo files. List of tuples
161 | # (source start file, target name, title, author,
162 | #  dir menu entry, description, category)
163 | texinfo_documents = [
164 |     (
165 |         master_doc,
166 |         "spectrum_utils",
167 |         "spectrum_utils Documentation",
168 |         author,
169 |         "spectrum_utils",
170 |         " Python package for efficient MS/MS spectrum processing and "
171 |         "visualization.",
172 |         "Miscellaneous",
173 |     ),
174 | ]
175 | 
176 | 
177 | # -- Options for Epub output -------------------------------------------------
178 | 
179 | # Bibliographic Dublin Core info: the EPUB title reuses the project name.
180 | epub_title = project
181 | 
182 | # The unique identifier of the text. This can be an ISBN number
183 | # or the project homepage.
184 | # epub_identifier = ""
185 | 
186 | # A unique identification for the text.
187 | # epub_uid = ""
188 | 
189 | # A list of files that should not be packed into the epub file.
190 | epub_exclude_files = ["search.html"]
191 | 
192 | 
193 | # -- Extension configuration -------------------------------------------------
194 | 
195 | autodoc_mock_imports = [  # Modules that autodoc mocks instead of importing.
196 |     "fastobo",
197 |     "lark",
198 |     "matplotlib",
199 |     "numba",
200 |     "numpy",
201 |     "pandas",
202 |     "pyteomics",
203 | ]
204 | 


--------------------------------------------------------------------------------
/docs/src/contact.md:
--------------------------------------------------------------------------------
 1 | # Contact
 2 | 
 3 | For more information you can visit the [official GitHub repository](https://github.com/bittremieux/spectrum_utils/).
 4 | 
 5 | ## Citation
 6 | 
 7 | When using spectrum_utils, please cite the following manuscripts:
 8 |  
 9 | - Wout Bittremieux. "spectrum_utils: A Python package for mass spectrometry data processing and visualization." _Analytical Chemistry_ **92**, 659--661 (2020) doi:[10.1021/acs.analchem.9b04884](https://doi.org/10.1021/acs.analchem.9b04884).
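10 | - Wout Bittremieux, Lev Levitsky, Matteo Pilz, Timo Sachsenberg, Florian Huber, Mingxun Wang, Pieter C. Dorrestein. "Unified and standardized mass spectrometry data processing in Python using spectrum_utils." _Journal of Proteome Research_ **22**, 625--631 (2023) doi:[10.1021/acs.jproteome.2c00632](https://doi.org/10.1021/acs.jproteome.2c00632).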
11 | 


--------------------------------------------------------------------------------
/docs/src/facet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bittremieux-lab/spectrum_utils/a12f453040bb9bd36a261ae46063dfe828d4fe5e/docs/src/facet.png


--------------------------------------------------------------------------------
/docs/src/index.md:
--------------------------------------------------------------------------------
 1 | # spectrum_utils
 2 | 
 3 | [![conda](https://img.shields.io/conda/vn/bioconda/spectrum_utils?color=green)](http://bioconda.github.io/recipes/spectrum_utils/README.html)
 4 | [![PyPI](https://img.shields.io/pypi/v/spectrum_utils?color=green)](https://pypi.org/project/spectrum_utils/)
 5 | [![Build status](https://github.com/bittremieux/spectrum_utils/workflows/tests/badge.svg)](https://github.com/bittremieux/spectrum_utils/actions?query=workflow:tests)
 6 | [![docs](https://readthedocs.org/projects/spectrum-utils/badge/?version=latest)](https://spectrum-utils.readthedocs.io/en/latest/?badge=latest)
 7 | 
 8 | ## About spectrum_utils
 9 | 
10 | spectrum_utils is a Python package for efficient mass spectrometry data processing and visualization.
11 | 
12 | spectrum_utils contains the following features:
13 | 
14 | - Spectrum loading from online proteomics and metabolomics data resources using the [Universal Spectrum Identifier (USI)](https://www.psidev.info/usi) mechanism.
15 | - Common spectrum processing operations (precursor & noise peak removal, intensity filtering, intensity scaling) optimized for computational efficiency.
16 | - Annotating observed spectrum fragments using the [ProForma 2.0 specification](https://www.psidev.info/proforma) for (modified) peptidoforms.
17 | - Publication-quality, fully customizable spectrum plotting and interactive spectrum plotting.
18 |  
19 | See the documentation for more information and detailed examples on how to get started with spectrum_utils for versatile mass spectrometry data manipulation in Python.
20 |  
21 | ## Citation
22 |  
23 | spectrum_utils is freely available as open source under the [Apache 2.0 license](http://opensource.org/licenses/Apache-2.0).
24 | 
25 | When using spectrum_utils, please cite the following manuscripts:
26 |  
27 | - Wout Bittremieux. "spectrum_utils: A Python package for mass spectrometry data processing and visualization." _Analytical Chemistry_ **92**, 659--661 (2020) doi:[10.1021/acs.analchem.9b04884](https://doi.org/10.1021/acs.analchem.9b04884).
28 | - Wout Bittremieux, Lev Levitsky, Matteo Pilz, Timo Sachsenberg, Florian Huber, Mingxun Wang, Pieter C. Dorrestein. "Unified and standardized mass spectrometry data processing in Python using spectrum_utils." _Journal of Proteome Research_ **22**, 625--631 (2023) doi:[10.1021/acs.jproteome.2c00632](https://doi.org/10.1021/acs.jproteome.2c00632).
29 | 
30 | ```{toctree}
31 | ---
32 | caption: Contents
33 | maxdepth: 1
34 | ---
35 | 
36 | install
37 | quickstart
38 | annotating
39 | plotting
40 | runtime
41 | api
42 | contact
43 | ```
44 | 


--------------------------------------------------------------------------------
/docs/src/install.md:
--------------------------------------------------------------------------------
 1 | # Install
 2 | 
 3 | spectrum_utils requires Python version 3.8+ and can be installed with pip or conda.
 4 | 
 5 | Using pip:
 6 | 
 7 |     pip install spectrum_utils[iplot]
 8 | 
 9 | Using conda:
10 | 
11 |     conda install -c bioconda spectrum_utils
12 | 
13 | ## Supported Python versions
14 | 
15 | spectrum_utils supports Python version 3.8 and above.
16 | 
17 | ## Dependencies
18 | 
19 | spectrum_utils has the following third-party dependencies:
20 | 
21 | - [fastobo](https://fastobo.readthedocs.io/)
22 | - [Lark](https://lark-parser.readthedocs.io/)
23 | - [Matplotlib](https://matplotlib.org/)
24 | - [Numba](http://numba.pydata.org/)
25 | - [NumPy](https://www.numpy.org/)
26 | - [Pandas](https://pandas.pydata.org/)
27 | - [platformdirs](https://github.com/platformdirs/platformdirs)
28 | - [Pyteomics](https://pyteomics.readthedocs.io/)
29 | - [Vega-Altair](https://altair-viz.github.io/)
30 | 
31 | Missing dependencies will be automatically installed when you install spectrum_utils using pip or conda.
32 | 
33 | Additionally, we recommend manually installing [pyteomics.cythonize](https://pypi.org/project/pyteomics.cythonize/) as a drop-in replacement for faster fragment ion mass calculations.
34 | 
35 | ## Advanced installation instructions
36 | 
37 | spectrum_utils provides modular installation capabilities to minimize the number of third-party dependencies that will be installed when only a subset of the spectrum_utils functionality is required.
38 | The previous pip and conda commands will install all optional spectrum_utils extensions (excluding developer and documentation dependencies).
39 | Power users can customize their spectrum_utils installation by specifying one or more of the following sets of dependencies:
40 | 
41 | - `dev`: Developer dependencies for automatic linting and testing.
42 | - `docs`: Dependencies to generate these documentation pages.
43 | - `iplot`: Interactive spectrum plotting using [Vega-Altair](https://altair-viz.github.io/).
44 | 


--------------------------------------------------------------------------------
/docs/src/ion_types.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bittremieux-lab/spectrum_utils/a12f453040bb9bd36a261ae46063dfe828d4fe5e/docs/src/ion_types.png


--------------------------------------------------------------------------------
/docs/src/iplot_spectrum.json:
--------------------------------------------------------------------------------
1 | {"config": {"view": {"continuousWidth": 400, "continuousHeight": 300}}, "layer": [{"data": {"name": "data-a64c5702074096e6cd965645a52da966"}, "mark": {"type": "rule", "size": 2}, "encoding": {"color": {"type": "nominal", "field": "color", "legend": null, "scale": null}, "tooltip": [{"type": "quantitative", "field": "mz", "format": ".4f", "title": "m/z"}, {"type": "quantitative", "field": "intensity", "format": ".1%", "title": "Intensity"}], "x": {"type": "quantitative", "axis": {"grid": true, "title": "m/z", "titleFontStyle": "italic"}, "field": "mz", "scale": {"nice": true, "padding": 5}}, "y": {"type": "quantitative", "axis": {"format": "%", "grid": true, "title": "Intensity"}, "field": "intensity", "scale": {"nice": true}}}}, {"data": {"name": "data-74878dd49dd20ad3d314195fc2ba5f7b"}, "mark": {"type": "rule", "size": 2}, "encoding": {"color": {"type": "nominal", "field": "color", "legend": null, "scale": null}, "tooltip": [{"type": "quantitative", "field": "mz", "format": ".4f", "title": "m/z"}, {"type": "quantitative", "field": "intensity", "format": ".1%", "title": "Intensity"}, {"type": "nominal", "field": "fragment", "title": "Fragment"}, {"type": "nominal", "field": "mz_delta", "title": "m/z deviation"}], "x": {"type": "quantitative", "axis": {"grid": true, "title": "m/z", "titleFontStyle": "italic"}, "field": "mz", "scale": {"nice": true, "padding": 5}}, "y": {"type": "quantitative", "axis": {"format": "%", "grid": true, "title": "Intensity"}, "field": "intensity", "scale": {"nice": true}}}}, {"data": {"name": "data-74878dd49dd20ad3d314195fc2ba5f7b"}, "mark": {"type": "text", "align": "left", "angle": 270, "baseline": "middle", "dx": 5}, "encoding": {"color": {"type": "nominal", "field": "color", "legend": null, "scale": null}, "text": {"type": "nominal", "field": "fragment"}, "tooltip": [{"type": "quantitative", "field": "mz", "format": ".4f", "title": "m/z"}, {"type": "quantitative", "field": "intensity", "format": ".1%", "title": "Intensity"}, 
{"type": "nominal", "field": "fragment", "title": "Fragment"}, {"type": "nominal", "field": "mz_delta", "title": "m/z deviation"}], "x": {"type": "quantitative", "axis": {"grid": true, "title": "m/z", "titleFontStyle": "italic"}, "field": "mz", "scale": {"nice": true, "padding": 5}}, "y": {"type": "quantitative", "axis": {"format": "%", "grid": true, "title": "Intensity"}, "field": "intensity", "scale": {"nice": true}}}}], "height": 400, "width": 640, "$schema": "https://vega.github.io/schema/vega-lite/v4.8.1.json", "datasets": {"data-a64c5702074096e6cd965645a52da966": [{"mz": 101.07122039794922, "intensity": 0.148338183760643, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 109.68924713134766, "intensity": 0.007550157606601715, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 115.86998748779297, "intensity": 0.007336989510804415, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 120.08110046386719, "intensity": 0.12255959212779999, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 129.06594848632812, "intensity": 0.020143359899520874, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 129.102294921875, "intensity": 0.12308508157730103, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 130.06533813476562, "intensity": 0.1629226803779602, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 130.08633422851562, "intensity": 0.04563209041953087, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 130.95578002929688, "intensity": 0.008904634043574333, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 131.11859130859375, "intensity": 0.010367393493652344, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 132.07968139648438, "intensity": 0.07641462981700897, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 136.07566833496094, "intensity": 0.05691216513514519, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 141.3184814453125, 
"intensity": 0.008756570518016815, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 142.21034240722656, "intensity": 0.009229789488017559, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 155.08184814453125, "intensity": 0.011481806635856628, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 159.0760498046875, "intensity": 0.03314025327563286, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 159.09169006347656, "intensity": 1.0, "fragment": "", "mz_delta": "0.1ppm", "color": "#388E3C"}, {"mz": 160.07516479492188, "intensity": 0.011401713825762272, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 160.09506225585938, "intensity": 0.054223813116550446, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 170.06008911132812, "intensity": 0.32688644528388977, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 171.0631866455078, "intensity": 0.02041677013039589, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 175.11871337890625, "intensity": 0.028740430250763893, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 186.12335205078125, "intensity": 0.056916072964668274, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 197.12814331054688, "intensity": 0.0751049593091011, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 199.109130859375, "intensity": 0.0103872399777174, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 199.17962646484375, "intensity": 0.023118557408452034, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 200.1028594970703, "intensity": 0.039687901735305786, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 201.10281372070312, "intensity": 0.041108112782239914, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 201.1234130859375, "intensity": 0.03132535144686699, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 204.1133575439453, "intensity": 0.02866506576538086, 
"fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 211.14495849609375, "intensity": 0.020317386835813522, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 213.1597900390625, "intensity": 0.021088870242238045, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 214.09768676757812, "intensity": 0.22772477567195892, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 214.15350341796875, "intensity": 0.0830322653055191, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 215.10003662109375, "intensity": 0.02158365398645401, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 216.6425323486328, "intensity": 0.01774018071591854, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 217.13331604003906, "intensity": 0.011508912779390812, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 224.1397705078125, "intensity": 0.018519965931773186, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 225.1234893798828, "intensity": 0.031006278470158577, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 226.08267211914062, "intensity": 0.027994472533464432, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 228.13485717773438, "intensity": 0.01959451474249363, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 229.12010192871094, "intensity": 0.03323378413915634, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 233.16494750976562, "intensity": 0.07654907554388046, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 239.082275390625, "intensity": 0.03774549812078476, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 242.14993286132812, "intensity": 0.11195865273475647, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 243.10874938964844, "intensity": 0.13127824664115906, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 244.10833740234375, "intensity": 0.025648383423686028, "fragment": "", "mz_delta": 
null, "color": "#212121"}, {"mz": 245.3516845703125, "intensity": 0.020252369344234467, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 253.09628295898438, "intensity": 0.032357994467020035, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 256.10833740234375, "intensity": 0.033013418316841125, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 261.15960693359375, "intensity": 0.06451421976089478, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 273.1348571777344, "intensity": 0.03534402325749397, "fragment": "", "mz_delta": "0.9ppm", "color": "#388E3C"}, {"mz": 282.1797790527344, "intensity": 0.019065726548433304, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 284.1026611328125, "intensity": 0.6415199041366577, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 285.1063232421875, "intensity": 0.07752740383148193, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 287.1734313964844, "intensity": 0.026046147570014, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 289.11083984375, "intensity": 0.017870688810944557, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 302.1326904296875, "intensity": 0.02836090512573719, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 310.1726989746094, "intensity": 0.027750393375754356, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 313.1876220703125, "intensity": 0.04817057028412819, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 318.1550598144531, "intensity": 0.061667412519454956, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 321.15631103515625, "intensity": 0.028431694954633713, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 326.1781311035156, "intensity": 0.04074738919734955, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 328.2327880859375, "intensity": 0.017787983641028404, "fragment": "", "mz_delta": null, "color": "#212121"}, 
{"mz": 334.1546630859375, "intensity": 0.019314678385853767, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 344.0024719238281, "intensity": 0.010485530830919743, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 344.1928405761719, "intensity": 0.10169167816638947, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 345.1581115722656, "intensity": 0.03624998405575752, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 347.1709289550781, "intensity": 0.06590598076581955, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 352.1987609863281, "intensity": 0.026266280561685562, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 353.18170166015625, "intensity": 0.14427943527698517, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 356.19281005859375, "intensity": 0.07075531035661697, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 361.2311096191406, "intensity": 0.03169279918074608, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 363.2070007324219, "intensity": 0.09083585441112518, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 367.14434814453125, "intensity": 0.05584902688860893, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 370.21044921875, "intensity": 0.05039053410291672, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 384.1676330566406, "intensity": 0.043628327548503876, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 385.1712341308594, "intensity": 0.04909650981426239, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 395.1347961425781, "intensity": 0.19516165554523468, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 402.1809387207031, "intensity": 0.020460274070501328, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 409.2093505859375, "intensity": 0.029131930321455002, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 412.1607360839844, 
"intensity": 0.2674732506275177, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 413.1644592285156, "intensity": 0.04392719641327858, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 417.177734375, "intensity": 0.034838926047086716, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 424.217529296875, "intensity": 0.08377550542354584, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 425.22711181640625, "intensity": 0.020911267027258873, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 430.19342041015625, "intensity": 0.04080050066113472, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 441.2466735839844, "intensity": 0.030935177579522133, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 454.23114013671875, "intensity": 0.03229103237390518, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 462.21368408203125, "intensity": 0.034385476261377335, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 463.1971130371094, "intensity": 0.07402148842811584, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 480.2240905761719, "intensity": 0.13736052811145782, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 481.226318359375, "intensity": 0.02832726202905178, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 484.24505615234375, "intensity": 0.022815125063061714, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 488.21954345703125, "intensity": 0.02879846654832363, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 497.2502746582031, "intensity": 0.09249623119831085, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 498.25396728515625, "intensity": 0.06746581196784973, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 506.25054931640625, "intensity": 0.020595932379364967, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 508.2207336425781, "intensity": 0.03447815775871277, 
"fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 512.2582397460938, "intensity": 0.02941327542066574, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 516.2634887695312, "intensity": 0.02493799850344658, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 517.2271728515625, "intensity": 0.02447432652115822, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 521.2412719726562, "intensity": 0.01960829272866249, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 523.7442626953125, "intensity": 0.01984090358018875, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 525.2426147460938, "intensity": 0.0960967093706131, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 526.2479248046875, "intensity": 0.035229749977588654, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 530.2679443359375, "intensity": 0.05706870183348656, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 538.2637939453125, "intensity": 0.02259320579469204, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 545.2481689453125, "intensity": 0.028604505583643913, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 549.2872924804688, "intensity": 0.14675451815128326, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 550.78271484375, "intensity": 0.031602609902620316, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 555.285888671875, "intensity": 0.026801517233252525, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 559.2943115234375, "intensity": 0.12255251407623291, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 559.7947998046875, "intensity": 0.06386437267065048, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 571.28662109375, "intensity": 0.047733355313539505, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 587.294189453125, "intensity": 0.022753508761525154, "fragment": "", "mz_delta": null, "color": 
"#212121"}, {"mz": 599.2872314453125, "intensity": 0.035877007991075516, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 599.785400390625, "intensity": 0.034990787506103516, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 600.4091186523438, "intensity": 0.02041812427341938, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 608.2901611328125, "intensity": 0.047849226742982864, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 629.7884521484375, "intensity": 0.024940097704529762, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 636.2815551757812, "intensity": 0.03865744173526764, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 639.3478393554688, "intensity": 0.018782714381814003, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 646.3280029296875, "intensity": 0.030414801090955734, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 647.328369140625, "intensity": 0.02265116572380066, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 653.3023071289062, "intensity": 0.031094927340745926, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 670.3162231445312, "intensity": 0.02026170864701271, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 677.3325805664062, "intensity": 0.03700347617268562, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 685.322998046875, "intensity": 0.031203679740428925, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 692.3241577148438, "intensity": 0.021440720185637474, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 696.351318359375, "intensity": 0.13134905695915222, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 702.3402709960938, "intensity": 0.019092896953225136, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 703.3506469726562, "intensity": 0.024013128131628036, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 707.3213500976562, 
"intensity": 0.024482762441039085, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 709.8466186523438, "intensity": 0.03129369020462036, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 710.3462524414062, "intensity": 0.06588925421237946, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 717.8883666992188, "intensity": 0.04304360970854759, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 718.3870849609375, "intensity": 0.32829058170318604, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 718.8858642578125, "intensity": 0.03321429714560509, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 724.34130859375, "intensity": 0.04471525177359581, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 729.3600463867188, "intensity": 0.020301733165979385, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 730.3282470703125, "intensity": 0.02373546175658703, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 748.375244140625, "intensity": 0.04757232218980789, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 757.3715209960938, "intensity": 0.03368370234966278, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 763.1437377929688, "intensity": 0.020129267126321793, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 766.2276000976562, "intensity": 0.03150264918804169, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 767.3910522460938, "intensity": 0.19662556052207947, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 767.736572265625, "intensity": 0.019396141171455383, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 783.81298828125, "intensity": 0.020094377920031548, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 831.4067993164062, "intensity": 0.02632063627243042, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 835.418701171875, "intensity": 0.026114407926797867, "fragment": 
"", "mz_delta": null, "color": "#212121"}, {"mz": 858.4285278320312, "intensity": 0.03353990986943245, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 859.4189453125, "intensity": 0.045826975256204605, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 869.4483642578125, "intensity": 0.019475221633911133, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 871.410400390625, "intensity": 0.028745388612151146, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 876.4351806640625, "intensity": 0.13731563091278076, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 877.419677734375, "intensity": 0.31163832545280457, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 878.4134521484375, "intensity": 0.11510690301656723, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 886.4219360351562, "intensity": 0.02274133637547493, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 887.4178466796875, "intensity": 0.02371060661971569, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 895.4476318359375, "intensity": 0.21141238510608673, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 904.451904296875, "intensity": 0.025122331455349922, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 905.44287109375, "intensity": 0.023934079334139824, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 906.4541015625, "intensity": 0.019386500120162964, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 972.4820556640625, "intensity": 0.031104160472750664, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 989.505859375, "intensity": 0.030449291691184044, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 990.4984130859375, "intensity": 0.0242347102612257, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 1008.5327758789062, "intensity": 0.24949823319911957, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 
1017.518798828125, "intensity": 0.023513633757829666, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 1018.51953125, "intensity": 0.021530823782086372, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 1037.5977783203125, "intensity": 0.021331775933504105, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 1046.4642333984375, "intensity": 0.021640582010149956, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 1082.492919921875, "intensity": 0.0222757738083601, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 1100.57470703125, "intensity": 0.022380664944648743, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 1117.570556640625, "intensity": 0.02564471773803234, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 1118.573974609375, "intensity": 0.1540607213973999, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 1119.55908203125, "intensity": 0.07648862898349762, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 1136.58984375, "intensity": 0.14887091517448425, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 1145.5716552734375, "intensity": 0.021669859066605568, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 1214.585693359375, "intensity": 0.02321801893413067, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 1215.5576171875, "intensity": 0.02200160175561905, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 1227.5980224609375, "intensity": 0.022091779857873917, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 1233.6329345703125, "intensity": 0.030069278553128242, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 1250.6297607421875, "intensity": 0.11248490959405899, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 1260.6072998046875, "intensity": 0.040360987186431885, "fragment": "", "mz_delta": null, "color": "#212121"}, {"mz": 1261.614013671875, "intensity": 
0.033135995268821716, "fragment": "", "mz_delta": "2.2ppm", "color": "#388E3C"}, {"mz": 1272.6572265625, "intensity": 0.02319318614900112, "fragment": "", "mz_delta": null, "color": "#212121"}], "data-74878dd49dd20ad3d314195fc2ba5f7b": [{"mz": 147.11273193359375, "intensity": 0.09118045121431351, "fragment": "y1", "mz_delta": "-0.5ppm", "color": "#D32F2F"}, {"mz": 187.086669921875, "intensity": 0.11504451185464859, "fragment": "b1", "mz_delta": "0.4ppm", "color": "#1976D2"}, {"mz": 204.13414001464844, "intensity": 0.22047029435634613, "fragment": "y2", "mz_delta": "-0.6ppm", "color": "#D32F2F"}, {"mz": 301.1289978027344, "intensity": 0.4070082902908325, "fragment": "b2", "mz_delta": "-1.7ppm", "color": "#1976D2"}, {"mz": 305.18109130859375, "intensity": 0.2134922742843628, "fragment": "y3", "mz_delta": "-2.8ppm", "color": "#D32F2F"}, {"mz": 362.2037353515625, "intensity": 0.5880805253982544, "fragment": "y4", "mz_delta": "0.9ppm", "color": "#D32F2F"}, {"mz": 429.1875915527344, "intensity": 0.30558738112449646, "fragment": "b3", "mz_delta": "-1.2ppm", "color": "#1976D2"}, {"mz": 542.272216796875, "intensity": 0.07725690305233002, "fragment": "b4", "mz_delta": "0.1ppm", "color": "#1976D2"}, {"mz": 548.280517578125, "intensity": 0.6340454816818237, "fragment": "y5", "mz_delta": "-4.0ppm", "color": "#D32F2F"}, {"mz": 695.351318359375, "intensity": 0.4454374611377716, "fragment": "y6", "mz_delta": "0.3ppm", "color": "#D32F2F"}, {"mz": 766.387451171875, "intensity": 0.585404098033905, "fragment": "y7", "mz_delta": "-1.0ppm", "color": "#D32F2F"}, {"mz": 894.4446411132812, "intensity": 0.44065672159194946, "fragment": "y8", "mz_delta": "-2.4ppm", "color": "#D32F2F"}, {"mz": 1007.5315551757812, "intensity": 0.4444051682949066, "fragment": "y9", "mz_delta": "0.7ppm", "color": "#D32F2F"}, {"mz": 1135.5819091796875, "intensity": 0.24221596121788025, "fragment": "y10", "mz_delta": "-6.7ppm", "color": "#D32F2F"}, {"mz": 1232.5897216796875, "intensity": 0.04881492629647255, 
"fragment": "b10", "mz_delta": "4.1ppm", "color": "#1976D2"}, {"mz": 1249.630615234375, "intensity": 0.1376694291830063, "fragment": "y11", "mz_delta": "-1.4ppm", "color": "#D32F2F"}]}}


--------------------------------------------------------------------------------
/docs/src/mass_errors.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bittremieux-lab/spectrum_utils/a12f453040bb9bd36a261ae46063dfe828d4fe5e/docs/src/mass_errors.png


--------------------------------------------------------------------------------
/docs/src/mirror.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bittremieux-lab/spectrum_utils/a12f453040bb9bd36a261ae46063dfe828d4fe5e/docs/src/mirror.png


--------------------------------------------------------------------------------
/docs/src/neutral_losses_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bittremieux-lab/spectrum_utils/a12f453040bb9bd36a261ae46063dfe828d4fe5e/docs/src/neutral_losses_1.png


--------------------------------------------------------------------------------
/docs/src/neutral_losses_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bittremieux-lab/spectrum_utils/a12f453040bb9bd36a261ae46063dfe828d4fe5e/docs/src/neutral_losses_2.png


--------------------------------------------------------------------------------
/docs/src/plotting.md:
--------------------------------------------------------------------------------
  1 | # Spectrum visualization
  2 | 
  3 | The [quickstart](quickstart.md) briefly introduced the spectrum_utils plotting functionality.
  4 | Often, nice spectrum graphics can be created with only a single line of code; it's as easy as using `spectrum_utils.plot.spectrum(...)` to visualize a single spectrum.
  5 | 
  6 | Here we will briefly describe some advanced functionality to customize your spectrum plots.
  7 | Some of the arguments that can be provided to `spectrum_utils.plot.spectrum(...)` are:
  8 | 
  9 | - `color_ions`: Boolean flag indicating whether the annotated peaks should be colored.
 10 | - `annot_fmt`: A function that converts a `FragmentAnnotation` to a label to annotate the corresponding peak (see below).
 11 | - `annot_kws`: A dictionary with options to customize peak label texts.
 12 |   See the [`matplotlib.text.Text` documentation](https://matplotlib.org/stable/api/text_api.html#matplotlib.text.Text) for available options.
 13 | - `grid`: Enable/disable the grid.
 14 | 
 15 | See the [API reference](api.md) for full details on how to use these settings.
 16 | 
 17 | ## Peak annotations
 18 | 
 19 | By default, singly-charged b and y peptide fragment ions are annotated with a label in the spectrum plots.
 20 | To avoid overcrowding the spectrum plots, other peaks will be highlighted in the matching color, but will not receive an annotation label.
 21 | However, which peaks to annotate and the format of the label can be fully customized by providing a callable that implements your desired behavior to `annot_fmt`.
 22 | 
 23 | Here we will show example code that can guide you in implementing your custom peak labeling functionality.
 24 | As mentioned previously, the default implementation labels singly-charged b and y peptide ions that have not undergone a neutral loss (slightly adapted for conciseness):
 25 | 
 26 | ```python
 27 | def annotate_ion_type(annotation, ion_types="by"):
 28 |     if (
 29 |         annotation.ion_type[0] in ion_types
 30 |         and annotation.neutral_loss is None
 31 |         and annotation.isotope == 0
 32 |         and annotation.charge == 1
 33 |     ):
 34 |         return annotation.ion_type
 35 |     else:
 36 |         return ""
 37 | ```
 38 | 
 39 | To annotate additional [ion types](ion_types), you can reuse the default implementation in combination with `functools.partial`:
 40 | 
 41 | ```python
 42 | import functools
 43 | import spectrum_utils.plot
 44 | 
 45 | spectrum_utils.plot.spectrum(..., annot_fmt=functools.partial(spectrum_utils.plot.annotate_ion_type, ion_types="abyIm"))
 46 | ```
 47 | 
 48 | Peak labels that contain full information according to the [PSI peak interpretation specification](https://docs.google.com/document/d/1yEUNG4Ump6vnbMDs4iV4s3XISflmOkRAyqUuutcCG2w/edit?usp=sharing) can be achieved by using the `str` function: `spectrum_utils.plot.spectrum(..., annot_fmt=str)`.
 49 | 
 50 | Finally, we'll show an example of custom peak labeling functionality that indicates the charge state by repeated `+` symbols, neutral loss of ammonia by the `*` symbol, and neutral loss of water by the `o` symbol. This behavior is similar to the [Lorikeet spectrum viewer](https://uwpr.github.io/Lorikeet/).
 51 | 
 52 | ```python
 53 | import matplotlib.pyplot as plt
 54 | import spectrum_utils.plot as sup
 55 | import spectrum_utils.spectrum as sus
 56 | 
 57 | 
 58 | def annotate_ion_type(annotation, ion_types="aby"):
 59 |     if annotation.ion_type[0] in ion_types:
 60 |         if abs(annotation.isotope) == 1:
 61 |             iso = "+i" if annotation.isotope > 0 else "-i"
 62 |         elif annotation.isotope != 0:
 63 |             iso = f"{annotation.isotope:+}i"
 64 |         else:
 65 |             iso = ""
 66 |         nl = {"-NH3": "*", "-H2O": "o"}.get(annotation.neutral_loss, "")
 67 |         return f"{annotation.ion_type}{iso}{'+' * annotation.charge}{nl}"
 68 |     else:
 69 |         return ""
 70 | 
 71 | 
 72 | usi = "mzspec:PXD014834:TCGA-AA-3518-01A-11_W_VU_20120915_A0218_3F_R_FR01:scan:8370"
 73 | peptide = "WNQLQAFWGTGK"
 74 | spectrum = sus.MsmsSpectrum.from_usi(usi)
 75 | spectrum.annotate_proforma(
 76 |     peptide,
 77 |     fragment_tol_mass=0.05,
 78 |     fragment_tol_mode="Da",
 79 |     ion_types="aby",
 80 |     max_ion_charge=2,
 81 |     neutral_losses={"NH3": -17.026549, "H2O": -18.010565},
 82 | )
 83 | 
 84 | fig, ax = plt.subplots(figsize=(12, 6))
 85 | sup.spectrum(spectrum, annot_fmt=annotate_ion_type, grid=False, ax=ax)
 86 | ax.set_title(peptide, fontdict={"fontsize": "xx-large"})
 87 | ax.spines["right"].set_visible(False)
 88 | ax.spines["top"].set_visible(False)
 89 | plt.savefig("annot_fmt.png", dpi=300, bbox_inches="tight", transparent=True)
 90 | plt.close()
 91 | ```
 92 | 
 93 | ![Spectrum plot with neutral losses labeled](annot_fmt.png)
 94 | 
 95 | ## Mirror plot
 96 | 
 97 | A mirror plot can be used to visualize matching spectra, for example, to plot identifications from spectral library searching.
 98 | Again, only a single line of code is required to do the actual plotting: `spectrum_utils.plot.mirror(...)`.
 99 | 
100 | ```python
101 | import matplotlib.pyplot as plt
102 | import spectrum_utils.plot as sup
103 | import spectrum_utils.spectrum as sus
104 | 
105 | 
106 | peptide = "DLTDYLM[Oxidation]K"
107 | usi_top = "mzspec:MSV000079960:DY_HS_Exp7-Ad1:scan:30372"
108 | spectrum_top = sus.MsmsSpectrum.from_usi(usi_top)
109 | spectrum_top.annotate_proforma(peptide, 0.5, "Da", ion_types="aby")
110 | usi_bottom = "mzspec:MSV000080679:j11962_C1orf144:scan:10671"
111 | spectrum_bottom = sus.MsmsSpectrum.from_usi(usi_bottom)
112 | spectrum_bottom.annotate_proforma(peptide, 0.5, "Da", ion_types="aby")
113 | 
114 | fig, ax = plt.subplots(figsize=(12, 6))
115 | sup.mirror(spectrum_top, spectrum_bottom, ax=ax)
116 | plt.savefig("mirror.png", dpi=300, bbox_inches="tight", transparent=True)
117 | plt.close()
118 | ```
119 | 
120 | ![Mirror spectrum plot](mirror.png)
121 | 
122 | All of the advanced plotting arguments described above can be provided for the mirror plot as well using the `spectrum_kws` argument.
123 | 
124 | ## Mass error plot
125 | 
126 | The difference between the observed and the theoretical mass of annotated fragment ions can be visualized in a mass error plot. In these bubble plots, the size of the bubbles corresponds to the intensity of the fragment ions, the x-axis shows the observed _m/z_, and the y-axis shows the mass error either in ppm or in Dalton. Use `spectrum_utils.plot.mass_errors(...)` to plot mass errors:
127 | 
128 | ```python
129 | import matplotlib.pyplot as plt
130 | import spectrum_utils.plot as sup
131 | import spectrum_utils.spectrum as sus
132 | 
133 | usi = "mzspec:PXD022531:j12541_C5orf38:scan:12368"
134 | peptide = "VAATLEILTLK/2"
135 | spectrum = sus.MsmsSpectrum.from_usi(usi)
136 | spectrum.annotate_proforma(
137 |     peptide,
138 |     fragment_tol_mass=0.05,
139 |     fragment_tol_mode="Da",
140 |     ion_types="aby",
141 |     max_ion_charge=2,
142 |     neutral_losses={"NH3": -17.026549, "H2O": -18.010565},
143 | )
144 | 
145 | fig, ax = plt.subplots(figsize=(10.5, 3))
146 | sup.mass_errors(spectrum, plot_unknown=False, ax=ax)
147 | plt.savefig("mass_errors.png", dpi=300, bbox_inches="tight", transparent=True)
148 | plt.close()
149 | ```
150 | 
151 | ![Mass error plot](mass_errors.png)
152 | 
153 | ## Figure-level facet plot
154 | 
155 | The figure-level `spectrum_utils.plot.facet` function combines the `spectrum_utils.plot.mirror` and `spectrum_utils.plot.mass_errors` functionality:
156 | 
157 | ```python
158 | import matplotlib.pyplot as plt
159 | import spectrum_utils.plot as sup
160 | import spectrum_utils.spectrum as sus
161 | 
162 | peptide = "VAATLEILTLK/2"
163 | annotation_settings = {
164 |     "fragment_tol_mass": 0.05,
165 |     "fragment_tol_mode": "Da",
166 |     "ion_types": "aby",
167 |     "max_ion_charge": 2,
168 |     "neutral_losses": {"NH3": -17.026549, "H2O": -18.010565},
169 | }
170 | 
171 | usi_top = "mzspec:PXD022531:j12541_C5orf38:scan:12368"
172 | spectrum_top = sus.MsmsSpectrum.from_usi(usi_top)
173 | spectrum_top.annotate_proforma(peptide, **annotation_settings)
174 | 
175 | usi_bottom = "mzspec:PXD022531:b11156_PRAMEF17:scan:22140"
176 | spectrum_bottom = sus.MsmsSpectrum.from_usi(usi_bottom)
177 | spectrum_bottom.annotate_proforma(peptide, **annotation_settings)
178 | 
179 | fig = sup.facet(
180 |     spec_top=spectrum_top,
181 |     spec_mass_errors=spectrum_top,
182 |     spec_bottom=spectrum_bottom,
183 |     mass_errors_kws={"plot_unknown": False},
184 |     height=7,
185 |     width=10.5,
186 | )
187 | plt.savefig("facet.png", dpi=300, bbox_inches="tight", transparent=True)
188 | plt.close()
189 | ```
190 | 
191 | ![Facet plot](facet.png)
192 | 
193 | ## Interactive plotting
194 | 
195 | Besides the standard plotting functionality in `spectrum_utils.plot`, spectrum_utils also contains interactive plotting functionality in `spectrum_utils.iplot`.
196 | `iplot` is a drop-in replacement for `plot`; only the import statement needs to be changed to produce interactive plots.
197 | 
198 | Interactive plot of an individual spectrum:
199 | 
200 | <script src="https://cdn.jsdelivr.net/npm/vega@5"></script>
201 | <script src="https://cdn.jsdelivr.net/npm/vega-lite@3"></script>
202 | <script src="https://cdn.jsdelivr.net/npm/vega-embed@4"></script>
203 | 
204 | ```python
205 | import spectrum_utils.iplot as sup
206 | import spectrum_utils.spectrum as sus
207 | 
208 | 
209 | usi = "mzspec:PXD004732:01650b_BC2-TUM_first_pool_53_01_01-3xHCD-1h-R2:scan:41840"
210 | spectrum = sus.MsmsSpectrum.from_usi(usi)
211 | spectrum.annotate_proforma("WNQLQAFWGTGK", 10, "ppm", ion_types="aby")
212 | 
213 | chart = sup.spectrum(spectrum)
214 | chart.properties(width=640, height=400).save("iplot_spectrum.json")
215 | ```
216 | 
217 | <div id="spectrum"></div>
218 | 
219 | Interactive mirror plot of two spectra:
220 | 
221 | ```python
222 | import spectrum_utils.iplot as sup
223 | import spectrum_utils.spectrum as sus
224 | 
225 | 
226 | peptide = "DLTDYLM[Oxidation]K"
227 | usi_top = "mzspec:MSV000079960:DY_HS_Exp7-Ad1:scan:30372"
228 | spectrum_top = sus.MsmsSpectrum.from_usi(usi_top)
229 | spectrum_top.annotate_proforma(peptide, 0.5, "Da", ion_types="aby")
230 | usi_bottom = "mzspec:MSV000080679:j11962_C1orf144:scan:10671"
231 | spectrum_bottom = sus.MsmsSpectrum.from_usi(usi_bottom)
232 | spectrum_bottom.annotate_proforma(peptide, 0.5, "Da", ion_types="aby")
233 | 
234 | chart = sup.mirror(spectrum_top, spectrum_bottom)
235 | chart.properties(width=640, height=400).save("iplot_mirror.json")
236 | ```
237 | 
238 | <div id="mirror"></div>
239 | 
240 | <script type="text/javascript">
241 |   var spectrum = "https://raw.githubusercontent.com/bittremieux-lab/spectrum_utils/main/docs/src/iplot_spectrum.json";
242 |   var mirror = "https://raw.githubusercontent.com/bittremieux-lab/spectrum_utils/main/docs/src/iplot_mirror.json";
243 |   vegaEmbed('#spectrum', spectrum).then(function(result) {
244 |   }).catch(console.error);
245 |   vegaEmbed('#mirror', mirror).then(function(result) {
246 |   }).catch(console.error);
247 | </script>
248 | 
249 | For more information on how to manipulate these interactive plots, see the [Vega-Altair documentation](https://altair-viz.github.io/index.html).
250 | 
251 | Interactive plots can be [saved](https://altair-viz.github.io/user_guide/saving_charts.html) as html files or other output formats, and can be embedded as JSON into web pages using [Vega-Embed](https://github.com/vega/vega-embed).
252 | 
253 | ## Miscellaneous
254 | 
255 | ### Peak colors
256 | 
257 | By default, peaks are colored based on their [ion type](ion_types) as follows:
258 | 
259 | - <svg width="10" height="10"><rect width="10" height="10" style="fill:#388E3C;stroke-width:1;stroke:rgb(0,0,0)" /></svg> a peptide fragments (`"a"`)
260 | - <svg width="10" height="10"><rect width="10" height="10" style="fill:#1976D2;stroke-width:1;stroke:rgb(0,0,0)" /></svg> b peptide fragments (`"b"`)
261 | - <svg width="10" height="10"><rect width="10" height="10" style="fill:#00796B;stroke-width:1;stroke:rgb(0,0,0)" /></svg> c peptide fragments (`"c"`)
262 | - <svg width="10" height="10"><rect width="10" height="10" style="fill:#7B1FA2;stroke-width:1;stroke:rgb(0,0,0)" /></svg> x peptide fragments (`"x"`)
263 | - <svg width="10" height="10"><rect width="10" height="10" style="fill:#D32F2F;stroke-width:1;stroke:rgb(0,0,0)" /></svg> y peptide fragments (`"y"`)
264 | - <svg width="10" height="10"><rect width="10" height="10" style="fill:#F57C00;stroke-width:1;stroke:rgb(0,0,0)" /></svg> z peptide fragments (`"z"`)
265 | - <svg width="10" height="10"><rect width="10" height="10" style="fill:#FBC02D;stroke-width:1;stroke:rgb(0,0,0)" /></svg> internal fragment ions (`"m"`)
266 | - <svg width="10" height="10"><rect width="10" height="10" style="fill:#455A64;stroke-width:1;stroke:rgb(0,0,0)" /></svg> immonium ions (`"I"`)
267 | - <svg width="10" height="10"><rect width="10" height="10" style="fill:#512DA8;stroke-width:1;stroke:rgb(0,0,0)" /></svg> intact precursor ions (`"p"`)
268 | - <svg width="10" height="10"><rect width="10" height="10" style="fill:#212121;stroke-width:1;stroke:rgb(0,0,0)" /></svg> unknown and unannotated ions
269 | 
270 | To change these colors, overwrite values in the `spectrum_utils.plot.colors` dictionary with your preferred colors:
271 | 
272 | ```python
273 | import spectrum_utils.plot as sup
274 | 
275 | 
276 | sup.colors["y"] = "#FF1493"
277 | ```
278 | 


--------------------------------------------------------------------------------
/docs/src/proforma_ast.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bittremieux-lab/spectrum_utils/a12f453040bb9bd36a261ae46063dfe828d4fe5e/docs/src/proforma_ast.png


--------------------------------------------------------------------------------
/docs/src/proforma_ex1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bittremieux-lab/spectrum_utils/a12f453040bb9bd36a261ae46063dfe828d4fe5e/docs/src/proforma_ex1.png


--------------------------------------------------------------------------------
/docs/src/proforma_ex2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bittremieux-lab/spectrum_utils/a12f453040bb9bd36a261ae46063dfe828d4fe5e/docs/src/proforma_ex2.png


--------------------------------------------------------------------------------
/docs/src/proforma_ex3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bittremieux-lab/spectrum_utils/a12f453040bb9bd36a261ae46063dfe828d4fe5e/docs/src/proforma_ex3.png


--------------------------------------------------------------------------------
/docs/src/quickstart.md:
--------------------------------------------------------------------------------
 1 | # Quickstart
 2 | 
 3 | Here we briefly introduce spectrum_utils' spectrum processing and visualization functionality:
 4 | 
 5 | - Load a spectrum from an online data resource by its [Universal Spectrum Identifier (USI)](https://www.psidev.info/usi).
 6 | - Restrict the mass range to 100–1400 _m_/_z_ to filter out irrelevant peaks.
 7 | - Remove the precursor peak.
 8 | - Remove low-intensity noise peaks by only retaining peaks that are at least 5% of the base peak intensity and restrict the total number of peaks to the 50 most intense peaks.
 9 | - Scale the peak intensities by their square root to de-emphasize overly intense peaks.
10 | - Annotate peaks corresponding to a, b, and y peptide fragments in the spectrum based on a [ProForma 2.0](https://www.psidev.info/proforma) peptide string.
11 | - Visualize the spectrum with the annotated peaks highlighted.
12 | 
13 | IO functionality to read spectra from MS data files is not directly included in spectrum_utils.
14 | Instead, you can use excellent libraries to read a variety of mass spectrometry data formats such as [Pyteomics](https://pyteomics.readthedocs.io/) or [pymzML](https://pymzml.readthedocs.io/).
15 | 
16 | ```python
17 | import matplotlib.pyplot as plt
18 | import spectrum_utils.plot as sup
19 | import spectrum_utils.spectrum as sus
20 | 
21 | 
22 | # Retrieve the spectrum by its USI.
23 | usi = "mzspec:PXD004732:01650b_BC2-TUM_first_pool_53_01_01-3xHCD-1h-R2:scan:41840"
24 | peptide = "WNQLQAFWGTGK"
25 | spectrum = sus.MsmsSpectrum.from_usi(usi)
26 | 
27 | # Process the spectrum.
28 | fragment_tol_mass, fragment_tol_mode = 10, "ppm"
29 | spectrum = (
30 |     spectrum.set_mz_range(min_mz=100, max_mz=1400)
31 |     .remove_precursor_peak(fragment_tol_mass, fragment_tol_mode)
32 |     .filter_intensity(min_intensity=0.05, max_num_peaks=50)
33 |     .scale_intensity("root")
34 |     .annotate_proforma(
35 |         peptide, fragment_tol_mass, fragment_tol_mode, ion_types="aby"
36 |     )
37 | )
38 | 
39 | # Plot the spectrum.
40 | fig, ax = plt.subplots(figsize=(12, 6))
41 | sup.spectrum(spectrum, grid=False, ax=ax)
42 | ax.spines["right"].set_visible(False)
43 | ax.spines["top"].set_visible(False)
44 | plt.savefig("quickstart.png", bbox_inches="tight", dpi=300, transparent=True)
45 | plt.close()
46 | ```
47 | 
48 | As demonstrated, each of the processing steps can be achieved using a single, high-level function call.
49 | These calls can be chained together to easily perform multiple processing steps.
50 | 
51 | Spectrum plotting can similarly be achieved using a high-level function call, resulting in the following figure:
52 | 
53 | ![](quickstart.png)
54 | 
55 | Note that several processing steps modify the peak _m_/_z_ and intensity values and are thus not idempotent.
56 | It is recommended to make a copy of the `MsmsSpectrum` object prior to any processing if the raw peak values need to remain available as well.
57 | 


--------------------------------------------------------------------------------
/docs/src/quickstart.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bittremieux-lab/spectrum_utils/a12f453040bb9bd36a261ae46063dfe828d4fe5e/docs/src/quickstart.png


--------------------------------------------------------------------------------
/docs/src/runtime.md:
--------------------------------------------------------------------------------
  1 | # Computational efficiency
  2 | 
  3 | Spectrum processing in spectrum_utils has been optimized for computational efficiency using [NumPy](https://www.numpy.org/) and [Numba](https://numba.pydata.org/) to be able to process thousands of spectra per second.
  4 | 
  5 | As shown below, spectrum_utils (version 0.4.0) is faster than alternative libraries, such as [pymzML](https://github.com/pymzml/pymzML/) (version 2.5.2) and [pyOpenMS](https://pyopenms.readthedocs.io/) (version 2.7.0), when performing typical spectrum processing tasks, including the following steps:
  6 | 
  7 | - The _m_/_z_ range is set to 100–1400 _m_/_z_.
  8 | - The precursor peak is removed.
  9 | - Low-intensity noise peaks are removed.
 10 | - Peak intensities are scaled by their square root.
 11 | 
 12 | ```python
 13 | import time
 14 | 
 15 | import matplotlib.pyplot as plt
 16 | import numpy as np
 17 | import pyopenms
 18 | import pyteomics.mgf
 19 | import seaborn as sns
 20 | import spectrum_utils.spectrum as sus
 21 | from pymzml.spec import Spectrum
 22 | 
 23 | 
 24 | min_peaks = 10
 25 | min_mz, max_mz = 100, 1400
 26 | fragment_tol_mass, fragment_tol_mode = 0.02, "Da"
 27 | min_intensity = 0.05
 28 | max_num_peaks = 150
 29 | 
 30 | 
 31 | def time_spectrum_utils(mgf_filename):  # -> per-spectrum runtimes (s)
 32 |     runtimes = []
 33 |     for spec_dict in pyteomics.mgf.read(mgf_filename):
 34 |         # Skip spectra with too few peaks or without a precursor charge.
 35 |         if (
 36 |             len(spec_dict["m/z array"]) < min_peaks
 37 |             or "charge" not in spec_dict["params"]
 38 |         ):
 39 |             continue
 40 |         # Spectrum construction is not part of the timed region.
 41 |         spectrum = sus.MsmsSpectrum(
 42 |             spec_dict["params"]["title"],
 43 |             spec_dict["params"]["pepmass"][0],
 44 |             spec_dict["params"]["charge"][0],
 45 |             spec_dict["m/z array"],
 46 |             spec_dict["intensity array"],
 47 |             float(spec_dict["params"]["rtinseconds"]),
 48 |         )._inner  # NOTE(review): private attribute; confirm its role
 49 | 
 50 |         start_time = time.time()
 51 |         # Timed region: only the four processing calls below.
 52 |         spectrum.set_mz_range(min_mz, max_mz)
 53 |         spectrum.remove_precursor_peak(fragment_tol_mass, fragment_tol_mode)
 54 |         spectrum.filter_intensity(min_intensity, max_num_peaks)
 55 |         spectrum.scale_intensity("root", 1)
 56 | 
 57 |         runtimes.append(time.time() - start_time)
 58 | 
 59 |     return runtimes
 60 | 
 61 | 
 62 | def time_pymzml(mgf_filename):  # -> per-spectrum runtimes (s)
 63 |     runtimes = []
 64 |     for spec_dict in pyteomics.mgf.read(mgf_filename):
 65 |         # Skip spectra with too few peaks or without a precursor charge.
 66 |         if (
 67 |             len(spec_dict["m/z array"]) < min_peaks
 68 |             or "charge" not in spec_dict["params"]
 69 |         ):
 70 |             continue
 71 |         # Loading the peaks into pymzML is not part of the timed region.
 72 |         spec = Spectrum()
 73 |         spec.set_peaks(
 74 |             [*zip(spec_dict["m/z array"], spec_dict["intensity array"])], "raw"
 75 |         )
 76 | 
 77 |         start_time = time.time()
 78 |         # Timed region: only the processing calls below.
 79 |         spec.reduce("raw", (min_mz, max_mz))
 80 |         spec.remove_precursor_peak()
 81 |         spec.remove_noise(noise_level=min_intensity)
 82 |         spec /= np.amax(spec.i)  # divide by the largest value in `spec.i`
 83 |         spec.i = np.sqrt(spec.i)  # square root scaling
 84 | 
 85 |         runtimes.append(time.time() - start_time)
 86 | 
 87 |     return runtimes
 88 | 
 89 | 
 90 | def time_pyopenms(mgf_filename):
 91 |     experiment = pyopenms.MSExperiment()
 92 |     pyopenms.MascotGenericFile().load(mgf_filename, experiment)
 93 | 
 94 |     runtimes = []
 95 |     for spectrum in experiment:
 96 |         # Omit invalid spectra.
 97 |         if (
 98 |             len(spectrum.get_peaks()[0]) < min_peaks
 99 |             or spectrum.getPrecursors()[0].getCharge() == 0
100 |         ):
101 |             continue
102 | 
103 |         start_time = time.time()
104 | 
105 |         # Set the m/z range.
106 |         filtered_mz, filtered_intensity = [], []
107 |         for mz, intensity in zip(*spectrum.get_peaks()):
108 |             if min_mz <= mz <= max_mz:
109 |                 filtered_mz.append(mz)
110 |                 filtered_intensity.append(intensity)
111 |             spectrum.set_peaks((filtered_mz, filtered_intensity))
112 |         # Remove the precursor peak.
113 |         parent_peak_mower = pyopenms.ParentPeakMower()
114 |         parent_peak_mower_params = parent_peak_mower.getDefaults()
115 |         parent_peak_mower_params.setValue(
116 |             b"window_size", fragment_tol_mass, b""
117 |         )
118 |         parent_peak_mower.setParameters(parent_peak_mower_params)
119 |         parent_peak_mower.filterSpectrum(spectrum)
120 |         # Filter by base peak intensity percentage.
121 |         pyopenms.Normalizer().filterSpectrum(spectrum)
122 |         threshold_mower = pyopenms.ThresholdMower()
123 |         threshold_mower_params = threshold_mower.getDefaults()
124 |         threshold_mower_params.setValue(b"threshold", min_intensity, b"")
125 |         threshold_mower.setParameters(threshold_mower_params)
126 |         threshold_mower.filterSpectrum(spectrum)
127 |         # Restrict to the most intense peaks.
128 |         n_largest = pyopenms.NLargest()
129 |         n_largest_params = n_largest.getDefaults()
130 |         n_largest_params.setValue(b"n", max_num_peaks, b"")
131 |         n_largest.setParameters(n_largest_params)
132 |         n_largest.filterSpectrum(spectrum)
133 |         # Scale the peak intensities by their square root and normalize.
134 |         pyopenms.SqrtMower().filterSpectrum(spectrum)
135 |         pyopenms.Normalizer().filterSpectrum(spectrum)
136 | 
137 |         runtimes.append(time.time() - start_time)
138 | 
139 |     return runtimes
140 | 
141 | 
142 | mgf_filename = "iPRG2012.mgf"
143 | runtimes_spectrum_utils = time_spectrum_utils(mgf_filename)
144 | runtimes_pyopenms = time_pyopenms(mgf_filename)
145 | runtimes_pymzml = time_pymzml(mgf_filename)
146 | # Compare the per-spectrum runtimes in a box plot with a log-scaled y-axis.
147 | fig, ax = plt.subplots()
148 | sns.boxplot(
149 |     data=[runtimes_spectrum_utils, runtimes_pymzml, runtimes_pyopenms],
150 |     flierprops={"markersize": 2},
151 |     ax=ax,
152 | )
153 | ax.set_yscale("log")
154 | ax.xaxis.set_ticklabels(("spectrum_utils", "pymzML", "pyOpenMS"))
155 | ax.set_ylabel("Processing time per spectrum (s)")
156 | sns.despine()
157 | plt.savefig("runtime.png", bbox_inches="tight", dpi=300, transparent=True)
158 | plt.close()
159 | ```
160 | 
161 | ![](runtime.png)
162 | 
163 | 
164 | ## JIT compilation
165 | 
166 | Note that the significant outlier for spectrum_utils is caused by Numba's JIT compilation of the first method call, allowing subsequent calls to be made very efficiently.
167 | 
168 | If the user knows in advance that only a single method call needs to be made, Numba's JIT compilation can be disabled to avoid this overhead by setting the `NUMBA_DISABLE_JIT` environment variable to `1`.
169 | See the [Numba documentation](https://numba.pydata.org/numba-doc/latest/user/troubleshoot.html#disabling-jit-compilation) for more information.
170 | 


--------------------------------------------------------------------------------
/docs/src/runtime.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bittremieux-lab/spectrum_utils/a12f453040bb9bd36a261ae46063dfe828d4fe5e/docs/src/runtime.png


--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
 1 | name: spectrum_utils
 2 | channels:
 3 |   - defaults
 4 |   - bioconda
 5 |   - conda-forge
 6 | dependencies:
 7 |   - altair  # optional: interactive plotting (`iplot` extra in setup.cfg)
 8 |   - fastobo
 9 |   - lark>=1.0
10 |   - matplotlib>=3.5  # same lower bound as install_requires in setup.cfg
11 |   - numba>=0.57
12 |   - numpy
13 |   - pandas  # optional: interactive plotting (`iplot` extra in setup.cfg)
14 |   - platformdirs
15 |   - pyteomics>=4.5
16 |   - python>=3.10
17 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = ["setuptools>=45", "wheel", "setuptools_scm[toml]>=6.0"]
 3 | build-backend = "setuptools.build_meta"
 4 | 
 5 | [tool.setuptools_scm]
 6 | 
 7 | [tool.ruff]
 8 | line-length = 79
 9 | target-version = "py310"
10 | include = ["*.py"]
11 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
 1 | [metadata]
 2 | name = spectrum_utils
 3 | author = Wout Bittremieux
 4 | author_email = wout.bittremieux@uantwerpen.be
 5 | description =  Mass spectrometry utility functions
 6 | long_description = file: README.md
 7 | long_description_content_type = text/markdown
 8 | url = https://github.com/bittremieux/spectrum_utils
 9 | project_urls =
10 |     Documentation = https://spectrum-utils.readthedocs.io/
11 |     Bug Tracker = https://github.com/bittremieux/spectrum_utils/issues
12 | license = Apache 2.0
13 | classifiers =
14 |     Intended Audience :: Science/Research
15 |     License :: OSI Approved :: Apache Software License
16 |     Operating System :: MacOS
17 |     Operating System :: Microsoft :: Windows
18 |     Operating System :: Unix
19 |     Programming Language :: Python :: 3
20 |     Topic :: Scientific/Engineering :: Bio-Informatics
21 | 
22 | [options]
23 | packages = find:
24 | include_package_data = True
25 | python_requires = >=3.10
26 | install_requires =
27 |     fastobo
28 |     lark>=1.0
29 |     matplotlib>=3.5
30 |     numba>=0.57
31 |     numpy
32 |     platformdirs
33 |     pyteomics>=4.5
34 | 
35 | [options.extras_require]
36 | dev =
37 |     pytest
38 |     pytest-cov
39 |     ruff
40 | docs =
41 |     myst-parser
42 |     numpydoc>=1.1.0
43 |     sphinx>=3.5.3
44 |     sphinx-rtd-theme>=0.5.1
45 |     sphinx_markdown_tables
46 | iplot =
47 |     altair
48 |     pandas
49 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import setuptools
2 | 
3 | 
4 | if __name__ == "__main__":
5 |     setuptools.setup()  # no arguments: configuration lives in setup.cfg
6 | 


--------------------------------------------------------------------------------
/spectrum_utils.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bittremieux-lab/spectrum_utils/a12f453040bb9bd36a261ae46063dfe828d4fe5e/spectrum_utils.png


--------------------------------------------------------------------------------
/spectrum_utils/__init__.py:
--------------------------------------------------------------------------------
 1 | try:
 2 |     from importlib.metadata import version, PackageNotFoundError
 3 | 
 4 |     try:
 5 |         __version__ = version("spectrum_utils")
 6 |     except PackageNotFoundError:
 7 |         pass  # not installed: `__version__` stays undefined
 8 | except ImportError:  # no importlib.metadata: fall back to pkg_resources
 9 |     from pkg_resources import get_distribution, DistributionNotFound
10 | 
11 |     try:
12 |         __version__ = get_distribution("spectrum_utils").version
13 |     except DistributionNotFound:
14 |         pass  # not installed: `__version__` stays undefined
15 | 
16 | 
17 | __all__ = [  # names exported by `from spectrum_utils import *`
18 |     "fragment_annotation",
19 |     "iplot",
20 |     "plot",
21 |     "proforma",
22 |     "spectrum",
23 |     "utils",
24 | ]
25 | 


--------------------------------------------------------------------------------
/spectrum_utils/fragment_annotation.py:
--------------------------------------------------------------------------------
  1 | import operator
  2 | import re
  3 | from typing import Any, Dict, List, Optional, Tuple
  4 | 
  5 | try:
  6 |     import pyteomics.cmass as pmass
  7 | except ImportError:
  8 |     import pyteomics.mass as pmass
  9 | 
 10 | from spectrum_utils import proforma
 11 | 
 12 | 
 13 | # Amino acid masses: the Pyteomics standard set plus special amino acids.
 14 | AA_MASS = {
 15 |     **pmass.std_aa_mass,
 16 |     # Aspartic acid / asparagine (ambiguous mass).
 17 |     # "B": 0,
 18 |     # Glutamic acid / glutamine (ambiguous mass).
 19 |     # "Z": 0,
 20 |     # Leucine / isoleucine.
 21 |     "J": 113.084_064,
 22 |     # Selenocysteine (in Pyteomics).
 23 |     # "U": 150.95363,
 24 |     # Pyrrolysine (in Pyteomics).
 25 |     # "O": 237.14772,
 26 |     # Any amino acid, gaps (zero mass).
 27 |     "X": 0,
 28 | }
 29 | 
 30 | # Mass offset between consecutive isotopic peaks (13C - 12C).
 31 | C13_MASS_DIFF = 1.003_354
 32 | 
 33 | # Common neutral losses, as negative mass differences keyed by formula.
 34 | NEUTRAL_LOSS = {
 35 |     # No neutral loss.
 36 |     None: 0,
 37 |     # Hydrogen.
 38 |     "H": -1.007_825,
 39 |     # Ammonia.
 40 |     "NH3": -17.026_549,
 41 |     # Water.
 42 |     "H2O": -18.010_565,
 43 |     # Carbon monoxide.
 44 |     "CO": -27.994_915,
 45 |     # Carbon dioxide.
 46 |     "CO2": -43.989_829,
 47 |     # Formamide.
 48 |     "HCONH2": -45.021_464,
 49 |     # Formic acid.
 50 |     "HCOOH": -46.005_479,
 51 |     # Methanesulfenic acid.
 52 |     "CH4OS": -63.998_301,
 53 |     # Sulfur trioxide.
 54 |     "SO3": -79.956_818,
 55 |     # Metaphosphoric acid.
 56 |     "HPO3": -79.966_331,
 57 |     # Mercaptoacetamide.
 58 |     "C2H5NOS": -91.009_195,
 59 |     # Mercaptoacetic acid.
 60 |     "C2H4O2S": -91.993_211,
 61 |     # Phosphoric acid.
 62 |     "H3PO4": -97.976_896,
 63 | }
 64 | 
 65 | SUPPORTED_IONS = "?abcxyzIm_prf"  # valid first characters of an ion type
 66 | 
 67 | 
 68 | class FragmentAnnotation:
 69 |     def __init__(
 70 |         self,
 71 |         ion_type: str,
 72 |         neutral_loss: Optional[str] = None,
 73 |         isotope: int = 0,
 74 |         charge: Optional[int] = None,
 75 |         adduct: Optional[str] = None,
 76 |         analyte_number: Optional[int] = None,
 77 |         mz_delta: Optional[Tuple[float, str]] = None,
 78 |     ) -> None:
 79 |         """
 80 |         Individual fragment ion annotation.
 81 | 
 82 |         This fragment annotation format is derived from the PSI peak
 83 |         interpretation specification:
 84 |         https://docs.google.com/document/d/1yEUNG4Ump6vnbMDs4iV4s3XISflmOkRAyqUuutcCG2w/edit?usp=sharing
 85 | 
 86 |         Fragment notations have the following format:
 87 | 
 88 |         (analyte_number)[ion_type](neutral_loss)(isotope)(charge)(adduct)(mz_delta)
 89 | 
 90 |         Examples:
 91 | 
 92 |         - "y4-H2O+2i^2[M+H+Na]" : Fragment annotation for a y4 ion, with a
 93 |           water neutral loss, the second isotopic peak, charge 2, adduct
 94 |           [M+H+Na].
 95 | 
 96 |         Parameters
 97 |         ----------
 98 |         ion_type : str
 99 |             Specifies the basic type of ion being described.
100 |             Possible prefixes are:
101 | 
102 |             - "?": unknown ion
103 |             - "a", "b", "c", "x", "y", "z": corresponding peptide fragments
104 |             - "I": immonium ion
105 |             - "m": internal fragment ion
106 |             - "_": named compound
107 |             - "p": precursor ion
108 |             - "r": reporter ion (isobaric label)
109 |             - "f": chemical formula
110 |         neutral_loss : Optional[str]
111 |             A string of neutral loss(es), described by their molecular formula.
112 |             The default is no neutral loss. Note that the neutral loss string
113 |             must include the sign (typically "-" for a neutral loss).
114 |         isotope : int
115 |             The isotope number above or below the monoisotope. The default is
116 |             the monoisotopic peak (0).
117 |         charge : Optional[int]
118 |             The charge of the fragment. The default is an unknown charge (only
119 |             valid for unknown ions).
120 |         adduct : Optional[str]
121 |             The adduct that ionized the fragment. The default is a hydrogen
122 |             adduct matching the charge ([M+xH]).
123 |         mz_delta : Optional[Tuple[float, str]]
124 |             The m/z delta representing the observed m/z minus the theoretical
125 |             m/z and its unit ("Da" or "ppm").
126 |         """
127 |         if ion_type[0] in "GLXS":
128 |             raise NotImplementedError(
129 |                 "Advanced ion types are not yet supported"
130 |             )
131 |         elif ion_type[0] not in SUPPORTED_IONS:
132 |             raise ValueError("Unknown ion type")
133 |         if ion_type == "?" and (
134 |             neutral_loss is not None
135 |             or isotope != 0
136 |             or charge is not None
137 |             or adduct is not None
138 |             or analyte_number is not None
139 |             or mz_delta is not None
140 |         ):
141 |             raise ValueError(
142 |                 "Unknown ions should not contain additional information"
143 |             )
144 |         self.ion_type = ion_type
145 |         self.neutral_loss = neutral_loss
146 |         self.isotope = isotope
147 |         self.charge = charge
148 |         self.adduct = f"[M+{self.charge}H]" if adduct is None else adduct
149 |         self.analyte_number = analyte_number
150 |         self.mz_delta = mz_delta
151 | 
152 |     @property
153 |     def mz_delta(self) -> Optional[Tuple[float, str]]:
154 |         return self._mz_delta
155 | 
156 |     @mz_delta.setter
157 |     def mz_delta(self, mz_delta: Optional[Tuple[float, str]]):
158 |         if mz_delta is not None and mz_delta[1] not in ("Da", "ppm"):
159 |             raise ValueError(
160 |                 "The m/z delta must be specified in Dalton or ppm units"
161 |             )
162 |         self._mz_delta = mz_delta
163 | 
164 |     @property
165 |     def charge(self) -> Optional[int]:
166 |         return self._charge
167 | 
168 |     @charge.setter
169 |     def charge(self, charge: Optional[int]):
170 |         if self.ion_type == "?" and charge is not None:
171 |             raise ValueError("Invalid charge for unknown ions")
172 |         elif self.ion_type != "?" and (charge is None or charge <= 0):
173 |             raise ValueError(
174 |                 "The charge must be specified and strictly positive for known "
175 |                 "ion types"
176 |             )
177 |         self._charge = charge
178 | 
179 |     def __repr__(self):
180 |         return str(self)
181 | 
182 |     def __str__(self) -> str:
183 |         if self.ion_type == "?":
184 |             return "?"
185 |         else:
186 |             annot_str = []
187 |             if self.analyte_number is not None:
188 |                 annot_str.append(f"{self.analyte_number}@")
189 |             annot_str.append(self.ion_type)
190 |             if self.neutral_loss is not None:
191 |                 annot_str.append(self.neutral_loss)
192 |             if abs(self.isotope) == 1:
193 |                 annot_str.append("+i" if self.isotope > 0 else "-i")
194 |             elif self.isotope != 0:
195 |                 annot_str.append(f"{self.isotope:+}i")
196 |             if self.charge is not None and self.charge > 1:
197 |                 annot_str.append(f"^{self.charge}")
198 |             if re.match(r"\[M\+\d+H\]", self.adduct) is None:
199 |                 annot_str.append(self.adduct)
200 |             if self.mz_delta is not None:
201 |                 annot_str.append(
202 |                     f"/{self.mz_delta[0]}"
203 |                     f"{'ppm' if self.mz_delta[1] == 'ppm' else ''}"
204 |                 )
205 |             return "".join(annot_str)
206 | 
207 |     def __eq__(self, other: Any) -> bool:
208 |         if not isinstance(other, FragmentAnnotation):
209 |             return False
210 |         return (
211 |             self.ion_type == other.ion_type
212 |             and self.neutral_loss == other.neutral_loss
213 |             and self.isotope == other.isotope
214 |             and self.charge == other.charge
215 |             and self.adduct == other.adduct
216 |             and self.analyte_number == other.analyte_number
217 |             and self.mz_delta == other.mz_delta
218 |         )
219 | 
220 | 
221 | class PeakInterpretation:
222 |     _unknown = FragmentAnnotation("?")
223 | 
224 |     def __init__(self):
225 |         """
226 |         Fragment annotation(s) to interpret a specific peak.
227 |         """
228 |         self.fragment_annotations = []
229 | 
230 |     def __repr__(self) -> str:
231 |         return str(self)
232 | 
233 |     def __str__(self) -> str:
234 |         # If no fragment annotations have been specified, interpret as an
235 |         # unknown ion.
236 |         if len(self.fragment_annotations) > 0:
237 |             return ",".join([str(a) for a in self.fragment_annotations])
238 |         else:
239 |             return str(self._unknown)
240 | 
241 |     def __eq__(self, other: Any) -> bool:
242 |         return isinstance(other, PeakInterpretation) and str(self) == str(
243 |             other
244 |         )
245 | 
246 |     def __getitem__(self, key) -> FragmentAnnotation:
247 |         if len(self.fragment_annotations) > 0:
248 |             return self.fragment_annotations[key]
249 |         else:
250 |             return self._unknown
251 | 
252 | 
253 | def get_theoretical_fragments(
254 |     proteoform: proforma.Proteoform,
255 |     ion_types: str = "by",
256 |     *,
257 |     max_isotope: int = 0,
258 |     max_charge: int = 1,
259 |     neutral_losses: Optional[Dict[Optional[str], float]] = None,
260 | ) -> List[Tuple[FragmentAnnotation, float]]:
261 |     """
262 |     Get fragment annotations with their theoretical masses for the given
263 |     sequence.
264 | 
265 |     Parameters
266 |     ----------
267 |     proteoform : proforma.Proteoform
268 |         The proteoform for which the fragment annotations will be
269 |         generated.
270 |     ion_types : str
271 |         The ion types to generate. Can be any combination of 'a', 'b',
272 |         'c', 'x', 'y', and 'z' for peptide fragments, 'I' for immonium
273 |         ions, 'm' for internal fragment ions, 'p' for the precursor ion,
274 |         and 'r' for reporter ions. The default is 'by', which means that
275 |         b and y peptide ions will be generated.
276 |     max_isotope : int
277 |         The maximum isotope to consider (the default is 0 to only
278 |         generate the monoisotopic peaks).
279 |     max_charge : int
280 |         All fragments up to and including the given charge will be
281 |         generated (the default is 1 to only generate singly-charged
282 |         fragments).
283 |     neutral_losses : Optional[Dict[Optional[str], float]]
284 |         A dictionary with neutral loss names and (negative) mass
285 |         differences to be considered.
286 | 
287 |     Returns
288 |     -------
289 |     List[Tuple[FragmentAnnotation, float]]
290 |         All possible fragment annotations and their theoretical m/z in
291 |         ascending m/z order.
292 |     """
293 |     for ion_type in ion_types:
294 |         if ion_type not in SUPPORTED_IONS:
295 |             raise ValueError(
296 |                 f"{ion_type} is not a supported ion type ({SUPPORTED_IONS})"
297 |             )
298 |     if "B" in proteoform.sequence:
299 |         raise ValueError(
300 |             "Explicitly specify aspartic acid (D) or asparagine (N) instead of"
301 |             " the ambiguous B to compute the fragment annotations"
302 |         )
303 |     if "Z" in proteoform.sequence:
304 |         raise ValueError(
305 |             "Explicitly specify glutamic acid (E) or glutamine (Q) instead of "
306 |             "the ambiguous Z to compute the fragment annotations"
307 |         )
308 | 
309 |     neutral_losses = {None: 0} if neutral_losses is None else neutral_losses
310 | 
311 |     base_fragments = []
312 | 
313 |     # Generate all peptide fragments ('a', 'b', 'c', 'x', 'y', 'z') and
314 |     # calculate their theoretical masses.
315 |     # Generate all N-terminal peptide fragments.
316 |     for ion_type in set("abc") & set(ion_types):
317 |         mod_i, mod_mass = 0, 0
318 |         for fragment_i in range(1, len(proteoform.sequence)):
319 |             fragment_sequence = proteoform.sequence[:fragment_i]
320 |             # Ignore unlocalized modifications.
321 |             while (
322 |                 proteoform.modifications is not None
323 |                 and mod_i < len(proteoform.modifications)
324 |                 and isinstance(proteoform.modifications[mod_i].position, str)
325 |                 and proteoform.modifications[mod_i].position != "N-term"
326 |             ):
327 |                 mod_i += 1
328 |             # Include prefix modifications.
329 |             while (
330 |                 proteoform.modifications is not None
331 |                 and mod_i < len(proteoform.modifications)
332 |                 and (
333 |                     proteoform.modifications[mod_i].position == "N-term"
334 |                     or (
335 |                         isinstance(
336 |                             proteoform.modifications[mod_i].position, int
337 |                         )
338 |                         and proteoform.modifications[mod_i].position
339 |                         < fragment_i
340 |                     )
341 |                 )
342 |             ):
343 |                 mod_mass += proteoform.modifications[mod_i].mass
344 |                 mod_i += 1
345 |             base_fragments.append(
346 |                 (fragment_sequence, ion_type, fragment_i, mod_mass)
347 |             )
348 |     # Generate all C-terminal peptide fragments.
349 |     for ion_type in set("xyz") & set(ion_types):
350 |         if proteoform.modifications is not None:
351 |             mod_i, mod_mass = len(proteoform.modifications) - 1, 0
352 |         else:
353 |             mod_i, mod_mass = None, 0
354 |         for fragment_i in range(len(proteoform.sequence) - 1, 0, -1):
355 |             fragment_sequence = proteoform.sequence[fragment_i:]
356 |             # Include suffix modifications.
357 |             while (
358 |                 proteoform.modifications is not None
359 |                 and mod_i >= 0
360 |                 and (
361 |                     proteoform.modifications[mod_i].position == "C-term"
362 |                     or (
363 |                         isinstance(
364 |                             proteoform.modifications[mod_i].position, int
365 |                         )
366 |                         and proteoform.modifications[mod_i].position
367 |                         >= fragment_i
368 |                     )
369 |                 )
370 |             ):
371 |                 mod_mass += proteoform.modifications[mod_i].mass
372 |                 mod_i -= 1
373 |             base_fragments.append(
374 |                 (
375 |                     fragment_sequence,
376 |                     ion_type,
377 |                     len(proteoform.sequence) - fragment_i,
378 |                     mod_mass,
379 |                 )
380 |             )
381 | 
382 |     # Generate all internal fragment ions.
383 |     if "m" in ion_types:
384 |         # Skip internal fragments with start position 1, which are
385 |         # actually b ions.
386 |         for start_i in range(1, len(proteoform.sequence)):
387 |             mod_i_start, mod_mass = 0, 0
388 |             # Skip unlocalized and prefix modifications.
389 |             while (
390 |                 proteoform.modifications is not None
391 |                 and mod_i_start < len(proteoform.modifications)
392 |                 and (
393 |                     isinstance(
394 |                         proteoform.modifications[mod_i_start].position, str
395 |                     )
396 |                     or proteoform.modifications[mod_i_start].position < start_i
397 |                 )
398 |             ):
399 |                 mod_i_start += 1
400 |             mod_i_stop = mod_i_start
401 |             # Internal fragments of only one residue are encoded as
402 |             # immonium ions.
403 |             for stop_i in range(start_i + 2, len(proteoform.sequence)):
404 |                 fragment_sequence = proteoform.sequence[start_i:stop_i]
405 |                 # Include internal modifications.
406 |                 while (
407 |                     proteoform.modifications is not None
408 |                     and mod_i_stop < len(proteoform.modifications)
409 |                     and proteoform.modifications[mod_i_stop].position < stop_i
410 |                 ):
411 |                     mod_mass += proteoform.modifications[mod_i_stop].mass
412 |                     mod_i_stop += 1
413 |                 # Internal fragment mass calculation is equivalent to b
414 |                 # ion mass calculation.
415 |                 base_fragments.append(
416 |                     (
417 |                         fragment_sequence,
418 |                         "b",
419 |                         f"{start_i + 1}:{stop_i + 1}",
420 |                         mod_mass,
421 |                     )
422 |                 )
423 | 
424 |     # Generate unfragmented precursor ion(s).
425 |     if "p" in ion_types:
426 |         if proteoform.modifications is not None:
427 |             mod_mass = sum([mod.mass for mod in proteoform.modifications])
428 |         else:
429 |             mod_mass = 0
430 |         base_fragments.append((proteoform.sequence, "M", "p", mod_mass))
431 | 
432 |     fragments_masses = []
433 |     # Compute the theoretical fragment masses (using Pyteomics)
434 |     for fragment_sequence, ion_type, fragment_i, mod_mass in base_fragments:
435 |         for charge in range(1, max_charge + 1):
436 |             annot_type = "?"
437 |             if isinstance(fragment_i, str):
438 |                 if ":" in fragment_i:
439 |                     annot_type = f"m{fragment_i}"
440 |                 elif fragment_i == "p":
441 |                     annot_type = "p"
442 |             else:
443 |                 annot_type = f"{ion_type}{fragment_i}"
444 |             fragments_masses.append(
445 |                 (
446 |                     FragmentAnnotation(ion_type=annot_type, charge=charge),
447 |                     pmass.fast_mass(
448 |                         sequence=fragment_sequence,
449 |                         ion_type=ion_type,
450 |                         charge=charge,
451 |                         aa_mass=AA_MASS,
452 |                     )
453 |                     + mod_mass / charge,
454 |                 )
455 |             )
456 | 
457 |     # Generate all immonium ions (internal single amino acid from the
458 |     # combination of a type and y type cleavage).
459 |     if "I" in ion_types:
460 |         # Amino acid mass minus CO plus charge 1.
461 |         mass_diff = pmass.calculate_mass(formula="CO") - pmass.calculate_mass(
462 |             formula="H"
463 |         )
464 |         for aa, mass in AA_MASS.items():
465 |             if aa != "X":
466 |                 fragments_masses.append(
467 |                     (
468 |                         FragmentAnnotation(ion_type=f"I{aa}", charge=1),
469 |                         mass - mass_diff,
470 |                     )
471 |                 )
472 | 
473 |     # Generate isotopic peaks for all fragments.
474 |     isotope_fragments = []
475 |     for isotope in range(1, max_isotope + 1):
476 |         for fragment, mass in fragments_masses:
477 |             isotope_fragments.append(
478 |                 (
479 |                     FragmentAnnotation(
480 |                         ion_type=fragment.ion_type,
481 |                         isotope=isotope,
482 |                         charge=fragment.charge,
483 |                     ),
484 |                     mass + isotope * C13_MASS_DIFF / fragment.charge,
485 |                 )
486 |             )
487 |     fragments_masses.extend(isotope_fragments)
488 | 
489 |     # Generate all fragments that differ by a neutral loss from the base
490 |     # fragments.
491 |     neutral_loss_fragments = []
492 |     for neutral_loss, mass_diff in neutral_losses.items():
493 |         if neutral_loss is None:
494 |             continue
495 |         neutral_loss = f"{'-' if mass_diff < 0 else '+'}{neutral_loss}"
496 |         for fragment, mass in fragments_masses:
497 |             if (fragment_mass := mass + mass_diff / fragment.charge) > 0:
498 |                 neutral_loss_fragments.append(
499 |                     (
500 |                         FragmentAnnotation(
501 |                             ion_type=fragment.ion_type,
502 |                             neutral_loss=neutral_loss,
503 |                             isotope=fragment.isotope,
504 |                             charge=fragment.charge,
505 |                         ),
506 |                         fragment_mass,
507 |                     )
508 |                 )
509 |     fragments_masses.extend(neutral_loss_fragments)
510 | 
511 |     # Sort the fragment annotations by their theoretical masses.
512 |     return sorted(fragments_masses, key=operator.itemgetter(1))
513 | 


--------------------------------------------------------------------------------
/spectrum_utils/iplot.py:
--------------------------------------------------------------------------------
  1 | import functools
  2 | import operator
  3 | from typing import Callable, Dict, Optional
  4 | 
  5 | try:
  6 |     import altair
  7 |     import pandas as pd
  8 | except ImportError:
  9 |     raise ImportError(
 10 |         "Missing dependencies for interactive plotting. Install using `pip "
 11 |         "install spectrum_utils[iplot]`, manually install Altair and Pandas, or"
 12 |         " use the default Matplotlib (`spectrum_utils.plot`) plotting backend."
 13 |     )
 14 | 
 15 | from spectrum_utils.plot import annotate_ion_type, colors
 16 | from spectrum_utils.spectrum import MsmsSpectrum
 17 | 
 18 | 
 19 | def spectrum(
 20 |     spec: MsmsSpectrum,
 21 |     *_,
 22 |     color_ions: bool = True,
 23 |     annot_fmt: Optional[Callable] = functools.partial(
 24 |         annotate_ion_type, ion_types="by"
 25 |     ),
 26 |     annot_kws: Optional[Dict] = None,
 27 |     mirror_intensity: bool = False,
 28 |     grid: bool = True,
 29 | ) -> altair.LayerChart:
 30 |     """
 31 |     Plot an MS/MS spectrum.
 32 | 
 33 |     Parameters
 34 |     ----------
 35 |     spec : MsmsSpectrum
 36 |         The spectrum to be plotted.
 37 |     color_ions : bool, optional
 38 |         Flag indicating whether or not to color annotated fragment ions. The
 39 |         default is True.
 40 |     annot_fmt : Optional[Callable]
 41 |         Function to format the peak annotations. See `FragmentAnnotation` for
 42 |         supported elements. By default, only canonical b and y peptide fragments
 43 |         are annotated. If `None`, no peaks are annotated.
 44 |     annot_kws : Optional[Dict], optional
 45 |         Keyword arguments for `altair.Chart.mark_text` to customize peak
 46 |         annotations.
 47 |     mirror_intensity : bool, optional
 48 |         Flag indicating whether to flip the intensity axis or not.
 49 |     grid : bool, optional
 50 |         Draw grid lines or not.
 51 | 
 52 |     Returns
 53 |     -------
 54 |     altair.LayerChart
 55 |         The Altair chart instance with the plotted spectrum.
 56 |     """
 57 |     intensity = spec.intensity / spec.intensity.max()
 58 |     if mirror_intensity:
 59 |         intensity *= -1
 60 |     if spec.annotation is not None:
 61 |         annotations = list(map(operator.itemgetter(0), spec.annotation))
 62 |         peak_labels = map(annot_fmt, annotations)
 63 |         peak_colors = [
 64 |             colors.get(a.ion_type[0] if color_ions else None)
 65 |             for a in annotations
 66 |         ]
 67 |         mz_delta = [
 68 |             None if a.mz_delta is None else "".join(map(str, a.mz_delta))
 69 |             for a in annotations
 70 |         ]
 71 |         spec_df = pd.DataFrame(
 72 |             {
 73 |                 "mz": spec.mz,
 74 |                 "intensity": intensity,
 75 |                 "fragment": peak_labels,
 76 |                 "mz_delta": mz_delta,
 77 |                 "color": peak_colors,
 78 |             }
 79 |         )
 80 |     else:
 81 |         spec_df = pd.DataFrame(
 82 |             {
 83 |                 "mz": spec.mz,
 84 |                 "intensity": intensity,
 85 |                 "color": [colors[None]] * len(spec.mz),
 86 |             }
 87 |         )
 88 | 
 89 |     x_axis = altair.X(
 90 |         "mz",
 91 |         axis=altair.Axis(title="m/z", titleFontStyle="italic", grid=grid),
 92 |         scale=altair.Scale(nice=True, padding=5),
 93 |     )
 94 |     y_axis = altair.Y(
 95 |         "intensity",
 96 |         axis=altair.Axis(title="Intensity", format="%", grid=grid),
 97 |         scale=altair.Scale(nice=True),
 98 |     )
 99 |     color = altair.Color("color", scale=None, legend=None)
100 |     tooltip_not_annotated = [
101 |         altair.Tooltip("mz", format=".4f", title="m/z"),
102 |         altair.Tooltip("intensity", format=".1%", title="Intensity"),
103 |     ]
104 |     tooltip_annotated = [
105 |         altair.Tooltip("mz", format=".4f", title="m/z"),
106 |         altair.Tooltip("intensity", format=".1%", title="Intensity"),
107 |         altair.Tooltip("fragment", title="Fragment"),
108 |         altair.Tooltip("mz_delta", title="m/z deviation"),
109 |     ]
110 |     # Unannotated peaks.
111 |     mask_unannotated = spec_df["fragment"] == ""
112 |     spec_plot = (
113 |         altair.Chart(spec_df[mask_unannotated])
114 |         .mark_rule(size=2)
115 |         .encode(x=x_axis, y=y_axis, color=color, tooltip=tooltip_not_annotated)
116 |     )
117 |     # Annotated peaks.
118 |     annotation_kws = {
119 |         "align": "left" if not mirror_intensity else "right",
120 |         "angle": 270,
121 |         "baseline": "middle",
122 |     }
123 |     if annot_kws is not None:
124 |         annotation_kws.update(annot_kws)
125 |     spec_plot += (
126 |         altair.Chart(spec_df[~mask_unannotated])
127 |         .mark_rule(size=2)
128 |         .encode(x=x_axis, y=y_axis, color=color, tooltip=tooltip_annotated)
129 |     )
130 |     spec_plot += (
131 |         altair.Chart(spec_df[~mask_unannotated])
132 |         .mark_text(dx=-5 if mirror_intensity else 5, **annotation_kws)
133 |         .encode(
134 |             x=x_axis,
135 |             y=y_axis,
136 |             text="fragment",
137 |             color=color,
138 |             tooltip=tooltip_annotated,
139 |         )
140 |     )
141 | 
142 |     return spec_plot
143 | 
144 | 
def mirror(
    spec_top: MsmsSpectrum,
    spec_bottom: MsmsSpectrum,
    spectrum_kws: Optional[Dict] = None,
    *_,
) -> altair.LayerChart:
    """
    Draw two MS/MS spectra as a mirror plot.

    The first spectrum points upwards, the second spectrum is flipped to point
    downwards, and a horizontal rule at zero separates them.

    Parameters
    ----------
    spec_top : MsmsSpectrum
        The spectrum to be plotted on the top.
    spec_bottom : MsmsSpectrum
        The spectrum to be plotted on the bottom.
    spectrum_kws : Optional[Dict], optional
        Keyword arguments for `iplot.spectrum`.
    *_
        Ignored, for consistency with the `plot.mirror` API.

    Returns
    -------
    altair.LayerChart
        The Altair chart instance with the plotted spectrum.
    """
    kws = {} if spectrum_kws is None else spectrum_kws
    layers = [
        # Top spectrum.
        spectrum(spec_top, mirror_intensity=False, **kws),
        # Mirrored bottom spectrum.
        spectrum(spec_bottom, mirror_intensity=True, **kws),
        # Separator line between both spectra at zero intensity.
        altair.Chart(pd.DataFrame({"sep": [0]}))
        .mark_rule(size=3)
        .encode(y="sep", color=altair.value("lightGray")),
    ]

    chart = layers[0]
    for layer in layers[1:]:
        chart += layer

    return chart
184 | 


--------------------------------------------------------------------------------
/spectrum_utils/monosaccharide.lark:
--------------------------------------------------------------------------------
// From: https://github.com/HUPO-PSI/ProForma/tree/master/monosaccharides
// Version: September 13, 2020
// Terminal that matches a single monosaccharide name. It is imported by the
// ProForma grammar (proforma.ebnf) for use in "Glycan:" modifications.
// Monosaccharides have to be specified in reversed order to support greedy
// parsing: the alternatives are listed in reverse lexicographic order, so a
// longer name always precedes any shorter name that is its prefix (e.g.
// "HexNAc(S)", "HexNAc", "HexN", "Hex"). Keep this ordering when adding
// entries.
MONOSACCHARIDE: "uxxxxxxxxh"
	| "uxxxxxxh"
	| "uxxxxxh"
	| "uxxxxh_?*OSO/3=O/3=O"
	| "uxxxxh_?*OPO/3O/3=O"
	| "uxxxxh_?*NSO/3=O/3=O"
	| "uxxxxh_?*NCC/3=O_?*OSO/3=O/3=O"
	| "uxxxxh_?*NCC/3=O"
	| "uxxxxh_?*N"
	| "uxxxxh"
	| "uxxxh"
	| "uxxxdh"
	| "uxxh"
	| "uxh43344h"
	| "uxh"
	| "uh"
	| "u1221m"
	| "sulfate"
	| "phosphate"
	| "enHexA"
	| "en,aHex"
	| "en,a-Hex"
	| "dHex"
	| "d-Hex"
	| "aHex"
	| "a-Hex"
	| "Tri"
	| "Tet"
	| "Sug"
	| "Sialic Acid"
	| "S"
	| "RES\n1b:x-xgro-NON-x:x\nLIN\n"
	| "RES\n1b:x-xgro-HEP-x:x\nLIN\n"
	| "RES\n1b:x-lgal-HEX-x:x|6:d\nLIN\n"
	| "RES\n1b:x-dgro-dgal-NON-x:x|1:a|2:keto|3:d\n2s:n-glycolyl\nLIN\n1:1d(5+1)2n\n"
	| "RES\n1b:x-dgro-dgal-NON-x:x|1:a|2:keto|3:d\n2s:n-acetyl\nLIN\n1:1d(5+1)2n\n"
	| "RES\n1b:x-dgro-dgal-NON-x:x|1:a|2:keto|3:d\n2s:amino\nLIN\n1:1d(5+1)2n\n"
	| "RES\n1b:x-TRI-x:x\nLIN\n"
	| "RES\n1b:x-TET-x:x\nLIN\n"
	| "RES\n1b:x-SUG-x:x\nLIN\n"
	| "RES\n1b:x-PEN-x:x\nLIN\n"
	| "RES\n1b:x-OCT-x:x\nLIN\n"
	| "RES\n1b:x-HEX-x:x|-1:en|-1:a\nLIN\n"
	| "RES\n1b:x-HEX-x:x|-1:d\nLIN\n"
	| "RES\n1b:x-HEX-x:x|-1:a\nLIN\n"
	| "RES\n1b:x-HEX-x:x\nLIN\n"
	| "RES\n1b:x-HEX-x:x\n2s:sulfate\nLIN\n1:1o(-1+1)2n\n"
	| "RES\n1b:x-HEX-x:x\n2s:phosphate\nLIN\n1:1o(-1+1)2n\n"
	| "RES\n1b:x-HEX-x:x\n2s:n-sulfate\nLIN\n1:1d(-1+1)2n\n"
	| "RES\n1b:x-HEX-x:x\n2s:n-acetyl\nLIN\n1:1d(-1+1)2n\n"
	| "RES\n1b:x-HEX-x:x\n2s:n-acetyl\n3s:sulfate\nLIN\n1:1d(-1+1)2n\n2:1o(-1+1)3n\n"
	| "RES\n1b:x-HEX-x:x\n2s:amino\nLIN\n1:1d(-1+1)2n\n"
	| "RES\n1b:x-DEC-x:x\nLIN\n"
	| "Pen"
	| "P"
	| "Oct"
	| "Non"
	| "Neuraminic acid"
	| "NeuGc"
	| "NeuAc"
	| "Neu5Gc"
	| "Neu5Ac"
	| "Neu"
	| "HexS"
	| "HexP"
	| "HexNS"
	| "HexNAc(S)"
	| "HexNAc"
	| "HexN"
	| "HexA"
	| "Hex"
	| "Hep"
	| "Fucose"
	| "Fuc"
	| "Dec"
	| "Aud21122h_5*NCCO/3=O"
	| "Aud21122h_5*NCC/3=O"
	| "Aud21122h_5*N"
	| "?-L-Fucx"
	| "?-D-Neux5NGc"
	| "?-D-Neux5NAc"
	| "?-D-Neux"
	| "?-?-en,a-Hexx"
	| "?-?-d-Hexx"
	| "?-?-a-Hexx"
	| "?-?-Trix"
	| "?-?-Tetx"
	| "?-?-Sugx"
	| "?-?-Penx"
	| "?-?-Octx"
	| "?-?-Nonx"
	| "?-?-HexxS"
	| "?-?-HexxP"
	| "?-?-HexxNS"
	| "?-?-HexxNAc(S)"
	| "?-?-HexxNAc"
	| "?-?-HexxN"
	| "?-?-Hexx"
	| "?-?-Hepx"
	| "?-?-Decx"
104 | 


--------------------------------------------------------------------------------
/spectrum_utils/plot.py:
--------------------------------------------------------------------------------
  1 | import functools
  2 | import itertools
  3 | import math
  4 | from typing import (
  5 |     Any,
  6 |     Callable,
  7 |     Dict,
  8 |     Iterable,
  9 |     Mapping,
 10 |     Optional,
 11 |     Tuple,
 12 |     Union,
 13 | )
 14 | 
 15 | import matplotlib.pyplot as plt
 16 | import matplotlib.ticker as mticker
 17 | import numpy as np
 18 | 
 19 | import spectrum_utils.fragment_annotation as fa
 20 | from spectrum_utils.spectrum import MsmsSpectrum
 21 | from spectrum_utils.utils import da_to_ppm, ppm_to_da
 22 | 
 23 | 
# Hex color per ion type. Lookups in this module use the first character of a
# fragment annotation's `ion_type` as key; the `None` entry is used for peaks
# without an annotation and whenever ion coloring is disabled.
colors = {
    "a": "#388E3C",
    "b": "#1976D2",
    "c": "#00796B",
    "x": "#7B1FA2",
    "y": "#D32F2F",
    "z": "#F57C00",
    "m": "#FBC02D",
    "I": "#455A64",
    "p": "#512DA8",
    "?": "#212121",
    "f": "#212121",
    None: "#212121",
}
# Matplotlib z-order per ion type, keyed like `colors`. The value is passed as
# `zorder` when drawing a peak and its label, so artists with a larger value
# are drawn on top of artists with a smaller one.
zorders = {
    "a": 3,
    "b": 4,
    "c": 3,
    "x": 3,
    "y": 4,
    "z": 3,
    "m": 2,
    "I": 3,
    "p": 3,
    "?": 2,
    "f": 5,
    None: 1,
}
 52 | 
 53 | 
 54 | def _format_ax(
 55 |     ax: plt.Axes,
 56 |     grid: Union[bool, str],
 57 | ):
 58 |     """Set ax formatting options that are common to all plot types."""
 59 |     ax.xaxis.set_minor_locator(mticker.AutoLocator())
 60 |     ax.yaxis.set_minor_locator(mticker.AutoLocator())
 61 |     ax.xaxis.set_minor_locator(mticker.AutoMinorLocator())
 62 |     ax.yaxis.set_minor_locator(mticker.AutoMinorLocator())
 63 |     if grid in (True, "both", "major"):
 64 |         ax.grid(True, "major", color="#9E9E9E", linewidth=0.2)
 65 |     if grid in (True, "both", "minor"):
 66 |         ax.grid(True, "minor", color="#9E9E9E", linewidth=0.2)
 67 |     ax.set_axisbelow(True)
 68 |     ax.tick_params(axis="both", which="both", labelsize="small")
 69 |     ax.set_xlabel("m/z", style="italic")
 70 | 
 71 | 
 72 | def _get_xlim(spec: MsmsSpectrum) -> Tuple[float, float]:
 73 |     """Get plot x-axis limits for a given spectrum."""
 74 |     round_mz = 50
 75 |     max_mz = math.ceil(spec.mz[-1] / round_mz + 1) * round_mz
 76 |     return 0.0, max_mz
 77 | 
 78 | 
 79 | def _annotate_ion(
 80 |     mz: float,
 81 |     intensity: float,
 82 |     annotation: Optional[fa.FragmentAnnotation],
 83 |     color_ions: bool,
 84 |     annot_fmt: Optional[Callable],
 85 |     annot_kws: Dict[str, object],
 86 |     ax: plt.Axes,
 87 | ) -> Tuple[str, int]:
 88 |     """
 89 |     Annotate a specific fragment peak.
 90 | 
 91 |     Parameters
 92 |     ----------
 93 |     mz : float
 94 |         The peak's m/z value (position of the annotation on the x axis).
 95 |     intensity : float
 96 |         The peak's intensity (position of the annotation on the y axis).
 97 |     annotation : Optional[fa.FragmentAnnotation]
 98 |         The annotation that will be plotted.
 99 |     color_ions : bool
100 |         Flag whether to color the peak annotation or not.
101 |     annot_fmt : Optional[Callable]
102 |         Function to format the peak annotations. See `FragmentAnnotation` for
103 |         supported elements. By default, only canonical b and y peptide fragments
104 |         are annotated. If `None`, no peaks are annotated.
105 |     annot_kws : Dict[str, object]
106 |         Keyword arguments for `ax.text` to customize peak annotations.
107 |     ax : plt.Axes
108 |         Axes instance on which to plot the annotation.
109 | 
110 |     Returns
111 |     -------
112 |     Tuple[str, int]
113 |         A tuple of the annotation's color as a hex string and the annotation's
114 |         zorder.
115 |     """
116 |     ion_type = annotation.ion_type[0] if annotation is not None else None
117 |     color = colors.get(ion_type if color_ions else None)
118 |     zorder = zorders.get(ion_type)
119 |     if annot_fmt is not None and annotation is not None:
120 |         y = intensity + 0.02 * (intensity > 0)
121 |         kws = annot_kws.copy()
122 |         kws.update(dict(color=color, zorder=zorder))
123 |         ax.text(mz, y, annot_fmt(annotation), **kws)
124 |     return color, zorder
125 | 
126 | 
def annotate_ion_type(
    annotation: fa.FragmentAnnotation, ion_types: Iterable[str]
) -> str:
    """
    Build the label string for a peak in a spectrum plot from its
    `FragmentAnnotation`.

    A label is only produced for singly-charged, mono-isotopic peaks without
    neutral loss whose ion type starts with one of the accepted ion types.

    Parameters
    ----------
    annotation : fa.FragmentAnnotation
        The peak's fragment annotation.
    ion_types : Iterable[str]
        Accepted ion types to annotate.

    Returns
    -------
    str
        The peak's annotation string, or an empty string if the peak should
        not be labeled.
    """
    # Reject the annotation as soon as one of the criteria is not met.
    if annotation.ion_type[0] not in ion_types:
        return ""
    if annotation.neutral_loss is not None:
        return ""
    if not annotation.isotope == 0:
        return ""
    if not annotation.charge == 1:
        return ""
    return str(annotation.ion_type)
158 | 
159 | 
def spectrum(
    spec: MsmsSpectrum,
    *,
    color_ions: bool = True,
    annot_fmt: Optional[Callable] = functools.partial(
        annotate_ion_type, ion_types="by"
    ),
    annot_kws: Optional[Dict] = None,
    mirror_intensity: bool = False,
    grid: Union[bool, str] = True,
    ax: Optional[plt.Axes] = None,
) -> plt.Axes:
    """
    Draw an MS/MS spectrum as vertical lines with optional peak labels.

    Intensities are shown relative to the most intense peak.

    Parameters
    ----------
    spec : MsmsSpectrum
        The spectrum to be plotted.
    color_ions : bool, optional
        Flag indicating whether or not to color annotated fragment ions. The
        default is True.
    annot_fmt : Optional[Callable]
        Function to format the peak annotations. See `FragmentAnnotation` for
        supported elements. By default, only canonical b and y peptide fragments
        are annotated. If `None`, no peaks are annotated.
    annot_kws : Optional[Dict], optional
        Keyword arguments for `ax.text` to customize peak annotations.
    mirror_intensity : bool, optional
        Flag indicating whether to flip the intensity axis or not.
    grid : Union[bool, str], optional
        Draw grid lines or not. Either a boolean to enable/disable both major
        and minor grid lines or 'major'/'minor' to enable major or minor grid
        lines respectively.
    ax : Optional[plt.Axes], optional
        Axes instance on which to plot the spectrum. If None the current Axes
        instance is used.

    Returns
    -------
    plt.Axes
        The matplotlib Axes instance on which the spectrum is plotted.
    """
    ax = plt.gca() if ax is None else ax

    _format_ax(ax, grid)
    ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=1.0))
    y_limits = (-1.15, 0) if mirror_intensity else (0, 1.15)
    ax.set_ylim(*y_limits)
    ax.set_ylabel("Intensity")

    # An empty spectrum only gets the formatted, empty Axes.
    if len(spec.mz) == 0:
        return ax

    ax.set_xlim(*_get_xlim(spec))

    # Default label style; user-provided keywords take precedence.
    text_kws = {
        "horizontalalignment": "right" if mirror_intensity else "left",
        "verticalalignment": "center",
        "rotation": 90,
        "rotation_mode": "anchor",
        "zorder": 5,
    }
    if annot_kws is not None:
        text_kws.update(annot_kws)

    if spec.annotation is None:
        peak_annotations = itertools.repeat(None)
    else:
        peak_annotations = spec.annotation

    base_peak_intensity = spec.intensity.max()
    peaks = zip(spec.mz, spec.intensity, peak_annotations)
    for peak_mz, raw_intensity, interpretation in peaks:
        rel_intensity = raw_intensity / base_peak_intensity
        if mirror_intensity:
            rel_intensity = -rel_intensity

        # Use the first annotation in case there are multiple options.
        if interpretation is None:
            first_annotation = None
        else:
            first_annotation = interpretation[0]

        peak_color, peak_zorder = _annotate_ion(
            peak_mz,
            rel_intensity,
            first_annotation,
            color_ions,
            annot_fmt,
            text_kws,
            ax,
        )
        ax.plot(
            [peak_mz, peak_mz],
            [0, rel_intensity],
            color=peak_color,
            zorder=peak_zorder,
        )

    return ax
249 | 
250 | 
def mass_errors(
    spec: MsmsSpectrum,
    *,
    unit: Optional[str] = None,
    plot_unknown: bool = True,
    color_ions: bool = True,
    grid: Union[bool, str] = True,
    ax: Optional[plt.Axes] = None,
) -> plt.Axes:
    """
    Draw a mass error bubble plot for a given spectrum.

    A mass error bubble plot shows the error between observed and theoretical
    mass (y-axis) in function of the **m/z** (x-axis) for each peak in the
    spectrum. The size of the bubble is proportional to the intensity of the
    peak.

    Parameters
    ----------
    spec : MsmsSpectrum
        The spectrum with mass errors to be plotted.
    unit : str, optional
        The unit of the mass errors, either 'ppm', 'Da', or None. If None,
        the unit that was used for spectrum annotation is used. The default is
        None.
    plot_unknown : bool, optional
        Flag indicating whether or not to plot mass errors for unknown peaks.
    color_ions : bool, optional
        Flag indicating whether or not to color dots for annotated fragment
        ions. The default is True.
    grid : Union[bool, str], optional
        Draw grid lines or not. Either a boolean to enable/disable both major
        and minor grid lines or 'major'/'minor' to enable major or minor grid
        lines respectively.
    ax : Optional[plt.Axes], optional
        Axes instance on which to plot the mass errors. If None the current
        Axes instance is used.

    Returns
    -------
    plt.Axes
        The matplotlib Axes instance on which the mass errors are plotted.

    Raises
    ------
    ValueError
        If the annotated mass errors do not all use the same unit, or use a
        unit other than 'ppm' or 'Da'.

    Notes
    -----
    The mass error bubble plot was first introduced in [1]_.

    References
    ----------
    .. [1] Barsnes,H., Eidhammer,I. and Martens,L. (2010)
       FragmentationAnalyzer: An open-source tool to analyze MS/MS
       fragmentation data. PROTEOMICS, 10, 1087–1090.
       doi:10.1002/pmic.200900681

    """
    ax = plt.gca() if ax is None else ax

    _format_ax(ax, grid)

    n_peaks = len(spec.mz)
    if n_peaks == 0:
        ax.set_ylabel("Mass error")
        ax.set_ylim(-1, 1)
        return ax

    if spec.annotation is None:
        annotations = itertools.repeat(None, n_peaks)
    else:
        annotations = spec.annotation

    # Collect per-peak properties; unknown peaks get a zero mass error and no
    # unit.
    known_ions, dot_colors, mz_deltas, mz_delta_units = [], [], [], []
    for ann in annotations:
        if ann is None:
            ion_type = None
        else:
            # Use the first annotation in case there are multiple options.
            first_annotation = ann[0]
            ion_type = first_annotation.ion_type[0]
        is_known_ion = ion_type is not None and ion_type != "?"
        if is_known_ion:
            delta = first_annotation.mz_delta[0]
            delta_unit = first_annotation.mz_delta[1]
        else:
            delta, delta_unit = 0.0, None
        known_ions.append(is_known_ion)
        dot_colors.append(colors.get(ion_type if color_ions else None))
        mz_deltas.append(delta)
        mz_delta_units.append(delta_unit)

    dot_colors = np.array(dot_colors)
    mz_deltas = np.array(mz_deltas)
    # Bubble area is proportional to the relative peak intensity.
    intensity_scaled = 500 * (spec.intensity / np.max(spec.intensity))
    if plot_unknown:
        mask = np.ones_like(spec.mz, dtype=bool)
    else:
        mask = np.array(known_ions)

    # Find the single unit shared by all annotated mass errors, trying 'ppm'
    # before 'Da'. Peaks without a unit are ignored.
    annotation_unit = None
    for candidate_unit in ("ppm", "Da"):
        if all(not u or u == candidate_unit for u in mz_delta_units):
            annotation_unit = candidate_unit
            break
    if annotation_unit is None:
        raise ValueError("Inconsistent or unknown mass units in annotations.")

    # Convert the mass errors if a unit other than the annotation unit is
    # requested.
    if unit == "Da" and annotation_unit == "ppm":
        mz_deltas = ppm_to_da(mz_deltas, spec.mz)
    elif unit == "ppm" and annotation_unit == "Da":
        mz_deltas = da_to_ppm(mz_deltas, spec.mz)

    # Symmetric y-axis with some headroom around the largest mass error.
    y_lim = 1.2 * np.max(np.abs(mz_deltas))
    if y_lim > 0.0:
        ax.set_ylim(-y_lim, y_lim)
    ax.set_xlim(*_get_xlim(spec))
    ax.set_ylabel(f"Mass error ({unit or annotation_unit})")

    ax.scatter(
        spec.mz[mask],
        mz_deltas[mask],
        s=intensity_scaled[mask],
        c=dot_colors[mask],
        alpha=0.5,
        edgecolors="none",
    )

    return ax
372 | 
373 | 
def mirror(
    spec_top: MsmsSpectrum,
    spec_bottom: MsmsSpectrum,
    spectrum_kws: Optional[Dict] = None,
    ax: Optional[plt.Axes] = None,
) -> plt.Axes:
    """
    Draw two MS/MS spectra on the same Axes as a mirror plot.

    Parameters
    ----------
    spec_top : MsmsSpectrum
        The spectrum to be plotted on the top.
    spec_bottom : MsmsSpectrum
        The spectrum to be plotted on the bottom.
    spectrum_kws : Optional[Dict], optional
        Keyword arguments for `plot.spectrum`.
    ax : Optional[plt.Axes], optional
        Axes instance on which to plot the spectrum. If None the current Axes
        instance is used.

    Returns
    -------
    plt.Axes
        The matplotlib Axes instance on which the spectra are plotted.
    """
    ax = plt.gca() if ax is None else ax
    kws = {} if spectrum_kws is None else spectrum_kws

    # Each call to `spectrum` resets the y limits to its own half of the plot,
    # so remember the upper limit of the top half and the lower limit of the
    # bottom half and combine them afterwards.
    spectrum(spec_top, mirror_intensity=False, ax=ax, **kws)
    upper = ax.get_ylim()[1]
    spectrum(spec_bottom, mirror_intensity=True, ax=ax, **kws)
    lower = ax.get_ylim()[0]
    ax.set_ylim(lower, upper)

    ax.axhline(0, color="#9E9E9E", zorder=10)

    # Update axes so that both spectra fit. An empty spectrum counts as having
    # a last m/z of 1.
    step = 50
    last_mzs = [
        s.mz[-1] if len(s.mz) > 0 else 1 for s in (spec_top, spec_bottom)
    ]
    ax.set_xlim(0, max(math.ceil(mz / step + 1) * step for mz in last_mzs))

    def _abs_percent(value, _pos):
        # Show intensities of the mirrored spectrum as positive percentages.
        return f"{abs(value):.0%}"

    ax.yaxis.set_major_locator(mticker.AutoLocator())
    ax.yaxis.set_minor_locator(mticker.AutoMinorLocator())
    ax.yaxis.set_major_formatter(mticker.FuncFormatter(_abs_percent))

    return ax
433 | 
434 | 
def facet(
    spec_top: MsmsSpectrum,
    spec_mass_errors: Optional[MsmsSpectrum] = None,
    spec_bottom: Optional[MsmsSpectrum] = None,
    spectrum_kws: Optional[Mapping[str, Any]] = None,
    mass_errors_kws: Optional[Mapping[str, Any]] = None,
    height: Optional[float] = None,
    width: Optional[float] = None,
) -> plt.Figure:
    """
    Plot a spectrum, and optionally mass errors, and a mirror spectrum.

    The panels are stacked vertically and share their x-axis. The shared
    x-axis range is wide enough to fit every plotted spectrum.

    Parameters
    ----------
    spec_top : MsmsSpectrum
        The spectrum to be plotted on the top.
    spec_mass_errors : Optional[MsmsSpectrum], optional
        The spectrum for which mass errors are to be plotted in the middle.
    spec_bottom : Optional[MsmsSpectrum], optional
        The spectrum to be plotted on the bottom.
    spectrum_kws : Optional[Mapping[str, Any]], optional
        Keyword arguments for `plot.spectrum` for the top and bottom spectra.
    mass_errors_kws : Optional[Mapping[str, Any]], optional
        Keyword arguments for `plot.mass_errors`.
    height : Optional[float], optional
        The height of the figure in inches. Defaults to 3.75 without a bottom
        spectrum and to 6 with a bottom spectrum.
    width : Optional[float], optional
        The width of the figure in inches. Defaults to 7.5.

    Returns
    -------
    plt.Figure
        The matplotlib Figure instance on which the spectra and mass errors
        are plotted.
    """
    spectrum_kws = spectrum_kws or {}
    mass_errors_kws = mass_errors_kws or {}

    # The mass error panel is half as tall as a spectrum panel.
    height_ratios = [1]
    if spec_mass_errors is not None:
        height_ratios.append(0.5)
    if spec_bottom is not None:
        height_ratios.append(1)
    n_rows = len(height_ratios)

    fig, axes = plt.subplots(
        *(n_rows, 1),
        figsize=(width or 7.5, height or (3.75 if spec_bottom is None else 6)),
        sharex=True,
        gridspec_kw={"height_ratios": height_ratios},
    )
    # `plt.subplots` returns a bare Axes for a single row; always work with a
    # flat array.
    axes = np.array(axes).flatten()

    # Because the x-axis is shared, every plotting call overrides the x limits
    # of all panels. Record the upper limit after each call so that the widest
    # one can be restored at the end.
    x_upper_limits = []

    spectrum(spec_top, ax=axes[0], **spectrum_kws)
    x_upper_limits.append(axes[0].get_xlim()[1])

    if spec_mass_errors is not None:
        mass_errors(spec_mass_errors, ax=axes[1], **mass_errors_kws)
        x_upper_limits.append(axes[1].get_xlim()[1])
        axes[0].get_xaxis().get_label().set_visible(False)

    if spec_bottom is not None:
        spectrum(
            spec_bottom,
            mirror_intensity=True,
            ax=axes[-1],
            **spectrum_kws,
        )
        x_upper_limits.append(axes[-1].get_xlim()[1])
        # Only the bottom panel keeps its x-axis label.
        for ax in axes[:-1]:
            ax.get_xaxis().get_label().set_visible(False)

        # Show intensities of the mirrored spectrum as positive percentages.
        axes[-1].yaxis.set_major_formatter(
            mticker.FuncFormatter(lambda x, pos: f"{abs(x):.0%}")
        )

    # Make sure that no panel is clipped by the limits of a later panel.
    axes[0].set_xlim(right=max(x_upper_limits))

    fig.align_ylabels(axes)
    fig.tight_layout()

    return fig
510 | 


--------------------------------------------------------------------------------
/spectrum_utils/proforma.ebnf:
--------------------------------------------------------------------------------
 1 | %import common.DIGIT
 2 | %import common.INT
 3 | %import common.LETTER
 4 | %import common.NUMBER
 5 | %import common.SIGNED_INT
 6 | %import common.SIGNED_NUMBER
 7 | %import common.WS
 8 | %import .monosaccharide.MONOSACCHARIDE
 9 | 
// ProForma specification: https://github.com/HUPO-PSI/ProForma/
// Version: June 29, 2021
// Notation used below: a ".N" suffix on a rule name is its Lark priority, and
// terminals whose name starts with "_" are filtered out of the parse tree.

// Start rule: one or more proteoforms, joined by a crosslink ("//") or
// chimeric ("+") separator.
proforma: (proteoform (CROSSLINK | CHIMERIC))* proteoform
CROSSLINK: "//"
CHIMERIC: "+"

// A peptide, optionally followed by "/" and its charge.
proteoform: peptide ["/" charge]

// Parts of a peptide, in order: global modifications, modifications with an
// unknown position, labile modifications, an N-terminal modification, the
// residues and/or modified ranges, and a C-terminal modification.
peptide: mod_global* mod_unknown_pos? mod_labile* mod_n_term? (aa | mod_range)+ mod_c_term?
// TODO: Amino acid sequence ambiguity (section 4.7).

// A single-letter residue, optionally followed by either one or more
// modifications or a bracketed bare label.
aa.10: AA [mod+ | (_MOD_L mod_label _MOD_R)]
AA: LETTER

// Global modification in angle brackets: either an isotope label or a
// modification applied ("@") to a comma-separated list of residues.
mod_global: _MOD_GLOBAL_L (ISOTOPE | (mod "@" (AA ",")* AA)) _MOD_GLOBAL_R
ISOTOPE: INT? LETTER+ SIGNED_INT?

// One or more modifications, each with an optional "^" count, terminated by
// "?".
mod_unknown_pos: (mod ["^" MOD_COUNT])+ "?"

// A modification in square brackets (mod) or curly braces (mod_labile) holds
// one or more "|"-separated descriptors. Only the descriptors of `mod` can be
// followed by a label.
mod:        _MOD_L        ((mod_name | mod_accession | mod_mass | mod_formula | mod_glycan | info) mod_label? "|")* (mod_name | mod_accession | mod_mass | mod_formula | mod_glycan | info) mod_label? _MOD_R
mod_labile: _MOD_LABILE_L ((mod_name | mod_accession | mod_mass | mod_formula | mod_glycan | info)            "|")* (mod_name | mod_accession | mod_mass | mod_formula | mod_glycan | info)            _MOD_LABILE_R
MOD_COUNT: INT

// Terminal modifications (or bracketed bare labels) are separated from the
// residues by "-".
mod_n_term: (mod | (_MOD_L mod_label _MOD_R)) "-"
mod_c_term: "-" (mod | (_MOD_L mod_label _MOD_R))

// A parenthesized stretch of residues (ranges can be nested) followed by the
// modification(s) attached to it.
mod_range: MOD_RANGE_L mod_range_pos _MOD_RANGE_R mod+
mod_range_pos: (aa | mod_range)+

// Modification given by name, optionally prefixed by a case-insensitive
// one-letter controlled vocabulary abbreviation and ":".
mod_name.2: ((CV_ABBREV ":") | (CV_ABBREV_OPT ":")?) TEXT
CV_ABBREV_OPT: "U"i | "M"i
CV_ABBREV: "R"i | "X"i | "G"i

// Modification given by accession: a case-insensitive controlled vocabulary
// name, ":", and the identifier.
mod_accession.5: CV_NAME ":" TEXT
CV_NAME: "UNIMOD"i | "MOD"i | "RESID"i | "XLMOD"i | "GNO"i

// Modification given by mass: a number with a mandatory sign, optionally
// prefixed by a controlled vocabulary abbreviation or "Obs" and ":".
mod_mass.5: [(CV_ABBREV_OPT | CV_ABBREV | MOD_MASS_OBS) ":"] MOD_MASS
MOD_MASS_OBS: "Obs"i
MOD_MASS: ("+" | "-") NUMBER

// Modification given by formula: "Formula:" followed by optional bracketed
// isotopes and the formula itself.
mod_formula.5: "Formula:"i (_MOD_L ISOTOPE _MOD_R)* FORMULA
FORMULA: (LETTER+ SIGNED_INT? WS?)+

// Modification given by glycan composition: "Glycan:" followed by one or more
// monosaccharides, each with an optional count.
// NOTE(review): unlike the other prefixes, "Glycan:" is matched
// case-sensitively -- confirm that this is intended.
mod_glycan.5: "Glycan:" (monosaccharide WS?)+
monosaccharide: MONOSACCHARIDE MONOSACCHARIDE_COUNT?
MONOSACCHARIDE_COUNT: INT

// Free-text information.
info.5: "Info:"i TEXT

// Label introduced by "#": "XL" followed by a label, "BRANCH", or a plain
// alphanumeric label, optionally followed by a parenthesized score.
mod_label.3 : "#" (MOD_LABEL_XL | MOD_LABEL_BRANCH | MOD_LABEL) ["(" MOD_SCORE ")"]
MOD_LABEL_XL: "XL" MOD_LABEL
MOD_LABEL_BRANCH: "BRANCH"
MOD_LABEL: (LETTER | DIGIT)+
MOD_SCORE: SIGNED_NUMBER

// Charge: a signed integer, optionally followed by a bracketed ion description
// of one or two comma-separated texts.
charge: CHARGE [_MOD_L ion _MOD_R]
CHARGE: SIGNED_INT
ion: [TEXT ","] TEXT

// Any non-empty run of characters.
TEXT: /.+/

// Delimiters.
_MOD_L: "["
_MOD_R: "]"
_MOD_LABILE_L: "{"
_MOD_LABILE_R: "}"
_MOD_GLOBAL_L: "<"
_MOD_GLOBAL_R: ">"
MOD_RANGE_L: "("
_MOD_RANGE_R: ")"
79 | 


--------------------------------------------------------------------------------
/spectrum_utils/utils.py:
--------------------------------------------------------------------------------
 1 | from typing import Union
 2 | 
 3 | import numba as nb
 4 | import numpy as np
 5 | 
 6 | 
 7 | @nb.njit(fastmath=True, cache=True)
 8 | def mass_diff(mz1, mz2, mode_is_da):
 9 |     """
10 |     Calculate the mass difference(s).
11 | 
12 |     Parameters
13 |     ----------
14 |     mz1
15 |         First m/z value(s).
16 |     mz2
17 |         Second m/z value(s).
18 |     mode_is_da : bool
19 |         Mass difference in Dalton (True) or in ppm (False).
20 | 
21 |     Returns
22 |     -------
23 |         The mass difference(s) between the given m/z values.
24 |     """
25 |     return mz1 - mz2 if mode_is_da else (mz1 - mz2) / mz2 * 10**6
26 | 
27 | 
def da_to_ppm(
    delta_mz: Union[float, np.ndarray], mz: Union[float, np.ndarray]
) -> Union[float, np.ndarray]:
    """
    Convert a mass difference in Dalton to ppm.

    Parameters
    ----------
    delta_mz : float or np.ndarray
        Mass difference in Dalton.
    mz : float or np.ndarray
        m/z value of peak.

    Returns
    -------
    float or np.ndarray
        The mass difference in ppm, i.e. the mass difference relative to the
        m/z value, multiplied by one million. The result is always floating
        point, also for integer input.
    """
    return delta_mz / mz * 1e6
47 | 
48 | 
49 | def ppm_to_da(
50 |     delta_mz: Union[int, np.ndarray], mz: Union[int, np.ndarray]
51 | ) -> Union[int, np.ndarray]:
52 |     """
53 |     Convert a mass difference in ppm to Dalton.
54 | 
55 |     Parameters
56 |     ----------
57 |     delta_mz : int or np.ndarray
58 |         Mass difference in ppm.
59 |     mz : int or np.ndarray
60 |         m/z value of peak.
61 | 
62 |     Returns
63 |     -------
64 |     int or np.ndarray
65 | 
66 |     """
67 |     return delta_mz / 1e6 * mz
68 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bittremieux-lab/spectrum_utils/a12f453040bb9bd36a261ae46063dfe828d4fe5e/tests/__init__.py


--------------------------------------------------------------------------------
/tests/fragment_annotation_test.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import pytest
  3 | 
  4 | from spectrum_utils import fragment_annotation, proforma
  5 | 
  6 | 
  7 | @pytest.fixture(autouse=True)
  8 | def set_random_seed():
  9 |     """Reseed NumPy's global random generator with a fixed value."""
 10 |     np.random.seed(13)
 10 | 
 11 | 
 12 | def test_fragment_annotation_unknown():
 13 |     fragment_annotation.FragmentAnnotation("?")
 14 |     with pytest.raises(ValueError):
 15 |         fragment_annotation.FragmentAnnotation("?", neutral_loss="-H2O")
 16 |     with pytest.raises(ValueError):
 17 |         fragment_annotation.FragmentAnnotation("?", isotope=1)
 18 |     with pytest.raises(ValueError):
 19 |         fragment_annotation.FragmentAnnotation("?", charge=1)
 20 |     with pytest.raises(ValueError):
 21 |         fragment_annotation.FragmentAnnotation("?", adduct="[M+H]")
 22 | 
 23 | 
 24 | def test_fragment_annotation_primary():
 25 |     fragment_annotation.FragmentAnnotation(
 26 |         "b5", neutral_loss="-H2O", isotope=1, charge=1, adduct="[M+H]"
 27 |     )
 28 |     with pytest.raises(ValueError):
 29 |         fragment_annotation.FragmentAnnotation("b5", charge=0)
 30 |     with pytest.raises(ValueError):
 31 |         fragment_annotation.FragmentAnnotation("b5", charge=-2)
 32 | 
 33 | 
 34 | def test_get_theoretical_fragments():
 35 |     peptide = proforma.parse("HPYLEDR")[0]
 36 |     fragments = {
 37 |         "b1^1": 138.066147,
 38 |         "b2^1": 235.118912,
 39 |         "b3^1": 398.182220,
 40 |         "b4^1": 511.266266,
 41 |         "b5^1": 640.308899,
 42 |         "b6^1": 755.335815,
 43 |         "y1^1": 175.118912,
 44 |         "y2^1": 290.145844,
 45 |         "y3^1": 419.188446,
 46 |         "y4^1": 532.272522,
 47 |         "y5^1": 695.335815,
 48 |         "y6^1": 792.388550,
 49 |         "b1^2": 69.536731,
 50 |         "b2^2": 118.063111,
 51 |         "b3^2": 199.594776,
 52 |         "b4^2": 256.136806,
 53 |         "b5^2": 320.658101,
 54 |         "b6^2": 378.171571,
 55 |         "y1^2": 88.063114,
 56 |         "y2^2": 145.576584,
 57 |         "y3^2": 210.097879,
 58 |         "y4^2": 266.639909,
 59 |         "y5^2": 348.171574,
 60 |         "y6^2": 396.697954,
 61 |         "b1^3": 46.693580,
 62 |         "b2^3": 79.044500,
 63 |         "b3^3": 133.398943,
 64 |         "b4^3": 171.093630,
 65 |         "b5^3": 214.107826,
 66 |         "b6^3": 252.450140,
 67 |         "y1^3": 59.044501,
 68 |         "y2^3": 97.386815,
 69 |         "y3^3": 140.401011,
 70 |         "y4^3": 178.095698,
 71 |         "y5^3": 232.450141,
 72 |         "y6^3": 264.801061,
 73 |     }
 74 |     for (
 75 |         annotation,
 76 |         fragment_mz,
 77 |     ) in fragment_annotation.get_theoretical_fragments(peptide, max_charge=3):
 78 |         assert fragment_mz == pytest.approx(
 79 |             fragments[f"{annotation.ion_type}^{annotation.charge}"]
 80 |         )
 81 | 
 82 | 
 83 | def test_get_theoretical_fragments_static_mod():
 84 |     peptide = proforma.parse("<[+79.96633]@Y>HPYLEDR")[0]
 85 |     fragments = {
 86 |         "b1^1": 138.066147,
 87 |         "b2^1": 235.118912,
 88 |         "b3^1": 478.148590,
 89 |         "b4^1": 591.232666,
 90 |         "b5^1": 720.275269,
 91 |         "b6^1": 835.302185,
 92 |         "y1^1": 175.118912,
 93 |         "y2^1": 290.145844,
 94 |         "y3^1": 419.188446,
 95 |         "y4^1": 532.272522,
 96 |         "y5^1": 775.302185,
 97 |         "y6^1": 872.354980,
 98 |         "b1^2": 69.536731,
 99 |         "b2^2": 118.063111,
100 |         "b3^2": 239.577941,
101 |         "b4^2": 296.119971,
102 |         "b5^2": 360.641266,
103 |         "b6^2": 418.154736,
104 |         "y1^2": 88.063114,
105 |         "y2^2": 145.576584,
106 |         "y3^2": 210.097879,
107 |         "y4^2": 266.639909,
108 |         "y5^2": 388.154739,
109 |         "y6^2": 436.681119,
110 |         "b1^3": 46.693580,
111 |         "b2^3": 79.044500,
112 |         "b3^3": 160.054386,
113 |         "b4^3": 197.749073,
114 |         "b5^3": 240.763270,
115 |         "b6^3": 279.105583,
116 |         "y1^3": 59.044501,
117 |         "y2^3": 97.386815,
118 |         "y3^3": 140.401011,
119 |         "y4^3": 178.095698,
120 |         "y5^3": 259.105585,
121 |         "y6^3": 291.456505,
122 |     }
123 |     for (
124 |         annotation,
125 |         fragment_mz,
126 |     ) in fragment_annotation.get_theoretical_fragments(peptide, max_charge=3):
127 |         assert fragment_mz == pytest.approx(
128 |             fragments[f"{annotation.ion_type}^{annotation.charge}"]
129 |         )
130 | 
131 | 
132 | def test_get_theoretical_fragments_mod():
133 |     peptide = proforma.parse("HPY[+79.96633]LEDR")[0]
134 |     fragments = {
135 |         "b1^1": 138.066147,
136 |         "b2^1": 235.118912,
137 |         "b3^1": 478.148590,
138 |         "b4^1": 591.232666,
139 |         "b5^1": 720.275269,
140 |         "b6^1": 835.302185,
141 |         "y1^1": 175.118912,
142 |         "y2^1": 290.145844,
143 |         "y3^1": 419.188446,
144 |         "y4^1": 532.272522,
145 |         "y5^1": 775.302185,
146 |         "y6^1": 872.354980,
147 |         "b1^2": 69.536731,
148 |         "b2^2": 118.063111,
149 |         "b3^2": 239.577941,
150 |         "b4^2": 296.119971,
151 |         "b5^2": 360.641266,
152 |         "b6^2": 418.154736,
153 |         "y1^2": 88.063114,
154 |         "y2^2": 145.576584,
155 |         "y3^2": 210.097879,
156 |         "y4^2": 266.639909,
157 |         "y5^2": 388.154739,
158 |         "y6^2": 436.681119,
159 |         "b1^3": 46.693580,
160 |         "b2^3": 79.044500,
161 |         "b3^3": 160.054386,
162 |         "b4^3": 197.749073,
163 |         "b5^3": 240.763270,
164 |         "b6^3": 279.105583,
165 |         "y1^3": 59.044501,
166 |         "y2^3": 97.386815,
167 |         "y3^3": 140.401011,
168 |         "y4^3": 178.095698,
169 |         "y5^3": 259.105585,
170 |         "y6^3": 291.456505,
171 |     }
172 |     for (
173 |         annotation,
174 |         fragment_mz,
175 |     ) in fragment_annotation.get_theoretical_fragments(peptide, max_charge=3):
176 |         assert fragment_mz == pytest.approx(
177 |             fragments[f"{annotation.ion_type}^{annotation.charge}"]
178 |         )
179 | 
180 | 
181 | def test_get_theoretical_fragments_mod_term():
182 |     peptide = proforma.parse("[+42.01056]-HPYLEDR")[0]
183 |     fragments = {
184 |         "b1": 180.076706,
185 |         "b2": 277.129486,
186 |         "b3": 440.192810,
187 |         "b4": 553.276917,
188 |         "b5": 682.319519,
189 |         "b6": 797.346436,
190 |         "y1": 175.118912,
191 |         "y2": 290.145844,
192 |         "y3": 419.188446,
193 |         "y4": 532.272522,
194 |         "y5": 695.335815,
195 |         "y6": 792.388550,
196 |     }
197 |     for (
198 |         annotation,
199 |         fragment_mz,
200 |     ) in fragment_annotation.get_theoretical_fragments(peptide):
201 |         assert fragment_mz == pytest.approx(
202 |             fragments[f"{annotation.ion_type}"]
203 |         )
204 | 
205 | 
206 | def test_get_theoretical_fragments_mod_multiple():
207 |     peptide = proforma.parse("[+42.01056]-HPY[+79.96633]LEDR")[0]
208 |     fragments = {
209 |         "b1": 180.076706,
210 |         "b2": 277.129486,
211 |         "b3": 520.159180,
212 |         "b4": 633.243225,
213 |         "b5": 762.285828,
214 |         "b6": 877.312744,
215 |         "y1": 175.118912,
216 |         "y2": 290.145844,
217 |         "y3": 419.188446,
218 |         "y4": 532.272522,
219 |         "y5": 775.302185,
220 |         "y6": 872.354980,
221 |     }
222 |     for (
223 |         annotation,
224 |         fragment_mz,
225 |     ) in fragment_annotation.get_theoretical_fragments(peptide):
226 |         assert fragment_mz == pytest.approx(
227 |             fragments[f"{annotation.ion_type}"]
228 |         )
229 | 
230 | 
231 | def test_get_theoretical_fragments_isotope():
232 |     peptide = proforma.parse("HPYLEDR")[0]
233 |     fragments = {
234 |         "b1^1": 138.066147,
235 |         "b2^1": 235.118912,
236 |         "b3^1": 398.182220,
237 |         "b4^1": 511.266266,
238 |         "b5^1": 640.308899,
239 |         "b6^1": 755.335815,
240 |         "y1^1": 175.118912,
241 |         "y2^1": 290.145844,
242 |         "y3^1": 419.188446,
243 |         "y4^1": 532.272522,
244 |         "y5^1": 695.335815,
245 |         "y6^1": 792.388550,
246 |         "b1^2": 69.536731,
247 |         "b2^2": 118.063111,
248 |         "b3^2": 199.594776,
249 |         "b4^2": 256.136806,
250 |         "b5^2": 320.658101,
251 |         "b6^2": 378.171571,
252 |         "y1^2": 88.063114,
253 |         "y2^2": 145.576584,
254 |         "y3^2": 210.097879,
255 |         "y4^2": 266.639909,
256 |         "y5^2": 348.171574,
257 |         "y6^2": 396.697954,
258 |     }
259 |     for num_isotopes in range(0, 3):
260 |         annotations = fragment_annotation.get_theoretical_fragments(
261 |             peptide, max_charge=2, max_isotope=num_isotopes
262 |         )
263 |         assert len(annotations) == len(fragments) * (num_isotopes + 1)
264 |         for annotation, fragment_mz in annotations:
265 |             assert fragment_mz == pytest.approx(
266 |                 fragments[f"{annotation.ion_type}^{annotation.charge}"]
267 |                 + 1.003_354 * annotation.isotope / annotation.charge
268 |             )
269 | 
270 | 
271 | def test_get_theoretical_fragments_neutral_loss():
272 |     peptide = proforma.parse("HPYLEDR")[0]
273 |     fragments = {
274 |         "b1^1": 138.066147,
275 |         "b2^1": 235.118912,
276 |         "b3^1": 398.182220,
277 |         "b4^1": 511.266266,
278 |         "b5^1": 640.308899,
279 |         "b6^1": 755.335815,
280 |         "y1^1": 175.118912,
281 |         "y2^1": 290.145844,
282 |         "y3^1": 419.188446,
283 |         "y4^1": 532.272522,
284 |         "y5^1": 695.335815,
285 |         "y6^1": 792.388550,
286 |         "b1^2": 69.536731,
287 |         "b2^2": 118.063111,
288 |         "b3^2": 199.594776,
289 |         "b4^2": 256.136806,
290 |         "b5^2": 320.658101,
291 |         "b6^2": 378.171571,
292 |         "y1^2": 88.063114,
293 |         "y2^2": 145.576584,
294 |         "y3^2": 210.097879,
295 |         "y4^2": 266.639909,
296 |         "y5^2": 348.171574,
297 |         "y6^2": 396.697954,
298 |         "b1^3": 46.693580,
299 |         "b2^3": 79.044500,
300 |         "b3^3": 133.398943,
301 |         "b4^3": 171.093630,
302 |         "b5^3": 214.107826,
303 |         "b6^3": 252.450140,
304 |         "y1^3": 59.044501,
305 |         "y2^3": 97.386815,
306 |         "y3^3": 140.401011,
307 |         "y4^3": 178.095698,
308 |         "y5^3": 232.450141,
309 |         "y6^3": 264.801061,
310 |     }
311 |     neutral_loss = "H2O", 18.010565  # water
312 |     neutral_loss_fragments = {}
313 |     for fragment, mz in fragments.items():
314 |         charge = int(fragment.split("^")[1])
315 |         fragment = f"{fragment}-{neutral_loss[0]}"
316 |         neutral_loss_fragments[fragment] = mz - (neutral_loss[1] / charge)
317 |     fragments = {**fragments, **neutral_loss_fragments}
318 |     for (
319 |         annotation,
320 |         fragment_mz,
321 |     ) in fragment_annotation.get_theoretical_fragments(
322 |         peptide,
323 |         max_charge=3,
324 |         neutral_losses={None: 0, neutral_loss[0]: -neutral_loss[1]},
325 |     ):
326 |         assert fragment_mz == pytest.approx(
327 |             fragments[
328 |                 f"""{annotation.ion_type}^{annotation.charge}{
329 |                     annotation.neutral_loss
330 |                     if annotation.neutral_loss is not None
331 |                     else ""
332 |                 }"""
333 |             ]
334 |         )
335 | 
336 | 
337 | def test_get_theoretical_fragments_mod_neutral_loss():
338 |     peptide = proforma.parse("HPY[+79.96633]LEDR")[0]
339 |     fragments = {
340 |         "b1^1": 138.066147,
341 |         "b2^1": 235.118912,
342 |         "b3^1": 478.148590,
343 |         "b4^1": 591.232666,
344 |         "b5^1": 720.275269,
345 |         "b6^1": 835.302185,
346 |         "y1^1": 175.118912,
347 |         "y2^1": 290.145844,
348 |         "y3^1": 419.188446,
349 |         "y4^1": 532.272522,
350 |         "y5^1": 775.302185,
351 |         "y6^1": 872.354980,
352 |         "b1^2": 69.536731,
353 |         "b2^2": 118.063111,
354 |         "b3^2": 239.577941,
355 |         "b4^2": 296.119971,
356 |         "b5^2": 360.641266,
357 |         "b6^2": 418.154736,
358 |         "y1^2": 88.063114,
359 |         "y2^2": 145.576584,
360 |         "y3^2": 210.097879,
361 |         "y4^2": 266.639909,
362 |         "y5^2": 388.154739,
363 |         "y6^2": 436.681119,
364 |         "b1^3": 46.693580,
365 |         "b2^3": 79.044500,
366 |         "b3^3": 160.054386,
367 |         "b4^3": 197.749073,
368 |         "b5^3": 240.763270,
369 |         "b6^3": 279.105583,
370 |         "y1^3": 59.044501,
371 |         "y2^3": 97.386815,
372 |         "y3^3": 140.401011,
373 |         "y4^3": 178.095698,
374 |         "y5^3": 259.105585,
375 |         "y6^3": 291.456505,
376 |     }
377 |     neutral_loss = "H2O", 18.010565  # water
378 |     neutral_loss_fragments = {}
379 |     for fragment, mz in fragments.items():
380 |         charge = int(fragment.split("^")[1])
381 |         fragment = f"{fragment}-{neutral_loss[0]}"
382 |         neutral_loss_fragments[fragment] = mz - (neutral_loss[1] / charge)
383 |     fragments = {**fragments, **neutral_loss_fragments}
384 |     for (
385 |         annotation,
386 |         fragment_mz,
387 |     ) in fragment_annotation.get_theoretical_fragments(
388 |         peptide,
389 |         max_charge=3,
390 |         neutral_losses={None: 0, neutral_loss[0]: -neutral_loss[1]},
391 |     ):
392 |         assert fragment_mz == pytest.approx(
393 |             fragments[
394 |                 f"""{annotation.ion_type}^{annotation.charge}{
395 |                     annotation.neutral_loss
396 |                     if annotation.neutral_loss is not None
397 |                     else ""
398 |                 }"""
399 |             ]
400 |         )
401 | 
402 | 
403 | def test_get_theoretical_fragments_ambiguous():
404 |     with pytest.raises(ValueError):
405 |         fragment_annotation.get_theoretical_fragments(
406 |             proforma.parse("HPYLEBDR")[0]
407 |         )
408 |     with pytest.raises(ValueError):
409 |         fragment_annotation.get_theoretical_fragments(
410 |             proforma.parse("HPZYLEDR")[0]
411 |         )
412 | 
413 | 
414 | def test_get_theoretical_fragments_unsupported_ion_type():
415 |     with pytest.raises(ValueError):
416 |         fragment_annotation.get_theoretical_fragments(
417 |             proforma.parse("HPYLEDR")[0], "l"
418 |         )
419 | 


--------------------------------------------------------------------------------
/tests/spectrum_test.py:
--------------------------------------------------------------------------------
  1 | import operator
  2 | import os
  3 | import pickle
  4 | 
  5 | import numpy as np
  6 | import pytest
  7 | from pyteomics import mass
  8 | 
  9 | from spectrum_utils import fragment_annotation as fa, proforma, spectrum
 10 | 
 11 | 
 12 | @pytest.fixture(autouse=True)
 13 | def set_random_seed():
 14 |     """Reseed NumPy's global random generator with a fixed value."""
 15 |     np.random.seed(13)
 15 | 
 16 | 
 17 | def test_mz_intensity_len():
 18 |     mz = np.random.uniform(100, 1400, 150)
 19 |     intensity = np.random.exponential(1, 100)
 20 |     with pytest.raises(ValueError):
 21 |         spectrum.MsmsSpectrum("test_spectrum", 500, 2, mz, intensity)
 22 | 
 23 | 
 24 | def test_init_mz_sorted():
 25 |     num_peaks = 150
 26 |     mz = np.random.uniform(100, 1400, num_peaks)
 27 |     intensity = np.random.lognormal(0, 1, num_peaks)
 28 |     spec = spectrum.MsmsSpectrum("test_spectrum", 500, 2, mz, intensity)
 29 |     for mz1, mz2 in zip(spec.mz[:-1], spec.mz[1:]):
 30 |         assert mz1 <= mz2
 31 | 
 32 | 
 33 | def test_init_intensity_order():
 34 |     num_peaks = 150
 35 |     mz = np.random.uniform(100, 1400, num_peaks)
 36 |     intensity = np.random.lognormal(0, 1, num_peaks)
 37 |     mz_intensity_tuples = sorted(
 38 |         zip(mz, intensity), key=operator.itemgetter(0)
 39 |     )
 40 |     spec = spectrum.MsmsSpectrum("test_spectrum", 500, 2, mz, intensity)
 41 |     for this_mz, this_intensity, mz_intensity_tuple in zip(
 42 |         spec.mz, spec.intensity, mz_intensity_tuples
 43 |     ):
 44 |         assert (this_mz, this_intensity) == pytest.approx(mz_intensity_tuple)
 45 | 
 46 | 
 47 | def test_mz_array():
 48 |     num_peaks = 150
 49 |     mz = np.random.uniform(100, 1400, num_peaks).tolist()
 50 |     intensity = np.random.lognormal(0, 1, num_peaks)
 51 |     spec = spectrum.MsmsSpectrum("test_spectrum", 500, 2, mz, intensity)
 52 |     assert isinstance(spec.mz, np.ndarray)
 53 |     with pytest.raises(AttributeError):
 54 |         spec.mz = np.random.uniform(100, 1400, num_peaks)
 55 | 
 56 | 
 57 | def test_intensity_array():
 58 |     num_peaks = 150
 59 |     mz = np.random.uniform(100, 1400, num_peaks)
 60 |     intensity = np.random.lognormal(0, 1, num_peaks).tolist()
 61 |     spec = spectrum.MsmsSpectrum("test_spectrum", 500, 2, mz, intensity)
 62 |     assert isinstance(spec.intensity, np.ndarray)
 63 |     with pytest.raises(AttributeError):
 64 |         spec.intensity = np.random.lognormal(0, 1, num_peaks)
 65 | 
 66 | 
 67 | def test_from_usi():
 68 |     for usi in [
 69 |         # USI from PRIDE/MassIVE/PeptideAtlas.
 70 |         "mzspec:PXD000561:Adult_Frontalcortex_bRP_Elite_85_f09:scan:17555",
 71 |         # USI from PRIDE/MassIVE/PeptideAtlas with ProForma annotation.
 72 |         "mzspec:PXD000561:Adult_Frontalcortex_bRP_Elite_85_f09:scan:17555:"
 73 |         "VLHPLEGAVVIIFK/2",
 74 |         # USI from PRIDE/MassIVE/PeptideAtlas.
 75 |         "mzspec:PXD000966:CPTAC_CompRef_00_iTRAQ_05_2Feb12_Cougar_11-10-09:"
 76 |         "scan:12298",
 77 |         # USI from PRIDE/MassIVE/PeptideAtlas with ProForma annotation.
 78 |         "mzspec:PXD000966:CPTAC_CompRef_00_iTRAQ_05_2Feb12_Cougar_11-10-09:"
 79 |         "scan:12298:[iTRAQ4plex]-LHFFM[Oxidation]PGFAPLTSR/3",
 80 |         # USI from MassIVE.
 81 |         "mzspec:PXD022531:j12541_C5orf38:scan:12368",
 82 |         # USI from MassIVE with ProForma annotation.
 83 |         "mzspec:PXD022531:j12541_C5orf38:scan:12368:VAATLEILTLK/2",
 84 |         # USI from MassIVE.
 85 |         "mzspec:PXD022531:b11156_PRAMEF17:scan:22140",
 86 |         # USI from MassIVE with ProForma annotation.
 87 |         "mzspec:PXD022531:b11156_PRAMEF17:scan:22140:VAATLEILTLK/2",
 88 |         # USI from PRIDE/MassIVE/PeptideAtlas.
 89 |         "mzspec:PXD000394:20130504_EXQ3_MiBa_SA_Fib-2:scan:4234",
 90 |         # USI from PRIDE/MassIVE/PeptideAtlas with ProForma annotation.
 91 |         "mzspec:PXD000394:20130504_EXQ3_MiBa_SA_Fib-2:scan:4234:SGVSRKPAPG/2",
 92 |         # USI from PRIDE.
 93 |         "mzspec:PXD010793:20170817_QEh1_LC1_HuPa_SplicingPep_10pmol_G2_R01:"
 94 |         "scan:8296",
 95 |         # USI from PRIDE with ProForma annotation.
 96 |         "mzspec:PXD010793:20170817_QEh1_LC1_HuPa_SplicingPep_10pmol_G2_R01:"
 97 |         "scan:8296:SGVSRKPAPG/2",
 98 |         # USI from PRIDE/MassIVE/PeptideAtlas.
 99 |         "mzspec:PXD010154:01284_E04_P013188_B00_N29_R1.mzML:scan:31291",
100 |         # USI from PRIDE/MassIVE/PeptideAtlas with ProForma annotation.
101 |         "mzspec:PXD010154:01284_E04_P013188_B00_N29_R1.mzML:scan:31291:"
102 |         "DQNGTWEM[Oxidation]ESNENFEGYM[Oxidation]K/2",
103 |         # USI from GNPS to a task spectrum.
104 |         "mzspec:GNPS:TASK-c95481f0c53d42e78a61bf899e9f9adb-spectra/"
105 |         "specs_ms.mgf:scan:1943",
106 |         # USI from GNPS to a library spectrum.
107 |         "mzspec:GNPS:GNPS-LIBRARY:accession:CCMSLIB00005436077",
108 |         # USI to a GNPS/MassIVE spectrum.
109 |         "mzspec:MSV000078547:120228_nbut_3610_it_it_take2:scan:389",
110 |     ]:
111 |         spec = spectrum.MsmsSpectrum.from_usi(usi)
112 |         assert spec.identifier == usi
113 |     with pytest.raises(ValueError):
114 |         spectrum.MsmsSpectrum.from_usi(
115 |             "mzspec:PXD000561:Adult_Frontalcortex_bRP_Elite_85_f09:scan:17555",
116 |             "massive",
117 |         )
118 | 
119 | 
120 | def test_round_no_merge():
121 |     num_peaks = 150
122 |     mz = np.arange(1, num_peaks + 1) + np.random.uniform(-0.49, 0.5, num_peaks)
123 |     intensity = np.random.exponential(1, num_peaks)
124 |     spec = spectrum.MsmsSpectrum(
125 |         "test_spectrum",
126 |         500,
127 |         2,
128 |         mz.copy(),
129 |         intensity.copy(),
130 |     )
131 |     decimals = 0
132 |     spec.round(decimals)
133 |     assert len(spec.mz) == num_peaks
134 |     assert len(spec.intensity) == num_peaks
135 |     np.testing.assert_allclose(spec.mz, np.around(mz, decimals))
136 |     np.testing.assert_allclose(spec.intensity, intensity)
137 | 
138 | 
139 | def test_round_merge_len():
140 |     num_peaks = 10
141 |     mz = np.arange(1, num_peaks + 1) + np.random.uniform(-0.2, 0.2, num_peaks)
142 |     mz[4] = mz[3] + 0.0002
143 |     mz[5] = mz[3] + 0.0005
144 |     mz[7] = mz[8] - 0.00037
145 |     intensity = np.random.exponential(1, num_peaks)
146 |     spec = spectrum.MsmsSpectrum("test_spectrum", 500, 2, mz, intensity)
147 |     spec.annotate_proforma(f"X[+{mz[3]}]", 10, "ppm")
148 |     assert spec.annotation is not None
149 |     spec.round(1)
150 |     assert len(spec.mz) == len(mz) - 3
151 |     assert len(spec.mz) == len(spec.intensity)
152 |     assert spec.annotation is None
153 | 
154 | 
155 | def test_round_merge_sum():
156 |     num_peaks = 10
157 |     mz = np.arange(1, num_peaks + 1) + np.random.uniform(-0.2, 0.2, num_peaks)
158 |     mz[4] = mz[3] + 0.0002
159 |     mz[5] = mz[3] + 0.0005
160 |     mz[7] = mz[8] - 0.00037
161 |     intensity = np.random.exponential(1, num_peaks)
162 |     spec = spectrum.MsmsSpectrum("test_spectrum", 500, 2, mz, intensity.copy())
163 |     spec.round(1, "sum")
164 |     assert np.sum(spec.intensity) == pytest.approx(np.sum(intensity))
165 | 
166 | 
167 | def test_round_merge_max():
168 |     num_peaks = 10
169 |     mz = np.arange(1, num_peaks + 1) + np.random.uniform(-0.2, 0.2, num_peaks)
170 |     mz[4] = mz[3] + 0.0002
171 |     mz[5] = mz[3] + 0.0005
172 |     mz[7] = mz[8] - 0.00037
173 |     intensity = np.arange(1, 11)
174 |     spec = spectrum.MsmsSpectrum("test_spectrum", 500, 2, mz, intensity.copy())
175 |     spec.round(1, "max")
176 |     np.testing.assert_allclose(spec.intensity, [1, 2, 3, 6, 7, 9, 10])
177 | 
178 | 
179 | def test_set_mz_range_keep_all():
180 |     num_peaks = 150
181 |     mz = np.random.uniform(100, 1400, num_peaks)
182 |     intensity = np.random.lognormal(0, 1, num_peaks)
183 |     spec = spectrum.MsmsSpectrum("test_spectrum", 500, 2, mz, intensity)
184 |     min_mz, max_mz = 0, 1500
185 |     spec.set_mz_range(min_mz, max_mz)
186 |     assert len(spec.mz) == num_peaks
187 |     assert len(spec.intensity) == num_peaks
188 | 
189 | 
190 | def test_set_mz_range_truncate():
191 |     num_peaks = 150
192 |     mz = np.random.uniform(100, 1400, num_peaks)
193 |     intensity = np.random.lognormal(0, 1, num_peaks)
194 |     spec = spectrum.MsmsSpectrum("test_spectrum", 500, 2, mz, intensity)
195 |     spec.annotate_proforma(f"X[+{mz[75]}]", 10, "ppm")
196 |     min_mz, max_mz = 400, 1200
197 |     assert spec.annotation is not None
198 |     assert spec.mz.min() < min_mz
199 |     assert spec.mz.max() > max_mz
200 |     spec.set_mz_range(min_mz, max_mz)
201 |     assert len(spec.mz) < num_peaks
202 |     assert len(spec.intensity) < num_peaks
203 |     assert spec.annotation is None
204 |     assert spec.mz.min() >= min_mz
205 |     assert spec.mz.max() <= max_mz
206 | 
207 | 
208 | def test_set_mz_range_truncate_left():
209 |     num_peaks = 150
210 |     mz = np.random.uniform(100, 1400, num_peaks)
211 |     intensity = np.random.lognormal(0, 1, num_peaks)
212 |     spec = spectrum.MsmsSpectrum("test_spectrum", 500, 2, mz, intensity)
213 |     min_mz, max_mz = 400, 1500
214 |     assert spec.mz.min() < min_mz
215 |     spec.set_mz_range(min_mz, max_mz)
216 |     assert len(spec.mz) < num_peaks
217 |     assert len(spec.intensity) < num_peaks
218 |     assert spec.mz.min() >= min_mz
219 | 
220 | 
221 | def test_set_mz_range_truncate_right():
222 |     num_peaks = 150
223 |     mz = np.random.uniform(100, 1400, num_peaks)
224 |     intensity = np.random.lognormal(0, 1, num_peaks)
225 |     spec = spectrum.MsmsSpectrum("test_spectrum", 500, 2, mz, intensity)
226 |     min_mz, max_mz = 0, 1200
227 |     assert spec.mz.max() > max_mz
228 |     spec.set_mz_range(min_mz, max_mz)
229 |     assert len(spec.mz) < num_peaks
230 |     assert len(spec.intensity) < num_peaks
231 |     assert spec.mz.max() <= max_mz
232 | 
233 | 
234 | def test_set_mz_range_none():
235 |     num_peaks, min_mz, max_mz = 150, 400, 1200
236 |     mz = np.random.uniform(100, 1400, num_peaks)
237 |     intensity = np.random.lognormal(0, 1, num_peaks)
238 |     spec = spectrum.MsmsSpectrum(
239 |         "test_spectrum", 500, 2, mz.copy(), intensity.copy()
240 |     )
241 |     spec.set_mz_range(None, None)
242 |     assert len(spec.mz) == num_peaks
243 |     assert len(spec.intensity) == num_peaks
244 |     assert spec.mz.min() == mz.min()
245 |     assert spec.mz.max() == mz.max()
246 |     spec = spectrum.MsmsSpectrum(
247 |         "test_spectrum", 500, 2, mz.copy(), intensity.copy()
248 |     )
249 |     spec.set_mz_range(None, max_mz)
250 |     assert len(spec.mz) < num_peaks
251 |     assert len(spec.intensity) < num_peaks
252 |     assert spec.mz.max() <= max_mz
253 |     assert spec.mz.min() == mz.min()
254 |     spec = spectrum.MsmsSpectrum(
255 |         "test_spectrum", 500, 2, mz.copy(), intensity.copy()
256 |     )
257 |     spec.set_mz_range(min_mz, None)
258 |     assert len(spec.mz) < num_peaks
259 |     assert len(spec.intensity) < num_peaks
260 |     assert spec.mz.min() >= min_mz
261 |     assert spec.mz.max() == mz.max()
262 | 
263 | 
264 | def test_set_mz_range_reversed():
265 |     num_peaks = 150
266 |     mz = np.random.uniform(100, 1400, num_peaks)
267 |     intensity = np.random.lognormal(0, 1, num_peaks)
268 |     spec = spectrum.MsmsSpectrum("test_spectrum", 500, 2, mz, intensity)
269 |     min_mz, max_mz = 400, 1200
270 |     assert spec.mz.min() < min_mz
271 |     assert spec.mz.max() > max_mz
272 |     spec.set_mz_range(max_mz, min_mz)
273 |     assert len(spec.mz) < num_peaks
274 |     assert len(spec.intensity) < num_peaks
275 |     assert spec.mz.min() >= min_mz
276 |     assert spec.mz.max() <= max_mz
277 | 
278 | 
279 | def test_remove_precursor_peak():
280 |     num_peaks = 150
281 |     mz = np.random.uniform(100, 1400, num_peaks)
282 |     fragment_tol_mass = np.random.uniform(0, 0.5)
283 |     fragment_tol_mode = "Da"
284 |     precursor_mz = mz[np.random.randint(0, num_peaks)] + fragment_tol_mass / 2
285 |     intensity = np.random.lognormal(0, 1, num_peaks)
286 |     spec = spectrum.MsmsSpectrum(
287 |         "test_spectrum", precursor_mz, 2, mz, intensity
288 |     )
289 |     spec.annotate_proforma(f"X[+{mz[75]}]", 10, "ppm")
290 |     assert spec.annotation is not None
291 |     spec.remove_precursor_peak(fragment_tol_mass, fragment_tol_mode)
292 |     assert np.abs(precursor_mz - spec.mz).all() > fragment_tol_mass
293 |     assert len(spec.mz) <= num_peaks - 1
294 |     assert len(spec.intensity) <= num_peaks - 1
295 |     assert spec.annotation is None
296 | 
297 | 
298 | def test_remove_precursor_peak_none():
299 |     num_peaks = 150
300 |     mz = np.random.uniform(100, 1400, num_peaks)
301 |     fragment_tol_mass = np.random.uniform(0, 0.5)
302 |     fragment_tol_mode = "Da"
303 |     precursor_mz = mz[np.random.randint(0, num_peaks)] + fragment_tol_mass * 2
304 |     intensity = np.random.lognormal(0, 1, num_peaks)
305 |     spec = spectrum.MsmsSpectrum(
306 |         "test_spectrum", precursor_mz, 2, mz, intensity
307 |     )
308 |     spec.remove_precursor_peak(fragment_tol_mass, fragment_tol_mode)
309 |     assert len(spec.mz) == num_peaks
310 |     assert len(spec.intensity) == num_peaks
311 |     assert np.abs(precursor_mz - spec.mz).all() > fragment_tol_mass
312 | 
313 | 
314 | def test_remove_precursor_peak_charge():
315 |     num_peaks = 150
316 |     mz = np.random.uniform(100, 1400, num_peaks)
317 |     fragment_tol_mass = np.random.uniform(0, 0.5)
318 |     fragment_tol_mode = "Da"
319 |     precursor_mz = mz[np.random.randint(0, num_peaks)] + fragment_tol_mass / 2
320 |     precursor_charge = 3
321 |     mz[-1] = ((precursor_mz - 1.0072766) * precursor_charge) / 2 + 1.0072766
322 |     mz[-2] = ((precursor_mz - 1.0072766) * precursor_charge) + 1.0072766
323 |     intensity = np.random.lognormal(0, 1, num_peaks)
324 |     spec = spectrum.MsmsSpectrum(
325 |         "test_spectrum", precursor_mz, precursor_charge, mz, intensity
326 |     )
327 |     spec.remove_precursor_peak(fragment_tol_mass, fragment_tol_mode)
328 |     assert np.abs(precursor_mz - spec.mz).all() > fragment_tol_mass
329 |     assert len(spec.mz) <= num_peaks - 3
330 |     assert len(spec.intensity) <= num_peaks - 3
331 | 
332 | 
333 | def test_remove_precursor_peak_isotope():
334 |     num_peaks = 150
335 |     mz = np.random.uniform(100, 1400, num_peaks)
336 |     fragment_tol_mass = np.random.uniform(0, 0.5)
337 |     fragment_tol_mode = "Da"
338 |     precursor_mz = mz[np.random.randint(0, num_peaks)] + fragment_tol_mass / 2
339 |     precursor_charge = 3
340 |     mz[-1] = precursor_mz + 1 / precursor_charge
341 |     mz[-2] = precursor_mz + 2 / precursor_charge
342 |     intensity = np.random.lognormal(0, 1, num_peaks)
343 |     spec = spectrum.MsmsSpectrum(
344 |         "test_spectrum", precursor_mz, precursor_charge, mz, intensity
345 |     )
346 |     spec.remove_precursor_peak(fragment_tol_mass, fragment_tol_mode, 2)
347 |     assert np.abs(precursor_mz - spec.mz).all() > fragment_tol_mass
348 |     assert len(spec.mz) <= num_peaks - 3
349 |     assert len(spec.intensity) <= num_peaks - 3
350 | 
351 | 
352 | def test_filter_intensity_keep_all():
353 |     num_peaks = 150
354 |     mz = np.random.uniform(100, 1400, num_peaks)
355 |     intensity = np.random.lognormal(0, 1, num_peaks)
356 |     spec = spectrum.MsmsSpectrum("test_spectrum", 500, 2, mz, intensity)
357 |     spec.filter_intensity()
358 |     assert len(spec.mz) == num_peaks
359 |     assert len(spec.intensity) == num_peaks
360 | 
361 | 
def test_filter_intensity_remove_low_intensity():
    """Peaks below a relative intensity threshold are expected to be dropped.

    The test also checks that an existing annotation is cleared by the
    filter and that the most intense peak is retained.
    """
    n_peaks = 150
    rel_threshold = 0.05
    peak_mz = np.random.uniform(100, 1400, n_peaks)
    peak_intensity = np.random.lognormal(0, 1, n_peaks)
    base_peak = peak_intensity.max()
    spec = spectrum.MsmsSpectrum(
        "test_spectrum", 500, 2, peak_mz, peak_intensity
    )
    spec.annotate_proforma(f"X[+{peak_mz[75]}]", 10, "ppm")

    # Preconditions: there is something to filter and an annotation exists.
    assert spec.intensity.min() < rel_threshold * spec.intensity.max()
    assert spec.annotation is not None

    spec.filter_intensity(min_intensity=rel_threshold)

    # Postconditions: fewer peaks, no annotation, base peak retained.
    for values in (spec.mz, spec.intensity):
        assert len(values) < n_peaks
    assert spec.annotation is None
    assert spec.intensity.max() == pytest.approx(base_peak)
    assert spec.intensity.min() >= rel_threshold * base_peak
378 | 
379 | 
def test_filter_intensity_max_num_peaks():
    """Capping the peak count must keep exactly that many peaks.

    The most intense peak is expected to be among the retained peaks.
    """
    n_peaks, peak_cap = 150, 50
    peak_mz = np.random.uniform(100, 1400, n_peaks)
    peak_intensity = np.random.lognormal(0, 1, n_peaks)
    base_peak = peak_intensity.max()
    spec = spectrum.MsmsSpectrum(
        "test_spectrum", 500, 2, peak_mz, peak_intensity
    )
    spec.filter_intensity(max_num_peaks=peak_cap)
    for values in (spec.mz, spec.intensity):
        assert len(values) == peak_cap
    assert spec.intensity.max() == pytest.approx(base_peak)
391 | 
392 | 
def test_filter_intensity_remove_low_intensity_max_num_peaks():
    """Combine the relative intensity threshold with a peak count cap.

    The result may contain at most the capped number of peaks, must retain
    the most intense peak, and may not contain any peak below the relative
    threshold.
    """
    n_peaks, peak_cap = 150, 50
    rel_threshold = 0.05
    peak_mz = np.random.uniform(100, 1400, n_peaks)
    peak_intensity = np.random.lognormal(0, 1, n_peaks)
    base_peak = peak_intensity.max()
    spec = spectrum.MsmsSpectrum(
        "test_spectrum", 500, 2, peak_mz, peak_intensity
    )
    # Precondition: at least one peak lies below the threshold.
    assert spec.intensity.min() < rel_threshold * base_peak

    spec.filter_intensity(min_intensity=rel_threshold, max_num_peaks=peak_cap)

    for values in (spec.mz, spec.intensity):
        assert len(values) <= peak_cap
    assert spec.intensity.max() == pytest.approx(base_peak)
    assert spec.intensity.min() >= rel_threshold * base_peak
409 | 
410 | 
def test_scale_intensity_root():
    """Root scaling must be undone by raising to the same degree."""
    n_peaks = 150
    peak_mz = np.random.uniform(100, 1400, n_peaks)
    raw_intensity = np.random.lognormal(0, 1, n_peaks)
    for degree in (2, 4, 10):
        # Every degree gets a fresh spectrum built from a private copy of
        # the intensities.
        spec = spectrum.MsmsSpectrum(
            "test_spectrum", 500, 2, peak_mz, raw_intensity.copy()
        )
        before = spec.intensity.copy()
        spec.scale_intensity(scaling="root", degree=degree)
        restored = spec.intensity**degree
        np.testing.assert_allclose(restored, before, rtol=1e-5)
424 | 
425 | 
def test_scale_intensity_log():
    """Log scaling must be undone by ``base ** scaled - 1``."""
    n_peaks = 150
    peak_mz = np.random.uniform(100, 1400, n_peaks)
    raw_intensity = np.random.lognormal(0, 1, n_peaks)
    for base in (2, np.e, 10):
        # Every base gets a fresh spectrum built from a private copy of the
        # intensities.
        spec = spectrum.MsmsSpectrum(
            "test_spectrum", 500, 2, peak_mz, raw_intensity.copy()
        )
        before = spec.intensity.copy()
        spec.scale_intensity(scaling="log", base=base)
        restored = base**spec.intensity - 1
        np.testing.assert_allclose(restored, before, rtol=1e-5)
439 | 
440 | 
def test_scale_intensity_rank():
    """Rank scaling without a maximum rank must yield ranks 1..num_peaks."""
    n_peaks = 150
    peak_mz = np.random.uniform(100, 1400, n_peaks)
    peak_intensity = np.random.lognormal(0, 1, n_peaks)
    spec = spectrum.MsmsSpectrum(
        "test_spectrum", 500, 2, peak_mz, peak_intensity
    )
    spec.scale_intensity(scaling="rank")
    expected_ranks = np.arange(1, n_peaks + 1)
    observed_ranks = np.sort(spec.intensity)
    np.testing.assert_allclose(observed_ranks, expected_ranks)
450 | 
451 | 
def test_scale_intensity_rank_less_peaks():
    """Rank scaling with a maximum rank larger than the number of peaks.

    With 50 peaks and a maximum rank of 100 the sorted scaled intensities
    are expected to be 51..100.
    """
    n_peaks = 50
    top_rank = n_peaks + 50
    peak_mz = np.random.uniform(100, 1400, n_peaks)
    peak_intensity = np.random.lognormal(0, 1, n_peaks)
    spec = spectrum.MsmsSpectrum(
        "test_spectrum", 500, 2, peak_mz, peak_intensity
    )
    spec.scale_intensity(scaling="rank", max_rank=top_rank)
    expected_ranks = np.arange(n_peaks + 1, top_rank + 1)
    observed_ranks = np.sort(spec.intensity)
    np.testing.assert_allclose(observed_ranks, expected_ranks)
462 | 
463 | 
def test_scale_intensity_rank_more_peaks():
    """A maximum rank below the number of peaks must raise ValueError."""
    n_peaks = 150
    too_small_rank = n_peaks - 50
    peak_mz = np.random.uniform(100, 1400, n_peaks)
    peak_intensity = np.random.lognormal(0, 1, n_peaks)
    spec = spectrum.MsmsSpectrum(
        "test_spectrum", 500, 2, peak_mz, peak_intensity
    )
    with pytest.raises(ValueError):
        spec.scale_intensity(scaling="rank", max_rank=too_small_rank)
471 | 
472 | 
def test_scale_intensity_max():
    """Scaling to a maximum of 1.0 must divide by the original maximum."""
    n_peaks = 150
    peak_mz = np.random.uniform(100, 1400, n_peaks)
    peak_intensity = np.random.lognormal(0, 1, n_peaks)
    spec = spectrum.MsmsSpectrum(
        "test_spectrum", 500, 2, peak_mz, peak_intensity
    )
    # Snapshot the stored intensities before they are rescaled.
    before = spec.intensity.copy()
    base_peak = spec.intensity.max()
    spec.scale_intensity(max_intensity=1.0)
    assert spec.intensity.max() == pytest.approx(1.0)
    # Multiplying by the old maximum must recover the original values.
    rescaled = spec.intensity * base_peak
    np.testing.assert_allclose(rescaled, before, rtol=1e-5)
484 | 
485 | 
def test_pickle():
    """An annotated spectrum must survive a pickle round trip unchanged.

    The spectrum is dumped to and loaded from a file, after which the
    identifier, precursor information, peaks, retention time, ProForma
    string and annotation of both objects are compared.
    """
    # Local import so this test does not depend on the module-level imports.
    import tempfile

    num_peaks = 150
    mz = np.random.uniform(100, 1400, num_peaks)
    intensity = np.random.lognormal(0, 1, num_peaks)
    spec = spectrum.MsmsSpectrum("test_spectrum", 500, 2, mz, intensity)
    spec.annotate_proforma(f"X[+{mz[75]}]", 10, "ppm")
    # Use a private temporary directory instead of the working directory:
    # the file is cleaned up even when pickling or an assertion fails, and
    # concurrent test runs cannot collide on a shared file name.
    with tempfile.TemporaryDirectory() as tmp_dir:
        pickle_path = os.path.join(tmp_dir, "temp.pkl")
        with open(pickle_path, "wb") as f:
            pickle.dump(spec, f)
        with open(pickle_path, "rb") as f:
            spec_pickled = pickle.load(f)
    assert spec.identifier == spec_pickled.identifier
    assert spec.precursor_mz == spec_pickled.precursor_mz
    assert spec.precursor_charge == spec_pickled.precursor_charge
    np.testing.assert_array_equal(spec.mz, spec_pickled.mz)
    np.testing.assert_array_equal(spec.intensity, spec_pickled.intensity)
    np.testing.assert_equal(spec.retention_time, spec_pickled.retention_time)
    assert spec.proforma == spec_pickled.proforma
    np.testing.assert_equal(spec.annotation, spec_pickled.annotation)
505 | 
506 | 
def test_annotate_proforma():
    """Peaks placed near theoretical fragment m/z values must be annotated.

    For each peptide, up to 50 theoretical fragment m/z values are jittered
    by less than the fragment tolerance and inserted into a random
    spectrum. After annotation, at least that many peaks are expected to
    carry a non-empty annotation.
    """
    tol_mass, tol_mode = 0.02, "Da"
    n_peaks = 150
    sequences = (
        "SYELPDGQVITIGNER",
        "MFLSFPTTK",
        "DLYANTVLSGGTTMYPGIADR",
        "YLYEIAR",
        "VAPEEHPVLLTEAPLNPK",
    )
    for idx, peptide in enumerate(sequences):
        # Precursor charges run 2, 3, ... in peptide order.
        charge = idx + 2
        theoretical = fa.get_theoretical_fragments(
            proforma.parse(peptide)[0], max_charge=2
        )
        planted_mz = np.asarray([frag_mz for _, frag_mz in theoretical])
        # Jitter stays strictly inside the tolerance window.
        planted_mz += np.random.uniform(
            -0.9 * tol_mass, 0.9 * tol_mass, len(planted_mz)
        )
        n_planted = min(50, len(planted_mz))
        planted_mz = np.random.choice(planted_mz, n_planted, False)
        # Overwrite the first random peaks with the planted fragment peaks.
        mz = np.random.uniform(100, 1400, n_peaks)
        mz[: len(planted_mz)] = planted_mz
        intensity = np.random.lognormal(0, 1, n_peaks)
        precursor_mz = mass.calculate_mass(sequence=peptide, charge=charge)
        spec = spectrum.MsmsSpectrum(
            "test_spectrum", precursor_mz, charge, mz, intensity
        )
        spec.annotate_proforma(peptide, tol_mass, tol_mode)
        n_annotated = np.count_nonzero(spec.annotation)
        assert n_annotated >= len(planted_mz)
544 | 
545 | 
def test_annotate_proforma_neutral_loss():
    """Fragment peaks with and without a water loss must be annotated.

    For each peptide, up to 50 theoretical fragment m/z values (including
    water-loss variants) are jittered by less than the fragment tolerance
    and inserted into a random spectrum. After annotation with the water
    neutral loss enabled, at least that many peaks are expected to carry a
    non-empty annotation.
    """
    tol_mass, tol_mode = 0.02, "Da"
    loss_name, loss_mass = "H2O", 18.010565  # water
    n_peaks = 150
    sequences = (
        "SYELPDGQVITIGNER",
        "MFLSFPTTK",
        "DLYANTVLSGGTTMYPGIADR",
        "YLYEIAR",
        "VAPEEHPVLLTEAPLNPK",
    )
    for idx, peptide in enumerate(sequences):
        # Precursor charges run 2, 3, ... in peptide order.
        charge = idx + 2
        theoretical = fa.get_theoretical_fragments(
            proforma.parse(peptide)[0],
            max_charge=2,
            neutral_losses={None: 0, loss_name: -loss_mass},
        )
        planted_mz = np.asarray([frag_mz for _, frag_mz in theoretical])
        # Jitter stays strictly inside the tolerance window.
        planted_mz += np.random.uniform(
            -0.9 * tol_mass, 0.9 * tol_mass, len(planted_mz)
        )
        n_planted = min(50, len(planted_mz))
        planted_mz = np.random.choice(planted_mz, n_planted, False)
        # Overwrite the first random peaks with the planted fragment peaks.
        mz = np.random.uniform(100, 1400, n_peaks)
        mz[: len(planted_mz)] = planted_mz
        intensity = np.random.lognormal(0, 1, n_peaks)
        precursor_mz = mass.calculate_mass(sequence=peptide, charge=charge)
        spec = spectrum.MsmsSpectrum(
            "test_spectrum", precursor_mz, charge, mz, intensity
        )
        spec.annotate_proforma(
            peptide,
            tol_mass,
            tol_mode,
            neutral_losses={loss_name: -loss_mass},
        )
        n_annotated = np.count_nonzero(spec.annotation)
        assert n_annotated >= len(planted_mz)
594 | 


--------------------------------------------------------------------------------