├── .gitignore ├── .readthedocs.yml ├── .travis.yml ├── LICENSE ├── MANIFEST.in ├── README.rst ├── RawFileReaderLicense.rst ├── appveyor.yml ├── binder └── environment.yml ├── dimspy ├── __init__.py ├── __main__.py ├── metadata.py ├── models │ ├── __init__.py │ ├── peak_matrix.py │ ├── peaklist.py │ ├── peaklist_metadata.py │ └── peaklist_tags.py ├── portals │ ├── ThermoRawFileReader_3_0_41 │ │ ├── Libraries │ │ │ ├── ThermoFisher.CommonCore.BackgroundSubtraction.XML │ │ │ ├── ThermoFisher.CommonCore.BackgroundSubtraction.dll │ │ │ ├── ThermoFisher.CommonCore.Data.XML │ │ │ ├── ThermoFisher.CommonCore.Data.dll │ │ │ ├── ThermoFisher.CommonCore.MassPrecisionEstimator.XML │ │ │ ├── ThermoFisher.CommonCore.MassPrecisionEstimator.dll │ │ │ ├── ThermoFisher.CommonCore.RawFileReader.XML │ │ │ └── ThermoFisher.CommonCore.RawFileReader.dll │ │ └── License │ │ │ └── RawFileRdr_License_Agreement_RevA.doc │ ├── __init__.py │ ├── hdf5_portal.py │ ├── mzml_portal.py │ ├── paths.py │ ├── thermo_raw_portal.py │ └── txt_portal.py ├── process │ ├── __init__.py │ ├── peak_alignment.py │ ├── peak_filters.py │ └── replicate_processing.py └── tools.py ├── docs ├── Makefile └── source │ ├── api-reference.rst │ ├── bugs-and-issues.rst │ ├── changelog.rst │ ├── citation.rst │ ├── cli.rst │ ├── conf.py │ ├── credits.rst │ ├── dimspy.metadata.rst │ ├── dimspy.models.rst │ ├── dimspy.portals.rst │ ├── dimspy.process.rst │ ├── dimspy.tools.rst │ ├── images │ └── alignment.png │ ├── index.rst │ ├── installation.rst │ └── license.rst ├── environment.yml ├── examples ├── examples.py ├── run.bat └── run.sh ├── notebooks └── workflow.ipynb ├── requirements.txt ├── setup.py └── tests ├── __init__.py ├── data ├── MTBLS79_subset │ ├── MTBLS79_mzml_peak_matrix_v1.hdf5 │ ├── MTBLS79_mzml_peak_matrix_v2.hdf5 │ ├── MTBLS79_mzml_single.zip │ ├── MTBLS79_mzml_single_report.txt │ ├── MTBLS79_mzml_triplicates.zip │ ├── MTBLS79_mzml_triplicates_report.txt │ ├── MTBLS79_mzml_triplicates_v1.hdf5 │ ├── 
MTBLS79_mzml_triplicates_v2.hdf5 │ ├── batch04_QC17_rep01_262_v1.txt │ ├── batch04_QC17_rep01_262_v2.txt │ ├── filelist_class_label_error.txt │ ├── filelist_csl_MTBLS79_mzml_peak_matrix.txt │ ├── filelist_csl_MTBLS79_mzml_triplicates.txt │ ├── filelist_filename_error.txt │ ├── filelist_injection_order_error.txt │ ├── filelist_multi.txt │ ├── filelist_multi_error.txt │ ├── filelist_mzml_single.txt │ ├── filelist_mzml_triplicates.txt │ ├── filelist_raw_triplicates.txt │ ├── filelist_replicate_error_1.txt │ ├── filelist_replicate_error_2.txt │ ├── mzml │ │ ├── batch04_QC17_rep01_262.mzML │ │ ├── batch04_QC17_rep02_263.mzML │ │ └── batch04_QC17_rep03_264.mzML │ ├── pm_mzml_triplicates_v1.txt │ ├── pm_mzml_triplicates_v2.txt │ └── raw │ │ ├── batch04_QC17_rep01_262.RAW │ │ ├── batch04_QC17_rep02_263.RAW │ │ └── batch04_QC17_rep03_264.RAW └── mzml_DIMSn.zip ├── test_hdf5_portal.py ├── test_metadata.py ├── test_mzml_portal.py ├── test_paths_portal.py ├── test_peak_alignment.py ├── test_peak_filters.py ├── test_peak_matrix.py ├── test_peaklist.py ├── test_peaklist_metadata.py ├── test_peaklist_tags.py ├── test_replicate_processing.py ├── test_suite_models.py ├── test_suite_portals.py ├── test_suite_process.py ├── test_suite_tools.py ├── test_thermo_raw_portal.py ├── test_tools.py └── test_txt_portal.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *,cover 47 | .hypothesis/ 48 | debug.py 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | docs/source/images/alignment.graffle 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # dotenv 82 | .env 83 | 84 | # virtualenv 85 | .venv 86 | venv/ 87 | venv-py3/ 88 | ENV/ 89 | 90 | # PyCharm 91 | # User-specific stuff: 92 | .idea/ 93 | 94 | # vscode 95 | .vscode 96 | 97 | # R-lanaguage 98 | # History files 99 | .Rhistory 100 | .Rapp.history 101 | 102 | # Session Data files 103 | .RData 104 | 105 | #macOS 106 | *.DS_Store 107 | 108 | *~ 109 | 110 | 111 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Build documentation in the docs/ directory with Sphinx 9 | sphinx: 10 | configuration: docs/source/conf.py 11 | 12 | # Optionally build your docs in additional formats such as PDF and ePub 13 | formats: all 14 | 15 | conda: 16 | environment: environment.yml 17 | 18 | python: 19 | version: 3.7 20 | install: 21 | - method: pip 22 | path: . 
23 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: xenial 2 | language: python 3 | python: 4 | - "3.7" 5 | 6 | install: 7 | - sudo apt-get update 8 | - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; 9 | - bash miniconda.sh -b -p $HOME/miniconda 10 | - export PATH="$HOME/miniconda/bin:$PATH" 11 | - hash -r 12 | - conda config --set always_yes yes --set changeps1 no 13 | - conda update -q conda 14 | - conda info -a 15 | - conda env create -n test-environment -f environment.yml 16 | - source activate test-environment 17 | - conda install pytest codecov pytest-cov -c conda-forge 18 | - pip install . 19 | 20 | script: 21 | - dimspy --help 22 | - pytest --cov=dimspy tests/ 23 | 24 | after_script: 25 | - codecov 26 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.rst 2 | include LICENSE 3 | include RawFileReaderLicense.rst 4 | include requirements.txt 5 | include examples/*.* 6 | include tests/*.py 7 | include tests/data/*.* 8 | include tests/data/MTBLS79_subset/*.* 9 | include tests/data/MTBLS79_subset/mzml/*.* 10 | include tests/data/MTBLS79_subset/raw/*.* 11 | include tests/results/*.* 12 | include dimspy/portals/ThermoRawFileReader_3_0_41/Libraries/*.XML 13 | include dimspy/portals/ThermoRawFileReader_3_0_41/Libraries/*.dll 14 | include dimspy/portals/ThermoRawFileReader_3_0_41/License/RawFileRdr_License_Agreement_RevA.doc 15 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | DIMSpy 2 | ====== 3 | |Py versions| |Version| |Bioconda| |Galaxy-eu| |Git| |Build Status (Travis)| |Build Status (AppVeyor)| |codecov| 
|License| |binder| |RTD doc| |gitter| 4 | 5 | Python package for processing direct-infusion mass spectrometry-based metabolomics and lipidomics data 6 | 7 | - **Documentation:** https://dimspy.readthedocs.io/en/latest 8 | - **Source:** https://github.com/computational-metabolomics/dimspy 9 | - **Bug reports:** https://github.com/computational-metabolomics/dimspy/issues 10 | 11 | 12 | Installation (Conda, PyPi and Galaxy) 13 | ------------------------------------- 14 | See the `Installation page `__ of 15 | the `online documentation `__. 16 | 17 | 18 | Bug reports 19 | ----------- 20 | Please report any bugs that you find `here `_. 21 | Or fork the repository on `GitHub `_ 22 | and create a pull request (PR). We welcome all contributions, and we 23 | will help you to make the PR if you are new to `git`. 24 | 25 | 26 | Credits 27 | ------- 28 | DIMSpy was originally written by Ralf Weber and Albert Zhou and has been developed with the help of many others. 29 | Thanks to everyone who has improved DIMSpy by contributing code, adding features, bug reports and fixes, and documentation. 30 | 31 | **Developers and contributors** 32 | - Ralf J. M. Weber (r.j.weber@bham.ac.uk) - `University of Birmingham (UK) `__ 33 | - Jiarui (Albert) Zhou (j.zhou.3@bham.ac.uk) - `University of Birmingham (UK) `__, `HIT Shenzhen (China) `_ 34 | - Thomas N. Lawson (t.n.lawson@bham.ac.uk) - `University of Birmingham (UK) `__ 35 | - Martin R. Jones (martin.jones@eawag.ch) - `Eawag (Switzerland) `_ 36 | 37 | **DIMSpy acknowledges support from the following funders:** 38 | - BBSRC, grant number BB/M019985/1 39 | - European Commission's H2020 programme, grant agreement number 654241 40 | - Wellcome Trust, grant number 202952/Z/16/Z 41 | 42 | **Citation** 43 | 44 | To cite DIMSpy please use one of the Zenodo references listed `here `__. 45 | 46 | 47 | License 48 | -------- 49 | DIMSpy is licensed under the GNU General Public License v3.0 (see `LICENSE file `_ for licensing information). 
Copyright © 2017 - 2020 Ralf Weber, Albert Zhou 50 | 51 | **Third-party licenses and copyright** 52 | 53 | RawFileReader reading tool. Copyright © 2016 by Thermo Fisher Scientific, Inc. All rights reserved. See `RawFileReaderLicense `_ for licensing information. 54 | Using DIMSpy software for processing Thermo Fisher Scientific \*.raw files implies the acceptance of the RawFileReader license terms. 55 | Anyone receiving RawFileReader as part of a larger software distribution (in the current context, as part of DIMSpy) is considered an "end user" under 56 | section 3.3 of the RawFileReader License, and is not granted rights to redistribute RawFileReader. 57 | 58 | 59 | .. |Build Status (Travis)| image:: https://img.shields.io/travis/computational-metabolomics/dimspy.svg?logo=travis&maxAge=600&style=flat-square 60 | :target: https://travis-ci.com/computational-metabolomics/dimspy 61 | 62 | .. |Build Status (AppVeyor)| image:: https://img.shields.io/appveyor/ci/RJMW/dimspy.svg?logo=appveyor&style=flat-square&maxAge=600 63 | :target: https://ci.appveyor.com/project/RJMW/dimspy/branch/master 64 | 65 | .. |Py versions| image:: https://img.shields.io/pypi/pyversions/dimspy.svg?style=flat&maxAge=3600 66 | :target: https://pypi.python.org/pypi/dimspy/ 67 | 68 | .. |Version| image:: https://img.shields.io/pypi/v/dimspy.svg?style=flat&maxAge=3600 69 | :target: https://pypi.python.org/pypi/dimspy/ 70 | 71 | .. |Git| image:: https://img.shields.io/badge/repository-GitHub-blue.svg?style=flat&maxAge=3600 72 | :target: https://github.com/computational-metabolomics/dimspy 73 | 74 | .. |Bioconda| image:: https://img.shields.io/conda/vn/bioconda/dimspy?style=flat-square&maxAge=3600 75 | :target: http://bioconda.github.io/recipes/dimspy/README.html 76 | 77 | .. |galaxy-eu| image:: https://img.shields.io/badge/usegalaxy-.eu-brightgreen?logo= 78 | :target: http://usegalaxy.eu 79 | 80 | .. 
|License| image:: https://img.shields.io/pypi/l/dimspy.svg?style=flat&maxAge=3600 81 | :target: https://www.gnu.org/licenses/gpl-3.0.html 82 | 83 | .. |RTD doc| image:: https://img.shields.io/badge/documentation-RTD-71B360.svg?style=flat&maxAge=3600 84 | :target: https://dimspy.readthedocs.io/en/latest/ 85 | 86 | .. |codecov| image:: https://codecov.io/gh/computational-metabolomics/dimspy/branch/master/graph/badge.svg 87 | :target: https://codecov.io/gh/computational-metabolomics/dimspy 88 | 89 | .. |binder| image:: https://mybinder.org/badge_logo.svg 90 | :target: https://mybinder.org/v2/gh/computational-metabolomics/dimspy/master?filepath=notebooks%2Fworkflow.ipynb 91 | 92 | .. |gitter| image:: https://badges.gitter.im/Join%20Chat.svg 93 | :target: https://gitter.im/computational-metabolomics/dimspy?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge 94 | -------------------------------------------------------------------------------- /RawFileReaderLicense.rst: -------------------------------------------------------------------------------- 1 | 2 | This license (see "SOFTWARE LICENSE AGREEMENT" below) covers the following files which are distributed with the DIMSpy software package: 3 | 4 | - ThermoFisher.CommonCore.BackgroundSubtraction.dll 5 | - ThermoFisher.CommonCore.BackgroundSubtraction.XML 6 | - ThermoFisher.CommonCore.Data.dll 7 | - ThermoFisher.CommonCore.Data.XML 8 | - ThermoFisher.CommonCore.MassPrecisionEstimator.dll 9 | - ThermoFisher.CommonCore.MassPrecisionEstimator.XML 10 | - ThermoFisher.CommonCore.RawFileReader.dll 11 | - ThermoFisher.CommonCore.RawFileReader.XML 12 | 13 | Anyone receiving RawFileReader as part of a larger software distribution (in the current context, as part of DIMSpy) is considered an "end user" under 14 | section 3.3 of the RawFileReader License, and is not granted rights to redistribute RawFileReader. 
15 | 16 | | 17 | 18 | **SOFTWARE LICENSE AGREEMENT ("License") FOR RawFileReader** 19 | ---------------------------------------------------------------------- 20 | These License terms are an agreement between you and Thermo Finnigan LLC ("Licensor"). They apply to Licensor's MSFileReader software program ("Software"), which includes documentation and any media on which you received it. These terms also apply to any updates or supplements for this Software, unless other terms accompany those items, in which case those terms apply. **If you use this Software, you accept this License. If you do not accept this License, you are prohibited from using this software. If you comply with these License terms, you have the rights set forth below.** 21 | 22 | 1. Rights Granted: 23 | 24 | 1.1. You may install and use this Software on any of your computing devices. 25 | 26 | 1.2. You may distribute this Software to others, but only in combination with other software components and/or programs that you provide and subject to the distribution requirements and restrictions below. 27 | 28 | 2. Use Restrictions: 29 | 30 | 2.1. You may not decompile, disassemble, reverse engineer, use reflection or modify this Software. 31 | 32 | 3. Distribution Requirements: 33 | 34 | If you distribute this Software to others, you agree to: 35 | 36 | 3.1. Indemnify, defend and hold harmless the Licensor from any claims, including attorneys' fees, related to the distribution or use of this Software; 37 | 38 | 3.2. Display the following text in your software's "About" box: " **RawFileReader reading tool. Copyright © 2016 by Thermo Fisher Scientific, Inc. All rights reserved**."; 39 | 40 | 3.3. Require your end users to agree to a license agreement that prohibits them from redistributing this Software to others. 41 | 42 | 4. Distribution Restrictions: 43 | 44 | 4.1. 
You may not use the Licensor's trademarks in a way that suggests your software components and/or programs are provided by or are endorsed by the Licensor; and 45 | 46 | 4.2. You may not commercially exploit this Software or products that incorporate this Software without the prior written consent of Licensor. Commercial exploitation includes, but is not limited to, charging a purchase price, license fee, maintenance fee, or subscription fee; or licensing, transferring or redistributing the Software in exchange for consideration of any kind. 47 | 48 | 4.3. Your rights to this Software do not include any license, right, power or authority to subject this Software in whole or in part to any of the terms of an Excluded License. "Excluded License" means any license that requires as a condition of use, modification and/or distribution of software subject to the Excluded License, that such software or other software combined and/or distributed with such software be (a) disclosed or distributed in source code form; or (b) licensed for the purpose of making derivative works. Without limiting the foregoing obligation, you are specifically prohibited from distributing this Software with any software that is subject to the General Public License (GPL) or similar license in a manner that would create a combined work. 49 | 50 | 5. Additional Terms Applicable to Software: 51 | 52 | 5.1. This Software is licensed, not sold. This License only gives you some rights to use this Software; the Licensor reserves all other rights. Unless applicable law gives you more rights despite this limitation, you may use this Software only as expressly permitted in this License. 53 | 54 | 5.2. Licensor has no obligation to fix, update, supplement or support this Software. 55 | 56 | 5.3. 
This Software is not designed, manufactured or intended for any use requiring fail-safe performance in which the failure of this Software could lead to death, serious personal injury or severe physical and environmental damage ("High Risk Activities"), such as the operation of aircraft, medical or nuclear facilities. You agree not to use, or license the use of, this Software in connection with any High Risk Activities. 57 | 58 | 5.4. Your rights under this License terminate automatically if you breach this License in any way. Termination of this License will not affect any of your obligations or liabilities arising prior to termination. The following sections of this License shall survive termination: 2.1, 3.1, 3.2, 3.3, 4.1, 4.2, 4.3, 5.1, 5.2, 5.3, 5.5, 5.6, 5.7, 5.8, and 5.9. 59 | 60 | 5.5. This Software is subject to United States export laws and regulations. You agree to comply with all domestic and international export laws and regulations that apply to this Software. These laws include restrictions on destinations, end users and end use. 61 | 62 | 5.6. This License shall be construed and controlled by the laws of the State of California, U.S.A., without regard to conflicts of law. You consent to the jurisdiction of the state and federal courts situated in the State of California in any action arising under this License. The application of the U.N. Convention on Contracts for the International Sale of Goods to this License is hereby expressly excluded. If any provision of this License shall be deemed unenforceable or contrary to law, the rest of this License shall remain in full effect and interpreted in an enforceable manner that most nearly captures the intent of the original language. 63 | 64 | 5.7. THIS SOFTWARE IS LICENSED "AS IS". YOU BEAR ALL RISKS OF USING IT. LICENSOR GIVES NO AND DISCLAIMS ALL EXPRESS AND IMPLIED WARRANTIES, REPRESENTATIONS OR GUARANTEES. YOU MAY HAVE ADDITIONAL CONSUMER RIGHTS UNDER YOUR LOCAL LAWS WHICH THIS LICENSE CANNOT CHANGE. 
TO THE EXTENT PERMITTED UNDER YOUR LOCAL LAWS, LICENSOR EXCLUDES THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 65 | 66 | 5.8. LICENSOR'S TOTAL LIABILITY TO YOU FOR DIRECT DAMAGES ARISING UNDER THIS LICENSE IS LIMITED TO U.S. $1.00. YOU CANNOT RECOVER ANY OTHER DAMAGES, INCLUDING CONSEQUENTIAL, LOST PROFITS, SPECIAL, INDIRECT OR INCIDENTAL DAMAGES, EVEN IF LICENSOR IS EXPRESSLY MADE AWARE OF THE POSSIBILITY THEREOF OR IS NEGLIGENT. THIS LIMITATION APPLIES TO ANYTHING RELATED TO THIS SOFTWARE, SERVICES, CONTENT (INCLUDING CODE) ON THIRD PARTY INTERNET SITES, OR THIRD PARTY PROGRAMS, AND CLAIMS FOR BREACH OF CONTRACT, BREACH OF WARRANTY, GUARANTEE OR CONDITION, STRICT LIABILITY, NEGLIGENCE, OR OTHER TORT TO THE EXTENT PERMITTED BY APPLICABLE LAW. 67 | 68 | 5.9. Use, duplication or disclosure of this Software by the U.S. Government is subject to the restricted rights applicable to commercial computer software (under FAR 52.227019 and DFARS 252.227-7013 or parallel regulations). The manufacturer for this purpose is Thermo Finnigan LLC, 355 River Oaks Parkway, San Jose, California 95134, U.S.A. 69 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | build: false 2 | 3 | environment: 4 | matrix: 5 | - PYTHON_VERSION: 3.7 6 | MINICONDA: C:\Miniconda-x64 7 | 8 | init: 9 | - "ECHO %PYTHON_VERSION% %MINICONDA%" 10 | 11 | install: 12 | - "set PATH=%MINICONDA%;%MINICONDA%\\Scripts;%PATH%" 13 | - conda config --set always_yes yes --set changeps1 no 14 | - conda update -q conda 15 | - conda info -a 16 | - conda env create -n test-environment -f environment.yml 17 | - activate test-environment 18 | - conda install pytest -c conda-forge 19 | - pip install . 
20 | 21 | test_script: 22 | - dimspy --help 23 | - pytest 24 | -------------------------------------------------------------------------------- /binder/environment.yml: -------------------------------------------------------------------------------- 1 | name: dimspy 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - computational-metabolomics 6 | dependencies: 7 | - python=3.7 8 | - dimspy 9 | 10 | -------------------------------------------------------------------------------- /dimspy/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright © 2017-2020 Ralf Weber, Albert Zhou. 5 | # 6 | # This file is part of DIMSpy. 7 | # 8 | # DIMSpy is free software: you can redistribute it and/or modify 9 | # it under the terms of the GNU General Public License as published by 10 | # the Free Software Foundation, either version 3 of the License, or 11 | # (at your option) any later version. 12 | # 13 | # DIMSpy is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU General Public License 19 | # along with DIMSpy. If not, see . 20 | # 21 | 22 | 23 | __author__ = 'Ralf Weber (r.j.weber@bham.ac.uk), Albert Zhou (j.zhou.3@bham.ac.uk)' 24 | __credits__ = 'Ralf Weber (r.j.weber@bham.ac.uk), Albert Zhou (j.zhou.3@bham.ac.uk)' 25 | __version__ = '2.0.0' 26 | __license__ = 'GPLv3' 27 | -------------------------------------------------------------------------------- /dimspy/models/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright © 2017-2020 Ralf Weber, Albert Zhou. 5 | # 6 | # This file is part of DIMSpy. 
# DO NOT try metadata.metadata.attr.
# All attribute methods are overridden.
class PeakList_Metadata(dict):
    """
    The PeakList_Metadata class.

    Dictionary-like container for PeakList metadata storage.

    :param args: Iterable object of key-value pairs
    :param kwargs: Metadata key-value pairs

    >>> meta = PeakList_Metadata([('name', 'sample_1'), ('qc', False)])
    >>> meta = PeakList_Metadata(name = 'sample_1', qc = False)

    Metadata attributes can be accessed in both dictionary-like and property-like manners.

    >>> meta = PeakList_Metadata(name = 'sample_1', qc = False)
    >>> meta['name']
    'sample_1'
    >>> meta.qc
    False
    >>> del meta.qc
    >>> 'qc' in meta
    False

    .. warning::
        The *__getattr__*, *__setattr__*, and *__delattr__* methods are overridden. **DO NOT** assign a metadata
        object to another metadata object, e.g., metadata.metadata.attr = value.

    """

    def __getattr__(self, item):
        """
        Return the metadata stored under *item*.

        Python only calls this hook after regular attribute lookup has failed, so real attributes and
        dict methods always take precedence over metadata keys of the same name.

        :param item: Attribute / metadata key name
        :raises AttributeError: If *item* is neither an attribute nor a stored key
        """
        return self[item] if item in self else super().__getattribute__(item)

    def __setattr__(self, item, value):
        """
        Store *value* under the metadata key *item*.

        :param item: Attribute / metadata key name
        :param value: Metadata value; must not itself be a PeakList_Metadata (or subclass) instance
        :raises ValueError: If *item* is "__dict__" or *value* is a metadata object
        """
        if item == '__dict__':
            raise ValueError('"__dict__" is not an acceptable metadata key')
        # isinstance (rather than an exact type comparison) also rejects subclass instances,
        # which would otherwise slip past the nesting guard.
        if isinstance(value, PeakList_Metadata):
            raise ValueError('metadata object is not an acceptable metadata value')

        if item not in self.__dict__:
            self[item] = value
        else:
            super().__setattr__(item, value)

    def __delattr__(self, item):
        """
        Remove the metadata key *item*, falling back to regular attribute deletion.

        :param item: Attribute / metadata key name
        :raises AttributeError: If *item* is neither a stored key nor a deletable attribute
        """
        if item in self:
            del self[item]
        else:
            super().__delattr__(item)
class Tag:
    """
    A single sample tag: a value plus an optional type label.

    Tags are mainly consumed by the PeakList and PeakMatrix classes for sample filtering.

    :param value: Tag value - a number (int, float), a string, or another Tag. When a Tag is given,
        its value and type are both copied and the ttype argument is ignored
    :param ttype: Tag type - a string, or None (default) for an untyped tag

    A bare value only ever equals an untyped tag:

    >>> tag = Tag(1)
    >>> tag == 1
    True
    >>> tag = Tag(1, 'batch')
    >>> tag == 1
    False

    .. note::
        *__eq__* is defined without *__hash__*, so Tag instances are unhashable.

    """

    def __init__(self, value: Union[int, float, str, Tag], ttype: Union[str, None] = None):
        self._value = None
        self._type = None
        if isinstance(value, Tag):
            # copy constructor: the source tag's own type wins over the ttype argument
            value, ttype = value.value, value.ttype
        # go through the property setters so the type validation is applied
        self.value = value
        self.ttype = ttype

    @property
    def value(self):
        """
        The value carried by the tag.

        :getter: Returns the tag value
        :setter: Stores the tag value as given (no conversion is performed)
        :type: int, float, str

        """
        return self._value

    @value.setter
    def value(self, value: Union[int, float, str]):
        # numpy scalar types are not converted here; callers must convert them beforehand
        self._value = value

    @property
    def ttype(self):
        """
        The type label of the tag; None marks an untyped tag.

        :getter: Returns the tag type
        :setter: Stores the tag type; the strings 'None' and '' are rejected with a KeyError
        :type: None, str

        """
        return self._type

    @ttype.setter
    def ttype(self, value: Union[str, None]):
        # 'None' and '' are reserved for the hdf5 portal
        if value in ('None', ''):
            raise KeyError('["%s"] is not an acceptable tag type' % value)
        self._type = value

    @property
    def typed(self):
        """
        Whether the tag carries a type label.

        :getter: Returns True for a typed tag, False for an untyped one
        :type: bool

        """
        return self._type is not None

    def __eq__(self, other: Union[int, float, str, Tag]):
        # a bare (non-Tag) operand is compared as if it were an untyped tag
        if isinstance(other, Tag):
            other_value, other_type = other.value, other.ttype
        else:
            other_value, other_type = other, None
        return other_value == self.value and other_type == self.ttype

    def __ne__(self, other: Union[int, float, str, Tag]):
        return not self.__eq__(other)

    def __str__(self):
        text = str(self._value)
        if self._type is None:
            return text
        return self._type + ':' + text
@property
def tag_types(self):
    """
    The distinct tag types held by this container.

    None is part of the result whenever at least one untyped tag is stored.

    :getter: Returns a set with the type of every stored tag
    :type: set

    """
    return {entry.ttype for entry in self._tags}
178 | 179 | :getter: Returns a tuple containing all the typed tags 180 | :type: tuple 181 | 182 | """ 183 | return tuple([x for x in self._tags if x.typed]) 184 | 185 | @property 186 | def untyped_tags(self): 187 | """ 188 | Property of included untyped tags. 189 | 190 | :getter: Returns a tuple containing all the untyped tags 191 | :type: tuple 192 | 193 | """ 194 | return tuple([x for x in self._tags if not x.typed]) 195 | 196 | # methods 197 | def has_tag(self, tag: Union[int, float, str, Tag], tag_type: Union[str, None] = None): 198 | """ 199 | Checks whether there exists a specific tag. 200 | 201 | :param tag: The tag for checking 202 | :param tag_type: The type of the tag 203 | :rtype: bool 204 | 205 | >>> tags = PeakList_Tags('untyped_tag1', Tag('tag_value1', 'tag_type1')) 206 | >>> tags.has_tag('untyped_tag1') 207 | True 208 | >>> tags.has_tag('typed_tag1') 209 | False 210 | >>> tags.has_tag(Tag('tag_value1', 'tag_type1')) 211 | True 212 | >>> tags.has_tag('tag_value1', 'tag_type1') 213 | True 214 | 215 | """ 216 | return (tag in self._tags) if isinstance(tag, Tag) or tag_type is None else \ 217 | (Tag(tag, tag_type) in self._tags) 218 | 219 | def has_tag_type(self, tag_type: Union[str, None] = None): 220 | """ 221 | Checks whether there exists a specific tag type. 222 | 223 | :param tag_type: The tag type for checking, None indicates untyped tags 224 | :rtype: bool 225 | 226 | """ 227 | return tag_type in self.tag_types 228 | 229 | def tag_of(self, tag_type: Union[str, None] = None): 230 | """ 231 | Returns tag value of the given tag type, or tuple of untyped tags if tag_type is None. 
232 | 233 | :param tag_type: Valid tag type, None for untyped tags 234 | :rtype: Tag, or None if tag_type not exists 235 | 236 | """ 237 | t = [x for x in self._tags if x.ttype == tag_type] 238 | return None if len(t) == 0 else tuple(t) if tag_type is None else t[0] 239 | 240 | def add_tag(self, tag: Union[int, float, str, Tag], tag_type: Union[str, None] = None): 241 | """ 242 | Adds typed or untyped tag. 243 | 244 | :param tag: Tag or tag value to add 245 | :param tag_type: Type of the tag value 246 | 247 | >>> tags = PeakList_Tags() 248 | >>> tags.add_tag('untyped_tag1') 249 | >>> tags.add_tag(Tag('typed_tag1', 'tag_type1')) 250 | >>> tags.add_tag(tag_type2 = 'typed_tag2') 251 | 252 | """ 253 | if tag_type is not None and self.has_tag_type(tag_type): 254 | raise KeyError('tag type %s already exists' % tag_type) 255 | tag = Tag(tag, tag_type) 256 | if self.has_tag(tag): 257 | raise ValueError('tag already exist') 258 | self._tags += [tag] 259 | 260 | def drop_tag(self, tag: Union[int, float, str, Tag], tag_type: Union[str, None] = None): 261 | """ 262 | Drops typed and untyped tag. 263 | 264 | :param tag: Tag or tag value to drop 265 | :param tag_type: Type of the tag value 266 | 267 | >>> tags = PeakList_Tags('untyped_tag1', tag_type1 = 'tag_value1') 268 | >>> tags.drop_tag(Tag('tag_value1', 'tag_type1')) 269 | >>> print(tags) 270 | untyped_tag1 271 | 272 | """ 273 | t = Tag(tag, tag_type) 274 | self._tags = [x for x in self._tags if x != t] 275 | 276 | def drop_tag_type(self, tag_type: Union[str, None] = None): 277 | """ 278 | Drops the tag with the given type. 279 | 280 | :param tag_type: Tag type to drop, None (untyped) may drop multiple tags 281 | 282 | """ 283 | self._tags = [x for x in self._tags if x.ttype != tag_type] 284 | 285 | def drop_all_tags(self): 286 | """ 287 | Drops all tags, both typed and untyped. 288 | 289 | """ 290 | self._tags = [] 291 | 292 | # portals 293 | def to_list(self): 294 | """ 295 | Exports tags to a list. 
Each element is a tuple of (tag value, tag type). 296 | 297 | >>> tags = PeakList_Tags('untyped_tag1', tag_type1 = 'tag_value1') 298 | >>> tags.to_list() 299 | [('untyped_tag1', None), ('tag_value1', 'tag_type1')] 300 | 301 | :rtype: list 302 | 303 | """ 304 | return [(t.value, t.ttype) for t in self._tags] 305 | 306 | def to_str(self): 307 | """ 308 | Exports tags to a string. It can also be used inexplicitly as 309 | 310 | >>> tags = PeakList_Tags('untyped_tag1', tag_type1 = 'tag_value1') 311 | >>> print(tags) 312 | untyped_tag1, tag_type1:tag_value1 313 | 314 | :rtype: str 315 | 316 | """ 317 | return str.join(', ', map(str, self._tags)) 318 | -------------------------------------------------------------------------------- /dimspy/portals/ThermoRawFileReader_3_0_41/Libraries/ThermoFisher.CommonCore.BackgroundSubtraction.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/computational-metabolomics/dimspy/4a0b8982382b8198f2e28864a7f0263ce8a49c49/dimspy/portals/ThermoRawFileReader_3_0_41/Libraries/ThermoFisher.CommonCore.BackgroundSubtraction.dll -------------------------------------------------------------------------------- /dimspy/portals/ThermoRawFileReader_3_0_41/Libraries/ThermoFisher.CommonCore.Data.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/computational-metabolomics/dimspy/4a0b8982382b8198f2e28864a7f0263ce8a49c49/dimspy/portals/ThermoRawFileReader_3_0_41/Libraries/ThermoFisher.CommonCore.Data.dll -------------------------------------------------------------------------------- /dimspy/portals/ThermoRawFileReader_3_0_41/Libraries/ThermoFisher.CommonCore.MassPrecisionEstimator.dll: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/computational-metabolomics/dimspy/4a0b8982382b8198f2e28864a7f0263ce8a49c49/dimspy/portals/ThermoRawFileReader_3_0_41/Libraries/ThermoFisher.CommonCore.MassPrecisionEstimator.dll -------------------------------------------------------------------------------- /dimspy/portals/ThermoRawFileReader_3_0_41/Libraries/ThermoFisher.CommonCore.RawFileReader.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/computational-metabolomics/dimspy/4a0b8982382b8198f2e28864a7f0263ce8a49c49/dimspy/portals/ThermoRawFileReader_3_0_41/Libraries/ThermoFisher.CommonCore.RawFileReader.dll -------------------------------------------------------------------------------- /dimspy/portals/ThermoRawFileReader_3_0_41/License/RawFileRdr_License_Agreement_RevA.doc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/computational-metabolomics/dimspy/4a0b8982382b8198f2e28864a7f0263ce8a49c49/dimspy/portals/ThermoRawFileReader_3_0_41/License/RawFileRdr_License_Agreement_RevA.doc -------------------------------------------------------------------------------- /dimspy/portals/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright © 2017-2020 Ralf Weber, Albert Zhou. 5 | # 6 | # This file is part of DIMSpy. 7 | # 8 | # DIMSpy is free software: you can redistribute it and/or modify 9 | # it under the terms of the GNU General Public License as published by 10 | # the Free Software Foundation, either version 3 of the License, or 11 | # (at your option) any later version. 12 | # 13 | # DIMSpy is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU General Public License for more details. 
#
# You should have received a copy of the GNU General Public License
# along with DIMSpy.  If not, see <https://www.gnu.org/licenses/>.
#

# --------------------------------------------------------------------------------
# /dimspy/portals/mzml_portal.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright © 2017-2020 Ralf Weber, Albert Zhou.
#
# This file is part of DIMSpy.
#
# DIMSpy is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# DIMSpy is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with DIMSpy.  If not, see <https://www.gnu.org/licenses/>.
#


import collections
import os
from io import BytesIO
from typing import Sequence, Union

import numpy as np
import pymzml

from ..metadata import mz_range_from_header
from ..models.peaklist import PeakList


class Mzml:
    """
    mzML portal.

    Wraps a ``pymzml.run.Reader`` and exposes scan ids, header (filter) strings, TIC values,
    ion injection times and PeakList objects for the scans of a single mzML file.
    """

    # Noise estimators accepted by peaklist() and peaklists().
    _NOISE_FUNCTIONS = ("mean", "median", "mad")
    # File extensions (compared case-insensitively) accepted for path inputs.
    _EXTENSIONS = (".mzml", ".mzml.gz")

    def __init__(self, filename: Union[str, BytesIO], **kwargs):
        """
        Initialise an object interface to a mzML file.

        :param filename: Path to the mzML file (``.mzML`` or ``.mzML.gz``), or a BytesIO object.
            Existence and extension checks are only applied to paths.
        :param kwargs: Optional ``ms_precisions``, a dictionary that is merged into the reader's
            ``ms_precisions``. Default = 5e-6 for each of the keys 3 to 10
        :raises IOError: If the path does not exist or does not have a mzML extension

        """
        self.filename = filename

        if not isinstance(filename, BytesIO):
            if not os.path.isfile(self.filename):
                raise IOError("{} does not exist".format(self.filename))
            if not self.filename.lower().endswith(self._EXTENSIONS):
                raise IOError('Incorrect file format for mzML parser')

        if "ms_precisions" in kwargs:
            self.ms_precisions = kwargs["ms_precisions"]
        else:
            self.ms_precisions = dict(zip(range(3, 11), 8 * [5e-6]))

        # The scan index is built with a separate, short-lived reader (see _scan_ids).
        self._sids = self._scan_ids()

        self.run = pymzml.run.Reader(self.filename)
        self.run.ms_precisions.update(self.ms_precisions)
        self.timestamp = self.run.info["start_time"]

    def headers(self) -> collections.OrderedDict:
        """
        Get all unique header or filter strings and associated scan ids.
        Scans without a 'MS:1000512' entry are left out.

        :return: Dictionary (header -> list of scan ids)
        """
        h_sids = collections.OrderedDict()
        for scan_id in self._sids:
            scan = self.run[scan_id]  # look the scan up once instead of twice
            if 'MS:1000512' in scan:
                h_sids.setdefault(scan['MS:1000512'], []).append(scan_id)
        return h_sids

    def _scan_ids(self) -> collections.OrderedDict:
        """
        Iterate over the file once and record, for every scan, its header / filter string
        (or None when the scan has no 'MS:1000512' entry).

        :return: Dictionary (scan id -> header or None)
        """
        sids_h = collections.OrderedDict()
        run = pymzml.run.Reader(self.filename)
        try:
            run.ms_precisions.update(self.ms_precisions)
            for scan in run:
                if 'MS:1000512' in scan:
                    sids_h[scan.ID] = str(scan['MS:1000512'])
                else:
                    sids_h[scan.ID] = None
        finally:
            # Release the temporary reader even if parsing fails half-way.
            run.close()
        return sids_h

    def scan_ids(self) -> collections.OrderedDict:
        """
        Get all scan ids and associated headers or filter strings.

        :return: Dictionary (scan id -> header or None)
        """
        return self._sids

    def peaklist(self, scan_id, function_noise="median") -> PeakList:
        """
        Create a peaklist object for a specific scan id.

        :param scan_id: Scan id
        :param function_noise: Name of the noise estimate used for the 'snr' attribute. The name is
            passed as ``mode`` to the scan's ``estimated_noise_level`` method. The following options
            are accepted:

            * **median**

            * **mean**

            * **mad**

        :raises ValueError: If function_noise is not one of the options above
        :return: PeakList object with an additional 'snr' attribute (intensity / noise level)
        """
        if function_noise not in self._NOISE_FUNCTIONS:
            raise ValueError("select a function that is available [mean, median, mad]")

        scan = self.run[scan_id]
        peaks = scan.peaks("raw")
        if len(peaks) > 0:
            mzs, ints = list(zip(*peaks))
        else:
            mzs, ints = [], []

        scan_time = scan["MS:1000016"]
        tic = scan["total ion current"]
        if "MS:1000927" in scan:
            ion_injection_time = scan["MS:1000927"]
        else:
            ion_injection_time = None
        header = scan['MS:1000512']
        if header:
            mz_range = mz_range_from_header(header)
        else:
            # No header / filter string available, so the m/z range is unknown.
            mz_range = [None, None]
        ms_level = scan['ms level']
        pl = PeakList(ID=scan.ID, mz=mzs, intensity=ints,
                      mz_range=mz_range,
                      header=header,
                      ms_level=ms_level,
                      ion_injection_time=ion_injection_time,
                      scan_time=scan_time,
                      tic=tic,
                      function_noise=function_noise)
        snr = np.divide(ints, scan.estimated_noise_level(mode=function_noise))
        pl.add_attribute('snr', snr)
        return pl

    def peaklists(self, scan_ids, function_noise="median") -> Sequence[PeakList]:
        """
        Create a list of peaklist objects for each scan id in the list.
        Scan ids that do not occur in the file are silently skipped.

        :param scan_ids: List of scan ids
        :param function_noise: Name of the noise estimate used for the 'snr' attribute, one of
            **median**, **mean** or **mad** (see :meth:`peaklist`). The **noise_packets** option of
            the Thermo RAW portal is not available for mzML files.

        :raises ValueError: If function_noise is not one of the options above
        :return: List of PeakList objects
        """
        if function_noise not in self._NOISE_FUNCTIONS:
            raise ValueError("select a function that is available [mean, median, mad]")

        return [self.peaklist(scan_id, function_noise) for scan_id in scan_ids if scan_id in self._sids]

    def tics(self) -> collections.OrderedDict:
        """
        Get all TIC values and associated scan ids.

        :return: Dictionary (scan id -> TIC)
        """
        tic_values = collections.OrderedDict()
        for scan_id in self._sids:
            tic_values[scan_id] = self.run[scan_id].TIC
        return tic_values

    def ion_injection_times(self) -> collections.OrderedDict:
        """
        Get all ion injection time values and associated scan ids.

        :return: Dictionary (scan id -> ion injection time, or None if the scan has no 'MS:1000927' entry)
        """
        iits = collections.OrderedDict()
        for scan_id in self._sids:
            scan = self.run[scan_id]
            if "MS:1000927" in scan:
                iits[scan_id] = scan["MS:1000927"]
            else:
                iits[scan_id] = None
        return iits

    def scan_dependents(self) -> list:
        """
        Get a nested list of scan id pairs. Each pair represents a fragmentation event.

        :return: List of [precursor scan id, dependent scan id]
        """
        pairs = []
        for scan_id in self._sids:
            scan = self.run[scan_id]
            if scan.selected_precursors:
                precursor = scan.element.find("./{}precursorList/{}precursor".format(scan.ns, scan.ns))
                # The precursor scan number is taken from the "scan=<n>" part of spectrumRef.
                # NOTE(review): assumes every precursor element carries such a spectrumRef - confirm.
                pairs.append([int(precursor.get("spectrumRef").split("scan=")[1]), scan.ID])
        return pairs

    def close(self):
        """
        Close the reader/file object.

        :return: None
        """
        self.run.close()


# --------------------------------------------------------------------------------
# /dimspy/portals/paths.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright © 2017-2020 Ralf Weber, Albert Zhou.
#
# This file is part of DIMSpy.
#
# DIMSpy is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# DIMSpy is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with DIMSpy.  If not, see <https://www.gnu.org/licenses/>.
#


import os

import h5py
import numpy as np
from datetime import datetime
import platform

from ..models.peaklist import PeakList
from ..portals import hdf5_portal
from ..portals.mzml_portal import Mzml
from ..portals.thermo_raw_portal import ThermoRaw


# File extensions (compared case-insensitively) of supported mass spectrometry data files.
_MS_EXTENSIONS = (".raw", ".mzml")

# Timestamp layouts tried, in this order, for every file.
_TIMESTAMP_PATTERNS = (
    "%Y-%m-%dT%H:%M:%SZ",    # used for .mzml files
    "%d/%m/%Y %H:%M:%S",     # used for .raw files (day first, 24 hour clock)
    "%m/%d/%Y %I:%M:%S %p",  # used for .raw files (month first, AM/PM)
)


def _is_ms_file(path):
    """Return True if the path ends with .raw or .mzml (case-insensitive)."""
    return path.lower().endswith(_MS_EXTENSIONS)


def _parse_timestamp(timestamp):
    """Convert a timestamp string to datetime using the first matching known pattern."""
    for pattern in _TIMESTAMP_PATTERNS:
        try:
            return datetime.strptime(timestamp, pattern)
        except ValueError:
            continue
    raise ValueError("time data {!r} does not match any known timestamp format".format(timestamp))


def sort_ms_files_by_timestamp(ps):
    """
    Sort a set of .mzml or .raw files by the timestamp stored in each file.
    Paths with any other extension are ignored.

    :param ps: List of paths
    :raises ValueError: If a timestamp does not match any of the known formats
    :return: List of (path, timestamp string) tuples, oldest first. Empty if no .mzml or .raw file is provided
    """
    s_files = {}
    for fn in ps:
        lowered = fn.lower()
        if lowered.endswith(".raw"):
            run = ThermoRaw(fn)
        elif lowered.endswith(".mzml"):
            run = Mzml(fn)
        else:
            continue
        try:
            s_files[fn] = str(run.timestamp)
        finally:
            # Always release the file handle, also when reading the timestamp fails.
            run.close()

    # Every timestamp is parsed on its own, so .raw and .mzml files can be mixed and an
    # empty selection simply yields an empty list.
    return sorted(s_files.items(), key=lambda x: _parse_timestamp(x[1]), reverse=False)


def validate_and_sort_paths(source, tsv):
    """
    Validate and sort a set (i.e. directory or hdf5 file) of .mzml or .raw files.

    :param source: Path to a directory with .mzml or .raw files, path to a single .mzml or .raw file,
        path to a .hdf5 file that contains peaklists, or a list / tuple of either paths or PeakList objects
    :param tsv: Path to a tab-separated file, or None. The first column must be named "filename" or
        "sample_id"; if provided, its values define which files are returned and in which order.
        Without it, the files of a directory are sorted by timestamp
    :raises IOError: If a path does not exist, has an unsupported format, or if the tab-separated file
        and the source do not match
    :return: List of paths, or of peaklist IDs when peaklists are provided
    """
    if tsv is None:
        if isinstance(source, str):
            if os.path.isdir(source):
                filenames = [os.path.join(source, fn) for fn in os.listdir(source) if _is_ms_file(fn)]
                filenames = [fd[0] for fd in sort_ms_files_by_timestamp(filenames)]

            elif h5py.is_hdf5(source):
                peaklists = hdf5_portal.load_peaklists_from_hdf5(source)
                filenames = [os.path.join(os.path.abspath(os.path.dirname(source)), pl.ID) for pl in peaklists]
            elif os.path.isfile(source):
                if _is_ms_file(source):
                    filenames = [source]
                else:
                    raise IOError("Incorrect file format, provide .mzml or .raw files: {}".format(source))
            else:
                raise IOError("[Errno 2] No such file or directory: {}".format(source))

        elif isinstance(source, (list, tuple)):
            # An empty list / tuple has no first element to inspect and yields an empty result.
            if source and isinstance(source[0], PeakList):
                filenames = [pl.ID for pl in source]
            else:
                filenames = []
                for fn in source:
                    # Report the offending path rather than the complete list.
                    if not os.path.isfile(fn):
                        raise IOError("[Errno 2] No such file or directory: {}".format(fn))
                    if not _is_ms_file(fn):
                        raise IOError("Incorrect file format, provide .mzml or .raw files: {}".format(fn))
                    filenames.append(fn)
        else:
            raise IOError("[Errno 2] No such file or directory: {}".format(source))

    elif os.path.isfile(tsv):
        fm = np.genfromtxt(tsv, dtype=None, delimiter="\t", names=True, encoding=None)
        if len(fm.shape) == 0:
            # A file with a single data row gives a 0-d array; wrap it so it can be iterated.
            fm = np.array([fm])
        first_column = fm.dtype.names[0]
        if first_column != "filename" and first_column != "sample_id":
            raise IOError("Incorrect header for first column. Use filename or sample_id")
        tsv_names = fm[first_column]

        filenames = []
        if isinstance(source, (list, tuple)):
            if source and isinstance(source[0], PeakList):
                peaklist_ids = [pl.ID for pl in source]  # built once, not for every row
                for filename in tsv_names:
                    if filename not in peaklist_ids:
                        raise IOError("{} does not exist in list with Peaklist objects".format(filename))
                    filenames.append(filename)
            else:
                for fn in source:
                    if not os.path.isfile(fn):
                        raise IOError("[Errno 2] No such file or directory: {}".format(fn))

                basenames = [os.path.basename(fn) for fn in source]  # built once, not for every row
                for row, filename in enumerate(tsv_names, start=1):
                    if filename not in basenames:
                        raise IOError("{} (row {}) does not exist in source provided".format(filename, row))
                    filenames.append(source[basenames.index(filename)])

        elif isinstance(source, str):
            if os.path.isdir(source):
                listing = os.listdir(source)
                for fn in tsv_names:
                    if os.path.basename(fn) not in listing:
                        raise IOError("{} does not exist in directory provided".format(os.path.basename(fn)))
                    filenames.append(os.path.join(source, fn))

            elif h5py.is_hdf5(source):
                peaklists = hdf5_portal.load_peaklists_from_hdf5(source)
                filenames = [pl.ID for pl in peaklists]
            elif os.path.isfile(source):
                if _is_ms_file(source):
                    filenames.append(source)
                else:
                    raise IOError("Incorrect file format, provide .mzml or .raw files: {}".format(source))
            else:
                raise IOError("[Errno 2] No such file or directory: {} or {}".format(source, tsv))
    else:
        raise IOError("[Errno 2] No such file or directory: {}".format(tsv))

    return filenames


# --------------------------------------------------------------------------------
# /dimspy/portals/thermo_raw_portal.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright © 2017-2020 Ralf Weber, Albert Zhou.
#
# This file is part of DIMSpy.
#
# DIMSpy is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# DIMSpy is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with DIMSpy.  If not, see <https://www.gnu.org/licenses/>.
#


import collections
import os
from typing import Sequence, Union
import re
import sys

import numpy as np
from ..models.peaklist import PeakList

try:
    import clr
    sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), "ThermoRawFileReader_3_0_41/Libraries"))
    clr.AddReference('ThermoFisher.CommonCore.RawFileReader')
    clr.AddReference('ThermoFisher.CommonCore.Data')
    import ThermoFisher.CommonCore.Data.Business as Business
    import ThermoFisher.CommonCore.RawFileReader as RawFileReader
except ImportError:
    import warnings
    warnings.warn("""
        DIMSpy requires the Mono framework in order to read and process .raw files.
        Install dimspy via conda (highly recommended) to automatically install Mono
        (see https://dimspy.readthedocs.io/en/latest/installation.html) or
        install Mono from (https://www.mono-project.com).
        You can ignore this warning if you use DIMSpy to read and process .mzML files.
        """)


def mz_range_from_header(h: str) -> list:
    """
    Extract the m/z range from a header or filterstring.
    The first "<start>-<end>" pair found in the string is used.

    :param h: str
    :raises IndexError: If the string does not contain a "<start>-<end>" pair
    :raises ValueError: If the pair found cannot be converted to floats
    :return: Sequence[float, float]
    """
    return [float(m) for m in re.findall(r'([\w\.-]+)-([\w\.-]+)', h)[0]]


class ThermoRaw:
    """
    ThermoRaw portal.

    Wraps a Thermo RawFileReader object and exposes scan ids, header (filter) strings, TIC values,
    ion injection times and PeakList objects for the scans of a single .RAW file.
    """

    # Noise estimators accepted by peaklist() and peaklists().
    _NOISE_FUNCTIONS = ("noise_packets", "mean", "median", "mad")

    def __init__(self, filename):
        """
        Initialise an object interface to a .RAW file.

        :param filename: Path to the .RAW file

        """
        self.run = RawFileReader.RawFileReaderAdapter.FileFactory(filename)
        self.run.SelectInstrument(Business.Device.MS, 1)
        self.filename = filename
        self.timestamp = self.run.CreationDate

    def _scan_numbers(self) -> range:
        """Return the range of all scan ids, from the first up to and including the last spectrum."""
        return range(self.run.RunHeaderEx.FirstSpectrum, self.run.RunHeaderEx.LastSpectrum + 1)

    def headers(self) -> collections.OrderedDict:
        """
        Get all unique header or filter strings and associated scan ids.

        :return: Dictionary (header -> list of scan ids)
        """
        sids = collections.OrderedDict()
        for scan_id in self._scan_numbers():
            sids.setdefault(str(self.run.GetFilterForScanNumber(scan_id).Filter), []).append(scan_id)
        return sids

    def scan_ids(self) -> collections.OrderedDict:
        """
        Get all scan ids and associated headers or filter strings.

        :return: Dictionary (scan id -> header)
        """
        sids = collections.OrderedDict()
        for scan_id in self._scan_numbers():
            sids[scan_id] = str(self.run.GetFilterForScanNumber(scan_id).Filter)
        return sids

    def peaklist(self, scan_id, function_noise="noise_packets") -> PeakList:
        """
        Create a peaklist object for a specific scan id.

        :param scan_id: Scan id
        :param function_noise: Function to calculate the noise from each scan. The following options are available:

            * **median** - the median of all peak intensities within a given scan is used as the noise value.

            * **mean** - the unweighted mean average of all peak intensities within a given scan is used as the noise value.

            * **mad (Median Absolute Deviation)** - the noise value is set as the median of the absolute differences
              between peak intensities and the median peak intensity (calculated across all peak intensities
              within a given scan).

            * **noise_packets** - the signal-to-noise ratio of each centroid as reported by the Thermo Fisher
              Scientific RawFileReader library is used.

        :raises ValueError: If function_noise is not one of the options above
        :return: PeakList object. If the scan contains peaks the attributes 'snr', 'noise' and 'baseline' are added
        """
        if function_noise not in self._NOISE_FUNCTIONS:
            raise ValueError("select a function that is available [noise_packets, mean, median, mad]")

        scan = self.run.GetCentroidStream(scan_id, False)
        if scan.Masses is not None:
            # SignalToNoise not available in the stream; sorting the tuples orders the peaks by m/z
            mz_ibn = sorted(zip(scan.Masses, scan.Intensities, scan.Baselines, scan.Noises))
        else:
            mz_ibn = []

        if mz_ibn:
            mzs, ints, baseline, noise = zip(*mz_ibn)
        else:
            # Also covers an empty (but not None) Masses array, which cannot be unpacked.
            mzs, ints, baseline, noise = [], [], [], []

        if len(ints) == 0:
            snr = []
        elif function_noise == "noise_packets":
            # NOTE(review): assumes GetCentroids() returns the peaks in the same (m/z) order as
            # the sorted arrays above - confirm.
            snr = [p.SignalToNoise for p in scan.GetCentroids()]
        elif function_noise == "median":
            snr = np.asarray(ints) / np.median(ints)
        elif function_noise == "mean":
            snr = np.asarray(ints) / np.mean(ints)
        else:  # "mad"
            snr = np.asarray(ints) / np.median(np.abs(np.subtract(ints, np.median(ints))))

        scan_stats = self.run.GetScanStatsForScanNumber(scan_id)

        ion_injection_time = None
        micro_scans = None
        elapsed_scan_time = None

        # Fetch the trailer once; labels and values are parallel sequences.
        trailer = self.run.GetTrailerExtraInformation(scan_id)
        extra_values = list(trailer.Values)
        extra_labels = list(trailer.Labels)
        for label, value in zip(extra_labels, extra_values):
            if "Ion Injection Time (ms):" == label:
                ion_injection_time = float(value)
            if "Elapsed Scan Time (sec):" == label:
                elapsed_scan_time = float(value)
            if "Micro Scan Count:" == label:
                micro_scans = float(value)

        scan_time = float(scan_stats.StartTime)
        tic = scan_stats.TIC
        segment = scan_stats.SegmentNumber
        header = str(self.run.GetScanEventStringForScanNumber(scan_id))
        # Every "@" in the header adds one MS level (no "@" gives ms level 1).
        ms_level = header.count("@") + 1

        pl = PeakList(ID=scan_id, mz=mzs, intensity=ints,
                      mz_range=mz_range_from_header(header),
                      header=header,
                      ms_level=ms_level,
                      micro_scans=micro_scans,
                      segment=segment,
                      ion_injection_time=ion_injection_time,
                      scan_time=scan_time,
                      elapsed_scan_time=elapsed_scan_time,
                      tic=tic,
                      function_noise=function_noise)

        if len(pl.mz) > 0:
            pl.add_attribute('snr', snr)
            pl.add_attribute('noise', noise)
            pl.add_attribute('baseline', baseline)

        return pl

    def peaklists(self, scan_ids, function_noise="noise_packets") -> Sequence[PeakList]:
        """
        Create a list of peaklist objects for each scan id in the list.

        :param scan_ids: List of scan ids
        :param function_noise: Function to calculate the noise from each scan, one of **median**, **mean**,
            **mad** or **noise_packets** (see :meth:`peaklist` for details).

        :raises ValueError: If function_noise is not one of the options above
        :return: List of PeakList objects
        """
        if function_noise not in self._NOISE_FUNCTIONS:
            raise ValueError("select a function that is available [noise_packets, mean, median, mad]")

        return [self.peaklist(scan_id, function_noise=function_noise) for scan_id in scan_ids]

    def tics(self) -> collections.OrderedDict:
        """
        Get all TIC values and associated scan ids.

        :return: Dictionary (scan id -> TIC)
        """
        tics = collections.OrderedDict()
        for scan_id in self._scan_numbers():
            scan_stats = self.run.GetScanStatsForScanNumber(scan_id)
            tics[scan_id] = scan_stats.TIC
        return tics

    def ion_injection_times(self) -> collections.OrderedDict:
        """
        Get all ion injection time values and associated scan ids.

        :return: Dictionary (scan id -> ion injection time, or None if the scan does not report one)
        """
        iits = collections.OrderedDict()
        for scan_id in self._scan_numbers():
            # Fetch the trailer once; labels and values are parallel sequences.
            trailer = self.run.GetTrailerExtraInformation(scan_id)
            extra_values = list(trailer.Values)
            extra_labels = list(trailer.Labels)
            for label, value in zip(extra_labels, extra_values):
                if "Ion Injection Time (ms):" == label:
                    iits[scan_id] = float(value)
            if scan_id not in iits:
                iits[scan_id] = None
        return iits

    def scan_dependents(self) -> list:
        """
        Get a nested list of scan id pairs. Each pair represents a fragmentation event.

        :return: List of [scan id, scan index of the dependent scan]
        """
        pairs = []
        for scan_id in self._scan_numbers():
            # NOTE(review): the second argument (5) is presumably the filter precision in decimals - confirm.
            gsd = self.run.GetScanDependents(scan_id, 5)
            if gsd is not None:
                for d in gsd.ScanDependentDetailArray:
                    pairs.append([scan_id, d.ScanIndex])
        return pairs

    def close(self):
        """
        Close the reader/file object.

        :return: None
        """
        self.run.Close()


# --------------------------------------------------------------------------------
# /dimspy/portals/txt_portal.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright © 2017-2020 Ralf Weber, Albert Zhou.
#
# This file is part of DIMSpy.
#
# DIMSpy is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# DIMSpy is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with DIMSpy.  If not, see <https://www.gnu.org/licenses/>.
#


import logging
import os
from ast import literal_eval
from typing import Any, Sequence, Union

import numpy as np
from ..models.peak_matrix import PeakMatrix, unmask_all_peakmatrix
from ..models.peaklist import PeakList
from ..models.peaklist_tags import PeakList_Tags


def _evalv(vect):
    """
    Cast a vector of strings to the Python type of its first element.

    The type is detected by applying ``literal_eval`` to the first element. If that element is not a
    Python literal, or the vector is empty, the vector is returned unchanged.

    :param vect: Sequence of strings
    :return: The input itself, or a new list holding the converted values
    """
    if len(vect) == 0:
        return vect
    try:
        ctype = type(literal_eval(vect[0]))
    except (ValueError, SyntaxError):
        return vect
    if ctype is bool:
        # bool() of any non-empty string is True, so bool('False') would silently become True.
        literals = {'True': True, 'False': False}
        return [literals[x] if x in literals else bool(x) for x in vect]
    return list(map(ctype, vect))


# peaklist portals
def save_peaklist_as_txt(pkl: PeakList, filename: str, *args, **kwargs):
    """
    Saves a peaklist object to a plain text file.

    An existing file is overwritten after a warning has been logged.

    :param pkl: the target peaklist object
    :param filename: path to a new text file
    :param args: arguments to be passed to PeakList.to_str
    :param kwargs: keyword arguments to be passed to PeakList.to_str

    """
    if os.path.isfile(filename):
        logging.warning('plain text file [%s] already exists, override' % filename)
    with open(filename, 'w') as f:
        f.write(pkl.to_str(*args, **kwargs))


def load_peaklist_from_txt(filename: str, ID: Any, delimiter: str = ',',
                           flag_names: Union[str, Sequence[str], None] = 'auto',
                           has_flag_col: bool = True):
    """
    Loads a peaklist from plain text file.

    The first line is the header. The first two columns must be the m/z values and the intensities;
    every further column is added to the peaklist as an attribute.

    :param filename: Path to an existing text-based peaklist file
    :param ID: ID of the peaklist
    :param delimiter: Delimiter of the text lines. Default = ',', i.e., CSV format
    :param flag_names: Names of the flag attributes. Default = 'auto', indicating all the attribute names ending
        with "_flag" will be treated as flag attributes. Provide None to indicate no flag attributes
    :param has_flag_col: Whether the text file contains the overall "flags" column as its last column. If True,
        its values will be discarded. The overall flags of the new peaklist will be calculated automatically.
        Default = True
    :raises IOError: If the file does not exist, is empty, has rows of different lengths or duplicate headers
    :rtype: PeakList object

    """
    if not os.path.isfile(filename):
        raise IOError('plain text file [%s] does not exist' % filename)
    with open(filename, 'r') as f:
        rlns = [x for x in map(str.strip, f.readlines()) if x != '']
    if not rlns:
        raise IOError('plain text file [%s] is empty' % filename)

    dlns = [list(map(str.strip, x.split(delimiter))) for x in rlns]
    if any(len(x) != len(dlns[0]) for x in dlns[1:]):
        raise IOError('data matrix size not match')

    hd, dm = dlns[0], list(zip(*dlns[1:]))
    if has_flag_col:
        hd, dm = hd[:-1], dm[:-1]  # flag_col must be the last one, and discarded
    if len(set(hd)) != len(hd):
        raise IOError('duplicate headers found')

    # first two cols must be mz and ints
    mzs, ints = np.array(dm[0], dtype=float), np.array(dm[1], dtype=float)
    pkl = PeakList(ID, mzs, ints)

    if flag_names == 'auto':
        flag_names = [x for x in hd if x.endswith('_flag')]
    elif flag_names is None:
        flag_names = []
    else:
        flag_names = set(flag_names)
    for n, v in zip(hd[2:], dm[2:]):
        pkl.add_attribute(n, _evalv(v), is_flag=n in flag_names, flagged_only=False)

    return pkl


# peak matrix portals
def save_peak_matrix_as_txt(pm: PeakMatrix, filename: str, *args, **kwargs):
    """
    Saves a peak matrix in plain text file.

    The matrix is written inside ``unmask_all_peakmatrix``. An existing file is overwritten after a
    warning has been logged.

    :param pm: The target peak matrix object
    :param filename: Path to a new text file
    :param args: Arguments to be passed to PeakMatrix.to_str
    :param kwargs: Keyword arguments to be passed to PeakMatrix.to_str

    """
    if os.path.isfile(filename):
        logging.warning('plain text file [%s] already exists, override' % filename)
    with open(filename, 'w') as f:
        with unmask_all_peakmatrix(pm) as m:
            f.write(m.to_str(*args, **kwargs))


def load_peak_matrix_from_txt(filename: str, delimiter: str = '\t', samples_in_rows: bool = True,
                              comprehensive: Union[str, bool] = 'auto'):
    """
    Loads a peak matrix from plain text file.

    :param filename: Path to an existing text-based peak matrix file
    :param delimiter: Delimiter of the text lines. Default = tab character, i.e., TSV format
    :param samples_in_rows: Whether or not the samples are stored in rows. Default = True
    :param comprehensive: Whether the input is a 'comprehensive' or 'simple' version of the matrix.
        Default = 'auto', i.e., auto detect (comprehensive if a 'flags' label is present)
    :raises IOError: If the file does not exist, is empty or has rows of different lengths
    :rtype: PeakMatrix object

    """
    if not os.path.isfile(filename):
        raise IOError('plain text file [%s] does not exist' % filename)
    with open(filename, 'r') as f:
        # readlines() keeps the line terminator, so blank lines have to be detected after stripping.
        # The kept lines are not stripped as a whole because rows may start with empty cells.
        rlns = [x for x in f.readlines() if x.strip() != '']
    if not rlns:
        raise IOError('plain text file [%s] is empty' % filename)

    dlns = [list(map(str.strip, x.split(delimiter))) for x in rlns]
    if any(len(x) != len(dlns[0]) for x in dlns[1:]):
        raise IOError('data matrix size not match')

    if samples_in_rows:
        dlns = list(zip(*dlns))
    if comprehensive == 'auto':
        comprehensive = ('flags' in dlns[0])
    rdlns = list(zip(*dlns))
    # NOTE(review): this lookup also runs when comprehensive is False, although rsdrow is only used
    # for comprehensive input; a matrix without an 'rsd_all' row fails here with IndexError.
    # Confirm whether 'simple' matrices are expected to load.
    rsdrow = list(filter(lambda x: x[1][0] == 'rsd_all', enumerate(rdlns)))[0][0]

    def _parseflags():
        # Flag rows sit between the 'rsd_all' row and the overall 'flags' row.
        fgs = []
        for ln in rdlns[rsdrow + 1:]:
            if ln[0] == 'flags':
                break
            # literal_eval instead of eval: the cells come from a file and must never be executed.
            fgs.append((ln[0], [literal_eval(x) for x in ln[1:] if x != '']))
        return fgs

    flgs = _parseflags() if comprehensive else []

    # must refactor if PeakMatrix.to_str changed
    pcol = rsdrow + len(flgs) + 2 if comprehensive else 1
    pids = dlns[0][pcol:]

    def _parsetags(tgs):
        # Returns the index at which the scan over the 'tags_' lines stopped, and the filled tags.
        tcount = 0
        for tcount, ln in enumerate(dlns[2:]):  # line 1 = missing
            if not ln[0].startswith('tags_'):
                break
            tn, tv = ln[0][5:], ln[pcol:]
            for i, v in enumerate(_evalv(tv)):
                if v == '':
                    continue
                if tn == 'untyped':
                    tgs[i].add_tag(v)
                else:
                    tgs[i].add_tag(v, tn)
        return tcount, tgs

    tnum, tags = 0, [PeakList_Tags() for _ in pids]
    if comprehensive:
        tnum, tags = _parsetags(tags)

    rlns = list(zip(*dlns[2 + tnum:]))
    mz = np.array([rlns[0]] * len(pids), dtype=float)
    ints = np.array(rlns[pcol:], dtype=float)

    pm = PeakMatrix(pids, tags, [('mz', mz), ('intensity', ints)])
    for fn, fv in flgs:
        pm.add_flag(fn, fv, flagged_only=False)
    return pm
#
# DIMSpy is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with DIMSpy. If not, see <https://www.gnu.org/licenses/>.
#


import logging
from functools import reduce
from typing import Union, Sequence, Tuple, Any

import numpy as np
from ..models.peak_matrix import PeakMatrix, mask_peakmatrix, unmask_peakmatrix
from ..models.peaklist import PeakList


# peaklist filters
def filter_attr(pl: PeakList, attr_name: str, max_threshold: Union[int, float, None] = None,
                min_threshold: Union[int, float, None] = None, flag_name: Union[str, None] = None,
                flag_index: Union[int, None] = None):
    """
    Peaklist attribute values filter.

    :param pl: The target peaklist
    :param attr_name: Name of the target attribute
    :param max_threshold: Maximum threshold. A peak will be unflagged if the value of its attr_name is larger than the
        threshold. Default = None, indicating no threshold
    :param min_threshold: Minimum threshold. A peak will be unflagged if the value of its attr_name is smaller than the
        threshold. Default = None, indicating no threshold
    :param flag_name: Name of the new flag attribute. Default = None, indicating using attr_name + '_flag'
    :param flag_index: Index of the new flag to be inserted into the peaklist. Default = None
    :raises ValueError: If neither a minimum nor a maximum threshold is given
    :rtype: PeakList object

    This filter accepts real value attributes only.

    """
    if min_threshold is None and max_threshold is None:
        raise ValueError('must specify minimum or maximum threshold value')
    if flag_name is None:
        flag_name = attr_name + '_flag'
    values = pl[attr_name]
    # A missing bound is replaced by True, which leaves the other condition unchanged.
    above_min = (min_threshold <= values) if min_threshold is not None else True
    below_max = (values <= max_threshold) if max_threshold is not None else True
    return pl.add_attribute(flag_name, np.logical_and(above_min, below_max), is_flag=True, on_index=flag_index)


def filter_ringing(pl: PeakList, threshold: float, bin_size: Union[int, float] = 1.0, flag_name: str = 'ringing_flag',
                   flag_index: Union[int, None] = None):
    """
    Peaklist ringing filter.

    :param pl: The target peaklist
    :param threshold: Intensity threshold ratio, in the range [0.0, 1.0]
    :param bin_size: Size of the mz chunk for intensity filtering, in m/z units (it is the step of the bin edges
        generated over the m/z values). Default = 1.0
    :param flag_name: Name of the new flag attribute. Default = 'ringing_flag'
    :param flag_index: Index of the new flag to be inserted into the peaklist. Default = None
    :raises ValueError: If threshold is outside [0.0, 1.0]
    :rtype: PeakList object

    This filter will split the mz values into bin_size chunks, and search the highest intensity value for each chunk.
    A peak is flagged only if its intensity is larger than threshold x the highest intensity in its chunk;
    all other peaks will be unflagged.

    """
    if not 0 <= threshold <= 1:
        raise ValueError('mzr_remove: Provide a value in the range [0.0, 1.0]')
    mzs, intensity = pl.mz, pl.intensity
    edges = np.arange(np.floor(np.min(mzs)), np.ceil(np.max(mzs)) + bin_size, bin_size) - 0.5
    inds = np.digitize(mzs, edges)
    # Writing through the boolean selection keeps every chunk maximum aligned with its own peaks,
    # whatever the order of the m/z values, and avoids repeated list concatenation.
    mask = np.empty(len(inds), dtype=float)
    for i in np.unique(inds):
        members = inds == i
        mask[members] = np.max(intensity[members])
    return pl.add_attribute(flag_name, intensity > (mask * threshold), is_flag=True, on_index=flag_index)


def filter_mz_ranges(pl: PeakList, mz_ranges: Sequence[Tuple[float, float]], flag_name: str = 'mz_ranges_flag',
                     flagged_only: bool = False, flag_index: Union[int, None] = None):
    """
    Peaklist mz range filter.

    :param pl: The target peaklist
    :param mz_ranges: The mz ranges to remove. Must be in the format of [(mz_min1, mz_max1), (mz_min2, mz_max2), ...]
    :param flag_name: Name of the new flag attribute. Default = 'mz_ranges_flag'
    :param flagged_only: Passed to PeakList.get_attribute and PeakList.add_attribute; it also selects whether the
        flag vector has pl.shape[0] (True) or pl.full_size (False) entries. Default = False
    :param flag_index: Index of the new flag to be inserted into the peaklist. Default = None
    :raises ValueError: If a range does not have exactly two values, or its start is not smaller than its end
    :rtype: PeakList

    This filter will unflag all the peaks whose mz values are within any of the ranges in mz_ranges
    (both boundaries included).

    """
    flags = np.ones(pl.shape[0] if flagged_only else pl.full_size, dtype=bool)
    mzs = pl.get_attribute("mz", flagged_only)  # same values for every range, so fetch them once

    for mzr in mz_ranges:
        if len(mzr) != 2:
            raise ValueError(
                'mzr_remove: Provide a list of "start" and "end" values for each m/z range that needs to be removed.')
        if mzr[0] >= mzr[1]:
            raise ValueError('mzr_remove: Start value cannot be larger then end value.')
        flags[(mzs >= mzr[0]) & (mzs <= mzr[1])] = False
    pl.add_attribute(flag_name, flags, flagged_only=flagged_only, is_flag=True, on_index=flag_index)
    return pl


# PeakMatrix filters
def filter_rsd(pm: PeakMatrix, rsd_threshold: Union[int, float], qc_tag: Any, on_attr: str = 'intensity',
               flag_name: str = 'rsd_flag'):
    """
    PeakMatrix RSD filter.

    :param pm: The target peak matrix
    :param rsd_threshold: Threshold of the RSD of the QC samples
    :param qc_tag: Tag (label) to unmask qc samples
    :param on_attr: Calculate RSD on given attribute. Default = "intensity"
    :param flag_name: Name of the new flag. Default = 'rsd_flag'
    :rtype: PeakMatrix

    This filter will calculate the RSD values of the QC samples. A peak with a QC RSD value larger than the
    threshold, or with a nan RSD value, will be unflagged. A warning is logged when nan values are found.

    """
    rsd_values = pm.rsd(qc_tag, on_attr=on_attr)
    if np.any(np.isnan(rsd_values)):
        logging.warning('nan found in QC rsd values, filter might not work properly')

    pm.add_flag(flag_name, [not (np.isnan(v) or v > rsd_threshold) for v in rsd_values])
    return pm


def filter_fraction(pm: PeakMatrix, fraction_threshold: float, within_classes: bool = False, class_tag_type: Any = None,
                    flag_name: str = 'fraction_flag'):
    """
    PeakMatrix fraction filter.

    :param pm: The target peak matrix
    :param fraction_threshold: Threshold of the sample fractions
    :param within_classes: Whether to calculate the fraction array within each class. Default = False
    :param class_tag_type: Tag type to unmask samples within the same class (e.g. "classLabel"). Default = None
    :param flag_name: Name of the new flag. Default = 'fraction_flag'
    :raises KeyError: If within_classes is True and no class_tag_type is given
    :raises AttributeError: If within_classes is True and not every sample has a tag of class_tag_type
    :rtype: PeakMatrix object

    This filter will calculate the fraction array over all samples or within each class (based on class_tag_type).
    The peaks with a fraction value smaller than the threshold will be unflagged. When filtering within classes,
    a peak stays flagged if it reaches the threshold in at least one class.

    """
    if not within_classes:
        pm.add_flag(flag_name, pm.fraction >= fraction_threshold)
        return pm

    if class_tag_type is None:
        raise KeyError('must provide class tag type for within classes filtering')
    if not all(t.has_tag_type(class_tag_type) for t in pm.peaklist_tags):
        raise AttributeError('not all tags have tag type [%s]' % class_tag_type)
    # Boolean accumulator, so the flag is boolean even when no class tag is iterated.
    flg = np.zeros(pm.shape[1], dtype=bool)
    for tag in pm.tags_of(class_tag_type):
        with unmask_peakmatrix(pm, tag) as m:
            flg = np.logical_or(flg, (m.fraction >= fraction_threshold))
    pm.add_flag(flag_name, flg)
    return pm


def filter_blank_peaks(pm: PeakMatrix, blank_tag: Any, fraction_threshold: Union[int, float] = 1,
                       fold_threshold: Union[int, float] = 1,
                       method: str = 'mean', rm_blanks: bool = True, flag_name: str = 'blank_flag'):
    """
    PeakMatrix blank filter.

    :param pm: The target peak matrix
    :param blank_tag: Tag (label) to mask blank samples. e.g Tag("blank", "classLabel")
    :param fraction_threshold: Threshold of the sample fractions. Default = 1
    :param fold_threshold: Threshold of the blank sample intensity folds. Default = 1
    :param method: Method to calculate blank sample intensity array. Valid values include 'mean', 'median', and 'max'.
        Default = 'mean'
    :param rm_blanks: Whether to remove (not mask) blank samples after filtering
    :param flag_name: Name of the new flag. Default = 'blank_flag'
    :raises ValueError: If no sample carries blank_tag, or method is not one of the valid values
    :rtype: PeakMatrix object

    This filter will calculate the intensity array of the blanks using the "method", ignoring blank intensities
    that are not larger than zero, and compare it with the intensities of the other samples. A peak that has a
    blank intensity is unflagged if fewer than fraction_threshold x (number of rows of the blank-masked matrix)
    of its intensity values reach the blank intensity x fold_threshold.

    """
    if not any([blank_tag in x for x in pm.peaklist_tags]):
        raise ValueError('blank tag [%s] does not exist' % blank_tag)
    if method not in ('mean', 'median', 'max'):
        raise ValueError('filter method must be mean, median or max')

    with unmask_peakmatrix(pm, blank_tag) as m:
        # Mask everything that is not a positive intensity so it cannot contribute to the blank level.
        mm = np.ma.masked_array(m.intensity_matrix, mask=~(m.intensity_matrix > 0))
        # Use the numpy.ma reductions: numpy.median does not honour the mask of a masked array,
        # whereas numpy.ma.mean / numpy.ma.median / numpy.ma.max all do.
        ints = mm[0] if mm.shape[0] == 1 else getattr(np.ma, method)(mm, axis=0)
        imsk = np.ma.getmaskarray(ints)  # True where no blank sample has a positive intensity
        ints = np.array(ints) * fold_threshold

    with mask_peakmatrix(pm, blank_tag) as m:
        failed_int = np.sum(m.intensity_matrix >= ints, axis=0) < (fraction_threshold * m.shape[0])
        # Only peaks that do have a blank intensity (not masked) can fail the filter.
        m.add_flag(flag_name, ~(~imsk & failed_int))

    if rm_blanks:
        pm = pm.remove_samples(np.where([x.has_tag(blank_tag) for x in pm.peaklist_tags])[0])
    return pm
12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/source/api-reference.rst: -------------------------------------------------------------------------------- 1 | API reference 2 | ============= 3 | 4 | 5 | .. toctree:: 6 | :maxdepth: 1 7 | 8 | dimspy.tools 9 | dimspy.metadata 10 | dimspy.models 11 | dimspy.portals 12 | dimspy.process 13 | -------------------------------------------------------------------------------- /docs/source/bugs-and-issues.rst: -------------------------------------------------------------------------------- 1 | Bugs and Issues 2 | =============== 3 | 4 | Please report any bugs that you find `here `_. 5 | Or fork the repository on `GitHub `_ 6 | and create a pull request (PR). We welcome all contributions, and we will help you to make 7 | the PR if you are new to `git`. 8 | -------------------------------------------------------------------------------- /docs/source/changelog.rst: -------------------------------------------------------------------------------- 1 | Changelog 2 | ========= 3 | 4 | All notable changes to this project will be documented here. 
For more detailed changes, please refer to the `github `_ commit history 5 | 6 | `DIMSpy v2.0.0 `_ 7 | ------------------------------------------------------------------------------------------- 8 | 9 | **Release date: 26 April 2020** 10 | 11 | - First stable Python 3 only release 12 | - Refactor and improve HDF5 portal to save peaklists and/or peak matrices 13 | - Add compatibility for previous HDF5 files (python 2 version of DIMSpy) 14 | - Improve filelist handling 15 | - mzML or raw files are ordered by timestamp if no filelist is provided (i.e. process_scans) 16 | - Fix warnings (NaturalNameWarning, ResourceWarning, DeprecationWarning) 17 | - Fix 'blank filter' bug (missing and/or zero values are excluded) 18 | - Improve subsetting / filtering of scan events 19 | - Optimise imports 20 | - Increase `coverage tests `_ 21 | - Improve documentation (`Read the Docs `_), including docstrings 22 | 23 | 24 | `DIMSpy v1.4.0 `_ 25 | ------------------------------------------------------------------------------------------- 26 | 27 | **Release date: 2 October 2019** 28 | 29 | - Final Python 2 release 30 | 31 | 32 | `DIMSpy v1.3.0 `_ 33 | ------------------------------------------------------------------------------------------- 34 | 35 | **Release date: 26 November 2018** 36 | 37 | 38 | `DIMSpy v1.2.0 `_ 39 | ------------------------------------------------------------------------------------------- 40 | 41 | **Release date: 29 May 2018** 42 | 43 | 44 | `DIMSpy v1.1.0 `_ 45 | ------------------------------------------------------------------------------------------- 46 | 47 | **Release date: 19 February 2018** 48 | 49 | 50 | `DIMSpy v1.0.0 `_ 51 | ------------------------------------------------------------------------------------------- 52 | 53 | **Release date: 10 December 2017** 54 | 55 | 56 | `DIMSpy v0.1.0 (pre-release) `_ 57 | --------------------------------------------------------------------------------------------------------- 58 | 59 | **Release date: 11 July 2017**
60 | -------------------------------------------------------------------------------- /docs/source/citation.rst: -------------------------------------------------------------------------------- 1 | Citation 2 | ======== 3 | 4 | To cite DIMSpy please use the following publication. 5 | 6 | Check `Zenodo `_ for citing more up-to-date versions of DIMSpy if not listed here. 7 | 8 | 9 | **DIMSpy v2.0.0** 10 | 11 | Ralf J. M. Weber & Jiarui Zhou. (2020, April 24). DIMSpy: Python package for processing direct-infusion mass spectrometry-based metabolomics and lipidomics data (Version v2.0.0). Zenodo. http://doi.org/10.5281/zenodo.3764169 12 | 13 | 14 | BibTeX 15 | 16 | .. code-block:: 17 | 18 | @software{ralf_j_m_weber_2020_3764169, 19 | author = {Ralf J. M. Weber and 20 | Jiarui Zhou}, 21 | title = {{DIMSpy: Python package for processing direct- 22 | infusion mass spectrometry-based metabolomics and 23 | lipidomics data}}, 24 | month = april, 25 | year = 2020, 26 | publisher = {Zenodo}, 27 | version = {v2.0.0}, 28 | doi = {10.5281/zenodo.3764169}, 29 | url = {https://doi.org/10.5281/zenodo.3764169} 30 | } 31 | 32 | 33 | **DIMSpy v1.4.0** 34 | 35 | Ralf J. M. Weber & Jiarui Zhou. (2019, October 2). DIMSpy: Python package for processing direct-infusion mass spectrometry-based metabolomics and lipidomics data (Version v1.4.0). Zenodo. http://doi.org/10.5281/zenodo.3764110 36 | 37 | 38 | BibTeX 39 | 40 | .. code-block:: 41 | 42 | @software{ralf_j_m_weber_2019_3764110, 43 | author = {Ralf J. M. 
Weber and 44 | Jiarui Zhou}, 45 | title = {{DIMSpy: Python package for processing direct- 46 | infusion mass spectrometry-based metabolomics and 47 | lipidomics data}}, 48 | month = oct, 49 | year = 2019, 50 | publisher = {Zenodo}, 51 | version = {v1.4.0}, 52 | doi = {10.5281/zenodo.3764110}, 53 | url = {https://doi.org/10.5281/zenodo.3764110} 54 | } 55 | -------------------------------------------------------------------------------- /docs/source/cli.rst: -------------------------------------------------------------------------------- 1 | Command Line Interface 2 | ====================== 3 | 4 | .. code-block:: console 5 | 6 | $ dimspy --help 7 | 8 | Executing dimspy version 2.0.0. 9 | usage: __main__.py [-h] 10 | {process-scans,replicate-filter,align-samples,blank-filter,sample-filter,remove-samples,mv-sample-filter,merge-peaklists,get-peaklists,get-average-peaklist,hdf5-pm-to-txt,hdf5-pls-to-txt,create-sample-list,unzip,licenses} 11 | ... 12 | 13 | Python package to process DIMS data 14 | 15 | positional arguments: 16 | {process-scans,replicate-filter,align-samples,blank-filter,sample-filter,remove-samples,mv-sample-filter,merge-peaklists,get-peaklists,get-average-peaklist,hdf5-pm-to-txt,hdf5-pls-to-txt,create-sample-list,unzip,licenses} 17 | process-scans Process scans and/or stitch SIM windows. 18 | replicate-filter Filter irreproducible peaks from technical replicate 19 | peaklists. 20 | align-samples Align peaklists across samples. 21 | blank-filter Filter peaks across samples that are present in the 22 | blank samples. 23 | sample-filter Filter peaks based on certain reproducibility and 24 | sample class criteria. 25 | remove-samples Remove sample(s) from a peak matrix object or list of 26 | peaklist objects. 27 | mv-sample-filter Filter samples based on the percentage of missing 28 | values. 29 | merge-peaklists Merge peaklists from multiple lists of peaklist or 30 | peak matrix objects. 31 | get-peaklists Get peaklists from a peak matrix object. 
32 | get-average-peaklist 33 | Get an average peaklist from a peak matrix object. 34 | hdf5-pm-to-txt Write HDF5 output (peak matrix) to text format. 35 | hdf5-pls-to-txt Write HDF5 output (peak lists) to text format. 36 | create-sample-list Create a sample list from a peak matrix object or list 37 | of peaklist objects. 38 | unzip Extract files from zip file 39 | licenses Show licenses DIMSpy and RawFileReader 40 | 41 | optional arguments: 42 | -h, --help show this help message and exit 43 | 44 | 45 | .. code-block:: console 46 | 47 | $ dimspy process-scans --help 48 | 49 | Executing dimspy version 2.0.0b1. 50 | usage: __main__.py process-scans [-h] -i source -o OUTPUT [-l FILELIST] -m 51 | {median,mean,mad,noise_packets} -s 52 | SNR_THRESHOLD [-p PPM] [-n MIN_SCANS] 53 | [-a MIN_FRACTION] [-d RSD_THRESHOLD] [-k] 54 | [-r RINGING_THRESHOLD] 55 | [-e start end scan_type] 56 | [-x start end scan_type] [-z start end] 57 | [-u REPORT] [-b BLOCK_SIZE] [-c NCPUS] 58 | 59 | optional arguments: 60 | -h, --help show this help message and exit 61 | -i source, --input source 62 | Directory (*.raw, *.mzml or tab-delimited peaklist 63 | files), single *.mzml/*.raw file or zip archive 64 | (*.mzml only) 65 | -o OUTPUT, --output OUTPUT 66 | HDF5 file to save the peaklist objects to. 67 | -l FILELIST, --filelist FILELIST 68 | Tab-delimited file that include the name of the data 69 | files (*.raw or *.mzml) and meta data. Column names: 70 | filename, replicate, batch, injectionOrder, 71 | classLabel. 72 | -m {median,mean,mad,noise_packets}, --function-noise {median,mean,mad,noise_packets} 73 | Select function to calculate noise. 74 | -s SNR_THRESHOLD, --snr-threshold SNR_THRESHOLD 75 | Signal-to-noise threshold 76 | -p PPM, --ppm PPM Mass tolerance in Parts per million to group peaks 77 | across scans / mass spectra. 78 | -n MIN_SCANS, --min_scans MIN_SCANS 79 | Minimum number of scans required for each m/z range or 80 | event. 
81 | -a MIN_FRACTION, --min-fraction MIN_FRACTION 82 | Minimum fraction a peak has to be present. Use 0.0 to 83 | not apply this filter. 84 | -d RSD_THRESHOLD, --rsd-threshold RSD_THRESHOLD 85 | Maximum threshold - relative standard deviation 86 | (Calculated for peaks that have been measured across a 87 | minimum of two scans). 88 | -k, --skip-stitching Skip the step where (SIM) windows are 'stitched' or 89 | 'joined' together. Individual peaklists are generated 90 | for each window. 91 | -r RINGING_THRESHOLD, --ringing-threshold RINGING_THRESHOLD 92 | Ringing 93 | -e start end scan_type, --include-scan-events start end scan_type 94 | Scan events to select. E.g. 100.0 200.0 sim or 50.0 95 | 1000.0 full 96 | -x start end scan_type, --exclude-scan-events start end scan_type 97 | Scan events to select. E.g. 100.0 200.0 sim or 50.0 98 | 1000.0 full 99 | -z start end, --remove-mz-range start end 100 | M/z range(s) to remove. E.g. 100.0 102.0 or 140.0 101 | 145.0. 102 | -u REPORT, --report REPORT 103 | Summary/Report of processed mass spectra 104 | -b BLOCK_SIZE, --block-size BLOCK_SIZE 105 | The size of each block of peaks to perform clustering 106 | on. 107 | -c NCPUS, --ncpus NCPUS 108 | Number of central processing units (CPUs). 109 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # http://www.sphinx-doc.org/en/master/config 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. 
# If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys
# NOTE(review): '..' is resolved against the current working directory of the Sphinx build, not
# against this file - confirm that it reaches the directory containing the dimspy package.
sys.path.insert(0, os.path.abspath('..'))


# -- Project information -----------------------------------------------------
# The package is spelled "DIMSpy" everywhere else in the documentation.
project = 'DIMSpy'
copyright = '2019, Ralf Weber, Jiarui (Albert) Zhou'
author = 'Ralf Weber, Jiarui (Albert) Zhou'

# The full version, including alpha/beta/rc tags
release = '2.0.0'


# -- General configuration ---------------------------------------------------

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.doctest',
    'sphinx.ext.viewcode',
    'sphinx.ext.napoleon',
    'sphinx.ext.todo',
    'sphinx.ext.mathjax',
]

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The master toctree document.
master_doc = 'index'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']


# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_rtd_theme'

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
63 | html_static_path = ['_static'] 64 | -------------------------------------------------------------------------------- /docs/source/credits.rst: -------------------------------------------------------------------------------- 1 | Credits 2 | ======= 3 | 4 | DIMSpy was originally written by Ralf Weber and Albert Zhou and has been developed with the help of many others. 5 | Thanks to everyone who has improved DIMSpy by contributing code, features, bug reports (and fixes), and documentation. 6 | 7 | Developers & Contributors 8 | ------------------------- 9 | - Ralf J. M. Weber (r.j.weber@bham.ac.uk) - `University of Birmingham (UK) `__ 10 | - Jiarui (Albert) Zhou (j.zhou.3@bham.ac.uk) - `University of Birmingham (UK) `_, `HIT Shenzhen (China) `_ 11 | - Thomas N. Lawson (t.n.lawson@bham.ac.uk) - `University of Birmingham (UK) `__ 12 | - Martin R. Jones (martin.jones@eawag.ch) - `Eawag (Switzerland) `_ 13 | 14 | Funding 15 | ------- 16 | DIMSpy acknowledges support from the following funders: 17 | - BBSRC, grant number BB/M019985/1 18 | - European Commission's H2020 programme, grant agreement number 654241 19 | - Wellcome Trust, grant number 202952/Z/16/Z 20 | -------------------------------------------------------------------------------- /docs/source/dimspy.metadata.rst: -------------------------------------------------------------------------------- 1 | metadata 2 | ======== 3 | 4 | .. automodule:: dimspy.metadata 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/dimspy.models.rst: -------------------------------------------------------------------------------- 1 | models 2 | ====== 3 | 4 | peaklist 5 | -------- 6 | 7 | .. automodule:: dimspy.models.peaklist 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | peaklist\_metadata 13 | ------------------ 14 | 15 | ..
automodule:: dimspy.models.peaklist_metadata 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | 20 | peaklist\_tags 21 | -------------- 22 | 23 | .. automodule:: dimspy.models.peaklist_tags 24 | :members: 25 | :undoc-members: 26 | :show-inheritance: 27 | 28 | peak\_matrix 29 | ------------ 30 | 31 | .. automodule:: dimspy.models.peak_matrix 32 | :members: 33 | :undoc-members: 34 | :show-inheritance: 35 | 36 | -------------------------------------------------------------------------------- /docs/source/dimspy.portals.rst: -------------------------------------------------------------------------------- 1 | portals 2 | ======= 3 | 4 | mzml\_portal 5 | ------------ 6 | 7 | .. automodule:: dimspy.portals.mzml_portal 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | :member-order: bysource 12 | 13 | thermo\_raw\_portal 14 | ------------------- 15 | 16 | .. automodule:: dimspy.portals.thermo_raw_portal 17 | :members: 18 | :undoc-members: 19 | :show-inheritance: 20 | :member-order: bysource 21 | 22 | txt\_portal 23 | ----------- 24 | 25 | .. automodule:: dimspy.portals.txt_portal 26 | :members: 27 | :undoc-members: 28 | :show-inheritance: 29 | :member-order: bysource 30 | 31 | hdf5\_portal 32 | ------------ 33 | 34 | .. automodule:: dimspy.portals.hdf5_portal 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | :member-order: bysource 39 | 40 | paths 41 | ----- 42 | 43 | .. automodule:: dimspy.portals.paths 44 | :members: 45 | :undoc-members: 46 | :show-inheritance: 47 | :member-order: bysource 48 | -------------------------------------------------------------------------------- /docs/source/dimspy.process.rst: -------------------------------------------------------------------------------- 1 | process 2 | ======= 3 | 4 | peak\_alignment 5 | --------------- 6 | 7 | .. 
automodule:: dimspy.process.peak_alignment 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | :member-order: bysource 12 | 13 | peak\_filters 14 | ------------- 15 | 16 | .. automodule:: dimspy.process.peak_filters 17 | :members: 18 | :undoc-members: 19 | :show-inheritance: 20 | :member-order: bysource 21 | 22 | replicate\_processing 23 | --------------------- 24 | 25 | .. automodule:: dimspy.process.replicate_processing 26 | :members: 27 | :undoc-members: 28 | :show-inheritance: 29 | :member-order: bysource 30 | -------------------------------------------------------------------------------- /docs/source/dimspy.tools.rst: -------------------------------------------------------------------------------- 1 | tools 2 | ===== 3 | 4 | .. automodule:: dimspy.tools 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | :member-order: bysource 9 | -------------------------------------------------------------------------------- /docs/source/images/alignment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/computational-metabolomics/dimspy/4a0b8982382b8198f2e28864a7f0263ce8a49c49/docs/source/images/alignment.png -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to DIMSpy's documentation! 2 | ================================== 3 | 4 | |Py versions| |Version| |Bioconda| |Galaxy-eu| |Git| |Build Status (Travis)| |Build Status (AppVeyor)| |codecov| |License| |binder| |RTD doc| |gitter| 5 | 6 | Python package for processing direct-infusion mass spectrometry-based metabolomics and lipidomics data 7 | 8 | 9 | Contents 10 | -------- 11 | 12 | .. toctree:: 13 | :maxdepth: 3 14 | 15 | installation 16 | api-reference 17 | cli 18 | credits 19 | bugs-and-issues 20 | changelog 21 | citation 22 | license 23 | 24 | 25 | .. 
|Build Status (Travis)| image:: https://img.shields.io/travis/computational-metabolomics/dimspy.svg?logo=travis&maxAge=600&style=flat-square 26 | :target: https://travis-ci.com/computational-metabolomics/dimspy 27 | 28 | .. |Build Status (AppVeyor)| image:: https://img.shields.io/appveyor/ci/RJMW/dimspy.svg?logo=appveyor&style=flat-square&maxAge=600 29 | :target: https://ci.appveyor.com/project/RJMW/dimspy/branch/master 30 | 31 | .. |Py versions| image:: https://img.shields.io/pypi/pyversions/dimspy.svg?style=flat&maxAge=3600 32 | :target: https://pypi.python.org/pypi/dimspy/ 33 | 34 | .. |Version| image:: https://img.shields.io/pypi/v/dimspy.svg?style=flat&maxAge=3600 35 | :target: https://pypi.python.org/pypi/dimspy/ 36 | 37 | .. |Git| image:: https://img.shields.io/badge/repository-GitHub-blue.svg?style=flat&maxAge=3600 38 | :target: https://github.com/computational-metabolomics/dimspy 39 | 40 | .. |Bioconda| image:: https://img.shields.io/conda/vn/bioconda/dimspy?style=flat-square&maxAge=3600 41 | :target: http://bioconda.github.io/recipes/dimspy/README.html 42 | 43 | .. |galaxy-eu| image:: https://img.shields.io/badge/usegalaxy-.eu-brightgreen?logo= 44 | :target: http://usegalaxy.eu 45 | 46 | .. |License| image:: https://img.shields.io/pypi/l/dimspy.svg?style=flat&maxAge=3600 47 | :target: https://www.gnu.org/licenses/gpl-3.0.html 48 | 49 | .. |RTD doc| image:: https://img.shields.io/badge/documentation-RTD-71B360.svg?style=flat&maxAge=3600 50 | :target: https://dimspy.readthedocs.io/en/latest/ 51 | 52 | .. |codecov| image:: https://codecov.io/gh/computational-metabolomics/dimspy/branch/master/graph/badge.svg 53 | :target: https://codecov.io/gh/computational-metabolomics/dimspy 54 | 55 | .. |binder| image:: https://mybinder.org/badge_logo.svg 56 | :target: https://mybinder.org/v2/gh/computational-metabolomics/dimspy/master?filepath=notebooks%2Fworkflow.ipynb 57 | 58 | .. 
|gitter| image:: https://badges.gitter.im/Join%20Chat.svg 59 | :target: https://gitter.im/computational-metabolomics/dimspy?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge 60 | 61 | 62 | Indices and tables 63 | ================== 64 | 65 | * :ref:`genindex` 66 | * :ref:`search` 67 | -------------------------------------------------------------------------------- /docs/source/installation.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | Conda (recommended) 5 | ------------------- 6 | 7 | Install Miniconda by following the steps described `here `_ 8 | 9 | Start the ``conda prompt`` 10 | 11 | * Windows: Open the ``Anaconda Prompt`` via the Start menu 12 | * macOS or Linux: Open a ``Terminal`` 13 | 14 | Create a dimspy-specific ``conda`` environment. 15 | This will install all the dependencies required to run ``dimspy``:: 16 | 17 | $ conda create --yes --name dimspy dimspy -c conda-forge -c bioconda -c computational-metabolomics 18 | 19 | .. note:: 20 | 21 | * The installation process will take a few minutes. 22 | * Feel free to use a different name for the Conda environment 23 | 24 | You can use the following command to remove a conda environment:: 25 | 26 | $ conda env remove -y --name dimspy 27 | 28 | This is only required if something has gone wrong in the previous step. 29 | 30 | Activate the ``dimspy`` environment:: 31 | 32 | $ conda activate dimspy 33 | 34 | To test your ``dimspy`` installation, in your Conda Prompt, run the command:: 35 | 36 | $ dimspy --help 37 | 38 | or:: 39 | 40 | $ python 41 | import dimspy 42 | 43 | Close and deactivate the ``dimspy`` environment when you’re done:: 44 | 45 | $ conda deactivate 46 | 47 | 48 | PyPi 49 | ---- 50 | 51 | Install the current release of ``dimspy`` with ``pip``:: 52 | 53 | $ pip install dimspy 54 | 55 | .. note:: 56 | 57 | * The installation process will take a few minutes. 
58 | 59 | To upgrade to a newer release use the ``--upgrade`` flag:: 60 | 61 | $ pip install --upgrade dimspy 62 | 63 | If you do not have permission to install software systemwide, you can 64 | install into your user directory using the ``--user`` flag:: 65 | 66 | $ pip install --user dimspy 67 | 68 | Alternatively, you can manually download ``dimspy`` from 69 | `GitHub `_ or 70 | `PyPI `_. 71 | To install one of these versions, unpack it and run the following from the 72 | top-level source directory using the Terminal:: 73 | 74 | $ pip install . 75 | 76 | 77 | Testing 78 | ------- 79 | DIMSpy uses the Python ``pytest`` testing package. You can learn more 80 | about pytest on their `homepage `_. 81 | -------------------------------------------------------------------------------- /docs/source/license.rst: -------------------------------------------------------------------------------- 1 | License 2 | ======= 3 | 4 | DIMSpy is licensed under the GNU General Public License v3.0 (see `LICENSE file `_ for licensing information). Copyright © 2017 - 2020 Ralf Weber, Albert Zhou 5 | 6 | **Third-party licenses and copyright** 7 | 8 | RawFileReader reading tool. Copyright © 2016 by Thermo Fisher Scientific, Inc. All rights reserved. See `RawFileReaderLicense `_ for licensing information. 9 | Using DIMSpy software for processing Thermo Fisher Scientific *.raw files implies the acceptance of the RawFileReader license terms. 10 | Anyone receiving RawFileReader as part of a larger software distribution (in the current context, as part of DIMSpy) is considered an "end user" under 11 | section 3.3 of the RawFileReader License, and is not granted rights to redistribute RawFileReader. 
12 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: dimspy 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | dependencies: 6 | - python=3.7 7 | - fastcluster=1.1.26 8 | - h5py=2.10.0 9 | - numpy=1.17.1 10 | - pandas=0.25.0 11 | - pymzml=2.4.5 12 | - pytables=3.6.1 13 | - pythonnet=2.4.0 14 | - scipy=1.3.1 15 | -------------------------------------------------------------------------------- /examples/examples.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | from dimspy.tools import * 5 | from dimspy.portals.hdf5_portal import * 6 | import zipfile 7 | 8 | 9 | def main(): 10 | 11 | source = os.path.join("..", "tests", "data", "MTBLS79_subset", "MTBLS79_mzml_triplicates.zip") 12 | fn_filelist = os.path.join("..", "tests", "data", "MTBLS79_subset", "filelist_mzml_triplicates.txt") 13 | output = os.path.join("results") 14 | if not os.path.exists(output): 15 | os.mkdir(output) 16 | 17 | print("Unzip mzML files.....") 18 | zip_ref = zipfile.ZipFile(source, 'r') 19 | zip_ref.extractall(os.path.join("data")) 20 | zip_ref.close() 21 | print("Completed") 22 | 23 | print("Process Scans.....") 24 | pls = process_scans("data", min_scans=1, function_noise="median", 25 | snr_thres=3.0, ppm=2.0, min_fraction=None, rsd_thres=None, 26 | filelist=fn_filelist, remove_mz_range=[], block_size=5000, ncpus=None) 27 | print("Completed") 28 | 29 | print("Replicate Filter.....") 30 | logfile = os.path.join(output, "log_replicate_filter.txt") 31 | pls_rf = replicate_filter(pls, ppm=2.0, replicates=3, min_peaks=2, rsd_thres=None, report=logfile, block_size=5000) 32 | print("Completed") 33 | 34 | print("Write each replicate filtered peaklist to a text file") 35 | for pl in pls_rf: 36 | with open(os.path.join(output, pl.ID + ".txt"), "w") as out: 37 | out.write(
pl.to_str("\t")) 38 | print("Completed") 39 | 40 | # print("Save, write and load peaklists") 41 | # save_peaklists_as_hdf5(pls_rf, os.path.join(output, "pls_rf.h5")) 42 | # hdf5_peaklists_to_txt(os.path.join(output, "pls_rf.h5"), path_out=output) 43 | # pls_rf = load_peaklists_from_hdf5(os.path.join(output, "pls_rf.h5")) 44 | # print("Completed") 45 | 46 | # print("Create a new sample list.....") 47 | # sample_list = os.path.join(output, "sample_list.txt") 48 | # create_sample_list(pls_rf, sample_list, delimiter="\t") 49 | # print("Completed") 50 | # print("") 51 | 52 | print("Align Samples.....") 53 | pm = align_samples(pls_rf, ppm=3.0, ncpus=1, block_size=5000) 54 | print("Completed", pm.shape) 55 | 56 | # print("Save, write and load peak matrix") 57 | # save_peak_matrix_as_hdf5(pm, os.path.join(output, "pm.h5")) 58 | # hdf5_peak_matrix_to_txt(os.path.join(output, "pm.h5"), path_out=os.path.join(output, "pm.txt"), attr_name="intensity", comprehensive=True) 59 | # pm = load_peak_matrix_from_hdf5(os.path.join(output, "pm.h5")) 60 | # print("Completed") 61 | 62 | print("Blank Filter.....") 63 | pm_bf = blank_filter(pm, "blank", min_fraction=1.0, min_fold_change=10.0, function="mean", rm_samples=True) 64 | print("Completed", pm_bf.shape) 65 | 66 | print("Sample Filter.....") 67 | pm_bf_sf = sample_filter(pm_bf, 0.8, within=False) 68 | print("Completed", pm_bf_sf.shape) 69 | 70 | 71 | if __name__ == '__main__': 72 | main() 73 | -------------------------------------------------------------------------------- /examples/run.bat: -------------------------------------------------------------------------------- 1 | dimspy --help 2 | 3 | dimspy unzip^ 4 | --input ../tests/data/MTBLS79_subset/MTBLS79_mzml_triplicates.zip^ 5 | --output results/mzml 6 | 7 | dimspy process-scans^ 8 | --input results/mzml^ 9 | --output results/peaklists.hdf5^ 10 | --filelist ../tests/data/MTBLS79_subset/filelist_mzml_triplicates.txt^ 11 | --function-noise median^ 12 | --snr-threshold 3.0^ 13 | --ppm 
2.0^ 14 | --min_scans 1^ 15 | --min-fraction 0.5^ 16 | --block-size 5000^ 17 | --ncpus 2 18 | 19 | dimspy replicate-filter^ 20 | --input results/peaklists.hdf5^ 21 | --output results/peaklists_rf.hdf5^ 22 | --ppm 2.0^ 23 | --replicates 3^ 24 | --min-peak-present 2 25 | 26 | dimspy align-samples^ 27 | --input results/peaklists.hdf5^ 28 | --output results/pm_a.hdf5^ 29 | --ppm 2.0 30 | 31 | dimspy blank-filter^ 32 | --input results/pm_a.hdf5^ 33 | --output results/pm_a_bf.hdf5^ 34 | --blank-label blank^ 35 | --remove 36 | 37 | dimspy sample-filter^ 38 | --input results/pm_a_bf.hdf5^ 39 | --output results/pm_a_bf_sf.hdf5^ 40 | --min-fraction 0.8 41 | 42 | dimspy hdf5-pls-to-txt^ 43 | --input results/peaklists.hdf5^ 44 | --output results^ 45 | --delimiter tab 46 | 47 | dimspy hdf5-pm-to-txt^ 48 | --input results/pm_a_bf_sf.hdf5^ 49 | --output results/pm_a_bf_sf.txt^ 50 | --delimiter tab 51 | 52 | dimspy merge-peaklists^ 53 | --input results/peaklists_rf.hdf5^ 54 | --input results/peaklists.hdf5^ 55 | --output results/peaklists_merged.hdf5 56 | -------------------------------------------------------------------------------- /examples/run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | dimspy --help 4 | 5 | dimspy unzip \ 6 | --input ../tests/data/MTBLS79_subset/MTBLS79_mzml_triplicates.zip \ 7 | --output results/mzml 8 | 9 | dimspy process-scans \ 10 | --input results/mzml \ 11 | --output results/peaklists.hdf5 \ 12 | --filelist ../tests/data/MTBLS79_subset/filelist_mzml_triplicates.txt \ 13 | --function-noise median \ 14 | --snr-threshold 3.0 \ 15 | --ppm 2.0 \ 16 | --min_scans 1 \ 17 | --min-fraction 0.5 \ 18 | --block-size 5000 \ 19 | --ncpus 2 20 | 21 | dimspy replicate-filter \ 22 | --input results/peaklists.hdf5 \ 23 | --output results/peaklists_rf.hdf5 \ 24 | --ppm 2.0 \ 25 | --replicates 3 \ 26 | --min-peak-present 2 27 | 28 | dimspy align-samples \ 29 | --input results/peaklists.hdf5 \ 30 | 
--output results/pm_a.hdf5 \ 31 | --ppm 2.0 32 | 33 | dimspy blank-filter \ 34 | --input results/pm_a.hdf5 \ 35 | --output results/pm_a_bf.hdf5 \ 36 | --blank-label blank \ 37 | --remove 38 | 39 | dimspy sample-filter \ 40 | --input results/pm_a_bf.hdf5 \ 41 | --output results/pm_a_bf_sf.hdf5 \ 42 | --min-fraction 0.8 43 | 44 | dimspy hdf5-pls-to-txt \ 45 | --input results/peaklists_rf.hdf5 \ 46 | --output results \ 47 | --delimiter tab 48 | 49 | dimspy hdf5-pm-to-txt \ 50 | --input results/pm_a_bf_sf.hdf5 \ 51 | --output results/pm_a_bf_sf.txt \ 52 | --delimiter tab 53 | 54 | dimspy merge-peaklists \ 55 | --input results/peaklists_rf.hdf5 \ 56 | --input results/peaklists.hdf5 \ 57 | --output results/peaklists_merged.hdf5 58 | -------------------------------------------------------------------------------- /notebooks/workflow.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#!/usr/bin/env python\n", 10 | "# -*- coding: utf-8 -*-\n", 11 | "\n", 12 | "import os\n", 13 | "import zipfile\n", 14 | "from dimspy.tools import process_scans\n", 15 | "from dimspy.tools import replicate_filter\n", 16 | "from dimspy.tools import create_sample_list\n", 17 | "from dimspy.tools import align_samples\n", 18 | "from dimspy.tools import blank_filter\n", 19 | "from dimspy.tools import sample_filter\n", 20 | "\n", 21 | "\n", 22 | "source = os.path.join(\"..\", \"tests\", \"data\", \"MTBLS79_subset\", \"MTBLS79_mzml_triplicates.zip\")\n", 23 | "fn_filelist = os.path.join(\"..\", \"tests\", \"data\", \"MTBLS79_subset\", \"filelist_mzml_triplicates.txt\")\n", 24 | "\n", 25 | "zip_ref = zipfile.ZipFile(source, 'r')\n", 26 | "zip_ref.extractall(\"data\")\n", 27 | "zip_ref.close()\n", 28 | "\n", 29 | "print(os.listdir(\"data\"))\n" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | 
"metadata": { 36 | "pycharm": { 37 | "name": "#%%\n" 38 | } 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "pls = process_scans(\"data\", min_scans=1, function_noise=\"median\",\n", 43 | " snr_thres=3.0, ppm=2.0, min_fraction=None, rsd_thres=None,\n", 44 | " filelist=fn_filelist, remove_mz_range=[], block_size=5000, ncpus=None)\n", 45 | "\n", 46 | "print(pls[0]) # first peaklist" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": { 53 | "pycharm": { 54 | "name": "#%%\n" 55 | } 56 | }, 57 | "outputs": [], 58 | "source": [ 59 | "pls_rf = replicate_filter(pls, ppm=2.0, replicates=3, min_peaks=2, rsd_thres=None,\n", 60 | " report=\"log_replicate_filter.txt\", block_size=5000)\n", 61 | "\n", 62 | "print(pls_rf[0]) # first peaklist" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": { 69 | "pycharm": { 70 | "name": "#%%\n" 71 | } 72 | }, 73 | "outputs": [], 74 | "source": [ 75 | "create_sample_list(pls_rf, \"sample_list.txt\", delimiter=\"\\t\")" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": { 82 | "pycharm": { 83 | "name": "#%%\n" 84 | } 85 | }, 86 | "outputs": [], 87 | "source": [ 88 | "pm = align_samples(pls, ppm=3.0, ncpus=1, block_size=5000)\n", 89 | "print(pm.shape)" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": { 96 | "pycharm": { 97 | "name": "#%%\n" 98 | } 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "pm_bf = blank_filter(pm, \"blank\", min_fraction=1.0, min_fold_change=10.0, function=\"mean\", rm_samples=True)\n", 103 | "print(pm_bf.shape)" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": { 110 | "pycharm": { 111 | "name": "#%%\n" 112 | } 113 | }, 114 | "outputs": [], 115 | "source": [ 116 | "pm_bf_sf = sample_filter(pm, 0.8, within=False)\n", 117 | "print(pm_bf_sf.shape)\n", 118 | "\n" 119 | ] 120 | }, 121 | { 122 | 
"cell_type": "code", 123 | "execution_count": null, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "# " 128 | ] 129 | } 130 | ], 131 | "metadata": { 132 | "kernelspec": { 133 | "display_name": "Python 3", 134 | "language": "python", 135 | "name": "python3" 136 | }, 137 | "language_info": { 138 | "codemirror_mode": { 139 | "name": "ipython", 140 | "version": 3 141 | }, 142 | "file_extension": ".py", 143 | "mimetype": "text/x-python", 144 | "name": "python", 145 | "nbconvert_exporter": "python", 146 | "pygments_lexer": "ipython3", 147 | "version": "3.7.3" 148 | }, 149 | "pycharm": { 150 | "stem_cell": { 151 | "cell_type": "raw", 152 | "metadata": { 153 | "collapsed": false 154 | }, 155 | "source": [] 156 | } 157 | } 158 | }, 159 | "nbformat": 4, 160 | "nbformat_minor": 1 161 | } 162 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | fastcluster==1.1.26 2 | h5py==2.10.0 3 | numpy==1.17.1 4 | pandas==0.25.0 5 | pymzml==2.4.5 6 | pythonnet==2.4.0 7 | tables==3.6.1 8 | scipy==1.3.1 9 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright © 2017-2020 Ralf Weber, Albert Zhou. 5 | # 6 | # This file is part of DIMSpy. 7 | # 8 | # DIMSpy is free software: you can redistribute it and/or modify 9 | # it under the terms of the GNU General Public License as published by 10 | # the Free Software Foundation, either version 3 of the License, or 11 | # (at your option) any later version. 12 | # 13 | # DIMSpy is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 16 | # GNU General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU General Public License 19 | # along with DIMSpy. If not, see . 20 | # 21 | 22 | 23 | import setuptools 24 | import sys 25 | import dimspy 26 | 27 | 28 | def main(): 29 | 30 | setuptools.setup(name="dimspy", 31 | version=dimspy.__version__, 32 | description="Python package for processing of direct-infusion mass spectrometry-based metabolomics and lipidomics data", 33 | long_description=open('README.rst').read(), 34 | author="Ralf Weber, Albert Zhou", 35 | author_email="r.j.weber@bham.ac.uk, j.zhou.3@bham.ac.uk ", 36 | url="https://github.com/computational-metabolomics/dimspy", 37 | license="GPLv3", 38 | platforms=['Windows, UNIX'], 39 | keywords=['Metabolomics', 'Lipidomics', 'Mass spectrometry', 'Data Processing', 'Direct-Infusion Mass Spectrometry'], 40 | packages=setuptools.find_packages(), 41 | test_suite='tests.suite', 42 | python_requires='>=3.7', 43 | install_requires=open('requirements.txt').read().splitlines(), 44 | include_package_data=True, 45 | project_urls={ 46 | "Documentation": "https://dimspy.readthedocs.io/en/latest/", 47 | "Changelog": "https://dimspy.readthedocs.io/en/latest/changelog.html", 48 | "Bug Tracker": "https://github.com/computational-metabolomics/dimspy/issues", 49 | }, 50 | classifiers=[ 51 | "Programming Language :: Python :: 3", 52 | "Programming Language :: Python :: 3.7", 53 | "Topic :: Scientific/Engineering :: Bio-Informatics", 54 | "Topic :: Scientific/Engineering :: Chemistry", 55 | "Topic :: Utilities", 56 | "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", 57 | "Operating System :: OS Independent", 58 | ], 59 | entry_points={ 60 | 'console_scripts': [ 61 | 'dimspy = dimspy.__main__:main' 62 | ] 63 | } 64 | ) 65 | 66 | 67 | if __name__ == "__main__": 68 | main() 69 | -------------------------------------------------------------------------------- /tests/__init__.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright © 2017-2020 Ralf Weber, Albert Zhou. 5 | # 6 | # This file is part of DIMSpy. 7 | # 8 | # DIMSpy is free software: you can redistribute it and/or modify 9 | # it under the terms of the GNU General Public License as published by 10 | # the Free Software Foundation, either version 3 of the License, or 11 | # (at your option) any later version. 12 | # 13 | # DIMSpy is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU General Public License 19 | # along with DIMSpy. If not, see . 20 | # 21 | 22 | 23 | import unittest 24 | 25 | 26 | def suite(): 27 | test_loader = unittest.TestLoader() 28 | test_suite = test_loader.discover('.', pattern='test_*.py') 29 | return test_suite 30 | -------------------------------------------------------------------------------- /tests/data/MTBLS79_subset/MTBLS79_mzml_peak_matrix_v1.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/computational-metabolomics/dimspy/4a0b8982382b8198f2e28864a7f0263ce8a49c49/tests/data/MTBLS79_subset/MTBLS79_mzml_peak_matrix_v1.hdf5 -------------------------------------------------------------------------------- /tests/data/MTBLS79_subset/MTBLS79_mzml_peak_matrix_v2.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/computational-metabolomics/dimspy/4a0b8982382b8198f2e28864a7f0263ce8a49c49/tests/data/MTBLS79_subset/MTBLS79_mzml_peak_matrix_v2.hdf5 -------------------------------------------------------------------------------- /tests/data/MTBLS79_subset/MTBLS79_mzml_single.zip: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/computational-metabolomics/dimspy/4a0b8982382b8198f2e28864a7f0263ce8a49c49/tests/data/MTBLS79_subset/MTBLS79_mzml_single.zip -------------------------------------------------------------------------------- /tests/data/MTBLS79_subset/MTBLS79_mzml_single_report.txt: -------------------------------------------------------------------------------- 1 | filename event scans peaks median_rsd 2 | batch04_QC17_rep01_262.mzML FTMS + p ESI w SIM ms [70.00-170.00] 10 501 16.597287464014354 3 | batch04_QC17_rep01_262.mzML FTMS + p ESI w SIM ms [140.00-240.00] 11 308 11.861413863099502 4 | batch04_QC17_rep01_262.mzML FTMS + p ESI w SIM ms [210.00-310.00] 14 221 12.059987578794935 5 | batch04_QC17_rep01_262.mzML FTMS + p ESI w SIM ms [280.00-380.00] 14 212 11.145086842326155 6 | batch04_QC17_rep01_262.mzML FTMS + p ESI w SIM ms [350.00-450.00] 13 205 10.548051403649117 7 | batch04_QC17_rep01_262.mzML FTMS + p ESI w SIM ms [420.00-520.00] 13 180 11.35556910318272 8 | batch04_QC17_rep01_262.mzML FTMS + p ESI w SIM ms [490.00-590.00] 13 173 11.024412858650523 9 | batch04_QC17_rep01_262.mzML SIM-Stitch NA 1800 12.033732483598556 10 | -------------------------------------------------------------------------------- /tests/data/MTBLS79_subset/MTBLS79_mzml_triplicates.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/computational-metabolomics/dimspy/4a0b8982382b8198f2e28864a7f0263ce8a49c49/tests/data/MTBLS79_subset/MTBLS79_mzml_triplicates.zip -------------------------------------------------------------------------------- /tests/data/MTBLS79_subset/MTBLS79_mzml_triplicates_report.txt: -------------------------------------------------------------------------------- 1 | name peaks peaks_3oo3 median_rsd_3oo3 2 | batch04_B02_rep01_301_2_302_3_303 650 527 11.278862335879921 3 | 
batch04_QC17_rep01_262_2_263_3_264 487 405 8.047266384318867 4 | batch04_S01_rep01_247_2_248_3_249 518 441 5.330921878107105 5 | -------------------------------------------------------------------------------- /tests/data/MTBLS79_subset/MTBLS79_mzml_triplicates_v1.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/computational-metabolomics/dimspy/4a0b8982382b8198f2e28864a7f0263ce8a49c49/tests/data/MTBLS79_subset/MTBLS79_mzml_triplicates_v1.hdf5 -------------------------------------------------------------------------------- /tests/data/MTBLS79_subset/MTBLS79_mzml_triplicates_v2.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/computational-metabolomics/dimspy/4a0b8982382b8198f2e28864a7f0263ce8a49c49/tests/data/MTBLS79_subset/MTBLS79_mzml_triplicates_v2.hdf5 -------------------------------------------------------------------------------- /tests/data/MTBLS79_subset/filelist_class_label_error.txt: -------------------------------------------------------------------------------- 1 | filename replicate batch injectionOrder classLabel 2 | batch04_B02_rep01_301.mzML 1 1 1 blank 3 | batch04_B02_rep02_302.mzML 2 1 2 blank 4 | batch04_B02_rep03_303.mzML 3 1 3 blank 5 | batch04_QC17_rep01_262.mzML 4 1 4 QC 6 | batch04_QC17_rep02_263.mzML 1 1 5 QC 7 | batch04_QC17_rep03_264.mzML 2 1 6 QC 8 | batch04_S01_rep01_247.mzML 3 1 7 sample 9 | batch04_S01_rep02_248.mzML 1 1 8 sample 10 | batch04_S01_rep03_249.mzML 2 1 9 sample -------------------------------------------------------------------------------- /tests/data/MTBLS79_subset/filelist_csl_MTBLS79_mzml_peak_matrix.txt: -------------------------------------------------------------------------------- 1 | filename replicate batch injectionOrder classLabel 2 | batch04_B02_rep01_301.mzML 1 1 1 blank 3 | batch04_B02_rep02_302.mzML 2 1 2 blank 4 | batch04_B02_rep03_303.mzML 3 1 3 blank 5 | 
batch04_QC17_rep01_262.mzML 1 1 4 QC 6 | batch04_QC17_rep02_263.mzML 2 1 5 QC 7 | batch04_QC17_rep03_264.mzML 3 1 6 QC 8 | batch04_S01_rep01_247.mzML 1 1 7 sample 9 | batch04_S01_rep02_248.mzML 2 1 8 sample 10 | batch04_S01_rep03_249.mzML 3 1 9 sample 11 | -------------------------------------------------------------------------------- /tests/data/MTBLS79_subset/filelist_csl_MTBLS79_mzml_triplicates.txt: -------------------------------------------------------------------------------- 1 | filename replicate batch injectionOrder classLabel 2 | batch04_B02_rep01_301.mzML 1 1 1 blank 3 | batch04_B02_rep02_302.mzML 2 1 2 blank 4 | batch04_B02_rep03_303.mzML 3 1 3 blank 5 | batch04_QC17_rep01_262.mzML 1 1 4 QC 6 | batch04_QC17_rep02_263.mzML 2 1 5 QC 7 | batch04_QC17_rep03_264.mzML 3 1 6 QC 8 | batch04_S01_rep01_247.mzML 1 1 7 sample 9 | batch04_S01_rep02_248.mzML 2 1 8 sample 10 | batch04_S01_rep03_249.mzML 3 1 9 sample 11 | -------------------------------------------------------------------------------- /tests/data/MTBLS79_subset/filelist_filename_error.txt: -------------------------------------------------------------------------------- 1 | filename replicate batch injectionOrder classLabel 2 | batch04_B02_rep01_301.mzML 1 1 1 blank 3 | batch04_B02_rep02_302.mzML 2 1 2 blank 4 | batch04_B02_rep03_303.mzML 3 1 3 blank 5 | batch04_QC17_rep01_262.mzML 1 1 4 QC 6 | batch04_QC17_rep02_263.mzML 2 1 5 QC 7 | batch04_QC17_rep03_264.mzML 3 1 6 QC 8 | batch04_S01_rep01_247.mzML 1 1 7 sample 9 | batch04_S01_rep02_248.mzML 2 1 8 sample 10 | batch04_S01_rep02_248.mzML 3 1 9 sample 11 | -------------------------------------------------------------------------------- /tests/data/MTBLS79_subset/filelist_injection_order_error.txt: -------------------------------------------------------------------------------- 1 | filename replicate batch injectionOrder classLabel 2 | batch04_B02_rep01_301.mzML 1 1 1 blank 3 | batch04_B02_rep02_302.mzML 2 1 2 blank 4 | batch04_B02_rep03_303.mzML 3 1 3 
blank 5 | batch04_QC17_rep01_262.mzML 1 1 10 QC 6 | batch04_QC17_rep02_263.mzML 2 1 5 QC 7 | batch04_QC17_rep03_264.mzML 3 1 6 QC 8 | batch04_S01_rep01_247.mzML 1 1 7 sample 9 | batch04_S01_rep02_248.mzML 2 1 8 sample 10 | batch04_S01_rep03_249.mzML 3 1 4 sample -------------------------------------------------------------------------------- /tests/data/MTBLS79_subset/filelist_multi.txt: -------------------------------------------------------------------------------- 1 | filename class multilist 2 | batch04_QC17_rep01_262.RAW blank 1 3 | batch04_QC17_rep02_263.RAW sample 1 4 | batch04_QC17_rep03_264.RAW sample 2 5 | -------------------------------------------------------------------------------- /tests/data/MTBLS79_subset/filelist_multi_error.txt: -------------------------------------------------------------------------------- 1 | filename class multilist 2 | batch04_QC17_rep01_262.RAW blank 1 3 | batch04_QC17_rep02_263.RAW sample 'UNWANTED STRING' 4 | batch04_QC17_rep03_264.RAW sample 2 5 | -------------------------------------------------------------------------------- /tests/data/MTBLS79_subset/filelist_mzml_single.txt: -------------------------------------------------------------------------------- 1 | filename replicate classLabel 2 | batch04_QC17_rep01_262.mzML 1 sample 3 | -------------------------------------------------------------------------------- /tests/data/MTBLS79_subset/filelist_mzml_triplicates.txt: -------------------------------------------------------------------------------- 1 | filename replicate batch injectionOrder classLabel 2 | batch04_B02_rep01_301.mzML 1 1 1 blank 3 | batch04_B02_rep02_302.mzML 2 1 2 blank 4 | batch04_B02_rep03_303.mzML 3 1 3 blank 5 | batch04_QC17_rep01_262.mzML 1 1 4 QC 6 | batch04_QC17_rep02_263.mzML 2 1 5 QC 7 | batch04_QC17_rep03_264.mzML 3 1 6 QC 8 | batch04_S01_rep01_247.mzML 1 1 7 sample 9 | batch04_S01_rep02_248.mzML 2 1 8 sample 10 | batch04_S01_rep03_249.mzML 3 1 9 sample 11 | 
-------------------------------------------------------------------------------- /tests/data/MTBLS79_subset/filelist_raw_triplicates.txt: -------------------------------------------------------------------------------- 1 | filename replicate batch injectionOrder classLabel 2 | batch04_QC17_rep01_262.RAW 1 1 1 QC 3 | batch04_QC17_rep02_263.RAW 2 1 2 QC 4 | batch04_QC17_rep03_264.RAW 3 1 3 QC 5 | -------------------------------------------------------------------------------- /tests/data/MTBLS79_subset/filelist_replicate_error_1.txt: -------------------------------------------------------------------------------- 1 | filename replicate batch injectionOrder classLabel 2 | batch04_B02_rep01_301.mzML 1 1 1 blank 3 | batch04_B02_rep02_302.mzML 2 1 2 blank 4 | batch04_B02_rep03_303.mzML 0 1 3 blank 5 | batch04_QC17_rep01_262.mzML 1 1 4 QC 6 | batch04_QC17_rep02_263.mzML 2 1 5 QC 7 | batch04_QC17_rep03_264.mzML 0 1 6 QC 8 | batch04_S01_rep01_247.mzML 1 1 7 sample 9 | batch04_S01_rep02_248.mzML 2 1 8 sample 10 | batch04_S01_rep03_249.mzML 0 1 9 sample -------------------------------------------------------------------------------- /tests/data/MTBLS79_subset/filelist_replicate_error_2.txt: -------------------------------------------------------------------------------- 1 | filename replicate batch injectionOrder classLabel 2 | batch04_B02_rep01_301.mzML 1 1 1 blank 3 | batch04_B02_rep02_302.mzML 2 1 2 blank 4 | batch04_B02_rep03_303.mzML 3 1 3 blank 5 | batch04_QC17_rep01_262.mzML 1 1 4 QC 6 | batch04_QC17_rep02_263.mzML 2 1 5 QC 7 | batch04_QC17_rep03_264.mzML 10 1 6 QC 8 | batch04_S01_rep01_247.mzML 1 1 7 sample 9 | batch04_S01_rep02_248.mzML 5 1 8 sample 10 | batch04_S01_rep03_249.mzML 3 1 9 sample -------------------------------------------------------------------------------- /tests/data/MTBLS79_subset/raw/batch04_QC17_rep01_262.RAW: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/computational-metabolomics/dimspy/4a0b8982382b8198f2e28864a7f0263ce8a49c49/tests/data/MTBLS79_subset/raw/batch04_QC17_rep01_262.RAW -------------------------------------------------------------------------------- /tests/data/MTBLS79_subset/raw/batch04_QC17_rep02_263.RAW: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/computational-metabolomics/dimspy/4a0b8982382b8198f2e28864a7f0263ce8a49c49/tests/data/MTBLS79_subset/raw/batch04_QC17_rep02_263.RAW -------------------------------------------------------------------------------- /tests/data/MTBLS79_subset/raw/batch04_QC17_rep03_264.RAW: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/computational-metabolomics/dimspy/4a0b8982382b8198f2e28864a7f0263ce8a49c49/tests/data/MTBLS79_subset/raw/batch04_QC17_rep03_264.RAW -------------------------------------------------------------------------------- /tests/data/mzml_DIMSn.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/computational-metabolomics/dimspy/4a0b8982382b8198f2e28864a7f0263ce8a49c49/tests/data/mzml_DIMSn.zip -------------------------------------------------------------------------------- /tests/test_hdf5_portal.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright © 2017-2020 Ralf Weber, Albert Zhou. 5 | # 6 | # This file is part of DIMSpy. 7 | # 8 | # DIMSpy is free software: you can redistribute it and/or modify 9 | # it under the terms of the GNU General Public License as published by 10 | # the Free Software Foundation, either version 3 of the License, or 11 | # (at your option) any later version. 
12 | # 13 | # DIMSpy is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU General Public License 19 | # along with DIMSpy. If not, see <https://www.gnu.org/licenses/>. 20 | # 21 | 22 | 23 | import os 24 | import unittest 25 | 26 | import numpy as np 27 | from dimspy.models.peaklist import PeakList 28 | from dimspy.models.peaklist_tags import Tag 29 | from dimspy.portals.hdf5_portal import save_peak_matrix_as_hdf5, load_peak_matrix_from_hdf5 30 | from dimspy.portals.hdf5_portal import save_peaklists_as_hdf5, load_peaklists_from_hdf5 31 | from dimspy.process.peak_alignment import align_peaks 32 | 33 | 34 | class HDF5PortalsTestCase(unittest.TestCase): 35 | @staticmethod 36 | def _createPeaklists(): 37 | _mzs = lambda: sorted(np.random.uniform(100, 1200, size = 100)) 38 | _ints = lambda: np.abs(np.random.normal(100, 10, size = 100)) 39 | 40 | pkls = [ 41 | PeakList('sample_1_1', _mzs(), _ints(), mz_range = (100, 1200)), 42 | PeakList('sample_1_2', _mzs(), _ints(), mz_range = (100, 1200)), 43 | PeakList('QC_1', _mzs(), _ints(), mz_range = (100, 1200)), 44 | PeakList('sample_2_1', _mzs(), _ints(), mz_range = (100, 1200)), 45 | PeakList('sample_2_2', _mzs(), _ints(), mz_range = (100, 1200)), 46 | PeakList('QC_2', _mzs(), _ints(), mz_range = (100, 1200)), 47 | ] 48 | 49 | for t in ('sample', Tag('compound_1', 'treatment'), Tag('1hr', 'time_point'), Tag(1, 'plate')): pkls[0].tags.add_tag(t) 50 | for t in ('sample', Tag('compound_1', 'treatment'), Tag('6hr', 'time_point'), Tag(1, 'plate')): pkls[1].tags.add_tag(t) 51 | for t in ('qc', Tag(1, 'plate')): pkls[2].tags.add_tag(t) 52 | for t in ('sample', Tag('compound_2', 'treatment'), Tag('1hr', 'time_point'), Tag(2, 'plate')): pkls[3].tags.add_tag(t) 53 | for t in ('sample', Tag('compound_2', 'treatment'),
Tag('6hr', 'time_point'), Tag(2, 'plate')): pkls[4].tags.add_tag(t) 54 | for t in ('qc', Tag(2, 'plate')): pkls[5].tags.add_tag(t) 55 | 56 | for p in pkls: p.add_attribute('snr', np.random.uniform(300, 400, size = 100)) 57 | for p in pkls: p.add_attribute('quad_flag', [0, 1, 1, 1] * 25, is_flag = True) 58 | for p in pkls: p.add_attribute('lab', [chr(i%26+97) for i in range(100)], flagged_only = False) 59 | return pkls 60 | 61 | def test_peaklist_portal(self): 62 | pkls = self._createPeaklists() 63 | 64 | save_peaklists_as_hdf5(pkls, '.test_peaklist.hdf5') 65 | npkls = load_peaklists_from_hdf5('.test_peaklist.hdf5') 66 | 67 | self.assertListEqual([x.size for x in npkls], [75] * 6) 68 | self.assertListEqual([x.full_size for x in npkls], [100] * 6) 69 | self.assertTrue(all([np.allclose(x[0].mz_all, x[1].mz_all) for x in zip(pkls, npkls)])) 70 | self.assertTrue(all([np.allclose(x[0].intensity, x[1].intensity) for x in zip(pkls, npkls)])) 71 | self.assertTrue(all([np.allclose(x[0].snr, x[1].snr, atol = 1e-30) for x in zip(pkls, npkls)])) 72 | self.assertTrue(all([np.all(x[0].quad_flag == x[1].quad_flag) for x in zip(pkls, npkls)])) 73 | self.assertTrue(all([np.all(x[0].lab == x[1].lab) for x in zip(pkls, npkls)])) 74 | self.assertTrue(all([list(x[0].metadata.keys()) == list(x[1].metadata.keys()) for x in zip(pkls, npkls)])) 75 | self.assertTrue(all([x[0].tags.tag_types == x[1].tags.tag_types for x in zip(pkls, npkls)])) 76 | self.assertTrue(all([x[0].tags.tag_values == x[1].tags.tag_values for x in zip(pkls, npkls)])) 77 | 78 | def test_peak_matrix_portal(self): 79 | pkls = self._createPeaklists() 80 | pm = align_peaks(pkls, ppm = 2.0, block_size = 10, ncpus = 2) 81 | 82 | pm.mask_tags('qc') 83 | 84 | pnum = pm.full_shape[1] 85 | pm.add_flag('odd_flag', ([0, 1] * int(pnum/2.+1))[:pnum]) 86 | pm.add_flag('qua_flag', ([0, 0, 0, 1] * int(pnum/4.+1))[:pnum], flagged_only = False) 87 | 88 | save_peak_matrix_as_hdf5(pm, '.test_peak_matrix.hdf5') 89 | npm = 
load_peak_matrix_from_hdf5('.test_peak_matrix.hdf5') 90 | 91 | self.assertEqual(pm.shape, npm.shape) 92 | self.assertEqual(pm.full_shape, npm.full_shape) 93 | self.assertTupleEqual(pm.attributes, npm.attributes) 94 | self.assertTrue(np.allclose(pm.mz_matrix, npm.mz_matrix)) 95 | self.assertTrue(np.allclose(pm.intensity_matrix, npm.intensity_matrix)) 96 | self.assertTrue(np.allclose(pm.attr_matrix('snr'), npm.attr_matrix('snr'))) 97 | self.assertTrue(np.all(pm.attr_matrix('lab') == npm.attr_matrix('lab'))) 98 | self.assertTrue(np.all( pm.property('present_matrix', flagged_only = False) == 99 | npm.property('present_matrix', flagged_only = False))) 100 | self.assertEqual(pm.peaklist_tag_types, npm.peaklist_tag_types) 101 | self.assertEqual(pm.peaklist_tag_values, npm.peaklist_tag_values) 102 | self.assertTrue(np.all(pm.mask == npm.mask)) 103 | self.assertTrue(np.all(pm.flag_values('odd_flag') == npm.flag_values('odd_flag'))) 104 | self.assertTrue(np.all(pm.flag_values('qua_flag') == npm.flag_values('qua_flag'))) 105 | self.assertTrue(np.all(pm.flags == npm.flags)) 106 | 107 | def tearDown(self): 108 | if os.path.isfile('.test_peaklist.hdf5'): os.remove('.test_peaklist.hdf5') 109 | if os.path.isfile('.test_peak_matrix.hdf5'): os.remove('.test_peak_matrix.hdf5') 110 | 111 | 112 | if __name__ == '__main__': 113 | unittest.main() 114 | -------------------------------------------------------------------------------- /tests/test_metadata.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright © 2017-2020 Ralf Weber, Albert Zhou. 5 | # 6 | # This file is part of DIMSpy. 7 | # 8 | # DIMSpy is free software: you can redistribute it and/or modify 9 | # it under the terms of the GNU General Public License as published by 10 | # the Free Software Foundation, either version 3 of the License, or 11 | # (at your option) any later version. 
12 | # 13 | # DIMSpy is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU General Public License 19 | # along with DIMSpy. If not, see <https://www.gnu.org/licenses/>. 20 | # 21 | 22 | 23 | import unittest 24 | import os 25 | 26 | from dimspy.metadata import validate_metadata 27 | 28 | 29 | def to_test_data(*args): 30 | return os.path.join(os.path.dirname(os.path.realpath(__file__)), "data", "MTBLS79_subset", *args) 31 | 32 | 33 | class ValidateMetadataTestCase(unittest.TestCase): 34 | 35 | def test_filelist_standard(self): 36 | # filename replicate batch injectionOrder classLabel 37 | fm_dict = validate_metadata(to_test_data("filelist_csl_MTBLS79_mzml_triplicates.txt")) 38 | self.assertEqual(fm_dict['filename'], ['batch04_B02_rep01_301.mzML', 'batch04_B02_rep02_302.mzML', 39 | 'batch04_B02_rep03_303.mzML', 'batch04_QC17_rep01_262.mzML', 40 | 'batch04_QC17_rep02_263.mzML', 'batch04_QC17_rep03_264.mzML', 41 | 'batch04_S01_rep01_247.mzML', 'batch04_S01_rep02_248.mzML', 42 | 'batch04_S01_rep03_249.mzML']) 43 | self.assertEqual(fm_dict['replicate'], [1, 2, 3, 1, 2, 3, 1, 2, 3]) 44 | self.assertEqual(fm_dict['batch'], [1] * 9) 45 | self.assertEqual(fm_dict['injectionOrder'], [1, 2, 3, 4, 5, 6, 7, 8, 9]) 46 | self.assertEqual(fm_dict['classLabel'], ['blank', 'blank', 'blank', 47 | 'QC', 'QC', 'QC', 'sample', 'sample', 'sample']) 48 | 49 | def test_filelist_multi(self): 50 | fm_dict = validate_metadata(to_test_data("filelist_multi.txt")) 51 | self.assertEqual(fm_dict['multilist'], [1, 1, 2]) 52 | 53 | def test_filename_error(self): 54 | with self.assertRaises(Exception) as context: 55 | validate_metadata(to_test_data("filelist_filename_error.txt")) 56 | self.assertTrue("Duplicate filename in list" in str(context.exception)) 57 | 58 | def
test_filelist_multilist_error(self): 59 | with self.assertRaises(Exception) as context: 60 | validate_metadata(to_test_data("filelist_multi_error.txt")) 61 | self.assertTrue("Column 'multilist' values should be integers" in str(context.exception)) 62 | 63 | def test_filelist_injection_order_error(self): 64 | with self.assertRaises(Exception) as context: 65 | validate_metadata(to_test_data("filelist_injection_order_error.txt")) 66 | self.assertTrue("samples not in order" in str(context.exception)) 67 | 68 | def test_filelist_class_label_error(self): 69 | with self.assertRaises(Exception) as context: 70 | validate_metadata(to_test_data("filelist_class_label_error.txt")) 71 | self.assertTrue("class names do not match with number of replicates" in str(context.exception)) 72 | 73 | def test_filelist_replicate_error_zero_value(self): 74 | with self.assertRaises(Exception) as context: 75 | validate_metadata(to_test_data("filelist_replicate_error_1.txt")) 76 | self.assertTrue("Incorrect replicate number in list" in str(context.exception)) 77 | 78 | def test_filelist_replicate_error_zero_value(self): 79 | with self.assertRaises(Exception) as context: 80 | validate_metadata(to_test_data("filelist_replicate_error_2.txt")) 81 | self.assertTrue("Incorrect numbering for replicates" in str(context.exception)) 82 | 83 | 84 | 85 | if __name__ == '__main__': 86 | unittest.main() 87 | -------------------------------------------------------------------------------- /tests/test_mzml_portal.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright © 2017-2020 Ralf Weber, Albert Zhou. 5 | # 6 | # This file is part of DIMSpy. 7 | # 8 | # DIMSpy is free software: you can redistribute it and/or modify 9 | # it under the terms of the GNU General Public License as published by 10 | # the Free Software Foundation, either version 3 of the License, or 11 | # (at your option) any later version. 
12 | # 13 | # DIMSpy is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU General Public License 19 | # along with DIMSpy. If not, see . 20 | # 21 | 22 | 23 | import io 24 | import os 25 | import unittest 26 | import zipfile 27 | 28 | from dimspy.portals.mzml_portal import Mzml 29 | 30 | 31 | def to_test_data(*args): 32 | return os.path.join(os.path.dirname(os.path.realpath(__file__)), "data", *args) 33 | 34 | 35 | def to_test_results(*args): 36 | return os.path.join(os.path.dirname(os.path.realpath(__file__)), "results", *args) 37 | 38 | 39 | class MzmlPortalsTestCase(unittest.TestCase): 40 | 41 | @classmethod 42 | def setUpClass(cls): 43 | 44 | zip_ref = zipfile.ZipFile(to_test_data("mzml_DIMSn.zip"), 'r') 45 | zip_ref.extractall(to_test_results("zip_data", "mzml")) 46 | zip_ref.close() 47 | 48 | def test_mzml_portal(self): 49 | run = Mzml(to_test_data("MTBLS79_subset", "mzml", "batch04_QC17_rep01_262.mzML")) 50 | self.assertEqual(run.timestamp, "2011-04-02T03:28:02Z") 51 | self.assertEqual((run.run.get_spectrum_count(), run.run.get_spectrum_count()), (88, 88)) 52 | self.assertListEqual(list(run.headers().keys()), ['FTMS + p ESI w SIM ms [70.00-170.00]', 53 | 'FTMS + p ESI w SIM ms [140.00-240.00]', 54 | 'FTMS + p ESI w SIM ms [210.00-310.00]', 55 | 'FTMS + p ESI w SIM ms [280.00-380.00]', 56 | 'FTMS + p ESI w SIM ms [350.00-450.00]', 57 | 'FTMS + p ESI w SIM ms [420.00-520.00]', 58 | 'FTMS + p ESI w SIM ms [490.00-590.00]']) 59 | self.assertListEqual(list(run.scan_ids().keys()), list(range(1,89))) 60 | self.assertListEqual(list(run.tics().values())[0:2], [39800032.0, 38217892.0]) 61 | self.assertEqual(len(run.tics()), 88) 62 | self.assertListEqual(list(run.ion_injection_times().values())[0:2], [40.433891296387, 
40.094646453857]) 63 | self.assertEqual(len(run.ion_injection_times()), 88) 64 | self.assertListEqual(run.scan_dependents(), []) 65 | 66 | pl = run.peaklist(1) 67 | self.assertEqual(pl.ID, 1) 68 | self.assertEqual(pl.metadata["header"], "FTMS + p ESI w SIM ms [70.00-170.00]") 69 | self.assertEqual(pl.metadata["ms_level"], 1.0) 70 | self.assertEqual(pl.metadata["ion_injection_time"], 40.433891296387) 71 | self.assertEqual(pl.metadata["scan_time"], 0.50109) 72 | self.assertEqual(pl.metadata["tic"], 39800032.0) 73 | self.assertEqual(pl.metadata["function_noise"], "median") 74 | self.assertEqual(pl.metadata["mz_range"], [70.0, 170.0]) 75 | run.close() 76 | 77 | run = Mzml(to_test_results("zip_data", "mzml", "A08_Apolar_Daph_AMP1_C30_LCMS_Pos_DIMSn_subset.mzML")) 78 | sd = run.scan_dependents() 79 | self.assertListEqual(list(run.tics().values())[0:2], [120293696.0, 13602.5234375]) 80 | self.assertEqual(len(run.tics()), 36) 81 | self.assertListEqual(sd[0], [1, 3]) 82 | self.assertListEqual(sd[-1], [511, 512]) 83 | self.assertEqual(len(sd), 30) 84 | run.close() 85 | 86 | # with open(to_test_results("zip_data", "mzml", "A08_Apolar_Daph_AMP1_C30_LCMS_Pos_DIMSn_subset.mzML"), "rb") as inp: 87 | # b = io.BytesIO(inp.read()) 88 | # run = Mzml(b) 89 | # sd = run.scan_dependents() 90 | # self.assertListEqual(list(run.tics().values())[0:2], [120293696.0, 13602.5234375]) 91 | # self.assertEqual(len(run.tics()), 36) 92 | # self.assertListEqual(sd[0], [1, 3]) 93 | # self.assertListEqual(sd[-1], [511, 512]) 94 | # self.assertEqual(len(sd), 30) 95 | # run.close() 96 | 97 | @classmethod 98 | def tearDownClass(cls): 99 | 100 | import shutil 101 | shutil.rmtree(to_test_results("")) 102 | os.makedirs(to_test_results("")) 103 | 104 | 105 | if __name__ == '__main__': 106 | unittest.main() 107 | -------------------------------------------------------------------------------- /tests/test_paths_portal.py: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright © 2017-2020 Ralf Weber, Albert Zhou. 5 | # 6 | # This file is part of DIMSpy. 7 | # 8 | # DIMSpy is free software: you can redistribute it and/or modify 9 | # it under the terms of the GNU General Public License as published by 10 | # the Free Software Foundation, either version 3 of the License, or 11 | # (at your option) any later version. 12 | # 13 | # DIMSpy is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU General Public License 19 | # along with DIMSpy. If not, see <https://www.gnu.org/licenses/>. 20 | # 21 | 22 | 23 | import os 24 | import unittest 25 | import platform 26 | 27 | from dimspy.portals import paths 28 | 29 | 30 | def to_test_data(*args): 31 | return os.path.join(os.path.dirname(os.path.realpath(__file__)), "data", *args) 32 | 33 | 34 | def to_test_results(*args): 35 | return os.path.join(os.path.dirname(os.path.realpath(__file__)), "results", *args) 36 | 37 | 38 | class PathsPortalsTestCase(unittest.TestCase): 39 | def test_paths_portal(self): 40 | 41 | files_correct = [to_test_data("MTBLS79_subset", "raw", "batch04_QC17_rep01_262.RAW"), 42 | to_test_data("MTBLS79_subset", "raw", "batch04_QC17_rep02_263.RAW"), 43 | to_test_data("MTBLS79_subset", "raw", "batch04_QC17_rep03_264.RAW")] 44 | tsv = to_test_data("MTBLS79_subset", "filelist_raw_triplicates.txt") 45 | 46 | source = to_test_data("MTBLS79_subset", "raw") 47 | files = paths.validate_and_sort_paths(source, tsv) 48 | self.assertListEqual(files, files_correct) 49 | 50 | source = to_test_data("MTBLS79_subset", "raw") 51 | files = paths.validate_and_sort_paths(source, tsv) 52 | self.assertListEqual(files, files_correct) 53 | 54 | source = [to_test_data("MTBLS79_subset", "raw", "batch04_QC17_rep03_264.RAW"), 55
| to_test_data("MTBLS79_subset", "raw", "batch04_QC17_rep02_263.RAW"), 56 | to_test_data("MTBLS79_subset", "raw", "batch04_QC17_rep01_262.RAW")] 57 | files = paths.validate_and_sort_paths(source, tsv) 58 | self.assertListEqual(files, files_correct) 59 | 60 | files = paths.validate_and_sort_paths(tsv=None, source=source) 61 | self.assertListEqual(files, source) 62 | 63 | path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data", "MTBLS79_subset") 64 | 65 | source_raw = os.path.join(path, "raw") 66 | fn_filelist_raw = os.path.join(path, "filelist_raw_triplicates.txt") 67 | fns = paths.validate_and_sort_paths(source_raw, fn_filelist_raw) 68 | fns_c = [os.path.join(source_raw, 'batch04_QC17_rep01_262.RAW'), 69 | os.path.join(source_raw, 'batch04_QC17_rep02_263.RAW'), 70 | os.path.join(source_raw, 'batch04_QC17_rep03_264.RAW')] 71 | self.assertListEqual(fns, fns_c) 72 | 73 | fns = [os.path.join(source_raw, "batch04_QC17_rep01_262.RAW")] 74 | fns_out = paths.validate_and_sort_paths(fns, None) 75 | self.assertListEqual(fns, fns_out) 76 | 77 | fns = [os.path.join(source_raw, "batch04_QC17_rep01_262.RAW"), 78 | os.path.join(source_raw, "batch04_QC17_rep02_263.RAW"), 79 | os.path.join(source_raw, "batch04_QC17_rep03_264.RAW")] 80 | fns_out = paths.validate_and_sort_paths(fns, fn_filelist_raw) 81 | self.assertListEqual(fns, fns_out) 82 | 83 | source_mzml = os.path.join(path, "mzml") 84 | fns = [os.path.join(source_mzml, 'batch04_QC17_rep01_262.mzML')] 85 | fns_out = paths.validate_and_sort_paths(fns, None) 86 | self.assertListEqual(fns, fns_out) 87 | 88 | fn_filelist_mzml = os.path.join(path, "filelist_mzml_triplicates.txt") 89 | source_mzml_fns = [os.path.join(source_mzml, "batch04_QC17_rep01_262.mzML"), 90 | os.path.join(source_mzml, "batch04_QC17_rep02_263.mzML"), 91 | os.path.join(source_mzml, "batch04_QC17_rep03_264.mzML")] 92 | 93 | with self.assertRaises(IOError): 94 | paths.validate_and_sort_paths(source_mzml_fns, fn_filelist_mzml) 95 | 96 | with 
self.assertRaises(IOError): 97 | paths.validate_and_sort_paths(source_mzml, fn_filelist_mzml) 98 | 99 | def test_sort_ms_files_by_timestamp(self): 100 | p = to_test_data("MTBLS79_subset", "mzml") 101 | ps = [os.path.join(p, fn) for fn in os.listdir(p)] 102 | files_sorted = paths.sort_ms_files_by_timestamp(ps) 103 | self.assertEqual(files_sorted[0], (os.path.join(p, "batch04_QC17_rep01_262.mzML"), '2011-04-02T03:28:02Z')) 104 | self.assertEqual(files_sorted[1], (os.path.join(p, "batch04_QC17_rep02_263.mzML"), '2011-04-02T03:31:04Z')) 105 | self.assertEqual(files_sorted[2], (os.path.join(p, "batch04_QC17_rep03_264.mzML"), '2011-04-02T03:34:08Z')) 106 | 107 | ps.reverse() 108 | files_sorted = paths.sort_ms_files_by_timestamp(ps) 109 | self.assertEqual(files_sorted[0], (os.path.join(p, "batch04_QC17_rep01_262.mzML"), '2011-04-02T03:28:02Z')) 110 | self.assertEqual(files_sorted[1], (os.path.join(p, "batch04_QC17_rep02_263.mzML"), '2011-04-02T03:31:04Z')) 111 | self.assertEqual(files_sorted[2], (os.path.join(p, "batch04_QC17_rep03_264.mzML"), '2011-04-02T03:34:08Z')) 112 | 113 | p = to_test_data("MTBLS79_subset", "raw") 114 | ps = [os.path.join(p, fn) for fn in os.listdir(p)] 115 | files_sorted = paths.sort_ms_files_by_timestamp(ps) 116 | 117 | self.assertTrue(files_sorted[0] == (os.path.join(p, "batch04_QC17_rep01_262.RAW"), '02/04/2011 03:28:02') 118 | or files_sorted[0] == (os.path.join(p, "batch04_QC17_rep01_262.RAW"), '4/2/2011 3:28:02 AM')) 119 | 120 | self.assertTrue(files_sorted[1] == (os.path.join(p, "batch04_QC17_rep02_263.RAW"), '02/04/2011 03:31:05') 121 | or files_sorted[1] == (os.path.join(p, "batch04_QC17_rep02_263.RAW"), '4/2/2011 3:31:05 AM')) 122 | 123 | self.assertTrue(files_sorted[2] == (os.path.join(p, "batch04_QC17_rep03_264.RAW"), '02/04/2011 03:34:09') 124 | or files_sorted[2] == (os.path.join(p, "batch04_QC17_rep03_264.RAW"), '4/2/2011 3:34:09 AM')) 125 | 126 | ps.reverse() 127 | files_sorted = paths.sort_ms_files_by_timestamp(ps) 128 | 129 | 
self.assertTrue(files_sorted[0] == (os.path.join(p, "batch04_QC17_rep01_262.RAW"), '02/04/2011 03:28:02') 130 | or files_sorted[0] == (os.path.join(p, "batch04_QC17_rep01_262.RAW"), '4/2/2011 3:28:02 AM')) 131 | 132 | self.assertTrue(files_sorted[1] == (os.path.join(p, "batch04_QC17_rep02_263.RAW"), '02/04/2011 03:31:05') 133 | or files_sorted[1] == (os.path.join(p, "batch04_QC17_rep02_263.RAW"), '4/2/2011 3:31:05 AM')) 134 | 135 | self.assertTrue(files_sorted[2] == (os.path.join(p, "batch04_QC17_rep03_264.RAW"), '02/04/2011 03:34:09') 136 | or files_sorted[2] == (os.path.join(p, "batch04_QC17_rep03_264.RAW"), '4/2/2011 3:34:09 AM')) 137 | 138 | if __name__ == '__main__': 139 | unittest.main() 140 | -------------------------------------------------------------------------------- /tests/test_peak_alignment.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright © 2017-2020 Ralf Weber, Albert Zhou. 5 | # 6 | # This file is part of DIMSpy. 7 | # 8 | # DIMSpy is free software: you can redistribute it and/or modify 9 | # it under the terms of the GNU General Public License as published by 10 | # the Free Software Foundation, either version 3 of the License, or 11 | # (at your option) any later version. 12 | # 13 | # DIMSpy is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU General Public License 19 | # along with DIMSpy. If not, see <https://www.gnu.org/licenses/>.
20 | # 21 | 22 | 23 | import unittest 24 | from functools import reduce 25 | 26 | import numpy as np 27 | from dimspy.models.peaklist import PeakList 28 | from dimspy.process.peak_alignment import align_peaks 29 | 30 | 31 | class PeakAlignmentTestCase(unittest.TestCase): 32 | mz = [ 33 | [10, 20, 30, 40, 50, 60, 70, 80, 90, 100], 34 | [10, 30, 50, 60, 70, 80, 90, 100], 35 | [20, 30, 40, 70, 80, 90, 100], 36 | [10, 20, 30, 80, 90, 100], 37 | [10, 20, 50, 60, 70, 80, ], 38 | [ 50, ], 39 | ] 40 | 41 | ints = [ 42 | [11, 12, 13, 14, 15, 16, 17, 18, 19, 110], 43 | [21, 23, 25, 26, 27, 28, 29, 210], 44 | [ 32, 33, 34, 37, 38, 39, 310], 45 | [41, 42, 43, 48, 49, 410], 46 | [51, 52, 55, 56, 57, 58, ], 47 | [ 65, ], 48 | ] 49 | 50 | strs = [ 51 | ['a','b','c','d','e','f','g','h','i','j'], 52 | ['k', 'l', 'm','n','o','p','q','r'], 53 | [ 's','t','u', 'v','w','x','y'], 54 | ['z','a','b', 'c','d','e'], 55 | ['f','g', 'h','i','j','k', ], 56 | [ 'l', ], 57 | ] 58 | 59 | def _createPeakLists(self): 60 | mz = [np.array(m) + np.random.normal(0, 1e-5, len(m)) for m in self.mz] 61 | pkls = [] 62 | for i in range(len(mz)): 63 | pl = PeakList('peaklist_' + str(i), mz[i], self.ints[i]) 64 | pl.add_attribute('str_attr', self.strs[i]) 65 | pkls += [pl] 66 | return pkls 67 | 68 | def _checkAlignmentResults(self, pm): 69 | self.assertTrue(np.allclose(np.unique(np.round(pm.to_peaklist('merged').mz)), np.arange(10, 110, step = 10))) 70 | self.assertTrue(all(np.allclose(mi[mm != 0], ri) for mi, mm, ri in zip(pm.intensity_matrix, pm.mz_matrix, self.ints))) 71 | 72 | def test_normal_alignment(self): 73 | pkls = self._createPeakLists() 74 | 75 | try: 76 | pm = align_peaks(pkls, ppm = 2.0, block_size = 5, fixed_block = True, edge_extend = 10, ncpus = 2) 77 | # print pm.attr_matrix('str_attr') 78 | # print pm.attr_mean_vector('str_attr') 79 | except Exception as e: 80 | self.fail('alignment failed: ' + str(e)) 81 | 82 | self._checkAlignmentResults(pm) 83 | 84 | def test_block_size(self): 85 | pkls 
= self._createPeakLists() 86 | try: 87 | pm = align_peaks(pkls, ppm = 2.0, block_size = 1, fixed_block = True, edge_extend = 10, ncpus = 2) 88 | except Exception as e: 89 | self.fail('alignment failed: ' + str(e)) 90 | self._checkAlignmentResults(pm) 91 | 92 | pkls = self._createPeakLists() 93 | try: 94 | pm = align_peaks(pkls, ppm = 2.0, block_size = 20, fixed_block = True, edge_extend = 10, ncpus = 2) 95 | except Exception as e: 96 | self.fail('alignment failed: ' + str(e)) 97 | self._checkAlignmentResults(pm) 98 | 99 | def test_ppm(self): 100 | pkls = self._createPeakLists() 101 | 102 | try: 103 | pm = align_peaks(pkls, ppm = 1e+10, block_size = 5, fixed_block = True, edge_extend = 10, ncpus = 2) 104 | except Exception as e: 105 | self.fail('alignment failed: ' + str(e)) 106 | 107 | self.assertTrue(np.allclose(pm.to_peaklist('merged').mz, [np.mean(list(map(np.mean, self.mz)))])) 108 | self.assertTrue(np.allclose(pm.intensity_matrix.flatten(), list(map(np.mean, self.ints)))) 109 | self.assertTrue(np.allclose(pm.attr_matrix('intra_count').flatten(), list(map(len, self.mz)))) 110 | 111 | try: 112 | pm = align_peaks(pkls, ppm = 1e-10, block_size = 5, fixed_block = True, edge_extend = 10, ncpus = 2) 113 | except Exception as e: 114 | self.fail('alignment failed: ' + str(e)) 115 | 116 | self.assertTrue(np.allclose(pm.to_peaklist('merged').mz, np.sort(reduce(lambda x,y: x+y, list(map(list, self.mz)))))) 117 | self.assertTrue(np.allclose(np.sort(np.sum(pm.intensity_matrix, axis = 0)), np.sort(reduce(lambda x,y: x+y, self.ints)))) 118 | self.assertTrue(np.allclose(np.sum(pm.attr_matrix('intra_count'), axis = 0), np.ones(pm.shape[1]))) 119 | 120 | def test_single_peaklist(self): 121 | pkls = [PeakList('peaklist_0', np.arange(10, 110, step = 10), np.arange(10) + 11)] 122 | 123 | try: 124 | pm = align_peaks(pkls, ppm = 2.0, block_size = 5, fixed_block = True, edge_extend = 10, ncpus = 2) 125 | except Exception as e: 126 | self.fail('alignment failed: ' + str(e)) 127 | 128 | 
self.assertTrue(np.allclose(pm.to_peaklist('merged').mz, np.arange(10, 110, step = 10))) 129 | self.assertTrue(np.allclose(pm.intensity_matrix, [np.arange(10) + 11])) 130 | 131 | def test_special_peaklists(self): 132 | pkls = [PeakList('peaklist_' + str(i), np.ones(10) * 10, np.ones(10)) for i in range(6)] 133 | 134 | try: 135 | pm = align_peaks(pkls, ppm = 2.0, block_size = 5, fixed_block = False, edge_extend = 10, ncpus = 2) 136 | except Exception as e: 137 | self.fail('alignment failed: ' + str(e)) 138 | 139 | self.assertTrue(np.allclose(pm.to_peaklist('merged').mz, [10.])) 140 | self.assertTrue(np.allclose(np.sum(pm.intensity_matrix, axis = 0), [6])) 141 | self.assertTrue(np.allclose(np.sum(pm.attr_matrix('intra_count'), axis = 0), [60])) 142 | 143 | try: 144 | pm = align_peaks(pkls, ppm = 1e-10, block_size = 1, fixed_block = True, edge_extend = 1, ncpus = 2) 145 | except Exception as e: 146 | self.fail('alignment failed: ' + str(e)) 147 | 148 | self.assertTrue(np.allclose(pm.to_peaklist('merged').mz, [10.])) 149 | self.assertTrue(np.allclose(np.sum(pm.intensity_matrix, axis = 0), [6])) 150 | self.assertTrue(np.allclose(np.sum(pm.attr_matrix('intra_count'), axis = 0), [60])) 151 | 152 | # may take a while to run 153 | # def test_large_peaklists(self): 154 | # pkls = [PeakList('peaklist_' + str(i), 155 | # np.sort(np.random.uniform(100, 1200, size = 10000)), 156 | # np.random.normal(100, 10, size = 10000)) 157 | # for i in range(100)] 158 | # 159 | # try: 160 | # pm = align_peaks(pkls, ppm = 2.0, block_size = 5000, fixed_block = False, edge_extend = 10, ncpus = 2) 161 | # except Exception, e: 162 | # self.fail('alignment failed: ' + str(e)) 163 | 164 | 165 | if __name__ == '__main__': 166 | unittest.main() 167 | -------------------------------------------------------------------------------- /tests/test_peak_filters.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | 
# Copyright © 2017-2020 Ralf Weber, Albert Zhou. 5 | # 6 | # This file is part of DIMSpy. 7 | # 8 | # DIMSpy is free software: you can redistribute it and/or modify 9 | # it under the terms of the GNU General Public License as published by 10 | # the Free Software Foundation, either version 3 of the License, or 11 | # (at your option) any later version. 12 | # 13 | # DIMSpy is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU General Public License 19 | # along with DIMSpy. If not, see <https://www.gnu.org/licenses/>. 20 | # 21 | 22 | 23 | import unittest 24 | 25 | from dimspy.models.peaklist_tags import PeakList_Tags 26 | from dimspy.process.peak_filters import * 27 | 28 | 29 | class PeakFiltersTestCase(unittest.TestCase): 30 | @staticmethod 31 | def _createPeakList(): 32 | pkl = PeakList('peaklist', np.arange(10, dtype = float), np.arange(10, dtype = float) + 1) 33 | pkl.add_attribute('snr', (np.arange(10, dtype = float) + 1) / 10) 34 | return pkl 35 | 36 | @staticmethod 37 | def _createPeakMatrix(): 38 | pids, tags = list(zip(*[ 39 | ('sample_1_1', PeakList_Tags('sample', treatment = 'compound_1', time_point = '1hr', plate = 1, order = 1)), 40 | ('sample_1_2', PeakList_Tags('sample', treatment = 'compound_1', time_point = '6hr', plate = 1, order = 2)), 41 | ('QC_1', PeakList_Tags('qc', plate = 1, order = 3)), 42 | ('Blank_1', PeakList_Tags('blank', plate = 1, order = 4)), 43 | ('sample_2_1', PeakList_Tags('sample', treatment = 'compound_2', time_point = '1hr', plate = 2, order = 1)), 44 | ('sample_2_2', PeakList_Tags('sample', treatment = 'compound_2', time_point = '6hr', plate = 2, order = 2)), 45 | ('QC_2', PeakList_Tags('qc', plate = 2, order = 3)), 46 | ('Blank_2', PeakList_Tags('blank', plate = 2, order = 4)), 47 | ])) 48 | 49 | mzs =
np.tile(np.arange(0, 1000, step = 100, dtype = float), (8, 1)) 50 | ints = np.arange(80, dtype = float).reshape((8, 10)) / 20. 51 | ints[3, 1] = ints[7, 1] = ints[7, 3] = 0 # test blank filter 52 | ics = np.array([[1, 2] * 5] * 8) 53 | 54 | return PeakMatrix(pids, tags, (('mz', mzs), ('intensity', ints), ('intra_count', ics))) 55 | 56 | # peaklist filters 57 | def test_peaklist_attr_filter(self): 58 | pkl = self._createPeakList() 59 | 60 | try: 61 | filter_attr(pkl, 'snr', 0.5, flag_index = 2) 62 | except Exception as e: 63 | self.fail('filter peaklist attribute failed: ' + str(e)) 64 | self.assertListEqual(pkl.snr.tolist(), [0.1, 0.2, 0.3, 0.4, 0.5]) 65 | self.assertTupleEqual(pkl.attributes, ('mz', 'intensity', 'snr_flag', 'snr')) 66 | 67 | self.assertRaises(AttributeError, lambda: filter_attr(pkl, 'not_exists', 0.5)) 68 | self.assertRaises(AttributeError, lambda: filter_attr(pkl, 'snr', 0.6)) 69 | self.assertRaises(ValueError, lambda: filter_attr(pkl, 'snr')) 70 | 71 | filter_attr(pkl, 'snr', min_threshold = 0.4, max_threshold = 0.4, flag_name = 'new_snr_flag') 72 | self.assertListEqual(pkl.mz.tolist(), [3]) 73 | 74 | def test_peaklist_ringing_filter(self): 75 | pkl = self._createPeakList() 76 | 77 | try: 78 | filter_ringing(pkl, threshold = 0.9, bin_size = 3.0) 79 | except Exception as e: 80 | self.fail('filter peaklist ringing failed: ' + str(e)) 81 | self.assertListEqual(pkl.mz.tolist(), [2., 5., 8., 9.]) 82 | 83 | def test_peaklist_mz_ranges(self): 84 | pkl = self._createPeakList() 85 | 86 | try: 87 | filter_mz_ranges(pkl, [(1.,3.), (5.,8.)]) 88 | except Exception as e: 89 | self.fail('filter peaklist mz ranges failed: ' + str(e)) 90 | self.assertListEqual(pkl.mz.tolist(), [0., 4., 9.]) 91 | 92 | # peakmatrix filters 93 | def test_peak_matrix_rsd_filter(self): 94 | pm = self._createPeakMatrix() 95 | 96 | try: 97 | pm = filter_rsd(pm, 62, 'qc') 98 | except Exception as e: 99 | self.fail('filter peak_matrix rsd failed: ' + str(e)) 100 | 
self.assertTrue(np.allclose(pm.rsd('qc'), 101 | [61.48754619, 60.17930052, 58.92556509, 57.72300254])) 102 | 103 | self.assertRaises(AttributeError, lambda: filter_rsd(pm, 45, 'not_QC')) 104 | 105 | def test_peak_matrix_fraction_filter(self): 106 | pm = self._createPeakMatrix() 107 | for attr in ('mz', 'intensity', 'intra_count'): pm._attr_dict[attr][:,1] = 0 108 | 109 | try: 110 | pm = filter_fraction(pm, 1) 111 | except Exception as e: 112 | self.fail('filter peak_matrix fraction failed: ' + str(e)) 113 | self.assertEqual(pm.shape[1], 9) 114 | 115 | pm = self._createPeakMatrix() 116 | for attr in ('mz', 'intensity', 'intra_count'): 117 | pm._attr_dict[attr][:,1] *= [1, 1, 1, 0, 1, 1, 1, 0] 118 | pm._attr_dict[attr][:,2] *= [1, 1, 1, 1, 1, 1, 0, 0] 119 | 120 | pm = filter_fraction(pm, 0.6, within_classes = True, class_tag_type = 'plate') 121 | self.assertEqual(pm.shape[1], 10) 122 | self.assertRaises(AttributeError, lambda: filter_fraction(pm, 1, within_classes = True, class_tag_type = 'time_point')) 123 | self.assertRaises(KeyError, lambda: filter_fraction(pm, 1, within_classes = True)) 124 | 125 | def test_peak_matrix_blank_filter(self): 126 | pm = self._createPeakMatrix() 127 | pm = filter_blank_peaks(pm, 'blank', 0.3) 128 | self.assertTupleEqual(pm.shape, (6, 10)) 129 | 130 | pm = self._createPeakMatrix() 131 | pm = filter_blank_peaks(pm, 'blank', 0.4) 132 | self.assertTupleEqual(pm.shape, (6, 2)) 133 | 134 | pm = self._createPeakMatrix() 135 | pm = filter_blank_peaks(pm, 'blank', 0.3, method = 'max') 136 | self.assertTupleEqual(pm.shape, (6, 2)) 137 | 138 | pm = self._createPeakMatrix() 139 | pm = filter_blank_peaks(pm, 'blank', 0.3, fold_threshold = 2) 140 | self.assertTupleEqual(pm.shape, (6, 1)) 141 | 142 | pm = self._createPeakMatrix() 143 | self.assertRaises(ValueError, lambda: filter_blank_peaks(pm, 'Not_blank', 0.3)) 144 | 145 | 146 | if __name__ == '__main__': 147 | unittest.main() 148 | 
# --------------------------------------------------------------------------------
# /tests/test_peak_matrix.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright © 2017-2020 Ralf Weber, Albert Zhou.
#
# This file is part of DIMSpy.
#
# DIMSpy is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# DIMSpy is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with DIMSpy.  If not, see <https://www.gnu.org/licenses/>.
#


import pickle as cp
import unittest

import numpy as np
from dimspy.models.peak_matrix import PeakMatrix
from dimspy.models.peak_matrix import mask_peakmatrix, unmask_peakmatrix, mask_all_peakmatrix, unmask_all_peakmatrix
from dimspy.models.peaklist_tags import Tag, PeakList_Tags


class PeakMatrixTestCase(unittest.TestCase):
    """Tests for ``PeakMatrix`` and the mask/unmask context managers that wrap it."""

    @staticmethod
    def _createPeakMatrix():
        # Six peaklists on two plates; each plate holds two samples and one QC.
        entries = [
            ('sample_1_1', PeakList_Tags('sample', treatment='compound_1', time_point='1hr', plate=1, order=1)),
            ('sample_1_2', PeakList_Tags('sample', treatment='compound_1', time_point='6hr', plate=1, order=2)),
            ('QC_1', PeakList_Tags('qc', plate=1, order=3)),
            ('sample_2_1', PeakList_Tags('sample', treatment='compound_2', time_point='1hr', plate=2, order=1)),
            ('sample_2_2', PeakList_Tags('sample', treatment='compound_2', time_point='6hr', plate=2, order=2)),
            ('QC_2', PeakList_Tags('qc', plate=2, order=3)),
        ]
        pids = tuple(pid for pid, _ in entries)
        tags = tuple(tag for _, tag in entries)

        mzs = np.tile(np.arange(0, 1000, step=100, dtype=float) + 1, (6, 1))
        ints = np.arange(60, dtype=float).reshape((6, 10)) / 20.
        ics = np.array([[2] * 10] * 6)
        # simulate missing values: zero the diagonal and the whole third column
        for values in (mzs, ints, ics):
            np.fill_diagonal(values, 0)
            values[:, 2] = 0
        return PeakMatrix(pids, tags, [('mz', mzs), ('intensity', ints), ('intra_count', ics)])

    def test_pm_creation(self):
        try:
            self._createPeakMatrix()
        except Exception as e:
            self.fail('create PeakMatrix object failed: ' + str(e))

    def test_pm_properties(self):
        matrix = self._createPeakMatrix()

        # mask setter / reset
        matrix.mask = [True, False] * 3
        self.assertTrue(np.all(matrix.mask == [True, False, True, False, True, False]))
        matrix.mask = None
        self.assertTrue(np.all(matrix.mask == [False] * 6))

        # no flags yet, so every peak counts as flagged
        self.assertTupleEqual(matrix.flag_names, ())
        self.assertTrue(np.all(matrix.flags == np.ones(10)))

        self.assertTupleEqual(matrix.attributes, ('mz', 'intensity', 'intra_count'))

        expected_ids = ('sample_1_1', 'sample_1_2', 'QC_1', 'sample_2_1', 'sample_2_2', 'QC_2')
        self.assertTupleEqual(matrix.peaklist_ids, expected_ids)

        self.assertEqual(len(matrix.peaklist_tags), 6)
        self.assertEqual(matrix.peaklist_tag_types, {None, 'treatment', 'time_point', 'plate', 'order'})
        self.assertEqual(matrix.peaklist_tag_values,
                         {'sample', 'qc', 'compound_1', 'compound_2', '1hr', '6hr', 1, 2, 3})

        # shape follows the mask, full_shape does not
        matrix.mask = [True, False] * 3
        self.assertTupleEqual(matrix.shape, (3, 10))
        self.assertTupleEqual(matrix.full_shape, (6, 10))
        matrix.mask = None

        self.assertTrue(np.all(matrix.present == [5]*2+[0]+[5]*3+[6]*4))
        self.assertTrue(np.allclose(matrix.fraction, [0.83333333]*2+[0]+[0.83333333]*3+[1]*4))
        self.assertTrue(np.all(matrix.missing_values == [2]*2+[1]+[2]*3))
        self.assertTrue(np.all(matrix.occurrence == [10]*2+[0]+[10]*3+[12]*4))
        self.assertTrue(np.allclose(matrix.purity[~np.isnan(matrix.purity)], [0]*9))

        # temporarily change two intra counts of peak 1, then restore them
        intra_counts = matrix._attr_dict['intra_count']
        intra_counts[0, 1] = intra_counts[2, 1] = 1
        self.assertTrue(np.isclose(matrix.purity[1], 0.4))
        intra_counts[0, 1] = intra_counts[2, 1] = 2

        matrix.add_flag('odd_flag', [True, False] * 5)
        self.assertTrue(np.all(matrix.property('present') == [5, 0, 5, 6, 6]))
        self.assertTrue(np.all(matrix.property('present', flagged_only=False) == [5]*2+[0]+[5]*3+[6]*4))
        matrix.drop_flag('odd_flag')

        # mean vectors; NaN (the all-missing third peak) is mapped to 0 on both sides before comparing
        expected_mz_mean = np.arange(0, 1000, step=100, dtype=float) + 1
        expected_mz_mean[2] = np.nan
        self.assertTrue(np.allclose(np.nan_to_num(matrix.mz_mean_vector),
                                    np.nan_to_num(expected_mz_mean)))
        expected_int_mean_x20 = [30., 29., np.nan, 27., 26., 25., 31., 32., 33., 34.]
        self.assertTrue(np.allclose(np.nan_to_num(matrix.intensity_mean_vector*20),
                                    np.nan_to_num(expected_int_mean_x20)))

    def test_pm_mask(self):
        matrix = self._createPeakMatrix()

        self.assertEqual({tag.value for tag in matrix.tags_of('plate')}, {1, 2})
        self.assertEqual({tag.value for tag in matrix.tags_of()}, {'sample', 'qc'})
        with self.assertRaises(KeyError):
            matrix.tags_of('treatment')
        with self.assertRaises(KeyError):
            matrix.tags_of('not_exist')

        # --- masking ---
        matrix.mask_tags(1)
        self.assertTupleEqual(matrix.peaklist_ids,
                              ('sample_1_1', 'sample_1_2', 'QC_1', 'sample_2_1', 'sample_2_2', 'QC_2'))
        matrix.mask_tags('qc', plate=1)  # mask samples with both of the two
        self.assertTupleEqual(matrix.peaklist_ids,
                              ('sample_1_1', 'sample_1_2', 'sample_2_1', 'sample_2_2', 'QC_2'))
        matrix.mask = None
        matrix.mask_tags('qc')
        self.assertTupleEqual(matrix.peaklist_ids, ('sample_1_1', 'sample_1_2', 'sample_2_1', 'sample_2_2'))
        matrix.mask = None
        matrix.mask_tags('qc').mask_tags(plate=1)
        self.assertTupleEqual(matrix.peaklist_ids, ('sample_2_1', 'sample_2_2'))
        matrix.mask = None
        matrix.mask_tags('not_exist')
        self.assertTupleEqual(matrix.peaklist_ids,
                              ('sample_1_1', 'sample_1_2', 'QC_1', 'sample_2_1', 'sample_2_2', 'QC_2'))

        # --- unmasking ---
        matrix.mask = [True] * 6
        matrix.unmask_tags('qc', plate=1)  # unmask samples with both of the two
        self.assertTupleEqual(matrix.peaklist_ids, ('QC_1',))
        matrix.mask = [True] * 6
        matrix.unmask_tags('qc')
        self.assertTupleEqual(matrix.peaklist_ids, ('QC_1', 'QC_2'))
        matrix.mask = [True] * 6
        matrix.unmask_tags('qc').unmask_tags(plate=1)
        self.assertTupleEqual(matrix.peaklist_ids, ('sample_1_1', 'sample_1_2', 'QC_1', 'QC_2'))
        matrix.mask = [True] * 6
        matrix.unmask_tags('not_exist')
        self.assertTupleEqual(matrix.peaklist_ids, ())

        # --- override flag and the mask-all / unmask-all context managers ---
        matrix.unmask_tags('qc', override=True)
        self.assertTupleEqual(matrix.peaklist_ids, ('QC_1', 'QC_2'))
        with mask_all_peakmatrix(matrix) as scoped:
            scoped.unmask_tags('sample')
            self.assertTupleEqual(matrix.peaklist_ids,
                                  ('sample_1_1', 'sample_1_2', 'sample_2_1', 'sample_2_2'))

        matrix.mask_tags('qc', override=True)
        self.assertTupleEqual(matrix.peaklist_ids, ('sample_1_1', 'sample_1_2', 'sample_2_1', 'sample_2_2'))
        with unmask_all_peakmatrix(matrix) as scoped:
            scoped.mask_tags('sample')
            self.assertTupleEqual(scoped.peaklist_ids, ('QC_1', 'QC_2'))
        self.assertTupleEqual(matrix.peaklist_ids, ('sample_1_1', 'sample_1_2', 'sample_2_1', 'sample_2_2'))

        # --- tag-selective context managers ---
        matrix.mask = None
        with unmask_peakmatrix(matrix, plate=1) as scoped:
            self.assertTupleEqual(scoped.peaklist_ids, ('sample_1_1', 'sample_1_2', 'QC_1'))
            self.assertTupleEqual(scoped.full_shape, (6, 10))
        self.assertEqual(len(matrix.peaklist_ids), 6)

        with mask_peakmatrix(matrix, plate=2) as scoped:
            self.assertTupleEqual(scoped.peaklist_ids, ('sample_1_1', 'sample_1_2', 'QC_1'))
            with unmask_all_peakmatrix(matrix) as inner:
                self.assertTupleEqual(inner.peaklist_ids,
                                      ('sample_1_1', 'sample_1_2', 'QC_1', 'sample_2_1', 'sample_2_2', 'QC_2'))

        with mask_peakmatrix(matrix, 'qc') as scoped:
            scoped.remove_samples((1, 2))
        self.assertTupleEqual(matrix.peaklist_ids, ('sample_1_1', 'QC_1', 'sample_2_2', 'QC_2'))

    def test_pm_flags(self):
        matrix = self._createPeakMatrix()

        self.assertTrue(np.sum(matrix.flags) == 10)

        matrix.add_flag('qua_flag', [True, True, False, True] * 2 + [True, True], flagged_only=True)
        matrix.add_flag('odd_flag', [True, False] * 5, flagged_only=False)

        self.assertTupleEqual(matrix.flag_names, ('qua_flag', 'odd_flag'))
        self.assertTrue(np.all(matrix.flags == [1, 0, 0, 0, 1, 0, 0, 0, 1, 0]))
        self.assertTrue(np.all(matrix.flag_values('odd_flag') == [1, 0] * 5))

        with mask_peakmatrix(matrix, 'qc') as scoped:
            self.assertTupleEqual(scoped.shape, (4, 3))
        self.assertTupleEqual(scoped.shape, (6, 3))
        self.assertTupleEqual(scoped.full_shape, (6, 10))

        with mask_peakmatrix(matrix, plate=1) as scoped:
            expected_mz = np.array([
                [  1.,   0., 401., 601., 801.],
                [  1.,   0.,   0., 601., 801.],
                [  1.,   0., 401., 601., 801.],
            ])
            scoped.drop_flag('qua_flag')
            self.assertTrue(np.allclose(scoped.mz_matrix, expected_mz))
        self.assertTupleEqual(matrix.shape, (6, 5))

    def test_pm_access(self):
        def without_nan(values):
            # keep only the entries that are not NaN
            return values[~np.isnan(values)]

        matrix = self._createPeakMatrix()

        matrix.add_flag('even_flag', [False, True] * 5)
        self.assertTrue(np.allclose(matrix.attr_mean_vector('mz'),
                                    [101.0, 301.0, 501.0, 701.0, 901.0]))
        expected_all = [1.0, 101.0, np.nan, 301.0, 401.0, 501.0, 601.0, 701.0, 801.0, 901.0]
        self.assertTrue(np.allclose(np.nan_to_num(matrix.attr_mean_vector('mz', flagged_only=False)),
                                    np.nan_to_num(expected_all)))
        self.assertTrue(np.allclose(without_nan(matrix.rsd('qc')),
                                    [58.92556509, 55.82421956, 50.50762722, 48.21182598]))
        self.assertTrue(np.allclose(without_nan(matrix.rsd()),
                                    [66.32891055, 76.80163464, 63.24555320, 58.46339666, 55.02437333]))

        matrix.remove_peaks((0, 1), flagged_only=False)
        self.assertTrue(np.allclose(without_nan(matrix.rsd('qc')),
                                    [55.82421956, 50.50762722, 48.21182598]))
        matrix.remove_peaks((0, 1), flagged_only=True)
        self.assertTrue(np.allclose(matrix.rsd('qc'),
                                    [50.50762722, 48.21182598]))

        with self.assertRaises(AttributeError):
            matrix.rsd('no_such_tag')

        with mask_peakmatrix(matrix, 'sample', plate=1):
            matrix.remove_samples((0, 1))
            self.assertTupleEqual(matrix.peaklist_ids, ('sample_2_2', 'QC_2'))
            matrix.remove_samples((1, 2), masked_only=False)
            self.assertTupleEqual(matrix.peaklist_ids, ('QC_2',))
        self.assertTupleEqual(matrix.peaklist_ids, ('sample_1_1', 'QC_2'))

    def test_pm_exports(self):
        matrix = self._createPeakMatrix()

        matrix.add_flag('even_flag', [False, True] * 5)
        with mask_peakmatrix(matrix, plate=1):
            peaklists = matrix.extract_peaklists()
        self.assertListEqual([pkl.ID for pkl in peaklists], ['sample_2_1', 'sample_2_2', 'QC_2'])

        expected_mzs = [
            [101.0, 501.0, 701.0, 901.0],
            [101.0, 301.0, 501.0, 701.0, 901.0],
            [101.0, 301.0, 701.0, 901.0],
        ]
        self.assertTrue(all([np.allclose(pkl.mz, mz) for pkl, mz in zip(peaklists, expected_mzs)]))

        matrix.drop_flag('even_flag')
        merged = matrix.to_peaklist('merged_pkl')
        self.assertTrue(np.allclose(merged.mz, [1.0, 101.0, 301.0, 401.0, 501.0, 601.0, 701.0, 801.0, 901.0]))

        # smoke test only: the returned string is not inspected
        rsd_tags = (Tag('compound_1', 'treatment'), Tag('compound_2', 'treatment'), 'qc')
        matrix.to_str(comprehensive=True, rsd_tags=rsd_tags)

    def test_pm_pickle(self):
        matrix = self._createPeakMatrix()
        try:
            dumped = cp.dumps(matrix)
            matrix = cp.loads(dumped)
        except Exception as e:
            self.fail('PeakMatrix pickle failed: ' + str(e))
        self.assertTupleEqual(matrix.attributes, ('mz', 'intensity', 'intra_count'))


if __name__ == '__main__':
    unittest.main()

# --------------------------------------------------------------------------------
# /tests/test_peaklist.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright © 2017-2020 Ralf Weber, Albert Zhou.
#
# This file is part of DIMSpy.
#
# DIMSpy is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# DIMSpy is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with DIMSpy.  If not, see <https://www.gnu.org/licenses/>.
#


import pickle as cp
import unittest

import numpy as np
import pandas as pd
from dimspy.models.peaklist import PeakList


class PeakListTestCase(unittest.TestCase):
    """Tests for ``PeakList``: properties, attribute and peak operations, built-ins, exports, pickling."""

    @staticmethod
    def _createPeakList():
        # Ten peaks at mz 0, 100, ..., 900 with random positive intensities (unseeded).
        mz_values = np.arange(0, 1000, step=100)
        intensities = np.abs(np.random.normal(10, 3, size=10))
        return PeakList('sample_peaklist', mz_values, intensities, mz_range=(100, 1000), frag_mode='slb')

    def test_pl_creation(self):
        try:
            self._createPeakList()
        except Exception as e:
            self.fail('create PeakList object failed: ' + str(e))

    def test_pl_properties(self):
        peaks = self._createPeakList()
        self.assertEqual(peaks.ID, 'sample_peaklist')

        # A flag attribute hides every second peak from the flagged views.
        peaks.add_attribute('odd_flag', [1, 0] * 5, is_flag=True)
        self.assertEqual(peaks.size, 5)
        self.assertEqual(peaks.full_size, 10)
        self.assertTupleEqual(peaks.shape, (5, 3))
        self.assertTupleEqual(peaks.full_shape, (10, 3))

        try:
            peaks.metadata.type = 'blank'
        except Exception as e:
            self.fail('access metadata failed: ' + str(e))
        self.assertListEqual(sorted(peaks.metadata.keys()), ['frag_mode', 'mz_range', 'type'])

        try:
            peaks.tags.add_tag('sample')
            peaks.tags.add_tag('passed_qc')
            peaks.tags.add_tag('high_dose', tag_type='treatment')
        except Exception as e:
            self.fail('access tags failed: ' + str(e))
        self.assertEqual(set(peaks.tags.tag_types), {None, 'treatment'})
        self.assertEqual(set(peaks.tags.tag_values), {'sample', 'passed_qc', 'high_dose'})

        self.assertTupleEqual(peaks.attributes, ('mz', 'intensity', 'odd_flag'))
        self.assertTupleEqual(peaks.flag_attributes, ('odd_flag',))

        self.assertTrue(np.all(peaks.flags == [1, 0] * 5))

        flagged_dims = (len(peaks.peaks), len(peaks.peaks[0]))
        self.assertTupleEqual(flagged_dims, (5, 3))
        table_dims = (len(peaks.dtable), len(peaks.dtable[0]))
        self.assertTupleEqual(table_dims, (10, 3))

    def test_pl_attribute_operations(self):
        peaks = self._createPeakList()

        self.assertTrue(peaks.has_attribute('mz'))
        self.assertFalse(peaks.has_attribute('snr'))
        self.assertFalse(peaks.has_attribute('flag'))  # flag is not a real attribute

        snr_values = np.array([20, 0] * 5, dtype=int)
        peaks.add_attribute('snr', snr_values, attr_dtype=float)
        peaks.add_attribute('snr_flag', snr_values > 10, is_flag=True)
        self.assertTrue(np.all(peaks.get_attribute('snr') > 10))
        self.assertTrue(np.all(peaks.get_attribute('snr', flagged_only=False) == snr_values))

        peaks.add_attribute('values_1', [0, 1] * 5, on_index=2, flagged_only=False)
        self.assertEqual(peaks.attributes[2], 'values_1')
        peaks.set_attribute('values_1', [1] * 5)  # snr_flag already masked odd peaks
        self.assertTrue(np.all(peaks.get_attribute('values_1', flagged_only=False) == np.ones(10)))
        peaks.set_attribute('values_1', [0] * 10, flagged_only=False)
        self.assertTrue(np.all(peaks.get_attribute('values_1', flagged_only=False) == np.zeros(10)))
        peaks.drop_attribute('values_1')
        self.assertFalse(peaks.has_attribute('values_1'))

        # invalid add_attribute calls
        with self.assertRaises(AttributeError):
            peaks.add_attribute('mz', np.ones(peaks.size))
        with self.assertRaises(AttributeError):
            peaks.add_attribute('snr', np.ones(peaks.size))
        with self.assertRaises(AttributeError):
            peaks.add_attribute('_dtable', np.ones(peaks.size))
        with self.assertRaises(ValueError):
            peaks.add_attribute('flags_1', np.arange(peaks.size), is_flag=True)
        with self.assertRaises(IndexError):
            peaks.add_attribute('values_2', np.arange(peaks.size), on_index=0)
        with self.assertRaises(IndexError):
            peaks.add_attribute('values_2', np.arange(peaks.size), on_index=-peaks.shape[1])
        with self.assertRaises(ValueError):
            peaks.add_attribute('values_2', np.arange(peaks.full_size))

        # invalid set_attribute calls
        with self.assertRaises(AttributeError):
            peaks.set_attribute('flags', np.ones_like(peaks.size))
        with self.assertRaises(AttributeError):
            peaks.set_attribute('values_3', np.arange(peaks.size))
        with self.assertRaises(ValueError):
            peaks.set_attribute('mz', np.arange(10)[::-1], flagged_only=False)

        try:
            peaks.set_attribute('mz', np.arange(10)[::-1], flagged_only=False, unsorted_mz=True)
        except Exception as e:
            self.fail('unsorted_mz flag failed: ' + str(e))
        # setting mz reversed the snr_flag
        self.assertTrue(np.all(peaks.get_attribute('mz') == np.arange(10)[1::2]))

        # unknown or protected attributes
        with self.assertRaises(AttributeError):
            peaks.get_attribute('values_4')
        with self.assertRaises(AttributeError):
            peaks.drop_attribute('values_4')
        with self.assertRaises(AttributeError):
            peaks.drop_attribute('mz')

    def test_pl_peaks_operations(self):
        peaks = self._createPeakList()
        peaks.add_attribute('value_flag', [1, 0] * 5, is_flag=True)

        # mz = 0, (100), 200, (300), 400, (500), 600, (700), 800, (900)
        peaks.set_peak(4, (50, 10., True), flagged_only=False)
        self.assertTupleEqual((0, 50, 200, 600, 800), tuple(peaks.get_attribute('mz')))

        # mz = 0, 50, (100), 200, (300), (500), 600, (700), 800, (900)
        peaks.insert_peak((150, 10., True))
        self.assertTupleEqual((0, 50, 150, 200, 600, 800), tuple(peaks.get_attribute('mz')))
        self.assertEqual(peaks.full_size, 11)

        # mz = 0, 50, (100), 150, 200, (300), (500), 600, (700), 800, (900)
        peaks.remove_peak((1, 2))
        all_mz = tuple(peaks.get_attribute('mz', flagged_only=False))
        self.assertTupleEqual((0, 100, 200, 300, 500, 600, 700, 800, 900), all_mz)
        peaks.remove_peak(1, flagged_only=False)
        all_mz = tuple(peaks.get_attribute('mz', flagged_only=False))
        self.assertTupleEqual((0, 200, 300, 500, 600, 700, 800, 900), all_mz)
        self.assertEqual(peaks.size, 4)
        self.assertEqual(peaks.full_size, 8)

        # mz = 0, 200, (300), (500), 600, (700), 800, (900)
        with self.assertRaises(AttributeError):
            peaks.cleanup_unflagged_peaks('mz')
        with self.assertRaises(AttributeError):
            peaks.cleanup_unflagged_peaks('not_exists')
        peaks.cleanup_unflagged_peaks('value_flag')
        self.assertEqual(peaks.full_size, peaks.size)
        peaks.cleanup_unflagged_peaks()
        self.assertTupleEqual((0, 200, 600, 800), tuple(peaks.get_attribute('mz')))

    def test_pl_build_ins(self):
        peaks = self._createPeakList()

        try:
            str(peaks)
        except Exception as e:
            self.fail('__str__ failed: ' + str(e))
        self.assertEqual(len(peaks), 10)

        peaks.add_attribute('value_flag', [1, 0] * 5, is_flag=True)
        # mz = 0, (100), 200, (300), 400, (500), 600, (700), 800, (900)
        self.assertEqual(len(peaks), 5)

        self.assertListEqual([0, 200, 400, 600, 800], peaks.mz.tolist())
        self.assertListEqual(np.arange(0, 1000, step=100).tolist(), peaks.mz_all.tolist())

        self.assertListEqual([0, 200, 400, 600, 800], peaks['mz'].tolist())
        first_three = peaks[:3].tolist()
        self.assertListEqual([0, 200, 400], list(list(zip(*first_three))[0]))

    def test_pl_exports(self):
        peaks = self._createPeakList()
        expected_mz = np.arange(0, 1000, step=100).tolist()

        try:
            as_list = peaks.to_list()
        except Exception as e:
            self.fail('to_list function failed: ' + str(e))
        self.assertListEqual(expected_mz, list(as_list[0]))

        try:
            as_text = peaks.to_str(',')
        except Exception as e:
            self.fail('to_str function failed: ' + str(e))
        # skip the header line, split each record, and read back the first column
        records = [line.split(',') for line in as_text.split('\n')[1:]]
        first_column = list(zip(*records))[0]
        self.assertListEqual(expected_mz, list(map(float, first_column)))

        try:
            frame = peaks.to_df()
        except Exception as e:
            self.fail('to_df function failed: ' + str(e))

        title, data = zip(*peaks.to_dict().items())
        self.assertTrue(frame.equals(pd.DataFrame(list(zip(*data)), columns=title)))

    def test_pl_pickle(self):
        peaks = self._createPeakList()
        try:
            dumped = cp.dumps(peaks)
            peaks = cp.loads(dumped)
        except Exception as e:
            self.fail('PeakList pickle failed: ' + str(e))
        self.assertTupleEqual(peaks.attributes, ('mz', 'intensity'))


if __name__ == '__main__':
    unittest.main()

# --------------------------------------------------------------------------------
# /tests/test_peaklist_metadata.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright © 2017-2020 Ralf Weber, Albert Zhou.
#
# This file is part of DIMSpy.
#
# DIMSpy is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# DIMSpy is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with DIMSpy.  If not, see <https://www.gnu.org/licenses/>.
#


import pickle as cp
import unittest

from dimspy.models.peaklist_metadata import PeakList_Metadata


class PeakListMetadataTestCase(unittest.TestCase):
    """Tests for the dict-like ``PeakList_Metadata`` container."""

    @staticmethod
    def _createMetadata():
        # Three entries: a -> 1, b -> 2, c -> 3.
        initial_items = (('a', 1), ('b', 2), ('c', 3))
        return PeakList_Metadata(initial_items)

    def test_pl_meta_creation(self):
        try:
            self._createMetadata()
        except Exception as e:
            self.fail('create metadata object failed: ' + str(e))

    def test_pl_meta_operations(self):
        metadata = self._createMetadata()

        # read access
        self.assertListEqual(sorted(metadata.keys()), ['a', 'b', 'c'])
        self.assertListEqual(sorted(metadata.values()), [1, 2, 3])
        self.assertListEqual(sorted(metadata.items()), [('a', 1), ('b', 2), ('c', 3)])
        for key, expected in (('a', 1), ('b', 2), ('c', 3)):
            self.assertTrue(metadata[key] == expected)
        self.assertTrue('a' in metadata)
        self.assertFalse('d' in metadata)
        self.assertTrue(metadata.get('a', 4) == 1)
        self.assertTrue(metadata.get('d', 4) == 4)

        # overwrite, insert, delete
        metadata['a'] = 4
        self.assertEqual(metadata['a'], 4)
        metadata['d'] = 5
        self.assertEqual(metadata['d'], 5)
        del metadata['b']
        self.assertFalse('b' in metadata)

    def test_pl_meta_pickle(self):
        metadata = self._createMetadata()
        try:
            dumped = cp.dumps(metadata)
            metadata = cp.loads(dumped)
        except Exception as e:
            self.fail('metadata pickle failed: ' + str(e))
        for key, expected in (('a', 1), ('b', 2), ('c', 3)):
            self.assertTrue(metadata[key] == expected)


if __name__ == '__main__':
    unittest.main()

# --------------------------------------------------------------------------------
# /tests/test_peaklist_tags.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright © 2017-2020 Ralf Weber, Albert Zhou.
#
# This file is part of DIMSpy.
#
# DIMSpy is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# DIMSpy is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with DIMSpy.  If not, see <https://www.gnu.org/licenses/>.
#


import pickle as cp
import unittest

from dimspy.models.peaklist_tags import Tag, PeakList_Tags


class TagTestCase(unittest.TestCase):
    """Tests for a single ``Tag``: construction, properties, comparison and string form."""

    def test_tag_creation(self):
        try:
            untyped = Tag('1')
            typed = Tag(2, 'batch')
            copied = Tag(typed)
        except Exception as e:
            self.fail('create tag object failed: ' + str(e))

        self.assertTrue(untyped.value == '1')
        self.assertTrue(untyped.ttype is None)
        self.assertTrue(typed.value == 2)
        self.assertTrue(typed.ttype == 'batch')
        # a tag built from another tag carries over both value and type
        self.assertTrue(copied.value == 2)
        self.assertTrue(copied.ttype == 'batch')
        # the string 'None' is rejected as a tag type
        with self.assertRaises(KeyError):
            Tag(9, ttype='None')

    def test_tag_property(self):
        tag = Tag('value', ttype='type')
        self.assertTrue(tag.typed)

        tag.value = 1
        tag.ttype = None
        self.assertTrue(tag.value == 1)
        self.assertTrue(tag.ttype is None)
        self.assertFalse(tag.typed)

        with self.assertRaises(KeyError):
            tag.ttype = 'None'

    def test_tag_magic(self):
        tag = Tag(1, ttype='type')

        # a typed tag equals an identical tag, but not its bare value
        self.assertEqual(tag, Tag(1, 'type'))
        self.assertNotEqual(tag, 1)

        # an untyped tag compares equal to its bare value, from either side
        tag.ttype = None
        self.assertEqual(tag, 1)
        self.assertTrue(1 == tag)
        self.assertFalse(1 != tag)
        self.assertTrue(2 != tag)
        self.assertTrue(tag in (1, 2, 3))
        self.assertTrue(1 in (tag, 2, 3))

        self.assertEqual(str(tag), '1')
        tag.ttype = 'type'
        self.assertEqual(str(tag), 'type:1')

class PeakListTagsTestCase(unittest.TestCase):
    """Tests for the ``PeakList_Tags`` collection: lookup, adding, dropping, export, pickling."""

    @staticmethod
    def _createTags():
        # Three untyped tags (0, 'str_tag', 'ustr_tag') plus two typed ones.
        first_typed = Tag(1, 'typed_tag1')
        return PeakList_Tags(0, 'str_tag', 'ustr_tag', first_typed, typed_tag2=2)

    def test_pl_tags_creation(self):
        try:
            self._createTags()
        except Exception as e:
            self.fail('create tags object failed: ' + str(e))

    def test_pl_tags_properties(self):
        collection = self._createTags()
        self.assertEqual(collection.tag_types, {None, 'typed_tag1', 'typed_tag2'})
        self.assertEqual(collection.tag_values, {0, 1, 2, 'str_tag', 'ustr_tag'})
        self.assertEqual(len(collection), 5)
        self.assertTrue(all([tag.ttype is not None for tag in collection.typed_tags]))
        self.assertTrue(all([tag.ttype is None for tag in collection.untyped_tags]))

    def test_pl_tags_checking_methods(self):
        collection = self._createTags()

        # tag types
        self.assertTrue(collection.has_tag_type('typed_tag1'))
        self.assertTrue(collection.has_tag_type('typed_tag2'))
        self.assertTrue(collection.has_tag_type(None))
        self.assertFalse(collection.has_tag_type('not_exist'))

        # tag membership
        self.assertTrue(Tag(2, 'typed_tag2') in collection)
        for untyped_value in (0, 'str_tag', 'ustr_tag'):
            self.assertTrue(collection.has_tag(untyped_value))
        self.assertTrue(collection.has_tag(1, 'typed_tag1'))
        self.assertTrue(collection.has_tag(2, 'typed_tag2'))
        self.assertTrue(collection.has_tag(Tag(1, 'typed_tag1')))
        self.assertFalse(collection.has_tag(0, 'typed_tag1'))
        # typed values are not found when queried without their type
        self.assertFalse(collection.has_tag(1))
        self.assertFalse(collection.has_tag(2))
        self.assertFalse(collection.has_tag('not_exist'))
        self.assertFalse(collection.has_tag(1, 'wrong_type'))

        # lookup by type
        self.assertTupleEqual(collection.tag_of(), (0, 'str_tag', 'ustr_tag'))
        self.assertTrue(collection.tag_of('typed_tag1').value == 1)
        self.assertTrue(collection.tag_of('typed_tag2').value == 2)
        self.assertTrue(collection.tag_of('not_such_type') is None)

    def test_pl_tags_adding_methods(self):
        collection = self._createTags()

        # an already-used type or an already-present untyped value is rejected
        with self.assertRaises(KeyError):
            collection.add_tag(3, 'typed_tag1')
        with self.assertRaises(ValueError):
            collection.add_tag(0)
        with self.assertRaises(ValueError):
            collection.add_tag('ustr_tag')

        collection.add_tag(1)
        collection.add_tag(1, 'typed_tag3')
        collection.add_tag(Tag('new_value', 'typed_tag4'))
        self.assertEqual(collection.tag_types,
                         {None, 'typed_tag1', 'typed_tag2', 'typed_tag3', 'typed_tag4'})
        self.assertEqual(collection.tag_values, {0, 1, 2, 'new_value', 'str_tag', 'ustr_tag'})

    def test_pl_tags_dropping_methods(self):
        collection = self._createTags()

        collection.drop_tag(0)
        collection.drop_tag(1)
        collection.drop_tag(1, 'wrong_type')
        self.assertEqual(collection.tag_types, {None, 'typed_tag1', 'typed_tag2'})
        self.assertEqual(collection.tag_values, {1, 2, 'str_tag', 'ustr_tag'})
        collection.drop_tag('str_tag')
        collection.drop_tag('ustr_tag')
        self.assertEqual(collection.tag_types, {'typed_tag1', 'typed_tag2'})
        self.assertEqual(collection.tag_values, {1, 2})

        collection.drop_tag_type('typed_tag1')
        self.assertEqual(collection.tag_types, {'typed_tag2'})
        self.assertEqual(collection.tag_values, {2})

        collection.drop_all_tags()
        self.assertTupleEqual(collection.tags, ())

    def test_pl_tags_portals(self):
        collection = self._createTags()
        expected_pairs = [(0, None), ('str_tag', None), ('ustr_tag', None), (1, 'typed_tag1'), (2, 'typed_tag2')]
        self.assertListEqual(collection.to_list(), expected_pairs)
        self.assertEqual(collection.to_str(), '0, str_tag, ustr_tag, typed_tag1:1, typed_tag2:2')
        self.assertEqual(str(collection), '0, str_tag, ustr_tag, typed_tag1:1, typed_tag2:2')

    def test_pl_tags_pickle(self):
        collection = self._createTags()
        try:
            dumped = cp.dumps(collection)
            collection = cp.loads(dumped)
        except Exception as e:
            self.fail('tags pickle failed: ' + str(e))
        self.assertEqual(collection.tag_types, {None, 'typed_tag1', 'typed_tag2'})
        self.assertEqual(collection.tag_values, {0, 1, 2, 'str_tag', 'ustr_tag'})
        self.assertEqual(len(collection), 5)


if __name__ == '__main__':
    unittest.main()

# --------------------------------------------------------------------------------
# /tests/test_replicate_processing.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright © 2017-2020 Ralf Weber, Albert Zhou.
#
# This file is part of DIMSpy.
#
# DIMSpy is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# DIMSpy is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with DIMSpy.  If not, see <https://www.gnu.org/licenses/>.
def to_test_data(*args):
    """Join *args* onto the ``data/MTBLS79_subset`` directory beside this module."""
    return os.path.join(os.path.dirname(os.path.realpath(__file__)), "data", "MTBLS79_subset", *args)


def to_test_results(*args):
    """Join *args* onto the ``results`` directory beside this module."""
    return os.path.join(os.path.dirname(os.path.realpath(__file__)), "results", *args)


class ReplicateProcessingTestCase(unittest.TestCase):
    """Check the scan-event filtering performed by ``read_scans``."""

    @classmethod
    def setUpClass(cls):
        """Extract ``MTBLS79_mzml_single.zip`` into ``results/zip_data``."""
        # The context manager closes the archive even when extraction raises;
        # the previous open/extract/close sequence leaked the handle then.
        with zipfile.ZipFile(to_test_data("MTBLS79_mzml_single.zip"), 'r') as zip_ref:
            zip_ref.extractall(to_test_results("zip_data"))

    def test_read_scans(self):
        """Include/exclude filters select the expected scan-event headers.

        Each filter is given once as a ``[start, end, scan_type]`` triple and
        once as the full header string; both spellings must select the same
        scan events.
        """
        mzml_file = to_test_data("mzml", "batch04_QC17_rep01_262.mzML")

        first_window = 'FTMS + p ESI w SIM ms [70.00-170.00]'
        other_windows = ['FTMS + p ESI w SIM ms [140.00-240.00]',
                         'FTMS + p ESI w SIM ms [210.00-310.00]',
                         'FTMS + p ESI w SIM ms [280.00-380.00]',
                         'FTMS + p ESI w SIM ms [350.00-450.00]',
                         'FTMS + p ESI w SIM ms [420.00-520.00]',
                         'FTMS + p ESI w SIM ms [490.00-590.00]']

        cases = (
            ({"exclude": [["70.0", "170.0", "sim"]]}, other_windows),
            ({"include": [["70.0", "170.0", "sim"]]}, [first_window]),
            ({"exclude": [first_window]}, other_windows),
            ({"include": [first_window]}, [first_window]),
        )

        for scan_filter, expected_headers in cases:
            # subTest reports every failing filter instead of stopping at the first
            with self.subTest(filter_scan_events=scan_filter):
                scans = read_scans(mzml_file, function_noise="median",
                                   min_scans=1, filter_scan_events=scan_filter)
                self.assertListEqual(list(scans.keys()), expected_headers)
if __name__ == '__main__':
    # Run the model test modules as one suite.
    # unittest.findTestCases was deprecated in Python 3.11 and removed in 3.13;
    # the default loader's loadTestsFromModule collects the same test cases.
    loader = unittest.defaultTestLoader
    suite = unittest.TestSuite()

    for module in (test_peaklist_metadata, test_peaklist_tags, test_peaklist, test_peak_matrix):
        suite.addTest(loader.loadTestsFromModule(module))

    runner = unittest.TextTestRunner()
    runner.run(suite)
if __name__ == '__main__':
    # Run the portal test modules as one suite.
    # unittest.findTestCases was deprecated in Python 3.11 and removed in 3.13;
    # the default loader's loadTestsFromModule collects the same test cases.
    loader = unittest.defaultTestLoader
    suite = unittest.TestSuite()

    for module in (test_txt_portal, test_hdf5_portal, test_paths_portal):
        suite.addTest(loader.loadTestsFromModule(module))

    runner = unittest.TextTestRunner()
    runner.run(suite)
if __name__ == '__main__':
    # Run the processing test modules as one suite.
    # unittest.findTestCases was deprecated in Python 3.11 and removed in 3.13;
    # the default loader's loadTestsFromModule collects the same test cases.
    loader = unittest.defaultTestLoader
    suite = unittest.TestSuite()

    # Alignment tests are added before the filter tests, as before
    for module in (test_peak_alignment, test_peak_filters):
        suite.addTest(loader.loadTestsFromModule(module))

    runner = unittest.TextTestRunner()
    runner.run(suite)
if __name__ == '__main__':
    # Run the tools test module as a suite.
    # unittest.findTestCases was deprecated in Python 3.11 and removed in 3.13;
    # the default loader's loadTestsFromModule collects the same test cases.
    suite = unittest.TestSuite()
    suite.addTest(unittest.defaultTestLoader.loadTestsFromModule(test_tools))

    runner = unittest.TextTestRunner()
    runner.run(suite)
def to_test_data(*args):
    """Join *args* onto the ``data/MTBLS79_subset`` directory beside this module."""
    return os.path.join(os.path.dirname(os.path.realpath(__file__)), "data", "MTBLS79_subset", *args)


def to_test_results(*args):
    """Join *args* onto the ``results`` directory beside this module."""
    return os.path.join(os.path.dirname(os.path.realpath(__file__)), "results", *args)


class ThermoRawPortalsTestCase(unittest.TestCase):
    """Read one Thermo RAW file and compare its metadata with known values."""

    def test_thermo_raw_portal(self):
        """Check timestamp, headers, scan ids, TICs, injection times and scan 1."""
        run = ThermoRaw(to_test_data("raw", "batch04_QC17_rep01_262.RAW"))
        # Register the close immediately: a trailing run.close() is skipped
        # whenever an assertion above it fails, leaking the open reader.
        self.addCleanup(run.close)

        # Two renderings of the same timestamp are accepted
        # (presumably locale-dependent - confirm against the portal).
        self.assertIn(str(run.timestamp), ("4/2/2011 3:28:02 AM", "02/04/2011 03:28:02"))

        self.assertListEqual(list(run.headers().keys()), ['FTMS + p ESI w SIM ms [70.00-170.00]',
                                                          'FTMS + p ESI w SIM ms [140.00-240.00]',
                                                          'FTMS + p ESI w SIM ms [210.00-310.00]',
                                                          'FTMS + p ESI w SIM ms [280.00-380.00]',
                                                          'FTMS + p ESI w SIM ms [350.00-450.00]',
                                                          'FTMS + p ESI w SIM ms [420.00-520.00]',
                                                          'FTMS + p ESI w SIM ms [490.00-590.00]'])
        self.assertListEqual(list(run.scan_ids().keys()), list(range(1, 89)))

        tics = run.tics()
        self.assertListEqual(list(tics.values())[0:2], [39800032.0, 38217892.0])
        self.assertEqual(len(tics), 88)

        injection_times = run.ion_injection_times()
        self.assertListEqual(list(injection_times.values())[0:2], [40.434, 40.095])
        self.assertEqual(len(injection_times), 88)

        self.assertListEqual(run.scan_dependents(), [])

        # Peak list and metadata of the first scan
        pl = run.peaklist(1)
        self.assertEqual(pl.ID, 1)
        self.assertEqual(pl.metadata["header"], "FTMS + p ESI w SIM ms [70.00-170.00]")
        self.assertEqual(pl.metadata["ms_level"], 1.0)
        self.assertEqual(pl.metadata["ion_injection_time"], 40.434)
        self.assertEqual(pl.metadata["scan_time"], 0.5010899999999999)
        self.assertEqual(pl.metadata["elapsed_scan_time"], 1.05)
        self.assertEqual(pl.metadata["tic"], 39800032.0)
        self.assertEqual(pl.metadata["function_noise"], "noise_packets")
        self.assertEqual(pl.metadata["mz_range"], [70.0, 170.0])


if __name__ == '__main__':
    unittest.main()
class TxtPortalsTestCase(unittest.TestCase):
    """Round-trip a peaklist and a peak matrix through the txt portal."""

    def test_peaklist_portal(self):
        """Save a flagged random peaklist to txt, reload it and compare."""
        mz_values = np.sort(np.random.uniform(100, 1200, size = 100))
        intensities = np.random.normal(100, 10, size = 100)
        original = PeakList('peaklist', mz_values, intensities)
        # Alternating 0/1 flag over the 100 peaks
        original.add_attribute('odd_flag', [0, 1] * 50, is_flag = True)

        save_peaklist_as_txt(original, '.test_peaklist.txt')
        reloaded = load_peaklist_from_txt('.test_peaklist.txt', 'peaklist')

        self.assertEqual(reloaded.size, 50)
        self.assertEqual(reloaded.full_size, 100)
        self.assertTrue(np.allclose(original.mz_all, reloaded.mz_all))
        self.assertTrue(np.allclose(original.intensity, reloaded.intensity))

    def test_peak_matrix_portal(self):
        """Align six tagged random peaklists, save the matrix, reload and compare."""

        def random_mzs():
            return sorted(np.random.uniform(100, 1200, size = 100))

        def random_intensities():
            return np.abs(np.random.normal(100, 10, size = 100))

        sample_ids = ('sample_1_1', 'sample_1_2', 'QC_1', 'sample_2_1', 'sample_2_2', 'QC_2')
        pkls = [PeakList(sample_id, random_mzs(), random_intensities()) for sample_id in sample_ids]

        # One tag set per peaklist, in the same order as sample_ids
        tag_sets = (
            ('sample', Tag('compound_1', 'treatment'), Tag('1hr', 'time_point'), Tag(1, 'plate')),
            ('sample', Tag('compound_1', 'treatment'), Tag('6hr', 'time_point'), Tag(1, 'plate')),
            ('qc', Tag(1, 'plate')),
            ('sample', Tag('compound_2', 'treatment'), Tag('1hr', 'time_point'), Tag(2, 'plate')),
            ('sample', Tag('compound_2', 'treatment'), Tag('6hr', 'time_point'), Tag(2, 'plate')),
            ('qc', Tag(2, 'plate')),
        )
        for pkl, tag_set in zip(pkls, tag_sets):
            for tag in tag_set:
                pkl.tags.add_tag(tag)

        pm = align_peaks(pkls, ppm = 2e+4, block_size = 10, ncpus = 2)

        # Repeating flag patterns, cut to the number of matrix columns
        n_columns = pm.shape[1]
        pm.add_flag('odd_flag', ([0, 1] * int(n_columns/2+1))[:n_columns])
        pm.add_flag('qua_flag', ([0, 0, 1, 1] * int(n_columns/4+1))[:n_columns])

        rsd_tags = ('qc', Tag('compound_1', 'treatment'), Tag('compound_2', 'treatment'))
        save_peak_matrix_as_txt(pm, '.test_peak_matrix.txt', samples_in_rows = True, comprehensive = True,
                                rsd_tags = rsd_tags)
        npm = load_peak_matrix_from_txt('.test_peak_matrix.txt', samples_in_rows = True, comprehensive = 'auto')

        self.assertEqual(pm.shape, npm.shape)
        self.assertEqual(pm.full_shape, npm.full_shape)
        self.assertTrue(np.all(pm.flags == npm.flags))
        self.assertTrue(np.all(pm.flag_names == npm.flag_names))
        self.assertTrue(np.allclose(pm.intensity_matrix, npm.intensity_matrix))
        self.assertEqual(pm.peaklist_tag_types, npm.peaklist_tag_types)
        self.assertEqual(pm.peaklist_tag_values, npm.peaklist_tag_values)

    def tearDown(self):
        """Delete the scratch files the tests write into the working directory."""
        for scratch in ('.test_peaklist.txt', '.test_peak_matrix.txt'):
            if os.path.isfile(scratch):
                os.remove(scratch)


if __name__ == '__main__':
    unittest.main()