├── .gitignore
├── .readthedocs.yml
├── .travis.yml
├── LICENSE
├── MANIFEST.in
├── README.rst
├── RawFileReaderLicense.rst
├── appveyor.yml
├── binder
    └── environment.yml
├── dimspy
    ├── __init__.py
    ├── __main__.py
    ├── metadata.py
    ├── models
    │   ├── __init__.py
    │   ├── peak_matrix.py
    │   ├── peaklist.py
    │   ├── peaklist_metadata.py
    │   └── peaklist_tags.py
    ├── portals
    │   ├── ThermoRawFileReader_3_0_41
    │   │   ├── Libraries
    │   │   │   ├── ThermoFisher.CommonCore.BackgroundSubtraction.XML
    │   │   │   ├── ThermoFisher.CommonCore.BackgroundSubtraction.dll
    │   │   │   ├── ThermoFisher.CommonCore.Data.XML
    │   │   │   ├── ThermoFisher.CommonCore.Data.dll
    │   │   │   ├── ThermoFisher.CommonCore.MassPrecisionEstimator.XML
    │   │   │   ├── ThermoFisher.CommonCore.MassPrecisionEstimator.dll
    │   │   │   ├── ThermoFisher.CommonCore.RawFileReader.XML
    │   │   │   └── ThermoFisher.CommonCore.RawFileReader.dll
    │   │   └── License
    │   │   │   └── RawFileRdr_License_Agreement_RevA.doc
    │   ├── __init__.py
    │   ├── hdf5_portal.py
    │   ├── mzml_portal.py
    │   ├── paths.py
    │   ├── thermo_raw_portal.py
    │   └── txt_portal.py
    ├── process
    │   ├── __init__.py
    │   ├── peak_alignment.py
    │   ├── peak_filters.py
    │   └── replicate_processing.py
    └── tools.py
├── docs
    ├── Makefile
    └── source
    │   ├── api-reference.rst
    │   ├── bugs-and-issues.rst
    │   ├── changelog.rst
    │   ├── citation.rst
    │   ├── cli.rst
    │   ├── conf.py
    │   ├── credits.rst
    │   ├── dimspy.metadata.rst
    │   ├── dimspy.models.rst
    │   ├── dimspy.portals.rst
    │   ├── dimspy.process.rst
    │   ├── dimspy.tools.rst
    │   ├── images
    │       └── alignment.png
    │   ├── index.rst
    │   ├── installation.rst
    │   └── license.rst
├── environment.yml
├── examples
    ├── examples.py
    ├── run.bat
    └── run.sh
├── notebooks
    └── workflow.ipynb
├── requirements.txt
├── setup.py
└── tests
    ├── __init__.py
    ├── data
        ├── MTBLS79_subset
        │   ├── MTBLS79_mzml_peak_matrix_v1.hdf5
        │   ├── MTBLS79_mzml_peak_matrix_v2.hdf5
        │   ├── MTBLS79_mzml_single.zip
        │   ├── MTBLS79_mzml_single_report.txt
        │   ├── MTBLS79_mzml_triplicates.zip
        │   ├── MTBLS79_mzml_triplicates_report.txt
        │   ├── MTBLS79_mzml_triplicates_v1.hdf5
        │   ├── MTBLS79_mzml_triplicates_v2.hdf5
        │   ├── batch04_QC17_rep01_262_v1.txt
        │   ├── batch04_QC17_rep01_262_v2.txt
        │   ├── filelist_class_label_error.txt
        │   ├── filelist_csl_MTBLS79_mzml_peak_matrix.txt
        │   ├── filelist_csl_MTBLS79_mzml_triplicates.txt
        │   ├── filelist_filename_error.txt
        │   ├── filelist_injection_order_error.txt
        │   ├── filelist_multi.txt
        │   ├── filelist_multi_error.txt
        │   ├── filelist_mzml_single.txt
        │   ├── filelist_mzml_triplicates.txt
        │   ├── filelist_raw_triplicates.txt
        │   ├── filelist_replicate_error_1.txt
        │   ├── filelist_replicate_error_2.txt
        │   ├── mzml
        │   │   ├── batch04_QC17_rep01_262.mzML
        │   │   ├── batch04_QC17_rep02_263.mzML
        │   │   └── batch04_QC17_rep03_264.mzML
        │   ├── pm_mzml_triplicates_v1.txt
        │   ├── pm_mzml_triplicates_v2.txt
        │   └── raw
        │   │   ├── batch04_QC17_rep01_262.RAW
        │   │   ├── batch04_QC17_rep02_263.RAW
        │   │   └── batch04_QC17_rep03_264.RAW
        └── mzml_DIMSn.zip
    ├── test_hdf5_portal.py
    ├── test_metadata.py
    ├── test_mzml_portal.py
    ├── test_paths_portal.py
    ├── test_peak_alignment.py
    ├── test_peak_filters.py
    ├── test_peak_matrix.py
    ├── test_peaklist.py
    ├── test_peaklist_metadata.py
    ├── test_peaklist_tags.py
    ├── test_replicate_processing.py
    ├── test_suite_models.py
    ├── test_suite_portals.py
    ├── test_suite_process.py
    ├── test_suite_tools.py
    ├── test_thermo_raw_portal.py
    ├── test_tools.py
    └── test_txt_portal.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | env/
 12 | build/
 13 | develop-eggs/
 14 | dist/
 15 | downloads/
 16 | eggs/
 17 | .eggs/
 18 | lib/
 19 | lib64/
 20 | parts/
 21 | sdist/
 22 | var/
 23 | wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *,cover
 47 | .hypothesis/
 48 | debug.py
 49 | 
 50 | # Translations
 51 | *.mo
 52 | *.pot
 53 | 
 54 | # Django stuff:
 55 | *.log
 56 | local_settings.py
 57 | 
 58 | # Flask stuff:
 59 | instance/
 60 | .webassets-cache
 61 | 
 62 | # Scrapy stuff:
 63 | .scrapy
 64 | 
 65 | # Sphinx documentation
 66 | docs/_build/
 67 | docs/source/images/alignment.graffle
 68 | 
 69 | # PyBuilder
 70 | target/
 71 | 
 72 | # Jupyter Notebook
 73 | .ipynb_checkpoints
 74 | 
 75 | # pyenv
 76 | .python-version
 77 | 
 78 | # celery beat schedule file
 79 | celerybeat-schedule
 80 | 
 81 | # dotenv
 82 | .env
 83 | 
 84 | # virtualenv
 85 | .venv
 86 | venv/
 87 | venv-py3/
 88 | ENV/
 89 | 
 90 | # PyCharm
 91 | # User-specific stuff:
 92 | .idea/
 93 | 
 94 | # vscode
 95 | .vscode
 96 | 
 97 | # R-language
 98 | # History files
 99 | .Rhistory
100 | .Rapp.history
101 | 
102 | # Session Data files
103 | .RData
104 | 
105 | #macOS
106 | *.DS_Store
107 | 
108 | *~
109 | 
110 | 
111 | 


--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
 1 | # .readthedocs.yml
 2 | # Read the Docs configuration file
 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
 4 | 
 5 | # Required
 6 | version: 2
 7 | 
 8 | # Build documentation in the docs/ directory with Sphinx
 9 | sphinx:
10 |   configuration: docs/source/conf.py
11 | 
12 | # Optionally build your docs in additional formats such as PDF and ePub
13 | formats: all
14 | 
15 | conda:
16 |   environment: environment.yml
17 | 
18 | python:
19 |   version: 3.7
20 |   install:
21 |     - method: pip
22 |       path: .
23 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | dist: xenial
 2 | language: python
 3 | python:
 4 |   - "3.7"
 5 | 
 6 | install:
 7 |   - sudo apt-get update
 8 |   - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh;
 9 |   - bash miniconda.sh -b -p $HOME/miniconda
10 |   - export PATH="$HOME/miniconda/bin:$PATH"
11 |   - hash -r
12 |   - conda config --set always_yes yes --set changeps1 no
13 |   - conda update -q conda
14 |   - conda info -a
15 |   - conda env create -n test-environment -f environment.yml
16 |   - source activate test-environment
17 |   - conda install pytest codecov pytest-cov -c conda-forge
18 |   - pip install .
19 |   
20 | script:
21 |   - dimspy --help
22 |   - pytest --cov=dimspy tests/
23 | 
24 | after_script:
25 |   - codecov
26 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
 1 | include README.rst
 2 | include LICENSE
 3 | include RawFileReaderLicense.rst
 4 | include requirements.txt
 5 | include examples/*.*
 6 | include tests/*.py
 7 | include tests/data/*.*
 8 | include tests/data/MTBLS79_subset/*.*
 9 | include tests/data/MTBLS79_subset/mzml/*.*
10 | include tests/data/MTBLS79_subset/raw/*.*
11 | include tests/results/*.*
12 | include dimspy/portals/ThermoRawFileReader_3_0_41/Libraries/*.XML
13 | include dimspy/portals/ThermoRawFileReader_3_0_41/Libraries/*.dll
14 | include dimspy/portals/ThermoRawFileReader_3_0_41/License/RawFileRdr_License_Agreement_RevA.doc
15 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
 1 | DIMSpy
 2 | ======
 3 | |Py versions| |Version| |Bioconda| |Galaxy-eu| |Git| |Build Status (Travis)| |Build Status (AppVeyor)| |codecov| |License| |binder| |RTD doc| |gitter|
 4 | 
 5 | Python package for processing direct-infusion mass spectrometry-based metabolomics and lipidomics data
 6 | 
 7 | - **Documentation:** https://dimspy.readthedocs.io/en/latest
 8 | - **Source:** https://github.com/computational-metabolomics/dimspy
 9 | - **Bug reports:** https://github.com/computational-metabolomics/dimspy/issues
10 | 
11 | 
 12 | Installation (Conda, PyPI and Galaxy)
13 | -------------------------------------
14 | See the `Installation page <https://dimspy.readthedocs.io/en/latest/installation.html>`__ of
15 | the `online documentation <https://computational-metabolomics.github.io/dimspy/>`__.
16 | 
17 | 
18 | Bug reports
19 | -----------
20 | Please report any bugs that you find `here <https://github.com/computational-metabolomics/dimspy/issues>`_.
21 | Or fork the repository on `GitHub <https://github.com/computational-metabolomics/dimspy/>`_
22 | and create a pull request (PR). We welcome all contributions, and we
23 | will help you to make the PR if you are new to `git`.
24 | 
25 | 
26 | Credits
27 | -------
28 | DIMSpy was originally written by Ralf Weber and Albert Zhou and has been developed with the help of many others.
29 | Thanks to everyone who has improved DIMSpy by contributing code, adding features, bug reports and fixes, and documentation.
30 | 
 31 | **Developers and contributors**
32 |  - Ralf J. M. Weber (r.j.weber@bham.ac.uk) - `University of Birmingham (UK) <https://www.birmingham.ac.uk/staff/profiles/biosciences/weber-ralf.aspx>`__
33 |  - Jiarui (Albert) Zhou (j.zhou.3@bham.ac.uk) - `University of Birmingham (UK) <http://www.birmingham.ac.uk/index.aspx>`__, `HIT Shenzhen (China) <http://www.hitsz.edu.cn>`_
34 |  - Thomas N. Lawson (t.n.lawson@bham.ac.uk) - `University of Birmingham (UK) <http://www.birmingham.ac.uk/index.aspx>`__
35 |  - Martin R. Jones (martin.jones@eawag.ch) - `Eawag  (Switzerland) <https://www.eawag.ch/en/aboutus/portrait/organisation/staff/profile/martin-jones/show/>`_
36 | 
37 | **DIMSpy acknowledges support from the following funders:**
38 |  - BBSRC, grant number BB/M019985/1
39 |  - European Commission's H2020 programme, grant agreement number 654241
40 |  - Wellcome Trust, grant number 202952/Z/16/Z
41 | 
42 | **Citation**
43 | 
44 | To cite DIMSpy please use one of the Zenodo references listed `here <https://dimspy.readthedocs.io/en/latest/citation.html>`__.
45 | 
46 | 
47 | License
48 | --------
49 | DIMSpy is licensed under the GNU General Public License v3.0 (see `LICENSE file <https://github.com/computational-metabolomics/dimspy/blob/master/LICENSE>`_ for licensing information). Copyright © 2017 - 2020 Ralf Weber, Albert Zhou
50 | 
51 | **Third-party licenses and copyright**
52 | 
53 | RawFileReader reading tool. Copyright © 2016 by Thermo Fisher Scientific, Inc. All rights reserved. See `RawFileReaderLicense <https://github.com/computational-metabolomics/dimspy/blob/master/RawFileReaderLicense.rst>`_ for licensing information.
54 | Using DIMSpy software for processing Thermo Fisher Scientific \*.raw files implies the acceptance of the RawFileReader license terms.
55 | Anyone receiving RawFileReader as part of a larger software distribution (in the current context, as part of DIMSpy) is considered an "end user" under
56 | section 3.3 of the RawFileReader License, and is not granted rights to redistribute RawFileReader.
57 | 
58 | 
59 | .. |Build Status (Travis)| image:: https://img.shields.io/travis/computational-metabolomics/dimspy.svg?logo=travis&maxAge=600&style=flat-square
60 |    :target: https://travis-ci.com/computational-metabolomics/dimspy
61 | 
62 | .. |Build Status (AppVeyor)| image:: https://img.shields.io/appveyor/ci/RJMW/dimspy.svg?logo=appveyor&style=flat-square&maxAge=600
63 |    :target: https://ci.appveyor.com/project/RJMW/dimspy/branch/master
64 | 
65 | .. |Py versions| image:: https://img.shields.io/pypi/pyversions/dimspy.svg?style=flat&maxAge=3600
66 |    :target: https://pypi.python.org/pypi/dimspy/
67 | 
68 | .. |Version| image:: https://img.shields.io/pypi/v/dimspy.svg?style=flat&maxAge=3600
69 |    :target: https://pypi.python.org/pypi/dimspy/
70 | 
71 | .. |Git| image:: https://img.shields.io/badge/repository-GitHub-blue.svg?style=flat&maxAge=3600
72 |    :target: https://github.com/computational-metabolomics/dimspy
73 | 
74 | .. |Bioconda| image:: https://img.shields.io/conda/vn/bioconda/dimspy?style=flat-square&maxAge=3600
75 |    :target: http://bioconda.github.io/recipes/dimspy/README.html
76 |    
77 | .. |galaxy-eu| image:: https://img.shields.io/badge/usegalaxy-.eu-brightgreen?logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABgAAAASCAYAAABB7B6eAAAABGdBTUEAALGPC/xhBQAAACBjSFJNAAB6JgAAgIQAAPoAAACA6AAAdTAAAOpgAAA6mAAAF3CculE8AAAACXBIWXMAAAsTAAALEwEAmpwYAAACC2lUWHRYTUw6Y29tLmFkb2JlLnhtcAAAAAAAPHg6eG1wbWV0YSB4bWxuczp4PSJhZG9iZTpuczptZXRhLyIgeDp4bXB0az0iWE1QIENvcmUgNS40LjAiPgogICA8cmRmOlJERiB4bWxuczpyZGY9Imh0dHA6Ly93d3cudzMub3JnLzE5OTkvMDIvMjItcmRmLXN5bnRheC1ucyMiPgogICAgICA8cmRmOkRlc2NyaXB0aW9uIHJkZjphYm91dD0iIgogICAgICAgICAgICB4bWxuczp0aWZmPSJodHRwOi8vbnMuYWRvYmUuY29tL3RpZmYvMS4wLyI+CiAgICAgICAgIDx0aWZmOlJlc29sdXRpb25Vbml0PjI8L3RpZmY6UmVzb2x1dGlvblVuaXQ+CiAgICAgICAgIDx0aWZmOkNvbXByZXNzaW9uPjE8L3RpZmY6Q29tcHJlc3Npb24+CiAgICAgICAgIDx0aWZmOk9yaWVudGF0aW9uPjE8L3RpZmY6T3JpZW50YXRpb24+CiAgICAgICAgIDx0aWZmOlBob3RvbWV0cmljSW50ZXJwcmV0YXRpb24+MjwvdGlmZjpQaG90b21ldHJpY0ludGVycHJldGF0aW9uPgogICAgICA8L3JkZjpEZXNjcmlwdGlvbj4KICAgPC9yZGY6UkRGPgo8L3g6eG1wbWV0YT4KD0UqkwAAAn9JREFUOBGlVEuLE0EQruqZiftwDz4QYT1IYM8eFkHFw/4HYX+GB3/B4l/YP+CP8OBNTwpCwFMQXAQPKtnsg5nJZpKdni6/6kzHvAYDFtRUT71f3UwAEbkLch9ogQxcBwRKMfAnM1/CBwgrbxkgPAYqlBOy1jfovlaPsEiWPROZmqmZKKzOYCJb/AbdYLso9/9B6GppBRqCrjSYYaquZq20EUKAzVpjo1FzWRDVrNay6C/HDxT92wXrAVCH3ASqq5VqEtv1WZ13Mdwf8LFyyKECNbgHHAObWhScf4Wnj9CbQpPzWYU3UFoX3qkhlG8AY2BTQt5/EA7qaEPQsgGLWied0A8VKrHAsCC1eJ6EFoUd1v6GoPOaRAtDPViUr/wPzkIFV9AaAZGtYB568VyJfijV+ZBzlVZJ3W7XHB2RESGe4opXIGzRTdjcAupOK09RA6kzr1NTrTj7V1ugM4VgPGWEw+e39CxO6JUw5XhhKihmaDacU2GiR0Ohcc4cZ+Kq3AjlEnEeRSazLs6/9b/kh4eTC+hngE3QQD7Yyclxsrf3cpxsPXn+cFdenF9aqlBXMXaDiEyfyfawBz2RqC/O9WF1ysacOpytlUSoqNrtfbS642+4D4CS9V3xb4u8P/ACI4O810efRu6KsC0QnjHJGaq4IOGUjWTo/YDZDB3xSIxcGyNlWcTucb4T3in/3IaueNrZyX0lGOrWndstOr+w21UlVFokILjJLFhPukbVY8OmwNQ3nZgNJNmKDccusSb4UIe+gtkI+9/bSLJDjqn763f5CQ5TLApmICkqwR0QnUPKZFIUnoozWcQuRbC0Km02knj0tPYx63furGs3x/iPnz83zJDVNtdP3QAAAABJRU5ErkJggg==
78 |    :target: http://usegalaxy.eu
79 | 
80 | .. |License| image:: https://img.shields.io/pypi/l/dimspy.svg?style=flat&maxAge=3600
81 |    :target: https://www.gnu.org/licenses/gpl-3.0.html
82 | 
83 | .. |RTD doc| image:: https://img.shields.io/badge/documentation-RTD-71B360.svg?style=flat&maxAge=3600
84 |    :target: https://dimspy.readthedocs.io/en/latest/
85 |    
86 | .. |codecov| image:: https://codecov.io/gh/computational-metabolomics/dimspy/branch/master/graph/badge.svg
87 |    :target: https://codecov.io/gh/computational-metabolomics/dimspy
88 | 
89 | .. |binder| image:: https://mybinder.org/badge_logo.svg
90 |    :target: https://mybinder.org/v2/gh/computational-metabolomics/dimspy/master?filepath=notebooks%2Fworkflow.ipynb
91 | 
92 | .. |gitter| image:: https://badges.gitter.im/Join%20Chat.svg
93 |    :target: https://gitter.im/computational-metabolomics/dimspy?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge
94 | 


--------------------------------------------------------------------------------
/RawFileReaderLicense.rst:
--------------------------------------------------------------------------------
 1 | 
 2 | This license (see "SOFTWARE LICENSE AGREEMENT" below) covers the following files which are distributed with the DIMSpy software package:
 3 | 
 4 |  - ThermoFisher.CommonCore.BackgroundSubtraction.dll
 5 |  - ThermoFisher.CommonCore.BackgroundSubtraction.XML
 6 |  - ThermoFisher.CommonCore.Data.dll
 7 |  - ThermoFisher.CommonCore.Data.XML
 8 |  - ThermoFisher.CommonCore.MassPrecisionEstimator.dll
 9 |  - ThermoFisher.CommonCore.MassPrecisionEstimator.XML
10 |  - ThermoFisher.CommonCore.RawFileReader.dll
11 |  - ThermoFisher.CommonCore.RawFileReader.XML
12 | 
13 | Anyone receiving RawFileReader as part of a larger software distribution (in the current context, as part of DIMSpy) is considered an "end user" under
14 | section 3.3 of the RawFileReader License, and is not granted rights to redistribute RawFileReader.
15 | 
16 | |
17 | 
18 | **SOFTWARE LICENSE AGREEMENT ("License") FOR RawFileReader**
19 | ----------------------------------------------------------------------
20 | These License terms are an agreement between you and Thermo Finnigan LLC ("Licensor"). They apply to Licensor's MSFileReader software program ("Software"), which includes documentation and any media on which you received it. These terms also apply to any updates or supplements for this Software, unless other terms accompany those items, in which case those terms apply. **If you use this Software, you accept this License. If you do not accept this License, you are prohibited from using this software.  If you comply with these License terms, you have the rights set forth below.**
21 | 
22 | 1. Rights Granted:
23 | 
24 | 1.1. You may install and use this Software on any of your computing devices.
25 | 
26 | 1.2. You may distribute this Software to others, but only in combination with other software components and/or programs that you provide and subject to the distribution requirements and restrictions below.
27 | 
28 | 2.  Use Restrictions:
29 | 
30 | 2.1. You may not decompile, disassemble, reverse engineer, use reflection or modify this Software.
31 | 
32 | 3. Distribution Requirements:
33 | 
34 | If you distribute this Software to others, you agree to:
35 | 
36 | 3.1. Indemnify, defend and hold harmless the Licensor from any claims, including attorneys' fees, related to the distribution or use of this Software;
37 | 
38 | 3.2. Display the following text in your software's "About" box: " **RawFileReader reading tool. Copyright © 2016 by Thermo Fisher Scientific, Inc. All rights reserved**.";
39 | 
40 | 3.3. Require your end users to agree to a license agreement that prohibits them from redistributing this Software to others.
41 | 
42 | 4.  Distribution Restrictions:
43 | 
44 | 4.1. You may not use the Licensor's trademarks in a way that suggests your software components and/or programs are provided by or are endorsed by the Licensor; and
45 | 
46 | 4.2. You may not commercially exploit this Software or products that incorporate this Software without the prior written consent of Licensor. Commercial exploitation includes, but is not limited to, charging a purchase price, license fee, maintenance fee, or subscription fee; or licensing, transferring or redistributing the Software in exchange for consideration of any kind.
47 | 
48 | 4.3. Your rights to this Software do not include any license, right, power or authority to subject this Software in whole or in part to any of the terms of an Excluded License. "Excluded License" means any license that requires as a condition of use, modification and/or distribution of software subject to the Excluded License, that such software or other software combined and/or distributed with such software be (a) disclosed or distributed in source code form; or (b) licensed for the purpose of making derivative works.  Without limiting the foregoing obligation, you are specifically prohibited from distributing this Software with any software that is subject to the General Public License (GPL) or similar license in a manner that would create a combined work.
49 | 
50 | 5.  Additional Terms Applicable to Software:
51 | 
52 | 5.1. This Software is licensed, not sold. This License only gives you some rights to use this Software; the Licensor reserves all other rights. Unless applicable law gives you more rights despite this limitation, you may use this Software only as expressly permitted in this License.
53 | 
54 | 5.2. Licensor has no obligation to fix, update, supplement or support this Software.
55 | 
56 | 5.3. This Software is not designed, manufactured or intended for any use requiring fail-safe performance in which the failure of this Software could lead to death, serious personal injury or severe physical and environmental damage ("High Risk Activities"), such as the operation of aircraft, medical or nuclear facilities. You agree not to use, or license the use of, this Software in connection with any High Risk Activities.
57 | 
58 | 5.4. Your rights under this License terminate automatically if you breach this License in any way. Termination of this License will not affect any of your obligations or liabilities arising prior to termination. The following sections of this License shall survive termination: 2.1, 3.1, 3.2, 3.3, 4.1, 4.2, 4.3, 5.1, 5.2, 5.3, 5.5, 5.6, 5.7, 5.8, and 5.9.
59 | 
60 | 5.5. This Software is subject to United States export laws and regulations. You agree to comply with all domestic and international export laws and regulations that apply to this Software. These laws include restrictions on destinations, end users and end use.
61 | 
62 | 5.6. This License shall be construed and controlled by the laws of the State of California, U.S.A., without regard to conflicts of law. You consent to the jurisdiction of the state and federal courts situated in the State of California in any action arising under this License. The application of the U.N. Convention on Contracts for the International Sale of Goods to this License is hereby expressly excluded. If any provision of this License shall be deemed unenforceable or contrary to law, the rest of this License shall remain in full effect and interpreted in an enforceable manner that most nearly captures the intent of the original language.
63 | 
64 | 5.7. THIS SOFTWARE IS LICENSED "AS IS". YOU BEAR ALL RISKS OF USING IT. LICENSOR GIVES NO AND DISCLAIMS ALL EXPRESS AND IMPLIED WARRANTIES, REPRESENTATIONS OR GUARANTEES.  YOU MAY HAVE ADDITIONAL CONSUMER RIGHTS UNDER YOUR LOCAL LAWS WHICH THIS LICENSE CANNOT CHANGE. TO THE EXTENT PERMITTED UNDER YOUR LOCAL LAWS, LICENSOR EXCLUDES THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
65 | 
66 | 5.8. LICENSOR'S TOTAL LIABILITY TO YOU FOR DIRECT DAMAGES ARISING UNDER THIS LICENSE IS LIMITED TO U.S. $1.00. YOU CANNOT RECOVER ANY OTHER DAMAGES, INCLUDING CONSEQUENTIAL, LOST PROFITS, SPECIAL, INDIRECT OR INCIDENTAL DAMAGES, EVEN IF LICENSOR IS EXPRESSLY MADE AWARE OF THE POSSIBILITY THEREOF OR IS NEGLIGENT. THIS LIMITATION APPLIES TO ANYTHING RELATED TO THIS SOFTWARE, SERVICES, CONTENT (INCLUDING CODE) ON THIRD PARTY INTERNET SITES, OR THIRD PARTY PROGRAMS, AND CLAIMS FOR BREACH OF CONTRACT, BREACH OF WARRANTY, GUARANTEE  OR CONDITION, STRICT LIABILITY, NEGLIGENCE, OR OTHER TORT TO THE EXTENT PERMITTED BY APPLICABLE LAW.
67 | 
68 | 5.9. Use, duplication or disclosure of this Software by the U.S. Government is subject to the restricted rights applicable to commercial computer software (under FAR 52.227019 and DFARS 252.227-7013 or parallel regulations). The manufacturer for this purpose is Thermo Finnigan LLC, 355 River Oaks Parkway, San Jose, California 95134, U.S.A.
69 | 


--------------------------------------------------------------------------------
/appveyor.yml:
--------------------------------------------------------------------------------
 1 | build: false
 2 | 
 3 | environment:
 4 |   matrix:
 5 |     - PYTHON_VERSION: 3.7
 6 |       MINICONDA: C:\Miniconda-x64
 7 | 
 8 | init:
 9 |   - "ECHO %PYTHON_VERSION% %MINICONDA%"
10 | 
11 | install:
12 |   - "set PATH=%MINICONDA%;%MINICONDA%\\Scripts;%PATH%"
13 |   - conda config --set always_yes yes --set changeps1 no
14 |   - conda update -q conda
15 |   - conda info -a
16 |   - conda env create -n test-environment -f environment.yml
17 |   - activate test-environment
18 |   - conda install pytest -c conda-forge
19 |   - pip install .
20 |   
21 | test_script:
22 |   - dimspy --help
23 |   - pytest
24 | 


--------------------------------------------------------------------------------
/binder/environment.yml:
--------------------------------------------------------------------------------
 1 | name: dimspy
 2 | channels:
 3 |   - conda-forge
 4 |   - bioconda
 5 |   - computational-metabolomics
 6 | dependencies:
 7 |  - python=3.7
 8 |  - dimspy
 9 |  
10 | 


--------------------------------------------------------------------------------
/dimspy/__init__.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | #
 4 | # Copyright © 2017-2020 Ralf Weber, Albert Zhou.
 5 | #
 6 | # This file is part of DIMSpy.
 7 | #
 8 | # DIMSpy is free software: you can redistribute it and/or modify
 9 | # it under the terms of the GNU General Public License as published by
10 | # the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # DIMSpy is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 | # GNU General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU General Public License
19 | # along with DIMSpy.  If not, see <https://www.gnu.org/licenses/>.
20 | #
21 | 
22 | 
23 | __author__ = 'Ralf Weber (r.j.weber@bham.ac.uk), Albert Zhou (j.zhou.3@bham.ac.uk)'
24 | __credits__ = 'Ralf Weber (r.j.weber@bham.ac.uk), Albert Zhou (j.zhou.3@bham.ac.uk)'
25 | __version__ = '2.0.0'
26 | __license__ = 'GPLv3'
27 | 


--------------------------------------------------------------------------------
/dimspy/models/__init__.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | #
 4 | # Copyright © 2017-2020 Ralf Weber, Albert Zhou.
 5 | #
 6 | # This file is part of DIMSpy.
 7 | #
 8 | # DIMSpy is free software: you can redistribute it and/or modify
 9 | # it under the terms of the GNU General Public License as published by
10 | # the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # DIMSpy is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 | # GNU General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU General Public License
19 | # along with DIMSpy.  If not, see <https://www.gnu.org/licenses/>.
20 | #
21 | 
22 | 
23 | from .peak_matrix import PeakMatrix, mask_peakmatrix, unmask_peakmatrix, unmask_all_peakmatrix
24 | from .peaklist import PeakList
25 | 
26 | 


--------------------------------------------------------------------------------
/dimspy/models/peaklist_metadata.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | #
 4 | # Copyright © 2017-2020 Ralf Weber, Albert Zhou.
 5 | #
 6 | # This file is part of DIMSpy.
 7 | #
 8 | # DIMSpy is free software: you can redistribute it and/or modify
 9 | # it under the terms of the GNU General Public License as published by
10 | # the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # DIMSpy is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 | # GNU General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU General Public License
19 | # along with DIMSpy.  If not, see <https://www.gnu.org/licenses/>.
20 | #
21 | 
22 | 
 23 | # DO NOT nest metadata objects, i.e., do not use metadata.metadata.attr.
 24 | # All attribute access methods are overridden.
class PeakList_Metadata(dict):
    """
    The PeakList_Metadata class.

    Dictionary-like container for PeakList metadata storage.

    :param args: Iterable object of key-value pairs
    :param kwargs: Metadata key-value pairs

    >>> PeakList_Metadata([('name', 'sample_1'), ('qc', False)])
    {'name': 'sample_1', 'qc': False}
    >>> PeakList_Metadata(name = 'sample_1', qc = False)
    {'name': 'sample_1', 'qc': False}

    metadata attributes can be accessed in both dictionary-like and property-like manners.

    >>> meta = PeakList_Metadata(name = 'sample_1', qc = False)
    >>> meta['name']
    'sample_1'
    >>> meta.qc
    False
    >>> del meta.qc
    >>> 'qc' in meta
    False

    .. warning::
        The *__getattr__*, *__setattr__*, and *__delattr__* methods are overridden. **DO NOT** assign a metadata object
        to another metadata object, e.g., metadata.metadata.attr = value.

    """

    def __getattr__(self, item):
        """
        Return the metadata value stored under *item*.

        Python only calls this hook after normal attribute lookup has failed, so
        regular dict methods are never shadowed by metadata keys.

        :param item: Attribute / metadata key name
        :raises AttributeError: If *item* is neither a metadata key nor an attribute
        """
        if item in self:
            return self[item]
        # Delegate to the default lookup so a missing name raises the usual AttributeError
        return super().__getattribute__(item)

    def __setattr__(self, item, value):
        """
        Store *value* as a metadata entry, or as a real attribute if one already exists.

        :param item: Attribute / metadata key name
        :param value: Value to store
        :raises ValueError: If *item* is ``'__dict__'`` or *value* is a metadata object
        """
        if item == '__dict__':
            raise ValueError('"__dict__" is not an acceptable metadata key')
        # isinstance (rather than an exact type comparison) so that instances of
        # subclasses are rejected as well; nesting metadata objects is not supported
        if isinstance(value, PeakList_Metadata):
            raise ValueError('metadata object is not an acceptable metadata value')

        if item in self.__dict__:
            # Name already lives in the instance namespace: update it there
            super().__setattr__(item, value)
        else:
            self[item] = value

    def __delattr__(self, item):
        """
        Remove the metadata entry *item*, falling back to regular attribute deletion.

        :param item: Attribute / metadata key name
        :raises AttributeError: If *item* is neither a metadata key nor a deletable attribute
        """
        if item in self:
            del self[item]
        else:
            super().__delattr__(item)
73 | 


--------------------------------------------------------------------------------
/dimspy/models/peaklist_tags.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | # Copyright © 2017-2020 Ralf Weber, Albert Zhou.
  5 | #
  6 | # This file is part of DIMSpy.
  7 | #
  8 | # DIMSpy is free software: you can redistribute it and/or modify
  9 | # it under the terms of the GNU General Public License as published by
 10 | # the Free Software Foundation, either version 3 of the License, or
 11 | # (at your option) any later version.
 12 | #
 13 | # DIMSpy is distributed in the hope that it will be useful,
 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 | # GNU General Public License for more details.
 17 | #
 18 | # You should have received a copy of the GNU General Public License
 19 | # along with DIMSpy.  If not, see <https://www.gnu.org/licenses/>.
 20 | #
 21 | 
 22 | 
 23 | from __future__ import annotations
 24 | 
 25 | from typing import Union
 26 | 
 27 | 
 28 | class Tag(object):
 29 |     """
 30 |     The Tag class.
 31 | 
 32 |     This class is mainly used in PeakList and PeakMatrix classes for sample filtering.
 33 | 
 34 |     :param value: Tag value, must be number (int, float), string (ascii, unicode), or Tag object (ignore ttype setting)
 35 |     :param ttype: Tag type, must be string or None (untyped), default = None
 36 | 
 37 |     Single value will be treated as untyped tag:
 38 | 
 39 |     >>> tag = Tag(1)
 40 |     >>> tag == 1
 41 |     True
 42 |     >>> tag = Tag(1, 'batch')
 43 |     >>> tag == 1
 44 |     False
 45 | 
 46 |     """
 47 | 
 48 |     def __init__(self, value: Union[int, float, str, Tag], ttype: Union[str, None] = None):
 49 |         self._value, self._type = None, None
 50 |         self.value, self.ttype = (value.value, value.ttype) if isinstance(value, Tag) else (value, ttype)
 51 | 
 52 |     @property
 53 |     def value(self):
 54 |         """
 55 |         Property of tag value.
 56 | 
 57 |         :getter: Returns the value of the tag
 58 |         :setter: Set the tag value, must be number or string
 59 |         :type: int, float, str, unicode
 60 | 
 61 |         """
 62 |         return self._value
 63 | 
 64 |     @value.setter
 65 |     def value(self, value: Union[int, float, str]):  # numpy types should be manually converted
 66 |         self._value = value
 67 | 
 68 |     @property
 69 |     def ttype(self):
 70 |         """
 71 |         Property of tag type. None indicates untyped tag.
 72 | 
 73 |         :getter: Returns the type of the tag
 74 |         :setter: Set the tag type, must be None or string
 75 |         :type: None, str, unicode
 76 | 
 77 |         """
 78 |         return self._type
 79 | 
 80 |     @ttype.setter
 81 |     def ttype(self, value: Union[str, None]):
 82 |         if value in ('None', ''):  # reserve for hdf5 protal
 83 |             raise KeyError('["%s"] is not an acceptable tag type' % value)
 84 |         self._type = None if value is None else value
 85 | 
 86 |     @property
 87 |     def typed(self):
 88 |         """
 89 |         Property to decide if the tag is typed or untyped.
 90 | 
 91 |         :getter: Returns typed status of the tag
 92 |         :type: bool
 93 | 
 94 |         """
 95 |         return not self._type is None
 96 | 
 97 |     def __eq__(self, other: Union[int, float, str, Tag]):
 98 |         v, t = (other.value, other.ttype) if isinstance(other, Tag) else (other, None)
 99 |         return v == self.value and ((t is None and self.ttype is None) or (t == self.ttype))
100 | 
101 |     def __ne__(self, other: Union[int, float, str, Tag]):
102 |         return not self.__eq__(other)
103 | 
104 |     def __str__(self):
105 |         return str(self._value) if self._type is None else (self._type + ':' + str(self._value))
106 | 
107 | 
class PeakList_Tags(object):
    """
    The PeakList_Tags class.

    Container for both typed and untyped tags. This class is mainly used in PeakList and PeakMatrix classes for sample filtering.
    For a PeakList the tag types must be unique, but not the tag values (unless they are untyped).
    For instance, PeakList can have tags batch = 1 and plate = 1, but not batch = 1 and batch = 2, or (untyped) 1 and (untyped) 1.
    Single value will be treated as untyped tag.

    :param args: List of untyped tags
    :param kwargs: List of typed tags. Only one tag value can be assigned to a specific tag type

    >>> PeakList_Tags('untyped_tag1', Tag('untyped_tag2'), Tag('typed_tag', 'tag_type'))
    >>> PeakList_Tags(tag_type1 = 'tag_value1', tag_type2 = 'tag_value2')

    """

    def __init__(self, *args, **kwargs):
        self._tags = []
        for value in args:
            self.add_tag(value)
        for tag_type, value in kwargs.items():
            self.add_tag(value, tag_type)

    # built-ins
    def __str__(self):
        return self.to_str()

    def __contains__(self, item: Union[int, float, str, Tag]):
        # list membership relies on Tag.__eq__, so a bare value matches an untyped tag
        return item in self._tags

    def __len__(self):
        return len(self._tags)

    # properties
    @property
    def tag_types(self):
        """
        Property of included tag types. None indicates untyped tags included.

        :getter: Returns a set containing the tag types of all tags; the set contains None when untyped tags are present
        :type: set

        """
        return {x.ttype for x in self._tags}

    @property
    def tag_values(self):
        """
        Property of included tag values. Same tag values will be merged

        :getter: Returns a set containing all the tag values, both typed and untyped tags
        :type: set

        """
        return {x.value for x in self._tags}

    @property
    def tags(self):
        """
        Property of all included tags.

        :getter: Returns a tuple containing all the tags, both typed and untyped
        :type: tuple

        """
        return tuple(self._tags)

    @property
    def typed_tags(self):
        """
        Property of included typed tags.

        :getter: Returns a tuple containing all the typed tags
        :type: tuple

        """
        return tuple(x for x in self._tags if x.typed)

    @property
    def untyped_tags(self):
        """
        Property of included untyped tags.

        :getter: Returns a tuple containing all the untyped tags
        :type: tuple

        """
        return tuple(x for x in self._tags if not x.typed)

    # methods
    def has_tag(self, tag: Union[int, float, str, Tag], tag_type: Union[str, None] = None):
        """
        Checks whether there exists a specific tag.

        :param tag: The tag for checking
        :param tag_type: The type of the tag, ignored when ``tag`` is a Tag object
        :rtype: bool

        >>> tags = PeakList_Tags('untyped_tag1', Tag('tag_value1', 'tag_type1'))
        >>> tags.has_tag('untyped_tag1')
        True
        >>> tags.has_tag('typed_tag1')
        False
        >>> tags.has_tag(Tag('tag_value1', 'tag_type1'))
        True
        >>> tags.has_tag('tag_value1', 'tag_type1')
        True

        """
        if isinstance(tag, Tag) or tag_type is None:
            return tag in self._tags
        return Tag(tag, tag_type) in self._tags

    def has_tag_type(self, tag_type: Union[str, None] = None):
        """
        Checks whether there exists a specific tag type.

        :param tag_type: The tag type for checking, None indicates untyped tags
        :rtype: bool

        """
        return tag_type in self.tag_types

    def tag_of(self, tag_type: Union[str, None] = None):
        """
        Returns tag value of the given tag type, or tuple of untyped tags if tag_type is None.

        :param tag_type: Valid tag type, None for untyped tags
        :rtype: Tag (typed), tuple of Tag (untyped), or None if tag_type not exists

        """
        matches = [x for x in self._tags if x.ttype == tag_type]
        if not matches:
            return None
        return tuple(matches) if tag_type is None else matches[0]

    def add_tag(self, tag: Union[int, float, str, Tag], tag_type: Union[str, None] = None):
        """
        Adds typed or untyped tag.

        :param tag: Tag or tag value to add
        :param tag_type: Type of the tag value
        :raises KeyError: if a tag with the same tag type already exists
        :raises ValueError: if an identical tag already exists

        >>> tags = PeakList_Tags()
        >>> tags.add_tag('untyped_tag1')
        >>> tags.add_tag(Tag('typed_tag1', 'tag_type1'))
        >>> tags.add_tag('typed_tag2', 'tag_type2')

        """
        if tag_type is not None and self.has_tag_type(tag_type):
            raise KeyError('tag type %s already exists' % tag_type)
        tag = Tag(tag, tag_type)
        if self.has_tag(tag):
            raise ValueError('tag already exist')
        # A typed Tag object carries its own type, which the tag_type check above never sees;
        # enforce the one-tag-per-type rule for it here as well.
        if tag.typed and self.has_tag_type(tag.ttype):
            raise KeyError('tag type %s already exists' % tag.ttype)
        self._tags.append(tag)

    def drop_tag(self, tag: Union[int, float, str, Tag], tag_type: Union[str, None] = None):
        """
        Drops typed and untyped tag. Nothing happens if the tag is not present.

        :param tag: Tag or tag value to drop
        :param tag_type: Type of the tag value

        >>> tags = PeakList_Tags('untyped_tag1', tag_type1 = 'tag_value1')
        >>> tags.drop_tag(Tag('tag_value1', 'tag_type1'))
        >>> print(tags)
        untyped_tag1

        """
        target = Tag(tag, tag_type)
        self._tags = [x for x in self._tags if x != target]

    def drop_tag_type(self, tag_type: Union[str, None] = None):
        """
        Drops the tag with the given type.

        :param tag_type: Tag type to drop, None (untyped) may drop multiple tags

        """
        self._tags = [x for x in self._tags if x.ttype != tag_type]

    def drop_all_tags(self):
        """
        Drops all tags, both typed and untyped.

        """
        self._tags = []

    # portals
    def to_list(self):
        """
        Exports tags to a list. Each element is a tuple of (tag value, tag type).

        >>> tags = PeakList_Tags('untyped_tag1', tag_type1 = 'tag_value1')
        >>> tags.to_list()
        [('untyped_tag1', None), ('tag_value1', 'tag_type1')]

        :rtype: list

        """
        return [(t.value, t.ttype) for t in self._tags]

    def to_str(self):
        """
        Exports tags to a string. It is also used implicitly by str() and print():

        >>> tags = PeakList_Tags('untyped_tag1', tag_type1 = 'tag_value1')
        >>> print(tags)
        untyped_tag1, tag_type1:tag_value1

        :rtype: str

        """
        return ', '.join(map(str, self._tags))
318 | 


--------------------------------------------------------------------------------
/dimspy/portals/ThermoRawFileReader_3_0_41/Libraries/ThermoFisher.CommonCore.BackgroundSubtraction.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/computational-metabolomics/dimspy/4a0b8982382b8198f2e28864a7f0263ce8a49c49/dimspy/portals/ThermoRawFileReader_3_0_41/Libraries/ThermoFisher.CommonCore.BackgroundSubtraction.dll


--------------------------------------------------------------------------------
/dimspy/portals/ThermoRawFileReader_3_0_41/Libraries/ThermoFisher.CommonCore.Data.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/computational-metabolomics/dimspy/4a0b8982382b8198f2e28864a7f0263ce8a49c49/dimspy/portals/ThermoRawFileReader_3_0_41/Libraries/ThermoFisher.CommonCore.Data.dll


--------------------------------------------------------------------------------
/dimspy/portals/ThermoRawFileReader_3_0_41/Libraries/ThermoFisher.CommonCore.MassPrecisionEstimator.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/computational-metabolomics/dimspy/4a0b8982382b8198f2e28864a7f0263ce8a49c49/dimspy/portals/ThermoRawFileReader_3_0_41/Libraries/ThermoFisher.CommonCore.MassPrecisionEstimator.dll


--------------------------------------------------------------------------------
/dimspy/portals/ThermoRawFileReader_3_0_41/Libraries/ThermoFisher.CommonCore.RawFileReader.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/computational-metabolomics/dimspy/4a0b8982382b8198f2e28864a7f0263ce8a49c49/dimspy/portals/ThermoRawFileReader_3_0_41/Libraries/ThermoFisher.CommonCore.RawFileReader.dll


--------------------------------------------------------------------------------
/dimspy/portals/ThermoRawFileReader_3_0_41/License/RawFileRdr_License_Agreement_RevA.doc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/computational-metabolomics/dimspy/4a0b8982382b8198f2e28864a7f0263ce8a49c49/dimspy/portals/ThermoRawFileReader_3_0_41/License/RawFileRdr_License_Agreement_RevA.doc


--------------------------------------------------------------------------------
/dimspy/portals/__init__.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | #
 4 | # Copyright © 2017-2020 Ralf Weber, Albert Zhou.
 5 | #
 6 | # This file is part of DIMSpy.
 7 | #
 8 | # DIMSpy is free software: you can redistribute it and/or modify
 9 | # it under the terms of the GNU General Public License as published by
10 | # the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # DIMSpy is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 | # GNU General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU General Public License
19 | # along with DIMSpy.  If not, see <https://www.gnu.org/licenses/>.
20 | #
21 | 


--------------------------------------------------------------------------------
/dimspy/portals/mzml_portal.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | # Copyright © 2017-2020 Ralf Weber, Albert Zhou.
  5 | #
  6 | # This file is part of DIMSpy.
  7 | #
  8 | # DIMSpy is free software: you can redistribute it and/or modify
  9 | # it under the terms of the GNU General Public License as published by
 10 | # the Free Software Foundation, either version 3 of the License, or
 11 | # (at your option) any later version.
 12 | #
 13 | # DIMSpy is distributed in the hope that it will be useful,
 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 | # GNU General Public License for more details.
 17 | #
 18 | # You should have received a copy of the GNU General Public License
 19 | # along with DIMSpy.  If not, see <https://www.gnu.org/licenses/>.
 20 | #
 21 | 
 22 | 
 23 | import collections
 24 | import os
 25 | from io import BytesIO
 26 | from typing import Sequence, Union
 27 | 
 28 | import numpy as np
 29 | import pymzml
 30 | 
 31 | from ..metadata import mz_range_from_header
 32 | from ..models.peaklist import PeakList
 33 | 
 34 | 
 35 | class Mzml:
 36 |     """
 37 |     mzML portal
 38 |     """
 39 |     def __init__(self, filename: Union[str, BytesIO], **kwargs):
 40 |         """
 41 |         Initialise a object interface to a mzML file.
 42 | 
 43 |         :param filename: Path to the mzML file
 44 |         :param kwargs:
 45 | 
 46 |         """
 47 |         self.filename = filename
 48 | 
 49 |         if not isinstance(filename, BytesIO) and not os.path.isfile(self.filename):
 50 |             raise IOError("{} does not exist".format(self.filename))
 51 | 
 52 |         if not isinstance(filename, BytesIO) and not self.filename.lower().endswith(".mzml") and not self.filename.lower().endswith(".mzml.gz"):
 53 |             raise IOError('Incorrect file format for mzML parser')
 54 | 
 55 |         if "ms_precisions" in kwargs:
 56 |             self.ms_precisions = kwargs["ms_precisions"]
 57 |         else:
 58 |             self.ms_precisions = dict(zip(range(3, 11), 8 * [5e-6]))
 59 | 
 60 |         self._sids = self._scan_ids()
 61 | 
 62 |         self.run = pymzml.run.Reader(self.filename)
 63 |         self.run.ms_precisions.update(self.ms_precisions)
 64 |         self.timestamp = self.run.info["start_time"]
 65 | 
 66 |     def headers(self) -> collections.OrderedDict:
 67 |         """
 68 |         Get all unique header or filter strings and associated scan ids.
 69 |         :return: Dictionary
 70 |         """
 71 |         h_sids = collections.OrderedDict()
 72 |         for scan_id in self._sids:
 73 |             if 'MS:1000512' in self.run[scan_id]:
 74 |                 h_sids.setdefault(self.run[scan_id]['MS:1000512'], []).append(scan_id)
 75 |         return h_sids
 76 | 
 77 |     def _scan_ids(self) -> collections.OrderedDict:
 78 |         sids_h = collections.OrderedDict()
 79 |         run = pymzml.run.Reader(self.filename)
 80 |         run.ms_precisions.update(self.ms_precisions)
 81 |         for scan in run:
 82 |             if 'MS:1000512' in scan:
 83 |                 sids_h[scan.ID] = str(scan['MS:1000512'])
 84 |             else:
 85 |                 sids_h[scan.ID] = None
 86 |         run.close()
 87 |         return sids_h
 88 | 
 89 |     def scan_ids(self) -> collections.OrderedDict:
 90 |         """
 91 |         Get all scan ids and associated headers or filter strings.
 92 |         :return: Dictionary
 93 |         """
 94 |         return self._sids
 95 | 
 96 |     def peaklist(self, scan_id, function_noise="median") -> PeakList:
 97 |         """
 98 |         Create a peaklist object for a specific scan id.
 99 |         :param scan_id: Scan id
100 |         :param function_noise: Function to calculate the noise from each scan. The following options are available:
101 | 
102 |         * **median** - the median of all peak intensities within a given scan is used as the noise value.
103 | 
104 |         * **mean** - the unweighted mean average of all peak intensities within a given scan is used as the noise value.
105 | 
106 |         * **mad (Mean Absolute Deviation)** - the noise value is set as the mean of the absolute differences between peak
107 |           intensities and the mean peak intensity (calculated across all peak intensities within a given scan).
108 | 
109 |         :return: PeakList object
110 |         """
111 |         if function_noise not in ["mean", "median", "mad"]:
112 |             raise ValueError("select a function that is available [mean, median, mad]")
113 | 
114 |         scan = self.run[scan_id]
115 |         peaks = scan.peaks("raw")
116 |         if len(peaks) > 0:
117 |             mzs, ints = list(zip(*peaks))
118 |         else:
119 |             mzs, ints = [], []
120 | 
121 |         scan_time = scan["MS:1000016"]
122 |         tic = scan["total ion current"]
123 |         if "MS:1000927" in scan:
124 |             ion_injection_time = scan["MS:1000927"]
125 |         else:
126 |             ion_injection_time = None
127 |         header = scan['MS:1000512']
128 |         if header:
129 |             mz_range = mz_range_from_header(header)
130 |         else:
131 |             mz_range = [None, None]
132 |         ms_level = scan['ms level']
133 |         pl = PeakList(ID=scan.ID, mz=mzs, intensity=ints,
134 |                       mz_range=mz_range,
135 |                       header=header,
136 |                       ms_level=ms_level,
137 |                       ion_injection_time=ion_injection_time,
138 |                       scan_time=scan_time,
139 |                       tic=tic,
140 |                       function_noise=function_noise)
141 |         snr = np.divide(ints, scan.estimated_noise_level(mode=function_noise))
142 |         pl.add_attribute('snr', snr)
143 |         return pl
144 | 
145 |     def peaklists(self, scan_ids, function_noise="median") -> Sequence[PeakList]:
146 |         """
147 |         Create a list of peaklist objects for each scan id in the list.
148 |         :param scan_ids: List of scan ids
149 | 
150 |         :param function_noise: Function to calculate the noise from each scan. The following options are available:
151 | 
152 |         * **median** - the median of all peak intensities within a given scan is used as the noise value.
153 | 
154 |         * **mean** - the unweighted mean average of all peak intensities within a given scan is used as the noise value.
155 | 
156 |         * **mad (Mean Absolute Deviation)** - the noise value is set as the mean of the absolute differences between peak
157 |           intensities and the mean peak intensity (calculated across all peak intensities within a given scan).
158 | 
159 |         * **noise_packets** - the noise value is calculated using the proprietary algorithms contained in Thermo Fisher
160 |           Scientific’s msFileReader library. This option should only be applied when you are processing .RAW files.
161 | 
162 |         :return: List of PeakList objects
163 |         """
164 |         if function_noise not in ["mean", "median", "mad"]:
165 |             raise ValueError("select a function that is available [mean, median, mad]")
166 | 
167 |         return [self.peaklist(scan_id, function_noise) for scan_id in scan_ids if scan_id in self._sids]
168 | 
169 |     def tics(self) -> collections.OrderedDict:
170 |         """
171 |         Get all TIC values and associated scan ids
172 |         :return: Dictionary
173 |         """
174 |         tic_values = collections.OrderedDict()
175 |         for scan_id in self._sids:
176 |             tic_values[scan_id] = self.run[scan_id].TIC
177 |         return tic_values
178 | 
179 |     def ion_injection_times(self) -> collections.OrderedDict:
180 |         """
181 |         Get all ion injection time values and associated scan ids
182 |         :return: Dictionary
183 |         """
184 |         iits = collections.OrderedDict()
185 |         for scan_id in self._sids:
186 |             scan = self.run[scan_id]
187 |             if "MS:1000927" in scan:
188 |                 iits[scan_id] = scan["MS:1000927"]
189 |             else:
190 |                 iits[scan_id] = None
191 |         return iits
192 | 
193 |     def scan_dependents(self) -> list:
194 |         """
195 |         Get a nested list of scan id pairs. Each pair represents a fragementation event.
196 |         :return: List
197 |         """
198 |         l = []
199 |         for scan_id in self._sids:
200 |             scan = self.run[scan_id]
201 |             if scan.selected_precursors:
202 |                 precursor = scan.element.find("./{}precursorList/{}precursor".format(scan.ns, scan.ns))
203 |                 l.append([int(precursor.get("spectrumRef").split("scan=")[1]), scan.ID])
204 |         return l
205 | 
206 |     def close(self):
207 |         """
208 |         Close the reader/file object
209 |         :return: None
210 |         """
211 |         self.run.close()
212 | 


--------------------------------------------------------------------------------
/dimspy/portals/paths.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | # Copyright © 2017-2020 Ralf Weber, Albert Zhou.
  5 | #
  6 | # This file is part of DIMSpy.
  7 | #
  8 | # DIMSpy is free software: you can redistribute it and/or modify
  9 | # it under the terms of the GNU General Public License as published by
 10 | # the Free Software Foundation, either version 3 of the License, or
 11 | # (at your option) any later version.
 12 | #
 13 | # DIMSpy is distributed in the hope that it will be useful,
 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 | # GNU General Public License for more details.
 17 | #
 18 | # You should have received a copy of the GNU General Public License
 19 | # along with DIMSpy.  If not, see <https://www.gnu.org/licenses/>.
 20 | #
 21 | 
 22 | 
 23 | import os
 24 | 
 25 | import h5py
 26 | import numpy as np
 27 | from datetime import datetime
 28 | import platform
 29 | 
 30 | from ..models.peaklist import PeakList
 31 | from ..portals import hdf5_portal
 32 | from ..portals.mzml_portal import Mzml
 33 | from ..portals.thermo_raw_portal import ThermoRaw
 34 | 
 35 | 
 36 | def sort_ms_files_by_timestamp(ps):
 37 |     """
 38 |     Sort a set directory of .mzml or .raw files
 39 | 
 40 |     :param ps: List of paths
 41 |     :return List
 42 |     """
 43 |     s_files = {}
 44 |     for i, fn in enumerate(ps):
 45 |         if fn.lower().endswith(".raw"):
 46 |             run = ThermoRaw(fn)
 47 | 
 48 |         elif fn.lower().endswith(".mzml"):
 49 |             run = Mzml(fn)
 50 |         else:
 51 |             continue
 52 |         s_files[fn] = str(run.timestamp)
 53 |         run.close()
 54 | 
 55 |     if list(s_files.keys())[0].lower().endswith(".mzml"):
 56 |         pattern = "%Y-%m-%dT%H:%M:%SZ"
 57 |         s_files_sorted = sorted(s_files.items(), key=lambda x: datetime.strptime(x[1], pattern), reverse=False)
 58 |     else:
 59 |         try:
 60 |             pattern = "%d/%m/%Y %H:%M:%S"
 61 |             s_files_sorted = sorted(s_files.items(), key=lambda x: datetime.strptime(x[1], pattern), reverse=False)
 62 |         except:
 63 |             pattern = "%m/%d/%Y %I:%M:%S %p"
 64 |             s_files_sorted = sorted(s_files.items(), key=lambda x: datetime.strptime(x[1], pattern), reverse=False)
 65 | 
 66 |     return s_files_sorted
 67 | 
 68 | 
 69 | def validate_and_sort_paths(source, tsv):
 70 |     """
 71 |     Validate and sort a set (i.e. directory or hdf5 file) of .mzml or .raw files.
 72 | 
 73 |     :param tsv: Path to tab-separated file
 74 |     :param source: Path to a Path to the .hdf5 file to read from.
 75 |     :return: List
 76 |     """
 77 |     if tsv is None:
 78 |         if type(source) == str:
 79 |             if os.path.isdir(source):
 80 |                 filenames = [os.path.join(source, fn) for fn in os.listdir(source) if
 81 |                              fn.lower().endswith(".mzml") or fn.lower().endswith(".raw")]
 82 |                 filenames = [fd[0] for fd in sort_ms_files_by_timestamp(filenames)]
 83 | 
 84 |             elif h5py.is_hdf5(source):
 85 |                 peaklists = hdf5_portal.load_peaklists_from_hdf5(source)
 86 |                 filenames = [os.path.join(os.path.abspath(os.path.dirname(source)), pl.ID) for pl in peaklists]
 87 |             elif os.path.isfile(source):
 88 |                 if source.lower().endswith(".raw") or source.lower().endswith(".mzml"):
 89 |                     filenames = [source]
 90 |                 else:
 91 |                     raise IOError("Incorrect file format, provide .mzml or .raw files: {}".format(source))
 92 |             else:
 93 |                 raise IOError("[Errno 2] No such file or directory: {}".format(source))
 94 | 
 95 |         elif type(source) == list or type(source) == tuple:
 96 |             if isinstance(source[0], PeakList):
 97 |                 filenames = [pl.ID for pl in source]
 98 |             else:
 99 |                 filenames = []
100 |                 for fn in source:
101 |                     if os.path.isfile(fn):
102 |                         if fn.lower().endswith(".raw") or fn.lower().endswith(".mzml"):
103 |                             filenames.append(fn)
104 |                         else:
105 |                             raise IOError("Incorrect file format, provide .mzml or .raw files: {}".format(source))
106 |                     else:
107 |                         raise IOError("[Errno 2] No such file or directory: {}".format(source))
108 |         else:
109 |             raise IOError("[Errno 2] No such file or directory: {}".format(source))
110 | 
111 |     elif os.path.isfile(tsv):
112 |         fm = np.genfromtxt(tsv, dtype=None, delimiter="\t", names=True, encoding=None)
113 |         if len(fm.shape) == 0:
114 |             fm = np.array([fm])
115 |         if fm.dtype.names[0] != "filename" and fm.dtype.names[0] != "sample_id":
116 |             raise IOError("Incorrect header for first column. Use filename or sample_id")
117 | 
118 |         filenames = []
119 |         if type(source) == list or type(source) == tuple:
120 |             if isinstance(source[0], PeakList):
121 |                 for filename in fm[fm.dtype.names[0]]:
122 |                     if filename in [pl.ID for pl in source]:
123 |                         filenames.append(filename)
124 |                     else:
125 |                         raise IOError("{} does not exist in list with Peaklist objects".format(filename))
126 |             else:
127 |                 for fn in source:
128 |                     if not os.path.isfile(fn):
129 |                         raise IOError("[Errno 2] No such file or directory: {}".format(fn))
130 | 
131 |                 for filename in fm[fm.dtype.names[0]]:
132 |                     fns = [os.path.basename(fn) for fn in source]
133 |                     if filename in fns:
134 |                         filenames.append(source[fns.index(filename)])
135 |                     else:
136 |                         raise IOError("{} (row {}) does not exist in source provided".format(filename, list(
137 |                             fm[fm.dtype.names[0]]).index(filename) + 1))
138 | 
139 |         elif type(source) == str:
140 |             if os.path.isdir(source):
141 |                 l = os.listdir(source)
142 |                 for fn in fm[fm.dtype.names[0]]:
143 |                     if os.path.basename(fn) not in l:
144 |                         raise IOError("{} does not exist in directory provided".format(os.path.basename(fn)))
145 |                     filenames.append(os.path.join(source, fn))
146 | 
147 |             elif h5py.is_hdf5(source):
148 |                 peaklists = hdf5_portal.load_peaklists_from_hdf5(source)
149 |                 filenames = [pl.ID for pl in peaklists]
150 |             elif os.path.isfile(source):
151 |                 if source.lower().endswith(".raw") or source.lower().endswith(".mzml"):
152 |                     filenames.append(source)
153 |                 else:
154 |                     raise IOError("Incorrect file format, provide .mzml or .raw files: {}".format(source))
155 |             else:
156 |                 raise IOError("[Errno 2] No such file or directory: {} or {}".format(source, tsv))
157 |     else:
158 |         raise IOError("[Errno 2] No such file or directory: {}".format(tsv))
159 | 
160 |     return filenames
161 | 


--------------------------------------------------------------------------------
/dimspy/portals/thermo_raw_portal.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | # Copyright © 2017-2020 Ralf Weber, Albert Zhou.
  5 | #
  6 | # This file is part of DIMSpy.
  7 | #
  8 | # DIMSpy is free software: you can redistribute it and/or modify
  9 | # it under the terms of the GNU General Public License as published by
 10 | # the Free Software Foundation, either version 3 of the License, or
 11 | # (at your option) any later version.
 12 | #
 13 | # DIMSpy is distributed in the hope that it will be useful,
 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 | # GNU General Public License for more details.
 17 | #
 18 | # You should have received a copy of the GNU General Public License
 19 | # along with DIMSpy.  If not, see <https://www.gnu.org/licenses/>.
 20 | #
 21 | 
 22 | 
 23 | import collections
 24 | import os
 25 | from typing import Sequence, Union
 26 | import re
 27 | import sys
 28 | 
 29 | import numpy as np
 30 | from ..models.peaklist import PeakList
 31 | 
 32 | try:
 33 |     import clr
 34 |     sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), "ThermoRawFileReader_3_0_41/Libraries"))
 35 |     clr.AddReference('ThermoFisher.CommonCore.RawFileReader')
 36 |     clr.AddReference('ThermoFisher.CommonCore.Data')
 37 |     import ThermoFisher.CommonCore.Data.Business as Business
 38 |     import ThermoFisher.CommonCore.RawFileReader as RawFileReader
 39 | except ImportError:
 40 |     import warnings
 41 |     warnings.warn("""
 42 |                   DIMSpy requires the Mono framework in order to read and process .raw files. 
 43 |                   Install dimspy via conda (highly recommended) to automatically install Mono 
 44 |                   (see https://dimspy.readthedocs.io/en/latest/installation.html) or 
 45 |                   install Mono from (https://www.mono-project.com). 
 46 |                   You can ignore this warning if you use DIMSpy to read and process .mzML files.
 47 |                   """)
 48 | 
 49 | 
 50 | def mz_range_from_header(h: str) -> list:
 51 |     """
 52 |     Extract the m/z range from a header or filterstring
 53 | 
 54 |     :param h: str
 55 |     :return: Sequence[float, float]
 56 |     """
 57 |     return [float(m) for m in re.findall(r'([\w\.-]+)-([\w\.-]+)', h)[0]]
 58 | 
 59 | 
 60 | class ThermoRaw:
 61 |     "ThermoRaw portal"
 62 |     def __init__(self, filename):
 63 |         """
 64 |         Initialise a object interface to a mzML file.
 65 | 
 66 |         :param filename: Path to the mzML file
 67 | 
 68 |         """
 69 |         self.run = RawFileReader.RawFileReaderAdapter.FileFactory(filename)
 70 |         self.run.SelectInstrument(Business.Device.MS, 1)
 71 |         self.filename = filename
 72 |         self.timestamp = self.run.CreationDate
 73 | 
 74 |     def headers(self) -> collections.OrderedDict:
 75 |         """
 76 |         Get all unique header or filter strings and associated scan ids.
 77 |         :return: Dictionary
 78 |         """
 79 |         sids = collections.OrderedDict()
 80 |         for scan_id in range(self.run.RunHeaderEx.FirstSpectrum, self.run.RunHeaderEx.LastSpectrum + 1):
 81 |             sids.setdefault(str(self.run.GetFilterForScanNumber(scan_id).Filter), []).append(scan_id)
 82 |         return sids
 83 | 
 84 |     def scan_ids(self) -> collections.OrderedDict:
 85 |         """
 86 |         Get all scan ids and associated headers or filter strings.
 87 |         :return: Dictionary
 88 |         """
 89 |         sids = collections.OrderedDict()
 90 |         for scan_id in range(self.run.RunHeaderEx.FirstSpectrum, self.run.RunHeaderEx.LastSpectrum + 1):
 91 |             sids[scan_id] = str(self.run.GetFilterForScanNumber(scan_id).Filter)
 92 |         return sids
 93 | 
 94 |     def peaklist(self, scan_id, function_noise="noise_packets") -> PeakList:
 95 |         """
 96 |         Create a peaklist object for a specific scan id.
 97 |         :param scan_id: Scan id
 98 |         :param function_noise: Function to calculate the noise from each scan. The following options are available:
 99 | 
100 |         * **median** - the median of all peak intensities within a given scan is used as the noise value.
101 | 
102 |         * **mean** - the unweighted mean average of all peak intensities within a given scan is used as the noise value.
103 | 
104 |         * **mad (Mean Absolute Deviation)** - the noise value is set as the mean of the absolute differences between peak
105 |           intensities and the mean peak intensity (calculated across all peak intensities within a given scan).
106 | 
107 |         * **noise_packets** - the noise value is calculated using the proprietary algorithms contained in Thermo Fisher
108 |           Scientific’s msFileReader library. This option should only be applied when you are processing .RAW files.
109 | 
110 |         :return: PeakList object
111 |         """
112 |         if function_noise not in ["noise_packets", "mean", "median", "mad"]:
113 |             raise ValueError("select a function that is available [noise_packets, mean, median, mad]")
114 | 
115 |         scan = self.run.GetCentroidStream(scan_id, False)
116 |         if scan.Masses is not None:
117 |             mz_ibn = list(
118 |                 zip(scan.Masses, scan.Intensities, scan.Baselines, scan.Noises))  # SignalToNoise not available
119 |             mz_ibn.sort()
120 |             mzs, ints, baseline, noise = list(zip(*mz_ibn))
121 |         else:
122 |             mzs, ints, baseline, noise = [], [], [], []
123 | 
124 |         if function_noise == "noise_packets" and len(ints) > 0:
125 |             snr = [p.SignalToNoise for p in scan.GetCentroids()]
126 |         elif function_noise == "median" and len(ints) > 0:
127 |             snr = ints / np.median(ints)
128 |         elif function_noise == "mean" and len(ints) > 0:
129 |             snr = ints / np.mean(ints)
130 |         elif function_noise == "mad" and len(ints) > 0:
131 |             snr = ints / np.median(np.abs(np.subtract(ints, np.median(ints))))
132 |         else:
133 |             snr = []
134 | 
135 |         scan_stats = self.run.GetScanStatsForScanNumber(scan_id)
136 | 
137 |         ion_injection_time = None
138 |         micro_scans = None
139 |         elapsed_scan_time = None
140 | 
141 |         extra_values = list(self.run.GetTrailerExtraInformation(scan_id).Values)
142 |         extra_labels = list(self.run.GetTrailerExtraInformation(scan_id).Labels)
143 |         for i, label in enumerate(extra_labels):
144 |             if "Ion Injection Time (ms):" == label:
145 |                 ion_injection_time = float(extra_values[i])
146 |             if "Elapsed Scan Time (sec):" == label:
147 |                 elapsed_scan_time = float(extra_values[i])
148 |             if "Micro Scan Count:" == label:
149 |                 micro_scans = float(extra_values[i])
150 | 
151 |         scan_time = float(scan_stats.StartTime)
152 |         tic = scan_stats.TIC
153 |         segment = scan_stats.SegmentNumber
154 |         header = str(self.run.GetScanEventStringForScanNumber(scan_id))
155 |         ms_level = header.count("@") + 1
156 | 
157 |         pl = PeakList(ID=scan_id, mz=mzs, intensity=ints,
158 |                       mz_range=mz_range_from_header(header),
159 |                       header=header,
160 |                       ms_level=ms_level,
161 |                       micro_scans=micro_scans,
162 |                       segment=segment,
163 |                       ion_injection_time=ion_injection_time,
164 |                       scan_time=scan_time,
165 |                       elapsed_scan_time=elapsed_scan_time,
166 |                       tic=tic,
167 |                       function_noise=function_noise)
168 | 
169 |         if len(pl.mz) > 0:
170 |             pl.add_attribute('snr', snr)
171 |             pl.add_attribute('noise', noise)
172 |             pl.add_attribute('baseline', baseline)
173 | 
174 |         return pl
175 | 
176 |     def peaklists(self, scan_ids, function_noise="noise_packets") -> Sequence[PeakList]:
177 |         """
178 |         Create a list of peaklist objects for each scan id in the list.
179 |         :param scan_ids: List of scan ids
180 | 
181 |         :param function_noise: Function to calculate the noise from each scan. The following options are available:
182 | 
183 |         * **median** - the median of all peak intensities within a given scan is used as the noise value.
184 | 
185 |         * **mean** - the unweighted mean average of all peak intensities within a given scan is used as the noise value.
186 | 
187 |         * **mad (Mean Absolute Deviation)** - the noise value is set as the mean of the absolute differences between peak
188 |           intensities and the mean peak intensity (calculated across all peak intensities within a given scan).
189 | 
190 |         * **noise_packets** - the noise value is calculated using the proprietary algorithms contained in Thermo Fisher
191 |           Scientific’s msFileReader library. This option should only be applied when you are processing .RAW files.
192 | 
193 |         :return: List of PeakList objects
194 |         """
195 |         if function_noise not in ["noise_packets", "mean", "median", "mad"]:
196 |             raise ValueError("select a function that is available [noise_packets, mean, median, mad]")
197 | 
198 |         return [self.peaklist(scan_id, function_noise=function_noise) for scan_id in scan_ids]
199 | 
200 |     def tics(self) -> collections.OrderedDict:
201 |         """
202 |         Get all TIC values and associated scan ids
203 |         :return: Dictionary
204 |         """
205 |         tics = collections.OrderedDict()
206 |         for scan_id in range(self.run.RunHeaderEx.FirstSpectrum, self.run.RunHeaderEx.LastSpectrum + 1):
207 |             scan_stats = self.run.GetScanStatsForScanNumber(scan_id)
208 |             tics[scan_id] = scan_stats.TIC
209 |         return tics
210 | 
211 |     def ion_injection_times(self) -> collections.OrderedDict:
212 |         """
213 |         Get all TIC values and associated scan ids
214 |         :return: Dictionary
215 |         """
216 |         iits = collections.OrderedDict()
217 |         for scan_id in range(self.run.RunHeaderEx.FirstSpectrum, self.run.RunHeaderEx.LastSpectrum + 1):
218 |             extra_values = list(self.run.GetTrailerExtraInformation(scan_id).Values)
219 |             extra_labels = list(self.run.GetTrailerExtraInformation(scan_id).Labels)
220 |             for i, label in enumerate(extra_labels):
221 |                 if "Ion Injection Time (ms):" == label:
222 |                     iits[scan_id] = float(extra_values[i])
223 |             if scan_id not in iits:
224 |                 iits[scan_id] = None
225 |         return iits
226 | 
227 |     def scan_dependents(self) -> list:
228 |         """
229 |         Get a nested list of scan id pairs. Each pair represents a fragementation event.
230 |         :return: List
231 |         """
232 |         l = []
233 |         for scan_id in range(self.run.RunHeaderEx.FirstSpectrum, self.run.RunHeaderEx.LastSpectrum + 1):
234 |             gsd = self.run.GetScanDependents(scan_id, 5)
235 |             if gsd is not None:
236 |                 for i, d in enumerate(gsd.ScanDependentDetailArray):
237 |                     l.append([scan_id, d.ScanIndex])
238 |         return l
239 | 
240 |     def close(self):
241 |         """
242 |         Close the reader/file object
243 |         :return: None
244 |         """
245 |         self.run.Close()
246 | 


--------------------------------------------------------------------------------
/dimspy/portals/txt_portal.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | # Copyright © 2017-2020 Ralf Weber, Albert Zhou.
  5 | #
  6 | # This file is part of DIMSpy.
  7 | #
  8 | # DIMSpy is free software: you can redistribute it and/or modify
  9 | # it under the terms of the GNU General Public License as published by
 10 | # the Free Software Foundation, either version 3 of the License, or
 11 | # (at your option) any later version.
 12 | #
 13 | # DIMSpy is distributed in the hope that it will be useful,
 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 | # GNU General Public License for more details.
 17 | #
 18 | # You should have received a copy of the GNU General Public License
 19 | # along with DIMSpy.  If not, see <https://www.gnu.org/licenses/>.
 20 | #
 21 | 
 22 | 
 23 | import logging
 24 | import os
 25 | from ast import literal_eval
 26 | 
 27 | import numpy as np
 28 | from ..models.peak_matrix import PeakMatrix, unmask_all_peakmatrix
 29 | from ..models.peaklist import PeakList
 30 | from ..models.peaklist_tags import PeakList_Tags
 31 | 
 32 | 
 33 | def _evalv(vect):
 34 |     try:
 35 |         ctype = type(literal_eval(vect[0]))
 36 |     except (ValueError, SyntaxError):
 37 |         ctype = None
 38 |     return vect if ctype is None else list(map(ctype, vect))
 39 | 
 40 | 
 41 | # peaklist portals
 42 | def save_peaklist_as_txt(pkl: PeakList, filename: str, *args, **kwargs):
 43 |     """
 44 |     Saves a peaklist object to a plain text file.
 45 | 
 46 |     :param pkl: the target peaklist object
 47 |     :param filename: path to a new text file
 48 |     :param args: arguments to be passed to PeakList.to_str
 49 |     :param kwargs: keyword arguments to be passed to PeakList.to_str
 50 | 
 51 |     """
 52 |     if os.path.isfile(filename):
 53 |         logging.warning('plain text file [%s] already exists, override' % filename)
 54 |     with open(filename, 'w') as f: f.write(pkl.to_str(*args, **kwargs))
 55 | 
 56 | 
 57 | def load_peaklist_from_txt(filename: str, ID: any, delimiter: str = ',', flag_names: str = 'auto',
 58 |                            has_flag_col: bool = True):
 59 |     """
 60 |     Loads a peaklist from plain text file.
 61 | 
 62 |     :param filename: Path to an exiting text-based peaklist file
 63 |     :param ID: ID of the peaklist
 64 |     :param delimiter: Delimiter of the text lines. Default = ',', i.e., CSV format
 65 |     :param flag_names: Names of the flag attributes. Default = 'auto', indicating all the attribute names ends
 66 |         with "_flag" will be treated as flag attibute. Provide None to indicate no flag attributes
 67 |     :param has_flag_col: Whether the text file contains the overall "flags" column. If True, it's values will be
 68 |         discarded. The overall flags of the new peaklist will be calculated automatically. Default = True
 69 |     :rtype: PeakList object
 70 | 
 71 |     """
 72 |     if not os.path.isfile(filename):
 73 |         raise IOError('plain text file [%s] does not exist' % filename)
 74 |     with open(filename, 'r') as f:
 75 |         rlns = [x for x in map(str.strip, f.readlines()) if x != '']
 76 | 
 77 |     dlns = [list(map(str.strip, x.split(delimiter))) for x in rlns]
 78 |     if any([len(x) != len(dlns[0]) for x in dlns[1:]]):
 79 |         raise IOError('data matrix size not match')
 80 | 
 81 |     hd, dm = dlns[0], list(zip(*dlns[1:]))
 82 |     if has_flag_col:
 83 |         hd, dm = hd[:-1], dm[:-1]  # flag_col must be the last one, and discarded
 84 |     if len(set(hd)) != len(hd):
 85 |         raise IOError('duplicate headers found')
 86 | 
 87 |     mzs, ints = np.array(dm[0], dtype=float), np.array(dm[1], dtype=float)  # first two cols must be mz and ints
 88 |     pkl = PeakList(ID, mzs, ints)
 89 | 
 90 |     flag_names = [x for x in hd if x.endswith('_flag')] if flag_names == 'auto' else \
 91 |         [] if flag_names is None else set(flag_names)
 92 |     for n, v in zip(hd[2:], dm[2:]): pkl.add_attribute(n, _evalv(v), is_flag=n in flag_names, flagged_only=False)
 93 | 
 94 |     return pkl
 95 | 
 96 | 
 97 | # peak matrix portals
 98 | def save_peak_matrix_as_txt(pm: PeakMatrix, filename: str, *args, **kwargs):
 99 |     """
100 |     Saves a peak matrix in plain text file.
101 | 
102 |     :param pm: The target peak matrix object
103 |     :param filename: Path to a new text file
104 |     :param args: Arguments to be passed to PeakMatrix.to_str
105 |     :param kwargs: Keyword arguments to be passed to PeakMatrix.to_str
106 | 
107 |     """
108 |     if os.path.isfile(filename):
109 |         logging.warning('plain text file [%s] already exists, override' % filename)
110 |     with open(filename, 'w') as f:
111 |         with unmask_all_peakmatrix(pm) as m: f.write(m.to_str(*args, **kwargs))
112 | 
113 | 
def load_peak_matrix_from_txt(filename: str, delimiter: str = '\t', samples_in_rows: bool = True,
                              comprehensive: str = 'auto'):
    """
    Loads a peak matrix from plain text file.

    :param filename: Path to an existing text-based peak matrix file
    :param delimiter: Delimiter of the text lines. Default = tab, i.e., TSV format
    :param samples_in_rows: Whether or not the samples are stored in rows. Default = True
    :param comprehensive: Whether the input is a 'comprehensive' or 'simple' version of the matrix.
        Default = 'auto', i.e., auto detect (comprehensive if a 'flags' entry is present)
    :raises IOError: If the file does not exist, is empty, has lines with differing numbers of cells,
        or is comprehensive but lacks the 'rsd_all' entry
    :rtype: PeakMatrix object

    Flag values are parsed with ``ast.literal_eval``; they are never executed as code.

    """
    if not os.path.isfile(filename):
        raise IOError('plain text file [%s] does not exist' % filename)
    with open(filename, 'r') as f:
        # Lines keep their newline, so compare without it. Only truly empty lines are dropped:
        # a line made of delimiters is a row of empty cells and must be kept.
        rlns = [x for x in f if x.strip('\r\n') != '']
    if not rlns:
        raise IOError('plain text file [%s] is empty' % filename)

    dlns = [list(map(str.strip, x.split(delimiter))) for x in rlns]
    if any([len(x) != len(dlns[0]) for x in dlns[1:]]):
        raise IOError('data matrix size not match')

    if samples_in_rows: dlns = list(zip(*dlns))
    if comprehensive == 'auto': comprehensive = ('flags' in dlns[0])
    rdlns = list(zip(*dlns))

    # The position of the 'rsd_all' entry is only needed to parse the comprehensive layout.
    rsdrow = None
    if comprehensive:
        rsd_rows = [i for i, ln in enumerate(rdlns) if ln[0] == 'rsd_all']
        if not rsd_rows:
            raise IOError('comprehensive peak matrix file [%s] has no rsd_all entry' % filename)
        rsdrow = rsd_rows[0]

    def _parseflags():
        # flag entries follow 'rsd_all' and end at the overall 'flags' entry
        fgs = []
        for ln in rdlns[rsdrow + 1:]:
            if ln[0] == 'flags': break
            # literal_eval instead of eval: the values come from a file and must not be executed
            fgs += [(ln[0], list(map(literal_eval, [x for x in ln[1:] if x != ''])))]
        return fgs

    flgs = _parseflags() if comprehensive else []

    # must refactor if PeakMatrix.to_str changed
    pcol = rsdrow + len(flgs) + 2 if comprehensive else 1
    pids = dlns[0][pcol:]

    def _parsetags(tgs):
        # returns the index at which the 'tags_' entries stop, and the filled tag objects
        l = 0
        for l, ln in enumerate(dlns[2:]):  # line 1 = missing
            if not ln[0].startswith('tags_'): break
            tn, tv = ln[0][5:], ln[pcol:]
            tl = [x for x in enumerate(_evalv(tv)) if x[1] != '']
            for i, v in tl: tgs[i].add_tag(v) if tn == 'untyped' else tgs[i].add_tag(v, tn)
        return l, tgs

    tnum, tags = 0, [PeakList_Tags() for _ in pids]
    if comprehensive: tnum, tags = _parsetags(tags)

    rlns = list(zip(*dlns[2 + tnum:]))
    mz = np.array([rlns[0]] * len(pids), dtype=float)
    ints = np.array(rlns[pcol:], dtype=float)

    pm = PeakMatrix(pids, tags, [('mz', mz), ('intensity', ints)])
    for fn, fv in flgs: pm.add_flag(fn, fv, flagged_only=False)
    return pm
172 | 


--------------------------------------------------------------------------------
/dimspy/process/__init__.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | #
 4 | # Copyright © 2017-2020 Ralf Weber, Albert Zhou.
 5 | #
 6 | # This file is part of DIMSpy.
 7 | #
 8 | # DIMSpy is free software: you can redistribute it and/or modify
 9 | # it under the terms of the GNU General Public License as published by
10 | # the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # DIMSpy is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 | # GNU General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU General Public License
19 | # along with DIMSpy.  If not, see <https://www.gnu.org/licenses/>.
20 | #
21 | 
22 | 


--------------------------------------------------------------------------------
/dimspy/process/peak_filters.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | # Copyright © 2017-2020 Ralf Weber, Albert Zhou.
  5 | #
  6 | # This file is part of DIMSpy.
  7 | #
  8 | # DIMSpy is free software: you can redistribute it and/or modify
  9 | # it under the terms of the GNU General Public License as published by
 10 | # the Free Software Foundation, either version 3 of the License, or
 11 | # (at your option) any later version.
 12 | #
 13 | # DIMSpy is distributed in the hope that it will be useful,
 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 | # GNU General Public License for more details.
 17 | #
 18 | # You should have received a copy of the GNU General Public License
 19 | # along with DIMSpy.  If not, see <https://www.gnu.org/licenses/>.
 20 | #
 21 | 
 22 | 
 23 | import logging
 24 | from functools import reduce
 25 | from typing import Union, Sequence, Tuple, Any
 26 | 
 27 | import numpy as np
 28 | from ..models.peak_matrix import PeakMatrix, mask_peakmatrix, unmask_peakmatrix
 29 | from ..models.peaklist import PeakList
 30 | 
 31 | 
 32 | # peaklist filters
 33 | def filter_attr(pl: PeakList, attr_name: str, max_threshold: Union[int, float, None] = None,
 34 |                 min_threshold: [int, float, None] = None, flag_name: Union[str, None] = None,
 35 |                 flag_index: Union[int, None] = None):
 36 |     """
 37 |     Peaklist attribute values filter.
 38 | 
 39 |     :param pl: The target peaklist
 40 |     :param attr_name: Name of the target attribute
 41 |     :param max_threshold: Maximum threshold. A peak will be unflagged if the value of it's attr_name is larger than the
 42 |         threshold. Default = None, indicating no threshold
 43 |     :param min_threshold: Minimum threshold. A peak will be unflagged if the value of it's attr_name is smaller than the
 44 |         threshold. Default = None, indicating no threshold
 45 |     :param flag_name: Name of the new flag attribute. Default = None, indicating using attr_name + '_flag'
 46 |     :param flag_index: Index of the new flag to be inserted into the peaklist. Default = None
 47 |     :rtype: PeakList object
 48 | 
 49 |     This filter accepts real value attributes only.
 50 | 
 51 |     """
 52 |     if min_threshold is None and max_threshold is None:
 53 |         raise ValueError('must specify minimum or maximum threshold value')
 54 |     flt = lambda x: np.logical_and((min_threshold <= x) if min_threshold is not None else True,
 55 |                                    (x <= max_threshold) if max_threshold is not None else True)
 56 |     if flag_name is None: flag_name = attr_name + '_flag'
 57 |     return pl.add_attribute(flag_name, flt(pl[attr_name]), is_flag=True, on_index=flag_index)
 58 | 
 59 | 
 60 | def filter_ringing(pl: PeakList, threshold: float, bin_size: Union[int, float] = 1.0, flag_name: str = 'ringing_flag',
 61 |                    flag_index: Union[int, None] = None):
 62 |     """
 63 |     Peaklist ringing filter.
 64 | 
 65 |     :param pl: The target peaklist
 66 |     :param threshold: Intensity threshold ratio
 67 |     :param bin_size: size of the mz chunk for intensity filtering. Default = 1.0 ppm
 68 |     :param flag_name: Name of the new flag attribute. Default = 'ringing_flag'
 69 |     :param flag_index: Index of the new flag to be inserted into the peaklist. Default = None
 70 |     :rtype: PeakList object
 71 | 
 72 |     This filter will split the mz values into bin_size chunks, and search the highest intensity value for each chunk.
 73 |     All other peaks, if it's intensity is smaller than threshold x the highest intensity in that chunk, will be unflagged.
 74 | 
 75 |     """
 76 |     if not 0 <= threshold <= 1:
 77 |         raise ValueError('mzr_remove: Provide a value in the range [0.0, 1.0]')
 78 |     inds = np.digitize(pl.mz, np.arange(np.floor(np.min(pl.mz)), np.ceil(np.max(pl.mz)) + bin_size, bin_size) - 0.5)
 79 |     blks = [(inds == i) for i in np.unique(inds)]
 80 |     mask = np.array(reduce(lambda x, y: x + y, [[np.max(pl.intensity[c])] * np.sum(c) for c in blks]))
 81 |     return pl.add_attribute(flag_name, pl.intensity > (mask * threshold), is_flag=True, on_index=flag_index)
 82 | 
 83 | 
 84 | def filter_mz_ranges(pl: PeakList, mz_ranges: Sequence[Tuple[float, float]], flag_name: str = 'mz_ranges_flag',
 85 |                      flagged_only: bool = False, flag_index: Union[int, None] = None):
 86 |     """
 87 |     Peaklist mz range filter.
 88 | 
 89 |     :param pl: The target peaklist
 90 |     :param mz_ranges: The mz ranges to remove. Must be in the format of [(mz_min1, mz_max2), (mz_min2, mz_max2), ...]
 91 |     :param flag_name: Name of the new flag attribute. Default = 'mz_range_remove_flag'
 92 |     :param flag_index: Index of the new flag to be inserted into the peaklist. Default = None
 93 |     :rtype: PeakList
 94 | 
 95 |     This filter will remove all the peaks whose mz values are within any of the ranges in the mz_remove_rngs.
 96 | 
 97 |     """
 98 | 
 99 |     if flagged_only:
100 |         flags = np.ones(pl.shape[0], dtype=bool)
101 |     else:
102 |         flags = np.ones(pl.full_size, dtype=bool)
103 | 
104 |     for mzr in mz_ranges:
105 |         if len(mzr) != 2:
106 |             raise ValueError(
107 |                 'mzr_remove: Provide a list of "start" and "end" values for each m/z range that needs to be removed.')
108 |         if mzr[0] >= mzr[1]:
109 |             raise ValueError('mzr_remove: Start value cannot be larger then end value.')
110 |         flags[
111 |             (pl.get_attribute("mz", flagged_only) >= mzr[0]) & (pl.get_attribute("mz", flagged_only) <= mzr[1])] = False
112 |     pl.add_attribute(flag_name, flags, flagged_only=flagged_only, is_flag=True, on_index=flag_index)
113 |     return pl
114 | 
115 | 
116 | # PeakMatrix filters
def filter_rsd(pm: PeakMatrix, rsd_threshold: Union[int, float], qc_tag: Any, on_attr: str = 'intensity',
               flag_name: str = 'rsd_flag'):
    """
    PeakMatrix RSD filter.

    :param pm: The target peak matrix
    :param rsd_threshold: Threshold of the RSD of the QC samples
    :param qc_tag: Tag (label) to unmask qc samples
    :param on_attr: Calculate RSD on given attribute. Default = "intensity"
    :param flag_name: Name of the new flag. Default = 'rsd_flag'
    :rtype: PeakMatrix

    The RSD values of the QC samples are obtained from the peak matrix. A peak is unflagged when its QC RSD
    value is nan or larger than the threshold. A warning is logged if any RSD value is nan.

    """
    qc_rsd = pm.rsd(qc_tag, on_attr=on_attr)
    contains_nan = np.any(np.isnan(qc_rsd))
    if contains_nan:
        logging.warning('nan found in QC rsd values, filter might not work properly')

    keep = []
    for value in qc_rsd:
        # keep a peak only if its RSD is a number and does not exceed the threshold
        keep.append(not np.isnan(value) and not value > rsd_threshold)
    pm.add_flag(flag_name, keep)
    return pm
139 | 
140 | 
def filter_fraction(pm: PeakMatrix, fraction_threshold: float, within_classes: bool = False, class_tag_type: Any = None,
                    flag_name: str = 'fraction_flag'):
    """
    PeakMatrix fraction filter.

    :param pm: The target peak matrix
    :param fraction_threshold: Threshold of the sample fractions
    :param within_classes: Whether to calculate the fraction array within each class. Default = False
    :param class_tag_type: Tag type to unmask samples within the same class (e.g. "classLabel"). Default = None
    :param flag_name: Name of the new flag. Default = 'fraction_flag'
    :raises KeyError: If within_classes is True and no class_tag_type is given
    :raises AttributeError: If within_classes is True and not every sample has a tag of type class_tag_type
    :rtype: PeakMatrix object

    Without classes, a peak stays flagged if its fraction over all samples reaches the threshold. Within
    classes, a peak stays flagged if its fraction reaches the threshold in at least one class. All other
    peaks will be unflagged.

    """
    if not within_classes:
        pm.add_flag(flag_name, pm.fraction >= fraction_threshold)
        return pm

    if class_tag_type is None:
        raise KeyError('must provide class tag type for within classes filtering')
    typed = [tags.has_tag_type(class_tag_type) for tags in pm.peaklist_tags]
    if not all(typed):
        raise AttributeError('not all tags have tag type [%s]' % class_tag_type)

    passed_any_class = np.zeros(pm.shape[1])
    for tag in pm.tags_of(class_tag_type):
        with unmask_peakmatrix(pm, tag) as m:
            passed_here = m.fraction >= fraction_threshold
            passed_any_class = np.logical_or(passed_any_class, passed_here)
    pm.add_flag(flag_name, passed_any_class)
    return pm
170 | 
171 | 
def filter_blank_peaks(pm: PeakMatrix, blank_tag: Any, fraction_threshold: Union[int, float] = 1,
                       fold_threshold: Union[int, float] = 1,
                       method: str = 'mean', rm_blanks: bool = True, flag_name: str = 'blank_flag'):
    """
    PeakMatrix blank filter.

    :param pm: The target peak matrix
    :param blank_tag: Tag (label) to mask blank samples. e.g Tag("blank", "classLabel")
    :param fraction_threshold: Threshold of the sample fractions. Default = 1
    :param fold_threshold: Threshold of the blank sample intensity folds. Default = 1
    :param method: Method to calculate blank sample intensity array. Valid values include 'mean', 'median', and 'max'.
        Default = 'mean'
    :param rm_blanks: Whether to remove (not mask) blank samples after filtering
    :param flag_name: Name of the new flag. Default = 'blank_flag'
    :raises ValueError: If no sample carries the blank tag, or method is not one of the valid values
    :rtype: PeakMatrix object

    This filter will calculate the intensity array of the blanks using the "method" (intensities that are not
    larger than zero are excluded), and compare it with the intensities of the other samples. A peak will be
    unflagged if it has a blank intensity and fewer than fraction_threshold x number of samples have an
    intensity of at least blank intensity x fold_threshold.

    """
    if not any([blank_tag in x for x in pm.peaklist_tags]):
        raise ValueError('blank tag [%s] does not exist' % blank_tag)
    if method not in ('mean', 'median', 'max'):
        raise ValueError('filter method must be mean, median or max')

    with unmask_peakmatrix(pm, blank_tag) as m:
        # mask missing / zero intensities so they do not take part in the blank statistic
        mm = np.ma.masked_array(m.intensity_matrix, mask=~(m.intensity_matrix > 0))
        # Use the masked-array reductions: np.median ignores the mask of a masked array,
        # whereas np.ma.mean / np.ma.median / np.ma.max all honour it.
        ints = mm[0] if mm.shape[0] == 1 else getattr(np.ma, method)(mm, axis=0)
        # True where a peak has no usable blank intensity at all
        imsk = np.ma.getmaskarray(ints)
        ints = np.array(ints) * fold_threshold

    with mask_peakmatrix(pm, blank_tag) as m:
        faild_int = np.sum(m.intensity_matrix >= ints, axis=0) < (fraction_threshold * m.shape[0])
        # only peaks that have a blank intensity can fail the filter
        m.add_flag(flag_name, ~(~imsk & faild_int))

    if rm_blanks:
        pm = pm.remove_samples(np.where([x.has_tag(blank_tag) for x in pm.peaklist_tags])[0])
    return pm
211 | 
212 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line, and also
 5 | # from the environment for the first two.
 6 | SPHINXOPTS    ?=
 7 | SPHINXBUILD   ?= sphinx-build
 8 | SOURCEDIR     = source
 9 | BUILDDIR      = build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 | 


--------------------------------------------------------------------------------
/docs/source/api-reference.rst:
--------------------------------------------------------------------------------
 1 | API reference
 2 | =============
 3 | 
 4 | 
 5 | .. toctree::
 6 |     :maxdepth: 1
 7 | 
 8 |     dimspy.tools
 9 |     dimspy.metadata
10 |     dimspy.models
11 |     dimspy.portals
12 |     dimspy.process
13 | 


--------------------------------------------------------------------------------
/docs/source/bugs-and-issues.rst:
--------------------------------------------------------------------------------
1 | Bugs and Issues
2 | ===============
3 | 
4 | Please report any bugs that you find `here <https://github.com/computational-metabolomics/dimspy/issues>`_.
5 | Or fork the repository on `GitHub <https://github.com/computational-metabolomics/dimspy/>`_
6 | and create a pull request (PR). We welcome all contributions, and we will help you to make
7 | the PR if you are new to ``git``.
8 | 


--------------------------------------------------------------------------------
/docs/source/changelog.rst:
--------------------------------------------------------------------------------
 1 | Changelog
 2 | =========
 3 | 
 4 | All notable changes to this project will be documented here. For more detailed changes, please refer to the `GitHub <https://github.com/computational-metabolomics/dimspy>`_ commit history.
 5 | 
 6 | `DIMSpy v2.0.0 <https://github.com/computational-metabolomics/dimspy/releases/tag/v2.0.0>`_
 7 | -------------------------------------------------------------------------------------------
 8 | 
 9 | **Release date: 26 April 2020**
10 | 
11 | - First stable Python 3 only release
12 | - Refactor and improve HDF5 portal to save peaklists and/or peak matrices
13 | - Add compatibility for previous HDF5 files (python 2 version of DIMSpy)
14 | - Improve filelist handling
15 | - mzML or raw files are ordered by timestamp if no filelist is provided (i.e. process_scans)
16 | - Fix warnings (NaturalNameWarning, ResourceWarning, DeprecationWarning)
17 | - Fix 'blank filter' bug (missing and/or zero values are excluded)
18 | - Improve subsetting / filtering of scan events
19 | - Optimise imports
20 | - Increase `coverage tests <https://codecov.io/gh/computational-metabolomics/dimspy>`_
21 | - Improve documentation (`Read the Docs <https://dimspy.readthedocs.io/en/latest/>`_), including docstrings
22 | 
23 | 
24 | `DIMSpy v1.4.0 <https://github.com/computational-metabolomics/dimspy/releases/tag/v1.4.0>`_
25 | -------------------------------------------------------------------------------------------
26 | 
27 | **Release date: 2 October 2019**
28 | 
29 | - Final Python 2 release
30 | 
31 | 
32 | `DIMSpy v1.3.0 <https://github.com/computational-metabolomics/dimspy/releases/tag/v1.3.0>`_
33 | -------------------------------------------------------------------------------------------
34 | 
35 | **Release date: 26 November 2018**
36 | 
37 | 
38 | `DIMSpy v1.2.0 <https://github.com/computational-metabolomics/dimspy/releases/tag/v1.2.0>`_
39 | -------------------------------------------------------------------------------------------
40 | 
41 | **Release date: 29 May 2018**
42 | 
43 | 
44 | `DIMSpy v1.1.0 <https://github.com/computational-metabolomics/dimspy/releases/tag/v1.1.0>`_
45 | -------------------------------------------------------------------------------------------
46 | 
47 | **Release date: 19 February 2018**
48 | 
49 | 
50 | `DIMSpy v1.0.0 <https://github.com/computational-metabolomics/dimspy/releases/tag/v1.0.0>`_
51 | -------------------------------------------------------------------------------------------
52 | 
53 | **Release date: 10 December 2017**
54 | 
55 | 
56 | `DIMSpy v0.1.0 (pre-release) <https://github.com/computational-metabolomics/dimspy/releases/tag/v0.1.0>`_
57 | ---------------------------------------------------------------------------------------------------------
58 | 
59 | **Release date: 11 July 2017**
60 | 


--------------------------------------------------------------------------------
/docs/source/citation.rst:
--------------------------------------------------------------------------------
 1 | Citation
 2 | ========
 3 | 
 4 | To cite DIMSpy please use the following publication.
 5 | 
 6 | Check `Zenodo <https://zenodo.org/search?page=1&size=20&q=dimspy>`_ for citing more up-to-date versions of DIMSpy if not listed here.
 7 | 
 8 | 
 9 | **DIMSpy v2.0.0**
10 | 
11 |   Ralf J. M. Weber & Jiarui Zhou. (2020, April 24). DIMSpy: Python package for processing direct-infusion mass spectrometry-based metabolomics and lipidomics data (Version v2.0.0). Zenodo. http://doi.org/10.5281/zenodo.3764169
12 | 
13 | 
14 | BibTeX
15 | 
16 | .. code-block::
17 | 
18 |   @software{ralf_j_m_weber_2020_3764169,
19 |     author       = {Ralf J. M. Weber and
20 |                     Jiarui Zhou},
21 |     title        = {{DIMSpy: Python package for processing direct-
22 |                      infusion mass spectrometry-based metabolomics and
23 |                      lipidomics data}},
24 |     month        = april,
25 |     year         = 2020,
26 |     publisher    = {Zenodo},
27 |     version      = {v2.0.0},
28 |     doi          = {10.5281/zenodo.3764169},
29 |     url          = {https://doi.org/10.5281/zenodo.3764169}
30 |   }
31 | 
32 | 
33 | **DIMSpy v1.4.0**
34 | 
35 |   Ralf J. M. Weber & Jiarui Zhou. (2019, October 2). DIMSpy: Python package for processing direct-infusion mass spectrometry-based metabolomics and lipidomics data (Version v1.4.0). Zenodo. http://doi.org/10.5281/zenodo.3764110
36 | 
37 | 
38 | BibTeX
39 | 
40 | .. code-block::
41 | 
42 |   @software{ralf_j_m_weber_2019_3764110,
43 |     author       = {Ralf J. M. Weber and
44 |                     Jiarui Zhou},
45 |     title        = {{DIMSpy: Python package for processing direct-
46 |                      infusion mass spectrometry-based metabolomics and
47 |                      lipidomics data}},
48 |     month        = oct,
49 |     year         = 2019,
50 |     publisher    = {Zenodo},
51 |     version      = {v1.4.0},
52 |     doi          = {10.5281/zenodo.3764110},
53 |     url          = {https://doi.org/10.5281/zenodo.3764110}
54 |   }
55 | 


--------------------------------------------------------------------------------
/docs/source/cli.rst:
--------------------------------------------------------------------------------
  1 | Command Line Interface
  2 | ======================
  3 | 
  4 | .. code-block:: console
  5 | 
  6 |     $ dimspy --help
  7 | 
  8 |     Executing dimspy version 2.0.0.
  9 |     usage: __main__.py [-h]
 10 |                        {process-scans,replicate-filter,align-samples,blank-filter,sample-filter,remove-samples,mv-sample-filter,merge-peaklists,get-peaklists,get-average-peaklist,hdf5-pm-to-txt,hdf5-pls-to-txt,create-sample-list,unzip,licenses}
 11 |                        ...
 12 | 
 13 |     Python package to process DIMS data
 14 | 
 15 |     positional arguments:
 16 |       {process-scans,replicate-filter,align-samples,blank-filter,sample-filter,remove-samples,mv-sample-filter,merge-peaklists,get-peaklists,get-average-peaklist,hdf5-pm-to-txt,hdf5-pls-to-txt,create-sample-list,unzip,licenses}
 17 |         process-scans       Process scans and/or stitch SIM windows.
 18 |         replicate-filter    Filter irreproducible peaks from technical replicate
 19 |                             peaklists.
 20 |         align-samples       Align peaklists across samples.
 21 |         blank-filter        Filter peaks across samples that are present in the
 22 |                             blank samples.
 23 |         sample-filter       Filter peaks based on certain reproducibility and
 24 |                             sample class criteria.
 25 |         remove-samples      Remove sample(s) from a peak matrix object or list of
 26 |                             peaklist objects.
 27 |         mv-sample-filter    Filter samples based on the percentage of missing
 28 |                             values.
 29 |         merge-peaklists     Merge peaklists from multiple lists of peaklist or
 30 |                             peak matrix objects.
 31 |         get-peaklists       Get peaklists from a peak matrix object.
 32 |         get-average-peaklist
 33 |                             Get an average peaklist from a peak matrix object.
 34 |         hdf5-pm-to-txt      Write HDF5 output (peak matrix) to text format.
 35 |         hdf5-pls-to-txt     Write HDF5 output (peak lists) to text format.
 36 |         create-sample-list  Create a sample list from a peak matrix object or list
 37 |                             of peaklist objects.
 38 |         unzip               Extract files from zip file
 39 |         licenses            Show licenses DIMSpy and RawFileReader
 40 | 
 41 |     optional arguments:
 42 |       -h, --help            show this help message and exit
 43 | 
 44 | 
 45 | .. code-block:: console
 46 | 
 47 |     $ dimspy process-scans --help
 48 | 
 49 |     Executing dimspy version 2.0.0.
 50 |     usage: __main__.py process-scans [-h] -i source -o OUTPUT [-l FILELIST] -m
 51 |                                      {median,mean,mad,noise_packets} -s
 52 |                                      SNR_THRESHOLD [-p PPM] [-n MIN_SCANS]
 53 |                                      [-a MIN_FRACTION] [-d RSD_THRESHOLD] [-k]
 54 |                                      [-r RINGING_THRESHOLD]
 55 |                                      [-e start end scan_type]
 56 |                                      [-x start end scan_type] [-z start end]
 57 |                                      [-u REPORT] [-b BLOCK_SIZE] [-c NCPUS]
 58 | 
 59 |     optional arguments:
 60 |       -h, --help            show this help message and exit
 61 |       -i source, --input source
 62 |                             Directory (*.raw, *.mzml or tab-delimited peaklist
 63 |                             files), single *.mzml/*.raw file or zip archive
 64 |                             (*.mzml only)
 65 |       -o OUTPUT, --output OUTPUT
 66 |                             HDF5 file to save the peaklist objects to.
 67 |       -l FILELIST, --filelist FILELIST
 68 |                             Tab-delimited file that include the name of the data
 69 |                             files (*.raw or *.mzml) and meta data. Column names:
 70 |                             filename, replicate, batch, injectionOrder,
 71 |                             classLabel.
 72 |       -m {median,mean,mad,noise_packets}, --function-noise {median,mean,mad,noise_packets}
 73 |                             Select function to calculate noise.
 74 |       -s SNR_THRESHOLD, --snr-threshold SNR_THRESHOLD
 75 |                             Signal-to-noise threshold
 76 |       -p PPM, --ppm PPM     Mass tolerance in Parts per million to group peaks
 77 |                             across scans / mass spectra.
 78 |       -n MIN_SCANS, --min_scans MIN_SCANS
 79 |                             Minimum number of scans required for each m/z range or
 80 |                             event.
 81 |       -a MIN_FRACTION, --min-fraction MIN_FRACTION
 82 |                             Minimum fraction a peak has to be present. Use 0.0 to
 83 |                             not apply this filter.
 84 |       -d RSD_THRESHOLD, --rsd-threshold RSD_THRESHOLD
 85 |                             Maximum threshold - relative standard deviation
 86 |                             (Calculated for peaks that have been measured across a
 87 |                             minimum of two scans).
 88 |       -k, --skip-stitching  Skip the step where (SIM) windows are 'stitched' or
 89 |                             'joined' together. Individual peaklists are generated
 90 |                             for each window.
 91 |       -r RINGING_THRESHOLD, --ringing-threshold RINGING_THRESHOLD
 92 |                             Ringing
 93 |       -e start end scan_type, --include-scan-events start end scan_type
 94 |                             Scan events to select. E.g. 100.0 200.0 sim or 50.0
 95 |                             1000.0 full
 96 |       -x start end scan_type, --exclude-scan-events start end scan_type
 97 |                             Scan events to select. E.g. 100.0 200.0 sim or 50.0
 98 |                             1000.0 full
 99 |       -z start end, --remove-mz-range start end
100 |                             M/z range(s) to remove. E.g. 100.0 102.0 or 140.0
101 |                             145.0.
102 |       -u REPORT, --report REPORT
103 |                             Summary/Report of processed mass spectra
104 |       -b BLOCK_SIZE, --block-size BLOCK_SIZE
105 |                             The size of each block of peaks to perform clustering
106 |                             on.
107 |       -c NCPUS, --ncpus NCPUS
108 |                             Number of central processing units (CPUs).
109 | 


--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
 1 | # Configuration file for the Sphinx documentation builder.
 2 | #
 3 | # This file only contains a selection of the most common options. For a full
 4 | # list see the documentation:
 5 | # http://www.sphinx-doc.org/en/master/config
 6 | 
 7 | # -- Path setup --------------------------------------------------------------
 8 | 
 9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 | #
13 | import os
14 | import sys
15 | sys.path.insert(0, os.path.abspath('..'))
16 | 
17 | 
18 | # -- Project information -----------------------------------------------------
19 | project = u'DIMSPy'
20 | copyright = u'2019, Ralf Weber, Jiarui (Albert) Zhou'
21 | author = u'Ralf Weber, Jiarui (Albert) Zhou'
22 | 
23 | # The full version, including alpha/beta/rc tags
24 | release = '2.0.0'
25 | 
26 | 
27 | # -- General configuration ---------------------------------------------------
28 | 
29 | # Add any Sphinx extension module names here, as strings. They can be
30 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
31 | # ones.
32 | extensions = [
33 | 	'sphinx.ext.autodoc',
34 | 	'sphinx.ext.doctest',
35 | 	'sphinx.ext.viewcode',
36 | 	'sphinx.ext.napoleon',
37 | 	'sphinx.ext.todo',
38 |     'sphinx.ext.mathjax'
39 | ]
40 | 
41 | # Add any paths that contain templates here, relative to this directory.
42 | templates_path = ['_templates']
43 | 
44 | # The master toctree document.
45 | master_doc = 'index'
46 | 
47 | # List of patterns, relative to source directory, that match files and
48 | # directories to ignore when looking for source files.
49 | # This pattern also affects html_static_path and html_extra_path.
50 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
51 | 
52 | 
53 | # -- Options for HTML output -------------------------------------------------
54 | 
55 | # The theme to use for HTML and HTML Help pages.  See the documentation for
56 | # a list of builtin themes.
57 | #
58 | html_theme = 'sphinx_rtd_theme'
59 | 
60 | # Add any paths that contain custom static files (such as style sheets) here,
61 | # relative to this directory. They are copied after the builtin static files,
62 | # so a file named "default.css" will overwrite the builtin "default.css".
63 | html_static_path = ['_static']
64 | 


--------------------------------------------------------------------------------
/docs/source/credits.rst:
--------------------------------------------------------------------------------
 1 | Credits
 2 | =======
 3 | 
 4 | DIMSpy was originally written by Ralf Weber and Albert Zhou and has been developed with the help of many others.
 5 | Thanks to everyone who has improved DIMSpy by contributing code, features, bug reports (and fixes), and documentation.
 6 | 
 7 | Developers & Contributors
 8 | -------------------------
 9 |  - Ralf J. M. Weber (r.j.weber@bham.ac.uk) - `University of Birmingham (UK) <https://www.birmingham.ac.uk/staff/profiles/biosciences/weber-ralf.aspx>`__
10 |  - Jiarui (Albert) Zhou (j.zhou.3@bham.ac.uk) - `University of Birmingham (UK) <http://www.birmingham.ac.uk/index.aspx>`_, `HIT Shenzhen (China) <http://www.hitsz.edu.cn>`_
11 |  - Thomas N. Lawson (t.n.lawson@bham.ac.uk) - `University of Birmingham (UK) <http://www.birmingham.ac.uk/index.aspx>`__
12 |  - Martin R. Jones (martin.jones@eawag.ch) - `Eawag (Switzerland) <https://www.eawag.ch/en/aboutus/portrait/organisation/staff/profile/martin-jones/show/>`_
13 | 
14 | Funding
15 | -------
16 | DIMSpy acknowledges support from the following funders:
17 |  - BBSRC, grant number BB/M019985/1
18 |  - European Commission's H2020 programme, grant agreement number 654241
19 |  - Wellcome Trust, grant number 202952/Z/16/Z
20 | 


--------------------------------------------------------------------------------
/docs/source/dimspy.metadata.rst:
--------------------------------------------------------------------------------
1 | metadata
2 | ========
3 | 
4 | .. automodule:: dimspy.metadata
5 |     :members:
6 |     :undoc-members:
7 |     :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/dimspy.models.rst:
--------------------------------------------------------------------------------
 1 | models
 2 | ======
 3 | 
 4 | peaklist
 5 | --------
 6 | 
 7 | .. automodule:: dimspy.models.peaklist
 8 |     :members:
 9 |     :undoc-members:
10 |     :show-inheritance:
11 | 
12 | peaklist\_metadata
13 | ------------------
14 | 
15 | .. automodule:: dimspy.models.peaklist_metadata
16 |     :members:
17 |     :undoc-members:
18 |     :show-inheritance:
19 | 
20 | peaklist\_tags
21 | --------------
22 | 
23 | .. automodule:: dimspy.models.peaklist_tags
24 |     :members:
25 |     :undoc-members:
26 |     :show-inheritance:
27 | 
28 | peak\_matrix
29 | ------------
30 | 
31 | .. automodule:: dimspy.models.peak_matrix
32 |     :members:
33 |     :undoc-members:
34 |     :show-inheritance:
35 | 
36 | 


--------------------------------------------------------------------------------
/docs/source/dimspy.portals.rst:
--------------------------------------------------------------------------------
 1 | portals
 2 | =======
 3 | 
 4 | mzml\_portal
 5 | ------------
 6 | 
 7 | .. automodule:: dimspy.portals.mzml_portal
 8 |     :members:
 9 |     :undoc-members:
10 |     :show-inheritance:
11 |     :member-order: bysource
12 | 
13 | thermo\_raw\_portal
14 | -------------------
15 | 
16 | .. automodule:: dimspy.portals.thermo_raw_portal
17 |     :members:
18 |     :undoc-members:
19 |     :show-inheritance:
20 |     :member-order: bysource
21 | 
22 | txt\_portal
23 | -----------
24 | 
25 | .. automodule:: dimspy.portals.txt_portal
26 |     :members:
27 |     :undoc-members:
28 |     :show-inheritance:
29 |     :member-order: bysource
30 | 
31 | hdf5\_portal
32 | ------------
33 | 
34 | .. automodule:: dimspy.portals.hdf5_portal
35 |     :members:
36 |     :undoc-members:
37 |     :show-inheritance:
38 |     :member-order: bysource
39 | 
40 | paths
41 | -----
42 | 
43 | .. automodule:: dimspy.portals.paths
44 |     :members:
45 |     :undoc-members:
46 |     :show-inheritance:
47 |     :member-order: bysource
48 | 


--------------------------------------------------------------------------------
/docs/source/dimspy.process.rst:
--------------------------------------------------------------------------------
 1 | process
 2 | =======
 3 | 
 4 | peak\_alignment
 5 | ---------------
 6 | 
 7 | .. automodule:: dimspy.process.peak_alignment
 8 |     :members:
 9 |     :undoc-members:
10 |     :show-inheritance:
11 |     :member-order: bysource
12 | 
13 | peak\_filters
14 | -------------
15 | 
16 | .. automodule:: dimspy.process.peak_filters
17 |     :members:
18 |     :undoc-members:
19 |     :show-inheritance:
20 |     :member-order: bysource
21 | 
22 | replicate\_processing
23 | ---------------------
24 | 
25 | .. automodule:: dimspy.process.replicate_processing
26 |     :members:
27 |     :undoc-members:
28 |     :show-inheritance:
29 |     :member-order: bysource
30 | 


--------------------------------------------------------------------------------
/docs/source/dimspy.tools.rst:
--------------------------------------------------------------------------------
1 | tools
2 | =====
3 | 
4 | .. automodule:: dimspy.tools
5 |     :members:
6 |     :undoc-members:
7 |     :show-inheritance:
8 |     :member-order: bysource
9 | 


--------------------------------------------------------------------------------
/docs/source/images/alignment.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/computational-metabolomics/dimspy/4a0b8982382b8198f2e28864a7f0263ce8a49c49/docs/source/images/alignment.png


--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
 1 | Welcome to DIMSpy's documentation!
 2 | ==================================
 3 | 
 4 | |Py versions| |Version| |Bioconda| |Galaxy-eu| |Git| |Build Status (Travis)| |Build Status (AppVeyor)| |codecov| |License| |binder| |RTD doc| |gitter|
 5 | 
 6 | Python package for processing direct-infusion mass spectrometry-based metabolomics and lipidomics data
 7 | 
 8 | 
 9 | Contents
10 | --------
11 | 
12 | .. toctree::
13 |     :maxdepth: 3
14 | 
15 |     installation
16 |     api-reference
17 |     cli
18 |     credits
19 |     bugs-and-issues
20 |     changelog
21 |     citation
22 |     license
23 | 
24 | 
25 | .. |Build Status (Travis)| image:: https://img.shields.io/travis/computational-metabolomics/dimspy.svg?logo=travis&maxAge=600&style=flat-square
26 |    :target: https://travis-ci.com/computational-metabolomics/dimspy
27 | 
28 | .. |Build Status (AppVeyor)| image:: https://img.shields.io/appveyor/ci/RJMW/dimspy.svg?logo=appveyor&style=flat-square&maxAge=600
29 |    :target: https://ci.appveyor.com/project/RJMW/dimspy/branch/master
30 | 
31 | .. |Py versions| image:: https://img.shields.io/pypi/pyversions/dimspy.svg?style=flat&maxAge=3600
32 |    :target: https://pypi.python.org/pypi/dimspy/
33 | 
34 | .. |Version| image:: https://img.shields.io/pypi/v/dimspy.svg?style=flat&maxAge=3600
35 |    :target: https://pypi.python.org/pypi/dimspy/
36 | 
37 | .. |Git| image:: https://img.shields.io/badge/repository-GitHub-blue.svg?style=flat&maxAge=3600
38 |    :target: https://github.com/computational-metabolomics/dimspy
39 | 
40 | .. |Bioconda| image:: https://img.shields.io/conda/vn/bioconda/dimspy?style=flat-square&maxAge=3600
41 |    :target: http://bioconda.github.io/recipes/dimspy/README.html
42 | 
43 | .. |galaxy-eu| image:: https://img.shields.io/badge/usegalaxy-.eu-brightgreen?logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABgAAAASCAYAAABB7B6eAAAABGdBTUEAALGPC/xhBQAAACBjSFJNAAB6JgAAgIQAAPoAAACA6AAAdTAAAOpgAAA6mAAAF3CculE8AAAACXBIWXMAAAsTAAALEwEAmpwYAAACC2lUWHRYTUw6Y29tLmFkb2JlLnhtcAAAAAAAPHg6eG1wbWV0YSB4bWxuczp4PSJhZG9iZTpuczptZXRhLyIgeDp4bXB0az0iWE1QIENvcmUgNS40LjAiPgogICA8cmRmOlJERiB4bWxuczpyZGY9Imh0dHA6Ly93d3cudzMub3JnLzE5OTkvMDIvMjItcmRmLXN5bnRheC1ucyMiPgogICAgICA8cmRmOkRlc2NyaXB0aW9uIHJkZjphYm91dD0iIgogICAgICAgICAgICB4bWxuczp0aWZmPSJodHRwOi8vbnMuYWRvYmUuY29tL3RpZmYvMS4wLyI+CiAgICAgICAgIDx0aWZmOlJlc29sdXRpb25Vbml0PjI8L3RpZmY6UmVzb2x1dGlvblVuaXQ+CiAgICAgICAgIDx0aWZmOkNvbXByZXNzaW9uPjE8L3RpZmY6Q29tcHJlc3Npb24+CiAgICAgICAgIDx0aWZmOk9yaWVudGF0aW9uPjE8L3RpZmY6T3JpZW50YXRpb24+CiAgICAgICAgIDx0aWZmOlBob3RvbWV0cmljSW50ZXJwcmV0YXRpb24+MjwvdGlmZjpQaG90b21ldHJpY0ludGVycHJldGF0aW9uPgogICAgICA8L3JkZjpEZXNjcmlwdGlvbj4KICAgPC9yZGY6UkRGPgo8L3g6eG1wbWV0YT4KD0UqkwAAAn9JREFUOBGlVEuLE0EQruqZiftwDz4QYT1IYM8eFkHFw/4HYX+GB3/B4l/YP+CP8OBNTwpCwFMQXAQPKtnsg5nJZpKdni6/6kzHvAYDFtRUT71f3UwAEbkLch9ogQxcBwRKMfAnM1/CBwgrbxkgPAYqlBOy1jfovlaPsEiWPROZmqmZKKzOYCJb/AbdYLso9/9B6GppBRqCrjSYYaquZq20EUKAzVpjo1FzWRDVrNay6C/HDxT92wXrAVCH3ASqq5VqEtv1WZ13Mdwf8LFyyKECNbgHHAObWhScf4Wnj9CbQpPzWYU3UFoX3qkhlG8AY2BTQt5/EA7qaEPQsgGLWied0A8VKrHAsCC1eJ6EFoUd1v6GoPOaRAtDPViUr/wPzkIFV9AaAZGtYB568VyJfijV+ZBzlVZJ3W7XHB2RESGe4opXIGzRTdjcAupOK09RA6kzr1NTrTj7V1ugM4VgPGWEw+e39CxO6JUw5XhhKihmaDacU2GiR0Ohcc4cZ+Kq3AjlEnEeRSazLs6/9b/kh4eTC+hngE3QQD7Yyclxsrf3cpxsPXn+cFdenF9aqlBXMXaDiEyfyfawBz2RqC/O9WF1ysacOpytlUSoqNrtfbS642+4D4CS9V3xb4u8P/ACI4O810efRu6KsC0QnjHJGaq4IOGUjWTo/YDZDB3xSIxcGyNlWcTucb4T3in/3IaueNrZyX0lGOrWndstOr+w21UlVFokILjJLFhPukbVY8OmwNQ3nZgNJNmKDccusSb4UIe+gtkI+9/bSLJDjqn763f5CQ5TLApmICkqwR0QnUPKZFIUnoozWcQuRbC0Km02knj0tPYx63furGs3x/iPnz83zJDVNtdP3QAAAABJRU5ErkJggg==
44 |    :target: http://usegalaxy.eu
45 | 
46 | .. |License| image:: https://img.shields.io/pypi/l/dimspy.svg?style=flat&maxAge=3600
47 |    :target: https://www.gnu.org/licenses/gpl-3.0.html
48 | 
49 | .. |RTD doc| image:: https://img.shields.io/badge/documentation-RTD-71B360.svg?style=flat&maxAge=3600
50 |    :target: https://dimspy.readthedocs.io/en/latest/
51 | 
52 | .. |codecov| image:: https://codecov.io/gh/computational-metabolomics/dimspy/branch/master/graph/badge.svg
53 |    :target: https://codecov.io/gh/computational-metabolomics/dimspy
54 | 
55 | .. |binder| image:: https://mybinder.org/badge_logo.svg
56 |    :target: https://mybinder.org/v2/gh/computational-metabolomics/dimspy/master?filepath=notebooks%2Fworkflow.ipynb
57 | 
58 | .. |gitter| image:: https://badges.gitter.im/Join%20Chat.svg
59 |    :target: https://gitter.im/computational-metabolomics/dimspy?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge
60 | 
61 | 
62 | Indices and tables
63 | ==================
64 | 
65 | * :ref:`genindex`
66 | * :ref:`search`
67 | 


--------------------------------------------------------------------------------
/docs/source/installation.rst:
--------------------------------------------------------------------------------
 1 | Installation
 2 | ============
 3 | 
 4 | Conda (recommended)
 5 | -------------------
 6 | 
 7 | Install Miniconda, follow the steps described `here <https://docs.conda.io/projects/conda/en/latest/user-guide/install>`_
 8 | 
 9 | Start the ``conda prompt``
10 | 
11 | * Windows: Open the ``Anaconda Prompt`` via the Start menu
12 | * macOS or Linux: Open a ``Terminal``
13 | 
14 | Create a dimspy specific ``conda`` environment.
15 | This will install all the dependencies required to run ``dimspy``::
16 | 
17 |     $ conda create --yes --name dimspy dimspy -c conda-forge -c bioconda -c computational-metabolomics
18 | 
19 | .. note::
20 | 
21 |     * The installation process will take a few minutes.
22 |     * Feel free to use a different name for the Conda environment
23 | 
24 |     You can use the following command to remove a conda environment::
25 | 
26 |         $ conda env remove -y --name dimspy
27 | 
28 |     This is only required if something has gone wrong in the previous step.
29 | 
30 | Activate the ``dimspy`` environment::
31 | 
32 |     $ conda activate dimspy
33 | 
34 | To test your ``dimspy`` installation, in your Conda Prompt, run the command::
35 | 
36 |     $ dimspy --help
37 | 
38 | or::
39 | 
40 |     $ python
41 |     import dimspy
42 | 
43 | Close and deactivate the ``dimspy`` environment when you’re done::
44 | 
45 |     $ conda deactivate
46 | 
47 | 
48 | PyPi
49 | ----
50 | 
51 | Install the current release of ``dimspy`` with ``pip``::
52 | 
53 |     $ pip install dimspy
54 | 
55 | .. note::
56 | 
57 |     * The installation process will take a few minutes.
58 | 
59 | To upgrade to a newer release use the ``--upgrade`` flag::
60 | 
61 |     $ pip install --upgrade dimspy
62 | 
63 | If you do not have permission to install software systemwide, you can
64 | install into your user directory using the ``--user`` flag::
65 | 
66 |     $ pip install --user dimspy
67 | 
68 | Alternatively, you can manually download ``dimspy`` from
69 | `GitHub <https://github.com/computational-metabolomics/dimspy/releases>`_  or
70 | `PyPI <https://pypi.python.org/pypi/dimspy>`_.
71 | To install one of these versions, unpack it and run the following from the
72 | top-level source directory using the Terminal::
73 | 
74 |     $ pip install .
75 | 
76 | 
77 | Testing
78 | -------
79 | DIMSpy uses the Python ``pytest`` testing package.  You can learn more
80 | about pytest on their `homepage <https://pytest.org>`_.
81 | 


--------------------------------------------------------------------------------
/docs/source/license.rst:
--------------------------------------------------------------------------------
 1 | License
 2 | =======
 3 | 
 4 | DIMSpy is licensed under the GNU General Public License v3.0 (see `LICENSE file <https://github.com/computational-metabolomics/dimspy/blob/master/LICENSE>`_ for licensing information). Copyright © 2017 - 2020 Ralf Weber, Albert Zhou
 5 | 
 6 | **Third-party licenses and copyright**
 7 | 
 8 | RawFileReader reading tool. Copyright © 2016 by Thermo Fisher Scientific, Inc. All rights reserved. See `RawFileReaderLicense <https://github.com/computational-metabolomics/dimspy/blob/master/RawFileReaderLicense.rst>`_ for licensing information.
 9 | Using DIMSpy software for processing Thermo Fisher Scientific ``*.raw`` files implies the acceptance of the RawFileReader license terms.
10 | Anyone receiving RawFileReader as part of a larger software distribution (in the current context, as part of DIMSpy) is considered an "end user" under
11 | section 3.3 of the RawFileReader License, and is not granted rights to redistribute RawFileReader.
12 | 


--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
 1 | name: dimspy
 2 | channels:
 3 |   - conda-forge
 4 |   - bioconda
 5 | dependencies:
 6 |  - python=3.7
 7 |  - fastcluster=1.1.26
 8 |  - h5py=2.10.0
 9 |  - numpy=1.17.1
10 |  - pandas=0.25.0
11 |  - pymzml=2.4.5
12 |  - pytables=3.6.1
13 |  - pythonnet=2.4.0
14 |  - scipy=1.3.1
15 | 


--------------------------------------------------------------------------------
/examples/examples.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | from dimspy.tools import *
 5 | from dimspy.portals.hdf5_portal import *
 6 | import zipfile
 7 | 
 8 | 
 9 | def main():
10 | 
11 |     source = os.path.join("..", "tests", "data", "MTBLS79_subset", "MTBLS79_mzml_triplicates.zip")
12 |     fn_filelist = os.path.join("..", "tests", "data", "MTBLS79_subset", "filelist_mzml_triplicates.txt")
13 |     output = os.path.join("results")
14 |     if not os.path.exists(output):
15 |         os.mkdir(output)
16 | 
17 |     print("Unzip mzML files.....")
18 |     zip_ref = zipfile.ZipFile(source, 'r')
19 |     zip_ref.extractall(os.path.join("data"))
20 |     zip_ref.close()
21 |     print("Completed")
22 | 
23 |     print("Process Scans.....")
24 |     pls = process_scans("data", min_scans=1, function_noise="median",
25 |                         snr_thres=3.0, ppm=2.0, min_fraction=None, rsd_thres=None,
26 |                         filelist=fn_filelist, remove_mz_range=[], block_size=5000, ncpus=None)
27 |     print("Completed")
28 | 
29 |     print("Replicate Filter.....")
30 |     logfile = os.path.join(output, "log_replicate_filter.txt")
31 |     pls_rf = replicate_filter(pls, ppm=2.0, replicates=3, min_peaks=2, rsd_thres=None, report=logfile, block_size=5000)
32 |     print("Completed")
33 | 
34 |     print("Write each replicate filtered peaklist to a text file")
35 |     for pl in pls_rf:
36 |         with open(pl.ID + ".txt", "w") as out:
37 |             out.write(os.path.join("results", pl.to_str("\t")))
38 |     print("Completed")
39 | 
40 |     # print("Save, write and load peaklists")
41 |     # save_peaklists_as_hdf5(pls_rf, os.path.join(output, "pls_rf.h5"))
42 |     # hdf5_peaklists_to_txt(os.path.join(output, "pls_rf.h5"), path_out=output)
43 |     # pls_rf = load_peaklists_from_hdf5(os.path.join(output, "pls_rf.h5"))
44 |     # print("Completed")
45 | 
46 |     # print("Create a new sample list.....")
47 |     # sample_list = os.path.join(output, "sample_list.txt")
48 |     # create_sample_list(pls_rf, sample_list, delimiter="\t")
49 |     # print("Completed")
50 |     # print("")
51 | 
52 |     print("Align Samples.....")
53 |     pm = align_samples(pls_rf, ppm=3.0, ncpus=1, block_size=5000)
54 |     print("Completed", pm.shape)
55 | 
56 |     # print("Save, write and load peak matrix")
57 |     # save_peak_matrix_as_hdf5(pm, os.path.join(output, "pm.h5"))
58 |     # hdf5_peak_matrix_to_txt(os.path.join(output, "pm.h5"), path_out=os.path.join(output, "pm.txt"), attr_name="intensity", comprehensive=True)
59 |     # pm = load_peak_matrix_from_hdf5(os.path.join(output, "pm.h5"))
60 |     # print("Completed")
61 | 
62 |     print("Blank Filter.....")
63 |     pm_bf = blank_filter(pm, "blank", min_fraction=1.0, min_fold_change=10.0, function="mean", rm_samples=True)
64 |     print("Completed", pm_bf.shape)
65 | 
66 |     print("Sample Filter.....")
67 |     pm_bf_sf = sample_filter(pm, 0.8, within=False)
68 |     print("Completed", pm_bf_sf.shape)
69 | 
70 | 
71 | if __name__ == '__main__':
72 |     main()
73 | 


--------------------------------------------------------------------------------
/examples/run.bat:
--------------------------------------------------------------------------------
 1 | dimspy --help
 2 | 
 3 | dimspy unzip^
 4 |  --input ../tests/data/MTBLS79_subset/MTBLS79_mzml_triplicates.zip^
 5 |  --output results/mzml
 6 | 
 7 | dimspy process-scans^
 8 |  --input results/mzml^
 9 |  --output results/peaklists.hdf5^
10 |  --filelist tests/data/MTBLS79_subset/filelist_mzml_triplicates.txt^
11 |  --function-noise median^
12 |  --snr-threshold 3.0^
13 |  --ppm 2.0^
14 |  --min_scans 1^
15 |  --min-fraction 0.5^
16 |  --block-size 5000^
17 |  --ncpus 2
18 | 
19 | dimspy replicate-filter^
20 |  --input results/peaklists.hdf5^
21 |  --output results/peaklists_rf.hdf5^
22 |  --ppm 2.0^
23 |  --replicates 3^
24 |  --min-peak-present 2
25 | 
26 | dimspy align-samples^
27 |  --input results/peaklists.hdf5^
28 |  --output results/pm_a.hdf5^
29 |  --ppm 2.0
30 | 
31 | dimspy blank-filter^
32 |  --input results/pm_a.hdf5^
33 |  --output results/pm_a_bf.hdf5^
34 |  --blank-label blank^
35 |  --remove
36 | 
37 | dimspy sample-filter^
38 |  --input results/pm_a_bf.hdf5^
39 |  --output results/pm_a_bf_sf.hdf5^
40 |  --min-fraction 0.8
41 | 
42 | dimspy hdf5-pls-to-txt^
43 |  --input results/peaklists.hdf5^
44 |  --output results^
45 |  --delimiter tab
46 | 
47 | dimspy hdf5-pm-to-txt^
48 |  --input results/pm_a_bf_sf.hdf5^
49 |  --output results/pm_a_bf_sf.txt^
50 |  --delimiter tab
51 | 
52 | dimspy merge-peaklists^
53 |  --input results/peaklists_rf.hdf5^
54 |  --input results/peaklists.hdf5^
55 |  --output results/peaklists_merged.hdf5
56 | 


--------------------------------------------------------------------------------
/examples/run.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | dimspy --help
 4 | 
 5 | dimspy unzip \
 6 | --input ../tests/data/MTBLS79_subset/MTBLS79_mzml_triplicates.zip \
 7 | --output results/mzml
 8 | 
 9 | dimspy process-scans \
10 | --input results/mzml \
11 | --output results/peaklists.hdf5 \
12 | --filelist ../tests/data/MTBLS79_subset/filelist_mzml_triplicates.txt \
13 | --function-noise median \
14 | --snr-threshold 3.0 \
15 | --ppm 2.0 \
16 | --min_scans 1 \
17 | --min-fraction 0.5 \
18 | --block-size 5000 \
19 | --ncpus 2
20 | 
21 | dimspy replicate-filter \
22 | --input results/peaklists.hdf5 \
23 | --output results/peaklists_rf.hdf5 \
24 | --ppm 2.0 \
25 | --replicates 3 \
26 | --min-peak-present 2
27 | 
28 | dimspy align-samples \
29 | --input results/peaklists.hdf5 \
30 | --output results/pm_a.hdf5 \
31 | --ppm 2.0
32 | 
33 | dimspy blank-filter \
34 | --input results/pm_a.hdf5 \
35 | --output results/pm_a_bf.hdf5 \
36 | --blank-label blank \
37 | --remove
38 | 
39 | dimspy sample-filter \
40 | --input results/pm_a_bf.hdf5 \
41 | --output results/pm_a_bf_sf.hdf5 \
42 | --min-fraction 0.8
43 | 
44 | dimspy hdf5-pls-to-txt \
45 | --input results/peaklists_rf.hdf5 \
46 | --output results \
47 | --delimiter tab
48 | 
49 | dimspy hdf5-pm-to-txt \
50 | --input results/pm_a_bf_sf.hdf5 \
51 | --output results/pm_a_bf_sf.txt \
52 | --delimiter tab
53 | 
54 | dimspy merge-peaklists \
55 | --input results/peaklists_rf.hdf5 \
56 | --input results/peaklists.hdf5 \
57 | --output results/peaklists_merged.hdf5
58 | 


--------------------------------------------------------------------------------
/notebooks/workflow.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "#!/usr/bin/env python\n",
 10 |     "# -*- coding: utf-8 -*-\n",
 11 |     "\n",
 12 |     "import os\n",
 13 |     "import zipfile\n",
 14 |     "from dimspy.tools import process_scans\n",
 15 |     "from dimspy.tools import replicate_filter\n",
 16 |     "from dimspy.tools import create_sample_list\n",
 17 |     "from dimspy.tools import align_samples\n",
 18 |     "from dimspy.tools import blank_filter\n",
 19 |     "from dimspy.tools import sample_filter\n",
 20 |     "\n",
 21 |     "\n",
 22 |     "source = os.path.join(\"..\", \"tests\", \"data\", \"MTBLS79_subset\", \"MTBLS79_mzml_triplicates.zip\")\n",
 23 |     "fn_filelist = os.path.join(\"..\", \"tests\", \"data\", \"MTBLS79_subset\", \"filelist_mzml_triplicates.txt\")\n",
 24 |     "\n",
 25 |     "zip_ref = zipfile.ZipFile(source, 'r')\n",
 26 |     "zip_ref.extractall(\"data\")\n",
 27 |     "zip_ref.close()\n",
 28 |     "\n",
 29 |     "print(os.listdir(\"data\"))\n"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "code",
 34 |    "execution_count": null,
 35 |    "metadata": {
 36 |     "pycharm": {
 37 |      "name": "#%%\n"
 38 |     }
 39 |    },
 40 |    "outputs": [],
 41 |    "source": [
 42 |     "pls = process_scans(\"data\", min_scans=1, function_noise=\"median\",\n",
 43 |     "                    snr_thres=3.0, ppm=2.0, min_fraction=None, rsd_thres=None,\n",
 44 |     "                    filelist=fn_filelist, remove_mz_range=[], block_size=5000, ncpus=None)\n",
 45 |     "\n",
 46 |     "print(pls[0]) # first peaklist"
 47 |    ]
 48 |   },
 49 |   {
 50 |    "cell_type": "code",
 51 |    "execution_count": null,
 52 |    "metadata": {
 53 |     "pycharm": {
 54 |      "name": "#%%\n"
 55 |     }
 56 |    },
 57 |    "outputs": [],
 58 |    "source": [
 59 |     "pls_rf = replicate_filter(pls, ppm=2.0, replicates=3, min_peaks=2, rsd_thres=None,\n",
 60 |     "                          report=\"log_replicate_filter.txt\", block_size=5000)\n",
 61 |     "\n",
 62 |     "print(pls_rf[0]) # first peaklist"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "code",
 67 |    "execution_count": null,
 68 |    "metadata": {
 69 |     "pycharm": {
 70 |      "name": "#%%\n"
 71 |     }
 72 |    },
 73 |    "outputs": [],
 74 |    "source": [
 75 |     "create_sample_list(pls_rf, \"sample_list.txt\", delimiter=\"\\t\")"
 76 |    ]
 77 |   },
 78 |   {
 79 |    "cell_type": "code",
 80 |    "execution_count": null,
 81 |    "metadata": {
 82 |     "pycharm": {
 83 |      "name": "#%%\n"
 84 |     }
 85 |    },
 86 |    "outputs": [],
 87 |    "source": [
 88 |     "pm = align_samples(pls, ppm=3.0, ncpus=1, block_size=5000)\n",
 89 |     "print(pm.shape)"
 90 |    ]
 91 |   },
 92 |   {
 93 |    "cell_type": "code",
 94 |    "execution_count": null,
 95 |    "metadata": {
 96 |     "pycharm": {
 97 |      "name": "#%%\n"
 98 |     }
 99 |    },
100 |    "outputs": [],
101 |    "source": [
102 |     "pm_bf = blank_filter(pm, \"blank\", min_fraction=1.0, min_fold_change=10.0, function=\"mean\", rm_samples=True)\n",
103 |     "print(pm_bf.shape)"
104 |    ]
105 |   },
106 |   {
107 |    "cell_type": "code",
108 |    "execution_count": null,
109 |    "metadata": {
110 |     "pycharm": {
111 |      "name": "#%%\n"
112 |     }
113 |    },
114 |    "outputs": [],
115 |    "source": [
116 |     "pm_bf_sf = sample_filter(pm, 0.8, within=False)\n",
117 |     "print(pm_bf_sf.shape)\n",
118 |     "\n"
119 |    ]
120 |   },
121 |   {
122 |    "cell_type": "code",
123 |    "execution_count": null,
124 |    "metadata": {},
125 |    "outputs": [],
126 |    "source": [
127 |     "# "
128 |    ]
129 |   }
130 |  ],
131 |  "metadata": {
132 |   "kernelspec": {
133 |    "display_name": "Python 3",
134 |    "language": "python",
135 |    "name": "python3"
136 |   },
137 |   "language_info": {
138 |    "codemirror_mode": {
139 |     "name": "ipython",
140 |     "version": 3
141 |    },
142 |    "file_extension": ".py",
143 |    "mimetype": "text/x-python",
144 |    "name": "python",
145 |    "nbconvert_exporter": "python",
146 |    "pygments_lexer": "ipython3",
147 |    "version": "3.7.3"
148 |   },
149 |   "pycharm": {
150 |    "stem_cell": {
151 |     "cell_type": "raw",
152 |     "metadata": {
153 |      "collapsed": false
154 |     },
155 |     "source": []
156 |    }
157 |   }
158 |  },
159 |  "nbformat": 4,
160 |  "nbformat_minor": 1
161 | }
162 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | fastcluster==1.1.26
2 | h5py==2.10.0
3 | numpy==1.17.1
4 | pandas==0.25.0
5 | pymzml==2.4.5
6 | pythonnet==2.4.0
7 | tables==3.6.1
8 | scipy==1.3.1
9 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | #
 4 | # Copyright © 2017-2020 Ralf Weber, Albert Zhou.
 5 | #
 6 | # This file is part of DIMSpy.
 7 | #
 8 | # DIMSpy is free software: you can redistribute it and/or modify
 9 | # it under the terms of the GNU General Public License as published by
10 | # the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # DIMSpy is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 | # GNU General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU General Public License
19 | # along with DIMSpy.  If not, see <https://www.gnu.org/licenses/>.
20 | #
21 | 
22 | 
23 | import setuptools
24 | import sys
25 | import dimspy
26 | 
27 | 
def main():
    """Configure and run the setuptools build for the DIMSpy package.

    The long description is read from ``README.rst`` and the install
    requirements from ``requirements.txt``. Both files are looked up next to
    this script, so the build does not depend on the current working
    directory.
    """
    import os

    here = os.path.dirname(os.path.abspath(__file__))

    def read(filename):
        # Context manager so the handle is closed rather than leaked; the
        # encoding is explicit so the result does not vary with the locale.
        with open(os.path.join(here, filename), encoding="utf-8") as handle:
            return handle.read()

    # Drop blank lines and comment lines: neither is a valid requirement.
    requirements = [
        line.strip()
        for line in read("requirements.txt").splitlines()
        if line.strip() and not line.strip().startswith("#")
    ]

    setuptools.setup(name="dimspy",
        version=dimspy.__version__,
        description="Python package for processing of direct-infusion mass spectrometry-based metabolomics and lipidomics data",
        long_description=read("README.rst"),
        author="Ralf Weber, Albert Zhou",
        author_email="r.j.weber@bham.ac.uk, j.zhou.3@bham.ac.uk",
        url="https://github.com/computational-metabolomics/dimspy",
        license="GPLv3",
        # One list entry per platform (was the single string 'Windows, UNIX').
        platforms=['Windows', 'UNIX'],
        keywords=['Metabolomics', 'Lipidomics', 'Mass spectrometry', 'Data Processing', 'Direct-Infusion Mass Spectrometry'],
        packages=setuptools.find_packages(),
        test_suite='tests.suite',
        python_requires='>=3.7',
        install_requires=requirements,
        include_package_data=True,
        project_urls={
            "Documentation": "https://dimspy.readthedocs.io/en/latest/",
            "Changelog": "https://dimspy.readthedocs.io/en/latest/changelog.html",
            "Bug Tracker": "https://github.com/computational-metabolomics/dimspy/issues",
        },
        classifiers=[
          "Programming Language :: Python :: 3",
          "Programming Language :: Python :: 3.7",
          "Topic :: Scientific/Engineering :: Bio-Informatics",
          "Topic :: Scientific/Engineering :: Chemistry",
          "Topic :: Utilities",
          "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
          "Operating System :: OS Independent",
        ],
        entry_points={
         'console_scripts': [
             'dimspy = dimspy.__main__:main'
         ]
        }
    )
65 | 
66 | 
67 | if __name__ == "__main__":
68 |     main()
69 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | #
 4 | # Copyright © 2017-2020 Ralf Weber, Albert Zhou.
 5 | #
 6 | # This file is part of DIMSpy.
 7 | #
 8 | # DIMSpy is free software: you can redistribute it and/or modify
 9 | # it under the terms of the GNU General Public License as published by
10 | # the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # DIMSpy is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 | # GNU General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU General Public License
19 | # along with DIMSpy.  If not, see <https://www.gnu.org/licenses/>.
20 | #
21 | 
22 | 
23 | import unittest
24 | 
25 | 
def suite():
    """Build the test suite by discovering ``test_*.py`` modules.

    Discovery starts from ``'.'``.

    :return: the suite produced by ``unittest.TestLoader.discover``
    """
    return unittest.TestLoader().discover('.', pattern='test_*.py')
30 | 


--------------------------------------------------------------------------------
/tests/data/MTBLS79_subset/MTBLS79_mzml_peak_matrix_v1.hdf5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/computational-metabolomics/dimspy/4a0b8982382b8198f2e28864a7f0263ce8a49c49/tests/data/MTBLS79_subset/MTBLS79_mzml_peak_matrix_v1.hdf5


--------------------------------------------------------------------------------
/tests/data/MTBLS79_subset/MTBLS79_mzml_peak_matrix_v2.hdf5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/computational-metabolomics/dimspy/4a0b8982382b8198f2e28864a7f0263ce8a49c49/tests/data/MTBLS79_subset/MTBLS79_mzml_peak_matrix_v2.hdf5


--------------------------------------------------------------------------------
/tests/data/MTBLS79_subset/MTBLS79_mzml_single.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/computational-metabolomics/dimspy/4a0b8982382b8198f2e28864a7f0263ce8a49c49/tests/data/MTBLS79_subset/MTBLS79_mzml_single.zip


--------------------------------------------------------------------------------
/tests/data/MTBLS79_subset/MTBLS79_mzml_single_report.txt:
--------------------------------------------------------------------------------
 1 | filename	event	scans	peaks	median_rsd
 2 | batch04_QC17_rep01_262.mzML	FTMS + p ESI w SIM ms [70.00-170.00]	10	501	16.597287464014354
 3 | batch04_QC17_rep01_262.mzML	FTMS + p ESI w SIM ms [140.00-240.00]	11	308	11.861413863099502
 4 | batch04_QC17_rep01_262.mzML	FTMS + p ESI w SIM ms [210.00-310.00]	14	221	12.059987578794935
 5 | batch04_QC17_rep01_262.mzML	FTMS + p ESI w SIM ms [280.00-380.00]	14	212	11.145086842326155
 6 | batch04_QC17_rep01_262.mzML	FTMS + p ESI w SIM ms [350.00-450.00]	13	205	10.548051403649117
 7 | batch04_QC17_rep01_262.mzML	FTMS + p ESI w SIM ms [420.00-520.00]	13	180	11.35556910318272
 8 | batch04_QC17_rep01_262.mzML	FTMS + p ESI w SIM ms [490.00-590.00]	13	173	11.024412858650523
 9 | batch04_QC17_rep01_262.mzML	SIM-Stitch	NA	1800	12.033732483598556
10 | 


--------------------------------------------------------------------------------
/tests/data/MTBLS79_subset/MTBLS79_mzml_triplicates.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/computational-metabolomics/dimspy/4a0b8982382b8198f2e28864a7f0263ce8a49c49/tests/data/MTBLS79_subset/MTBLS79_mzml_triplicates.zip


--------------------------------------------------------------------------------
/tests/data/MTBLS79_subset/MTBLS79_mzml_triplicates_report.txt:
--------------------------------------------------------------------------------
1 | name	peaks	peaks_3oo3	median_rsd_3oo3
2 | batch04_B02_rep01_301_2_302_3_303	650	527	11.278862335879921
3 | batch04_QC17_rep01_262_2_263_3_264	487	405	8.047266384318867
4 | batch04_S01_rep01_247_2_248_3_249	518	441	5.330921878107105
5 | 


--------------------------------------------------------------------------------
/tests/data/MTBLS79_subset/MTBLS79_mzml_triplicates_v1.hdf5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/computational-metabolomics/dimspy/4a0b8982382b8198f2e28864a7f0263ce8a49c49/tests/data/MTBLS79_subset/MTBLS79_mzml_triplicates_v1.hdf5


--------------------------------------------------------------------------------
/tests/data/MTBLS79_subset/MTBLS79_mzml_triplicates_v2.hdf5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/computational-metabolomics/dimspy/4a0b8982382b8198f2e28864a7f0263ce8a49c49/tests/data/MTBLS79_subset/MTBLS79_mzml_triplicates_v2.hdf5


--------------------------------------------------------------------------------
/tests/data/MTBLS79_subset/filelist_class_label_error.txt:
--------------------------------------------------------------------------------
 1 | filename	replicate	batch	injectionOrder	classLabel
 2 | batch04_B02_rep01_301.mzML	1	1	1	blank
 3 | batch04_B02_rep02_302.mzML	2	1	2	blank
 4 | batch04_B02_rep03_303.mzML	3	1	3	blank
 5 | batch04_QC17_rep01_262.mzML	4	1	4	QC
 6 | batch04_QC17_rep02_263.mzML	1	1	5	QC
 7 | batch04_QC17_rep03_264.mzML	2	1	6	QC
 8 | batch04_S01_rep01_247.mzML	3	1	7	sample
 9 | batch04_S01_rep02_248.mzML	1	1	8	sample
10 | batch04_S01_rep03_249.mzML	2	1	9	sample


--------------------------------------------------------------------------------
/tests/data/MTBLS79_subset/filelist_csl_MTBLS79_mzml_peak_matrix.txt:
--------------------------------------------------------------------------------
 1 | filename	replicate	batch	injectionOrder	classLabel
 2 | batch04_B02_rep01_301.mzML	1	1	1	blank
 3 | batch04_B02_rep02_302.mzML	2	1	2	blank
 4 | batch04_B02_rep03_303.mzML	3	1	3	blank
 5 | batch04_QC17_rep01_262.mzML	1	1	4	QC
 6 | batch04_QC17_rep02_263.mzML	2	1	5	QC
 7 | batch04_QC17_rep03_264.mzML	3	1	6	QC
 8 | batch04_S01_rep01_247.mzML	1	1	7	sample
 9 | batch04_S01_rep02_248.mzML	2	1	8	sample
10 | batch04_S01_rep03_249.mzML	3	1	9	sample
11 | 


--------------------------------------------------------------------------------
/tests/data/MTBLS79_subset/filelist_csl_MTBLS79_mzml_triplicates.txt:
--------------------------------------------------------------------------------
 1 | filename	replicate	batch	injectionOrder	classLabel
 2 | batch04_B02_rep01_301.mzML	1	1	1	blank
 3 | batch04_B02_rep02_302.mzML	2	1	2	blank
 4 | batch04_B02_rep03_303.mzML	3	1	3	blank
 5 | batch04_QC17_rep01_262.mzML	1	1	4	QC
 6 | batch04_QC17_rep02_263.mzML	2	1	5	QC
 7 | batch04_QC17_rep03_264.mzML	3	1	6	QC
 8 | batch04_S01_rep01_247.mzML	1	1	7	sample
 9 | batch04_S01_rep02_248.mzML	2	1	8	sample
10 | batch04_S01_rep03_249.mzML	3	1	9	sample
11 | 


--------------------------------------------------------------------------------
/tests/data/MTBLS79_subset/filelist_filename_error.txt:
--------------------------------------------------------------------------------
 1 | filename	replicate	batch	injectionOrder	classLabel
 2 | batch04_B02_rep01_301.mzML	1	1	1	blank
 3 | batch04_B02_rep02_302.mzML	2	1	2	blank
 4 | batch04_B02_rep03_303.mzML	3	1	3	blank
 5 | batch04_QC17_rep01_262.mzML	1	1	4	QC
 6 | batch04_QC17_rep02_263.mzML	2	1	5	QC
 7 | batch04_QC17_rep03_264.mzML	3	1	6	QC
 8 | batch04_S01_rep01_247.mzML	1	1	7	sample
 9 | batch04_S01_rep02_248.mzML	2	1	8	sample
10 | batch04_S01_rep02_248.mzML	3	1	9	sample
11 | 


--------------------------------------------------------------------------------
/tests/data/MTBLS79_subset/filelist_injection_order_error.txt:
--------------------------------------------------------------------------------
 1 | filename	replicate	batch	injectionOrder	classLabel
 2 | batch04_B02_rep01_301.mzML	1	1	1	blank
 3 | batch04_B02_rep02_302.mzML	2	1	2	blank
 4 | batch04_B02_rep03_303.mzML	3	1	3	blank
 5 | batch04_QC17_rep01_262.mzML	1	1	10	QC
 6 | batch04_QC17_rep02_263.mzML	2	1	5	QC
 7 | batch04_QC17_rep03_264.mzML	3	1	6	QC
 8 | batch04_S01_rep01_247.mzML	1	1	7	sample
 9 | batch04_S01_rep02_248.mzML	2	1	8	sample
10 | batch04_S01_rep03_249.mzML	3	1	4	sample


--------------------------------------------------------------------------------
/tests/data/MTBLS79_subset/filelist_multi.txt:
--------------------------------------------------------------------------------
1 | filename	class	multilist
2 | batch04_QC17_rep01_262.RAW	blank	1
3 | batch04_QC17_rep02_263.RAW	sample	1
4 | batch04_QC17_rep03_264.RAW	sample	2
5 | 


--------------------------------------------------------------------------------
/tests/data/MTBLS79_subset/filelist_multi_error.txt:
--------------------------------------------------------------------------------
1 | filename	class	multilist
2 | batch04_QC17_rep01_262.RAW	blank	1
3 | batch04_QC17_rep02_263.RAW	sample	'UNWANTED STRING'
4 | batch04_QC17_rep03_264.RAW	sample	2
5 | 


--------------------------------------------------------------------------------
/tests/data/MTBLS79_subset/filelist_mzml_single.txt:
--------------------------------------------------------------------------------
1 | filename	replicate	classLabel
2 | batch04_QC17_rep01_262.mzML	1	sample
3 | 


--------------------------------------------------------------------------------
/tests/data/MTBLS79_subset/filelist_mzml_triplicates.txt:
--------------------------------------------------------------------------------
 1 | filename	replicate	batch	injectionOrder	classLabel
 2 | batch04_B02_rep01_301.mzML	1	1	1	blank
 3 | batch04_B02_rep02_302.mzML	2	1	2	blank
 4 | batch04_B02_rep03_303.mzML	3	1	3	blank
 5 | batch04_QC17_rep01_262.mzML	1	1	4	QC
 6 | batch04_QC17_rep02_263.mzML	2	1	5	QC
 7 | batch04_QC17_rep03_264.mzML	3	1	6	QC
 8 | batch04_S01_rep01_247.mzML	1	1	7	sample
 9 | batch04_S01_rep02_248.mzML	2	1	8	sample
10 | batch04_S01_rep03_249.mzML	3	1	9	sample
11 | 


--------------------------------------------------------------------------------
/tests/data/MTBLS79_subset/filelist_raw_triplicates.txt:
--------------------------------------------------------------------------------
1 | filename	replicate	batch	injectionOrder	classLabel
2 | batch04_QC17_rep01_262.RAW	1	1	1	QC
3 | batch04_QC17_rep02_263.RAW	2	1	2	QC
4 | batch04_QC17_rep03_264.RAW	3	1	3	QC
5 | 


--------------------------------------------------------------------------------
/tests/data/MTBLS79_subset/filelist_replicate_error_1.txt:
--------------------------------------------------------------------------------
 1 | filename	replicate	batch	injectionOrder	classLabel
 2 | batch04_B02_rep01_301.mzML	1	1	1	blank
 3 | batch04_B02_rep02_302.mzML	2	1	2	blank
 4 | batch04_B02_rep03_303.mzML	0	1	3	blank
 5 | batch04_QC17_rep01_262.mzML	1	1	4	QC
 6 | batch04_QC17_rep02_263.mzML	2	1	5	QC
 7 | batch04_QC17_rep03_264.mzML	0	1	6	QC
 8 | batch04_S01_rep01_247.mzML	1	1	7	sample
 9 | batch04_S01_rep02_248.mzML	2	1	8	sample
10 | batch04_S01_rep03_249.mzML	0	1	9	sample


--------------------------------------------------------------------------------
/tests/data/MTBLS79_subset/filelist_replicate_error_2.txt:
--------------------------------------------------------------------------------
 1 | filename	replicate	batch	injectionOrder	classLabel
 2 | batch04_B02_rep01_301.mzML	1	1	1	blank
 3 | batch04_B02_rep02_302.mzML	2	1	2	blank
 4 | batch04_B02_rep03_303.mzML	3	1	3	blank
 5 | batch04_QC17_rep01_262.mzML	1	1	4	QC
 6 | batch04_QC17_rep02_263.mzML	2	1	5	QC
 7 | batch04_QC17_rep03_264.mzML	10	1	6	QC
 8 | batch04_S01_rep01_247.mzML	1	1	7	sample
 9 | batch04_S01_rep02_248.mzML	5	1	8	sample
10 | batch04_S01_rep03_249.mzML	3	1	9	sample


--------------------------------------------------------------------------------
/tests/data/MTBLS79_subset/raw/batch04_QC17_rep01_262.RAW:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/computational-metabolomics/dimspy/4a0b8982382b8198f2e28864a7f0263ce8a49c49/tests/data/MTBLS79_subset/raw/batch04_QC17_rep01_262.RAW


--------------------------------------------------------------------------------
/tests/data/MTBLS79_subset/raw/batch04_QC17_rep02_263.RAW:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/computational-metabolomics/dimspy/4a0b8982382b8198f2e28864a7f0263ce8a49c49/tests/data/MTBLS79_subset/raw/batch04_QC17_rep02_263.RAW


--------------------------------------------------------------------------------
/tests/data/MTBLS79_subset/raw/batch04_QC17_rep03_264.RAW:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/computational-metabolomics/dimspy/4a0b8982382b8198f2e28864a7f0263ce8a49c49/tests/data/MTBLS79_subset/raw/batch04_QC17_rep03_264.RAW


--------------------------------------------------------------------------------
/tests/data/mzml_DIMSn.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/computational-metabolomics/dimspy/4a0b8982382b8198f2e28864a7f0263ce8a49c49/tests/data/mzml_DIMSn.zip


--------------------------------------------------------------------------------
/tests/test_hdf5_portal.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | # Copyright © 2017-2020 Ralf Weber, Albert Zhou.
  5 | #
  6 | # This file is part of DIMSpy.
  7 | #
  8 | # DIMSpy is free software: you can redistribute it and/or modify
  9 | # it under the terms of the GNU General Public License as published by
 10 | # the Free Software Foundation, either version 3 of the License, or
 11 | # (at your option) any later version.
 12 | #
 13 | # DIMSpy is distributed in the hope that it will be useful,
 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 | # GNU General Public License for more details.
 17 | #
 18 | # You should have received a copy of the GNU General Public License
 19 | # along with DIMSpy.  If not, see <https://www.gnu.org/licenses/>.
 20 | #
 21 | 
 22 | 
 23 | import os
 24 | import unittest
 25 | 
 26 | import numpy as np
 27 | from dimspy.models.peaklist import PeakList
 28 | from dimspy.models.peaklist_tags import Tag
 29 | from dimspy.portals.hdf5_portal import save_peak_matrix_as_hdf5, load_peak_matrix_from_hdf5
 30 | from dimspy.portals.hdf5_portal import save_peaklists_as_hdf5, load_peaklists_from_hdf5
 31 | from dimspy.process.peak_alignment import align_peaks
 32 | 
 33 | 
 34 | class HDF5PortalsTestCase(unittest.TestCase):
 35 |     @staticmethod
 36 |     def _createPeaklists():
 37 |         _mzs = lambda: sorted(np.random.uniform(100, 1200, size = 100))
 38 |         _ints = lambda: np.abs(np.random.normal(100, 10, size = 100))
 39 | 
 40 |         pkls = [
 41 |             PeakList('sample_1_1', _mzs(), _ints(), mz_range = (100, 1200)),
 42 |             PeakList('sample_1_2', _mzs(), _ints(), mz_range = (100, 1200)),
 43 |             PeakList('QC_1', _mzs(), _ints(), mz_range = (100, 1200)),
 44 |             PeakList('sample_2_1', _mzs(), _ints(), mz_range = (100, 1200)),
 45 |             PeakList('sample_2_2', _mzs(), _ints(), mz_range = (100, 1200)),
 46 |             PeakList('QC_2', _mzs(), _ints(), mz_range = (100, 1200)),
 47 |         ]
 48 | 
 49 |         for t in ('sample', Tag('compound_1', 'treatment'), Tag('1hr', 'time_point'), Tag(1, 'plate')): pkls[0].tags.add_tag(t)
 50 |         for t in ('sample', Tag('compound_1', 'treatment'), Tag('6hr', 'time_point'), Tag(1, 'plate')): pkls[1].tags.add_tag(t)
 51 |         for t in ('qc', Tag(1, 'plate')): pkls[2].tags.add_tag(t)
 52 |         for t in ('sample', Tag('compound_2', 'treatment'), Tag('1hr', 'time_point'), Tag(2, 'plate')): pkls[3].tags.add_tag(t)
 53 |         for t in ('sample', Tag('compound_2', 'treatment'), Tag('6hr', 'time_point'), Tag(2, 'plate')): pkls[4].tags.add_tag(t)
 54 |         for t in ('qc', Tag(2, 'plate')): pkls[5].tags.add_tag(t)
 55 | 
 56 |         for p in pkls: p.add_attribute('snr', np.random.uniform(300, 400, size = 100))
 57 |         for p in pkls: p.add_attribute('quad_flag', [0, 1, 1, 1] * 25, is_flag = True)
 58 |         for p in pkls: p.add_attribute('lab', [chr(i%26+97) for i in range(100)], flagged_only = False)
 59 |         return pkls
 60 | 
 61 |     def test_peaklist_portal(self):
 62 |         pkls = self._createPeaklists()
 63 | 
 64 |         save_peaklists_as_hdf5(pkls, '.test_peaklist.hdf5')
 65 |         npkls = load_peaklists_from_hdf5('.test_peaklist.hdf5')
 66 | 
 67 |         self.assertListEqual([x.size for x in npkls], [75] * 6)
 68 |         self.assertListEqual([x.full_size for x in npkls], [100] * 6)
 69 |         self.assertTrue(all([np.allclose(x[0].mz_all, x[1].mz_all) for x in zip(pkls, npkls)]))
 70 |         self.assertTrue(all([np.allclose(x[0].intensity, x[1].intensity) for x in zip(pkls, npkls)]))
 71 |         self.assertTrue(all([np.allclose(x[0].snr, x[1].snr, atol = 1e-30) for x in zip(pkls, npkls)]))
 72 |         self.assertTrue(all([np.all(x[0].quad_flag == x[1].quad_flag) for x in zip(pkls, npkls)]))
 73 |         self.assertTrue(all([np.all(x[0].lab == x[1].lab) for x in zip(pkls, npkls)]))
 74 |         self.assertTrue(all([list(x[0].metadata.keys()) == list(x[1].metadata.keys()) for x in zip(pkls, npkls)]))
 75 |         self.assertTrue(all([x[0].tags.tag_types == x[1].tags.tag_types for x in zip(pkls, npkls)]))
 76 |         self.assertTrue(all([x[0].tags.tag_values == x[1].tags.tag_values for x in zip(pkls, npkls)]))
 77 | 
 78 |     def test_peak_matrix_portal(self):
 79 |         pkls = self._createPeaklists()
 80 |         pm = align_peaks(pkls, ppm = 2.0, block_size = 10, ncpus = 2)
 81 | 
 82 |         pm.mask_tags('qc')
 83 | 
 84 |         pnum = pm.full_shape[1]
 85 |         pm.add_flag('odd_flag', ([0, 1] * int(pnum/2.+1))[:pnum])
 86 |         pm.add_flag('qua_flag', ([0, 0, 0, 1] * int(pnum/4.+1))[:pnum], flagged_only = False)
 87 | 
 88 |         save_peak_matrix_as_hdf5(pm, '.test_peak_matrix.hdf5')
 89 |         npm = load_peak_matrix_from_hdf5('.test_peak_matrix.hdf5')
 90 | 
 91 |         self.assertEqual(pm.shape, npm.shape)
 92 |         self.assertEqual(pm.full_shape, npm.full_shape)
 93 |         self.assertTupleEqual(pm.attributes, npm.attributes)
 94 |         self.assertTrue(np.allclose(pm.mz_matrix, npm.mz_matrix))
 95 |         self.assertTrue(np.allclose(pm.intensity_matrix, npm.intensity_matrix))
 96 |         self.assertTrue(np.allclose(pm.attr_matrix('snr'), npm.attr_matrix('snr')))
 97 |         self.assertTrue(np.all(pm.attr_matrix('lab') == npm.attr_matrix('lab')))
 98 |         self.assertTrue(np.all( pm.property('present_matrix', flagged_only = False) ==
 99 |                                npm.property('present_matrix', flagged_only = False)))
100 |         self.assertEqual(pm.peaklist_tag_types, npm.peaklist_tag_types)
101 |         self.assertEqual(pm.peaklist_tag_values, npm.peaklist_tag_values)
102 |         self.assertTrue(np.all(pm.mask == npm.mask))
103 |         self.assertTrue(np.all(pm.flag_values('odd_flag') == npm.flag_values('odd_flag')))
104 |         self.assertTrue(np.all(pm.flag_values('qua_flag') == npm.flag_values('qua_flag')))
105 |         self.assertTrue(np.all(pm.flags == npm.flags))
106 | 
107 |     def tearDown(self):
108 |         if os.path.isfile('.test_peaklist.hdf5'): os.remove('.test_peaklist.hdf5')
109 |         if os.path.isfile('.test_peak_matrix.hdf5'): os.remove('.test_peak_matrix.hdf5')
110 | 
111 | 
# Run the test cases when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
114 | 


--------------------------------------------------------------------------------
/tests/test_metadata.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | #
 4 | # Copyright © 2017-2020 Ralf Weber, Albert Zhou.
 5 | #
 6 | # This file is part of DIMSpy.
 7 | #
 8 | # DIMSpy is free software: you can redistribute it and/or modify
 9 | # it under the terms of the GNU General Public License as published by
10 | # the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # DIMSpy is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 | # GNU General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU General Public License
19 | # along with DIMSpy.  If not, see <https://www.gnu.org/licenses/>.
20 | #
21 | 
22 | 
23 | import unittest
24 | import os
25 | 
26 | from dimspy.metadata import validate_metadata
27 | 
28 | 
def to_test_data(*args):
    """Return the path built from ``args`` below the ``data/MTBLS79_subset`` folder next to this file."""
    here = os.path.dirname(os.path.realpath(__file__))
    return os.path.join(here, "data", "MTBLS79_subset", *args)
31 | 
32 | 
class ValidateMetadataTestCase(unittest.TestCase):
    """Tests for ``validate_metadata`` run against the MTBLS79 subset file lists."""

    def _assert_validation_error(self, filename, message):
        """Assert that validating ``filename`` raises an exception whose text contains ``message``.

        :param filename: name of a file list inside the test data folder
        :param message: substring expected in the string form of the raised exception
        """
        with self.assertRaises(Exception) as context:
            validate_metadata(to_test_data(filename))
        # assertIn reports both strings on failure, unlike assertTrue(x in y).
        self.assertIn(message, str(context.exception))

    def test_filelist_standard(self):
        # Columns checked: filename, replicate, batch, injectionOrder, classLabel
        fm_dict = validate_metadata(to_test_data("filelist_csl_MTBLS79_mzml_triplicates.txt"))
        self.assertEqual(fm_dict['filename'], ['batch04_B02_rep01_301.mzML', 'batch04_B02_rep02_302.mzML',
                                               'batch04_B02_rep03_303.mzML', 'batch04_QC17_rep01_262.mzML',
                                               'batch04_QC17_rep02_263.mzML', 'batch04_QC17_rep03_264.mzML',
                                               'batch04_S01_rep01_247.mzML', 'batch04_S01_rep02_248.mzML',
                                               'batch04_S01_rep03_249.mzML'])
        self.assertEqual(fm_dict['replicate'], [1, 2, 3, 1, 2, 3, 1, 2, 3])
        self.assertEqual(fm_dict['batch'], [1] * 9)
        self.assertEqual(fm_dict['injectionOrder'], [1, 2, 3, 4, 5, 6, 7, 8, 9])
        self.assertEqual(fm_dict['classLabel'], ['blank', 'blank', 'blank',
                                                 'QC', 'QC', 'QC', 'sample', 'sample', 'sample'])

    def test_filelist_multi(self):
        fm_dict = validate_metadata(to_test_data("filelist_multi.txt"))
        self.assertEqual(fm_dict['multilist'], [1, 1, 2])

    def test_filename_error(self):
        self._assert_validation_error("filelist_filename_error.txt",
                                      "Duplicate filename in list")

    def test_filelist_multilist_error(self):
        self._assert_validation_error("filelist_multi_error.txt",
                                      "Column 'multilist' values should be integers")

    def test_filelist_injection_order_error(self):
        self._assert_validation_error("filelist_injection_order_error.txt",
                                      "samples not in order")

    def test_filelist_class_label_error(self):
        self._assert_validation_error("filelist_class_label_error.txt",
                                      "class names do not match with number of replicates")

    def test_filelist_replicate_error_zero_value(self):
        self._assert_validation_error("filelist_replicate_error_1.txt",
                                      "Incorrect replicate number in list")

    # This test used to share its name with the one above, so the later
    # definition replaced the earlier one and only one of the two ever ran.
    def test_filelist_replicate_error_numbering(self):
        self._assert_validation_error("filelist_replicate_error_2.txt",
                                      "Incorrect numbering for replicates")
82 | 
83 | 
84 | 
# Run the metadata validation tests when this file is executed directly.
if __name__ == "__main__":
    unittest.main()
87 | 


--------------------------------------------------------------------------------
/tests/test_mzml_portal.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | # Copyright © 2017-2020 Ralf Weber, Albert Zhou.
  5 | #
  6 | # This file is part of DIMSpy.
  7 | #
  8 | # DIMSpy is free software: you can redistribute it and/or modify
  9 | # it under the terms of the GNU General Public License as published by
 10 | # the Free Software Foundation, either version 3 of the License, or
 11 | # (at your option) any later version.
 12 | #
 13 | # DIMSpy is distributed in the hope that it will be useful,
 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 | # GNU General Public License for more details.
 17 | #
 18 | # You should have received a copy of the GNU General Public License
 19 | # along with DIMSpy.  If not, see <https://www.gnu.org/licenses/>.
 20 | #
 21 | 
 22 | 
 23 | import io
 24 | import os
 25 | import unittest
 26 | import zipfile
 27 | 
 28 | from dimspy.portals.mzml_portal import Mzml
 29 | 
 30 | 
 31 | def to_test_data(*args):
 32 |     return os.path.join(os.path.dirname(os.path.realpath(__file__)), "data", *args)
 33 | 
 34 | 
 35 | def to_test_results(*args):
 36 |     return os.path.join(os.path.dirname(os.path.realpath(__file__)), "results", *args)
 37 | 
 38 | 
 39 | class MzmlPortalsTestCase(unittest.TestCase):
 40 | 
 41 |     @classmethod
 42 |     def setUpClass(cls):
 43 | 
 44 |         zip_ref = zipfile.ZipFile(to_test_data("mzml_DIMSn.zip"), 'r')
 45 |         zip_ref.extractall(to_test_results("zip_data", "mzml"))
 46 |         zip_ref.close()
 47 | 
 48 |     def test_mzml_portal(self):
 49 |         run = Mzml(to_test_data("MTBLS79_subset", "mzml", "batch04_QC17_rep01_262.mzML"))
 50 |         self.assertEqual(run.timestamp, "2011-04-02T03:28:02Z")
 51 |         self.assertEqual((run.run.get_spectrum_count(), run.run.get_spectrum_count()), (88, 88))
 52 |         self.assertListEqual(list(run.headers().keys()), ['FTMS + p ESI w SIM ms [70.00-170.00]',
 53 |                                                           'FTMS + p ESI w SIM ms [140.00-240.00]',
 54 |                                                           'FTMS + p ESI w SIM ms [210.00-310.00]',
 55 |                                                           'FTMS + p ESI w SIM ms [280.00-380.00]',
 56 |                                                           'FTMS + p ESI w SIM ms [350.00-450.00]',
 57 |                                                           'FTMS + p ESI w SIM ms [420.00-520.00]',
 58 |                                                           'FTMS + p ESI w SIM ms [490.00-590.00]'])
 59 |         self.assertListEqual(list(run.scan_ids().keys()), list(range(1,89)))
 60 |         self.assertListEqual(list(run.tics().values())[0:2], [39800032.0, 38217892.0])
 61 |         self.assertEqual(len(run.tics()), 88)
 62 |         self.assertListEqual(list(run.ion_injection_times().values())[0:2], [40.433891296387, 40.094646453857])
 63 |         self.assertEqual(len(run.ion_injection_times()), 88)
 64 |         self.assertListEqual(run.scan_dependents(), [])
 65 | 
 66 |         pl = run.peaklist(1)
 67 |         self.assertEqual(pl.ID, 1)
 68 |         self.assertEqual(pl.metadata["header"], "FTMS + p ESI w SIM ms [70.00-170.00]")
 69 |         self.assertEqual(pl.metadata["ms_level"], 1.0)
 70 |         self.assertEqual(pl.metadata["ion_injection_time"], 40.433891296387)
 71 |         self.assertEqual(pl.metadata["scan_time"], 0.50109)
 72 |         self.assertEqual(pl.metadata["tic"], 39800032.0)
 73 |         self.assertEqual(pl.metadata["function_noise"], "median")
 74 |         self.assertEqual(pl.metadata["mz_range"], [70.0, 170.0])
 75 |         run.close()
 76 | 
 77 |         run = Mzml(to_test_results("zip_data", "mzml", "A08_Apolar_Daph_AMP1_C30_LCMS_Pos_DIMSn_subset.mzML"))
 78 |         sd = run.scan_dependents()
 79 |         self.assertListEqual(list(run.tics().values())[0:2], [120293696.0, 13602.5234375])
 80 |         self.assertEqual(len(run.tics()), 36)
 81 |         self.assertListEqual(sd[0], [1, 3])
 82 |         self.assertListEqual(sd[-1], [511, 512])
 83 |         self.assertEqual(len(sd), 30)
 84 |         run.close()
 85 | 
 86 |         # with open(to_test_results("zip_data", "mzml", "A08_Apolar_Daph_AMP1_C30_LCMS_Pos_DIMSn_subset.mzML"), "rb") as inp:
 87 |         #     b = io.BytesIO(inp.read())
 88 |         #     run = Mzml(b)
 89 |         #     sd = run.scan_dependents()
 90 |         #     self.assertListEqual(list(run.tics().values())[0:2], [120293696.0, 13602.5234375])
 91 |         #     self.assertEqual(len(run.tics()), 36)
 92 |         #     self.assertListEqual(sd[0], [1, 3])
 93 |         #     self.assertListEqual(sd[-1], [511, 512])
 94 |         #     self.assertEqual(len(sd), 30)
 95 |         #     run.close()
 96 | 
 97 |     @classmethod
 98 |     def tearDownClass(cls):
 99 | 
100 |         import shutil
101 |         shutil.rmtree(to_test_results(""))
102 |         os.makedirs(to_test_results(""))
103 | 
104 | 
# Run the mzML portal tests when this file is executed directly.
if __name__ == "__main__":
    unittest.main()
107 | 


--------------------------------------------------------------------------------
/tests/test_paths_portal.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | # Copyright © 2017-2020 Ralf Weber, Albert Zhou.
  5 | #
  6 | # This file is part of DIMSpy.
  7 | #
  8 | # DIMSpy is free software: you can redistribute it and/or modify
  9 | # it under the terms of the GNU General Public License as published by
 10 | # the Free Software Foundation, either version 3 of the License, or
 11 | # (at your option) any later version.
 12 | #
 13 | # DIMSpy is distributed in the hope that it will be useful,
 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 | # GNU General Public License for more details.
 17 | #
 18 | # You should have received a copy of the GNU General Public License
 19 | # along with DIMSpy.  If not, see <https://www.gnu.org/licenses/>.
 20 | #
 21 | 
 22 | 
 23 | import os
 24 | import unittest
 25 | import platform
 26 | 
 27 | from dimspy.portals import paths
 28 | 
 29 | 
 30 | def to_test_data(*args):
 31 |     return os.path.join(os.path.dirname(os.path.realpath(__file__)), "data", *args)
 32 | 
 33 | 
 34 | def to_test_results(*args):
 35 |     return os.path.join(os.path.dirname(os.path.realpath(__file__)), "results", *args)
 36 | 
 37 | 
 38 | class PathsPortalsTestCase(unittest.TestCase):
 39 |     def test_paths_portal(self):
 40 | 
 41 |         files_correct = [to_test_data("MTBLS79_subset", "raw", "batch04_QC17_rep01_262.RAW"),
 42 |                           to_test_data("MTBLS79_subset", "raw", "batch04_QC17_rep02_263.RAW"),
 43 |                           to_test_data("MTBLS79_subset", "raw", "batch04_QC17_rep03_264.RAW")]
 44 |         tsv = to_test_data("MTBLS79_subset", "filelist_raw_triplicates.txt")
 45 | 
 46 |         source = to_test_data("MTBLS79_subset", "raw")
 47 |         files = paths.validate_and_sort_paths(source, tsv)
 48 |         self.assertListEqual(files, files_correct)
 49 | 
 50 |         source = to_test_data("MTBLS79_subset", "raw")
 51 |         files = paths.validate_and_sort_paths(source, tsv)
 52 |         self.assertListEqual(files, files_correct)
 53 | 
 54 |         source = [to_test_data("MTBLS79_subset", "raw", "batch04_QC17_rep03_264.RAW"),
 55 |                   to_test_data("MTBLS79_subset", "raw", "batch04_QC17_rep02_263.RAW"),
 56 |                   to_test_data("MTBLS79_subset", "raw", "batch04_QC17_rep01_262.RAW")]
 57 |         files = paths.validate_and_sort_paths(source, tsv)
 58 |         self.assertListEqual(files, files_correct)
 59 | 
 60 |         files = paths.validate_and_sort_paths(tsv=None, source=source)
 61 |         self.assertListEqual(files, source)
 62 | 
 63 |         path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data", "MTBLS79_subset")
 64 | 
 65 |         source_raw = os.path.join(path, "raw")
 66 |         fn_filelist_raw = os.path.join(path, "filelist_raw_triplicates.txt")
 67 |         fns = paths.validate_and_sort_paths(source_raw, fn_filelist_raw)
 68 |         fns_c = [os.path.join(source_raw, 'batch04_QC17_rep01_262.RAW'),
 69 |                  os.path.join(source_raw, 'batch04_QC17_rep02_263.RAW'),
 70 |                  os.path.join(source_raw, 'batch04_QC17_rep03_264.RAW')]
 71 |         self.assertListEqual(fns, fns_c)
 72 | 
 73 |         fns = [os.path.join(source_raw, "batch04_QC17_rep01_262.RAW")]
 74 |         fns_out = paths.validate_and_sort_paths(fns, None)
 75 |         self.assertListEqual(fns, fns_out)
 76 | 
 77 |         fns = [os.path.join(source_raw, "batch04_QC17_rep01_262.RAW"),
 78 |                os.path.join(source_raw, "batch04_QC17_rep02_263.RAW"),
 79 |                os.path.join(source_raw, "batch04_QC17_rep03_264.RAW")]
 80 |         fns_out = paths.validate_and_sort_paths(fns, fn_filelist_raw)
 81 |         self.assertListEqual(fns, fns_out)
 82 | 
 83 |         source_mzml = os.path.join(path, "mzml")
 84 |         fns = [os.path.join(source_mzml, 'batch04_QC17_rep01_262.mzML')]
 85 |         fns_out = paths.validate_and_sort_paths(fns, None)
 86 |         self.assertListEqual(fns, fns_out)
 87 | 
 88 |         fn_filelist_mzml = os.path.join(path, "filelist_mzml_triplicates.txt")
 89 |         source_mzml_fns = [os.path.join(source_mzml, "batch04_QC17_rep01_262.mzML"),
 90 |                            os.path.join(source_mzml, "batch04_QC17_rep02_263.mzML"),
 91 |                            os.path.join(source_mzml, "batch04_QC17_rep03_264.mzML")]
 92 | 
 93 |         with self.assertRaises(IOError):
 94 |             paths.validate_and_sort_paths(source_mzml_fns, fn_filelist_mzml)
 95 | 
 96 |         with self.assertRaises(IOError):
 97 |             paths.validate_and_sort_paths(source_mzml, fn_filelist_mzml)
 98 | 
 99 |     def test_sort_ms_files_by_timestamp(self):
100 |         p = to_test_data("MTBLS79_subset", "mzml")
101 |         ps = [os.path.join(p, fn) for fn in os.listdir(p)]
102 |         files_sorted = paths.sort_ms_files_by_timestamp(ps)
103 |         self.assertEqual(files_sorted[0], (os.path.join(p, "batch04_QC17_rep01_262.mzML"), '2011-04-02T03:28:02Z'))
104 |         self.assertEqual(files_sorted[1], (os.path.join(p, "batch04_QC17_rep02_263.mzML"), '2011-04-02T03:31:04Z'))
105 |         self.assertEqual(files_sorted[2], (os.path.join(p, "batch04_QC17_rep03_264.mzML"), '2011-04-02T03:34:08Z'))
106 | 
107 |         ps.reverse()
108 |         files_sorted = paths.sort_ms_files_by_timestamp(ps)
109 |         self.assertEqual(files_sorted[0], (os.path.join(p, "batch04_QC17_rep01_262.mzML"), '2011-04-02T03:28:02Z'))
110 |         self.assertEqual(files_sorted[1], (os.path.join(p, "batch04_QC17_rep02_263.mzML"), '2011-04-02T03:31:04Z'))
111 |         self.assertEqual(files_sorted[2], (os.path.join(p, "batch04_QC17_rep03_264.mzML"), '2011-04-02T03:34:08Z'))
112 | 
113 |         p = to_test_data("MTBLS79_subset", "raw")
114 |         ps = [os.path.join(p, fn) for fn in os.listdir(p)]
115 |         files_sorted = paths.sort_ms_files_by_timestamp(ps)
116 | 
117 |         self.assertTrue(files_sorted[0] == (os.path.join(p, "batch04_QC17_rep01_262.RAW"), '02/04/2011 03:28:02')
118 |                         or files_sorted[0] == (os.path.join(p, "batch04_QC17_rep01_262.RAW"), '4/2/2011 3:28:02 AM'))
119 | 
120 |         self.assertTrue(files_sorted[1] == (os.path.join(p, "batch04_QC17_rep02_263.RAW"), '02/04/2011 03:31:05')
121 |                         or files_sorted[1] == (os.path.join(p, "batch04_QC17_rep02_263.RAW"), '4/2/2011 3:31:05 AM'))
122 | 
123 |         self.assertTrue(files_sorted[2] == (os.path.join(p, "batch04_QC17_rep03_264.RAW"), '02/04/2011 03:34:09')
124 |                         or files_sorted[2] == (os.path.join(p, "batch04_QC17_rep03_264.RAW"), '4/2/2011 3:34:09 AM'))
125 | 
126 |         ps.reverse()
127 |         files_sorted = paths.sort_ms_files_by_timestamp(ps)
128 | 
129 |         self.assertTrue(files_sorted[0] == (os.path.join(p, "batch04_QC17_rep01_262.RAW"), '02/04/2011 03:28:02')
130 |                         or files_sorted[0] == (os.path.join(p, "batch04_QC17_rep01_262.RAW"), '4/2/2011 3:28:02 AM'))
131 | 
132 |         self.assertTrue(files_sorted[1] == (os.path.join(p, "batch04_QC17_rep02_263.RAW"), '02/04/2011 03:31:05')
133 |                         or files_sorted[1] == (os.path.join(p, "batch04_QC17_rep02_263.RAW"), '4/2/2011 3:31:05 AM'))
134 | 
135 |         self.assertTrue(files_sorted[2] == (os.path.join(p, "batch04_QC17_rep03_264.RAW"), '02/04/2011 03:34:09')
136 |                         or files_sorted[2] == (os.path.join(p, "batch04_QC17_rep03_264.RAW"), '4/2/2011 3:34:09 AM'))
137 | 
# Run the paths portal tests when this file is executed directly.
if __name__ == "__main__":
    unittest.main()
140 | 


--------------------------------------------------------------------------------
/tests/test_peak_alignment.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | # Copyright © 2017-2020 Ralf Weber, Albert Zhou.
  5 | #
  6 | # This file is part of DIMSpy.
  7 | #
  8 | # DIMSpy is free software: you can redistribute it and/or modify
  9 | # it under the terms of the GNU General Public License as published by
 10 | # the Free Software Foundation, either version 3 of the License, or
 11 | # (at your option) any later version.
 12 | #
 13 | # DIMSpy is distributed in the hope that it will be useful,
 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 | # GNU General Public License for more details.
 17 | #
 18 | # You should have received a copy of the GNU General Public License
 19 | # along with DIMSpy.  If not, see <https://www.gnu.org/licenses/>.
 20 | #
 21 | 
 22 | 
 23 | import unittest
 24 | from functools import reduce
 25 | 
 26 | import numpy as np
 27 | from dimspy.models.peaklist import PeakList
 28 | from dimspy.process.peak_alignment import align_peaks
 29 | 
 30 | 
 31 | class PeakAlignmentTestCase(unittest.TestCase):
 32 |     mz = [
 33 |         [10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
 34 |         [10,     30,     50, 60, 70, 80, 90, 100],
 35 |         [20,     30, 40,         70, 80, 90, 100],
 36 |         [10, 20, 30,                 80, 90, 100],
 37 |         [10, 20,         50, 60, 70, 80,        ],
 38 |         [                50,                    ],
 39 |     ]
 40 | 
 41 |     ints = [
 42 |         [11, 12, 13, 14, 15, 16, 17, 18, 19, 110],
 43 |         [21,     23,     25, 26, 27, 28, 29, 210],
 44 |         [    32, 33, 34,         37, 38, 39, 310],
 45 |         [41, 42, 43,                 48, 49, 410],
 46 |         [51, 52,         55, 56, 57, 58,        ],
 47 |         [                65,                    ],
 48 |     ]
 49 | 
 50 |     strs = [
 51 |         ['a','b','c','d','e','f','g','h','i','j'],
 52 |         ['k',    'l',    'm','n','o','p','q','r'],
 53 |         [    's','t','u',        'v','w','x','y'],
 54 |         ['z','a','b',                'c','d','e'],
 55 |         ['f','g',        'h','i','j','k',       ],
 56 |         [                'l',                   ],
 57 |     ]
 58 | 
 59 |     def _createPeakLists(self):
 60 |         mz = [np.array(m) + np.random.normal(0, 1e-5, len(m)) for m in self.mz]
 61 |         pkls = []
 62 |         for i in range(len(mz)):
 63 |             pl = PeakList('peaklist_' + str(i), mz[i], self.ints[i])
 64 |             pl.add_attribute('str_attr', self.strs[i])
 65 |             pkls += [pl]
 66 |         return pkls
 67 | 
 68 |     def _checkAlignmentResults(self, pm):
 69 |         self.assertTrue(np.allclose(np.unique(np.round(pm.to_peaklist('merged').mz)), np.arange(10, 110, step = 10)))
 70 |         self.assertTrue(all(np.allclose(mi[mm != 0], ri) for mi, mm, ri in zip(pm.intensity_matrix, pm.mz_matrix, self.ints)))
 71 | 
 72 |     def test_normal_alignment(self):
 73 |         pkls = self._createPeakLists()
 74 | 
 75 |         try:
 76 |             pm = align_peaks(pkls, ppm = 2.0, block_size = 5, fixed_block = True, edge_extend = 10, ncpus = 2)
 77 |             # print pm.attr_matrix('str_attr')
 78 |             # print pm.attr_mean_vector('str_attr')
 79 |         except Exception as e:
 80 |             self.fail('alignment failed: ' + str(e))
 81 | 
 82 |         self._checkAlignmentResults(pm)
 83 | 
 84 |     def test_block_size(self):
 85 |         pkls = self._createPeakLists()
 86 |         try:
 87 |             pm = align_peaks(pkls, ppm = 2.0, block_size = 1, fixed_block = True, edge_extend = 10, ncpus = 2)
 88 |         except Exception as e:
 89 |             self.fail('alignment failed: ' + str(e))
 90 |         self._checkAlignmentResults(pm)
 91 | 
 92 |         pkls = self._createPeakLists()
 93 |         try:
 94 |             pm = align_peaks(pkls, ppm = 2.0, block_size = 20, fixed_block = True, edge_extend = 10, ncpus = 2)
 95 |         except Exception as e:
 96 |             self.fail('alignment failed: ' + str(e))
 97 |         self._checkAlignmentResults(pm)
 98 | 
 99 |     def test_ppm(self):
100 |         pkls = self._createPeakLists()
101 | 
102 |         try:
103 |             pm = align_peaks(pkls, ppm = 1e+10, block_size = 5, fixed_block = True, edge_extend = 10, ncpus = 2)
104 |         except Exception as e:
105 |             self.fail('alignment failed: ' + str(e))
106 | 
107 |         self.assertTrue(np.allclose(pm.to_peaklist('merged').mz, [np.mean(list(map(np.mean, self.mz)))]))
108 |         self.assertTrue(np.allclose(pm.intensity_matrix.flatten(), list(map(np.mean, self.ints))))
109 |         self.assertTrue(np.allclose(pm.attr_matrix('intra_count').flatten(), list(map(len, self.mz))))
110 | 
111 |         try:
112 |             pm = align_peaks(pkls, ppm = 1e-10, block_size = 5, fixed_block = True, edge_extend = 10, ncpus = 2)
113 |         except Exception as e:
114 |             self.fail('alignment failed: ' + str(e))
115 | 
116 |         self.assertTrue(np.allclose(pm.to_peaklist('merged').mz, np.sort(reduce(lambda x,y: x+y, list(map(list, self.mz))))))
117 |         self.assertTrue(np.allclose(np.sort(np.sum(pm.intensity_matrix, axis = 0)), np.sort(reduce(lambda x,y: x+y, self.ints))))
118 |         self.assertTrue(np.allclose(np.sum(pm.attr_matrix('intra_count'), axis = 0), np.ones(pm.shape[1])))
119 | 
120 |     def test_single_peaklist(self):
121 |         pkls = [PeakList('peaklist_0', np.arange(10, 110, step = 10), np.arange(10) + 11)]
122 | 
123 |         try:
124 |             pm = align_peaks(pkls, ppm = 2.0, block_size = 5, fixed_block = True, edge_extend = 10, ncpus = 2)
125 |         except Exception as e:
126 |             self.fail('alignment failed: ' + str(e))
127 | 
128 |         self.assertTrue(np.allclose(pm.to_peaklist('merged').mz, np.arange(10, 110, step = 10)))
129 |         self.assertTrue(np.allclose(pm.intensity_matrix, [np.arange(10) + 11]))
130 | 
131 |     def test_special_peaklists(self):
132 |         pkls = [PeakList('peaklist_' + str(i), np.ones(10) * 10, np.ones(10)) for i in range(6)]
133 | 
134 |         try:
135 |             pm = align_peaks(pkls, ppm = 2.0, block_size = 5, fixed_block = False, edge_extend = 10, ncpus = 2)
136 |         except Exception as e:
137 |             self.fail('alignment failed: ' + str(e))
138 | 
139 |         self.assertTrue(np.allclose(pm.to_peaklist('merged').mz, [10.]))
140 |         self.assertTrue(np.allclose(np.sum(pm.intensity_matrix, axis = 0), [6]))
141 |         self.assertTrue(np.allclose(np.sum(pm.attr_matrix('intra_count'), axis = 0), [60]))
142 | 
143 |         try:
144 |             pm = align_peaks(pkls, ppm = 1e-10, block_size = 1, fixed_block = True, edge_extend = 1, ncpus = 2)
145 |         except Exception as e:
146 |             self.fail('alignment failed: ' + str(e))
147 | 
148 |         self.assertTrue(np.allclose(pm.to_peaklist('merged').mz, [10.]))
149 |         self.assertTrue(np.allclose(np.sum(pm.intensity_matrix, axis = 0), [6]))
150 |         self.assertTrue(np.allclose(np.sum(pm.attr_matrix('intra_count'), axis = 0), [60]))
151 | 
152 |     # may take a while to run
153 |     # def test_large_peaklists(self):
154 |     #     pkls = [PeakList('peaklist_' + str(i),
155 |     #                      np.sort(np.random.uniform(100, 1200, size = 10000)),
156 |     #                      np.random.normal(100, 10, size = 10000))
157 |     #             for i in range(100)]
158 |     #
159 |     #     try:
160 |     #         pm = align_peaks(pkls, ppm = 2.0, block_size = 5000, fixed_block = False, edge_extend = 10, ncpus = 2)
161 |     #     except Exception, e:
162 |     #         self.fail('alignment failed: ' + str(e))
163 | 
164 | 
# Run the peak alignment tests when this file is executed directly.
if __name__ == "__main__":
    unittest.main()
167 | 


--------------------------------------------------------------------------------
/tests/test_peak_filters.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | # Copyright © 2017-2020 Ralf Weber, Albert Zhou.
  5 | #
  6 | # This file is part of DIMSpy.
  7 | #
  8 | # DIMSpy is free software: you can redistribute it and/or modify
  9 | # it under the terms of the GNU General Public License as published by
 10 | # the Free Software Foundation, either version 3 of the License, or
 11 | # (at your option) any later version.
 12 | #
 13 | # DIMSpy is distributed in the hope that it will be useful,
 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 | # GNU General Public License for more details.
 17 | #
 18 | # You should have received a copy of the GNU General Public License
 19 | # along with DIMSpy.  If not, see <https://www.gnu.org/licenses/>.
 20 | #
 21 | 
 22 | 
 23 | import unittest
 24 | 
 25 | from dimspy.models.peaklist_tags import PeakList_Tags
 26 | from dimspy.process.peak_filters import *
 27 | 
 28 | 
 29 | class PeakFiltersTestCase(unittest.TestCase):
 30 |     @staticmethod
 31 |     def _createPeakList():
 32 |         pkl = PeakList('peaklist', np.arange(10, dtype = float), np.arange(10, dtype = float) + 1)
 33 |         pkl.add_attribute('snr', (np.arange(10, dtype = float) + 1) / 10)
 34 |         return pkl
 35 | 
 36 |     @staticmethod
 37 |     def _createPeakMatrix():
 38 |         pids, tags = list(zip(*[
 39 |             ('sample_1_1', PeakList_Tags('sample', treatment = 'compound_1', time_point = '1hr', plate = 1, order = 1)),
 40 |             ('sample_1_2', PeakList_Tags('sample', treatment = 'compound_1', time_point = '6hr', plate = 1, order = 2)),
 41 |             ('QC_1',       PeakList_Tags('qc', plate = 1, order = 3)),
 42 |             ('Blank_1',    PeakList_Tags('blank', plate = 1, order = 4)),
 43 |             ('sample_2_1', PeakList_Tags('sample', treatment = 'compound_2', time_point = '1hr', plate = 2, order = 1)),
 44 |             ('sample_2_2', PeakList_Tags('sample', treatment = 'compound_2', time_point = '6hr', plate = 2, order = 2)),
 45 |             ('QC_2',       PeakList_Tags('qc', plate = 2, order = 3)),
 46 |             ('Blank_2',    PeakList_Tags('blank', plate = 2, order = 4)),
 47 |         ]))
 48 | 
 49 |         mzs = np.tile(np.arange(0, 1000, step = 100, dtype = float), (8, 1))
 50 |         ints = np.arange(80, dtype = float).reshape((8, 10)) / 20.
 51 |         ints[3, 1] = ints[7, 1] = ints[7, 3] = 0 # test blank filter
 52 |         ics = np.array([[1, 2] * 5] * 8)
 53 | 
 54 |         return PeakMatrix(pids, tags, (('mz', mzs), ('intensity', ints), ('intra_count', ics)))
 55 | 
 56 |     # peaklist filters
 57 |     def test_peaklist_attr_filter(self):
 58 |         pkl = self._createPeakList()
 59 | 
 60 |         try:
 61 |             filter_attr(pkl, 'snr', 0.5, flag_index = 2)
 62 |         except Exception as e:
 63 |             self.fail('filter peaklist attribute failed: ' + str(e))
 64 |         self.assertListEqual(pkl.snr.tolist(), [0.1, 0.2, 0.3, 0.4, 0.5])
 65 |         self.assertTupleEqual(pkl.attributes, ('mz', 'intensity', 'snr_flag', 'snr'))
 66 | 
 67 |         self.assertRaises(AttributeError, lambda: filter_attr(pkl, 'not_exists', 0.5))
 68 |         self.assertRaises(AttributeError, lambda: filter_attr(pkl, 'snr', 0.6))
 69 |         self.assertRaises(ValueError, lambda: filter_attr(pkl, 'snr'))
 70 | 
 71 |         filter_attr(pkl, 'snr', min_threshold = 0.4, max_threshold = 0.4, flag_name = 'new_snr_flag')
 72 |         self.assertListEqual(pkl.mz.tolist(), [3])
 73 | 
 74 |     def test_peaklist_ringing_filter(self):
 75 |         pkl = self._createPeakList()
 76 | 
 77 |         try:
 78 |             filter_ringing(pkl, threshold = 0.9, bin_size = 3.0)
 79 |         except Exception as e:
 80 |             self.fail('filter peaklist ringing failed: ' + str(e))
 81 |         self.assertListEqual(pkl.mz.tolist(), [2., 5., 8., 9.])
 82 | 
 83 |     def test_peaklist_mz_ranges(self):
 84 |         pkl = self._createPeakList()
 85 | 
 86 |         try:
 87 |             filter_mz_ranges(pkl, [(1.,3.), (5.,8.)])
 88 |         except Exception as e:
 89 |             self.fail('filter peaklist mz ranges failed: ' + str(e))
 90 |         self.assertListEqual(pkl.mz.tolist(), [0., 4., 9.])
 91 | 
 92 |     # peakmatrix filters
 93 |     def test_peak_matrix_rsd_filter(self):
 94 |         pm = self._createPeakMatrix()
 95 | 
 96 |         try:
 97 |             pm = filter_rsd(pm, 62, 'qc')
 98 |         except Exception as e:
 99 |             self.fail('filter peak_matrix rsd failed: ' + str(e))
100 |         self.assertTrue(np.allclose(pm.rsd('qc'),
101 |             [61.48754619, 60.17930052, 58.92556509, 57.72300254]))
102 | 
103 |         self.assertRaises(AttributeError, lambda: filter_rsd(pm, 45, 'not_QC'))
104 | 
105 |     def test_peak_matrix_fraction_filter(self):
106 |         pm = self._createPeakMatrix()
107 |         for attr in ('mz', 'intensity', 'intra_count'): pm._attr_dict[attr][:,1] = 0
108 | 
109 |         try:
110 |             pm = filter_fraction(pm, 1)
111 |         except Exception as e:
112 |             self.fail('filter peak_matrix fraction failed: ' + str(e))
113 |         self.assertEqual(pm.shape[1], 9)
114 | 
115 |         pm = self._createPeakMatrix()
116 |         for attr in ('mz', 'intensity', 'intra_count'):
117 |             pm._attr_dict[attr][:,1] *= [1, 1, 1, 0, 1, 1, 1, 0]
118 |             pm._attr_dict[attr][:,2] *= [1, 1, 1, 1, 1, 1, 0, 0]
119 | 
120 |         pm = filter_fraction(pm, 0.6, within_classes = True, class_tag_type = 'plate')
121 |         self.assertEqual(pm.shape[1], 10)
122 |         self.assertRaises(AttributeError, lambda: filter_fraction(pm, 1, within_classes = True, class_tag_type = 'time_point'))
123 |         self.assertRaises(KeyError, lambda: filter_fraction(pm, 1, within_classes = True))
124 | 
125 |     def test_peak_matrix_blank_filter(self):
126 |         pm = self._createPeakMatrix()
127 |         pm = filter_blank_peaks(pm, 'blank', 0.3)
128 |         self.assertTupleEqual(pm.shape, (6, 10))
129 | 
130 |         pm = self._createPeakMatrix()
131 |         pm = filter_blank_peaks(pm, 'blank', 0.4)
132 |         self.assertTupleEqual(pm.shape, (6, 2))
133 | 
134 |         pm = self._createPeakMatrix()
135 |         pm = filter_blank_peaks(pm, 'blank', 0.3, method = 'max')
136 |         self.assertTupleEqual(pm.shape, (6, 2))
137 | 
138 |         pm = self._createPeakMatrix()
139 |         pm = filter_blank_peaks(pm, 'blank', 0.3, fold_threshold = 2)
140 |         self.assertTupleEqual(pm.shape, (6, 1))
141 | 
142 |         pm = self._createPeakMatrix()
143 |         self.assertRaises(ValueError, lambda: filter_blank_peaks(pm, 'Not_blank', 0.3))
144 | 
145 | 
146 | if __name__ == '__main__':
147 |     unittest.main()  # run the tests in this module when it is executed as a script
148 | 


--------------------------------------------------------------------------------
/tests/test_peak_matrix.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | # Copyright © 2017-2020 Ralf Weber, Albert Zhou.
  5 | #
  6 | # This file is part of DIMSpy.
  7 | #
  8 | # DIMSpy is free software: you can redistribute it and/or modify
  9 | # it under the terms of the GNU General Public License as published by
 10 | # the Free Software Foundation, either version 3 of the License, or
 11 | # (at your option) any later version.
 12 | #
 13 | # DIMSpy is distributed in the hope that it will be useful,
 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 | # GNU General Public License for more details.
 17 | #
 18 | # You should have received a copy of the GNU General Public License
 19 | # along with DIMSpy.  If not, see <https://www.gnu.org/licenses/>.
 20 | #
 21 | 
 22 | 
 23 | import pickle as cp
 24 | import unittest
 25 | 
 26 | import numpy as np
 27 | from dimspy.models.peak_matrix import PeakMatrix
 28 | from dimspy.models.peak_matrix import mask_peakmatrix, unmask_peakmatrix, mask_all_peakmatrix, unmask_all_peakmatrix
 29 | from dimspy.models.peaklist_tags import Tag, PeakList_Tags
 30 | 
 31 | 
 32 | class PeakMatrixTestCase(unittest.TestCase):  # unit tests for PeakMatrix: properties, masking, flags, access, exports, pickling
 33 |     @staticmethod
 34 |     def _createPeakMatrix():  # fixture: 6 tagged peaklists x 10 peaks
 35 |         pids, tags = list(zip(*[
 36 |             ('sample_1_1', PeakList_Tags('sample', treatment = 'compound_1', time_point = '1hr', plate = 1, order = 1)),
 37 |             ('sample_1_2', PeakList_Tags('sample', treatment = 'compound_1', time_point = '6hr', plate = 1, order = 2)),
 38 |             ('QC_1',       PeakList_Tags('qc', plate = 1, order = 3)),
 39 |             ('sample_2_1', PeakList_Tags('sample', treatment = 'compound_2', time_point = '1hr', plate = 2, order = 1)),
 40 |             ('sample_2_2', PeakList_Tags('sample', treatment = 'compound_2', time_point = '6hr', plate = 2, order = 2)),
 41 |             ('QC_2',       PeakList_Tags('qc', plate = 2, order = 3)),
 42 |         ]))
 43 | 
 44 |         mzs = np.tile(np.arange(0, 1000, step = 100, dtype = float) + 1, (6, 1))  # every row is 1, 101, ..., 901
 45 |         ints = np.arange(60, dtype = float).reshape((6, 10)) / 20.  # 0.00, 0.05, ..., 2.95 laid out row by row
 46 |         ics = np.array([[2] * 10] * 6)
 47 |         # simulate missing values
 48 |         for m in (mzs, ints, ics):
 49 |             np.fill_diagonal(m, 0)  # zero element (i, i) of each matrix
 50 |             m[:,2] = 0  # zero the whole of column 2
 51 |         return PeakMatrix(pids, tags, [('mz', mzs), ('intensity', ints), ('intra_count', ics)])
 52 | 
 53 |     def test_pm_creation(self):  # building the fixture must not raise
 54 |         try:
 55 |             self._createPeakMatrix()
 56 |         except Exception as e:
 57 |             self.fail('create PeakMatrix object failed: ' + str(e))
 58 | 
 59 |     def test_pm_properties(self):  # mask, flags, tags, shapes and per-peak summary properties
 60 |         pm = self._createPeakMatrix()
 61 | 
 62 |         pm.mask = [True, False] * 3
 63 |         self.assertTrue(np.all(pm.mask == [True, False, True, False, True, False]))
 64 |         pm.mask = None  # assigning None clears the mask (all False, asserted on the next line)
 65 |         self.assertTrue(np.all(pm.mask == [False] * 6))
 66 | 
 67 |         self.assertTupleEqual(pm.flag_names, ())
 68 |         self.assertTrue(np.all(pm.flags == np.ones(10)))
 69 | 
 70 |         self.assertTupleEqual(pm.attributes, ('mz', 'intensity', 'intra_count'))
 71 | 
 72 |         self.assertTupleEqual(pm.peaklist_ids,
 73 |             ('sample_1_1', 'sample_1_2', 'QC_1', 'sample_2_1', 'sample_2_2', 'QC_2'))
 74 | 
 75 |         self.assertEqual(len(pm.peaklist_tags), 6)
 76 |         self.assertEqual(pm.peaklist_tag_types, {None, 'treatment', 'time_point', 'plate', 'order'})
 77 |         self.assertEqual(pm.peaklist_tag_values, {'sample', 'qc', 'compound_1', 'compound_2', '1hr', '6hr', 1, 2, 3})
 78 | 
 79 |         pm.mask = [True, False] * 3  # masked rows are excluded from shape but not from full_shape
 80 |         self.assertTupleEqual(pm.shape, (3, 10))
 81 |         self.assertTupleEqual(pm.full_shape, (6, 10))
 82 |         pm.mask = None
 83 | 
 84 |         self.assertTrue(np.all(pm.present == [5]*2+[0]+[5]*3+[6]*4))
 85 |         self.assertTrue(np.allclose(pm.fraction, [0.83333333]*2+[0]+[0.83333333]*3+[1]*4))
 86 |         self.assertTrue(np.all(pm.missing_values == [2]*2+[1]+[2]*3))
 87 |         self.assertTrue(np.all(pm.occurrence == [10]*2+[0]+[10]*3+[12]*4))
 88 |         self.assertTrue(np.allclose(pm.purity[~np.isnan(pm.purity)], [0]*9))
 89 | 
 90 |         ics = pm._attr_dict['intra_count']
 91 |         ics[0, 1] = ics[2, 1] = 1  # temporarily change two intra_count entries in column 1
 92 |         self.assertTrue(np.isclose(pm.purity[1], 0.4))
 93 |         ics[0, 1] = ics[2, 1] = 2  # restore the fixture value
 94 | 
 95 |         pm.add_flag('odd_flag', [True, False] * 5)  # True for peak indices 0, 2, 4, 6, 8
 96 |         self.assertTrue(np.all(pm.property('present') == [5, 0, 5, 6, 6]))
 97 |         self.assertTrue(np.all(pm.property('present', flagged_only = False) == [5]*2+[0]+[5]*3+[6]*4))
 98 |         pm.drop_flag('odd_flag')
 99 | 
100 |         mmz = np.arange(0, 1000, step = 100, dtype = float) + 1
101 |         mmz[2] = np.nan
102 |         self.assertTrue(np.allclose(*list(map(np.nan_to_num, (pm.mz_mean_vector, mmz)))))
103 |         mit = [30., 29., np.nan, 27., 26., 25., 31., 32., 33., 34.]
104 |         self.assertTrue(np.allclose(*list(map(np.nan_to_num, (pm.intensity_mean_vector*20, mit)))))
105 | 
106 |     def test_pm_mask(self):  # tag-driven masking/unmasking and the mask context managers
107 |         pm = self._createPeakMatrix()
108 | 
109 |         self.assertEqual(set([x.value for x in pm.tags_of('plate')]), {1, 2})
110 |         self.assertEqual(set([x.value for x in pm.tags_of()]), {'sample', 'qc'})
111 |         self.assertRaises(KeyError, lambda: pm.tags_of('treatment'))
112 |         self.assertRaises(KeyError, lambda: pm.tags_of('not_exist'))
113 | 
114 |         pm.mask_tags(1)  # expected to mask nothing (ids are asserted unchanged on the next line)
115 |         self.assertTupleEqual(pm.peaklist_ids, ('sample_1_1', 'sample_1_2', 'QC_1', 'sample_2_1', 'sample_2_2', 'QC_2'))
116 |         pm.mask_tags('qc', plate = 1) # mask samples with both of the two
117 |         self.assertTupleEqual(pm.peaklist_ids, ('sample_1_1', 'sample_1_2', 'sample_2_1', 'sample_2_2', 'QC_2'))
118 |         pm.mask = None
119 |         pm.mask_tags('qc')
120 |         self.assertTupleEqual(pm.peaklist_ids, ('sample_1_1', 'sample_1_2', 'sample_2_1', 'sample_2_2'))
121 |         pm.mask = None
122 |         pm.mask_tags('qc').mask_tags(plate = 1)
123 |         self.assertTupleEqual(pm.peaklist_ids, ('sample_2_1', 'sample_2_2'))
124 |         pm.mask = None
125 |         pm.mask_tags('not_exist')
126 |         self.assertTupleEqual(pm.peaklist_ids, ('sample_1_1', 'sample_1_2', 'QC_1', 'sample_2_1', 'sample_2_2', 'QC_2'))
127 | 
128 |         pm.mask = [True] * 6
129 |         pm.unmask_tags('qc', plate = 1) # unmask samples with both of the two
130 |         self.assertTupleEqual(pm.peaklist_ids, ('QC_1',))
131 |         pm.mask = [True] * 6
132 |         pm.unmask_tags('qc')
133 |         self.assertTupleEqual(pm.peaklist_ids, ('QC_1', 'QC_2'))
134 |         pm.mask = [True] * 6
135 |         pm.unmask_tags('qc').unmask_tags(plate = 1)
136 |         self.assertTupleEqual(pm.peaklist_ids, ('sample_1_1', 'sample_1_2', 'QC_1', 'QC_2'))
137 |         pm.mask = [True] * 6
138 |         pm.unmask_tags('not_exist')
139 |         self.assertTupleEqual(pm.peaklist_ids, ())
140 | 
141 |         pm.unmask_tags('qc', override = True)
142 |         self.assertTupleEqual(pm.peaklist_ids, ('QC_1', 'QC_2'))
143 |         with mask_all_peakmatrix(pm) as m:
144 |             m.unmask_tags('sample')
145 |             self.assertTupleEqual(pm.peaklist_ids, ('sample_1_1', 'sample_1_2', 'sample_2_1', 'sample_2_2'))
146 | 
147 |         pm.mask_tags('qc', override = True)
148 |         self.assertTupleEqual(pm.peaklist_ids, ('sample_1_1', 'sample_1_2', 'sample_2_1', 'sample_2_2'))
149 |         with unmask_all_peakmatrix(pm) as m:
150 |             m.mask_tags('sample')
151 |             self.assertTupleEqual(m.peaklist_ids, ('QC_1', 'QC_2'))
152 |         self.assertTupleEqual(pm.peaklist_ids, ('sample_1_1', 'sample_1_2', 'sample_2_1', 'sample_2_2'))  # the mask from before the with block applies again
153 | 
154 |         pm.mask = None
155 |         with unmask_peakmatrix(pm, plate = 1) as m:
156 |             self.assertTupleEqual(m.peaklist_ids, ('sample_1_1', 'sample_1_2', 'QC_1'))
157 |             self.assertTupleEqual(m.full_shape, (6, 10))
158 |         self.assertEqual(len(pm.peaklist_ids), 6)  # all six peaklists are visible again after the with block
159 | 
160 |         with mask_peakmatrix(pm, plate = 2) as m:
161 |             self.assertTupleEqual(m.peaklist_ids, ('sample_1_1', 'sample_1_2', 'QC_1'))
162 |             with unmask_all_peakmatrix(pm) as mm:
163 |                 self.assertTupleEqual(mm.peaklist_ids,
164 |                                       ('sample_1_1', 'sample_1_2', 'QC_1', 'sample_2_1', 'sample_2_2', 'QC_2'))
165 | 
166 |         with mask_peakmatrix(pm, 'qc') as m:
167 |             m.remove_samples((1, 2))  # the removal outlasts the with block (asserted below)
168 |         self.assertTupleEqual(pm.peaklist_ids, ('sample_1_1', 'QC_1', 'sample_2_2', 'QC_2'))
169 | 
170 |     def test_pm_flags(self):  # adding/dropping flags and their effect on shape
171 |         pm = self._createPeakMatrix()
172 | 
173 |         self.assertTrue(np.sum(pm.flags) == 10)
174 | 
175 |         pm.add_flag('qua_flag', [True, True, False, True] * 2 + [True, True], flagged_only = True)
176 |         pm.add_flag('odd_flag', [True, False] * 5, flagged_only = False)
177 | 
178 |         self.assertTupleEqual(pm.flag_names, ('qua_flag', 'odd_flag'))
179 |         self.assertTrue(np.all(pm.flags == [1, 0, 0, 0, 1, 0, 0, 0, 1, 0]))
180 |         self.assertTrue(np.all(pm.flag_values('odd_flag') == [1, 0] * 5))
181 | 
182 |         with mask_peakmatrix(pm, 'qc') as m:
183 |             self.assertTupleEqual(m.shape, (4, 3))
184 |         self.assertTupleEqual(m.shape, (6, 3))  # m is still bound here; all 6 rows are visible again
185 |         self.assertTupleEqual(m.full_shape, (6, 10))
186 | 
187 |         with mask_peakmatrix(pm, plate = 1) as m:
188 |             mzs = np.array([
189 |                 [   1.,    0.,  401.,  601.,  801.],
190 |                 [   1.,    0.,    0.,  601.,  801.],
191 |                 [   1.,    0.,  401.,  601.,  801.],
192 |             ])
193 |             m.drop_flag('qua_flag')  # the flag change persists after the with block (shape (6, 5) below)
194 |             self.assertTrue(np.allclose(m.mz_matrix, mzs))
195 |         self.assertTupleEqual(pm.shape, (6, 5))
196 | 
197 |     def test_pm_access(self):  # mean vectors, rsd, and removal of peaks and samples
198 |         pm = self._createPeakMatrix()
199 | 
200 |         pm.add_flag('even_flag', [False, True] * 5)
201 |         self.assertTrue(np.allclose(pm.attr_mean_vector('mz'),
202 |                                     [101.0, 301.0, 501.0, 701.0, 901.0]))
203 |         self.assertTrue(np.allclose(*list(map(np.nan_to_num, (pm.attr_mean_vector('mz', flagged_only = False),
204 |                                     [1.0, 101.0, np.nan, 301.0, 401.0, 501.0, 601.0, 701.0, 801.0, 901.0])))))
205 |         self.assertTrue(np.allclose((lambda x: x[~np.isnan(x)])(pm.rsd('qc')),
206 |                                     [58.92556509, 55.82421956, 50.50762722, 48.21182598]))
207 |         self.assertTrue(np.allclose((lambda x: x[~np.isnan(x)])(pm.rsd()),
208 |                                     [66.32891055, 76.80163464, 63.24555320, 58.46339666, 55.02437333]))
209 | 
210 |         pm.remove_peaks((0, 1), flagged_only = False)
211 |         self.assertTrue(np.allclose((lambda x: x[~np.isnan(x)])(pm.rsd('qc')),
212 |                                     [55.82421956, 50.50762722, 48.21182598]))
213 |         pm.remove_peaks((0, 1), flagged_only = True)
214 |         self.assertTrue(np.allclose(pm.rsd('qc'),
215 |                                     [50.50762722, 48.21182598]))
216 | 
217 |         self.assertRaises(AttributeError, lambda: pm.rsd('no_such_tag'))
218 | 
219 |         with mask_peakmatrix(pm, 'sample', plate = 1):
220 |             pm.remove_samples((0, 1))
221 |             self.assertTupleEqual(pm.peaklist_ids, ('sample_2_2', 'QC_2'))
222 |             pm.remove_samples((1, 2), masked_only = False)
223 |             self.assertTupleEqual(pm.peaklist_ids, ('QC_2',))
224 |         self.assertTupleEqual(pm.peaklist_ids, ('sample_1_1', 'QC_2'))
225 | 
226 |     def test_pm_exports(self):  # extract_peaklists, to_peaklist and to_str
227 |         pm = self._createPeakMatrix()
228 | 
229 |         pm.add_flag('even_flag', [False, True] * 5)
230 |         with mask_peakmatrix(pm, plate = 1):
231 |             peaklists = pm.extract_peaklists()
232 |         self.assertListEqual([x.ID for x in peaklists], ['sample_2_1', 'sample_2_2', 'QC_2'])
233 | 
234 |         mzs = [
235 |             [101.0, 501.0, 701.0, 901.0],
236 |             [101.0, 301.0, 501.0, 701.0, 901.0],
237 |             [101.0, 301.0, 701.0, 901.0],
238 |         ]
239 |         self.assertTrue(all([np.allclose(x[0].mz, x[1]) for x in zip(peaklists, mzs)]))
240 | 
241 |         pm.drop_flag('even_flag')
242 |         pkl = pm.to_peaklist('merged_pkl')
243 |         self.assertTrue(np.allclose(pkl.mz, [1.0, 101.0, 301.0, 401.0, 501.0, 601.0, 701.0, 801.0, 901.0]))
244 | 
245 |         pm.to_str(comprehensive = True, rsd_tags = (Tag('compound_1', 'treatment'), Tag('compound_2', 'treatment'), 'qc'))  # result is not inspected; only checks the call completes
246 | 
247 |     def test_pm_pickle(self):  # a dumps/loads round trip must keep the attribute names
248 |         pm = self._createPeakMatrix()
249 |         try:
250 |             pstr = cp.dumps(pm)
251 |             pm = cp.loads(pstr)
252 |         except Exception as e:
253 |             self.fail('PeakMatrix pickle failed: ' + str(e))
254 |         self.assertTupleEqual(pm.attributes, ('mz', 'intensity', 'intra_count'))
256 | 
257 | if __name__ == '__main__':
258 |     unittest.main()  # run the tests in this module when it is executed as a script
259 | 


--------------------------------------------------------------------------------
/tests/test_peaklist.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | # Copyright © 2017-2020 Ralf Weber, Albert Zhou.
  5 | #
  6 | # This file is part of DIMSpy.
  7 | #
  8 | # DIMSpy is free software: you can redistribute it and/or modify
  9 | # it under the terms of the GNU General Public License as published by
 10 | # the Free Software Foundation, either version 3 of the License, or
 11 | # (at your option) any later version.
 12 | #
 13 | # DIMSpy is distributed in the hope that it will be useful,
 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 | # GNU General Public License for more details.
 17 | #
 18 | # You should have received a copy of the GNU General Public License
 19 | # along with DIMSpy.  If not, see <https://www.gnu.org/licenses/>.
 20 | #
 21 | 
 22 | 
 23 | import pickle as cp
 24 | import unittest
 25 | 
 26 | import numpy as np
 27 | import pandas as pd
 28 | from dimspy.models.peaklist import PeakList
 29 | 
 30 | 
 31 | class PeakListTestCase(unittest.TestCase):  # unit tests for PeakList: properties, attributes, peak edits, exports, pickling
 32 |     @staticmethod
 33 |     def _createPeakList():  # fixture: 10 peaks with mz 0, 100, ..., 900
 34 |         mzs = np.arange(0, 1000, step = 100)
 35 |         ints = np.abs(np.random.normal(10, 3, size = 10))  # unseeded random values, made non-negative by np.abs
 36 |         pl = PeakList('sample_peaklist', mzs, ints, mz_range = (100, 1000), frag_mode = 'slb')
 37 |         return pl
 38 | 
 39 |     def test_pl_creation(self):  # building the fixture must not raise
 40 |         try:
 41 |             self._createPeakList()
 42 |         except Exception as e:
 43 |             self.fail('create PeakList object failed: ' + str(e))
 44 | 
 45 |     def test_pl_properties(self):  # ID, sizes, metadata, tags and flag-related properties
 46 |         pl = self._createPeakList()
 47 |         self.assertEqual(pl.ID, 'sample_peaklist')
 48 | 
 49 |         pl.add_attribute('odd_flag', [1, 0] * 5, is_flag = True)  # flags every other peak, so size becomes 5 of 10
 50 |         self.assertEqual(pl.size, 5)
 51 |         self.assertEqual(pl.full_size, 10)
 52 |         self.assertTupleEqual(pl.shape, (5, 3))
 53 |         self.assertTupleEqual(pl.full_shape, (10, 3))
 54 | 
 55 |         try:
 56 |             pl.metadata.type = 'blank'
 57 |         except Exception as e:
 58 |             self.fail('access metadata failed: ' + str(e))
 59 |         self.assertListEqual(sorted(pl.metadata.keys()), ['frag_mode', 'mz_range', 'type'])
 60 | 
 61 |         try:
 62 |             pl.tags.add_tag('sample')
 63 |             pl.tags.add_tag('passed_qc')
 64 |             pl.tags.add_tag('high_dose', tag_type = 'treatment')
 65 |         except Exception as e:
 66 |             self.fail('access tags failed: ' + str(e))
 67 |         self.assertEqual(set(pl.tags.tag_types), {None, 'treatment'})
 68 |         self.assertEqual(set(pl.tags.tag_values), {'sample', 'passed_qc', 'high_dose'})
 69 | 
 70 |         self.assertTupleEqual(pl.attributes, ('mz', 'intensity', 'odd_flag'))
 71 |         self.assertTupleEqual(pl.flag_attributes, ('odd_flag',))
 72 | 
 73 |         self.assertTrue(np.all(pl.flags == [1, 0] * 5))
 74 | 
 75 |         self.assertTupleEqual((len(pl.peaks), len(pl.peaks[0])), (5, 3))
 76 |         self.assertTupleEqual((len(pl.dtable), len(pl.dtable[0])), (10, 3))
 77 | 
 78 |     def test_pl_attribute_operations(self):  # has/add/get/set/drop attribute, including the error cases
 79 |         pl = self._createPeakList()
 80 | 
 81 |         self.assertTrue(pl.has_attribute('mz'))
 82 |         self.assertFalse(pl.has_attribute('snr'))
 83 |         self.assertFalse(pl.has_attribute('flag')) # flag is not a real attribute
 84 | 
 85 |         snr = np.array([20, 0] * 5, dtype = int)
 86 |         pl.add_attribute('snr', snr, attr_dtype = float)
 87 |         pl.add_attribute('snr_flag', snr > 10, is_flag = True)
 88 |         self.assertTrue(np.all(pl.get_attribute('snr') > 10))
 89 |         self.assertTrue(np.all(pl.get_attribute('snr', flagged_only = False) == snr))
 90 | 
 91 |         pl.add_attribute('values_1', [0, 1] * 5, on_index = 2, flagged_only = False)
 92 |         self.assertEqual(pl.attributes[2], 'values_1')
 93 |         pl.set_attribute('values_1', [1] * 5) # snr_flag already masked odd peaks
 94 |         self.assertTrue(np.all(pl.get_attribute('values_1', flagged_only = False) == np.ones(10)))
 95 |         pl.set_attribute('values_1', [0] * 10, flagged_only = False)
 96 |         self.assertTrue(np.all(pl.get_attribute('values_1', flagged_only = False) == np.zeros(10)))
 97 |         pl.drop_attribute('values_1')
 98 |         self.assertFalse(pl.has_attribute('values_1'))
 99 | 
100 |         self.assertRaises(AttributeError, lambda: pl.add_attribute('mz', np.ones(pl.size)))
101 |         self.assertRaises(AttributeError, lambda: pl.add_attribute('snr', np.ones(pl.size)))
102 |         self.assertRaises(AttributeError, lambda: pl.add_attribute('_dtable', np.ones(pl.size)))
103 |         self.assertRaises(ValueError, lambda: pl.add_attribute('flags_1', np.arange(pl.size), is_flag = True))
104 |         self.assertRaises(IndexError, lambda: pl.add_attribute('values_2', np.arange(pl.size), on_index = 0))
105 |         self.assertRaises(IndexError, lambda: pl.add_attribute('values_2', np.arange(pl.size), on_index = -pl.shape[1]))
106 |         self.assertRaises(ValueError, lambda: pl.add_attribute('values_2', np.arange(pl.full_size)))
107 | 
108 |         self.assertRaises(AttributeError, lambda: pl.set_attribute('flags', np.ones_like(pl.size)))
109 |         self.assertRaises(AttributeError, lambda: pl.set_attribute('values_3', np.arange(pl.size)))
110 |         self.assertRaises(ValueError, lambda: pl.set_attribute('mz', np.arange(10)[::-1], flagged_only = False))
111 | 
112 |         try:
113 |             pl.set_attribute('mz', np.arange(10)[::-1], flagged_only = False, unsorted_mz = True)
114 |         except Exception as e:
115 |             self.fail('unsorted_mz flag failed: ' + str(e))
116 |         self.assertTrue(np.all(pl.get_attribute('mz') == np.arange(10)[1::2])) # setting mz reversed the snr_flag
117 | 
118 |         self.assertRaises(AttributeError, lambda: pl.get_attribute('values_4'))
119 |         self.assertRaises(AttributeError, lambda: pl.drop_attribute('values_4'))
120 |         self.assertRaises(AttributeError, lambda: pl.drop_attribute('mz'))
121 | 
122 |     def test_pl_peaks_operations(self):  # set, insert and remove peaks, then clean up unflagged peaks
123 |         pl = self._createPeakList()
124 |         pl.add_attribute('value_flag', [1, 0] * 5, is_flag = True)
125 | 
126 |         # mz = 0, (100), 200, (300), 400, (500), 600, (700), 800, (900)
127 |         pl.set_peak(4, (50, 10., True), flagged_only = False)
128 |         self.assertTupleEqual((0, 50, 200, 600, 800), tuple(pl.get_attribute('mz')))
129 | 
130 |         # mz = 0, 50, (100), 200, (300), (500), 600, (700), 800, (900)
131 |         pl.insert_peak((150, 10., True))
132 |         self.assertTupleEqual((0, 50, 150, 200, 600, 800), tuple(pl.get_attribute('mz')))
133 |         self.assertEqual(pl.full_size, 11)
134 | 
135 |         # mz = 0, 50, (100), 150, 200, (300), (500), 600, (700), 800, (900)
136 |         pl.remove_peak((1,2))
137 |         self.assertTupleEqual((0, 100, 200, 300, 500, 600, 700, 800, 900), tuple(pl.get_attribute('mz', flagged_only = False)))
138 |         pl.remove_peak(1, flagged_only = False)
139 |         self.assertTupleEqual((0, 200, 300, 500, 600, 700, 800, 900), tuple(pl.get_attribute('mz', flagged_only = False)))
140 |         self.assertEqual(pl.size, 4)
141 |         self.assertEqual(pl.full_size, 8)
142 | 
143 |         # mz = 0, 200, (300), (500), 600, (700), 800, (900)
144 |         self.assertRaises(AttributeError, lambda: pl.cleanup_unflagged_peaks('mz'))
145 |         self.assertRaises(AttributeError, lambda: pl.cleanup_unflagged_peaks('not_exists'))
146 |         pl.cleanup_unflagged_peaks('value_flag')
147 |         self.assertEqual(pl.full_size, pl.size)
148 |         pl.cleanup_unflagged_peaks()
149 |         self.assertTupleEqual((0, 200, 600, 800), tuple(pl.get_attribute('mz')))
150 | 
151 |     def test_pl_build_ins(self):  # str(), len(), attribute access and indexing
152 |         pl = self._createPeakList()
153 | 
154 |         try:
155 |             str(pl)
156 |         except Exception as e:
157 |             self.fail('__str__ failed: ' + str(e))
158 |         self.assertEqual(len(pl), 10)
159 | 
160 |         pl.add_attribute('value_flag', [1, 0] * 5, is_flag = True)
161 |         # mz = 0, (100), 200, (300), 400, (500), 600, (700), 800, (900)
162 |         self.assertEqual(len(pl), 5)
163 | 
164 |         self.assertListEqual([0, 200, 400, 600, 800], pl.mz.tolist())
165 |         self.assertListEqual(np.arange(0, 1000, step = 100).tolist(), pl.mz_all.tolist())
166 | 
167 |         self.assertListEqual([0, 200, 400, 600, 800], pl['mz'].tolist())
168 |         self.assertListEqual([0, 200, 400], list(list(zip(*pl[:3].tolist()))[0]))
169 | 
170 |     def test_pl_exports(self):  # to_list, to_str, to_df and to_dict
171 |         pl = self._createPeakList()
172 | 
173 |         try:
174 |             lst = pl.to_list()
175 |         except Exception as e:
176 |             self.fail('to_list function failed: ' + str(e))
177 |         self.assertListEqual(np.arange(0, 1000, step = 100).tolist(), list(lst[0]))
178 | 
179 |         try:
180 |             psr = pl.to_str(',')
181 |         except Exception as e:
182 |             self.fail('to_str function failed: ' + str(e))
183 |         self.assertListEqual(np.arange(0, 1000, step = 100).tolist(),
184 |                              list(map(float, list(zip(*[x.split(',') for x in psr.split('\n')[1:]]))[0])))
185 | 
186 |         try:
187 |             pl_df = pl.to_df()
188 |         except Exception as e:
189 |             self.fail('to_df function failed: ' + str(e))
190 | 
191 |         title, data = zip(*pl.to_dict().items())
192 |         self.assertTrue(pl_df.equals(pd.DataFrame(list(zip(*data)), columns=title)))
193 | 
194 |     def test_pl_pickle(self):  # a dumps/loads round trip must keep the attribute names
195 |         pl = self._createPeakList()
196 |         try:
197 |             pstr = cp.dumps(pl)
198 |             pl = cp.loads(pstr)
199 |         except Exception as e:
200 |             self.fail('PeakList pickle failed: ' + str(e))
201 |         self.assertTupleEqual(pl.attributes, ('mz', 'intensity'))
203 | 
204 | if __name__ == '__main__':
205 |     unittest.main()  # run the tests in this module when it is executed as a script
206 | 


--------------------------------------------------------------------------------
/tests/test_peaklist_metadata.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | #
 4 | # Copyright © 2017-2020 Ralf Weber, Albert Zhou.
 5 | #
 6 | # This file is part of DIMSpy.
 7 | #
 8 | # DIMSpy is free software: you can redistribute it and/or modify
 9 | # it under the terms of the GNU General Public License as published by
10 | # the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # DIMSpy is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 | # GNU General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU General Public License
19 | # along with DIMSpy.  If not, see <https://www.gnu.org/licenses/>.
20 | #
21 | 
22 | 
23 | import pickle as cp
24 | import unittest
25 | 
26 | from dimspy.models.peaklist_metadata import PeakList_Metadata
27 | 
28 | 
29 | class PeakListMetadataTestCase(unittest.TestCase):
30 |     @staticmethod
31 |     def _createMetadata():
32 |         return PeakList_Metadata((('a', 1), ('b', 2), ('c', 3)))
33 | 
34 |     def test_pl_meta_creation(self):
35 |         try:
36 |             self._createMetadata()
37 |         except Exception as e:
38 |             self.fail('create metadata object failed: ' + str(e))
39 | 
40 |     def test_pl_meta_operations(self):
41 |         meta = self._createMetadata()
42 | 
43 |         self.assertListEqual(sorted(meta.keys()), ['a', 'b', 'c'])
44 |         self.assertListEqual(sorted(meta.values()), [1, 2, 3])
45 |         self.assertListEqual(sorted(meta.items()), [('a', 1), ('b', 2), ('c', 3)])
46 |         self.assertTrue(meta['a'] == 1 and meta['b'] == 2 and meta['c'] == 3)
47 |         self.assertTrue(('a' in meta) == True and ('d' in meta) == False)
48 |         self.assertTrue(meta.get('a', 4) == 1 and meta.get('d', 4) == 4)
49 | 
50 |         meta['a'] = 4
51 |         self.assertEqual(meta['a'], 4)
52 |         meta['d'] = 5
53 |         self.assertEqual(meta['d'], 5)
54 |         del meta['b']
55 |         self.assertFalse('b' in meta)
56 | 
57 |     def test_pl_meta_pickle(self):
58 |         meta = self._createMetadata()
59 |         try:
60 |             mstr = cp.dumps(meta)
61 |             meta = cp.loads(mstr)
62 |         except Exception as e:
63 |             self.fail('metadata pickle failed: ' + str(e))
64 |         self.assertTrue(meta['a'] == 1 and meta['b'] == 2 and meta['c'] == 3)
65 | 
66 | 
67 | if __name__ == '__main__':
68 |     unittest.main()  # run the tests in this module when it is executed as a script
69 | 


--------------------------------------------------------------------------------
/tests/test_peaklist_tags.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | # Copyright © 2017-2020 Ralf Weber, Albert Zhou.
  5 | #
  6 | # This file is part of DIMSpy.
  7 | #
  8 | # DIMSpy is free software: you can redistribute it and/or modify
  9 | # it under the terms of the GNU General Public License as published by
 10 | # the Free Software Foundation, either version 3 of the License, or
 11 | # (at your option) any later version.
 12 | #
 13 | # DIMSpy is distributed in the hope that it will be useful,
 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 | # GNU General Public License for more details.
 17 | #
 18 | # You should have received a copy of the GNU General Public License
 19 | # along with DIMSpy.  If not, see <https://www.gnu.org/licenses/>.
 20 | #
 21 | 
 22 | 
 23 | import pickle as cp
 24 | import unittest
 25 | 
 26 | from dimspy.models.peaklist_tags import Tag, PeakList_Tags
 27 | 
 28 | 
 29 | class TagTestCase(unittest.TestCase):
 30 |     def test_tag_creation(self):
 31 |         try:
 32 |             tag1 = Tag('1')
 33 |             tag2 = Tag(2, 'batch')
 34 |             tag3 = Tag(tag2)
 35 |         except Exception as e:
 36 |             self.fail('create tag object failed: ' + str(e))
 37 | 
 38 |         self.assertTrue(tag1.value == '1' and tag1.ttype is None)
 39 |         self.assertTrue(tag2.value == 2 and tag2.ttype == 'batch')
 40 |         self.assertTrue(tag3.value == 2 and tag3.ttype == 'batch')
 41 |         self.assertRaises(KeyError, lambda: Tag(9, ttype = 'None'))
 42 | 
 43 |     def test_tag_property(self):
 44 |         tag = Tag('value', ttype = 'type')
 45 |         self.assertTrue(tag.typed)
 46 | 
 47 |         tag.value = 1
 48 |         tag.ttype = None
 49 |         self.assertTrue(tag.value == 1 and tag.ttype is None)
 50 |         self.assertFalse(tag.typed)
 51 | 
 52 |         def _assign_type(t): tag.ttype = t
 53 |         self.assertRaises(KeyError, lambda: _assign_type('None'))
 54 | 
 55 |     def test_tag_magic(self):
 56 |         tag = Tag(1, ttype = 'type')
 57 | 
 58 |         self.assertEqual(tag, Tag(1, 'type'))
 59 |         self.assertNotEqual(tag, 1)
 60 | 
 61 |         tag.ttype = None
 62 |         self.assertEqual(tag, 1)
 63 |         self.assertTrue(1 == tag)
 64 |         self.assertFalse(1 != tag)
 65 |         self.assertTrue(2 != tag)
 66 |         self.assertTrue(tag in (1, 2, 3))
 67 |         self.assertTrue(1 in (tag, 2, 3))
 68 | 
 69 |         self.assertEqual(str(tag), '1')
 70 |         tag.ttype = 'type'
 71 |         self.assertEqual(str(tag), 'type:1')
 72 | 
 73 | class PeakListTagsTestCase(unittest.TestCase):
 74 |     @staticmethod
 75 |     def _createTags():  # fixture: three untyped tags plus one typed tag given as a Tag and one given as a keyword
 76 |         return PeakList_Tags(0, 'str_tag', 'ustr_tag', Tag(1, 'typed_tag1'), typed_tag2 = 2)
 77 | 
 78 |     def test_pl_tags_creation(self):
 79 |         try:
 80 |             self._createTags()
 81 |         except Exception as e:
 82 |             self.fail('create tags object failed: ' + str(e))
 83 | 
 84 |     def test_pl_tags_properties(self):
 85 |         tags = self._createTags()
 86 |         self.assertEqual(tags.tag_types, {None, 'typed_tag1', 'typed_tag2'})
 87 |         self.assertEqual(tags.tag_values, {0, 1, 2, 'str_tag', 'ustr_tag'})
 88 |         self.assertEqual(len(tags), 5)
 89 |         self.assertTrue(all([x.ttype is not None for x in tags.typed_tags]))
 90 |         self.assertTrue(all([x.ttype is None for x in tags.untyped_tags]))
 91 | 
 92 |     def test_pl_tags_checking_methods(self):
 93 |         tags = self._createTags()
 94 | 
 95 |         self.assertTrue(tags.has_tag_type('typed_tag1') and tags.has_tag_type('typed_tag2'))
 96 |         self.assertTrue(tags.has_tag_type(None))
 97 |         self.assertFalse(tags.has_tag_type('not_exist'))
 98 | 
 99 |         self.assertTrue(Tag(2, 'typed_tag2') in tags)
100 |         self.assertTrue(tags.has_tag(0) and tags.has_tag('str_tag') and tags.has_tag('ustr_tag'))
101 |         self.assertTrue(tags.has_tag(1, 'typed_tag1') and tags.has_tag(2, 'typed_tag2'))
102 |         self.assertTrue(tags.has_tag(Tag(1, 'typed_tag1')))
103 |         self.assertFalse(tags.has_tag(0, 'typed_tag1'))
104 |         self.assertFalse(tags.has_tag(1) or tags.has_tag(2))
105 |         self.assertFalse(tags.has_tag('not_exist') or tags.has_tag(1, 'wrong_type'))
106 | 
107 |         self.assertTupleEqual(tags.tag_of(), (0, 'str_tag', 'ustr_tag'))
108 |         self.assertTrue(tags.tag_of('typed_tag1').value == 1 and tags.tag_of('typed_tag2').value == 2)
109 |         self.assertTrue(tags.tag_of('not_such_type') is None)
110 | 
111 |     def test_pl_tags_adding_methods(self):
112 |         tags = self._createTags()
113 | 
114 |         self.assertRaises(KeyError, lambda: tags.add_tag(3, 'typed_tag1'))
115 |         self.assertRaises(ValueError, lambda: tags.add_tag(0))
116 |         self.assertRaises(ValueError, lambda: tags.add_tag('ustr_tag'))
117 | 
118 |         tags.add_tag(1)
119 |         tags.add_tag(1, 'typed_tag3')
120 |         tags.add_tag(Tag('new_value', 'typed_tag4'))
121 |         self.assertEqual(tags.tag_types, {None, 'typed_tag1', 'typed_tag2', 'typed_tag3', 'typed_tag4'})
122 |         self.assertEqual(tags.tag_values, {0, 1, 2, 'new_value', 'str_tag', 'ustr_tag'})
123 | 
124 |     def test_pl_tags_dropping_methods(self):
125 |         tags = self._createTags()
126 | 
127 |         tags.drop_tag(0)
128 |         tags.drop_tag(1)
129 |         tags.drop_tag(1, 'wrong_type')
130 |         self.assertEqual(tags.tag_types, {None, 'typed_tag1', 'typed_tag2'})
131 |         self.assertEqual(tags.tag_values, {1, 2, 'str_tag', 'ustr_tag'})
132 |         tags.drop_tag('str_tag')
133 |         tags.drop_tag('ustr_tag')
134 |         self.assertEqual(tags.tag_types, {'typed_tag1', 'typed_tag2'})
135 |         self.assertEqual(tags.tag_values, {1, 2})
136 | 
137 |         tags.drop_tag_type('typed_tag1')
138 |         self.assertEqual(tags.tag_types, {'typed_tag2'})
139 |         self.assertEqual(tags.tag_values, {2})
140 | 
141 |         tags.drop_all_tags()
142 |         self.assertTupleEqual(tags.tags, ())
143 | 
144 |     def test_pl_tags_portals(self):
145 |         tags = self._createTags()
146 |         self.assertListEqual(tags.to_list(), [(0, None), ('str_tag', None), ('ustr_tag', None), (1, 'typed_tag1'), (2, 'typed_tag2')])
147 |         self.assertEqual(tags.to_str(), '0, str_tag, ustr_tag, typed_tag1:1, typed_tag2:2')
148 |         self.assertEqual(str(tags), '0, str_tag, ustr_tag, typed_tag1:1, typed_tag2:2')
149 | 
150 |     def test_pl_tags_pickle(self):
151 |         tags = self._createTags()
152 |         try:
153 |             tstr = cp.dumps(tags)
154 |             tags = cp.loads(tstr)
155 |         except Exception as e:
156 |             self.fail('tags pickle failed: ' + str(e))
157 |         self.assertEqual(tags.tag_types, {None, 'typed_tag1', 'typed_tag2'})
158 |         self.assertEqual(tags.tag_values, {0, 1, 2, 'str_tag', 'ustr_tag'})
159 |         self.assertEqual(len(tags), 5)
160 | 
161 | 
# Run the test cases of this module when it is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
164 | 


--------------------------------------------------------------------------------
/tests/test_replicate_processing.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | #
 4 | # Copyright © 2017-2020 Ralf Weber, Albert Zhou.
 5 | #
 6 | # This file is part of DIMSpy.
 7 | #
 8 | # DIMSpy is free software: you can redistribute it and/or modify
 9 | # it under the terms of the GNU General Public License as published by
10 | # the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # DIMSpy is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 | # GNU General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU General Public License
19 | # along with DIMSpy.  If not, see <https://www.gnu.org/licenses/>.
20 | #
21 | 
22 | 
23 | import os
24 | import unittest
25 | import zipfile
26 | from dimspy.process.replicate_processing import read_scans
27 | 
28 | 
def to_test_data(*args):
    """Join *args* onto the ``data/MTBLS79_subset`` folder next to this file."""
    here = os.path.dirname(os.path.realpath(__file__))
    return os.path.join(here, "data", "MTBLS79_subset", *args)
31 | 
def to_test_results(*args):
    """Join *args* onto the ``results`` folder next to this file."""
    here = os.path.dirname(os.path.realpath(__file__))
    return os.path.join(here, "results", *args)
34 | 
35 | 
class ReplicateProcessingTestCase(unittest.TestCase):
    """Tests for the scan-event filtering of ``read_scans``."""

    @classmethod
    def setUpClass(cls):
        """Extract the zipped mzML test data into the ``results/zip_data`` folder."""
        # The context manager closes the archive even if extraction raises,
        # so no file handle is leaked on a failed set-up.
        with zipfile.ZipFile(to_test_data("MTBLS79_mzml_single.zip"), 'r') as zip_ref:
            zip_ref.extractall(to_test_results("zip_data"))

    def test_read_scans(self):
        """Include/exclude filters select the expected scan-event headers.

        Each filter is given both as a ``[start, end, scan_type]`` triple and
        as the full header string.
        """
        mzml_file = to_test_data("mzml", "batch04_QC17_rep01_262.mzML")

        first_window = 'FTMS + p ESI w SIM ms [70.00-170.00]'
        other_windows = ['FTMS + p ESI w SIM ms [140.00-240.00]',
                         'FTMS + p ESI w SIM ms [210.00-310.00]',
                         'FTMS + p ESI w SIM ms [280.00-380.00]',
                         'FTMS + p ESI w SIM ms [350.00-450.00]',
                         'FTMS + p ESI w SIM ms [420.00-520.00]',
                         'FTMS + p ESI w SIM ms [490.00-590.00]']

        cases = [
            ({"exclude": [["70.0", "170.0", "sim"]]}, other_windows),
            ({"include": [["70.0", "170.0", "sim"]]}, [first_window]),
            ({"exclude": [first_window]}, other_windows),
            ({"include": [first_window]}, [first_window]),
        ]

        for filter_scan_events, expected_headers in cases:
            # subTest reports every failing filter instead of stopping at the first.
            with self.subTest(filter_scan_events=filter_scan_events):
                scans = read_scans(mzml_file, function_noise="median",
                                   min_scans=1, filter_scan_events=filter_scan_events)
                self.assertListEqual(list(scans.keys()), expected_headers)
72 | 


--------------------------------------------------------------------------------
/tests/test_suite_models.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | #
 4 | # Copyright © 2017-2020 Ralf Weber, Albert Zhou.
 5 | #
 6 | # This file is part of DIMSpy.
 7 | #
 8 | # DIMSpy is free software: you can redistribute it and/or modify
 9 | # it under the terms of the GNU General Public License as published by
10 | # the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # DIMSpy is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 | # GNU General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU General Public License
19 | # along with DIMSpy.  If not, see <https://www.gnu.org/licenses/>.
20 | #
21 | 
22 | 
23 | import sys
24 | import unittest
25 | from pathlib import Path
26 | 
27 | sys.path.insert(0, str(Path(__file__).parent.parent.resolve()))
28 | from . import test_peaklist_metadata, test_peaklist_tags, test_peaklist, test_peak_matrix
29 | 
30 | 
if __name__ == '__main__':
    # unittest.findTestCases() was deprecated in Python 3.11 and removed in
    # 3.13; TestLoader.loadTestsFromModule() is the supported equivalent.
    loader = unittest.TestLoader()
    suite = unittest.TestSuite()

    # Collect the test cases of every model test module, in a fixed order.
    for module in (test_peaklist_metadata, test_peaklist_tags, test_peaklist, test_peak_matrix):
        suite.addTest(loader.loadTestsFromModule(module))

    runner = unittest.TextTestRunner()
    runner.run(suite)
41 | 


--------------------------------------------------------------------------------
/tests/test_suite_portals.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | #
 4 | # Copyright © 2017-2020 Ralf Weber, Albert Zhou.
 5 | #
 6 | # This file is part of DIMSpy.
 7 | #
 8 | # DIMSpy is free software: you can redistribute it and/or modify
 9 | # it under the terms of the GNU General Public License as published by
10 | # the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # DIMSpy is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 | # GNU General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU General Public License
19 | # along with DIMSpy.  If not, see <https://www.gnu.org/licenses/>.
20 | #
21 | 
22 | 
23 | import sys
24 | import unittest
25 | from pathlib import Path
26 | 
27 | sys.path.insert(0, str(Path(__file__).parent.parent.resolve()))
28 | from . import test_txt_portal, test_hdf5_portal, test_paths_portal
29 | 
30 | 
if __name__ == '__main__':
    # unittest.findTestCases() was deprecated in Python 3.11 and removed in
    # 3.13; TestLoader.loadTestsFromModule() is the supported equivalent.
    loader = unittest.TestLoader()
    suite = unittest.TestSuite()

    # Collect the test cases of every portal test module, in a fixed order.
    for module in (test_txt_portal, test_hdf5_portal, test_paths_portal):
        suite.addTest(loader.loadTestsFromModule(module))

    runner = unittest.TextTestRunner()
    runner.run(suite)
40 | 
41 | 


--------------------------------------------------------------------------------
/tests/test_suite_process.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | #
 4 | # Copyright © 2017-2020 Ralf Weber, Albert Zhou.
 5 | #
 6 | # This file is part of DIMSpy.
 7 | #
 8 | # DIMSpy is free software: you can redistribute it and/or modify
 9 | # it under the terms of the GNU General Public License as published by
10 | # the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # DIMSpy is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 | # GNU General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU General Public License
19 | # along with DIMSpy.  If not, see <https://www.gnu.org/licenses/>.
20 | #
21 | 
22 | 
23 | import sys
24 | import unittest
25 | from pathlib import Path
26 | 
27 | sys.path.insert(0, str(Path(__file__).parent.parent.resolve()))
28 | from . import test_peak_filters, test_peak_alignment
29 | 
30 | 
if __name__ == '__main__':
    # unittest.findTestCases() was deprecated in Python 3.11 and removed in
    # 3.13; TestLoader.loadTestsFromModule() is the supported equivalent.
    loader = unittest.TestLoader()
    suite = unittest.TestSuite()

    # Collect the test cases of every processing test module, in a fixed order.
    for module in (test_peak_alignment, test_peak_filters):
        suite.addTest(loader.loadTestsFromModule(module))

    runner = unittest.TextTestRunner()
    runner.run(suite)
39 | 


--------------------------------------------------------------------------------
/tests/test_suite_tools.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | #
 4 | # Copyright © 2017-2020 Ralf Weber, Albert Zhou.
 5 | #
 6 | # This file is part of DIMSpy.
 7 | #
 8 | # DIMSpy is free software: you can redistribute it and/or modify
 9 | # it under the terms of the GNU General Public License as published by
10 | # the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # DIMSpy is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 | # GNU General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU General Public License
19 | # along with DIMSpy.  If not, see <https://www.gnu.org/licenses/>.
20 | #
21 | 
22 | 
23 | import sys
24 | import unittest
25 | from pathlib import Path
26 | 
27 | sys.path.insert(0, str(Path(__file__).parent.parent.resolve()))
28 | from . import test_tools
29 | 
30 | 
if __name__ == '__main__':
    # unittest.findTestCases() was deprecated in Python 3.11 and removed in
    # 3.13; TestLoader.loadTestsFromModule() is the supported equivalent.
    loader = unittest.TestLoader()
    suite = unittest.TestSuite()

    # Collect the test cases of the tools test module.
    suite.addTest(loader.loadTestsFromModule(test_tools))

    runner = unittest.TextTestRunner()
    runner.run(suite)
38 | 


--------------------------------------------------------------------------------
/tests/test_thermo_raw_portal.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | #
 4 | # Copyright © 2017-2020 Ralf Weber, Albert Zhou.
 5 | #
 6 | # This file is part of DIMSpy.
 7 | #
 8 | # DIMSpy is free software: you can redistribute it and/or modify
 9 | # it under the terms of the GNU General Public License as published by
10 | # the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # DIMSpy is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 | # GNU General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU General Public License
19 | # along with DIMSpy.  If not, see <https://www.gnu.org/licenses/>.
20 | #
21 | 
22 | 
23 | import os
24 | import unittest
25 | import platform
26 | 
27 | from dimspy.portals.thermo_raw_portal import ThermoRaw
28 | 
29 | 
def to_test_data(*args):
    """Join *args* onto the ``data/MTBLS79_subset`` folder next to this file."""
    here = os.path.dirname(os.path.realpath(__file__))
    return os.path.join(here, "data", "MTBLS79_subset", *args)
32 | 
def to_test_results(*args):
    """Join *args* onto the ``results`` folder next to this file."""
    here = os.path.dirname(os.path.realpath(__file__))
    return os.path.join(here, "results", *args)
35 | 
36 | 
class ThermoRawPortalsTestCase(unittest.TestCase):
    """Tests for reading a Thermo RAW file through the ``ThermoRaw`` portal."""

    def test_thermo_raw_portal(self):
        """Check run-level accessors and the metadata of the first peaklist."""
        run = ThermoRaw(to_test_data("raw", "batch04_QC17_rep01_262.RAW"))
        # Registered up front so the reader is closed even when one of the
        # assertions below fails (a trailing run.close() would be skipped).
        self.addCleanup(run.close)

        # Two textual renderings of the same timestamp are accepted
        # (presumably they depend on the platform/locale -- TODO confirm).
        self.assertIn(str(run.timestamp), ("4/2/2011 3:28:02 AM", "02/04/2011 03:28:02"))

        self.assertListEqual(list(run.headers().keys()), ['FTMS + p ESI w SIM ms [70.00-170.00]',
                                                          'FTMS + p ESI w SIM ms [140.00-240.00]',
                                                          'FTMS + p ESI w SIM ms [210.00-310.00]',
                                                          'FTMS + p ESI w SIM ms [280.00-380.00]',
                                                          'FTMS + p ESI w SIM ms [350.00-450.00]',
                                                          'FTMS + p ESI w SIM ms [420.00-520.00]',
                                                          'FTMS + p ESI w SIM ms [490.00-590.00]'])
        self.assertListEqual(list(run.scan_ids().keys()), list(range(1, 89)))
        self.assertListEqual(list(run.tics().values())[0:2], [39800032.0, 38217892.0])
        self.assertEqual(len(run.tics()), 88)
        self.assertListEqual(list(run.ion_injection_times().values())[0:2], [40.434, 40.095])
        self.assertEqual(len(run.ion_injection_times()), 88)
        self.assertListEqual(run.scan_dependents(), [])

        # Metadata attached to the peaklist of scan 1.
        pl = run.peaklist(1)
        self.assertEqual(pl.ID, 1)
        self.assertEqual(pl.metadata["header"], "FTMS + p ESI w SIM ms [70.00-170.00]")
        self.assertEqual(pl.metadata["ms_level"], 1.0)
        self.assertEqual(pl.metadata["ion_injection_time"], 40.434)
        self.assertEqual(pl.metadata["scan_time"], 0.5010899999999999)
        self.assertEqual(pl.metadata["elapsed_scan_time"], 1.05)
        self.assertEqual(pl.metadata["tic"], 39800032.0)
        self.assertEqual(pl.metadata["function_noise"], "noise_packets")
        self.assertEqual(pl.metadata["mz_range"], [70.0, 170.0])
69 | 
70 | 
# Run the test cases of this module when it is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
73 | 


--------------------------------------------------------------------------------
/tests/test_txt_portal.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | #
 4 | # Copyright © 2017-2020 Ralf Weber, Albert Zhou.
 5 | #
 6 | # This file is part of DIMSpy.
 7 | #
 8 | # DIMSpy is free software: you can redistribute it and/or modify
 9 | # it under the terms of the GNU General Public License as published by
10 | # the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # DIMSpy is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 | # GNU General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU General Public License
19 | # along with DIMSpy.  If not, see <https://www.gnu.org/licenses/>.
20 | #
21 | 
22 | 
23 | import os
24 | import unittest
25 | 
26 | import numpy as np
27 | from dimspy.models.peaklist import PeakList
28 | from dimspy.models.peaklist_tags import Tag
29 | from dimspy.portals.txt_portal import save_peak_matrix_as_txt, load_peak_matrix_from_txt
30 | from dimspy.portals.txt_portal import save_peaklist_as_txt, load_peaklist_from_txt
31 | from dimspy.process.peak_alignment import align_peaks
32 | 
33 | 
class TxtPortalsTestCase(unittest.TestCase):
    """Save/load round-trip tests for the plain-text portals."""

    def test_peaklist_portal(self):
        """A peaklist written to text and read back keeps sizes and values."""
        mzs = np.sort(np.random.uniform(100, 1200, size=100))
        intensities = np.random.normal(100, 10, size=100)
        pkl = PeakList('peaklist', mzs, intensities)
        pkl.add_attribute('odd_flag', [0, 1] * 50, is_flag=True)

        save_peaklist_as_txt(pkl, '.test_peaklist.txt')
        npkl = load_peaklist_from_txt('.test_peaklist.txt', 'peaklist')

        # Half of the 100 peaks carry a 0 in 'odd_flag'.
        self.assertEqual(npkl.size, 50)
        self.assertEqual(npkl.full_size, 100)
        self.assertTrue(np.allclose(pkl.mz_all, npkl.mz_all))
        self.assertTrue(np.allclose(pkl.intensity, npkl.intensity))

    def test_peak_matrix_portal(self):
        """A peak matrix written to text and read back keeps shape, flags and tags."""

        def _random_peaklist(name):
            # m/z values are drawn before intensities, as in every peaklist here.
            mzs = sorted(np.random.uniform(100, 1200, size=100))
            ints = np.abs(np.random.normal(100, 10, size=100))
            return PeakList(name, mzs, ints)

        names = ('sample_1_1', 'sample_1_2', 'QC_1', 'sample_2_1', 'sample_2_2', 'QC_2')
        pkls = [_random_peaklist(name) for name in names]

        # One tag collection per peaklist, in the same order as `names`.
        tag_sets = (
            ('sample', Tag('compound_1', 'treatment'), Tag('1hr', 'time_point'), Tag(1, 'plate')),
            ('sample', Tag('compound_1', 'treatment'), Tag('6hr', 'time_point'), Tag(1, 'plate')),
            ('qc', Tag(1, 'plate')),
            ('sample', Tag('compound_2', 'treatment'), Tag('1hr', 'time_point'), Tag(2, 'plate')),
            ('sample', Tag('compound_2', 'treatment'), Tag('6hr', 'time_point'), Tag(2, 'plate')),
            ('qc', Tag(2, 'plate')),
        )
        for pkl, tag_set in zip(pkls, tag_sets):
            for t in tag_set:
                pkl.tags.add_tag(t)

        pm = align_peaks(pkls, ppm=2e+4, block_size=10, ncpus=2)

        n_cols = pm.shape[1]
        pm.add_flag('odd_flag', ([0, 1] * int(n_cols / 2 + 1))[:n_cols])
        # The shape is read again on purpose: it may differ once the first
        # flag has been added.
        n_cols = pm.shape[1]
        pm.add_flag('qua_flag', ([0, 0, 1, 1] * int(n_cols / 4 + 1))[:n_cols])

        rsd_tags = ('qc', Tag('compound_1', 'treatment'), Tag('compound_2', 'treatment'))
        save_peak_matrix_as_txt(pm, '.test_peak_matrix.txt', samples_in_rows=True,
                                comprehensive=True, rsd_tags=rsd_tags)
        npm = load_peak_matrix_from_txt('.test_peak_matrix.txt', samples_in_rows=True,
                                        comprehensive='auto')

        self.assertEqual(pm.shape, npm.shape)
        self.assertEqual(pm.full_shape, npm.full_shape)
        self.assertTrue(np.all(pm.flags == npm.flags))
        self.assertTrue(np.all(pm.flag_names == npm.flag_names))
        self.assertTrue(np.allclose(pm.intensity_matrix, npm.intensity_matrix))
        self.assertEqual(pm.peaklist_tag_types, npm.peaklist_tag_types)
        self.assertEqual(pm.peaklist_tag_values, npm.peaklist_tag_values)

    def tearDown(self):
        """Delete the temporary text files a test may have left behind."""
        for leftover in ('.test_peaklist.txt', '.test_peak_matrix.txt'):
            if os.path.isfile(leftover):
                os.remove(leftover)
85 | 
86 | 
# Run the test cases of this module when it is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
89 | 


--------------------------------------------------------------------------------