├── .github
    └── workflows
    │   ├── build.yml
    │   └── codecov.yml
├── .gitignore
├── CHANGELOG.rst
├── CITATION.cff
├── LICENSE
├── MANIFEST.in
├── PUBLICATIONS.rst
├── README.rst
├── docs
    ├── Makefile
    ├── _static
    │   ├── images
    │   │   ├── mwtab_demo.gif
    │   │   └── mwtab_logo.png
    │   └── mwfiles
    │   │   ├── ST000017_AN000035.json
    │   │   ├── ST000017_AN000035.json.gz
    │   │   ├── ST000017_AN000035.txt
    │   │   ├── ST000017_AN000035.txt.gz
    │   │   ├── ST000040_AN000060.json
    │   │   ├── ST000040_AN000060.txt
    │   │   ├── diabetes
    │   │       ├── ST000048_AN000084.txt
    │   │       └── ST000057_AN000095.txt
    │   │   ├── mwfiles_dir_json
    │   │       ├── ST000017_AN000035.json
    │   │       └── ST000040_AN000060.json
    │   │   ├── mwfiles_dir_mwtab
    │   │       ├── ST000017_AN000035.txt
    │   │       └── ST000040_AN000060.txt
    │   │   ├── mwfiles_json.tar.gz
    │   │   ├── mwfiles_mwtab.zip
    │   │   └── out
    │   │       └── readme.txt
    ├── api.rst
    ├── conf.py
    ├── guide.rst
    ├── index.rst
    ├── license.rst
    ├── requirements-rtd.txt
    └── tutorial.ipynb
├── mwtab
    ├── __init__.py
    ├── __main__.py
    ├── cli.py
    ├── converter.py
    ├── fileio.py
    ├── mwextract.py
    ├── mwrest.py
    ├── mwschema.py
    ├── mwtab.py
    ├── tokenizer.py
    └── validator.py
├── requirements.txt
├── setup.py
└── tests
    ├── example_data
        ├── mwtab_files.tar
        ├── mwtab_files.tar.bz2
        ├── mwtab_files.tar.gz
        ├── mwtab_files.zip
        ├── mwtab_files
        │   ├── ST000122_AN000204.json
        │   └── ST000122_AN000204.txt
        └── validation_files
        │   ├── ST000122_AN000204_error_1.json
        │   ├── ST000122_AN000204_error_1.txt
        │   ├── ST000122_AN000204_error_2.json
        │   ├── ST000122_AN000204_error_2.txt
        │   ├── ST000122_AN000204_error_3.json
        │   ├── ST000122_AN000204_error_3.txt
        │   ├── ST000122_AN000204_error_4.json
        │   └── ST000122_AN000204_error_4.txt
    ├── test_cli.py
    ├── test_converter.py
    ├── test_mwextract.py
    ├── test_mwrest.py
    ├── test_reading.py
    └── test_validator.py


/.github/workflows/build.yml:
--------------------------------------------------------------------------------
 1 | # CI workflow: install the package and run the pytest suite once per Python
 2 | # version listed in the job matrix.
 3 | 
 4 | name: build
 5 | 
 6 | # Events that start this workflow
 7 | on:
 8 |   # Pushes and pull requests that target the master branch
 9 |   push:
10 |     branches:
11 |       - master
12 |   pull_request:
13 |     branches:
14 |       - master
15 | 
16 |   # Manual runs from the Actions tab
17 |   workflow_dispatch:
18 | 
19 | jobs:
20 |   # Single job; the matrix below fans it out into one run per interpreter
21 |   build:
22 |     runs-on: ubuntu-latest
23 |     strategy:
24 |       matrix:
25 |         # Versions are quoted so that 3.10 is not read as the float 3.1
26 |         python-version:
27 |           - '3.5'
28 |           - '3.6'
29 |           - '3.7'
30 |           - '3.8'
31 |           - '3.9'
32 |           - '3.10'
33 | 
34 |     steps:
35 |       # Fetch the repository into $GITHUB_WORKSPACE
36 |       - uses: actions/checkout@v2
37 | 
38 |       - name: Set up Python ${{ matrix.python-version }}
39 |         uses: actions/setup-python@v2
40 |         with:
41 |           python-version: ${{ matrix.python-version }}
42 | 
43 |       # Upgrade pip, then install pytest-cov, the requirements file (if present)
44 |       # and finally the package itself
45 |       - name: Install dependencies
46 |         run: |
47 |           python -m pip install --upgrade pip
48 |           pip install pytest-cov
49 |           if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
50 |           python setup.py install
51 | 
52 |       - name: Test with pytest
53 |         run: pytest


--------------------------------------------------------------------------------
/.github/workflows/codecov.yml:
--------------------------------------------------------------------------------
 1 | name: codecov.io
 2 | 
 3 | # Controls when the workflow will run
 4 | on:
 5 |   # Triggers the workflow on push or pull request events but only for the master branch
 6 |   push:
 7 |     branches: [ master ]
 8 |   pull_request:
 9 |     branches: [ master ]
10 | 
11 |   # Allows you to run this workflow manually from the Actions tab
12 |   workflow_dispatch:
13 | 
14 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel
15 | jobs:
16 |   # Single job: run the test suite under coverage and upload the report to Codecov
17 |   build:
18 |     # The type of runner that the job will run on
19 |     runs-on: ubuntu-latest
20 | 
21 |     # Steps represent a sequence of tasks that will be executed as part of the job
22 |     steps:
23 |       - name: Checkout
24 |         uses: actions/checkout@v2
25 | 
26 |       - name: Set up Python 3.9
27 |         uses: actions/setup-python@v2
28 |         with:
29 |           # Quoted on purpose: an unquoted version is a YAML float, so a later
30 |           # bump to 3.10 would silently become 3.1
31 |           python-version: '3.9'
32 | 
33 |       - name: Install dependencies
34 |         run: |
35 |           pip install -r requirements.txt
36 |           pip install pytest-cov
37 |           python setup.py install
38 | 
39 |       # Measures coverage of the mwtab package and writes an XML report for the upload step
40 |       - name: Run tests and collect coverage
41 |         run: pytest --cov=./mwtab --cov-report=xml
42 | 
43 |       # codecov
44 |       - name: "Upload coverage to Codecov"
45 |         uses: codecov/codecov-action@v2


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | docs/_build
3 | venv/*
4 | data/*
5 | .DS_Store
6 | .ipynb_checkpoints
7 | .idea


--------------------------------------------------------------------------------
/CHANGELOG.rst:
--------------------------------------------------------------------------------
  1 | Release History
  2 | ===============
  3 | 
  4 | 
  5 | 1.2.5.post1 (2022-05-11)
  6 | ~~~~~~~~~~~~~~~~~~~~~~~~
  7 | 
  8 | **Improvements**
  9 | 
 10 | - Add citation information to GitHub repository.
 11 | 
 12 |     - Adds CITATION.cff file with citation info.
 13 | 
 14 | 
 15 | 1.2.5 (2022-03-18)
 16 | ~~~~~~~~~~~~~~~~~~
 17 | 
 18 | **Improvements**
 19 | 
 20 | - Updates ``mwschema.py`` and ``validator.py`` modules to match Metabolomics Workbench's mwTab File Format
 21 |   Specification Version 1.5 (March 2022).
 22 | 
 23 |     - Adds optional NMR_RESULTS_FILE field to NMR block.
 24 | 
 25 |     - Adds optional MS_COMMENTS field to MS block.
 26 | 
 27 |     - Removes requirement for there to be data results for every sample listed in the Study Design
 28 |       (SUBJECT_SAMPLE_FACTORS). Allows for instances where samples have technical issues preventing data from being provided.
 29 | 
 30 | 
 31 | 1.2.4 (2022-01-07)
 32 | ~~~~~~~~~~~~~~~~~~
 33 | 
 34 | **Improvements**
 35 | 
 36 | - Adds check for blank source files when parsing to create ``mwtab`` objects.
 37 | 
 38 | 
 39 | 1.2.3 (2021-11-02)
 40 | ~~~~~~~~~~~~~~~~~~
 41 | 
 42 | **Bugfixes**
 43 | 
 44 | - Removes hard coding of version number in ``validator.validate_file()`` method.
 45 | 
 46 | - Removes mention of Python 3.4 support in README.
 47 | 
 48 | 
 49 | 1.2.2 (2021-10-22)
 50 | ~~~~~~~~~~~~~~~~~~
 51 | 
 52 | **Improvements**
 53 | 
 54 | - Migrates Continuous Integration (CI) from Travis CI to GitHub Actions.
 55 | 
 56 |     - Adds ``.github/workflows/`` folder which contains .yml files for workflows.
 57 | 
 58 |         - Adds ``build.yml`` to folder for testing build with pytest.
 59 | 
 60 |         - Adds ``codecov.yml`` to folder for generating/uploading code coverage info to codecov.io
 61 |           (https://app.codecov.io/gh/MoseleyBioinformaticsLab/mwtab).
 62 | 
 63 |     - Changes build and codecov badges to match new sources.
 64 | 
 65 | 
 66 | 1.2.1 (2021-09-03)
 67 | ~~~~~~~~~~~~~~~~~~
 68 | 
 69 | **Improvements**
 70 | 
 71 | - Updates format of ``~mwtab.mwtab.validate_file()`` validation log generated during validation.
 72 | 
 73 |     - Includes metadata header in validation logs containing: datetime, mwtab version, file source, study id, analysis
 74 |       id, and file format.
 75 | 
 76 |     - Minor changes to error messages for MS(NMR)_METABOLITE_DATA, NMR_BINNED_DATA, and SUBJECT_SAMPLE_FACTORS sections.
 77 | 
 78 | **Bugfixes**
 79 | 
 80 | - Fixes error where pytests for ``~mwtab.mwtab.validate_file()`` method were repeatedly using the same text files for
 81 |   validation rather than both the test text and JSON files.
 82 | 
 83 | - Verbose file validation enabled in commandline.
 84 | 
 85 | - Default value given to ``base_url`` parameter in ``~mwtab.mwtab._pull_study_analysis()`` methods.
 86 | 
 87 | 
 88 | 1.0.1 (2021-03-06)
 89 | ~~~~~~~~~~~~~~~~~~
 90 | 
 91 | **Improvements**
 92 | 
 93 | - Updated ``~mwtab.mwtab.MWTabFile`` to match Metabolomics Workbench JSON
 94 |   format.
 95 | 
 96 |     - Internal dictionary representation now matches Metabolomics Workbench
 97 |       JSON format.
 98 |     - ``~mwtab.mwtab.MWTabFile.write()`` and
 99 |       ``~mwtab.mwtab.MWTabFile.write_str()`` methods now produce files
100 |       consistent with Metabolomics Workbench's JSON format.
101 | 
102 | - Updated ``mwschema.py`` to be consistent with Metabolomics Workbench's
103 |   updated `mwTab` format specification.
104 | 
105 | - Added ``mwrest.py`` module for working with Metabolomics Workbench's REST API.
106 | 
107 |     - Allows for additional data file to be requested through Metabolomics
108 |       Workbench's REST API.
109 | 
110 | - Added ``mwextract.py`` module for extracting metadata and metabolites from
111 |   `mwTab` formatted files.
112 | 
113 | - Updated ``validator.py``.
114 | 
115 |     - Validator now collects all present errors.
116 |     - Performs detection of common field names in `#METABOLITES` blocks.
117 | 
118 | - Updated ``docs/tutorial.ipynb`` to document improved and updated package
119 |   functionality.
120 | 
121 | - Updated `mwtab` package to include Python 3.8 support.
122 | 
123 | 
124 | 0.1.10 (2019-02-18)
125 | ~~~~~~~~~~~~~~~~~~~
126 | 
127 | **Bugfixes**
128 | 
129 | - Metabolomics Workbench started using HTTPS,
130 |   update reading from ANALYSIS_ID to address the change.
131 | 
132 | 
133 | 0.1.9 (2018-04-21)
134 | ~~~~~~~~~~~~~~~~~~
135 | 
136 | **Improvements**
137 | 
138 | - Added citation link to `mwtab` package.
139 | 
140 | 
141 | 0.1.8 (2018-04-05)
142 | ~~~~~~~~~~~~~~~~~~
143 | 
144 | **Improvements**
145 | 
146 | - Added `mwtab` package logo.
147 | - Minor update: Simplified section validation function.
148 | 
149 | 
150 | 0.1.7 (2017-12-07)
151 | ~~~~~~~~~~~~~~~~~~
152 | 
153 | **Improvements**
154 | 
155 | - Minor update: Included test for additional header line within `mwTab` files
156 |   that may or may not be present.
157 | 
158 | 
159 | 0.1.4, 0.1.5, 0.1.6 (2017-11-13)
160 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
161 | 
162 | **Improvements**
163 | 
164 | - Minor update: package README file examples. 
165 | - Minor update: update README to properly render on PyPI.
166 | 
167 | 
168 | 0.1.3 (2017-09-14)
169 | ~~~~~~~~~~~~~~~~~~
170 | 
171 | **Bugfixes**
172 | 
173 | - Fixed bug in the command-line interface.
174 | - Fixed bug in ``mwschema.py`` module definition causing validation to fail.
175 | - Fixed validation optional argument (to ``read_files()`` generator) in order
176 |   to validate mwTab formatted files before returning them.
177 | - Fixed Python2/3 compatibility bug that uses ``bz2`` Python module.
178 | - Fixed Python2/3 unicode/str compatibility bug in ``mwschema.py`` module.
179 | 
180 | **Improvements**
181 | 
182 | - Added Travis CI tests: https://travis-ci.org/MoseleyBioinformaticsLab/mwtab
183 | - Added code coverage reports: https://codecov.io/gh/MoseleyBioinformaticsLab/mwtab
184 | 
185 | 
186 | 0.1.2 (2017-09-14)
187 | ~~~~~~~~~~~~~~~~~~
188 | 
189 | **Bugfixes**
190 | 
191 | - Fixed issue with mwTab formatted file printable representation.
192 | 
193 | 
194 | 0.1.1 (2017-09-12)
195 | ~~~~~~~~~~~~~~~~~~
196 | 
197 | **Improvements**
198 | 
199 | - Improved README display on PyPI.
200 | 
201 | 
202 | 0.1.0 (2017-09-12)
203 | ~~~~~~~~~~~~~~~~~~
204 | 
205 | - Initial public release.
206 | 


--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
 1 | # Citation metadata for the mwtab package (Citation File Format).
 2 | # The "preferred-citation" key used below was introduced in CFF 1.2.0.
 3 | cff-version: 1.2.0
 4 | message: "If you use this software, please cite it as below."
 5 | authors:
 6 | - family-names: "Powell"
 7 |   given-names: "Christian"
 8 |   orcid: "https://orcid.org/0000-0002-4242-080X"
 9 | - family-names: "Smelter"
10 |   given-names: "Andrey"
11 |   orcid: "https://orcid.org/0000-0003-3056-9225"
12 | - family-names: "Moseley"
13 |   given-names: "Hunter"
14 |   orcid: "https://orcid.org/0000-0003-3995-5368"
15 | title: "mwtab"
16 | version: 1.2.5
17 | # Release date of version 1.2.5 as recorded in CHANGELOG.rst
18 | date-released: 2022-03-18
19 | url: "https://github.com/MoseleyBioinformaticsLab/mwtab"
20 | preferred-citation:
21 |   type: article
22 |   authors:
23 |   - family-names: "Powell"
24 |     given-names: "Christian"
25 |     orcid: "https://orcid.org/0000-0002-4242-080X"
26 |   - family-names: "Moseley"
27 |     given-names: "Hunter"
28 |     orcid: "https://orcid.org/0000-0003-3995-5368"
29 |   doi: "10.3390/metabo11030163"
30 |   journal: "Metabolites"
31 |   month: 3
32 |   title: "The mwtab Python Library for RESTful Access and Enhanced Quality Control, Deposition, and Curation of the Metabolomics Workbench Data Repository"
33 |   # Metabolites 11.3 (2021): 163 -- volume 11, issue 3
34 |   issue: 3
35 |   volume: 11
36 |   year: 2021
37 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The Clear BSD License
 2 | 
 3 | Copyright (c) 2020, Christian D. Powell, Andrey Smelter, Hunter N.B. Moseley
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted (subject to the limitations in the disclaimer
 8 | below) provided that the following conditions are met:
 9 | 
10 |      * Redistributions of source code must retain the above copyright notice,
11 |      this list of conditions and the following disclaimer.
12 | 
13 |      * Redistributions in binary form must reproduce the above copyright
14 |      notice, this list of conditions and the following disclaimer in the
15 |      documentation and/or other materials provided with the distribution.
16 | 
17 |      * Neither the name of the copyright holder nor the names of its
18 |      contributors may be used to endorse or promote products derived from this
19 |      software without specific prior written permission.
20 | 
21 | NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
22 | THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
23 | CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
25 | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
26 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
27 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
28 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
29 | BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
30 | IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 | POSSIBILITY OF SUCH DAMAGE.


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README.rst LICENSE CHANGELOG.rst
2 | include requirements.txt
3 | include docs/Makefile
4 | recursive-include docs *.rst *.txt *.py *.png *.svg
5 | recursive-include mwtab *.py *.pyx *.c


--------------------------------------------------------------------------------
/PUBLICATIONS.rst:
--------------------------------------------------------------------------------
 1 | mwtab Publications
 2 | ==================
 3 | 
 4 | 
 5 | When using the ``mwtab`` package in published work, please cite the latest paper:
 6 | 
 7 |     2. Powell, Christian D., and Hunter NB Moseley. "The mwtab Python Library for RESTful Access and Enhanced Quality
 8 |        Control, Deposition, and Curation of the Metabolomics Workbench Data Repository." *Metabolites* 11.3 (2021): 163.
 9 |        doi: `10.3390/metabo11030163`_.
10 | 
11 |         * Data available on FigShare: `10.6084/m9.figshare.12094104`_
12 | 
13 |     1. Smelter, Andrey and Hunter NB Moseley. "A Python library for FAIRer access and deposition to the Metabolomics
14 |        Workbench Data Repository." *Metabolomics* 2018, 14(5): 64. doi: `10.1007/s11306-018-1356-6`_.
15 | 
16 |         * Data available on FigShare: `figshare.com/s/8d5a837cdc3f500fbcaa`_
17 | 
18 | .. _10.3390/metabo11030163: https://doi.org/10.3390/metabo11030163
19 | .. _10.6084/m9.figshare.12094104: https://doi.org/10.6084/m9.figshare.12094104
20 | .. _10.1007/s11306-018-1356-6: http://dx.doi.org/10.1007/s11306-018-1356-6
21 | .. _figshare.com/s/8d5a837cdc3f500fbcaa: https://figshare.com/s/8d5a837cdc3f500fbcaa
22 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
  1 | mwtab
  2 | =====
  3 | 
  4 | .. image:: https://img.shields.io/pypi/l/mwtab.svg
  5 |    :target: https://choosealicense.com/licenses/bsd-3-clause-clear/
  6 |    :alt: License information
  7 | 
  8 | .. image:: https://img.shields.io/pypi/v/mwtab.svg
  9 |    :target: https://pypi.org/project/mwtab
 10 |    :alt: Current library version
 11 | 
 12 | .. image:: https://img.shields.io/pypi/pyversions/mwtab.svg
 13 |    :target: https://pypi.org/project/mwtab
 14 |    :alt: Supported Python versions
 15 | 
 16 | .. image:: https://readthedocs.org/projects/mwtab/badge/?version=latest
 17 |    :target: http://mwtab.readthedocs.io/en/latest/?badge=latest
 18 |    :alt: Documentation status
 19 | 
 20 | .. image:: https://github.com/MoseleyBioinformaticsLab/mwtab/actions/workflows/build.yml/badge.svg
 21 |    :target: https://github.com/MoseleyBioinformaticsLab/mwtab/actions/workflows/build.yml
 22 |    :alt: Build status
 23 | 
 24 | .. image:: https://codecov.io/gh/MoseleyBioinformaticsLab/mwtab/branch/master/graph/badge.svg?token=jhjMsP1qma
 25 |    :target: https://codecov.io/gh/MoseleyBioinformaticsLab/mwtab
 26 |    :alt: CodeCov
 27 | 
 28 | .. image:: https://img.shields.io/badge/DOI-10.3390%2Fmetabo11030163-blue.svg
 29 |    :target: https://doi.org/10.3390/metabo11030163
 30 |    :alt: Citation link
 31 | 
 32 | .. image:: https://img.shields.io/github/stars/MoseleyBioinformaticsLab/mwtab.svg?style=social&label=Star
 33 |    :target: https://github.com/MoseleyBioinformaticsLab/mwtab
 34 |    :alt: GitHub project
 35 | 
 36 | |
 37 | 
 38 | .. image:: https://raw.githubusercontent.com/MoseleyBioinformaticsLab/mwtab/master/docs/_static/images/mwtab_logo.png
 39 |    :width: 50%
 40 |    :align: center
 41 |    :target: http://mwtab.readthedocs.io/
 42 | 
 43 | 
 44 | The ``mwtab`` package is a Python library that facilitates reading and writing
 45 | files in ``mwTab`` format used by the `Metabolomics Workbench`_ for archival of
 46 | Mass Spectrometry (MS) and Nuclear Magnetic Resonance (NMR) experimental data.
 47 | 
 48 | The ``mwtab`` package provides facilities to convert ``mwTab`` formatted files into
 49 | their equivalent ``JSON`` ized representation and vice versa.  ``JSON`` stands for JavaScript
 50 | Object Notation, an open-standard format that uses human-readable text to transmit
 51 | data objects consisting of attribute-value pairs.
 52 | 
 53 | The ``mwtab`` package can be used in several ways:
 54 | 
 55 |    * As a library for accessing and manipulating data stored in ``mwTab`` format files.
 56 |    * As a command-line tool to convert between ``mwTab`` format and its equivalent
 57 |      ``JSON`` representation.
 58 | 
 59 | 
 60 | Citation
 61 | ~~~~~~~~
 62 | 
 63 | When using the ``mwtab`` package in published work, please cite the following papers:
 64 | 
 65 |    * Powell, Christian D., and Hunter NB Moseley. "The mwtab Python Library for RESTful
 66 |      Access and Enhanced Quality Control, Deposition, and Curation of the Metabolomics
 67 |      Workbench Data Repository." *Metabolites* 11.3 (2021): 163. doi:
 68 |      `10.3390/metabo11030163`_.
 69 | 
 70 |    * Smelter, Andrey and Hunter NB Moseley. "A Python library for FAIRer access and
 71 |      deposition to the Metabolomics Workbench Data Repository."
 72 |      *Metabolomics* 2018, 14(5): 64. doi: `10.1007/s11306-018-1356-6`_.
 73 | 
 74 | 
 75 | Links
 76 | ~~~~~
 77 | 
 78 |    * mwtab @ GitHub_
 79 |    * mwtab @ PyPI_
 80 |    * Documentation @ ReadTheDocs_
 81 | 
 82 | 
 83 | Installation
 84 | ~~~~~~~~~~~~
 85 | 
 86 | The ``mwtab`` package runs under Python 3.5+. Use pip_ to install.
 87 | Starting with Python 3.4, pip_ is included by default.
 88 | 
 89 | 
 90 | Install on Linux, Mac OS X
 91 | --------------------------
 92 | 
 93 | .. code:: bash
 94 | 
 95 |    python3 -m pip install mwtab
 96 | 
 97 | 
 98 | Install on Windows
 99 | ------------------
100 | 
101 | .. code:: bash
102 | 
103 |    py -3 -m pip install mwtab
104 | 
105 | 
106 | Upgrade on Linux, Mac OS X
107 | --------------------------
108 | 
109 | .. code:: bash
110 | 
111 |    python3 -m pip install mwtab --upgrade
112 | 
113 | 
114 | Upgrade on Windows
115 | ------------------
116 | 
117 | .. code:: bash
118 | 
119 |    py -3 -m pip install mwtab --upgrade
120 | 
121 | 
122 | Quickstart
123 | ~~~~~~~~~~
124 | 
125 | .. code:: python
126 | 
127 |    >>> import mwtab
128 |    >>>
129 |    >>> # Here we use ANALYSIS_ID of file to fetch data from URL
130 |    >>> for mwfile in mwtab.read_files("1", "2"):
131 |    ...      print("STUDY_ID:", mwfile.study_id)
132 |    ...      print("ANALYSIS_ID:", mwfile.analysis_id)
133 |    ...      print("SOURCE:", mwfile.source)
134 |    ...      print("Blocks:", list(mwfile.keys()))
135 |    >>>
136 | 
137 | 
138 | .. image:: https://raw.githubusercontent.com/MoseleyBioinformaticsLab/mwtab/master/docs/_static/images/mwtab_demo.gif
139 |    :align: center
140 | 
141 | 
142 | .. note:: Read the User Guide and the ``mwtab`` Tutorial on ReadTheDocs_
143 |           to learn more and to see code examples on using ``mwtab`` as a
144 |           library and as a command-line tool.
145 | 
146 | 
147 | License
148 | ~~~~~~~
149 | 
150 | This package is distributed under the BSD_ `license`.
151 | 
152 | 
153 | .. _Metabolomics Workbench: http://www.metabolomicsworkbench.org
154 | .. _GitHub: https://github.com/MoseleyBioinformaticsLab/mwtab
155 | .. _ReadTheDocs: http://mwtab.readthedocs.io
156 | .. _PyPI: https://pypi.org/project/mwtab
157 | .. _pip: https://pip.pypa.io
158 | .. _BSD: https://choosealicense.com/licenses/bsd-3-clause-clear/
159 | .. _10.3390/metabo11030163: https://doi.org/10.3390/metabo11030163
160 | .. _10.1007/s11306-018-1356-6: http://dx.doi.org/10.1007/s11306-018-1356-6
161 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation: every target is forwarded to
 2 | # $(SPHINXBUILD) in "make mode" (-M), e.g. "make html" runs "sphinx-build -M html".
 3 | 
 4 | # You can set these variables from the command line, e.g. "make html BUILDDIR=/tmp/docs".
 5 | SPHINXOPTS    =
 6 | SPHINXBUILD   = sphinx-build
 7 | SPHINXPROJ    = mwtab
 8 | SOURCEDIR     = .
 9 | BUILDDIR      = _build
10 | 
11 | # Put it first so that "make" without argument is like "make help" (first target is the default goal).
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the "make mode" (-M) option.
18 | # $@ is the requested target name; $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)


--------------------------------------------------------------------------------
/docs/_static/images/mwtab_demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MoseleyBioinformaticsLab/mwtab/946cf1e85926ef32143eb5d5aff4da56127bb358/docs/_static/images/mwtab_demo.gif


--------------------------------------------------------------------------------
/docs/_static/images/mwtab_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MoseleyBioinformaticsLab/mwtab/946cf1e85926ef32143eb5d5aff4da56127bb358/docs/_static/images/mwtab_logo.png


--------------------------------------------------------------------------------
/docs/_static/mwfiles/ST000017_AN000035.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MoseleyBioinformaticsLab/mwtab/946cf1e85926ef32143eb5d5aff4da56127bb358/docs/_static/mwfiles/ST000017_AN000035.json.gz


--------------------------------------------------------------------------------
/docs/_static/mwfiles/ST000017_AN000035.txt.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MoseleyBioinformaticsLab/mwtab/946cf1e85926ef32143eb5d5aff4da56127bb358/docs/_static/mwfiles/ST000017_AN000035.txt.gz


--------------------------------------------------------------------------------
/docs/_static/mwfiles/mwfiles_json.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MoseleyBioinformaticsLab/mwtab/946cf1e85926ef32143eb5d5aff4da56127bb358/docs/_static/mwfiles/mwfiles_json.tar.gz


--------------------------------------------------------------------------------
/docs/_static/mwfiles/mwfiles_mwtab.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MoseleyBioinformaticsLab/mwtab/946cf1e85926ef32143eb5d5aff4da56127bb358/docs/_static/mwfiles/mwfiles_mwtab.zip


--------------------------------------------------------------------------------
/docs/_static/mwfiles/out/readme.txt:
--------------------------------------------------------------------------------
1 | # Folder to collect all files generated by tutorial.ipynb
2 | 


--------------------------------------------------------------------------------
/docs/api.rst:
--------------------------------------------------------------------------------
 1 | The mwtab API Reference
 2 | =======================
 3 | 
 4 | 
 5 | .. automodule:: mwtab
 6 | 
 7 | .. automodule:: mwtab.mwtab
 8 |    :member-order: bysource
 9 |    :members:
10 | 
11 | 
12 | .. automodule:: mwtab.cli
13 | 
14 | .. autofunction:: cli
15 | 
16 | 
17 | .. automodule:: mwtab.tokenizer
18 | 
19 | .. autofunction:: tokenizer
20 | 
21 | 
22 | .. automodule:: mwtab.fileio
23 | 
24 | .. autofunction:: read_files
25 | 
26 | 
27 | .. automodule:: mwtab.converter
28 |    :member-order: bysource
29 |    :members:
30 | 
31 | 
32 | .. automodule:: mwtab.validator
33 | 
34 | .. autofunction:: validate_section
35 | 
36 | .. autofunction:: validate_file
37 | 
38 | 
39 | .. automodule:: mwtab.mwrest
40 |    :member-order: bysource
41 |    :members:
42 | 
43 | 
44 | .. automodule:: mwtab.mwextract
45 |    :member-order: bysource
46 |    :members:
47 | 
48 | 
49 | .. automodule:: mwtab.mwschema
50 | 
51 | .. autodata:: metabolomics_workbench_schema
52 |    :annotation:
53 | 
54 | .. autodata:: project_schema
55 |    :annotation:
56 | 
57 | .. autodata:: study_schema
58 |    :annotation:
59 | 
60 | .. autodata:: analysis_schema
61 |    :annotation:
62 | 
63 | .. autodata:: subject_schema
64 |    :annotation:
65 | 
66 | .. autodata:: subject_sample_factors_schema
67 |    :annotation:
68 | 
69 | .. autodata:: collection_schema
70 |    :annotation:
71 | 
72 | .. autodata:: treatment_schema
73 |    :annotation:
74 | 
75 | .. autodata:: sampleprep_schema
76 |    :annotation:
77 | 
78 | .. autodata:: chromatography_schema
79 |    :annotation:
80 | 
81 | .. autodata:: ms_schema
82 |    :annotation:
83 | 
84 | .. autodata:: nmr_schema
85 |    :annotation:
86 | 
87 | .. autodata:: metabolites_schema
88 |    :annotation:
89 | 
90 | .. autodata:: ms_metabolite_data_schema
91 |    :annotation:
92 | 
93 | .. autodata:: nmr_binned_data_schema
94 |    :annotation:
95 | 


--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | # mwtab documentation build configuration file, created by
  5 | # sphinx-quickstart on Mon Aug 21 15:32:57 2017.
  6 | #
  7 | # This file is execfile()d with the current directory set to its
  8 | # containing dir.
  9 | #
 10 | # Note that not all possible configuration values are present in this
 11 | # autogenerated file.
 12 | #
 13 | # All configuration values have a default; values that are commented out
 14 | # serve to show the default.
 15 | 
 16 | # If extensions (or modules to document with autodoc) are in another directory,
 17 | # add these directories to sys.path here. If the directory is relative to the
 18 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 19 | #
 20 | import os
 21 | import sys
 22 | # Put the parent of the docs directory first on sys.path so ``mwtab`` is importable.
 23 | sys.path.insert(0, os.path.abspath('..'))
 24 | 
 25 | from mwtab import __version__
 26 | 
 27 | # -- General configuration ------------------------------------------------
 28 | 
 29 | # If your documentation needs a minimal Sphinx version, state it here.
 30 | #
 31 | # needs_sphinx = '1.0'
 32 | 
 33 | # Sphinx extension module names, as strings. The 'sphinx.ext.*' entries ship
 34 | # with Sphinx; 'nbsphinx' is not in that namespace (presumably the notebook
 35 | # extension used to render docs/tutorial.ipynb -- TODO confirm).
 36 | extensions = ['sphinx.ext.autodoc',
 37 |     'sphinx.ext.doctest',
 38 |     'sphinx.ext.intersphinx',
 39 |     'sphinx.ext.todo',
 40 |     'sphinx.ext.coverage',
 41 |     'sphinx.ext.mathjax',
 42 |     'sphinx.ext.ifconfig',
 43 |     'sphinx.ext.viewcode',
 44 |     'nbsphinx']
 45 | 
 46 | # Add any paths that contain templates here, relative to this directory.
 47 | templates_path = ['_templates']
 48 | 
 49 | # The suffix(es) of source filenames.
 50 | # You can specify multiple suffixes as a list of strings:
 51 | #
 52 | # source_suffix = ['.rst', '.md']
 53 | source_suffix = '.rst'
 54 | 
 55 | # The master toctree document (name given without the source suffix).
 56 | master_doc = 'index'
 57 | 
 58 | # General information about the project.
 59 | project = 'mwtab'
 60 | copyright = '2020, Christian D. Powell, Andrey Smelter, Hunter N.B. Moseley'
 61 | author = 'Christian D. Powell, Andrey Smelter, Hunter N.B. Moseley'
 62 | 
 63 | # The version info for the project you're documenting, acts as replacement for
 64 | # |version| and |release|, also used in various other places throughout the
 65 | # built documents.
 66 | #
 67 | # The short X.Y version (set here to the full package version, same as release).
 68 | version = __version__
 69 | # The full version, including alpha/beta/rc tags.
 70 | release = __version__
 71 | 
 72 | # The language for content autogenerated by Sphinx. Refer to documentation
 73 | # for a list of supported languages.
 74 | #
 75 | # This is also used if you do content translation via gettext catalogs.
 76 | # Usually you set "language" from the command line for these cases.
 77 | language = None
 78 | 
 79 | # List of patterns, relative to source directory, that match files and
 80 | # directories to ignore when looking for source files.
 81 | # These patterns also affect html_static_path and html_extra_path
 82 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
 83 | 
 84 | # The name of the Pygments (syntax highlighting) style to use.
 85 | pygments_style = 'sphinx'
 86 | 
 87 | # If true, `todo` and `todoList` produce output, else they produce nothing.
 88 | todo_include_todos = True
 89 | 
 90 | 
 91 | # -- Options for HTML output ----------------------------------------------
 92 | 
 93 | # The theme to use for HTML and HTML Help pages.  See the documentation for
 94 | # a list of builtin themes.
 95 | #
 96 | html_theme = 'alabaster'
 97 | 
 98 | # Theme options are theme-specific and customize the look and feel of a theme
 99 | # further.  For a list of options available for each theme, see the
100 | # documentation.
101 | #
102 | # html_theme_options = {}
103 | 
104 | # Add any paths that contain custom static files (such as style sheets) here,
105 | # relative to this directory. They are copied after the builtin static files,
106 | # so a file named "default.css" will overwrite the builtin "default.css".
107 | html_static_path = ['_static']
108 | 
109 | 
110 | # -- Options for HTMLHelp output ------------------------------------------
111 | 
112 | # Output file base name for HTML help builder.
113 | htmlhelp_basename = 'mwtabdoc'
114 | 
115 | 
116 | # -- Options for LaTeX output ---------------------------------------------
117 | 
118 | latex_elements = {
119 |     # The paper size ('letterpaper' or 'a4paper').
120 |     #
121 |     # 'papersize': 'letterpaper',
122 | 
123 |     # The font size ('10pt', '11pt' or '12pt').
124 |     #
125 |     # 'pointsize': '10pt',
126 | 
127 |     # Additional stuff for the LaTeX preamble.
128 |     #
129 |     # 'preamble': '',
130 | 
131 |     # Latex figure (float) alignment
132 |     #
133 |     # 'figure_align': 'htbp',
134 | }
135 | 
136 | # Grouping the document tree into LaTeX files. List of tuples
137 | # (source start file, target name, title,
138 | #  author, documentclass [howto, manual, or own class]).
139 | latex_documents = [
140 |     (master_doc, 'mwtab.tex', 'mwtab Documentation',
141 |      'Christian D. Powell, Andrey Smelter, Hunter N.B. Moseley', 'manual'),
142 | ]
143 | 
144 | 
145 | # -- Options for manual page output ---------------------------------------
146 | 
147 | # One entry per manual page. List of tuples
148 | # (source start file, name, description, authors, manual section).
149 | man_pages = [
150 |     (master_doc, 'mwtab', 'mwtab Documentation',
151 |      [author], 1)
152 | ]
153 | 
154 | 
155 | # -- Options for Texinfo output -------------------------------------------
156 | 
157 | # Grouping the document tree into Texinfo files. List of tuples
158 | # (source start file, target name, title, author,
159 | #  dir menu entry, description, category)
160 | texinfo_documents = [
161 |     (master_doc, 'mwtab', 'mwtab Documentation',
162 |      author, 'mwtab', 'One line description of project.',
163 |      'Miscellaneous'),
164 | ]
165 | 
166 | 
167 | 
168 | # -- Options for Epub output ----------------------------------------------
169 | 
170 | # Bibliographic Dublin Core info.
171 | epub_title = project
172 | epub_author = author
173 | epub_publisher = author
174 | epub_copyright = copyright
175 | 
176 | # The unique identifier of the text. This can be a ISBN number
177 | # or the project homepage.
178 | #
179 | # epub_identifier = ''
180 | 
181 | # A unique identification for the text.
182 | #
183 | # epub_uid = ''
184 | 
185 | # A list of files that should not be packed into the epub file.
186 | epub_exclude_files = ['search.html']
187 | 
188 | 
189 | 
# Example configuration for intersphinx: refer to the Python standard library.
# Uses the named form ``{name: (target URL, inventory location)}``; the older
# ``{target URL: None}`` form is deprecated in Sphinx.
intersphinx_mapping = {'python': ('https://docs.python.org/3', None)}
192 | 


--------------------------------------------------------------------------------
/docs/guide.rst:
--------------------------------------------------------------------------------
  1 | User Guide
  2 | ==========
  3 | 
  4 | Description
  5 | ~~~~~~~~~~~
  6 | 
  7 | The ``mwtab`` package is a Python library that facilitates reading and writing
  8 | files in ``mwTab`` format used by the `Metabolomics Workbench`_ for archival of
  9 | Mass Spectrometry (MS) and Nuclear Magnetic Resonance (NMR) experimental data.
 10 | 
 11 | The ``mwtab`` package provides facilities to convert ``mwTab`` formatted files into
 12 | their equivalent JSONized (JavaScript Object Notation, an open-standard format that
 13 | uses human-readable text to transmit data objects consisting of attribute-value pairs)
 14 | representation and vice versa.
 15 | 
 16 | The ``mwtab`` package can be used in several ways:
 17 | 
 18 |    * As a library for accessing and manipulating data stored in ``mwTab`` format files.
 19 |    * As a command-line tool to convert between ``mwTab`` format and its equivalent
 20 |      ``JSON`` representation.
 21 | 
 22 | Installation
 23 | ~~~~~~~~~~~~
 24 | 
 25 | The :mod:`mwtab` package runs under Python 2.7 and Python 3.4+.
 26 | Starting with Python 3.4, pip_ is included by default. To install
 27 | system-wide with pip_ run the following:
 28 | 
 29 | Install on Linux, Mac OS X
 30 | --------------------------
 31 | 
 32 | .. code:: bash
 33 | 
 34 |    python3 -m pip install mwtab
 35 | 
 36 | Install on Windows
 37 | ------------------
 38 | 
 39 | .. code:: bash
 40 | 
 41 |    py -3 -m pip install mwtab
 42 | 
 43 | Install inside virtualenv
 44 | -------------------------
 45 | 
 46 | For an isolated install, you can run the same inside a virtualenv_.
 47 | 
 48 | .. code:: bash
 49 | 
 50 |    $ virtualenv -p /usr/bin/python3 venv  # create virtual environment, use python3 interpreter
 51 | 
 52 |    $ source venv/bin/activate             # activate virtual environment
 53 | 
 54 |    $ python3 -m pip install mwtab         # install mwtab as usual
 55 | 
 56 |    $ deactivate                           # if you are done working in the virtual environment
 57 | 
 58 | Get the source code
 59 | ~~~~~~~~~~~~~~~~~~~
 60 | 
 61 | Code is available on GitHub: https://github.com/MoseleyBioinformaticsLab/mwtab
 62 | 
 63 | You can either clone the public repository:
 64 | 
 65 | .. code:: bash
 66 | 
 67 |    $ git clone https://github.com/MoseleyBioinformaticsLab/mwtab.git
 68 | 
 69 | Or, download the tarball and/or zipball:
 70 | 
 71 | .. code:: bash
 72 | 
 73 |    $ curl -OL https://github.com/MoseleyBioinformaticsLab/mwtab/tarball/master
 74 | 
 75 |    $ curl -OL https://github.com/MoseleyBioinformaticsLab/mwtab/zipball/master
 76 | 
 77 | Once you have a copy of the source, you can embed it in your own Python package,
 78 | or install it into your system site-packages easily:
 79 | 
 80 | .. code:: bash
 81 | 
 82 |    $ python3 setup.py install
 83 | 
 84 | Dependencies
 85 | ~~~~~~~~~~~~
 86 | 
 87 | The :mod:`mwtab` package depends on several Python libraries. The ``pip`` command
 88 | will install all dependencies automatically, but if you wish to install them manually,
 89 | run the following commands:
 90 | 
 91 |    * docopt_ for creating :mod:`mwtab` command-line interface.
 92 |       * To install docopt_ run the following:
 93 | 
 94 |         .. code:: bash
 95 | 
 96 |            python3 -m pip install docopt  # On Linux, Mac OS X
 97 |            py -3 -m pip install docopt    # On Windows
 98 | 
 99 |    * schema_ for validating functionality of ``mwTab`` files based on ``JSON`` schema.
100 |       * To install the schema_ Python library run the following:
101 | 
102 |         .. code:: bash
103 | 
104 |            python3 -m pip install schema  # On Linux, Mac OS X
105 |            py -3 -m pip install schema    # On Windows
106 | 
107 | 
108 | Basic usage
109 | ~~~~~~~~~~~
110 | 
111 | The :mod:`mwtab` package can be used in several ways:
112 | 
113 |    * As a library for accessing and manipulating data stored in ``mwTab`` formatted files.
114 | 
115 |       * Create the :class:`~mwtab.mwtab.MWTabFile` generator function that will generate
116 |         (yield) a single :class:`~mwtab.mwtab.MWTabFile` instance at a time.
117 | 
118 |       * Process each :class:`~mwtab.mwtab.MWTabFile` instance:
119 | 
120 |          * Process ``mwTab`` files in a for-loop, one file at a time.
121 |          * Process as an iterator calling the :py:func:`next` built-in function.
122 |          * Convert the generator into a :py:class:`list` of :class:`~mwtab.mwtab.MWTabFile` objects.
123 | 
124 |    * As a command-line tool:
125 | 
126 |       * Convert from ``mwTab`` file format into its equivalent ``JSON`` file format and vice versa.
127 |       * Validate data stored in ``mwTab`` file based on schema definition.
128 | 
129 | .. note:: Read :doc:`tutorial` to learn more and see code examples on using the :mod:`mwtab`
130 |           as a library and as a command-line tool.
131 | 
132 | 
133 | .. _pip: https://pip.pypa.io/
134 | .. _virtualenv: https://virtualenv.pypa.io/
135 | .. _docopt: https://pypi.org/project/docopt/
136 | .. _schema: https://pypi.org/project/schema/
137 | .. _Metabolomics Workbench: http://www.metabolomicsworkbench.org/
138 | 


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
 1 | Welcome to mwtab's documentation!
 2 | =================================
 3 | 
 4 | .. include:: ../README.rst
 5 | 
 6 | Documentation index:
 7 | ====================
 8 | 
 9 | .. toctree::
10 |    :maxdepth: 2
11 |    :caption: Contents:
12 | 
13 |    guide
14 |    tutorial
15 |    api
16 |    license
17 | 
18 | 
19 | Indices and tables
20 | ==================
21 | 
22 | * :ref:`genindex`
23 | * :ref:`modindex`
24 | * :ref:`search`
25 | 


--------------------------------------------------------------------------------
/docs/license.rst:
--------------------------------------------------------------------------------
1 | .. _license:
2 | 
3 | License
4 | =======
5 | 
6 | .. include:: ../LICENSE
7 | 


--------------------------------------------------------------------------------
/docs/requirements-rtd.txt:
--------------------------------------------------------------------------------
1 | nbsphinx
2 | ipykernel
3 | mwtab


--------------------------------------------------------------------------------
/mwtab/__init__.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | """Routines for working with ``mwTab`` format files used by the 
 5 | Metabolomics Workbench.
 6 | 
 7 | This package includes the following modules:
 8 | 
 9 | ``mwtab``
10 |     This module provides the :class:`~mwtab.mwtab.MWTabFile` class which is a python
11 |     dictionary representation of a Metabolomics Workbench `mwtab` file. Data can be accessed
12 |     directly from the :class:`~mwtab.mwtab.MWTabFile` instance using bracket accessors.
13 | 
14 | ``cli``
15 |     This module provides command-line interface for the ``mwtab`` package.
16 | 
17 | ``tokenizer``
18 |     This module provides the :func:`~mwtab.tokenizer.tokenizer` generator that generates
19 |     tuples of key-value pairs from `mwtab` files.
20 | 
21 | ``fileio``
22 |     This module provides the :func:`~mwtab.fileio.read_files` generator
23 |     to open files from different sources (single file/multiple files on a local 
24 |     machine, directory/archive of files, URL address of a file).
25 | 
26 | ``converter``
27 |     This module provides the :class:`~mwtab.converter.Converter` class that is
28 |     responsible for the conversion of ``mwTab`` formatted files into their JSON
29 |     representation and vice versa.
30 | 
31 | ``mwschema``
32 |     This module provides JSON schema definitions for the ``mwTab`` formatted files,
33 |     i.e. specifies required and optional keys as well as data types.
34 | 
35 | ``validator``
36 |     This module provides routines to validate ``mwTab`` formatted files based
37 |     on schema definitions as well as checks for file self-consistency.
38 | 
39 | ``mwrest``
40 |     This module provides the :class:`~mwtab.mwrest.GenericMWURL` class which is a
41 |     python dictionary representation of a Metabolomics Workbench REST URL. The class
42 |     is used to validate query parameters and to generate a URL path which can be
43 |     used to request data from Metabolomics Workbench through their REST API.
44 | """
45 | 
46 | from logging import getLogger, NullHandler
47 | from .fileio import read_files, read_mwrest
48 | from .validator import validate_file
49 | from .mwrest import GenericMWURL
50 | 
51 | 
52 | __version__ = "1.2.5.post1"
53 | 
54 | 
55 | # Setting default logging handler
56 | getLogger(__name__).addHandler(NullHandler())
57 | 


--------------------------------------------------------------------------------
/mwtab/__main__.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | import docopt
 5 | 
 6 | from . import cli
 7 | from . import __version__
 8 | 
 9 | 
def main():
    """Entry point: parse the command line with docopt and run the CLI.

    The usage text is taken from the ``cli`` module docstring and the
    reported version from the package ``__version__``.
    """
    cli.cli(docopt.docopt(cli.__doc__, version=__version__))
14 | 
15 | 
16 | if __name__ == "__main__":
17 |     main()
18 | 


--------------------------------------------------------------------------------
/mwtab/cli.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | """
  5 | The mwtab command-line interface
  6 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  7 | 
  8 | Usage:
  9 |     mwtab -h | --help
 10 |     mwtab --version
 11 |     mwtab convert (<from-path> <to-path>) [--from-format=<format>] [--to-format=<format>] [--validate] [--mw-rest=<url>] [--verbose]
 12 |     mwtab validate <from-path> [--mw-rest=<url>] [--verbose]
 13 |     mwtab download url <url> [--to-path=<path>] [--verbose]
 14 |     mwtab download study all [--to-path=<path>] [--input-item=<item>] [--output-format=<format>] [--mw-rest=<url>] [--validate] [--verbose]
 15 |     mwtab download study <input-value> [--to-path=<path>] [--input-item=<item>] [--output-item=<item>] [--output-format=<format>] [--mw-rest=<url>] [--validate] [--verbose]
 16 |     mwtab download (study | compound | refmet | gene | protein) <input-item> <input-value> <output-item> [--output-format=<format>] [--to-path=<path>] [--mw-rest=<url>] [--verbose]
 17 |     mwtab download moverz <input-item> <m/z-value> <ion-type-value> <m/z-tolerance-value> [--to-path=<path>] [--mw-rest=<url>] [--verbose]
 18 |     mwtab download exactmass <LIPID-abbreviation> <ion-type-value> [--to-path=<path>] [--mw-rest=<url>] [--verbose]
 19 |     mwtab extract metadata <from-path> <to-path> <key> ... [--to-format=<format>] [--no-header]
 20 |     mwtab extract metabolites <from-path> <to-path> (<key> <value>) ... [--to-format=<format>] [--no-header]
 21 | 
 22 | Options:
 23 |     -h, --help                      Show this screen.
 24 |     --version                       Show version.
 25 |     --verbose                       Print what files are processing.
 26 |     --validate                      Validate the mwTab file.
 27 |     --from-format=<format>          Input file format, available formats: mwtab, json [default: mwtab].
 28 |     --to-format=<format>            Output file format [default: json].
 29 |                                     Available formats for convert:
 30 |                                         mwtab, json.
 31 |                                     Available formats for extract:
 32 |                                         json, csv.
 33 |     --mw-rest=<url>                 URL to MW REST interface
 34 |                                     [default: https://www.metabolomicsworkbench.org/rest/].
 35 |     --context=<context>             Type of resource to access from MW REST interface, available contexts: study,
 36 |                                     compound, refmet, gene, protein, moverz, exactmass [default: study].
 37 |     --input-item=<item>             Item to search Metabolomics Workbench with.
 38 |     --output-item=<item>            Item to be retrieved from Metabolomics Workbench.
 39 |     --output-format=<format>        Format for item to be retrieved in, available formats: mwtab, json.
 40 |     --no-header                     Include header at the top of csv formatted files.
 41 | 
 42 |     For extraction <to-path> can take a "-" which will use stdout.
 43 | """
 44 | 
 45 | from . import fileio, mwextract, mwrest
 46 | from .converter import Converter
 47 | from .validator import validate_file
 48 | from .mwschema import section_schema_mapping
 49 | 
 50 | from os import getcwd, makedirs, path
 51 | from os.path import join, isfile
 52 | from urllib.parse import quote_plus
 53 | 
 54 | import json
 55 | import re
 56 | 
 57 | # remove
 58 | import time
 59 | import datetime
 60 | 
 61 | 
# Maps the value given for "--output-format" to the format string used for
# REST URLs and output file extensions ("mwtab" is written as "txt").
# ``None`` (option not given) maps to ``None``.
OUTPUT_FORMATS = {
    "txt": "txt",
    "mwtab": "txt",
    "json": "json",
    None: None
}
# Verbosity flag; off by default.
VERBOSE = False
 69 | 
 70 | 
 71 | def check_filepath(filepath):
 72 |     """Method for validating that a given path directory exits. If not, the directory is created.
 73 | 
 74 |     :param str filepath: File path string.
 75 |     :return: None
 76 |     :rtype: :py:obj:`None`
 77 |     """
 78 |     if not path.exists(path.dirname(filepath)):
 79 |         dirname = path.dirname(filepath)
 80 |         if dirname:
 81 |             makedirs(dirname)
 82 | 
 83 | 
 84 | def get_file_path(dir_path, filename, extension):
 85 |     """Helper method for validating that the commandline arguments "--to-path" or _ are not "None". Returns the given
 86 |     command argument if not none or creates a default file path from the given filename and the current working
 87 |     directory.
 88 | 
 89 |     :param dir_path: Path to directory file is to be saved in.
 90 |     :type dir_path: :py:class:`str` or :py:class:`None`
 91 |     :param str filename: Filename processed file is to be saved as.
 92 |     :param str extension: File extension.
 93 |     :return: Complete file path.
 94 |     :rtype: :py:class:`str`
 95 |     """
 96 |     # check to see if given directory path is not None
 97 |     dir_path = dir_path if dir_path else getcwd()
 98 |     if path.splitext(dir_path)[1]:
 99 |         return dir_path
100 |     extension = extension if extension else "txt"
101 |     return join(dir_path, ".".join([quote_plus(filename).replace(".", "_"), extension]))
102 | 
103 | 
def download(context, cmdparams):
    """Build a Metabolomics Workbench REST URL from the commandline arguments, request it and save the result.

    Missing arguments fall back to defaults: input item "analysis_id", output item "mwtab" and output format "txt".
    Any exception raised along the way is caught and printed rather than propagated.

    :param str context: String indicating the type of data ("context") to be accessed from the Metabolomics Workbench.
    :param dict cmdparams: Commandline arguments specifying data to be accessed from Metabolomics Workbench.
    :return: None
    :rtype: :py:obj:`None`
    """
    try:
        # TODO: Convert to using mwrest.generate_study_urls() method
        # assemble the parameters for a callable (validated) REST URL
        url_parameters = {
            "context": context,
            "input_item": cmdparams.get("<input-item>") or "analysis_id",
            "input_value": cmdparams["<input-value>"],
            "output_item": cmdparams.get("<output-item>") or "mwtab",
            "output_format": OUTPUT_FORMATS[cmdparams.get("--output-format")] if cmdparams.get("--output-format") else "txt",
        }
        rest_url = mwrest.GenericMWURL(url_parameters).url
        rest_file = next(fileio.read_mwrest(rest_url))

        # nothing is written when the retrieved text is blank
        if not rest_file.text:
            print("BLANK FILE")
        else:
            destination = get_file_path(
                cmdparams.get("--to-path"),
                rest_file.source,
                OUTPUT_FORMATS[cmdparams.get("--output-format")]
            )
            with open(destination, "w", encoding="utf-8") as outfile:
                rest_file.write(outfile)
    except Exception as error:
        print(error)
136 | 
137 | 
def _download_study_list(id_list, cmdargs, verbose):
    """Download each entry of ``id_list`` through :func:`download`, pausing 3 seconds after every request.

    :param id_list: Collection of identifiers to download.
    :param dict cmdargs: Command line arguments; ``"<input-value>"`` is overwritten with each identifier.
    :param bool verbose: Whether to print a progress line for each identifier.
    :return: None
    :rtype: :py:obj:`None`
    """
    for count, input_id in enumerate(id_list):
        if verbose:
            print("[{:4}/{:4}]".format(count + 1, len(id_list)), input_id, datetime.datetime.now())
        cmdargs["<input-value>"] = input_id
        download("study", cmdargs)
        time.sleep(3)


def cli(cmdargs):
    """Implements the command line interface.

    Dispatches to the ``convert``, ``validate``, ``download`` or ``extract`` command depending on which
    entries of ``cmdargs`` are set.

    :param dict cmdargs: Dictionary of command line arguments.
    :return: None
    :rtype: :py:obj:`None`
    :raises ValueError: If ``download study all`` is given an unsupported ``--input-item``.
    """
    # update the module-level flag (not just a local) alongside the flags of the helper modules
    global VERBOSE
    VERBOSE = cmdargs["--verbose"]
    fileio.VERBOSE = cmdargs["--verbose"]
    fileio.MWREST = cmdargs["--mw-rest"]
    mwrest.VERBOSE = cmdargs["--verbose"]

    # mwtab convert ...
    if cmdargs["convert"]:
        converter = Converter(from_path=cmdargs["<from-path>"],
                              to_path=cmdargs["<to-path>"],
                              from_format=cmdargs["--from-format"],
                              to_format=cmdargs["--to-format"],
                              validate=cmdargs["--validate"])
        converter.convert()

    # mwtab validate ...
    elif cmdargs["validate"]:
        for mwfile in fileio.read_files(cmdargs["<from-path>"], validate=cmdargs["--validate"]):
            validate_file(
                mwtabfile=mwfile,
                section_schema_mapping=section_schema_mapping,
                verbose=cmdargs.get("--verbose")
            )

    # mwtab download ...
    elif cmdargs["download"]:

        # mwtab download url ...
        if cmdargs["<url>"]:
            mwrestfile = next(fileio.read_mwrest(cmdargs["<url>"]))
            with open(get_file_path(
                    cmdargs["--to-path"],
                    mwrestfile.source,
                    OUTPUT_FORMATS[cmdargs.get("--output-format")]),
                "w",
                encoding="utf-8"
            ) as fh:
                mwrestfile.write(fh)

        # mwtab download study ...
        elif cmdargs["study"]:

            # mwtab download study all ...
            if cmdargs["all"]:
                # mwtab download study all ...
                # mwtab download study all --input-item=analysis_id ...
                # mwtab download study all --input-item=study_id ...
                # TODO: mwtab download study all --input-item=project_id ...
                if not cmdargs["--input-item"] or cmdargs["--input-item"] in ("analysis_id", "study_id"):
                    cmdargs["<input-item>"] = cmdargs["--input-item"]

                    id_list = list()
                    if not cmdargs["--input-item"] or cmdargs["--input-item"] == "analysis_id":
                        id_list = mwrest.analysis_ids()
                    elif cmdargs["--input-item"] == "study_id":
                        id_list = mwrest.study_ids()

                    _download_study_list(id_list, cmdargs, VERBOSE)

                else:
                    raise ValueError("Unknown \"--input-item\" {}".format(cmdargs["--input-item"]))

            # mwtab download study <input_value> ...
            elif cmdargs["<input-value>"] and not cmdargs["<input-item>"]:
                # <input-value> naming an existing file: the file holds a JSON collection of identifiers
                if isfile(cmdargs["<input-value>"]):
                    with open(cmdargs["<input-value>"], "r") as fh:
                        id_list = json.loads(fh.read())

                    if VERBOSE:
                        print("Found {} Files to be Downloaded".format(len(id_list)))
                    _download_study_list(id_list, cmdargs, VERBOSE)

                else:
                    input_item = cmdargs.get("--input-item")
                    input_value = cmdargs["<input-value>"]
                    # infer the input item from the shape of the value when it is not given explicitly
                    if not input_item:
                        if input_value.isdigit():
                            input_value = "AN{}".format(input_value.zfill(6))
                            input_item = "analysis_id"
                        elif re.match(r'(AN[0-9]{6}$)', input_value):
                            input_item = "analysis_id"
                        elif re.match(r'(ST[0-9]{6}$)', input_value):
                            input_item = "study_id"
                    # Default to "txt" (as download() does) so that omitting --output-format
                    # does not fail when the default file name is built below.
                    output_format = cmdargs["--output-format"] or "txt"
                    mwresturl = mwrest.GenericMWURL({
                        "context": "study",
                        "input_item": input_item,
                        "input_value": input_value,
                        "output_item": cmdargs.get("--output-item") or "mwtab",
                        "output_format": output_format,
                    }, cmdargs["--mw-rest"]).url
                    mwrestfile = next(fileio.read_mwrest(mwresturl))
                    with open(cmdargs["--to-path"] or join(getcwd(),
                                                           quote_plus(mwrestfile.source).replace(".", "_") + "." +
                                                           output_format),
                              "w", encoding="utf-8") as fh:
                        mwrestfile.write(fh)

            # mwtab download (study | ...) <input_item> ...
            elif cmdargs["<input-item>"]:
                download("study", cmdargs)

        # mwtab download (... compound | refmet | gene | protein) ...
        elif cmdargs["compound"]:
            download("compound", cmdargs)
        elif cmdargs["refmet"]:
            download("refmet", cmdargs)
        elif cmdargs["gene"]:
            download("gene", cmdargs)
        elif cmdargs["protein"]:
            download("protein", cmdargs)

        # mwtab download moverz <input-value> <m/z-value> <ion-type-value> <m/z-tolerance-value> [--verbose]
        elif cmdargs["moverz"]:
            mwresturl = mwrest.GenericMWURL({
                "context": "moverz",
                "input_item": cmdargs["<input-item>"],
                "m/z_value": cmdargs["<m/z-value>"],
                "ion_type_value": cmdargs["<ion-type-value>"],
                "m/z_tolerance_value": cmdargs["<m/z-tolerance-value>"],
            }).url
            mwrestfile = next(fileio.read_mwrest(mwresturl))
            # written as UTF-8, consistent with the other download branches
            with open(cmdargs["--to-path"] or join(getcwd(), quote_plus(mwrestfile.source).replace(".", "_") + ".txt"),
                      "w", encoding="utf-8") as fh:
                mwrestfile.write(fh)

        # mwtab download exactmass <LIPID-abbreviation> <ion-type-value> [--verbose]
        elif cmdargs["exactmass"]:
            mwresturl = mwrest.GenericMWURL({
                "context": "exactmass",
                "LIPID_abbreviation": cmdargs["<LIPID-abbreviation>"],
                "ion_type_value": cmdargs["<ion-type-value>"],
            }).url
            mwrestfile = next(fileio.read_mwrest(mwresturl))
            # written as UTF-8, consistent with the other download branches
            with open(cmdargs["--to-path"] or join(getcwd(), quote_plus(mwrestfile.source).replace(".", "_") + ".txt"),
                      "w", encoding="utf-8") as fh:
                mwrestfile.write(fh)

    # mwtab extract ...
    elif cmdargs["extract"]:
        mwfile_generator = fileio.read_files(cmdargs["<from-path>"])
        if cmdargs["metabolites"]:
            # a value written as r'...' is compiled into a regular expression; anything else is used as given
            metabolites_dict = mwextract.extract_metabolites(
                mwfile_generator,
                mwextract.generate_matchers(
                    [(cmdargs["<key>"][i],
                      cmdargs["<value>"][i] if not cmdargs["<value>"][i][:2] == "r'" else re.compile(cmdargs["<value>"][i][2:-1]))
                     for i in range(len(cmdargs["<key>"]))]
                )
            )

            # a <to-path> of "-" sends the result to stdout
            if cmdargs["<to-path>"] != "-":
                if cmdargs["--to-format"] == "csv":
                    mwextract.write_metabolites_csv(cmdargs["<to-path>"], metabolites_dict, cmdargs["--no-header"])
                else:
                    mwextract.write_json(cmdargs["<to-path>"], metabolites_dict)
            else:
                print(json.dumps(metabolites_dict, indent=4, cls=mwextract.SetEncoder))

        elif cmdargs["metadata"]:
            # collect the values found for each key across all files
            metadata = dict()
            for mwtabfile in mwfile_generator:
                extracted_values = mwextract.extract_metadata(mwtabfile, cmdargs["<key>"])
                for key, val in extracted_values.items():
                    metadata.setdefault(key, set()).update(val)
            if cmdargs["<to-path>"] != "-":
                if cmdargs["--to-format"] == "csv":
                    mwextract.write_metadata_csv(cmdargs["<to-path>"], metadata, cmdargs["--no-header"])
                else:
                    mwextract.write_json(cmdargs["<to-path>"], metadata)
            else:
                print(metadata)
323 | 


--------------------------------------------------------------------------------
/mwtab/converter.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | """
  5 | mwtab.converter
  6 | ~~~~~~~~~~~~~~~
  7 | 
  8 | This module provides functionality for converting between the
  9 | Metabolomics Workbench ``mwTab`` formatted file and its equivalent
 10 | JSONized representation.
 11 | 
 12 | The following conversions are possible:
 13 | 
 14 | Local files:
 15 |    * One-to-one file conversions:
 16 |       * textfile - to - textfile
 17 |       * textfile - to - textfile.gz
 18 |       * textfile - to - textfile.bz2
 19 |       * textfile.gz - to - textfile
 20 |       * textfile.gz - to - textfile.gz
 21 |       * textfile.gz - to - textfile.bz2
 22 |       * textfile.bz2 - to - textfile
 23 |       * textfile.bz2 - to - textfile.gz
 24 |       * textfile.bz2 - to - textfile.bz2
 25 |       * textfile / textfile.gz / textfile.bz2 - to - textfile.zip / textfile.tar / textfile.tar.gz / textfile.tar.bz2 (TypeError: One-to-many conversion)
 26 |    * Many-to-many files conversions:
 27 |       * Directories:
 28 |          * directory - to - directory
 29 |          * directory - to - directory.zip
 30 |          * directory - to - directory.tar
 31 |          * directory - to - directory.tar.bz2
 32 |          * directory - to - directory.tar.gz
 33 |          * directory - to - directory.gz / directory.bz2 (TypeError: Many-to-one conversion)
 34 |       * Zipfiles:
 35 |          * zipfile.zip - to - directory
 36 |          * zipfile.zip - to - zipfile.zip
 37 |          * zipfile.zip - to - tarfile.tar
 38 |          * zipfile.zip - to - tarfile.tar.gz
 39 |          * zipfile.zip - to - tarfile.tar.bz2
 40 |          * zipfile.zip - to - directory.gz / directory.bz2 (TypeError: Many-to-one conversion)
 41 |       * Tarfiles:
 42 |          * tarfile.tar - to - directory
 43 |          * tarfile.tar - to - zipfile.zip
 44 |          * tarfile.tar - to - tarfile.tar
 45 |          * tarfile.tar - to - tarfile.tar.gz
 46 |          * tarfile.tar - to - tarfile.tar.bz2
 47 |          * tarfile.tar - to - directory.gz / directory.bz2 (TypeError: Many-to-one conversion)
 48 |          * tarfile.tar.gz - to - directory
 49 |          * tarfile.tar.gz - to - zipfile.zip
 50 |          * tarfile.tar.gz - to - tarfile.tar
 51 |          * tarfile.tar.gz - to - tarfile.tar.gz
 52 |          * tarfile.tar.gz - to - tarfile.tar.bz2
 53 |          * tarfile.tar.gz - to - directory.gz / directory.bz2 (TypeError: Many-to-one conversion)
 54 |          * tarfile.tar.bz2 - to - directory
 55 |          * tarfile.tar.bz2 - to - zipfile.zip
 56 |          * tarfile.tar.bz2 - to - tarfile.tar
 57 |          * tarfile.tar.bz2 - to - tarfile.tar.gz
 58 |          * tarfile.tar.bz2 - to - tarfile.tar.bz2
 59 |          * tarfile.tar.bz2 - to - directory.gz / directory.bz2 (TypeError: Many-to-one conversion)
 60 | URL files:
 61 |    * One-to-one file conversions:
 62 |       * analysis_id - to - textfile
 63 |       * analysis_id - to - textfile.gz
 64 |       * analysis_id - to - textfile.bz2
 65 |       * analysis_id - to - textfile.zip / textfile.tar / textfile.tar.gz / textfile.tar.bz2 (TypeError: One-to-many conversion)
 66 |       * textfileurl - to - textfile
 67 |       * textfileurl - to - textfile.gz
 68 |       * textfileurl - to - textfile.bz2
 69 |       * textfileurl.gz - to - textfile
 70 |       * textfileurl.gz - to - textfile.gz
 71 |       * textfileurl.gz - to - textfile.bz2
 72 |       * textfileurl.bz2 - to - textfile
 73 |       * textfileurl.bz2 - to - textfile.gz
 74 |       * textfileurl.bz2 - to - textfile.bz2
 75 |       * textfileurl / textfileurl.gz / textfileurl.bz2 - to - textfile.zip / textfile.tar / textfile.tar.gz / textfile.tar.bz2 (TypeError: One-to-many conversion)
 76 |    * Many-to-many files conversions:
 77 |       * Zipfiles:
 78 |          * zipfileurl.zip - to - directory
 79 |          * zipfileurl.zip - to - zipfile.zip
 80 |          * zipfileurl.zip - to - tarfile.tar
 81 |          * zipfileurl.zip - to - tarfile.tar.gz
 82 |          * zipfileurl.zip - to - tarfile.tar.bz2
 83 |          * zipfileurl.zip - to - directory.gz / directory.bz2 (TypeError: Many-to-one conversion)
 84 |       * Tarfiles:
 85 |          * tarfileurl.tar - to - directory
 86 |          * tarfileurl.tar - to - zipfile.zip
 87 |          * tarfileurl.tar - to - tarfile.tar
 88 |          * tarfileurl.tar - to - tarfile.tar.gz
 89 |          * tarfileurl.tar - to - tarfile.tar.bz2
 90 |          * tarfileurl.tar - to - directory.gz / directory.bz2 (TypeError: Many-to-one conversion)
 91 |          * tarfileurl.tar.gz - to - directory
 92 |          * tarfileurl.tar.gz - to - zipfile.zip
 93 |          * tarfileurl.tar.gz - to - tarfile.tar
 94 |          * tarfileurl.tar.gz - to - tarfile.tar.gz
 95 |          * tarfileurl.tar.gz - to - tarfile.tar.bz2
 96 |          * tarfileurl.tar.gz - to - directory.gz / directory.bz2 (TypeError: Many-to-one conversion)
 97 |          * tarfileurl.tar.bz2 - to - directory
 98 |          * tarfileurl.tar.bz2 - to - zipfile.zip
 99 |          * tarfileurl.tar.bz2 - to - tarfile.tar
100 |          * tarfileurl.tar.bz2 - to - tarfile.tar.gz
101 |          * tarfileurl.tar.bz2 - to - tarfile.tar.bz2
102 |          * tarfileurl.tar.bz2 - to - directory.gz / directory.bz2 (TypeError: Many-to-one conversion)
103 | """
104 | 
105 | import os
106 | import io
107 | import zipfile
108 | import tarfile
109 | import bz2
110 | import gzip
111 | 
112 | from . import fileio
113 | 
114 | 
115 | class Translator(object):
116 |     """Translator abstract class."""
117 | 
118 |     def __init__(self, from_path, to_path, from_format=None, to_format=None, validate=False):
119 |         """Translator initializer.
120 |         :param str from_path: Path to input file(s).
121 |         :param str to_path: Path to output file(s).
122 |         :param str from_format: Input format.
123 |         :param str to_format: Output format.
124 |         """
125 |         self.from_path = from_path
126 |         self.to_path = to_path
127 |         self.from_format = from_format
128 |         self.to_format = to_format
129 |         self.from_path_compression = fileio.GenericFilePath.is_compressed(from_path)
130 |         self.to_path_compression = fileio.GenericFilePath.is_compressed(to_path)
131 |         self.validate = validate
132 | 
133 |     def __iter__(self):
134 |         """Abstract iterator must be implemented in a subclass."""
135 |         raise NotImplementedError()
136 | 
137 | 
138 | class MWTabFileToMWTabFile(Translator):
139 |     """Translator concrete class that can convert between ``mwTab`` and ``JSON`` formats."""
140 | 
141 |     file_extension = {"json": ".json",
142 |                       "mwtab": ".txt"}
143 | 
144 |     def __init__(self, from_path, to_path, from_format=None, to_format=None, validate=False):
145 |         """MWTabFileToMWTabFile translator initializer.
146 |         :param str from_path: Path to input file(s).
147 |         :param str to_path: Path to output file(s).
148 |         :param str from_format: Input format: `mwtab` or `json`.
149 |         :param str to_format: Output format: `mwtab` or `json`.
150 |         :param bool validate: whether to validate or not.
151 |         """
152 |         super(MWTabFileToMWTabFile, self).__init__(from_path, to_path, from_format, to_format, validate)
153 | 
154 |     def __iter__(self):
155 |         """Iterator that yields instances of :class:`~mwtab.mwtab.MWTabFile` instances.
156 |         :return: instance of :class:`~mwtab.mwtab.MWTabFile` object instance.
157 |         :rtype: :class:`~mwtab.mwtab.MWTabFile`
158 |         """
159 |         for mwtabfile in fileio.read_files(self.from_path, validate=self.validate):
160 |             yield mwtabfile
161 | 
162 | 
163 | class Converter(object):
164 |     """Converter class to convert ``mwTab`` files from ``mwTab`` to ``JSON`` or from ``JSON`` to ``mwTab`` format."""
165 | 
166 |     def __init__(self, from_path, to_path, from_format="mwtab", to_format="json", validate=False):
167 |         """Converter initializer.
168 |         :param str from_path: Path to input file(s).
169 |         :param str to_path: Path to output file(s).
170 |         :param str from_format: Input format: `mwtab` or `json`.
171 |         :param str to_format: Output format: `mwtab` or `json`.
172 |         :param bool validate: whether to validate or not.
173 |         """
174 |         self.file_generator = MWTabFileToMWTabFile(from_path, to_path, from_format, to_format, validate)
175 | 
176 |     def convert(self):
177 |         """Convert file(s) from ``mwTab`` format to ``JSON`` format or from ``JSON`` format to ``mwTab`` format.
178 |         :return: None
179 |         :rtype: :py:obj:`None`
180 |         """
181 |         if not os.path.exists(os.path.dirname(self.file_generator.to_path)):
182 |             dirname = os.path.dirname(self.file_generator.to_path)
183 |             if dirname:
184 |                 os.makedirs(dirname)
185 | 
186 |         if os.path.isdir(self.file_generator.from_path):
187 |             self._many_to_many()
188 |         elif os.path.isfile(self.file_generator.from_path) or fileio.GenericFilePath.is_url(self.file_generator.from_path):
189 |             if self.file_generator.from_path_compression in ("zip", "tar", "tar.gz", "tar.bz2"):
190 |                 self._many_to_many()
191 |             elif self.file_generator.from_path_compression in ("gz", "bz2"):
192 |                 self._one_to_one()
193 |             elif not self.file_generator.from_path_compression:
194 |                 self._one_to_one()
195 |         elif self.file_generator.from_path.isdigit():
196 |             self._one_to_one()
197 |         else:
198 |             raise TypeError('Unknown input file format: "{}"'.format(self.file_generator.from_path))
199 | 
200 |     def _many_to_many(self):
201 |         """Perform many-to-many files conversion.
202 |         :return: None
203 |         :rtype: :py:obj:`None`
204 |         """
205 |         if not self.file_generator.to_path_compression:
206 |             self._to_dir(self.file_generator)
207 |         elif self.file_generator.to_path_compression == "zip":
208 |             self._to_zipfile(self.file_generator)
209 |         elif self.file_generator.to_path_compression in ("tar", "tar.gz", "tar.bz2"):
210 |             self._to_tarfile(self.file_generator)
211 |         elif self.file_generator.to_path_compression in ("gz", "bz2"):
212 |             raise TypeError('Many-to-one conversion, cannot convert "{}" into "{}"'.format(self.file_generator.from_path,
213 |                                                                                            self.file_generator.to_path))
214 |         else:
215 |             raise TypeError('Unknown output file format: "{}"'.format(self.file_generator.to_path))
216 | 
217 |     def _one_to_one(self):
218 |         """Perform one-to-one file conversion.
219 |         :return: None
220 |         :rtype: :py:obj:`None`
221 |         """
222 |         if not self.file_generator.to_path_compression:
223 |             self._to_textfile(self.file_generator)
224 |         elif self.file_generator.to_path_compression == "gz":
225 |             self._to_gzipfile(self.file_generator)
226 |         elif self.file_generator.to_path_compression == "bz2":
227 |             self._to_bz2file(self.file_generator)
228 |         elif self.file_generator.to_path_compression in ("tar", "tar.gz", "tar.bz2", "zip"):
229 |             raise TypeError('One-to-many conversion, cannot convert "{}" into "{}"'.format(self.file_generator.from_path,
230 |                                                                                            self.file_generator.to_path))
231 |         else:
232 |             raise TypeError('Unknown format: "{}"'.format(self.file_generator.to_path))
233 | 
234 |     def _to_dir(self, file_generator):
235 |         """Convert files to directory.
236 |         :return: None
237 |         :rtype: :py:obj:`None`
238 |         """
239 |         for f in file_generator:
240 |             outpath = self._output_path(f.source, file_generator.to_format)
241 | 
242 |             if not os.path.exists(os.path.dirname(outpath)):
243 |                 os.makedirs(os.path.dirname(outpath))
244 | 
245 |             with open(outpath, mode="w") as outfile:
246 |                 f.write(outfile, file_generator.to_format)
247 | 
248 |     def _to_zipfile(self, file_generator):
249 |         """Convert files to zip archive.
250 |         :return: None
251 |         :rtype: :py:obj:`None`
252 |         """
253 |         with zipfile.ZipFile(file_generator.to_path, mode="w", compression=zipfile.ZIP_DEFLATED) as outfile:
254 |             for f in file_generator:
255 |                 outpath = self._output_path(f.source, file_generator.to_format, archive=True)
256 |                 outfile.writestr(outpath, f.writestr(file_generator.to_format))
257 | 
258 |     def _to_tarfile(self, file_generator):
259 |         """Convert files to tar archive.
260 |         :return: None
261 |         :rtype: :py:obj:`None`
262 |         """
263 |         if file_generator.to_path_compression == "tar":
264 |             tar_mode = "w"
265 |         elif file_generator.to_path_compression == "tar.gz":
266 |             tar_mode = "w:gz"
267 |         elif file_generator.to_path_compression == 'tar.bz2':
268 |             tar_mode = "w:bz2"
269 |         else:
270 |             tar_mode = "w"
271 | 
272 |         with tarfile.open(file_generator.to_path, mode=tar_mode) as outfile:
273 |             for f in file_generator:
274 |                 outpath = self._output_path(f.source, file_generator.to_format, archive=True)
275 |                 info = tarfile.TarInfo(outpath)
276 |                 data = f.writestr(file_generator.to_format).encode()
277 |                 info.size = len(data)
278 |                 outfile.addfile(tarinfo=info, fileobj=io.BytesIO(data))
279 | 
280 |     def _to_bz2file(self, file_generator):
281 |         """Convert file to bz2-compressed file.
282 |         :return: None
283 |         :rtype: :py:obj:`None`
284 |         """
285 |         with bz2.BZ2File(file_generator.to_path, mode="wb") as outfile:
286 |             for f in file_generator:
287 |                 outfile.write(f.writestr(file_generator.to_format).encode())
288 | 
289 |     def _to_gzipfile(self, file_generator):
290 |         """Convert file to gzip-compressed file.
291 |         :return: None
292 |         :rtype: :py:obj:`None`
293 |         """
294 |         with gzip.GzipFile(file_generator.to_path, mode="wb") as outfile:
295 |             for f in file_generator:
296 |                 outfile.write(f.writestr(file_generator.to_format).encode())
297 | 
298 |     def _to_textfile(self, file_generator):
299 |         """Convert file to regular text file.
300 |         :return: None
301 |         :rtype: :py:obj:`None`
302 |         """
303 |         to_path = file_generator.to_path \
304 |             if file_generator.to_path.endswith(file_generator.file_extension[file_generator.to_format]) \
305 |             else file_generator.to_path + file_generator.file_extension[file_generator.to_format]
306 | 
307 |         with open(to_path, mode="w") as outfile:
308 |             for f in file_generator:
309 |                 outfile.write(f.writestr(file_generator.to_format))
310 | 
311 |     def _output_path(self, input_path, to_format, archive=False):
312 |         """Construct an output path string from an input path string.
313 |         :param str input_path: Input path string.
314 |         :return: Output path string.
315 |         :rtype: :py:class:`str`
316 |         """
317 |         indirpath, fname = os.path.split(os.path.abspath(os.path.normpath(input_path)))
318 | 
319 |         commonprefix = os.path.commonprefix([os.path.abspath(self.file_generator.from_path),
320 |                                              os.path.abspath(indirpath)])
321 | 
322 |         commonparts = commonprefix.split(os.sep)
323 |         inparts = indirpath.split(os.sep)
324 |         outparts = inparts[len(commonparts):]
325 | 
326 |         if archive:
327 |             outdirpath = os.path.join(*outparts) if outparts else ""
328 |         else:
329 |             outdirpath = os.path.join(self.file_generator.to_path, *outparts)
330 | 
331 |         return os.path.join(outdirpath, fname + self.file_generator.file_extension[to_format])


--------------------------------------------------------------------------------
/mwtab/fileio.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | """
  5 | mwtab.fileio
  6 | ~~~~~~~~~~~~
  7 | 
  8 | This module provides routines for reading ``mwTab`` formatted files
  9 | from different kinds of sources:
 10 | 
 11 |    * Single ``mwTab`` formatted file on a local machine.
 12 |    * Directory containing multiple ``mwTab`` formatted files.
 13 |    * Compressed zip/tar archive of ``mwTab`` formatted files.
 14 |    * URL address of ``mwTab`` formatted file.
 15 |    * ``ANALYSIS_ID`` of ``mwTab`` formatted file. 
 16 | """
 17 | 
 18 | import os
 19 | import io
 20 | import zipfile
 21 | import tarfile
 22 | import bz2
 23 | import gzip
 24 | from re import match
 25 | 
 26 | from . import mwtab
 27 | from . import validator
 28 | from . import mwschema
 29 | from . import mwrest
 30 | 
 31 | from urllib.request import urlopen
 32 | from urllib.parse import urlparse
 33 | 
 34 | 
 35 | VERBOSE = False
 36 | 
 37 | 
 38 | def _generate_filenames(sources):
 39 |     """Generate filenames.
 40 | 
 41 |     :param tuple sources: Sequence of strings representing path to file(s).
 42 |     :return: Path to file(s).
 43 |     :rtype: :py:class:`str`
 44 |     """
 45 |     for source in sources:
 46 |         if os.path.isdir(source):
 47 |             for path, _, filelist in os.walk(source):
 48 |                 for fname in filelist:
 49 |                     if os.path.splitext(fname)[1].lower() in {".csv", ".txt", ".json"}:
 50 |                         if GenericFilePath.is_compressed(fname):
 51 |                             if VERBOSE:
 52 |                                 print("Skipping compressed file: {}".format(os.path.abspath(fname)))
 53 |                             continue
 54 |                         else:
 55 |                             yield os.path.join(path, fname)
 56 | 
 57 |         elif os.path.isfile(source):
 58 |             yield source
 59 | 
 60 |         elif source.isdigit():
 61 |             yield next(mwrest.generate_mwtab_urls([source]))
 62 | 
 63 |         # TODO: Add ST parsing
 64 |         elif match(r"(AN[0-9]{6}$)", source):
 65 |             yield next(mwrest.generate_mwtab_urls([source]))
 66 | 
 67 |         elif GenericFilePath.is_url(source):
 68 |             yield source
 69 | 
 70 |         else:
 71 |             raise TypeError("Unknown file source.")
 72 | 
 73 | 
 74 | def _generate_handles(filenames):
 75 |     """Open a sequence of filenames one at time producing file objects.
 76 |     The file is closed immediately when proceeding to the next iteration.
 77 | 
 78 |     :param generator filenames: Generator object that yields the path to each file, one at a time.
 79 |     :return: Filehandle to be processed into an instance.
 80 |     """
 81 |     for fname in filenames:
 82 |         path = GenericFilePath(fname)
 83 |         for filehandle, source in path.open():
 84 |             yield filehandle, source
 85 |             filehandle.close()
 86 | 
 87 | 
 88 | def read_files(*sources, **kwds):
 89 |     """Construct a generator that yields file instances.
 90 | 
 91 |     :param sources: One or more strings representing path to file(s).
 92 |     """
 93 |     filenames = _generate_filenames(sources)
 94 |     filehandles = _generate_handles(filenames)
 95 |     for fh, source in filehandles:
 96 |         try:
 97 |             f = mwtab.MWTabFile(source)
 98 |             f.read(fh)
 99 | 
100 |             if kwds.get('validate'):
101 |                 validator.validate_file(mwtabfile=f,
102 |                                         section_schema_mapping=mwschema.section_schema_mapping)
103 | 
104 |             if VERBOSE:
105 |                 print("Processed file: {}".format(os.path.abspath(source)))
106 | 
107 |             yield f
108 | 
109 |         except Exception as e:
110 |             if VERBOSE:
111 |                 print("Error processing file: ", os.path.abspath(source), "\nReason:", e)
112 |             raise e
113 | 
114 | 
115 | def read_mwrest(*sources, **kwds):
116 |     """Construct a generator that yields file instances.
117 | 
118 |     :param sources: One or more strings representing path to file(s).
119 |     """
120 |     filenames = _generate_filenames(sources)
121 |     filehandles = _generate_handles(filenames)
122 |     for fh, source in filehandles:
123 |         try:
124 |             f = mwrest.MWRESTFile(source)
125 |             f.read(fh)
126 | 
127 |             if VERBOSE:
128 |                 print("Processed url: {}".format(source))
129 | 
130 |             yield f
131 | 
132 |         except Exception as e:
133 |             if VERBOSE:
134 |                 print("Error processing url: ", source, "\nReason:", e)
135 |             pass
136 | 
137 | 
138 | class GenericFilePath(object):
139 |     """`GenericFilePath` class knows how to open local files or files over URL."""
140 | 
141 |     def __init__(self, path):
142 |         """Initialize path.
143 | 
144 |         :param str path: String representing a path to local file(s) or valid URL address of file(s).
145 |         """
146 |         self.path = path
147 | 
148 |     def open(self):
149 |         """Generator that opens and yields filehandles using appropriate facilities:
150 |         test if path represents a local file or file over URL, if file is compressed
151 |         or not.
152 | 
153 |         :return: Filehandle to be processed into an instance.
154 |         """
155 |         is_url = self.is_url(self.path)
156 |         compression_type = self.is_compressed(self.path)
157 | 
158 |         if not compression_type:
159 |             if is_url:
160 |                 filehandle = urlopen(self.path)
161 |             else:
162 |                 filehandle = open(self.path, "r", encoding="utf-8")
163 |             source = self.path
164 |             yield filehandle, source
165 |             filehandle.close()
166 | 
167 |         elif compression_type:
168 |             if is_url:
169 |                 response = urlopen(self.path)
170 |                 path = response.read()
171 |                 response.close()
172 |             else:
173 |                 path = self.path
174 | 
175 |             if compression_type == "zip":
176 |                 ziparchive = zipfile.ZipFile(io.BytesIO(path), "r") if is_url else zipfile.ZipFile(path)
177 |                 for name in ziparchive.infolist():
178 |                     if not name.filename.endswith("/"):
179 |                         filehandle = ziparchive.open(name)
180 |                         source = self.path + "/" + name.filename
181 |                         yield filehandle, source
182 |                         filehandle.close()
183 | 
184 |             elif compression_type in ("tar", "tar.bz2", "tar.gz"):
185 |                 tararchive = tarfile.open(fileobj=io.BytesIO(path)) if is_url else tarfile.open(path)
186 |                 for name in tararchive:
187 |                     if name.isfile():
188 |                         filehandle = tararchive.extractfile(name)
189 |                         source = self.path + "/" + name.name
190 |                         yield filehandle, source
191 |                         filehandle.close()
192 | 
193 |             elif compression_type == "bz2":
194 |                 filehandle = bz2.BZ2File(io.BytesIO(path)) if is_url else bz2.BZ2File(path)
195 |                 source = self.path
196 |                 yield filehandle, source
197 |                 filehandle.close()
198 | 
199 |             elif compression_type == "gz":
200 |                 filehandle = gzip.open(io.BytesIO(path)) if is_url else gzip.open(path)
201 |                 source = self.path
202 |                 yield filehandle, source
203 |                 filehandle.close()
204 | 
205 |     @staticmethod
206 |     def is_compressed(path):
207 |         """Test if path represents compressed file(s).
208 | 
209 |         :param str path: Path to file(s).
210 |         :return: String specifying compression type if compressed, "" otherwise.
211 |         :rtype: :py:class:`str`
212 |         """
213 |         if path.endswith(".zip"):
214 |             return "zip"
215 |         elif path.endswith(".tar.gz"):
216 |             return "tar.gz"
217 |         elif path.endswith(".tar.bz2"):
218 |             return "tar.bz2"
219 |         elif path.endswith(".gz"):
220 |             return "gz"
221 |         elif path.endswith(".bz2"):
222 |             return "bz2"
223 |         elif path.endswith(".tar"):
224 |             return "tar"
225 |         return ""
226 | 
227 |     @staticmethod
228 |     def is_url(path):
229 |         """Test if path represents a valid URL.
230 | 
231 |         :param str path: Path to file.
232 |         :return: True if path is valid url string, False otherwise.
233 |         :rtype: :py:obj:`True` or :py:obj:`False`
234 |         """
235 |         try:
236 |             parse_result = urlparse(path)
237 |             return all((parse_result.scheme, parse_result.netloc, parse_result.path))
238 |         except ValueError:
239 |             return False
240 | 


--------------------------------------------------------------------------------
/mwtab/mwextract.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | """
  5 | mwtab.mwextract
  6 | ~~~~~~~~~~~~~~~
  7 | 
  8 | This module provides a number of functions and classes for extracting and saving data and metadata
  9 | stored in ``mwTab`` formatted files in the form of :class:`~mwtab.mwtab.MWTabFile`.
 10 | """
 11 | import csv
 12 | import json
 13 | import os
 14 | import re
 15 | 
 16 | 
 17 | class ItemMatcher(object):
 18 |     """ItemMatcher class that can be called to match items from ``mwTab`` formatted files in the form of
 19 |     :class:`~mwtab.mwtab.MWTabFile`.
 20 |     """
 21 | 
 22 |     section_conversion = {
 23 |         "PR": "PROJECT",
 24 |         "ST": "STUDY",
 25 |         "SU": "SUBJECT",
 26 |         "CO": "COLLECTION",
 27 |         "TR": "TREATMENT",
 28 |         "SP": "SAMPLEPREP",
 29 |         "CH": "CHROMATOGRAPHY",
 30 |         "AN": "ANALYSIS",
 31 |         "MS": "MS",
 32 |         "NM": "NMR",
 33 |     }
 34 | 
 35 |     def __init__(self, full_key, value_comparison):
 36 |         """ItemMatcher initializer.
 37 | 
 38 |         :param str full_key: Key to match in :class:`~mwtab.mwtab.MWTabFile`.
 39 |         :param value_comparison: Value to match in :class:`~mwtab.mwtab.MWTabFile`.
 40 |         :type value_comparison: :class:`re.Pattern` or :py:class:`str`
 41 |         """
 42 |         self.full_key = full_key
 43 |         self.section, self.key = self.full_key.split(":")
 44 |         self.section = ItemMatcher.section_conversion[self.section]
 45 |         self.value_comparison = value_comparison
 46 | 
 47 |     def __call__(self, mwtabfile):
 48 |         """Match key value pair in :class:`~mwtab.mwtab.MWTabFile`.
 49 | 
 50 |         :param mwtabfile: Instance of :class:`~mwtab.mwtab.MWTabFile`.
 51 |         :type mwtabfile: :class:`~mwtab.mwtab.MWTabFile`
 52 |         :return: True if key and value are present, False otherwise.
 53 |         :rtype: :py:obj:`True` or :py:obj:`False`
 54 |         """
 55 |         return mwtabfile[self.section][self.key] == self.value_comparison
 56 | 
 57 | 
 58 | class ReGeXMatcher(ItemMatcher):
 59 |     """ReGeXMatcher class that can be called to match items from ``mwTab`` formatted files in the form of
 60 |     :class:`~mwtab.mwtab.MWTabFile` using regular expressions.
 61 |     """
 62 | 
 63 |     def __init__(self, full_key, value_comparison):
 64 |         """ItemMatcher initializer.
 65 | 
 66 |         :param str full_key: Key to match in :class:`~mwtab.mwtab.MWTabFile`.
 67 |         :param value_comparison: Value, in the form of a regular expression, to match in
 68 |         :class:`~mwtab.mwtab.MWTabFile`.
 69 |         :type value_comparison: :class:`re.Pattern`
 70 |         """
 71 |         super(ReGeXMatcher, self).__init__(full_key, value_comparison)
 72 | 
 73 |     def __call__(self, mwtabfile):
 74 |         """Match key value pair in :class:`~mwtab.mwtab.MWTabFile`.
 75 | 
 76 |         :param mwtabfile: Instance of :class:`~mwtab.mwtab.MWTabFile`.
 77 |         :type mwtabfile: :class:`~mwtab.mwtab.MWTabFile`
 78 |         :return: True if key and value are present, False otherwise.
 79 |         :rtype: :py:obj:`True` or :py:obj:`False`
 80 |         """
 81 |         return re.search(self.value_comparison, mwtabfile[self.section][self.key])
 82 | 
 83 | 
 84 | def generate_matchers(items):
 85 |     """Construct a generator that yields Matchers :class:`~mwtab.mwtab.ItemMatcher` or
 86 |     :class:`~mwtab.mwtab.ReGeXMatcher`.
 87 | 
 88 |     :param iterable items: Iterable object containing key value pairs to match.
 89 |     :return: Yields a Matcher object for each given item.
 90 |     :rtype: :class:`~mwtab.mwtab.ItemMatcher` or :class:`~mwtab.mwtab.ReGeXMatcher`
 91 |     """
 92 |     for item in items:
 93 |         if type(item[1]) == re.Pattern:
 94 |             yield ReGeXMatcher(item[0], item[1])
 95 | 
 96 |         else:
 97 |             yield ItemMatcher(item[0], item[1])
 98 | 
 99 | 
100 | def extract_metabolites(sources, matchers):
101 |     """Extract metabolite data from ``mwTab`` formatted files in the form of :class:`~mwtab.mwtab.MWTabFile`.
102 | 
103 |     :param generator sources: Generator of mwtab file objects (:class:`~mwtab.mwtab.MWTabFile`).
104 |     :param generator matchers: Generator of matcher objects (:class:`~mwtab.mwextract.ItemMatcher` or
105 |     :class:`~mwtab.mwextract.ReGeXMatcher`).
106 |     :return: Extracted metabolites dictionary.
107 |     :rtype: :py:class:`dict`
108 |     """
109 |     metabolites = dict()
110 |     for mwtabfile in sources:
111 |         if all(matcher(mwtabfile) for matcher in matchers):
112 |             data_section_key = list(set(mwtabfile.keys()) & {"MS_METABOLITE_DATA", "NMR_METABOLITE_DATA", "NMR_BINNED_DATA"})[0]
113 |             for data_list in mwtabfile[data_section_key]["Data"]:
114 |                 for test_key in (key for key in data_list.keys() if key != "Metabolite"):
115 |                     try:
116 |                         if float(data_list[test_key]) > 0:
117 |                             metabolites.setdefault(data_list["Metabolite"], dict())\
118 |                                 .setdefault(mwtabfile.study_id, dict())\
119 |                                 .setdefault(mwtabfile.analysis_id, set())\
120 |                                 .add(test_key)
121 |                     except Exception as e:
122 |                         pass
123 |     return metabolites
124 | 
125 | 
126 | def extract_metadata(mwtabfile, keys):
127 |     """Extract metadata data from ``mwTab`` formatted files in the form of :class:`~mwtab.mwtab.MWTabFile`.
128 | 
129 |     :param mwtabfile: mwTab file object for metadata to be extracted from.
130 |     :type mwtabfile: :class:`~mwtab.mwtab.MWTabFile`
131 |     :param list keys: List of metadata field keys for metadata values to be extracted.
132 |     :return: Extracted metadata dictionary.
133 |     :rtype: :py:class:`dict`
134 |     """
135 |     extracted_values = {}
136 |     for section in mwtabfile:
137 |         for metadata in mwtabfile[section]:
138 |             for key in keys:
139 |                 if metadata == key:  # TODO: Allow for partial match, ReGeX, etc.
140 |                     extracted_values.setdefault(key, set()).add(mwtabfile[section][metadata])
141 | 
142 |     return extracted_values
143 | 
144 | 
def write_metadata_csv(to_path, extracted_values, no_header=False):
    """Write extracted metadata :py:class:`dict` into csv file.

    Each row holds a metadata key followed by its values in sorted order. Unless ``no_header`` is
    set, a header row is written first; its value columns are numbered starting from zero and there
    are as many of them as the longest set of values. An empty dictionary produces a header that
    contains only the ``metadata`` column.

    Example:
    "metadata","value0","value1"
    "SUBJECT_TYPE","Human","Plant"

    :param str to_path: Path to output file. ``.csv`` is appended when the path has no extension.
    :param dict extracted_values: Metadata dictionary to be saved.
    :param bool no_header: If true header is not included, otherwise header is included.
    :return: None
    :rtype: :py:obj:`None`
    """
    dirname = os.path.dirname(to_path)
    if dirname:
        # exist_ok makes this safe when the directory already exists or is created concurrently.
        os.makedirs(dirname, exist_ok=True)

    if not os.path.splitext(to_path)[1]:
        to_path += ".csv"

    with open(to_path, "w", newline="") as outfile:
        wr = csv.writer(outfile, quoting=csv.QUOTE_ALL)
        if not no_header:
            # default=0 keeps an empty dictionary from raising ValueError inside max().
            max_value_num = max((len(values) for values in extracted_values.values()), default=0)
            wr.writerow(["metadata"] + ["value{}".format(num) for num in range(max_value_num)])
        for key in extracted_values:
            wr.writerow([key] + sorted(extracted_values[key]))
177 | 
178 | 
def write_metabolites_csv(to_path, extracted_values, no_header=False):
    """Summarise extracted metabolites data and save the summary as a csv file.

    One row is written per metabolite: its name, the number of studies it appears in, the total
    number of analyses across those studies and the total number of samples across those analyses.

    Example:
    "metabolite_name","num-studies","num_analyses","num_samples"
    "1,2,4-benzenetriol","1","1","24"
    "1-monostearin","1","1","24"
    ...

    :param str to_path: Path to output file. ``.csv`` is appended when the path has no extension.
    :param dict extracted_values: Metabolites data dictionary to be saved
                                  (metabolite -> study -> analysis -> samples).
    :param bool no_header: If true header is not included, otherwise header is included.
    :return: None
    :rtype: :py:obj:`None`
    """
    rows = []
    for metabolite_name, studies in extracted_values.items():
        analysis_total = sum(len(studies[study_id]) for study_id in studies)
        sample_total = sum(
            len(studies[study_id][analysis_id])
            for study_id in studies
            for analysis_id in studies[study_id]
        )
        rows.append([metabolite_name, len(studies), analysis_total, sample_total])

    # Create the output directory only when the path names one and it is not there yet.
    parent_dir = os.path.dirname(to_path)
    if parent_dir and not os.path.exists(os.path.dirname(os.path.splitext(to_path)[0])):
        os.makedirs(parent_dir)

    _, extension = os.path.splitext(to_path)
    if not extension:
        to_path = to_path + ".csv"

    with open(to_path, "w", newline="") as csv_file:
        writer = csv.writer(csv_file, quoting=csv.QUOTE_ALL)
        if not no_header:
            writer.writerow(["metabolite_name", "num-studies", "num_analyses", "num_samples"])
        writer.writerows(rows)
224 | 
225 | 
class SetEncoder(json.JSONEncoder):
    """JSON encoder that serializes Python :py:class:`set` and :py:class:`frozenset` objects as lists.
    """

    def default(self, obj):
        """Method for encoding Python objects. Sets are converted to JSON serializable lists; any other
        object is passed to the base implementation, which raises :py:class:`TypeError`.

        The elements are emitted in sorted order so that the same data always produces the same JSON
        text. Sets whose elements cannot be ordered (e.g. mixed strings and numbers) fall back to the
        set's own iteration order.

        :param object obj: Python object to be json encoded.
        :return: JSON serializable object.
        :rtype: :py:class:`dict`, :py:class:`list`,
                :py:class:`tuple`, :py:class:`str`,
                :py:class:`int`, :py:class:`float`,
                :py:obj:`bool`, or :py:obj:`None`
        """
        if isinstance(obj, (set, frozenset)):
            try:
                return sorted(obj)
            except TypeError:
                # Elements are not mutually comparable; keep them all, order unspecified.
                return list(obj)
        return json.JSONEncoder.default(self, obj)
244 | 
245 | 
def write_json(to_path, extracted_dict):
    """Save an extracted metabolites or metadata :py:class:`dict` as an indented, key-sorted json file.

    Sets inside the dictionary are serialized through :class:`SetEncoder`.

    Metabolites example:
    {
        "1,2,4-benzenetriol": {
            "ST000001": {
                "AN000001": [
                    "LabF_115816",
                    ...
                ]
            }
        }
    }

    Metadata example:
    {
        "SUBJECT_TYPE": [
            "Plant",
            "Human"
        ]
    }

    :param str to_path: Path to output file. ``.json`` is appended when the path has no extension.
    :param dict extracted_dict: Metabolites data or metadata dictionary to be saved.
    :return: None
    :rtype: :py:obj:`None`
    """
    # Create the output directory only when the path names one and it is not there yet.
    parent_dir = os.path.dirname(to_path)
    stem_dir = os.path.dirname(os.path.splitext(to_path)[0])
    if parent_dir and not os.path.exists(stem_dir):
        os.makedirs(parent_dir)

    _, extension = os.path.splitext(to_path)
    if not extension:
        to_path = to_path + ".json"

    with open(to_path, "w") as json_file:
        json.dump(extracted_dict, json_file, sort_keys=True, indent=4, cls=SetEncoder)
284 | 


--------------------------------------------------------------------------------
/mwtab/tokenizer.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | """
  5 | mwtab.tokenizer
  6 | ~~~~~~~~~~~~~~~
  7 | 
  8 | This module provides the :func:`~mwtab.tokenizer.tokenizer` lexical analyzer for
  9 | `mwTab` format syntax. It is implemented as Python generator-based state
 10 | machine which generates (yields) tokens one at a time when :py:func:`next()`
 11 | is invoked on :func:`~mwtab.tokenizer.tokenizer` instance.
 12 | 
 13 | Each token is a tuple of "key-value"-like pairs, tuple of
 14 | ``SUBJECT_SAMPLE_FACTORS`` or tuple of data deposited between
 15 | ``*_START`` and ``*_END`` blocks.
 16 | """
 17 | 
 18 | from __future__ import print_function, division, unicode_literals
 19 | from collections import deque, namedtuple, OrderedDict
 20 | 
 21 | 
 22 | KeyValue = namedtuple("KeyValue", ["key", "value"])
 23 | KeyValueExtra = namedtuple("KeyValueExtra", ["key", "value", "extra"])
 24 | 
 25 | 
 26 | def tokenizer(text):
 27 |     """A lexical analyzer for the `mwtab` formatted files.
 28 | 
 29 |     :param text: `mwTab` formatted text.
 30 |     :type text: py:class:`str`
 31 |     :return: Tuples of data.
 32 |     :rtype: py:class:`~collections.namedtuple`
 33 |     """
 34 |     stream = deque(text.split("\n"))
 35 | 
 36 |     while len(stream) > 0:
 37 |         line = stream.popleft()
 38 |         try:
 39 | 
 40 |             # header
 41 |             if line.startswith("#METABOLOMICS WORKBENCH"):
 42 |                 yield KeyValue("#METABOLOMICS WORKBENCH", "\n")
 43 |                 for identifier in line.split(" "):
 44 |                     if ":" in identifier:
 45 |                         key, value = identifier.split(":")
 46 |                         yield KeyValue(key, value)
 47 | 
 48 |             # SUBJECT_SAMPLE_FACTORS header (reached new section)
 49 |             elif line.startswith("#SUBJECT_SAMPLE_FACTORS:"):
 50 |                 yield KeyValue("#ENDSECTION", "\n")
 51 |                 yield KeyValue("#SUBJECT_SAMPLE_FACTORS", "\n")
 52 | 
 53 |             # section header (reached new section)
 54 |             elif line.startswith("#"):
 55 |                 yield KeyValue("#ENDSECTION", "\n")
 56 |                 yield KeyValue(line.strip(), "\n")
 57 | 
 58 |             # SUBJECT_SAMPLE_FACTORS line
 59 |             elif line.startswith("SUBJECT_SAMPLE_FACTORS"):
 60 |                 line_items = line.split("\t")
 61 |                 subject_sample_factors_dict = OrderedDict({
 62 |                     "Subject ID": line_items[1],
 63 |                     "Sample ID": line_items[2],
 64 |                     "Factors": {factor_item.split(":")[0].strip(): factor_item.split(":")[1].strip() for factor_item in
 65 |                                 line_items[3].split("|")}
 66 |                 })
 67 |                 if line_items[4]:
 68 |                     subject_sample_factors_dict["Additional sample data"] = {
 69 |                         factor_item.split("=")[0].strip(): factor_item.split("=")[1].strip() for factor_item in line_items[4].split(";")
 70 |                     }
 71 |                 yield KeyValue(line_items[0].strip(), subject_sample_factors_dict)
 72 | 
 73 |             # data start header
 74 |             elif line.endswith("_START"):
 75 |                 yield KeyValue(line, "\n")
 76 | 
 77 |                 # tokenize lines in data section till line ending with "_END" is reached
 78 |                 while not line.endswith("_END"):
 79 |                     line = stream.popleft()
 80 |                     if line.endswith("_END"):
 81 |                         yield KeyValue(line.strip(), "\n")
 82 |                     else:
 83 |                         data = line.split("\t")
 84 |                         yield KeyValue(data[0], tuple(data))
 85 | 
 86 |             # item line in item section (e.g. PROJECT, SUBJECT, etc..)
 87 |             elif line:
 88 |                 if "_RESULTS_FILE" in line:
 89 |                     line_items = line.split("\t")
 90 |                     # if len(line_items) > 2:
 91 |                     #     extra_items = list()
 92 |                     #     for extra_item in line_items[2:]:
 93 |                     #         k, v = extra_item.split(":")
 94 |                     #         extra_items.append(tuple([k.strip(), v.strip()]))
 95 |                     #     yield KeyValueExtra(line_items[0].strip()[3:], line_items[1], extra_items)
 96 |                     # else:
 97 |                     #     yield KeyValue(line_items[0].strip()[3:], line_items[1])
 98 |                     yield KeyValue(line_items[0].strip()[3:], " ".join(line_items[1:]))
 99 |                 else:
100 |                     key, value = line.split("\t")
101 |                     if ":" in key:
102 |                         if ":UNITS" in key:
103 |                             yield KeyValue("Units", value)
104 |                         else:
105 |                             yield KeyValue(key.strip()[3:], value)
106 |                     else:
107 |                         yield KeyValue(key.strip(), value)
108 | 
109 |         except IndexError as e:
110 |             raise IndexError("LINE WITH ERROR:\n\t", repr(line), e)
111 |         except ValueError as e:
112 |             raise ValueError("LINE WITH ERROR:\n\t", repr(line), e)
113 | 
114 |     # end of file
115 |     yield KeyValue("#ENDSECTION", "\n")
116 |     yield KeyValue("!#ENDFILE", "\n")  # This is to ensure that tokenizer terminates when #END is missing.
117 | 


--------------------------------------------------------------------------------
/mwtab/validator.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | """
  5 | mwtab.validator
  6 | ~~~~~~~~~~~~~~~
  7 | 
  8 | This module contains routines to validate consistency of the ``mwTab``
  9 | formatted files, e.g. make sure that ``Samples`` and ``Factors``
 10 | identifiers are consistent across the file, make sure that all
 11 | required key-value pairs are present.
 12 | """
 13 | 
 14 | from copy import deepcopy
 15 | from collections import OrderedDict
 16 | from datetime import datetime
 17 | from .mwschema import section_schema_mapping
 18 | from re import match
 19 | import io
 20 | import sys
 21 | import mwtab
 22 | 
 23 | 
# Module-level defaults. NOTE(review): none of the functions visible in this module read them
# (validate_file takes its own ``verbose`` argument) -- confirm whether they are still needed.
VERBOSE = False
LOG = None

# Maps a commonly used METABOLITES field name to the set of regular expressions that recognise
# variant spellings of it. validate_metabolites() reports any field name that is not itself one of
# these keys but matches one of the patterns (patterns are applied with re.match, i.e. anchored at
# the start of the field name).
METABOLITES_REGEXS = {
    "hmdb_id": {
        r"(?i)[\s|\S]{,}(HMDB)",
        r"(?i)(Human Metabolome D)[\S]{,}",
    },
    "inchi_key": {
        r"(?i)(inchi)[\S]{,}",
    },
    "kegg_id": {
        r"(?i)(kegg)$",
        r"(?i)(kegg)(\s|_)(i)",
    },
    "moverz": {
        r"(?i)(m/z)",
    },
    "moverz_quant": {
        r"(?i)(moverz)(\s|_)(quant)",
        r"(?i)(quan)[\S]{,}(\s|_)(m)[\S]{,}(z)",
    },
    "other_id": {
        r"(?i)(other)(\s|_)(id)$",
    },
    "other_id_type": {
        r"(?i)(other)(\s|_)(id)(\s|_)(type)$",
    },
    "pubchem_id": {
        r"(?i)(pubchem)[\S]{,}",
    },
    "retention_index": {
        r"(?i)(ri)$",
        r"(?i)(ret)[\s|\S]{,}(index)",
    },
    "retention_index_type": {
        r"(?i)(ri)(\s|_)(type)",
    },
    "retention_time": {
        r"(?i)(r)[\s|\S]{,}(time)[\S]{,}",
    },
}

# Section keys whose items validate_section_schema() checks for null values; items with a null
# value are reported and removed from the section before schema validation.
ITEM_SECTIONS = {
    "METABOLOMICS WORKBENCH",
    "PROJECT",
    "STUDY",
    "ANALYSIS",
    "SUBJECT",
    "COLLECTION",
    "TREATMENT",
    "SAMPLEPREP",
    "CHROMATOGRAPHY",
    "MS",
    "NM",
}

# Template for the start of the validation log. validate_file() fills the placeholders, in order,
# with: current timestamp, mwtab library version, file source, study ID, analysis ID, file format.
VALIDATION_LOG_HEADER = \
"""Validation Log
{}
mwtab Python Library Version: {}
Source:        {}
Study ID:      {}
Analysis ID:   {}
File format:   {}"""
 89 | 
 90 | 
 91 | def validate_subject_samples_factors(mwtabfile):
 92 |     """Validate ``SUBJECT_SAMPLE_FACTORS`` section.
 93 | 
 94 |     :param mwtabfile: Instance of :class:`~mwtab.mwtab.MWTabFile`.
 95 |     :type mwtabfile: :class:`~mwtab.mwtab.MWTabFile` or
 96 |                      :py:class:`collections.OrderedDict`
 97 |     """
 98 |     subject_samples_factors_errors = list()
 99 | 
100 |     for index, subject_sample_factor in enumerate(mwtabfile["SUBJECT_SAMPLE_FACTORS"]):
101 |         if not subject_sample_factor["Subject ID"]:
102 |             subject_samples_factors_errors.append(
103 |                 "SUBJECT_SAMPLE_FACTORS: Entry #{} missing Subject ID.".format(index+1)
104 |             )
105 |         if not subject_sample_factor["Sample ID"]:
106 |             subject_samples_factors_errors.append(
107 |                 "SUBJECT_SAMPLE_FACTORS: Entry #{} missing Sample ID.".format(index + 1)
108 |             )
109 |         if subject_sample_factor.get("Factors"):
110 |             for factor_key in subject_sample_factor["Factors"]:
111 |                 if not subject_sample_factor["Factors"][factor_key]:
112 |                     subject_samples_factors_errors.append(
113 |                         "SUBJECT_SAMPLE_FACTORS: Entry #{} missing value for Factor {}.".format(index + 1, factor_key)
114 |                     )
115 |         if subject_sample_factor.get("Additional sample data"):
116 |             for additional_key in subject_sample_factor["Additional sample data"]:
117 |                 if not subject_sample_factor["Additional sample data"][additional_key]:
118 |                     subject_samples_factors_errors.append(
119 |                         "SUBJECT_SAMPLE_FACTORS: Entry #{} missing value for Additional sample data {}.".format(
120 |                             index + 1, additional_key
121 |                         )
122 |                     )
123 | 
124 |     return subject_samples_factors_errors
125 | 
126 | 
def validate_data(mwtabfile, data_section_key, null_values):
    """Validates ``MS_METABOLITE_DATA``, ``NMR_METABOLITE_DATA``, and ``NMR_BINNED_DATA`` sections.

    Reports sample IDs that appear as data columns but not in ``SUBJECT_SAMPLE_FACTORS``. When
    ``null_values`` is true, every data value that cannot be converted to a number is replaced
    in place by an empty string and reported.

    :param mwtabfile: Instance of :class:`~mwtab.mwtab.MWTabFile`.
    :type mwtabfile: :class:`~mwtab.mwtab.MWTabFile` or
                     :py:class:`collections.OrderedDict`
    :param data_section_key: Section key (either MS_METABOLITE_DATA, NMR_METABOLITE_DATA, or NMR_BINNED_DATA)
    :type data_section_key: :py:class:`str`
    :param bool null_values: whether non-numeric data values should be replaced by ``""``.
    :return: List of error messages (empty when no problems were found).
    :rtype: :py:class:`list`
    """
    data_errors = list()
    data_rows = mwtabfile[data_section_key]["Data"]

    subject_sample_factors_sample_id_set = {
        subject_sample_factor["Sample ID"] for subject_sample_factor in mwtabfile["SUBJECT_SAMPLE_FACTORS"]
    }
    # The first column of a data row is its label; the remaining column names are sample IDs.
    # An empty data section simply has no sample IDs to compare.
    data_sample_id_set = set(list(data_rows[0].keys())[1:]) if data_rows else set()

    # The reverse check (samples without data) was removed for mwTab File Spec. 1.5.
    if data_sample_id_set - subject_sample_factors_sample_id_set:
        data_errors.append("SUBJECT_SAMPLE_FACTORS: Section missing sample ID(s) {} found in {} section.".format(
            data_sample_id_set - subject_sample_factors_sample_id_set,
            data_section_key
        ))

    if null_values:
        for index, metabolite in enumerate(data_rows):
            column_keys = list(metabolite.keys())
            for position, data_point_key in enumerate(column_keys):
                # Skip the row label: by name, and by position so that label columns with another
                # name (the first column of binned data) are not blanked as "non-numeric".
                if position == 0 or data_point_key == "Metabolite":
                    continue
                try:
                    float(metabolite[data_point_key])
                except (TypeError, ValueError):
                    # TypeError covers None and other non-string, non-numeric values.
                    metabolite[data_point_key] = ""
                    data_errors.append(
                        "{}: Data entry #{} contains non-numeric value converted to \"\".".format(data_section_key, index + 1))

    return data_errors
176 | 
177 | 
def validate_metabolites(mwtabfile, data_section_key):
    """Flag ``METABOLITES`` field names that look like variants of commonly used field names.

    The first field of every entry is skipped. Any other field name that is not itself a key of
    ``METABOLITES_REGEXS`` is tested against the patterns of each common name in turn; the first
    common name with a matching pattern is reported for that field.

    :param mwtabfile: Instance of :class:`~mwtab.mwtab.MWTabFile`.
    :type mwtabfile: :class:`~mwtab.mwtab.MWTabFile` or
                     :py:class:`collections.OrderedDict`
    :param data_section_key: Section key (either MS_METABOLITE_DATA, NMR_METABOLITE_DATA, or NMR_BINNED_DATA)
    :type data_section_key: :py:class:`str`
    :return: List of error messages (empty when no problems were found).
    :rtype: :py:class:`list`
    """
    problems = []

    for entry_number, entry in enumerate(mwtabfile[data_section_key]["Metabolites"], start=1):
        for field_name in list(entry.keys())[1:]:
            if field_name in METABOLITES_REGEXS:
                # already one of the common field names
                continue
            for common_name, patterns in METABOLITES_REGEXS.items():
                if any(match(pattern, field_name) for pattern in patterns):
                    problems.append("METABOLITES: Data entry #{} contains field name \"{}\" which matches a commonly used field name \"{}\".".format(entry_number, field_name, common_name))
                    break

    return problems
199 | 
200 | 
def validate_extended(mwtabfile, data_section_key):
    """Check the ``Extended`` entries of a data section against the known sample IDs.

    An error is recorded for every entry (numbered from 1) that has no ``sample_id`` item, or whose
    ``sample_id`` is not one of the Sample IDs listed in ``SUBJECT_SAMPLE_FACTORS``.

    :param mwtabfile: Instance of :class:`~mwtab.mwtab.MWTabFile`.
    :type mwtabfile: :class:`~mwtab.mwtab.MWTabFile` or
                     :py:class:`collections.OrderedDict`
    :param data_section_key: Section key (either MS_METABOLITE_DATA, NMR_METABOLITE_DATA, or NMR_BINNED_DATA)
    :type data_section_key: :py:class:`str`
    :return: List of error messages (empty when no problems were found).
    :rtype: :py:class:`list`
    """
    known_sample_ids = set()
    for entry in mwtabfile["SUBJECT_SAMPLE_FACTORS"]:
        known_sample_ids.add(entry["Sample ID"])

    problems = []
    for entry_number, record in enumerate(mwtabfile[data_section_key]["Extended"], start=1):
        if "sample_id" not in record.keys():
            problems.append("EXTENDED_{}: Data entry #{} missing Sample ID.".format(data_section_key, entry_number))
            continue
        sample_id = record["sample_id"]
        if sample_id not in known_sample_ids:
            problems.append(
                "EXTENDED_{}: Data entry #{} contains Sample ID \"{}\" not found in SUBJECT_SAMPLE_FACTORS section.".format(
                    data_section_key, entry_number, sample_id
                ))

    return problems
225 | 
226 | 
def validate_section_schema(section, schema, section_key):
    """Validate section of ``mwTab`` formatted file.

    For item sections (those listed in ``ITEM_SECTIONS``), items with a null value are reported and
    removed from ``section`` in place before the section is validated against ``schema``.

    :param section: Section of :class:`~mwtab.mwtab.MWTabFile`.
    :type section: :py:class:`collections.OrderedDict`
    :param schema: Schema definition.
    :type schema: :py:class:`~schema.schema`
    :param str section_key: Section key.

    :return: Tuple of the validated section (as returned by ``schema.validate``) and the list of
             null-value error messages.
    :rtype: :py:class:`tuple`
    """
    schema_errors = list()

    if section_key in ITEM_SECTIONS:
        # Iterate over a snapshot of the keys: deleting from a dictionary while iterating its live
        # key view raises RuntimeError.
        for key in list(section.keys()):
            if not section[key]:
                schema_errors.append("{}: Contains item \"{}\" with null value.".format(section_key, key))
                del section[key]

    return schema.validate(section), schema_errors
248 | 
249 | 
def validate_file(mwtabfile, section_schema_mapping=section_schema_mapping, verbose=False, metabolites=True):
    """Validate ``mwTab`` formatted file.

    Every section is validated against its schema, then the ``SUBJECT_SAMPLE_FACTORS`` section and
    the data section (with its ``Metabolites`` and ``Extended`` blocks, when present) are checked.
    All problems are collected into a validation log instead of being raised.

    :param mwtabfile: Instance of :class:`~mwtab.mwtab.MWTabFile`.
    :type mwtabfile: :class:`~mwtab.mwtab.MWTabFile` or
                     :py:class:`collections.OrderedDict`
    :param dict section_schema_mapping: Dictionary that provides mapping between section name and schema definition.
    :param bool verbose: whether to print the validation log to standard output instead of returning it.
    :param bool metabolites: whether to validate metabolites section.
    :return: Tuple of the validated file and the validation log text; the log is ``None`` when
             ``verbose`` is true because it has already been printed.
    :rtype: :py:class:`tuple`
    """
    # Local import: this module does not import ``os`` at the top level.
    from os.path import splitext

    # setup
    if not verbose:
        error_stout = io.StringIO()
    else:
        error_stout = sys.stdout
    validated_mwtabfile = deepcopy(OrderedDict(mwtabfile))

    # generate validation log header(s)
    if "https://www.metabolomicsworkbench.org/" in mwtabfile.source:
        # REST sources end with the requested output format, e.g. ".../mwtab/txt"
        file_format = mwtabfile.source.split("/")[-1]
    else:
        # Use the real file extension; splitting on the first "." breaks for paths such as
        # "./file.txt" or "my.data/file.json" and fails outright for paths without a dot.
        file_format = splitext(mwtabfile.source)[1][1:]
    print(VALIDATION_LOG_HEADER.format(
        str(datetime.now()),
        mwtab.__version__,
        mwtabfile.source,
        mwtabfile.study_id,
        mwtabfile.analysis_id,
        file_format
    ), file=error_stout)

    # create list to collect validation errors
    errors = list()

    # validate PROJECT, STUDY, ANALYSIS... and Schemas
    for section_key, section in mwtabfile.items():
        try:
            schema = section_schema_mapping[section_key]
            section, schema_errors = validate_section_schema(section, schema, section_key)
            errors.extend(schema_errors)
            validated_mwtabfile[section_key] = section
        except Exception as e:
            # unknown sections and schema violations are both reported as schema errors
            errors.append("SCHEMA: Section \"{}\" does not match the allowed schema. ".format(section_key) + str(e))

    # validate SUBJECT_SAMPLE_FACTORS
    errors.extend(validate_subject_samples_factors(validated_mwtabfile))

    # validate ..._DATA sections
    data_section_key = list(set(validated_mwtabfile.keys()) &
                            {"MS_METABOLITE_DATA", "NMR_METABOLITE_DATA", "NMR_BINNED_DATA"})
    if data_section_key:
        data_section_key = data_section_key[0]
        errors.extend(validate_data(validated_mwtabfile, data_section_key, False))

        if data_section_key in ("MS_METABOLITE_DATA", "NMR_METABOLITE_DATA"):
            # temp for testing
            if metabolites:
                if "Metabolites" in validated_mwtabfile[data_section_key].keys():
                    errors.extend(validate_metabolites(validated_mwtabfile, data_section_key))
                else:
                    errors.append("DATA: Missing METABOLITES section.")
        if "Extended" in validated_mwtabfile[data_section_key].keys():
            errors.extend(validate_extended(validated_mwtabfile, data_section_key))

    else:
        # without a data section the file must at least reference an external results file
        if "MS" in validated_mwtabfile.keys():
            if not validated_mwtabfile["MS"].get("MS_RESULTS_FILE"):
                errors.append("DATA: Missing MS_METABOLITE_DATA section or MS_RESULTS_FILE item in MS section.")
        elif "NM" in validated_mwtabfile.keys():
            if not validated_mwtabfile['NM'].get('NMR_RESULTS_FILE'):
                errors.append("DATA: Missing either NMR_METABOLITE_DATA or NMR_BINNED_DATA section or NMR_RESULTS_FILE item in NM secction.")

    # finish writing validation/error log
    if errors:
        print("Status: Contains Validation Errors", file=error_stout)
        print("Number Errors: {}\n".format(len(errors)), file=error_stout)
        print("Error Log:\n" + "\n".join(errors), file=error_stout)
    else:
        print("Status: Passing", file=error_stout)

    if verbose:
        return validated_mwtabfile, None
    else:
        return validated_mwtabfile, error_stout.getvalue()
337 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | docopt >= 0.6.2
2 | schema >= 0.6.6


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | import os
 5 | import sys
 6 | import re
 7 | from setuptools import setup, find_packages
 8 | 
 9 | 
# Convenience shortcut: when the last command-line argument is ``publish``, build a source
# distribution, upload everything in dist/ with twine, and exit before the setup() call below.
if sys.argv[-1] == 'publish':
    os.system('python3 setup.py sdist')
    os.system('twine upload dist/*')
    sys.exit()
14 | 
15 | 
def readme():
    """Return the contents of ``README.rst`` (read from the current directory) as text.

    The file is decoded as UTF-8 explicitly so that the result does not depend on the locale's
    default encoding of the machine running the build.
    """
    with open('README.rst', encoding='utf-8') as readme_file:
        return readme_file.read()
19 | 
20 | 
def find_version():
    """Read the package version from the ``__version__`` assignment in ``mwtab/__init__.py``.

    :return: Version string.
    :raises RuntimeError: If no ``__version__ = '...'`` line is found or the version is empty.
    """
    with open('mwtab/__init__.py', 'r') as fd:
        found = re.search(r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]',
                          fd.read(), re.MULTILINE)
    # re.search returns None when nothing matches; check before calling .group().
    version = found.group(1) if found else None
    if not version:
        raise RuntimeError('Cannot find version information')
    return version
28 | 
29 | 
# Runtime dependencies passed to setup() as install_requires.
REQUIRES = [
    "docopt >= 0.6.2",
    "schema >= 0.6.6"
]


# Package metadata. The version is read from mwtab/__init__.py and the long description from
# README.rst, both at build time.
setup(
    name='mwtab',
    version=find_version(),
    author='Andrey Smelter',
    author_email='andrey.smelter@gmail.com',
    description='Parser for mwtab files from the Metabolomics Workbench',
    keywords='mwtab metabolomics workbench',
    license='BSD',
    url='https://github.com/MoseleyBioinformaticsLab/mwtab',
    packages=find_packages(),
    platforms='any',
    long_description=readme(),
    install_requires=REQUIRES,
    classifiers=[
        'Development Status :: 4 - Beta',
        'Environment :: Console',
        'Intended Audience :: Developers',
        'Intended Audience :: Science/Research',
        'License :: OSI Approved :: BSD License',
        'Operating System :: OS Independent',
        'Programming Language :: Python :: 3.5',
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: Python :: 3.8',
        'Programming Language :: Python :: 3.9',
        'Programming Language :: Python :: 3.10',
        'Topic :: Scientific/Engineering :: Bio-Informatics',
        'Topic :: Software Development :: Libraries :: Python Modules',
    ],
    # installs the ``mwtab`` command, which runs mwtab.__main__:main
    entry_points={"console_scripts": ["mwtab = mwtab.__main__:main"]},
)
67 | 


--------------------------------------------------------------------------------
/tests/example_data/mwtab_files.tar.bz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MoseleyBioinformaticsLab/mwtab/946cf1e85926ef32143eb5d5aff4da56127bb358/tests/example_data/mwtab_files.tar.bz2


--------------------------------------------------------------------------------
/tests/example_data/mwtab_files.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MoseleyBioinformaticsLab/mwtab/946cf1e85926ef32143eb5d5aff4da56127bb358/tests/example_data/mwtab_files.tar.gz


--------------------------------------------------------------------------------
/tests/example_data/mwtab_files.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MoseleyBioinformaticsLab/mwtab/946cf1e85926ef32143eb5d5aff4da56127bb358/tests/example_data/mwtab_files.zip


--------------------------------------------------------------------------------
/tests/example_data/mwtab_files/ST000122_AN000204.txt:
--------------------------------------------------------------------------------
  1 | #METABOLOMICS WORKBENCH STUDY_ID:ST000122 ANALYSIS_ID:AN000204 PROJECT_ID:PR000109
  2 | VERSION             	1
  3 | CREATED_ON          	2016-09-17
  4 | #PROJECT
  5 | PR:PROJECT_TITLE                 	Perinatal DDT causes dysfunctional lipid metabolism underlying metabolic
  6 | PR:PROJECT_TYPE                  	Pilot and Feasibility Projects
  7 | PR:PROJECT_SUMMARY               	-
  8 | PR:INSTITUTE                     	University of California, Davis
  9 | PR:DEPARTMENT                    	Nutrition
 10 | PR:LABORATORY                    	Gaikwad Lab
 11 | PR:LAST_NAME                     	Gaikwad
 12 | PR:FIRST_NAME                    	Nilesh
 13 | PR:ADDRESS                       	-
 14 | PR:EMAIL                         	nwgaikwad@ucdavis.edu
 15 | PR:PHONE                         	530-752-2906
 16 | PR:FUNDING_SOURCE                	NIH 1U24DK097154 ;  PI Fiehn, Oliver  ; UC Davis WEST COAST CENTRAL
 17 | PR:FUNDING_SOURCE                	METABOLOMICS RESOURCE CORE (WC3MRC)
 18 | #STUDY
 19 | ST:STUDY_TITLE                   	Perinatal DDT causes dysfunctional lipid metabolism underlying metabolic
 20 | ST:STUDY_TYPE                    	steroid panel
 21 | ST:STUDY_SUMMARY                 	-
 22 | ST:INSTITUTE                     	University of California, Davis
 23 | ST:DEPARTMENT                    	Nutrition
 24 | ST:LABORATORY                    	Gaikwad Lab
 25 | ST:LAST_NAME                     	Gaikwad
 26 | ST:FIRST_NAME                    	Nilesh
 27 | ST:ADDRESS                       	-
 28 | ST:EMAIL                         	nwgaikwad@ucdavis.edu
 29 | ST:PHONE                         	-
 30 | ST:NUM_GROUPS                    	NA
 31 | #SUBJECT
 32 | SU:SUBJECT_TYPE                  	Human
 33 | SU:SUBJECT_SPECIES               	Homo sapiens
 34 | SU:TAXONOMY_ID                   	9606
 35 | #SUBJECT_SAMPLE_FACTORS:         	SUBJECT(optional)[tab]SAMPLE[tab]FACTORS(NAME:VALUE pairs separated by |)[tab]Additional sample data
 36 | SUBJECT_SAMPLE_FACTORS           	CER030_294717_ML_1	CER030_294717_ML_1	Tissue/Fluid:Serum	
 37 | SUBJECT_SAMPLE_FACTORS           	CER040_242995_ML_2	CER040_242995_ML_2	Tissue/Fluid:Serum	
 38 | SUBJECT_SAMPLE_FACTORS           	CER055_249947_ML_3	CER055_249947_ML_3	Tissue/Fluid:Serum	
 39 | SUBJECT_SAMPLE_FACTORS           	CER062_246153_ML_4	CER062_246153_ML_4	Tissue/Fluid:Serum	
 40 | SUBJECT_SAMPLE_FACTORS           	CER085_251176_ML_5	CER085_251176_ML_5	Tissue/Fluid:Serum	
 41 | SUBJECT_SAMPLE_FACTORS           	CER093_242931_ML_6	CER093_242931_ML_6	Tissue/Fluid:Serum	
 42 | SUBJECT_SAMPLE_FACTORS           	CER110_238825_ML_7	CER110_238825_ML_7	Tissue/Fluid:Serum	
 43 | SUBJECT_SAMPLE_FACTORS           	CER120_253690_ML_8	CER120_253690_ML_8	Tissue/Fluid:Serum	
 44 | SUBJECT_SAMPLE_FACTORS           	CER147_254803_ML_9	CER147_254803_ML_9	Tissue/Fluid:Serum	
 45 | SUBJECT_SAMPLE_FACTORS           	CER149_266689_ML_10	CER149_266689_ML_10	Tissue/Fluid:Serum	
 46 | SUBJECT_SAMPLE_FACTORS           	CER158_254231_ML_11	CER158_254231_ML_11	Tissue/Fluid:Serum	
 47 | SUBJECT_SAMPLE_FACTORS           	CER165_287001_ML_12	CER165_287001_ML_12	Tissue/Fluid:Serum	
 48 | SUBJECT_SAMPLE_FACTORS           	CER178_295145_ML_13	CER178_295145_ML_13	Tissue/Fluid:Serum	
 49 | SUBJECT_SAMPLE_FACTORS           	CER181_244392_ML_14	CER181_244392_ML_14	Tissue/Fluid:Serum	
 50 | SUBJECT_SAMPLE_FACTORS           	CER188_250760_ML_15	CER188_250760_ML_15	Tissue/Fluid:Serum	
 51 | SUBJECT_SAMPLE_FACTORS           	CER192_254091_ML_16	CER192_254091_ML_16	Tissue/Fluid:Serum	
 52 | SUBJECT_SAMPLE_FACTORS           	CER201_244193_ML_17	CER201_244193_ML_17	Tissue/Fluid:Serum	
 53 | SUBJECT_SAMPLE_FACTORS           	CER216_242490_ML_18	CER216_242490_ML_18	Tissue/Fluid:Serum	
 54 | SUBJECT_SAMPLE_FACTORS           	CER220_274308_ML_19	CER220_274308_ML_19	Tissue/Fluid:Serum	
 55 | SUBJECT_SAMPLE_FACTORS           	CER223_264067_ML_20	CER223_264067_ML_20	Tissue/Fluid:Serum	
 56 | SUBJECT_SAMPLE_FACTORS           	CER226_254303_ML_21	CER226_254303_ML_21	Tissue/Fluid:Serum	
 57 | SUBJECT_SAMPLE_FACTORS           	CER277_255328_ML_22	CER277_255328_ML_22	Tissue/Fluid:Serum	
 58 | SUBJECT_SAMPLE_FACTORS           	CER287_248530_ML_23	CER287_248530_ML_23	Tissue/Fluid:Serum	
 59 | SUBJECT_SAMPLE_FACTORS           	CER303_253023_ML_24	CER303_253023_ML_24	Tissue/Fluid:Serum	
 60 | SUBJECT_SAMPLE_FACTORS           	CER315_282966_ML_25	CER315_282966_ML_25	Tissue/Fluid:Serum	
 61 | SUBJECT_SAMPLE_FACTORS           	CER324_285069_ML_26	CER324_285069_ML_26	Tissue/Fluid:Serum	
 62 | SUBJECT_SAMPLE_FACTORS           	CER340_244448_ML_27	CER340_244448_ML_27	Tissue/Fluid:Serum	
 63 | SUBJECT_SAMPLE_FACTORS           	CER346_246320_ML_28	CER346_246320_ML_28	Tissue/Fluid:Serum	
 64 | SUBJECT_SAMPLE_FACTORS           	CER356_269662_ML_29	CER356_269662_ML_29	Tissue/Fluid:Serum	
 65 | SUBJECT_SAMPLE_FACTORS           	CER368_250104_ML_30	CER368_250104_ML_30	Tissue/Fluid:Serum	
 66 | SUBJECT_SAMPLE_FACTORS           	CER369_276355_ML_31	CER369_276355_ML_31	Tissue/Fluid:Serum	
 67 | SUBJECT_SAMPLE_FACTORS           	CER384_264971_ML_32	CER384_264971_ML_32	Tissue/Fluid:Serum	
 68 | SUBJECT_SAMPLE_FACTORS           	CER445_286527_ML_33	CER445_286527_ML_33	Tissue/Fluid:Serum	
 69 | SUBJECT_SAMPLE_FACTORS           	CER452_240972_ML_34	CER452_240972_ML_34	Tissue/Fluid:Serum	
 70 | SUBJECT_SAMPLE_FACTORS           	CER463_271249_ML_35	CER463_271249_ML_35	Tissue/Fluid:Serum	
 71 | SUBJECT_SAMPLE_FACTORS           	CER465_265004_ML_36	CER465_265004_ML_36	Tissue/Fluid:Serum	
 72 | SUBJECT_SAMPLE_FACTORS           	CER483_294606_ML_37	CER483_294606_ML_37	Tissue/Fluid:Serum	
 73 | SUBJECT_SAMPLE_FACTORS           	CER488_274343_ML_38	CER488_274343_ML_38	Tissue/Fluid:Serum	
 74 | SUBJECT_SAMPLE_FACTORS           	CER530_249229_ML_39	CER530_249229_ML_39	Tissue/Fluid:Serum	
 75 | SUBJECT_SAMPLE_FACTORS           	CER540_240346_ML_40	CER540_240346_ML_40	Tissue/Fluid:Serum	
 76 | SUBJECT_SAMPLE_FACTORS           	CER552_241945_ML_41	CER552_241945_ML_41	Tissue/Fluid:Serum	
 77 | SUBJECT_SAMPLE_FACTORS           	CER555_251239_ML_42	CER555_251239_ML_42	Tissue/Fluid:Serum	
 78 | #COLLECTION
 79 | CO:COLLECTION_SUMMARY            	-
 80 | #TREATMENT
 81 | TR:TREATMENT_SUMMARY             	-
 82 | #SAMPLEPREP
 83 | SP:SAMPLEPREP_SUMMARY            	Methanol: Water Extraction
 84 | SP:SAMPLEPREP_PROTOCOL_FILENAME  	NIH_WCMC_LaMerrill_Method_GaikwadLab__SteroidAnalysis_2013-14.docx
 85 | SP:PROCESSING_METHOD             	Homogenization and Solvent Removal w/ Speed Vac
 86 | SP:PROCESSING_STORAGE_CONDITIONS 	On Ice
 87 | SP:EXTRACTION_METHOD             	1:1 Methanol: Water
 88 | SP:EXTRACT_STORAGE               	-80C
 89 | SP:SAMPLE_RESUSPENSION           	150ul CH3OH/H2O
 90 | SP:ORGAN                         	Sprague-Dawley Maternal: Adrenal, liver, placenta, amniotic fluid
 91 | SP:ORGAN                         	Fetal: Male and female brain, male and female liver
 92 | #CHROMATOGRAPHY
 93 | CH:CHROMATOGRAPHY_SUMMARY        	Targeted UPLC-MS/MS
 94 | CH:CHROMATOGRAPHY_TYPE           	Reversed phase
 95 | CH:INSTRUMENT_NAME               	Waters Acquity
 96 | CH:COLUMN_NAME                   	Waters Acquity HSS T3 (150 x 2.1mm, 1.8um)
 97 | CH:FLOW_GRADIENT                 	0-2 min 100% A (Water 0.1% formic acid) 0% B (CH3CN 0.1 % formic acid), 2-4 min
 98 | CH:FLOW_GRADIENT                 	A, 4-9mins 45% A, 9-11 mins 20% A, 11-12 mins 100% A
 99 | CH:FLOW_RATE                     	0.15 ml/min
100 | CH:SAMPLE_INJECTION              	10ul
101 | CH:SOLVENT_A                     	Water 0.1% formic acid
102 | CH:SOLVENT_B                     	CH3CN 0.1 % formic acid
103 | CH:ANALYTICAL_TIME               	12 mins
104 | #ANALYSIS
105 | AN:ANALYSIS_TYPE                 	MS
106 | AN:LABORATORY_NAME               	Gaikwad Laboratory
107 | AN:ACQUISITION_DATE              	41716
108 | AN:SOFTWARE_VERSION              	Masslynx
109 | AN:OPERATOR_NAME                 	Nilesh Gaikwad
110 | #MS
111 | MS:INSTRUMENT_NAME               	Waters Xevo-TQ
112 | MS:INSTRUMENT_TYPE               	Triple quadrupole
113 | MS:MS_TYPE                       	ESI
114 | MS:ION_MODE                      	POSITIVE
115 | MS:CAPILLARY_VOLTAGE             	3.0 kV
116 | MS:COLLISION_GAS                 	N2
117 | MS:IONIZATION                    	Electrospray Ionization
118 | MS:SOURCE_TEMPERATURE            	150C
119 | MS:DESOLVATION_GAS_FLOW          	600 L/h
120 | MS:DESOLVATION_TEMPERATURE       	350C
121 | MS:MS_COMMENTS                   	UPLC-MS/MS
122 | #MS_METABOLITE_DATA
123 | MS_METABOLITE_DATA:UNITS         	pg/ml
124 | MS_METABOLITE_DATA_START
125 | Samples	CER030_294717_ML_1	CER040_242995_ML_2	CER055_249947_ML_3	CER062_246153_ML_4	CER085_251176_ML_5	CER093_242931_ML_6	CER110_238825_ML_7	CER120_253690_ML_8	CER147_254803_ML_9	CER149_266689_ML_10	CER158_254231_ML_11	CER165_287001_ML_12	CER178_295145_ML_13	CER181_244392_ML_14	CER188_250760_ML_15	CER192_254091_ML_16	CER201_244193_ML_17	CER216_242490_ML_18	CER220_274308_ML_19	CER223_264067_ML_20	CER226_254303_ML_21	CER277_255328_ML_22	CER287_248530_ML_23	CER303_253023_ML_24	CER315_282966_ML_25	CER324_285069_ML_26	CER340_244448_ML_27	CER346_246320_ML_28	CER356_269662_ML_29	CER368_250104_ML_30	CER369_276355_ML_31	CER384_264971_ML_32	CER445_286527_ML_33	CER452_240972_ML_34	CER463_271249_ML_35	CER465_265004_ML_36	CER483_294606_ML_37	CER488_274343_ML_38	CER530_249229_ML_39	CER540_240346_ML_40	CER552_241945_ML_41	CER555_251239_ML_42
126 | Factors	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum
127 | 17-hydroxypregnenolone	946.2500	0.0000	676.2500	0.0000	2251.2500	0.0000	0.0000	1134.7500	0.0000	0.0000	2016.7500	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	1919.7500	0.0000	972.7500	0.0000	1542.2500	1687.7500	421.0000	0.0000	373.2500	0.0000	614.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	528.2500
128 | 17-hydroxyprogesterone	0.0000	2.0000	0.0000	0.0000	19.2500	0.0000	0.0000	27.0000	2.0000	120.7500	27.7500	83.0000	0.0000	8.0000	3.5000	274.0000	0.0000	0.0000	3.0000	3.2500	0.0000	43.7500	15.2500	25.7500	4.2500	0.0000	0.0000	49.5000	27.7500	14.0000	9.7500	35.2500	34.7500	4.5000	8.0000	17.2500	0.0000	24.7500	19.0000	0.0000	4.5000	132.0000
129 | Allodihydrotestosterone	80.0000	1181.0000	0.0000	0.0000	0.0000	112.2500	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	288.0000	0.0000	0.0000	374.7500	0.0000	27.5000	112.7500	247.7500	39.0000	0.0000	0.0000	0.0000	0.0000	0.0000	761.0000	245.5000	332.5000	52.7500	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	465.7500	159.0000	0.0000	77.0000	315.5000	466.0000
130 | Androstenedione	76.7500	57.0000	176.2500	399.5000	208.5000	37.0000	281.2500	79.7500	250.7500	420.5000	123.0000	186.2500	34.7500	224.5000	67.7500	335.0000	126.5000	277.0000	50.5000	153.7500	62.2500	107.0000	431.2500	167.5000	134.0000	60.7500	38.5000	42.0000	78.7500	43.0000	60.0000	114.7500	237.7500	53.5000	51.7500	298.0000	220.2500	15.0000	256.5000	172.5000	79.2500	52.5000
131 | Androstenolone (DHEA)	1779.7500	1409.2500	945.7500	748.2500	2284.0000	2351.0000	2183.7500	1916.5000	5079.5000	1474.0000	1338.5000	1646.0000	2051.7500	2039.7500	2618.0000	306.7500	574.5000	1794.2500	1429.0000	2293.2500	2066.2500	2493.2500	918.0000	1579.2500	2042.2500	2645.7500	2393.7500	1913.0000	1641.5000	853.2500	586.5000	537.2500	562.5000	1887.2500	979.0000	678.5000	1357.2500	1526.2500	2300.7500	129.0000	409.2500	282.2500
132 | Cortexolone	0.0000	0.0000	0.0000	54.0000	0.0000	0.0000	0.0000	0.0000	215.7500	135.7500	72.7500	53.0000	11.7500	0.0000	0.0000	0.0000	0.0000	101.2500	11.2500	0.0000	0.0000	315.0000	181.2500	0.0000	7.7500	151.2500	0.0000	0.0000	104.0000	0.0000	0.0000	30.7500	94.2500	210.5000	33.2500	126.0000	0.0000	10.0000	17.0000	15.7500	0.0000	0.0000
133 | Cortexone	108.0000	16.0000	13.0000	117.5000	3.2500	63.2500	42.5000	146.7500	29.5000	204.2500	28.7500	67.0000	30.5000	103.0000	23.0000	416.7500	63.5000	32.5000	32.5000	127.2500	39.0000	84.2500	7.2500	16.2500	68.7500	27.0000	46.5000	21.7500	3.2500	14.7500	28.7500	67.0000	33.0000	40.7500	31.0000	32.2500	40.0000	13.7500	18.7500	0.0000	25.7500	29.0000
134 | Corticosterone_ DOC	0.0000	354.5000	0.0000	0.0000	322.5000	419.7500	420.7500	0.0000	0.0000	0.0000	393.2500	915.5000	0.0000	432.2500	1233.0000	0.0000	525.5000	1700.0000	0.0000	98.7500	285.5000	42.5000	428.2500	0.0000	427.5000	271.7500	254.7500	478.0000	303.5000	462.2500	532.0000	715.0000	1073.0000	836.2500	0.0000	1639.0000	601.7500	287.7500	0.0000	0.0000	435.2500	1602.2500
135 | Cortisol	7643.0000	39245.7500	11671.5000	20216.0000	14908.7500	14386.5000	16815.2500	7806.2500	27135.5000	7095.0000	12175.2500	36413.0000	2499.2500	15101.7500	22045.0000	24832.0000	13257.0000	19528.5000	4539.7500	7681.7500	9585.2500	19361.0000	24203.7500	5667.0000	19437.2500	10849.2500	11855.7500	7546.5000	3093.7500	19035.7500	18575.0000	14801.5000	22960.7500	22506.5000	8001.5000	31037.5000	18577.2500	15506.2500	8364.7500	2145.7500	5574.7500	19662.5000
136 | Estradiol	123992.2500	796595.7500	619110.0000	449415.7500	320835.5000	326124.2500	249087.2500	311589.2500	345598.5000	485857.0000	332055.2500	211831.0000	334929.7500	235466.7500	352555.0000	410500.0000	887955.0000	865791.7500	1648163.5000	856726.7500	579044.2500	254013.2500	326272.7500	239893.7500	329553.2500	438715.5000	248489.0000	380251.0000	338965.5000	337231.2500	342754.5000	370657.2500	2028106.5000	733521.0000	399244.2500	321007.5000	634463.0000	231294.0000	349439.2500	75746.7500	399415.5000	303855.7500
137 | Estrone	484.5000	1663.7500	1680.7500	794.5000	557.2500	625.7500	669.7500	885.0000	715.0000	1225.5000	697.7500	478.2500	659.0000	575.5000	871.7500	1089.0000	1726.2500	2325.2500	3286.7500	1955.7500	1094.0000	486.2500	650.5000	574.2500	601.7500	842.7500	757.7500	732.7500	571.7500	693.7500	1004.2500	879.2500	3154.7500	1095.2500	22680.2500	637.2500	1108.2500	474.2500	810.2500	421.2500	680.7500	623.7500
138 | Pregnenolone	12.2500	0.0000	0.0000	0.0000	0.0000	144.2500	14.7500	807.2500	0.0000	30.0000	0.0000	0.0000	0.0000	0.0000	16.5000	139.5000	132.5000	0.0000	0.0000	13.7500	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	488.5000	0.0000	0.0000	0.0000	0.0000	280.7500	0.0000	0.0000	0.0000	0.0000	0.0000	205.5000
139 | Progesterone	28.2500	6.2500	725.2500	57.2500	767.0000	2.7500	388.0000	9.0000	19.5000	242.5000	4.0000	0.0000	94.5000	160.7500	0.0000	3214.5000	218.2500	1.0000	0.0000	20.0000	4.5000	55.7500	24.5000	57.0000	200.5000	138.7500	132.2500	120.5000	80.5000	59.5000	315.7500	247.2500	211.5000	198.5000	232.2500	241.0000	199.5000	282.5000	216.5000	358.5000	289.5000	199.2500
140 | Testosterone	75.7500	63.2500	42.7500	98.0000	24.2500	35.0000	165.7500	23.2500	73.7500	52.7500	118.7500	35.7500	65.2500	127.2500	14.2500	202.5000	110.7500	53.5000	54.2500	2.2500	105.2500	182.7500	116.0000	66.2500	52.5000	106.2500	43.2500	57.2500	97.2500	16.0000	192.0000	53.7500	182.5000	0.2500	11.5000	87.2500	33.7500	45.5000	26.2500	96.0000	17.5000	79.7500
141 | MS_METABOLITE_DATA_END
142 | #METABOLITES
143 | METABOLITES_START
144 | metabolite_name	moverz_quant	ri	ri_type	pubchem_id	inchi_key	kegg_id	other_id	other_id_type
145 | 17-hydroxypregnenolone				91451			2Q4710	UCDavis_Gaikwad_Lab_ID
146 | 17-hydroxyprogesterone				6238			6Q3360	UCDavis_Gaikwad_Lab_ID
147 | Allodihydrotestosterone				10635			14A2570	UCDavis_Gaikwad_Lab_ID
148 | Androstenedione				6128			12A6030	UCDavis_Gaikwad_Lab_ID
149 | Androstenolone (DHEA)				5881			3A8500	UCDavis_Gaikwad_Lab_ID
150 | Cortexolone				440707			7Q1610	UCDavis_Gaikwad_Lab_ID
151 | Cortexone				6166			9Q3460	UCDavis_Gaikwad_Lab_ID
152 | Corticosterone, DOC				5753			10Q1550	UCDavis_Gaikwad_Lab_ID
153 | Cortisol				5754			8Q3880	UCDavis_Gaikwad_Lab_ID
154 | Estradiol				5757			16E0950	UCDavis_Gaikwad_Lab_ID
155 | Estrone				5870			15E2300	UCDavis_Gaikwad_Lab_ID
156 | Pregnenolone				8955			1Q5500	UCDavis_Gaikwad_Lab_ID
157 | Progesterone				5994			5Q2600	UCDavis_Gaikwad_Lab_ID
158 | Testosterone				6013			13A6950	UCDavis_Gaikwad_Lab_ID
159 | METABOLITES_END
160 | #END
161 | 
162 | 
163 | 


--------------------------------------------------------------------------------
/tests/example_data/validation_files/ST000122_AN000204_error_1.txt:
--------------------------------------------------------------------------------
  1 | #METABOLOMICS WORKBENCH STUDY_ID:ST000122 ANALYSIS_ID:AN000204 PROJECT_ID:PR000109
  2 | VERSION             	1
  3 | CREATED_ON          	2016-09-17
  4 | #PROJECT
  5 | PR:PROJECT_TITLE                 	Perinatal DDT causes dysfunctional lipid metabolism underlying metabolic
  6 | PR:PROJECT_TYPE                  	Pilot and Feasibility Projects
  7 | PR:PROJECT_SUMMARY               	-
  8 | PR:INSTITUTE                     	University of California, Davis
  9 | PR:DEPARTMENT                    	Nutrition
 10 | PR:LABORATORY                    	Gaikwad Lab
 11 | PR:LAST_NAME                     	Gaikwad
 12 | PR:FIRST_NAME                    	Nilesh
 13 | PR:ADDRESS                       	-
 14 | PR:EMAIL                         	nwgaikwad@ucdavis.edu
 15 | PR:PHONE                         	530-752-2906
 16 | PR:FUNDING_SOURCE                	NIH 1U24DK097154 ;  PI Fiehn, Oliver  ; UC Davis WEST COAST CENTRAL
 17 | PR:FUNDING_SOURCE                	METABOLOMICS RESOURCE CORE (WC3MRC)
 18 | #STUDY
 19 | ST:STUDY_TITLE                   	Perinatal DDT causes dysfunctional lipid metabolism underlying metabolic
 20 | ST:STUDY_TYPE                    	steroid panel
 21 | ST:STUDY_SUMMARY                 	-
 22 | ST:INSTITUTE                     	University of California, Davis
 23 | ST:DEPARTMENT                    	Nutrition
 24 | ST:LABORATORY                    	Gaikwad Lab
 25 | ST:LAST_NAME                     	Gaikwad
 26 | ST:FIRST_NAME                    	Nilesh
 27 | ST:ADDRESS                       	-
 28 | ST:EMAIL                         	nwgaikwad@ucdavis.edu
 29 | ST:PHONE                         	-
 30 | ST:NUM_GROUPS                    	NA
 31 | #SUBJECT
 32 | SU:SUBJECT_TYPE                  	Human
 33 | SU:SUBJECT_SPECIES               	Homo sapiens
 34 | SU:TAXONOMY_ID                   	9606
 35 | #SUBJECT_SAMPLE_FACTORS:         	SUBJECT(optional)[tab]SAMPLE[tab]FACTORS(NAME:VALUE pairs separated by |)[tab]Additional sample data
 36 | SUBJECT_SAMPLE_FACTORS           			Tissue/Fluid:	
 37 | SUBJECT_SAMPLE_FACTORS           	CER040_242995_ML_2	CER040_242995_ML_2	Tissue/Fluid:Serum	
 38 | SUBJECT_SAMPLE_FACTORS           	CER055_249947_ML_3	CER055_249947_ML_3	Tissue/Fluid:Serum	
 39 | SUBJECT_SAMPLE_FACTORS           	CER062_246153_ML_4	CER062_246153_ML_4	Tissue/Fluid:Serum	
 40 | SUBJECT_SAMPLE_FACTORS           	CER085_251176_ML_5	CER085_251176_ML_5	Tissue/Fluid:Serum	
 41 | SUBJECT_SAMPLE_FACTORS           	CER093_242931_ML_6	CER093_242931_ML_6	Tissue/Fluid:Serum	
 42 | SUBJECT_SAMPLE_FACTORS           	CER110_238825_ML_7	CER110_238825_ML_7	Tissue/Fluid:Serum	
 43 | SUBJECT_SAMPLE_FACTORS           	CER120_253690_ML_8	CER120_253690_ML_8	Tissue/Fluid:Serum	
 44 | SUBJECT_SAMPLE_FACTORS           	CER147_254803_ML_9	CER147_254803_ML_9	Tissue/Fluid:Serum	
 45 | SUBJECT_SAMPLE_FACTORS           	CER149_266689_ML_10	CER149_266689_ML_10	Tissue/Fluid:Serum	
 46 | SUBJECT_SAMPLE_FACTORS           	CER158_254231_ML_11	CER158_254231_ML_11	Tissue/Fluid:Serum	
 47 | SUBJECT_SAMPLE_FACTORS           	CER165_287001_ML_12	CER165_287001_ML_12	Tissue/Fluid:Serum	
 48 | SUBJECT_SAMPLE_FACTORS           	CER178_295145_ML_13	CER178_295145_ML_13	Tissue/Fluid:Serum	
 49 | SUBJECT_SAMPLE_FACTORS           	CER181_244392_ML_14	CER181_244392_ML_14	Tissue/Fluid:Serum	
 50 | SUBJECT_SAMPLE_FACTORS           	CER188_250760_ML_15	CER188_250760_ML_15	Tissue/Fluid:Serum	
 51 | SUBJECT_SAMPLE_FACTORS           	CER192_254091_ML_16	CER192_254091_ML_16	Tissue/Fluid:Serum	
 52 | SUBJECT_SAMPLE_FACTORS           	CER201_244193_ML_17	CER201_244193_ML_17	Tissue/Fluid:Serum	
 53 | SUBJECT_SAMPLE_FACTORS           	CER216_242490_ML_18	CER216_242490_ML_18	Tissue/Fluid:Serum	
 54 | SUBJECT_SAMPLE_FACTORS           	CER220_274308_ML_19	CER220_274308_ML_19	Tissue/Fluid:Serum	
 55 | SUBJECT_SAMPLE_FACTORS           	CER223_264067_ML_20	CER223_264067_ML_20	Tissue/Fluid:Serum	
 56 | SUBJECT_SAMPLE_FACTORS           	CER226_254303_ML_21	CER226_254303_ML_21	Tissue/Fluid:Serum	
 57 | SUBJECT_SAMPLE_FACTORS           	CER277_255328_ML_22	CER277_255328_ML_22	Tissue/Fluid:Serum	
 58 | SUBJECT_SAMPLE_FACTORS           	CER287_248530_ML_23	CER287_248530_ML_23	Tissue/Fluid:Serum	
 59 | SUBJECT_SAMPLE_FACTORS           	CER303_253023_ML_24	CER303_253023_ML_24	Tissue/Fluid:Serum	
 60 | SUBJECT_SAMPLE_FACTORS           	CER315_282966_ML_25	CER315_282966_ML_25	Tissue/Fluid:Serum	
 61 | SUBJECT_SAMPLE_FACTORS           	CER324_285069_ML_26	CER324_285069_ML_26	Tissue/Fluid:Serum	
 62 | SUBJECT_SAMPLE_FACTORS           	CER340_244448_ML_27	CER340_244448_ML_27	Tissue/Fluid:Serum	
 63 | SUBJECT_SAMPLE_FACTORS           	CER346_246320_ML_28	CER346_246320_ML_28	Tissue/Fluid:Serum	
 64 | SUBJECT_SAMPLE_FACTORS           	CER356_269662_ML_29	CER356_269662_ML_29	Tissue/Fluid:Serum	
 65 | SUBJECT_SAMPLE_FACTORS           	CER368_250104_ML_30	CER368_250104_ML_30	Tissue/Fluid:Serum	
 66 | SUBJECT_SAMPLE_FACTORS           	CER369_276355_ML_31	CER369_276355_ML_31	Tissue/Fluid:Serum	
 67 | SUBJECT_SAMPLE_FACTORS           	CER384_264971_ML_32	CER384_264971_ML_32	Tissue/Fluid:Serum	
 68 | SUBJECT_SAMPLE_FACTORS           	CER445_286527_ML_33	CER445_286527_ML_33	Tissue/Fluid:Serum	
 69 | SUBJECT_SAMPLE_FACTORS           	CER452_240972_ML_34	CER452_240972_ML_34	Tissue/Fluid:Serum	
 70 | SUBJECT_SAMPLE_FACTORS           	CER463_271249_ML_35	CER463_271249_ML_35	Tissue/Fluid:Serum	
 71 | SUBJECT_SAMPLE_FACTORS           	CER465_265004_ML_36	CER465_265004_ML_36	Tissue/Fluid:Serum	
 72 | SUBJECT_SAMPLE_FACTORS           	CER483_294606_ML_37	CER483_294606_ML_37	Tissue/Fluid:Serum	
 73 | SUBJECT_SAMPLE_FACTORS           	CER488_274343_ML_38	CER488_274343_ML_38	Tissue/Fluid:Serum	
 74 | SUBJECT_SAMPLE_FACTORS           	CER530_249229_ML_39	CER530_249229_ML_39	Tissue/Fluid:Serum	
 75 | SUBJECT_SAMPLE_FACTORS           	CER540_240346_ML_40	CER540_240346_ML_40	Tissue/Fluid:Serum	
 76 | SUBJECT_SAMPLE_FACTORS           	CER552_241945_ML_41	CER552_241945_ML_41	Tissue/Fluid:Serum	
 77 | SUBJECT_SAMPLE_FACTORS           	CER555_251239_ML_42	CER555_251239_ML_42	Tissue/Fluid:Serum	
 78 | #COLLECTION
 79 | CO:COLLECTION_SUMMARY            	-
 80 | #TREATMENT
 81 | TR:TREATMENT_SUMMARY             	-
 82 | #SAMPLEPREP
 83 | SP:SAMPLEPREP_SUMMARY            	Methanol: Water Extraction
 84 | SP:SAMPLEPREP_PROTOCOL_FILENAME  	NIH_WCMC_LaMerrill_Method_GaikwadLab__SteroidAnalysis_2013-14.docx
 85 | SP:PROCESSING_METHOD             	Homogenization and Solvent Removal w/ Speed Vac
 86 | SP:PROCESSING_STORAGE_CONDITIONS 	On Ice
 87 | SP:EXTRACTION_METHOD             	1:1 Methanol: Water
 88 | SP:EXTRACT_STORAGE               	-80C
 89 | SP:SAMPLE_RESUSPENSION           	150ul CH3OH/H2O
 90 | SP:ORGAN                         	Sprague-Dawley Maternal: Adrenal, liver, placenta, amniotic fluid
 91 | SP:ORGAN                         	Fetal: Male and female brain, male and female liver
 92 | #CHROMATOGRAPHY
 93 | CH:CHROMATOGRAPHY_SUMMARY        	Targeted UPLC-MS/MS
 94 | CH:CHROMATOGRAPHY_TYPE           	Reversed phase
 95 | CH:INSTRUMENT_NAME               	Waters Acquity
 96 | CH:COLUMN_NAME                   	Waters Acquity HSS T3 (150 x 2.1mm, 1.8um)
 97 | CH:FLOW_GRADIENT                 	0-2 min 100% A (Water 0.1% formic acid) 0% B (CH3CN 0.1 % formic acid), 2-4 min
 98 | CH:FLOW_GRADIENT                 	A, 4-9mins 45% A, 9-11 mins 20% A, 11-12 mins 100% A
 99 | CH:FLOW_RATE                     	0.15 ml/min
100 | CH:SAMPLE_INJECTION              	10ul
101 | CH:SOLVENT_A                     	Water 0.1% formic acid
102 | CH:SOLVENT_B                     	CH3CN 0.1 % formic acid
103 | CH:ANALYTICAL_TIME               	12 mins
104 | #ANALYSIS
105 | AN:ANALYSIS_TYPE                 	MS
106 | AN:LABORATORY_NAME               	Gaikwad Laboratory
107 | AN:ACQUISITION_DATE              	41716
108 | AN:SOFTWARE_VERSION              	Masslynx
109 | AN:OPERATOR_NAME                 	Nilesh Gaikwad
110 | #MS
111 | MS:INSTRUMENT_NAME               	Waters Xevo-TQ
112 | MS:INSTRUMENT_TYPE               	Triple quadrupole
113 | MS:MS_TYPE                       	ESI
114 | MS:ION_MODE                      	POSITIVE
115 | MS:CAPILLARY_VOLTAGE             	3.0 kV
116 | MS:COLLISION_GAS                 	N2
117 | MS:IONIZATION                    	Electrospray Ionization
118 | MS:SOURCE_TEMPERATURE            	150C
119 | MS:DESOLVATION_GAS_FLOW          	600 L/h
120 | MS:DESOLVATION_TEMPERATURE       	350C
121 | MS:MS_COMMENTS                   	UPLC-MS/MS
122 | #MS_METABOLITE_DATA
123 | MS_METABOLITE_DATA:UNITS         	pg/ml
124 | MS_METABOLITE_DATA_START
125 | Samples	CER030_294717_ML_1	CER040_242995_ML_2	CER055_249947_ML_3	CER062_246153_ML_4	CER085_251176_ML_5	CER093_242931_ML_6	CER110_238825_ML_7	CER120_253690_ML_8	CER147_254803_ML_9	CER149_266689_ML_10	CER158_254231_ML_11	CER165_287001_ML_12	CER178_295145_ML_13	CER181_244392_ML_14	CER188_250760_ML_15	CER192_254091_ML_16	CER201_244193_ML_17	CER216_242490_ML_18	CER220_274308_ML_19	CER223_264067_ML_20	CER226_254303_ML_21	CER277_255328_ML_22	CER287_248530_ML_23	CER303_253023_ML_24	CER315_282966_ML_25	CER324_285069_ML_26	CER340_244448_ML_27	CER346_246320_ML_28	CER356_269662_ML_29	CER368_250104_ML_30	CER369_276355_ML_31	CER384_264971_ML_32	CER445_286527_ML_33	CER452_240972_ML_34	CER463_271249_ML_35	CER465_265004_ML_36	CER483_294606_ML_37	CER488_274343_ML_38	CER530_249229_ML_39	CER540_240346_ML_40	CER552_241945_ML_41	CER555_251239_ML_42
126 | Factors	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum
127 | 17-hydroxypregnenolone	946.2500	0.0000	676.2500	0.0000	2251.2500	0.0000	0.0000	1134.7500	0.0000	0.0000	2016.7500	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	1919.7500	0.0000	972.7500	0.0000	1542.2500	1687.7500	421.0000	0.0000	373.2500	0.0000	614.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	528.2500
128 | 17-hydroxyprogesterone	0.0000	2.0000	0.0000	0.0000	19.2500	0.0000	0.0000	27.0000	2.0000	120.7500	27.7500	83.0000	0.0000	8.0000	3.5000	274.0000	0.0000	0.0000	3.0000	3.2500	0.0000	43.7500	15.2500	25.7500	4.2500	0.0000	0.0000	49.5000	27.7500	14.0000	9.7500	35.2500	34.7500	4.5000	8.0000	17.2500	0.0000	24.7500	19.0000	0.0000	4.5000	132.0000
129 | Allodihydrotestosterone	80.0000	1181.0000	0.0000	0.0000	0.0000	112.2500	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	288.0000	0.0000	0.0000	374.7500	0.0000	27.5000	112.7500	247.7500	39.0000	0.0000	0.0000	0.0000	0.0000	0.0000	761.0000	245.5000	332.5000	52.7500	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	465.7500	159.0000	0.0000	77.0000	315.5000	466.0000
130 | Androstenedione	76.7500	57.0000	176.2500	399.5000	208.5000	37.0000	281.2500	79.7500	250.7500	420.5000	123.0000	186.2500	34.7500	224.5000	67.7500	335.0000	126.5000	277.0000	50.5000	153.7500	62.2500	107.0000	431.2500	167.5000	134.0000	60.7500	38.5000	42.0000	78.7500	43.0000	60.0000	114.7500	237.7500	53.5000	51.7500	298.0000	220.2500	15.0000	256.5000	172.5000	79.2500	52.5000
131 | Androstenolone (DHEA)	1779.7500	1409.2500	945.7500	748.2500	2284.0000	2351.0000	2183.7500	1916.5000	5079.5000	1474.0000	1338.5000	1646.0000	2051.7500	2039.7500	2618.0000	306.7500	574.5000	1794.2500	1429.0000	2293.2500	2066.2500	2493.2500	918.0000	1579.2500	2042.2500	2645.7500	2393.7500	1913.0000	1641.5000	853.2500	586.5000	537.2500	562.5000	1887.2500	979.0000	678.5000	1357.2500	1526.2500	2300.7500	129.0000	409.2500	282.2500
132 | Cortexolone	0.0000	0.0000	0.0000	54.0000	0.0000	0.0000	0.0000	0.0000	215.7500	135.7500	72.7500	53.0000	11.7500	0.0000	0.0000	0.0000	0.0000	101.2500	11.2500	0.0000	0.0000	315.0000	181.2500	0.0000	7.7500	151.2500	0.0000	0.0000	104.0000	0.0000	0.0000	30.7500	94.2500	210.5000	33.2500	126.0000	0.0000	10.0000	17.0000	15.7500	0.0000	0.0000
133 | Cortexone	108.0000	16.0000	13.0000	117.5000	3.2500	63.2500	42.5000	146.7500	29.5000	204.2500	28.7500	67.0000	30.5000	103.0000	23.0000	416.7500	63.5000	32.5000	32.5000	127.2500	39.0000	84.2500	7.2500	16.2500	68.7500	27.0000	46.5000	21.7500	3.2500	14.7500	28.7500	67.0000	33.0000	40.7500	31.0000	32.2500	40.0000	13.7500	18.7500	0.0000	25.7500	29.0000
134 | Corticosterone_ DOC	0.0000	354.5000	0.0000	0.0000	322.5000	419.7500	420.7500	0.0000	0.0000	0.0000	393.2500	915.5000	0.0000	432.2500	1233.0000	0.0000	525.5000	1700.0000	0.0000	98.7500	285.5000	42.5000	428.2500	0.0000	427.5000	271.7500	254.7500	478.0000	303.5000	462.2500	532.0000	715.0000	1073.0000	836.2500	0.0000	1639.0000	601.7500	287.7500	0.0000	0.0000	435.2500	1602.2500
135 | Cortisol	7643.0000	39245.7500	11671.5000	20216.0000	14908.7500	14386.5000	16815.2500	7806.2500	27135.5000	7095.0000	12175.2500	36413.0000	2499.2500	15101.7500	22045.0000	24832.0000	13257.0000	19528.5000	4539.7500	7681.7500	9585.2500	19361.0000	24203.7500	5667.0000	19437.2500	10849.2500	11855.7500	7546.5000	3093.7500	19035.7500	18575.0000	14801.5000	22960.7500	22506.5000	8001.5000	31037.5000	18577.2500	15506.2500	8364.7500	2145.7500	5574.7500	19662.5000
136 | Estradiol	123992.2500	796595.7500	619110.0000	449415.7500	320835.5000	326124.2500	249087.2500	311589.2500	345598.5000	485857.0000	332055.2500	211831.0000	334929.7500	235466.7500	352555.0000	410500.0000	887955.0000	865791.7500	1648163.5000	856726.7500	579044.2500	254013.2500	326272.7500	239893.7500	329553.2500	438715.5000	248489.0000	380251.0000	338965.5000	337231.2500	342754.5000	370657.2500	2028106.5000	733521.0000	399244.2500	321007.5000	634463.0000	231294.0000	349439.2500	75746.7500	399415.5000	303855.7500
137 | Estrone	484.5000	1663.7500	1680.7500	794.5000	557.2500	625.7500	669.7500	885.0000	715.0000	1225.5000	697.7500	478.2500	659.0000	575.5000	871.7500	1089.0000	1726.2500	2325.2500	3286.7500	1955.7500	1094.0000	486.2500	650.5000	574.2500	601.7500	842.7500	757.7500	732.7500	571.7500	693.7500	1004.2500	879.2500	3154.7500	1095.2500	22680.2500	637.2500	1108.2500	474.2500	810.2500	421.2500	680.7500	623.7500
138 | Pregnenolone	12.2500	0.0000	0.0000	0.0000	0.0000	144.2500	14.7500	807.2500	0.0000	30.0000	0.0000	0.0000	0.0000	0.0000	16.5000	139.5000	132.5000	0.0000	0.0000	13.7500	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	488.5000	0.0000	0.0000	0.0000	0.0000	280.7500	0.0000	0.0000	0.0000	0.0000	0.0000	205.5000
139 | Progesterone	28.2500	6.2500	725.2500	57.2500	767.0000	2.7500	388.0000	9.0000	19.5000	242.5000	4.0000	0.0000	94.5000	160.7500	0.0000	3214.5000	218.2500	1.0000	0.0000	20.0000	4.5000	55.7500	24.5000	57.0000	200.5000	138.7500	132.2500	120.5000	80.5000	59.5000	315.7500	247.2500	211.5000	198.5000	232.2500	241.0000	199.5000	282.5000	216.5000	358.5000	289.5000	199.2500
140 | Testosterone	75.7500	63.2500	42.7500	98.0000	24.2500	35.0000	165.7500	23.2500	73.7500	52.7500	118.7500	35.7500	65.2500	127.2500	14.2500	202.5000	110.7500	53.5000	54.2500	2.2500	105.2500	182.7500	116.0000	66.2500	52.5000	106.2500	43.2500	57.2500	97.2500	16.0000	192.0000	53.7500	182.5000	0.2500	11.5000	87.2500	33.7500	45.5000	26.2500	96.0000	17.5000	79.7500
141 | MS_METABOLITE_DATA_END
142 | #METABOLITES
143 | METABOLITES_START
144 | metabolite_name	moverz_quant	ri	ri_type	pubchem_id	inchi_key	kegg_id	other_id	other_id_type
145 | 17-hydroxypregnenolone				91451			2Q4710	UCDavis_Gaikwad_Lab_ID
146 | 17-hydroxyprogesterone				6238			6Q3360	UCDavis_Gaikwad_Lab_ID
147 | Allodihydrotestosterone				10635			14A2570	UCDavis_Gaikwad_Lab_ID
148 | Androstenedione				6128			12A6030	UCDavis_Gaikwad_Lab_ID
149 | Androstenolone (DHEA)				5881			3A8500	UCDavis_Gaikwad_Lab_ID
150 | Cortexolone				440707			7Q1610	UCDavis_Gaikwad_Lab_ID
151 | Cortexone				6166			9Q3460	UCDavis_Gaikwad_Lab_ID
152 | Corticosterone, DOC				5753			10Q1550	UCDavis_Gaikwad_Lab_ID
153 | Cortisol				5754			8Q3880	UCDavis_Gaikwad_Lab_ID
154 | Estradiol				5757			16E0950	UCDavis_Gaikwad_Lab_ID
155 | Estrone				5870			15E2300	UCDavis_Gaikwad_Lab_ID
156 | Pregnenolone				8955			1Q5500	UCDavis_Gaikwad_Lab_ID
157 | Progesterone				5994			5Q2600	UCDavis_Gaikwad_Lab_ID
158 | Testosterone				6013			13A6950	UCDavis_Gaikwad_Lab_ID
159 | METABOLITES_END
160 | #END
161 | 
162 | 
163 | 


--------------------------------------------------------------------------------
/tests/example_data/validation_files/ST000122_AN000204_error_2.txt:
--------------------------------------------------------------------------------
  1 | #METABOLOMICS WORKBENCH STUDY_ID:ST000122 ANALYSIS_ID:AN000204 PROJECT_ID:PR000109
  2 | VERSION             	1
  3 | CREATED_ON          	2016-09-17
  4 | #PROJECT
  5 | PR:PROJECT_TITLE                 	Perinatal DDT causes dysfunctional lipid metabolism underlying metabolic
  6 | PR:PROJECT_TYPE                  	Pilot and Feasibility Projects
  7 | PR:PROJECT_SUMMARY               	-
  8 | PR:INSTITUTE                     	University of California, Davis
  9 | PR:DEPARTMENT                    	Nutrition
 10 | PR:LABORATORY                    	Gaikwad Lab
 11 | PR:LAST_NAME                     	Gaikwad
 12 | PR:FIRST_NAME                    	Nilesh
 13 | PR:ADDRESS                       	-
 14 | PR:EMAIL                         	nwgaikwad@ucdavis.edu
 15 | PR:PHONE                         	530-752-2906
 16 | PR:FUNDING_SOURCE                	NIH 1U24DK097154 ;  PI Fiehn, Oliver  ; UC Davis WEST COAST CENTRAL
 17 | PR:FUNDING_SOURCE                	METABOLOMICS RESOURCE CORE (WC3MRC)
 18 | #STUDY
 19 | ST:STUDY_TITLE                   	Perinatal DDT causes dysfunctional lipid metabolism underlying metabolic
 20 | ST:STUDY_TYPE                    	steroid panel
 21 | ST:STUDY_SUMMARY                 	-
 22 | ST:INSTITUTE                     	University of California, Davis
 23 | ST:DEPARTMENT                    	Nutrition
 24 | ST:LABORATORY                    	Gaikwad Lab
 25 | ST:LAST_NAME                     	Gaikwad
 26 | ST:FIRST_NAME                    	Nilesh
 27 | ST:ADDRESS                       	-
 28 | ST:EMAIL                         	nwgaikwad@ucdavis.edu
 29 | ST:PHONE                         	-
 30 | ST:NUM_GROUPS                    	NA
 31 | #SUBJECT
 32 | SU:SUBJECT_TYPE                  	Human
 33 | SU:SUBJECT_SPECIES               	Homo sapiens
 34 | SU:TAXONOMY_ID                   	9606
 35 | #SUBJECT_SAMPLE_FACTORS:         	SUBJECT(optional)[tab]SAMPLE[tab]FACTORS(NAME:VALUE pairs separated by |)[tab]Additional sample data
 36 | SUBJECT_SAMPLE_FACTORS           	CER030_294717_ML_1	TEST	Tissue/Fluid:Serum	
 37 | SUBJECT_SAMPLE_FACTORS           	CER040_242995_ML_2	CER040_242995_ML_2	Tissue/Fluid:Serum	
 38 | SUBJECT_SAMPLE_FACTORS           	CER055_249947_ML_3	CER055_249947_ML_3	Tissue/Fluid:Serum	
 39 | SUBJECT_SAMPLE_FACTORS           	CER062_246153_ML_4	CER062_246153_ML_4	Tissue/Fluid:Serum	
 40 | SUBJECT_SAMPLE_FACTORS           	CER085_251176_ML_5	CER085_251176_ML_5	Tissue/Fluid:Serum	
 41 | SUBJECT_SAMPLE_FACTORS           	CER093_242931_ML_6	CER093_242931_ML_6	Tissue/Fluid:Serum	
 42 | SUBJECT_SAMPLE_FACTORS           	CER110_238825_ML_7	CER110_238825_ML_7	Tissue/Fluid:Serum	
 43 | SUBJECT_SAMPLE_FACTORS           	CER120_253690_ML_8	CER120_253690_ML_8	Tissue/Fluid:Serum	
 44 | SUBJECT_SAMPLE_FACTORS           	CER147_254803_ML_9	CER147_254803_ML_9	Tissue/Fluid:Serum	
 45 | SUBJECT_SAMPLE_FACTORS           	CER149_266689_ML_10	CER149_266689_ML_10	Tissue/Fluid:Serum	
 46 | SUBJECT_SAMPLE_FACTORS           	CER158_254231_ML_11	CER158_254231_ML_11	Tissue/Fluid:Serum	
 47 | SUBJECT_SAMPLE_FACTORS           	CER165_287001_ML_12	CER165_287001_ML_12	Tissue/Fluid:Serum	
 48 | SUBJECT_SAMPLE_FACTORS           	CER178_295145_ML_13	CER178_295145_ML_13	Tissue/Fluid:Serum	
 49 | SUBJECT_SAMPLE_FACTORS           	CER181_244392_ML_14	CER181_244392_ML_14	Tissue/Fluid:Serum	
 50 | SUBJECT_SAMPLE_FACTORS           	CER188_250760_ML_15	CER188_250760_ML_15	Tissue/Fluid:Serum	
 51 | SUBJECT_SAMPLE_FACTORS           	CER192_254091_ML_16	CER192_254091_ML_16	Tissue/Fluid:Serum	
 52 | SUBJECT_SAMPLE_FACTORS           	CER201_244193_ML_17	CER201_244193_ML_17	Tissue/Fluid:Serum	
 53 | SUBJECT_SAMPLE_FACTORS           	CER216_242490_ML_18	CER216_242490_ML_18	Tissue/Fluid:Serum	
 54 | SUBJECT_SAMPLE_FACTORS           	CER220_274308_ML_19	CER220_274308_ML_19	Tissue/Fluid:Serum	
 55 | SUBJECT_SAMPLE_FACTORS           	CER223_264067_ML_20	CER223_264067_ML_20	Tissue/Fluid:Serum	
 56 | SUBJECT_SAMPLE_FACTORS           	CER226_254303_ML_21	CER226_254303_ML_21	Tissue/Fluid:Serum	
 57 | SUBJECT_SAMPLE_FACTORS           	CER277_255328_ML_22	CER277_255328_ML_22	Tissue/Fluid:Serum	
 58 | SUBJECT_SAMPLE_FACTORS           	CER287_248530_ML_23	CER287_248530_ML_23	Tissue/Fluid:Serum	
 59 | SUBJECT_SAMPLE_FACTORS           	CER303_253023_ML_24	CER303_253023_ML_24	Tissue/Fluid:Serum	
 60 | SUBJECT_SAMPLE_FACTORS           	CER315_282966_ML_25	CER315_282966_ML_25	Tissue/Fluid:Serum	
 61 | SUBJECT_SAMPLE_FACTORS           	CER324_285069_ML_26	CER324_285069_ML_26	Tissue/Fluid:Serum	
 62 | SUBJECT_SAMPLE_FACTORS           	CER340_244448_ML_27	CER340_244448_ML_27	Tissue/Fluid:Serum	
 63 | SUBJECT_SAMPLE_FACTORS           	CER346_246320_ML_28	CER346_246320_ML_28	Tissue/Fluid:Serum	
 64 | SUBJECT_SAMPLE_FACTORS           	CER356_269662_ML_29	CER356_269662_ML_29	Tissue/Fluid:Serum	
 65 | SUBJECT_SAMPLE_FACTORS           	CER368_250104_ML_30	CER368_250104_ML_30	Tissue/Fluid:Serum	
 66 | SUBJECT_SAMPLE_FACTORS           	CER369_276355_ML_31	CER369_276355_ML_31	Tissue/Fluid:Serum	
 67 | SUBJECT_SAMPLE_FACTORS           	CER384_264971_ML_32	CER384_264971_ML_32	Tissue/Fluid:Serum	
 68 | SUBJECT_SAMPLE_FACTORS           	CER445_286527_ML_33	CER445_286527_ML_33	Tissue/Fluid:Serum	
 69 | SUBJECT_SAMPLE_FACTORS           	CER452_240972_ML_34	CER452_240972_ML_34	Tissue/Fluid:Serum	
 70 | SUBJECT_SAMPLE_FACTORS           	CER463_271249_ML_35	CER463_271249_ML_35	Tissue/Fluid:Serum	
 71 | SUBJECT_SAMPLE_FACTORS           	CER465_265004_ML_36	CER465_265004_ML_36	Tissue/Fluid:Serum	
 72 | SUBJECT_SAMPLE_FACTORS           	CER483_294606_ML_37	CER483_294606_ML_37	Tissue/Fluid:Serum	
 73 | SUBJECT_SAMPLE_FACTORS           	CER488_274343_ML_38	CER488_274343_ML_38	Tissue/Fluid:Serum	
 74 | SUBJECT_SAMPLE_FACTORS           	CER530_249229_ML_39	CER530_249229_ML_39	Tissue/Fluid:Serum	
 75 | SUBJECT_SAMPLE_FACTORS           	CER540_240346_ML_40	CER540_240346_ML_40	Tissue/Fluid:Serum	
 76 | SUBJECT_SAMPLE_FACTORS           	CER552_241945_ML_41	CER552_241945_ML_41	Tissue/Fluid:Serum	
 77 | SUBJECT_SAMPLE_FACTORS           	CER555_251239_ML_42	CER555_251239_ML_42	Tissue/Fluid:Serum	
 78 | #COLLECTION
 79 | CO:COLLECTION_SUMMARY            	-
 80 | #TREATMENT
 81 | TR:TREATMENT_SUMMARY             	-
 82 | #SAMPLEPREP
 83 | SP:SAMPLEPREP_SUMMARY            	Methanol: Water Extraction
 84 | SP:SAMPLEPREP_PROTOCOL_FILENAME  	NIH_WCMC_LaMerrill_Method_GaikwadLab__SteroidAnalysis_2013-14.docx
 85 | SP:PROCESSING_METHOD             	Homogenization and Solvent Removal w/ Speed Vac
 86 | SP:PROCESSING_STORAGE_CONDITIONS 	On Ice
 87 | SP:EXTRACTION_METHOD             	1:1 Methanol: Water
 88 | SP:EXTRACT_STORAGE               	-80C
 89 | SP:SAMPLE_RESUSPENSION           	150ul CH3OH/H2O
 90 | SP:ORGAN                         	Sprague-Dawley Maternal: Adrenal, liver, placenta, amniotic fluid
 91 | SP:ORGAN                         	Fetal: Male and female brain, male and female liver
 92 | #CHROMATOGRAPHY
 93 | CH:CHROMATOGRAPHY_SUMMARY        	Targeted UPLC-MS/MS
 94 | CH:CHROMATOGRAPHY_TYPE           	Reversed phase
 95 | CH:INSTRUMENT_NAME               	Waters Acquity
 96 | CH:COLUMN_NAME                   	Waters Acquity HSS T3 (150 x 2.1mm, 1.8um)
 97 | CH:FLOW_GRADIENT                 	0-2 min 100% A (Water 0.1% formic acid) 0% B (CH3CN 0.1 % formic acid), 2-4 min
 98 | CH:FLOW_GRADIENT                 	A, 4-9mins 45% A, 9-11 mins 20% A, 11-12 mins 100% A
 99 | CH:FLOW_RATE                     	0.15 ml/min
100 | CH:SAMPLE_INJECTION              	10ul
101 | CH:SOLVENT_A                     	Water 0.1% formic acid
102 | CH:SOLVENT_B                     	CH3CN 0.1 % formic acid
103 | CH:ANALYTICAL_TIME               	12 mins
104 | #ANALYSIS
105 | AN:ANALYSIS_TYPE                 	MS
106 | AN:LABORATORY_NAME               	Gaikwad Laboratory
107 | AN:ACQUISITION_DATE              	41716
108 | AN:SOFTWARE_VERSION              	Masslynx
109 | AN:OPERATOR_NAME                 	Nilesh Gaikwad
110 | #MS
111 | MS:INSTRUMENT_NAME               	Waters Xevo-TQ
112 | MS:INSTRUMENT_TYPE               	Triple quadrupole
113 | MS:MS_TYPE                       	ESI
114 | MS:ION_MODE                      	POSITIVE
115 | MS:CAPILLARY_VOLTAGE             	3.0 kV
116 | MS:COLLISION_GAS                 	N2
117 | MS:IONIZATION                    	Electrospray Ionization
118 | MS:SOURCE_TEMPERATURE            	150C
119 | MS:DESOLVATION_GAS_FLOW          	600 L/h
120 | MS:DESOLVATION_TEMPERATURE       	350C
121 | MS:MS_COMMENTS                   	UPLC-MS/MS
122 | #MS_METABOLITE_DATA
123 | MS_METABOLITE_DATA:UNITS         	pg/ml
124 | MS_METABOLITE_DATA_START
125 | Samples	CER030_294717_ML_1	CER040_242995_ML_2	CER055_249947_ML_3	CER062_246153_ML_4	CER085_251176_ML_5	CER093_242931_ML_6	CER110_238825_ML_7	CER120_253690_ML_8	CER147_254803_ML_9	CER149_266689_ML_10	CER158_254231_ML_11	CER165_287001_ML_12	CER178_295145_ML_13	CER181_244392_ML_14	CER188_250760_ML_15	CER192_254091_ML_16	CER201_244193_ML_17	CER216_242490_ML_18	CER220_274308_ML_19	CER223_264067_ML_20	CER226_254303_ML_21	CER277_255328_ML_22	CER287_248530_ML_23	CER303_253023_ML_24	CER315_282966_ML_25	CER324_285069_ML_26	CER340_244448_ML_27	CER346_246320_ML_28	CER356_269662_ML_29	CER368_250104_ML_30	CER369_276355_ML_31	CER384_264971_ML_32	CER445_286527_ML_33	CER452_240972_ML_34	CER463_271249_ML_35	CER465_265004_ML_36	CER483_294606_ML_37	CER488_274343_ML_38	CER530_249229_ML_39	CER540_240346_ML_40	CER552_241945_ML_41	CER555_251239_ML_42
126 | Factors	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum
127 | 17-hydroxypregnenolone	946.2500	0.0000	676.2500	0.0000	2251.2500	0.0000	0.0000	1134.7500	0.0000	0.0000	2016.7500	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	1919.7500	0.0000	972.7500	0.0000	1542.2500	1687.7500	421.0000	0.0000	373.2500	0.0000	614.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	528.2500
128 | 17-hydroxyprogesterone	0.0000	2.0000	0.0000	0.0000	19.2500	0.0000	0.0000	27.0000	2.0000	120.7500	27.7500	83.0000	0.0000	8.0000	3.5000	274.0000	0.0000	0.0000	3.0000	3.2500	0.0000	43.7500	15.2500	25.7500	4.2500	0.0000	0.0000	49.5000	27.7500	14.0000	9.7500	35.2500	34.7500	4.5000	8.0000	17.2500	0.0000	24.7500	19.0000	0.0000	4.5000	132.0000
129 | Allodihydrotestosterone	80.0000	1181.0000	0.0000	0.0000	0.0000	112.2500	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	288.0000	0.0000	0.0000	374.7500	0.0000	27.5000	112.7500	247.7500	39.0000	0.0000	0.0000	0.0000	0.0000	0.0000	761.0000	245.5000	332.5000	52.7500	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	465.7500	159.0000	0.0000	77.0000	315.5000	466.0000
130 | Androstenedione	76.7500	57.0000	176.2500	399.5000	208.5000	37.0000	281.2500	79.7500	250.7500	420.5000	123.0000	186.2500	34.7500	224.5000	67.7500	335.0000	126.5000	277.0000	50.5000	153.7500	62.2500	107.0000	431.2500	167.5000	134.0000	60.7500	38.5000	42.0000	78.7500	43.0000	60.0000	114.7500	237.7500	53.5000	51.7500	298.0000	220.2500	15.0000	256.5000	172.5000	79.2500	52.5000
131 | Androstenolone (DHEA)	1779.7500	1409.2500	945.7500	748.2500	2284.0000	2351.0000	2183.7500	1916.5000	5079.5000	1474.0000	1338.5000	1646.0000	2051.7500	2039.7500	2618.0000	306.7500	574.5000	1794.2500	1429.0000	2293.2500	2066.2500	2493.2500	918.0000	1579.2500	2042.2500	2645.7500	2393.7500	1913.0000	1641.5000	853.2500	586.5000	537.2500	562.5000	1887.2500	979.0000	678.5000	1357.2500	1526.2500	2300.7500	129.0000	409.2500	282.2500
132 | Cortexolone	0.0000	0.0000	0.0000	54.0000	0.0000	0.0000	0.0000	0.0000	215.7500	135.7500	72.7500	53.0000	11.7500	0.0000	0.0000	0.0000	0.0000	101.2500	11.2500	0.0000	0.0000	315.0000	181.2500	0.0000	7.7500	151.2500	0.0000	0.0000	104.0000	0.0000	0.0000	30.7500	94.2500	210.5000	33.2500	126.0000	0.0000	10.0000	17.0000	15.7500	0.0000	0.0000
133 | Cortexone	108.0000	16.0000	13.0000	117.5000	3.2500	63.2500	42.5000	146.7500	29.5000	204.2500	28.7500	67.0000	30.5000	103.0000	23.0000	416.7500	63.5000	32.5000	32.5000	127.2500	39.0000	84.2500	7.2500	16.2500	68.7500	27.0000	46.5000	21.7500	3.2500	14.7500	28.7500	67.0000	33.0000	40.7500	31.0000	32.2500	40.0000	13.7500	18.7500	0.0000	25.7500	29.0000
134 | Corticosterone_ DOC	0.0000	354.5000	0.0000	0.0000	322.5000	419.7500	420.7500	0.0000	0.0000	0.0000	393.2500	915.5000	0.0000	432.2500	1233.0000	0.0000	525.5000	1700.0000	0.0000	98.7500	285.5000	42.5000	428.2500	0.0000	427.5000	271.7500	254.7500	478.0000	303.5000	462.2500	532.0000	715.0000	1073.0000	836.2500	0.0000	1639.0000	601.7500	287.7500	0.0000	0.0000	435.2500	1602.2500
135 | Cortisol	7643.0000	39245.7500	11671.5000	20216.0000	14908.7500	14386.5000	16815.2500	7806.2500	27135.5000	7095.0000	12175.2500	36413.0000	2499.2500	15101.7500	22045.0000	24832.0000	13257.0000	19528.5000	4539.7500	7681.7500	9585.2500	19361.0000	24203.7500	5667.0000	19437.2500	10849.2500	11855.7500	7546.5000	3093.7500	19035.7500	18575.0000	14801.5000	22960.7500	22506.5000	8001.5000	31037.5000	18577.2500	15506.2500	8364.7500	2145.7500	5574.7500	19662.5000
136 | Estradiol	123992.2500	796595.7500	619110.0000	449415.7500	320835.5000	326124.2500	249087.2500	311589.2500	345598.5000	485857.0000	332055.2500	211831.0000	334929.7500	235466.7500	352555.0000	410500.0000	887955.0000	865791.7500	1648163.5000	856726.7500	579044.2500	254013.2500	326272.7500	239893.7500	329553.2500	438715.5000	248489.0000	380251.0000	338965.5000	337231.2500	342754.5000	370657.2500	2028106.5000	733521.0000	399244.2500	321007.5000	634463.0000	231294.0000	349439.2500	75746.7500	399415.5000	303855.7500
137 | Estrone	484.5000	1663.7500	1680.7500	794.5000	557.2500	625.7500	669.7500	885.0000	715.0000	1225.5000	697.7500	478.2500	659.0000	575.5000	871.7500	1089.0000	1726.2500	2325.2500	3286.7500	1955.7500	1094.0000	486.2500	650.5000	574.2500	601.7500	842.7500	757.7500	732.7500	571.7500	693.7500	1004.2500	879.2500	3154.7500	1095.2500	22680.2500	637.2500	1108.2500	474.2500	810.2500	421.2500	680.7500	623.7500
138 | Pregnenolone	12.2500	0.0000	0.0000	0.0000	0.0000	144.2500	14.7500	807.2500	0.0000	30.0000	0.0000	0.0000	0.0000	0.0000	16.5000	139.5000	132.5000	0.0000	0.0000	13.7500	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	488.5000	0.0000	0.0000	0.0000	0.0000	280.7500	0.0000	0.0000	0.0000	0.0000	0.0000	205.5000
139 | Progesterone	28.2500	6.2500	725.2500	57.2500	767.0000	2.7500	388.0000	9.0000	19.5000	242.5000	4.0000	0.0000	94.5000	160.7500	0.0000	3214.5000	218.2500	1.0000	0.0000	20.0000	4.5000	55.7500	24.5000	57.0000	200.5000	138.7500	132.2500	120.5000	80.5000	59.5000	315.7500	247.2500	211.5000	198.5000	232.2500	241.0000	199.5000	282.5000	216.5000	358.5000	289.5000	199.2500
140 | Testosterone	75.7500	63.2500	42.7500	98.0000	24.2500	35.0000	165.7500	23.2500	73.7500	52.7500	118.7500	35.7500	65.2500	127.2500	14.2500	202.5000	110.7500	53.5000	54.2500	2.2500	105.2500	182.7500	116.0000	66.2500	52.5000	106.2500	43.2500	57.2500	97.2500	16.0000	192.0000	53.7500	182.5000	0.2500	11.5000	87.2500	33.7500	45.5000	26.2500	96.0000	17.5000	79.7500
141 | MS_METABOLITE_DATA_END
142 | #METABOLITES
143 | METABOLITES_START
144 | metabolite_name	moverz_quant	ri	ri_type	pubchem_id	inchi_key	kegg_id	other_id	other_id_type
145 | 17-hydroxypregnenolone				91451			2Q4710	UCDavis_Gaikwad_Lab_ID
146 | 17-hydroxyprogesterone				6238			6Q3360	UCDavis_Gaikwad_Lab_ID
147 | Allodihydrotestosterone				10635			14A2570	UCDavis_Gaikwad_Lab_ID
148 | Androstenedione				6128			12A6030	UCDavis_Gaikwad_Lab_ID
149 | Androstenolone (DHEA)				5881			3A8500	UCDavis_Gaikwad_Lab_ID
150 | Cortexolone				440707			7Q1610	UCDavis_Gaikwad_Lab_ID
151 | Cortexone				6166			9Q3460	UCDavis_Gaikwad_Lab_ID
152 | Corticosterone, DOC				5753			10Q1550	UCDavis_Gaikwad_Lab_ID
153 | Cortisol				5754			8Q3880	UCDavis_Gaikwad_Lab_ID
154 | Estradiol				5757			16E0950	UCDavis_Gaikwad_Lab_ID
155 | Estrone				5870			15E2300	UCDavis_Gaikwad_Lab_ID
156 | Pregnenolone				8955			1Q5500	UCDavis_Gaikwad_Lab_ID
157 | Progesterone				5994			5Q2600	UCDavis_Gaikwad_Lab_ID
158 | Testosterone				6013			13A6950	UCDavis_Gaikwad_Lab_ID
159 | METABOLITES_END
160 | #END
161 | 
162 | 
163 | 


--------------------------------------------------------------------------------
/tests/example_data/validation_files/ST000122_AN000204_error_3.txt:
--------------------------------------------------------------------------------
  1 | #METABOLOMICS WORKBENCH STUDY_ID:ST000122 ANALYSIS_ID:AN000204 PROJECT_ID:PR000109
  2 | VERSION             	1
  3 | CREATED_ON          	2016-09-17
  4 | #PROJECT
  5 | PR:PROJECT_TITLE                 	Perinatal DDT causes dysfunctional lipid metabolism underlying metabolic
  6 | PR:PROJECT_TYPE                  	Pilot and Feasibility Projects
  7 | PR:PROJECT_SUMMARY               	-
  8 | PR:INSTITUTE                     	University of California, Davis
  9 | PR:DEPARTMENT                    	Nutrition
 10 | PR:LABORATORY                    	Gaikwad Lab
 11 | PR:LAST_NAME                     	Gaikwad
 12 | PR:FIRST_NAME                    	Nilesh
 13 | PR:ADDRESS                       	-
 14 | PR:EMAIL                         	nwgaikwad@ucdavis.edu
 15 | PR:PHONE                         	530-752-2906
 16 | PR:FUNDING_SOURCE                	NIH 1U24DK097154 ;  PI Fiehn, Oliver  ; UC Davis WEST COAST CENTRAL
 17 | PR:FUNDING_SOURCE                	METABOLOMICS RESOURCE CORE (WC3MRC)
 18 | #STUDY
 19 | ST:STUDY_TITLE                   	Perinatal DDT causes dysfunctional lipid metabolism underlying metabolic
 20 | ST:STUDY_TYPE                    	steroid panel
 21 | ST:STUDY_SUMMARY                 	-
 22 | ST:INSTITUTE                     	University of California, Davis
 23 | ST:DEPARTMENT                    	Nutrition
 24 | ST:LABORATORY                    	Gaikwad Lab
 25 | ST:LAST_NAME                     	Gaikwad
 26 | ST:FIRST_NAME                    	Nilesh
 27 | ST:ADDRESS                       	-
 28 | ST:EMAIL                         	nwgaikwad@ucdavis.edu
 29 | ST:PHONE                         	-
 30 | ST:NUM_GROUPS                    	NA
 31 | #SUBJECT
 32 | SU:SUBJECT_TYPE                  	Human
 33 | SU:SUBJECT_SPECIES               	Homo sapiens
 34 | SU:TAXONOMY_ID                   	9606
 35 | #SUBJECT_SAMPLE_FACTORS:         	SUBJECT(optional)[tab]SAMPLE[tab]FACTORS(NAME:VALUE pairs separated by |)[tab]Additional sample data
 36 | SUBJECT_SAMPLE_FACTORS           	CER030_294717_ML_1	CER030_294717_ML_1	Tissue/Fluid:Serum	
 37 | SUBJECT_SAMPLE_FACTORS           	CER040_242995_ML_2	CER040_242995_ML_2	Tissue/Fluid:Serum	
 38 | SUBJECT_SAMPLE_FACTORS           	CER055_249947_ML_3	CER055_249947_ML_3	Tissue/Fluid:Serum	
 39 | SUBJECT_SAMPLE_FACTORS           	CER062_246153_ML_4	CER062_246153_ML_4	Tissue/Fluid:Serum	
 40 | SUBJECT_SAMPLE_FACTORS           	CER085_251176_ML_5	CER085_251176_ML_5	Tissue/Fluid:Serum	
 41 | SUBJECT_SAMPLE_FACTORS           	CER093_242931_ML_6	CER093_242931_ML_6	Tissue/Fluid:Serum	
 42 | SUBJECT_SAMPLE_FACTORS           	CER110_238825_ML_7	CER110_238825_ML_7	Tissue/Fluid:Serum	
 43 | SUBJECT_SAMPLE_FACTORS           	CER120_253690_ML_8	CER120_253690_ML_8	Tissue/Fluid:Serum	
 44 | SUBJECT_SAMPLE_FACTORS           	CER147_254803_ML_9	CER147_254803_ML_9	Tissue/Fluid:Serum	
 45 | SUBJECT_SAMPLE_FACTORS           	CER149_266689_ML_10	CER149_266689_ML_10	Tissue/Fluid:Serum	
 46 | SUBJECT_SAMPLE_FACTORS           	CER158_254231_ML_11	CER158_254231_ML_11	Tissue/Fluid:Serum	
 47 | SUBJECT_SAMPLE_FACTORS           	CER165_287001_ML_12	CER165_287001_ML_12	Tissue/Fluid:Serum	
 48 | SUBJECT_SAMPLE_FACTORS           	CER178_295145_ML_13	CER178_295145_ML_13	Tissue/Fluid:Serum	
 49 | SUBJECT_SAMPLE_FACTORS           	CER181_244392_ML_14	CER181_244392_ML_14	Tissue/Fluid:Serum	
 50 | SUBJECT_SAMPLE_FACTORS           	CER188_250760_ML_15	CER188_250760_ML_15	Tissue/Fluid:Serum	
 51 | SUBJECT_SAMPLE_FACTORS           	CER192_254091_ML_16	CER192_254091_ML_16	Tissue/Fluid:Serum	
 52 | SUBJECT_SAMPLE_FACTORS           	CER201_244193_ML_17	CER201_244193_ML_17	Tissue/Fluid:Serum	
 53 | SUBJECT_SAMPLE_FACTORS           	CER216_242490_ML_18	CER216_242490_ML_18	Tissue/Fluid:Serum	
 54 | SUBJECT_SAMPLE_FACTORS           	CER220_274308_ML_19	CER220_274308_ML_19	Tissue/Fluid:Serum	
 55 | SUBJECT_SAMPLE_FACTORS           	CER223_264067_ML_20	CER223_264067_ML_20	Tissue/Fluid:Serum	
 56 | SUBJECT_SAMPLE_FACTORS           	CER226_254303_ML_21	CER226_254303_ML_21	Tissue/Fluid:Serum	
 57 | SUBJECT_SAMPLE_FACTORS           	CER277_255328_ML_22	CER277_255328_ML_22	Tissue/Fluid:Serum	
 58 | SUBJECT_SAMPLE_FACTORS           	CER287_248530_ML_23	CER287_248530_ML_23	Tissue/Fluid:Serum	
 59 | SUBJECT_SAMPLE_FACTORS           	CER303_253023_ML_24	CER303_253023_ML_24	Tissue/Fluid:Serum	
 60 | SUBJECT_SAMPLE_FACTORS           	CER315_282966_ML_25	CER315_282966_ML_25	Tissue/Fluid:Serum	
 61 | SUBJECT_SAMPLE_FACTORS           	CER324_285069_ML_26	CER324_285069_ML_26	Tissue/Fluid:Serum	
 62 | SUBJECT_SAMPLE_FACTORS           	CER340_244448_ML_27	CER340_244448_ML_27	Tissue/Fluid:Serum	
 63 | SUBJECT_SAMPLE_FACTORS           	CER346_246320_ML_28	CER346_246320_ML_28	Tissue/Fluid:Serum	
 64 | SUBJECT_SAMPLE_FACTORS           	CER356_269662_ML_29	CER356_269662_ML_29	Tissue/Fluid:Serum	
 65 | SUBJECT_SAMPLE_FACTORS           	CER368_250104_ML_30	CER368_250104_ML_30	Tissue/Fluid:Serum	
 66 | SUBJECT_SAMPLE_FACTORS           	CER369_276355_ML_31	CER369_276355_ML_31	Tissue/Fluid:Serum	
 67 | SUBJECT_SAMPLE_FACTORS           	CER384_264971_ML_32	CER384_264971_ML_32	Tissue/Fluid:Serum	
 68 | SUBJECT_SAMPLE_FACTORS           	CER445_286527_ML_33	CER445_286527_ML_33	Tissue/Fluid:Serum	
 69 | SUBJECT_SAMPLE_FACTORS           	CER452_240972_ML_34	CER452_240972_ML_34	Tissue/Fluid:Serum	
 70 | SUBJECT_SAMPLE_FACTORS           	CER463_271249_ML_35	CER463_271249_ML_35	Tissue/Fluid:Serum	
 71 | SUBJECT_SAMPLE_FACTORS           	CER465_265004_ML_36	CER465_265004_ML_36	Tissue/Fluid:Serum	
 72 | SUBJECT_SAMPLE_FACTORS           	CER483_294606_ML_37	CER483_294606_ML_37	Tissue/Fluid:Serum	
 73 | SUBJECT_SAMPLE_FACTORS           	CER488_274343_ML_38	CER488_274343_ML_38	Tissue/Fluid:Serum	
 74 | SUBJECT_SAMPLE_FACTORS           	CER530_249229_ML_39	CER530_249229_ML_39	Tissue/Fluid:Serum	
 75 | SUBJECT_SAMPLE_FACTORS           	CER540_240346_ML_40	CER540_240346_ML_40	Tissue/Fluid:Serum	
 76 | SUBJECT_SAMPLE_FACTORS           	CER552_241945_ML_41	CER552_241945_ML_41	Tissue/Fluid:Serum	
 77 | SUBJECT_SAMPLE_FACTORS           	CER555_251239_ML_42	CER555_251239_ML_42	Tissue/Fluid:Serum	
 78 | #COLLECTION
 79 | CO:COLLECTION_SUMMARY            	-
 80 | #TREATMENT
 81 | TR:TREATMENT_SUMMARY             	-
 82 | #SAMPLEPREP
 83 | SP:SAMPLEPREP_SUMMARY            	Methanol: Water Extraction
 84 | SP:SAMPLEPREP_PROTOCOL_FILENAME  	NIH_WCMC_LaMerrill_Method_GaikwadLab__SteroidAnalysis_2013-14.docx
 85 | SP:PROCESSING_METHOD             	Homogenization and Solvent Removal w/ Speed Vac
 86 | SP:PROCESSING_STORAGE_CONDITIONS 	On Ice
 87 | SP:EXTRACTION_METHOD             	1:1 Methanol: Water
 88 | SP:EXTRACT_STORAGE               	-80C
 89 | SP:SAMPLE_RESUSPENSION           	150ul CH3OH/H2O
 90 | SP:ORGAN                         	Sprague-Dawley Maternal: Adrenal, liver, placenta, amniotic fluid
 91 | SP:ORGAN                         	Fetal: Male and female brain, male and female liver
 92 | #CHROMATOGRAPHY
 93 | CH:CHROMATOGRAPHY_SUMMARY        	Targeted UPLC-MS/MS
 94 | CH:CHROMATOGRAPHY_TYPE           	Reversed phase
 95 | CH:INSTRUMENT_NAME               	Waters Acquity
 96 | CH:COLUMN_NAME                   	Waters Acquity HSS T3 (150 x 2.1mm, 1.8um)
 97 | CH:FLOW_GRADIENT                 	0-2 min 100% A (Water 0.1% formic acid) 0% B (CH3CN 0.1 % formic acid), 2-4 min
 98 | CH:FLOW_GRADIENT                 	A, 4-9mins 45% A, 9-11 mins 20% A, 11-12 mins 100% A
 99 | CH:FLOW_RATE                     	0.15 ml/min
100 | CH:SAMPLE_INJECTION              	10ul
101 | CH:SOLVENT_A                     	Water 0.1% formic acid
102 | CH:SOLVENT_B                     	CH3CN 0.1 % formic acid
103 | CH:ANALYTICAL_TIME               	12 mins
104 | #ANALYSIS
105 | AN:ANALYSIS_TYPE                 	MS
106 | AN:LABORATORY_NAME               	Gaikwad Laboratory
107 | AN:ACQUISITION_DATE              	41716
108 | AN:SOFTWARE_VERSION              	Masslynx
109 | AN:OPERATOR_NAME                 	Nilesh Gaikwad
110 | #MS
111 | MS:INSTRUMENT_NAME               	Waters Xevo-TQ
112 | MS:INSTRUMENT_TYPE               	Triple quadrupole
113 | MS:MS_TYPE                       	ESI
114 | MS:ION_MODE                      	POSITIVE
115 | MS:CAPILLARY_VOLTAGE             	3.0 kV
116 | MS:COLLISION_GAS                 	N2
117 | MS:IONIZATION                    	Electrospray Ionization
118 | MS:SOURCE_TEMPERATURE            	150C
119 | MS:DESOLVATION_GAS_FLOW          	600 L/h
120 | MS:DESOLVATION_TEMPERATURE       	350C
121 | MS:MS_COMMENTS                   	UPLC-MS/MS
122 | #MS_METABOLITE_DATA
123 | MS_METABOLITE_DATA:UNITS         	pg/ml
124 | MS_METABOLITE_DATA_START
125 | Samples	CER030_294717_ML_1	CER040_242995_ML_2	CER055_249947_ML_3	CER062_246153_ML_4	CER085_251176_ML_5	CER093_242931_ML_6	CER110_238825_ML_7	CER120_253690_ML_8	CER147_254803_ML_9	CER149_266689_ML_10	CER158_254231_ML_11	CER165_287001_ML_12	CER178_295145_ML_13	CER181_244392_ML_14	CER188_250760_ML_15	CER192_254091_ML_16	CER201_244193_ML_17	CER216_242490_ML_18	CER220_274308_ML_19	CER223_264067_ML_20	CER226_254303_ML_21	CER277_255328_ML_22	CER287_248530_ML_23	CER303_253023_ML_24	CER315_282966_ML_25	CER324_285069_ML_26	CER340_244448_ML_27	CER346_246320_ML_28	CER356_269662_ML_29	CER368_250104_ML_30	CER369_276355_ML_31	CER384_264971_ML_32	CER445_286527_ML_33	CER452_240972_ML_34	CER463_271249_ML_35	CER465_265004_ML_36	CER483_294606_ML_37	CER488_274343_ML_38	CER530_249229_ML_39	CER540_240346_ML_40	CER552_241945_ML_41	CER555_251239_ML_42
126 | Factors	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum
127 | 17-hydroxypregnenolone	946.2500	0.0000	676.2500	0.0000	2251.2500	0.0000	0.0000	1134.7500	0.0000	0.0000	2016.7500	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	1919.7500	0.0000	972.7500	0.0000	1542.2500	1687.7500	421.0000	0.0000	373.2500	0.0000	614.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	528.2500
128 | 17-hydroxyprogesterone	0.0000	2.0000	0.0000	0.0000	19.2500	0.0000	0.0000	27.0000	2.0000	120.7500	27.7500	83.0000	0.0000	8.0000	3.5000	274.0000	0.0000	0.0000	3.0000	3.2500	0.0000	43.7500	15.2500	25.7500	4.2500	0.0000	0.0000	49.5000	27.7500	14.0000	9.7500	35.2500	34.7500	4.5000	8.0000	17.2500	0.0000	24.7500	19.0000	0.0000	4.5000	132.0000
129 | Allodihydrotestosterone	80.0000	1181.0000	0.0000	0.0000	0.0000	112.2500	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	288.0000	0.0000	0.0000	374.7500	0.0000	27.5000	112.7500	247.7500	39.0000	0.0000	0.0000	0.0000	0.0000	0.0000	761.0000	245.5000	332.5000	52.7500	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	465.7500	159.0000	0.0000	77.0000	315.5000	466.0000
130 | Androstenedione	76.7500	57.0000	176.2500	399.5000	208.5000	37.0000	281.2500	79.7500	250.7500	420.5000	123.0000	186.2500	34.7500	224.5000	67.7500	335.0000	126.5000	277.0000	50.5000	153.7500	62.2500	107.0000	431.2500	167.5000	134.0000	60.7500	38.5000	42.0000	78.7500	43.0000	60.0000	114.7500	237.7500	53.5000	51.7500	298.0000	220.2500	15.0000	256.5000	172.5000	79.2500	52.5000
131 | Androstenolone (DHEA)	1779.7500	1409.2500	945.7500	748.2500	2284.0000	2351.0000	2183.7500	1916.5000	5079.5000	1474.0000	1338.5000	1646.0000	2051.7500	2039.7500	2618.0000	306.7500	574.5000	1794.2500	1429.0000	2293.2500	2066.2500	2493.2500	918.0000	1579.2500	2042.2500	2645.7500	2393.7500	1913.0000	1641.5000	853.2500	586.5000	537.2500	562.5000	1887.2500	979.0000	678.5000	1357.2500	1526.2500	2300.7500	129.0000	409.2500	282.2500
132 | Cortexolone	0.0000	0.0000	0.0000	54.0000	0.0000	0.0000	0.0000	0.0000	215.7500	135.7500	72.7500	53.0000	11.7500	0.0000	0.0000	0.0000	0.0000	101.2500	11.2500	0.0000	0.0000	315.0000	181.2500	0.0000	7.7500	151.2500	0.0000	0.0000	104.0000	0.0000	0.0000	30.7500	94.2500	210.5000	33.2500	126.0000	0.0000	10.0000	17.0000	15.7500	0.0000	0.0000
133 | Cortexone	108.0000	16.0000	13.0000	117.5000	3.2500	63.2500	42.5000	146.7500	29.5000	204.2500	28.7500	67.0000	30.5000	103.0000	23.0000	416.7500	63.5000	32.5000	32.5000	127.2500	39.0000	84.2500	7.2500	16.2500	68.7500	27.0000	46.5000	21.7500	3.2500	14.7500	28.7500	67.0000	33.0000	40.7500	31.0000	32.2500	40.0000	13.7500	18.7500	0.0000	25.7500	29.0000
134 | Corticosterone_ DOC	0.0000	354.5000	0.0000	0.0000	322.5000	419.7500	420.7500	0.0000	0.0000	0.0000	393.2500	915.5000	0.0000	432.2500	1233.0000	0.0000	525.5000	1700.0000	0.0000	98.7500	285.5000	42.5000	428.2500	0.0000	427.5000	271.7500	254.7500	478.0000	303.5000	462.2500	532.0000	715.0000	1073.0000	836.2500	0.0000	1639.0000	601.7500	287.7500	0.0000	0.0000	435.2500	1602.2500
135 | Cortisol	7643.0000	39245.7500	11671.5000	20216.0000	14908.7500	14386.5000	16815.2500	7806.2500	27135.5000	7095.0000	12175.2500	36413.0000	2499.2500	15101.7500	22045.0000	24832.0000	13257.0000	19528.5000	4539.7500	7681.7500	9585.2500	19361.0000	24203.7500	5667.0000	19437.2500	10849.2500	11855.7500	7546.5000	3093.7500	19035.7500	18575.0000	14801.5000	22960.7500	22506.5000	8001.5000	31037.5000	18577.2500	15506.2500	8364.7500	2145.7500	5574.7500	19662.5000
136 | Estradiol	123992.2500	796595.7500	619110.0000	449415.7500	320835.5000	326124.2500	249087.2500	311589.2500	345598.5000	485857.0000	332055.2500	211831.0000	334929.7500	235466.7500	352555.0000	410500.0000	887955.0000	865791.7500	1648163.5000	856726.7500	579044.2500	254013.2500	326272.7500	239893.7500	329553.2500	438715.5000	248489.0000	380251.0000	338965.5000	337231.2500	342754.5000	370657.2500	2028106.5000	733521.0000	399244.2500	321007.5000	634463.0000	231294.0000	349439.2500	75746.7500	399415.5000	303855.7500
137 | Estrone	484.5000	1663.7500	1680.7500	794.5000	557.2500	625.7500	669.7500	885.0000	715.0000	1225.5000	697.7500	478.2500	659.0000	575.5000	871.7500	1089.0000	1726.2500	2325.2500	3286.7500	1955.7500	1094.0000	486.2500	650.5000	574.2500	601.7500	842.7500	757.7500	732.7500	571.7500	693.7500	1004.2500	879.2500	3154.7500	1095.2500	22680.2500	637.2500	1108.2500	474.2500	810.2500	421.2500	680.7500	623.7500
138 | Pregnenolone	12.2500	0.0000	0.0000	0.0000	0.0000	144.2500	14.7500	807.2500	0.0000	30.0000	0.0000	0.0000	0.0000	0.0000	16.5000	139.5000	132.5000	0.0000	0.0000	13.7500	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	488.5000	0.0000	0.0000	0.0000	0.0000	280.7500	0.0000	0.0000	0.0000	0.0000	0.0000	205.5000
139 | Progesterone	28.2500	6.2500	725.2500	57.2500	767.0000	2.7500	388.0000	9.0000	19.5000	242.5000	4.0000	0.0000	94.5000	160.7500	0.0000	3214.5000	218.2500	1.0000	0.0000	20.0000	4.5000	55.7500	24.5000	57.0000	200.5000	138.7500	132.2500	120.5000	80.5000	59.5000	315.7500	247.2500	211.5000	198.5000	232.2500	241.0000	199.5000	282.5000	216.5000	358.5000	289.5000	199.2500
140 | Testosterone	75.7500	63.2500	42.7500	98.0000	24.2500	35.0000	165.7500	23.2500	73.7500	52.7500	118.7500	35.7500	65.2500	127.2500	14.2500	202.5000	110.7500	53.5000	54.2500	2.2500	105.2500	182.7500	116.0000	66.2500	52.5000	106.2500	43.2500	57.2500	97.2500	16.0000	192.0000	53.7500	182.5000	0.2500	11.5000	87.2500	33.7500	45.5000	26.2500	96.0000	17.5000	79.7500
141 | MS_METABOLITE_DATA_END
142 | #METABOLITES
143 | METABOLITES_START
144 | metabolite_name	moverz_quant	ri	ri_type	pubchem_id	inchi_key	kegg_id	other_id	other_id_type
145 | 17-hydroxypregnenolone				91451			2Q4710	UCDavis_Gaikwad_Lab_ID
146 | 17-hydroxyprogesterone				6238			6Q3360	UCDavis_Gaikwad_Lab_ID
147 | Allodihydrotestosterone				10635			14A2570	UCDavis_Gaikwad_Lab_ID
148 | Androstenedione				6128			12A6030	UCDavis_Gaikwad_Lab_ID
149 | Androstenolone (DHEA)				5881			3A8500	UCDavis_Gaikwad_Lab_ID
150 | Cortexolone				440707			7Q1610	UCDavis_Gaikwad_Lab_ID
151 | Cortexone				6166			9Q3460	UCDavis_Gaikwad_Lab_ID
152 | Corticosterone, DOC				5753			10Q1550	UCDavis_Gaikwad_Lab_ID
153 | Cortisol				5754			8Q3880	UCDavis_Gaikwad_Lab_ID
154 | Estradiol				5757			16E0950	UCDavis_Gaikwad_Lab_ID
155 | Estrone				5870			15E2300	UCDavis_Gaikwad_Lab_ID
156 | Pregnenolone				8955			1Q5500	UCDavis_Gaikwad_Lab_ID
157 | Progesterone				5994			5Q2600	UCDavis_Gaikwad_Lab_ID
158 | Testosterone				6013			13A6950	UCDavis_Gaikwad_Lab_ID
159 | METABOLITES_END
160 | #END
161 | 
162 | 
163 | 


--------------------------------------------------------------------------------
/tests/example_data/validation_files/ST000122_AN000204_error_4.txt:
--------------------------------------------------------------------------------
  1 | #METABOLOMICS WORKBENCH STUDY_ID:ST000122 ANALYSIS_ID:AN000204 PROJECT_ID:PR000109
  2 | VERSION             	1
  3 | CREATED_ON          	2016-09-17
  4 | #PROJECT
  5 | PR:PROJECT_TYPE                  	Pilot and Feasibility Projects
  6 | PR:PROJECT_SUMMARY               	-
  7 | PR:INSTITUTE                     	University of California, Davis
  8 | PR:DEPARTMENT                    	Nutrition
  9 | PR:LABORATORY                    	Gaikwad Lab
 10 | PR:LAST_NAME                     	Gaikwad
 11 | PR:FIRST_NAME                    	Nilesh
 12 | PR:ADDRESS                       	-
 13 | PR:EMAIL                         	nwgaikwad@ucdavis.edu
 14 | PR:PHONE                         	530-752-2906
 15 | PR:FUNDING_SOURCE                	NIH 1U24DK097154 ;  PI Fiehn, Oliver  ; UC Davis WEST COAST CENTRAL
 16 | PR:FUNDING_SOURCE                	METABOLOMICS RESOURCE CORE (WC3MRC)
 17 | #STUDY
 18 | ST:STUDY_TITLE                   	Perinatal DDT causes dysfunctional lipid metabolism underlying metabolic
 19 | ST:STUDY_TYPE                    	steroid panel
 20 | ST:STUDY_SUMMARY                 	-
 21 | ST:INSTITUTE                     	University of California, Davis
 22 | ST:DEPARTMENT                    	Nutrition
 23 | ST:LABORATORY                    	Gaikwad Lab
 24 | ST:LAST_NAME                     	Gaikwad
 25 | ST:FIRST_NAME                    	Nilesh
 26 | ST:ADDRESS                       	-
 27 | ST:EMAIL                         	nwgaikwad@ucdavis.edu
 28 | ST:PHONE                         	-
 29 | ST:NUM_GROUPS                    	NA
 30 | #SUBJECT
 31 | SU:SUBJECT_TYPE                  	Human
 32 | SU:SUBJECT_SPECIES               	Homo sapiens
 33 | SU:TAXONOMY_ID                   	9606
 34 | #SUBJECT_SAMPLE_FACTORS:         	SUBJECT(optional)[tab]SAMPLE[tab]FACTORS(NAME:VALUE pairs separated by |)[tab]Additional sample data
 35 | SUBJECT_SAMPLE_FACTORS           	CER030_294717_ML_1	CER030_294717_ML_1	Tissue/Fluid:Serum	
 36 | SUBJECT_SAMPLE_FACTORS           	CER040_242995_ML_2	CER040_242995_ML_2	Tissue/Fluid:Serum	
 37 | SUBJECT_SAMPLE_FACTORS           	CER055_249947_ML_3	CER055_249947_ML_3	Tissue/Fluid:Serum	
 38 | SUBJECT_SAMPLE_FACTORS           	CER062_246153_ML_4	CER062_246153_ML_4	Tissue/Fluid:Serum	
 39 | SUBJECT_SAMPLE_FACTORS           	CER085_251176_ML_5	CER085_251176_ML_5	Tissue/Fluid:Serum	
 40 | SUBJECT_SAMPLE_FACTORS           	CER093_242931_ML_6	CER093_242931_ML_6	Tissue/Fluid:Serum	
 41 | SUBJECT_SAMPLE_FACTORS           	CER110_238825_ML_7	CER110_238825_ML_7	Tissue/Fluid:Serum	
 42 | SUBJECT_SAMPLE_FACTORS           	CER120_253690_ML_8	CER120_253690_ML_8	Tissue/Fluid:Serum	
 43 | SUBJECT_SAMPLE_FACTORS           	CER147_254803_ML_9	CER147_254803_ML_9	Tissue/Fluid:Serum	
 44 | SUBJECT_SAMPLE_FACTORS           	CER149_266689_ML_10	CER149_266689_ML_10	Tissue/Fluid:Serum	
 45 | SUBJECT_SAMPLE_FACTORS           	CER158_254231_ML_11	CER158_254231_ML_11	Tissue/Fluid:Serum	
 46 | SUBJECT_SAMPLE_FACTORS           	CER165_287001_ML_12	CER165_287001_ML_12	Tissue/Fluid:Serum	
 47 | SUBJECT_SAMPLE_FACTORS           	CER178_295145_ML_13	CER178_295145_ML_13	Tissue/Fluid:Serum	
 48 | SUBJECT_SAMPLE_FACTORS           	CER181_244392_ML_14	CER181_244392_ML_14	Tissue/Fluid:Serum	
 49 | SUBJECT_SAMPLE_FACTORS           	CER188_250760_ML_15	CER188_250760_ML_15	Tissue/Fluid:Serum	
 50 | SUBJECT_SAMPLE_FACTORS           	CER192_254091_ML_16	CER192_254091_ML_16	Tissue/Fluid:Serum	
 51 | SUBJECT_SAMPLE_FACTORS           	CER201_244193_ML_17	CER201_244193_ML_17	Tissue/Fluid:Serum	
 52 | SUBJECT_SAMPLE_FACTORS           	CER216_242490_ML_18	CER216_242490_ML_18	Tissue/Fluid:Serum	
 53 | SUBJECT_SAMPLE_FACTORS           	CER220_274308_ML_19	CER220_274308_ML_19	Tissue/Fluid:Serum	
 54 | SUBJECT_SAMPLE_FACTORS           	CER223_264067_ML_20	CER223_264067_ML_20	Tissue/Fluid:Serum	
 55 | SUBJECT_SAMPLE_FACTORS           	CER226_254303_ML_21	CER226_254303_ML_21	Tissue/Fluid:Serum	
 56 | SUBJECT_SAMPLE_FACTORS           	CER277_255328_ML_22	CER277_255328_ML_22	Tissue/Fluid:Serum	
 57 | SUBJECT_SAMPLE_FACTORS           	CER287_248530_ML_23	CER287_248530_ML_23	Tissue/Fluid:Serum	
 58 | SUBJECT_SAMPLE_FACTORS           	CER303_253023_ML_24	CER303_253023_ML_24	Tissue/Fluid:Serum	
 59 | SUBJECT_SAMPLE_FACTORS           	CER315_282966_ML_25	CER315_282966_ML_25	Tissue/Fluid:Serum	
 60 | SUBJECT_SAMPLE_FACTORS           	CER324_285069_ML_26	CER324_285069_ML_26	Tissue/Fluid:Serum	
 61 | SUBJECT_SAMPLE_FACTORS           	CER340_244448_ML_27	CER340_244448_ML_27	Tissue/Fluid:Serum	
 62 | SUBJECT_SAMPLE_FACTORS           	CER346_246320_ML_28	CER346_246320_ML_28	Tissue/Fluid:Serum	
 63 | SUBJECT_SAMPLE_FACTORS           	CER356_269662_ML_29	CER356_269662_ML_29	Tissue/Fluid:Serum	
 64 | SUBJECT_SAMPLE_FACTORS           	CER368_250104_ML_30	CER368_250104_ML_30	Tissue/Fluid:Serum	
 65 | SUBJECT_SAMPLE_FACTORS           	CER369_276355_ML_31	CER369_276355_ML_31	Tissue/Fluid:Serum	
 66 | SUBJECT_SAMPLE_FACTORS           	CER384_264971_ML_32	CER384_264971_ML_32	Tissue/Fluid:Serum	
 67 | SUBJECT_SAMPLE_FACTORS           	CER445_286527_ML_33	CER445_286527_ML_33	Tissue/Fluid:Serum	
 68 | SUBJECT_SAMPLE_FACTORS           	CER452_240972_ML_34	CER452_240972_ML_34	Tissue/Fluid:Serum	
 69 | SUBJECT_SAMPLE_FACTORS           	CER463_271249_ML_35	CER463_271249_ML_35	Tissue/Fluid:Serum	
 70 | SUBJECT_SAMPLE_FACTORS           	CER465_265004_ML_36	CER465_265004_ML_36	Tissue/Fluid:Serum	
 71 | SUBJECT_SAMPLE_FACTORS           	CER483_294606_ML_37	CER483_294606_ML_37	Tissue/Fluid:Serum	
 72 | SUBJECT_SAMPLE_FACTORS           	CER488_274343_ML_38	CER488_274343_ML_38	Tissue/Fluid:Serum	
 73 | SUBJECT_SAMPLE_FACTORS           	CER530_249229_ML_39	CER530_249229_ML_39	Tissue/Fluid:Serum	
 74 | SUBJECT_SAMPLE_FACTORS           	CER540_240346_ML_40	CER540_240346_ML_40	Tissue/Fluid:Serum	
 75 | SUBJECT_SAMPLE_FACTORS           	CER552_241945_ML_41	CER552_241945_ML_41	Tissue/Fluid:Serum	
 76 | SUBJECT_SAMPLE_FACTORS           	CER555_251239_ML_42	CER555_251239_ML_42	Tissue/Fluid:Serum	
 77 | #COLLECTION
 78 | CO:COLLECTION_SUMMARY            	-
 79 | #TREATMENT
 80 | TR:TREATMENT_SUMMARY             	-
 81 | #SAMPLEPREP
 82 | SP:SAMPLEPREP_SUMMARY            	Methanol: Water Extraction
 83 | SP:SAMPLEPREP_PROTOCOL_FILENAME  	NIH_WCMC_LaMerrill_Method_GaikwadLab__SteroidAnalysis_2013-14.docx
 84 | SP:PROCESSING_METHOD             	Homogenization and Solvent Removal w/ Speed Vac
 85 | SP:PROCESSING_STORAGE_CONDITIONS 	On Ice
 86 | SP:EXTRACTION_METHOD             	1:1 Methanol: Water
 87 | SP:EXTRACT_STORAGE               	-80C
 88 | SP:SAMPLE_RESUSPENSION           	150ul CH3OH/H2O
 89 | SP:ORGAN                         	Sprague-Dawley Maternal: Adrenal, liver, placenta, amniotic fluid
 90 | SP:ORGAN                         	Fetal: Male and female brain, male and female liver
 91 | #CHROMATOGRAPHY
 92 | CH:CHROMATOGRAPHY_SUMMARY        	Targeted UPLC-MS/MS
 93 | CH:CHROMATOGRAPHY_TYPE           	Reversed phase
 94 | CH:INSTRUMENT_NAME               	Waters Acquity
 95 | CH:COLUMN_NAME                   	Waters Acquity HSS T3 (150 x 2.1mm, 1.8um)
 96 | CH:FLOW_GRADIENT                 	0-2 min 100% A (Water 0.1% formic acid) 0% B (CH3CN 0.1 % formic acid), 2-4 min
 97 | CH:FLOW_GRADIENT                 	A, 4-9mins 45% A, 9-11 mins 20% A, 11-12 mins 100% A
 98 | CH:FLOW_RATE                     	0.15 ml/min
 99 | CH:SAMPLE_INJECTION              	10ul
100 | CH:SOLVENT_A                     	Water 0.1% formic acid
101 | CH:SOLVENT_B                     	CH3CN 0.1 % formic acid
102 | CH:ANALYTICAL_TIME               	12 mins
103 | #ANALYSIS
104 | AN:ANALYSIS_TYPE                 	MS
105 | AN:LABORATORY_NAME               	Gaikwad Laboratory
106 | AN:ACQUISITION_DATE              	41716
107 | AN:SOFTWARE_VERSION              	Masslynx
108 | AN:OPERATOR_NAME                 	Nilesh Gaikwad
109 | #MS
110 | MS:INSTRUMENT_NAME               	Waters Xevo-TQ
111 | MS:INSTRUMENT_TYPE               	Triple quadrupole
112 | MS:MS_TYPE                       	ESI
113 | MS:ION_MODE                      	POSITIVE
114 | MS:CAPILLARY_VOLTAGE             	3.0 kV
115 | MS:COLLISION_GAS                 	N2
116 | MS:IONIZATION                    	Electrospray Ionization
117 | MS:SOURCE_TEMPERATURE            	150C
118 | MS:DESOLVATION_GAS_FLOW          	600 L/h
119 | MS:DESOLVATION_TEMPERATURE       	350C
120 | MS:MS_COMMENTS                   	UPLC-MS/MS
121 | #MS_METABOLITE_DATA
122 | MS_METABOLITE_DATA:UNITS         	pg/ml
123 | MS_METABOLITE_DATA_START
124 | Samples	CER030_294717_ML_1	CER040_242995_ML_2	CER055_249947_ML_3	CER062_246153_ML_4	CER085_251176_ML_5	CER093_242931_ML_6	CER110_238825_ML_7	CER120_253690_ML_8	CER147_254803_ML_9	CER149_266689_ML_10	CER158_254231_ML_11	CER165_287001_ML_12	CER178_295145_ML_13	CER181_244392_ML_14	CER188_250760_ML_15	CER192_254091_ML_16	CER201_244193_ML_17	CER216_242490_ML_18	CER220_274308_ML_19	CER223_264067_ML_20	CER226_254303_ML_21	CER277_255328_ML_22	CER287_248530_ML_23	CER303_253023_ML_24	CER315_282966_ML_25	CER324_285069_ML_26	CER340_244448_ML_27	CER346_246320_ML_28	CER356_269662_ML_29	CER368_250104_ML_30	CER369_276355_ML_31	CER384_264971_ML_32	CER445_286527_ML_33	CER452_240972_ML_34	CER463_271249_ML_35	CER465_265004_ML_36	CER483_294606_ML_37	CER488_274343_ML_38	CER530_249229_ML_39	CER540_240346_ML_40	CER552_241945_ML_41	CER555_251239_ML_42
125 | Factors	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum	Tissue/Fluid:Serum
126 | 17-hydroxypregnenolone	946.2500	0.0000	676.2500	0.0000	2251.2500	0.0000	0.0000	1134.7500	0.0000	0.0000	2016.7500	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	1919.7500	0.0000	972.7500	0.0000	1542.2500	1687.7500	421.0000	0.0000	373.2500	0.0000	614.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	528.2500
127 | 17-hydroxyprogesterone	0.0000	2.0000	0.0000	0.0000	19.2500	0.0000	0.0000	27.0000	2.0000	120.7500	27.7500	83.0000	0.0000	8.0000	3.5000	274.0000	0.0000	0.0000	3.0000	3.2500	0.0000	43.7500	15.2500	25.7500	4.2500	0.0000	0.0000	49.5000	27.7500	14.0000	9.7500	35.2500	34.7500	4.5000	8.0000	17.2500	0.0000	24.7500	19.0000	0.0000	4.5000	132.0000
128 | Allodihydrotestosterone	80.0000	1181.0000	0.0000	0.0000	0.0000	112.2500	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	288.0000	0.0000	0.0000	374.7500	0.0000	27.5000	112.7500	247.7500	39.0000	0.0000	0.0000	0.0000	0.0000	0.0000	761.0000	245.5000	332.5000	52.7500	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	465.7500	159.0000	0.0000	77.0000	315.5000	466.0000
129 | Androstenedione	76.7500	57.0000	176.2500	399.5000	208.5000	37.0000	281.2500	79.7500	250.7500	420.5000	123.0000	186.2500	34.7500	224.5000	67.7500	335.0000	126.5000	277.0000	50.5000	153.7500	62.2500	107.0000	431.2500	167.5000	134.0000	60.7500	38.5000	42.0000	78.7500	43.0000	60.0000	114.7500	237.7500	53.5000	51.7500	298.0000	220.2500	15.0000	256.5000	172.5000	79.2500	52.5000
130 | Androstenolone (DHEA)	1779.7500	1409.2500	945.7500	748.2500	2284.0000	2351.0000	2183.7500	1916.5000	5079.5000	1474.0000	1338.5000	1646.0000	2051.7500	2039.7500	2618.0000	306.7500	574.5000	1794.2500	1429.0000	2293.2500	2066.2500	2493.2500	918.0000	1579.2500	2042.2500	2645.7500	2393.7500	1913.0000	1641.5000	853.2500	586.5000	537.2500	562.5000	1887.2500	979.0000	678.5000	1357.2500	1526.2500	2300.7500	129.0000	409.2500	282.2500
131 | Cortexolone	0.0000	0.0000	0.0000	54.0000	0.0000	0.0000	0.0000	0.0000	215.7500	135.7500	72.7500	53.0000	11.7500	0.0000	0.0000	0.0000	0.0000	101.2500	11.2500	0.0000	0.0000	315.0000	181.2500	0.0000	7.7500	151.2500	0.0000	0.0000	104.0000	0.0000	0.0000	30.7500	94.2500	210.5000	33.2500	126.0000	0.0000	10.0000	17.0000	15.7500	0.0000	0.0000
132 | Cortexone	108.0000	16.0000	13.0000	117.5000	3.2500	63.2500	42.5000	146.7500	29.5000	204.2500	28.7500	67.0000	30.5000	103.0000	23.0000	416.7500	63.5000	32.5000	32.5000	127.2500	39.0000	84.2500	7.2500	16.2500	68.7500	27.0000	46.5000	21.7500	3.2500	14.7500	28.7500	67.0000	33.0000	40.7500	31.0000	32.2500	40.0000	13.7500	18.7500	0.0000	25.7500	29.0000
133 | Corticosterone_ DOC	0.0000	354.5000	0.0000	0.0000	322.5000	419.7500	420.7500	0.0000	0.0000	0.0000	393.2500	915.5000	0.0000	432.2500	1233.0000	0.0000	525.5000	1700.0000	0.0000	98.7500	285.5000	42.5000	428.2500	0.0000	427.5000	271.7500	254.7500	478.0000	303.5000	462.2500	532.0000	715.0000	1073.0000	836.2500	0.0000	1639.0000	601.7500	287.7500	0.0000	0.0000	435.2500	1602.2500
134 | Cortisol	7643.0000	39245.7500	11671.5000	20216.0000	14908.7500	14386.5000	16815.2500	7806.2500	27135.5000	7095.0000	12175.2500	36413.0000	2499.2500	15101.7500	22045.0000	24832.0000	13257.0000	19528.5000	4539.7500	7681.7500	9585.2500	19361.0000	24203.7500	5667.0000	19437.2500	10849.2500	11855.7500	7546.5000	3093.7500	19035.7500	18575.0000	14801.5000	22960.7500	22506.5000	8001.5000	31037.5000	18577.2500	15506.2500	8364.7500	2145.7500	5574.7500	19662.5000
135 | Estradiol	123992.2500	796595.7500	619110.0000	449415.7500	320835.5000	326124.2500	249087.2500	311589.2500	345598.5000	485857.0000	332055.2500	211831.0000	334929.7500	235466.7500	352555.0000	410500.0000	887955.0000	865791.7500	1648163.5000	856726.7500	579044.2500	254013.2500	326272.7500	239893.7500	329553.2500	438715.5000	248489.0000	380251.0000	338965.5000	337231.2500	342754.5000	370657.2500	2028106.5000	733521.0000	399244.2500	321007.5000	634463.0000	231294.0000	349439.2500	75746.7500	399415.5000	303855.7500
136 | Estrone	484.5000	1663.7500	1680.7500	794.5000	557.2500	625.7500	669.7500	885.0000	715.0000	1225.5000	697.7500	478.2500	659.0000	575.5000	871.7500	1089.0000	1726.2500	2325.2500	3286.7500	1955.7500	1094.0000	486.2500	650.5000	574.2500	601.7500	842.7500	757.7500	732.7500	571.7500	693.7500	1004.2500	879.2500	3154.7500	1095.2500	22680.2500	637.2500	1108.2500	474.2500	810.2500	421.2500	680.7500	623.7500
137 | Pregnenolone	12.2500	0.0000	0.0000	0.0000	0.0000	144.2500	14.7500	807.2500	0.0000	30.0000	0.0000	0.0000	0.0000	0.0000	16.5000	139.5000	132.5000	0.0000	0.0000	13.7500	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	488.5000	0.0000	0.0000	0.0000	0.0000	280.7500	0.0000	0.0000	0.0000	0.0000	0.0000	205.5000
138 | Progesterone	28.2500	6.2500	725.2500	57.2500	767.0000	2.7500	388.0000	9.0000	19.5000	242.5000	4.0000	0.0000	94.5000	160.7500	0.0000	3214.5000	218.2500	1.0000	0.0000	20.0000	4.5000	55.7500	24.5000	57.0000	200.5000	138.7500	132.2500	120.5000	80.5000	59.5000	315.7500	247.2500	211.5000	198.5000	232.2500	241.0000	199.5000	282.5000	216.5000	358.5000	289.5000	199.2500
139 | Testosterone	75.7500	63.2500	42.7500	98.0000	24.2500	35.0000	165.7500	23.2500	73.7500	52.7500	118.7500	35.7500	65.2500	127.2500	14.2500	202.5000	110.7500	53.5000	54.2500	2.2500	105.2500	182.7500	116.0000	66.2500	52.5000	106.2500	43.2500	57.2500	97.2500	16.0000	192.0000	53.7500	182.5000	0.2500	11.5000	87.2500	33.7500	45.5000	26.2500	96.0000	17.5000	79.7500
140 | MS_METABOLITE_DATA_END
141 | #METABOLITES
142 | METABOLITES_START
143 | metabolite_name	moverz_quant	ri	ri_type	pubchem_id	inchi_key	kegg_id	other_id	other_id_type
144 | 17-hydroxypregnenolone				91451			2Q4710	UCDavis_Gaikwad_Lab_ID
145 | 17-hydroxyprogesterone				6238			6Q3360	UCDavis_Gaikwad_Lab_ID
146 | Allodihydrotestosterone				10635			14A2570	UCDavis_Gaikwad_Lab_ID
147 | Androstenedione				6128			12A6030	UCDavis_Gaikwad_Lab_ID
148 | Androstenolone (DHEA)				5881			3A8500	UCDavis_Gaikwad_Lab_ID
149 | Cortexolone				440707			7Q1610	UCDavis_Gaikwad_Lab_ID
150 | Cortexone				6166			9Q3460	UCDavis_Gaikwad_Lab_ID
151 | Corticosterone, DOC				5753			10Q1550	UCDavis_Gaikwad_Lab_ID
152 | Cortisol				5754			8Q3880	UCDavis_Gaikwad_Lab_ID
153 | Estradiol				5757			16E0950	UCDavis_Gaikwad_Lab_ID
154 | Estrone				5870			15E2300	UCDavis_Gaikwad_Lab_ID
155 | Pregnenolone				8955			1Q5500	UCDavis_Gaikwad_Lab_ID
156 | Progesterone				5994			5Q2600	UCDavis_Gaikwad_Lab_ID
157 | Testosterone				6013			13A6950	UCDavis_Gaikwad_Lab_ID
158 | METABOLITES_END
159 | #END
160 | 
161 | 
162 | 


--------------------------------------------------------------------------------
/tests/test_cli.py:
--------------------------------------------------------------------------------
  1 | from __future__ import unicode_literals
  2 | import csv
  3 | import json
  4 | import mwtab
  5 | import os
  6 | import pytest
  7 | import shutil
  8 | 
  9 | 
 10 | def teardown_module(module):
 11 |     if os.path.exists("tests/example_data/tmp/"):
 12 |         shutil.rmtree("tests/example_data/tmp")
 13 | 
 14 | 
 15 | @pytest.mark.parametrize("files_source", [
 16 |     "204",
 17 |     "AN000204",
 18 |     "https://www.metabolomicsworkbench.org/rest/study/analysis_id/AN000204/mwtab/txt",
 19 |     "tests/example_data/mwtab_files/ST000122_AN000204.txt",
 20 |     "tests/example_data/mwtab_files/ST000122_AN000204.json",
 21 |     "tests/example_data/mwtab_files",
 22 |     "tests/example_data/mwtab_files.zip",
 23 |     "tests/example_data/mwtab_files.tar",
 24 |     "tests/example_data/mwtab_files.tar.gz",
 25 |     "tests/example_data/mwtab_files.tar.bz2"
 26 | ])
 27 | def test_validate_command(files_source):
 28 |     command = "python -m mwtab validate {}".format(files_source)
 29 |     assert os.system(command) == 0
 30 | 
 31 | 
 32 | @pytest.mark.parametrize("from_path, to_path, from_format, to_format", [
 33 |     # one-to-one file conversions
 34 |     ("tests/example_data/mwtab_files/ST000122_AN000204.txt", "tests/example_data/tmp/json/ST000122_AN000204.json", "mwtab", "json"),
 35 |     ("tests/example_data/mwtab_files/ST000122_AN000204.txt", "tests/example_data/tmp/json/ST000122_AN000204.json.gz", "mwtab", "json"),
 36 |     ("tests/example_data/mwtab_files/ST000122_AN000204.txt", "tests/example_data/tmp/json/ST000122_AN000204.json.bz2", "mwtab", "json"),
 37 |     ("tests/example_data/tmp/json/ST000122_AN000204.json", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt", "json", "mwtab"),
 38 |     ("tests/example_data/tmp/json/ST000122_AN000204.json", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt.gz", "json", "mwtab"),
 39 |     ("tests/example_data/tmp/json/ST000122_AN000204.json", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt.bz2", "json", "mwtab"),
 40 |     ("tests/example_data/tmp/json/ST000122_AN000204.json.gz", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt", "json", "mwtab"),
 41 |     ("tests/example_data/tmp/json/ST000122_AN000204.json.gz", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt.gz", "json", "mwtab"),
 42 |     ("tests/example_data/tmp/json/ST000122_AN000204.json.gz", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt.bz2", "json", "mwtab"),
 43 |     ("tests/example_data/tmp/json/ST000122_AN000204.json.bz2", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt", "json", "mwtab"),
 44 |     ("tests/example_data/tmp/json/ST000122_AN000204.json.bz2", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt.gz", "json", "mwtab"),
 45 |     ("tests/example_data/tmp/json/ST000122_AN000204.json.bz2", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt.bz2", "json", "mwtab"),
 46 |     ("tests/example_data/mwtab_files/ST000122_AN000204.json", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt", "json", "mwtab"),
 47 |     ("tests/example_data/mwtab_files/ST000122_AN000204.json", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt.gz", "json", "mwtab"),
 48 |     ("tests/example_data/mwtab_files/ST000122_AN000204.json", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt.bz2", "json", "mwtab"),
 49 |     ("tests/example_data/tmp/mwtab/ST000122_AN000204.txt", "tests/example_data/tmp/json/ST000122_AN000204.json", "mwtab", "json"),
 50 |     ("tests/example_data/tmp/mwtab/ST000122_AN000204.txt", "tests/example_data/tmp/json/ST000122_AN000204.json.gz", "mwtab", "json"),
 51 |     ("tests/example_data/tmp/mwtab/ST000122_AN000204.txt", "tests/example_data/tmp/json/ST000122_AN000204.json.bz2", "mwtab", "json"),
 52 |     ("tests/example_data/tmp/mwtab/ST000122_AN000204.txt.gz", "tests/example_data/tmp/json/ST000122_AN000204.json", "mwtab", "json"),
 53 |     ("tests/example_data/tmp/mwtab/ST000122_AN000204.txt.gz", "tests/example_data/tmp/json/ST000122_AN000204.json.gz", "mwtab", "json"),
 54 |     ("tests/example_data/tmp/mwtab/ST000122_AN000204.txt.gz", "tests/example_data/tmp/json/ST000122_AN000204.json.bz2", "mwtab", "json"),
 55 |     ("tests/example_data/tmp/mwtab/ST000122_AN000204.txt.bz2", "tests/example_data/tmp/json/ST000122_AN000204.json", "mwtab", "json"),
 56 |     ("tests/example_data/tmp/mwtab/ST000122_AN000204.txt.bz2", "tests/example_data/tmp/json/ST000122_AN000204.json.gz", "mwtab", "json"),
 57 |     ("tests/example_data/tmp/mwtab/ST000122_AN000204.txt.bz2", "tests/example_data/tmp/json/ST000122_AN000204.json.bz2", "mwtab", "json"),
 58 |     # many-to-many file conversions
 59 |     ("tests/example_data/mwtab_files", "tests/example_data/tmp/json/dir/mwtab_files_json", "mwtab", "json"),
 60 |     ("tests/example_data/mwtab_files", "tests/example_data/tmp/json/dir/mwtab_files_json.zip", "mwtab", "json"),
 61 |     ("tests/example_data/mwtab_files", "tests/example_data/tmp/json/dir/mwtab_files_json.tar", "mwtab", "json"),
 62 |     ("tests/example_data/mwtab_files", "tests/example_data/tmp/json/dir/mwtab_files_json.tar.gz", "mwtab", "json"),
 63 |     ("tests/example_data/mwtab_files", "tests/example_data/tmp/json/dir/mwtab_files_json.tar.bz2", "mwtab", "json"),
 64 |     ("tests/example_data/tmp/json/dir/mwtab_files_json.zip", "tests/example_data/tmp/mwtab/zip/mwtab_files_mwtab", "json", "mwtab"),
 65 |     ("tests/example_data/tmp/json/dir/mwtab_files_json.zip", "tests/example_data/tmp/mwtab/zip/mwtab_files_mwtab.zip", "json", "mwtab"),
 66 |     ("tests/example_data/tmp/json/dir/mwtab_files_json.zip", "tests/example_data/tmp/mwtab/zip/mwtab_files_mwtab.tar", "json", "mwtab"),
 67 |     ("tests/example_data/tmp/json/dir/mwtab_files_json.zip", "tests/example_data/tmp/mwtab/zip/mwtab_files_mwtab.tar.gz", "json", "mwtab"),
 68 |     ("tests/example_data/tmp/json/dir/mwtab_files_json.zip", "tests/example_data/tmp/mwtab/zip/mwtab_files_mwtab.tar.bz2", "json", "mwtab"),
 69 |     ("tests/example_data/tmp/json/dir/mwtab_files_json.tar", "tests/example_data/tmp/mwtab/tar/mwtab_files_mwtab", "json", "mwtab"),
 70 |     ("tests/example_data/tmp/json/dir/mwtab_files_json.tar", "tests/example_data/tmp/mwtab/tar/mwtab_files_mwtab.zip", "json", "mwtab"),
 71 |     ("tests/example_data/tmp/json/dir/mwtab_files_json.tar", "tests/example_data/tmp/mwtab/tar/mwtab_files_mwtab.tar", "json", "mwtab"),
 72 |     ("tests/example_data/tmp/json/dir/mwtab_files_json.tar", "tests/example_data/tmp/mwtab/tar/mwtab_files_mwtab.tar.gz", "json", "mwtab"),
 73 |     ("tests/example_data/tmp/json/dir/mwtab_files_json.tar", "tests/example_data/tmp/mwtab/tar/mwtab_files_mwtab.tar.bz2", "json", "mwtab"),
 74 |     ("tests/example_data/tmp/json/dir/mwtab_files_json.tar.gz", "tests/example_data/tmp/mwtab/targz/mwtab_files_mwtab", "json", "mwtab"),
 75 |     ("tests/example_data/tmp/json/dir/mwtab_files_json.tar.gz", "tests/example_data/tmp/mwtab/targz/mwtab_files_mwtab.zip", "json", "mwtab"),
 76 |     ("tests/example_data/tmp/json/dir/mwtab_files_json.tar.gz", "tests/example_data/tmp/mwtab/targz/mwtab_files_mwtab.tar", "json", "mwtab"),
 77 |     ("tests/example_data/tmp/json/dir/mwtab_files_json.tar.gz", "tests/example_data/tmp/mwtab/targz/mwtab_files_mwtab.tar.gz", "json", "mwtab"),
 78 |     ("tests/example_data/tmp/json/dir/mwtab_files_json.tar.gz", "tests/example_data/tmp/mwtab/targz/mwtab_files_mwtab.tar.bz2", "json", "mwtab"),
 79 |     ("tests/example_data/tmp/json/dir/mwtab_files_json.tar.bz2", "tests/example_data/tmp/mwtab/tarbz2/mwtab_files_mwtab", "json", "mwtab"),
 80 |     ("tests/example_data/tmp/json/dir/mwtab_files_json.tar.bz2", "tests/example_data/tmp/mwtab/tarbz2/mwtab_files_mwtab.zip", "json", "mwtab"),
 81 |     ("tests/example_data/tmp/json/dir/mwtab_files_json.tar.bz2", "tests/example_data/tmp/mwtab/tarbz2/mwtab_files_mwtab.tar", "json", "mwtab"),
 82 |     ("tests/example_data/tmp/json/dir/mwtab_files_json.tar.bz2", "tests/example_data/tmp/mwtab/tarbz2/mwtab_files_mwtab.tar.gz", "json", "mwtab"),
 83 |     ("tests/example_data/tmp/json/dir/mwtab_files_json.tar.bz2", "tests/example_data/tmp/mwtab/tarbz2/mwtab_files_mwtab.tar.bz2", "json", "mwtab")
 84 | ])
 85 | def test_convert_command(from_path, to_path, from_format, to_format):
 86 |     command = "python -m mwtab convert {} {} --from-format={} --to-format={}".format(
 87 |         from_path, to_path, from_format, to_format
 88 |     )
 89 |     assert os.system(command) == 0
 90 | 
 91 |     mwtabfile_generator = mwtab.read_files(to_path)
 92 |     mwtabfiles_list = list(mwtabfile_generator)
 93 |     mwtabfiles_study_ids_set = set(mwf.study_id for mwf in mwtabfiles_list)
 94 |     mwtabfiles_analysis_ids_set = set(mwf.analysis_id for mwf in mwtabfiles_list)
 95 |     assert mwtabfiles_study_ids_set.issubset({"ST000122"})
 96 |     assert mwtabfiles_analysis_ids_set.issubset({"AN000204"})
 97 | 
 98 | 
 99 | @pytest.mark.parametrize("command", [
100 |     # download by url
101 |     "python -m mwtab download url https://www.metabolomicsworkbench.org/rest/study/study_id/ST000001/summary --to-path=tests/example_data/tmp/tmp.txt",
102 |     # download by study methods
103 |     "python -m mwtab download study 2 --to-path=tests/example_data/tmp/tmp.txt --output-format=txt",
104 |     "python -m mwtab download study ST000002 --to-path=tests/example_data/tmp/tmp.txt --output-format=txt",
105 |     "python -m mwtab download study study_id ST000002 summary --to-path=tests/example_data/tmp/tmp.txt",
106 |     "python -m mwtab download study study_id ST analysis --to-path=tests/example_data/tmp/tmp.txt",
107 |     # download compound | refmet | gene | protein
108 |     "python -m mwtab download compound regno 11 name --to-path=tests/example_data/tmp/tmp.txt",
109 |     "python -m mwtab download refmet name Cholesterol all --to-path=tests/example_data/tmp/tmp.txt",
110 |     "python -m mwtab download gene gene_symbol acaca all --to-path=tests/example_data/tmp/tmp.txt",
111 |     "python -m mwtab download protein uniprot_id Q13085 all --to-path=tests/example_data/tmp/tmp.txt",
112 |     # download moverz
113 |     "python -m mwtab download moverz MB 635.52 M+H 0.5 --to-path=tests/example_data/tmp/tmp.txt",
114 |     "python -m mwtab download moverz LIPIDS 513.45 M-2H 0.2 --to-path=tests/example_data/tmp/tmp.txt",
115 |     "python -m mwtab download moverz REFMET 255.2 M+H 0.2 --to-path=tests/example_data/tmp/tmp.txt",
116 |     # download exactmass
117 |     "python -m mwtab download exactmass \"PC(34:1)\" M+H --to-path=tests/example_data/tmp/tmp.txt",
118 |     "python -m mwtab download exactmass  \"GlcCer(d42:2)\" M-H --to-path=tests/example_data/tmp/tmp.txt",
119 | 
120 | ])
121 | def test_download_command(command):
122 |     assert os.system(command) == 0
123 | 
124 |     file_str = ""
125 |     with open("tests/example_data/tmp/tmp.txt", "r") as fh:
126 |         file_str = fh.read()
127 |         fh.close()
128 |     with open("tests/example_data/tmp/tmp.txt", "w") as fh:
129 |         fh.close()
130 |     assert file_str
131 | 
132 | 
@pytest.mark.parametrize("from_path, to_path, key, to_format, no_header", [
    ("tests/example_data/mwtab_files/", "tests/example_data/tmp/test_extract_metadata", "SUBJECT_TYPE", "csv", " --no-header"),
    ("tests/example_data/mwtab_files/", "tests/example_data/tmp/test_extract_metadata", "SUBJECT_TYPE", "csv", ""),
    ("tests/example_data/mwtab_files/", "tests/example_data/tmp/test_extract_metadata", "SUBJECT_TYPE", "json", "")
])
def test_extract_metadata_command(from_path, to_path, key, to_format, no_header):
    """Run ``mwtab extract metadata`` via the CLI and inspect the written file.

    :param from_path: Directory of mwTab files to extract from.
    :param to_path: Output path without extension; the file that is read back
        is ``to_path`` joined to ``to_format`` with a dot.
    :param key: Metadata key to extract.
    :param to_format: Output format, ``csv`` or ``json``.
    :param no_header: Either ``" --no-header"`` or an empty string; appended
        verbatim to the command line.
    """
    cli_call = "python -m mwtab extract metadata {} {} {} --to-format={}{}".format(
        from_path, to_path, key, to_format, no_header
    )
    exit_status = os.system(cli_call)
    assert exit_status == 0

    output_path = ".".join([to_path, to_format])
    with open(output_path, "r") as out_file:
        if to_format == "json":
            extracted = json.load(out_file)
            # Compare as a set so the order of the listed values is irrelevant.
            extracted["SUBJECT_TYPE"] = set(extracted["SUBJECT_TYPE"])
            assert extracted == {"SUBJECT_TYPE": {"Human"}}
        elif to_format == "csv":
            rows = list(csv.reader(out_file))
            if not no_header:
                # With a header, the header row comes first, then the data row.
                assert set(rows[0]) == {"metadata", "value0"}
                assert set(rows[1]) == {"SUBJECT_TYPE", "Human"}
            else:
                assert set(rows[0]) == {"SUBJECT_TYPE", "Human"}
        else:
            assert False
158 | 
159 | 
160 | # @pytest.mark.parametrize("from_path, to_path, key, value, to_format, no_header", [
161 | #     ("tests/example_data/mwtab_files/", "tests/example_data/tmp/test_extract_metabolites", "SU:SUBJECT_TYPE", "Plant", "csv", " --no-header"),
162 | #     ("tests/example_data/mwtab_files/", "tests/example_data/tmp/test_extract_metabolites", "SU:SUBJECT_TYPE", "Plant", "csv", ""),
163 | #     ("tests/example_data/mwtab_files/", "tests/example_data/tmp/test_extract_metabolites", "SU:SUBJECT_TYPE", "Plant", "json", ""),
164 | #     ("tests/example_data/mwtab_files/", "tests/example_data/tmp/test_extract_metabolites.csv", "SU:SUBJECT_TYPE", "Plant", "csv", ""),
165 | #     ("tests/example_data/mwtab_files/", "tests/example_data/tmp/test_extract_metabolites.json", "SU:SUBJECT_TYPE", "Plant", "json", ""),
166 | #     ("tests/example_data/mwtab_files/", "tests/example_data/tmp/test_extract_metabolites", "SU:SUBJECT_TYPE", "\"r'(Plant)'\"", "csv", " --no-header"),
167 | #     ("tests/example_data/mwtab_files/", "tests/example_data/tmp/test_extract_metabolites", "SU:SUBJECT_TYPE", "\"r'(Plant)'\"", "csv", ""),
168 | #     ("tests/example_data/mwtab_files/", "tests/example_data/tmp/test_extract_metabolites", "SU:SUBJECT_TYPE", "\"r'(Plant)'\"", "json", "")
169 | # ])
170 | # def test_extract_metabolites_command(from_path, to_path, key, value, to_format, no_header):
171 | #     command = "python -m mwtab extract metabolites {} {} {} {} --to-format={}{}".format(
172 | #         from_path, to_path, key, value, to_format, no_header
173 | #     )
174 | #     assert os.system(command) == 0
175 | #
176 | #     if to_format == "csv":
177 | #         filepath = to_path
178 | #         if not os.path.splitext(filepath)[1]:
179 | #             filepath += ".csv"
180 | #         with open(filepath, "r") as fh:
181 | #             data = list(csv.reader(fh))
182 | #             if bool(no_header):
183 | #                 assert set(data[0]) == {"1,2,4-benzenetriol", "1", "1", "24"}
184 | #                 assert len(data) == 191
185 | #             else:
186 | #                 assert set(data[0]) == {"metabolite_name", "num-studies", "num_analyses", "num_samples"}
187 | #                 assert set(data[1]) == {"1,2,4-benzenetriol", "1", "1", "24"}
188 | #                 assert len(data) == 192
189 | #             fh.close()
190 | #     elif to_format == 'json':
191 | #         filepath = to_path
192 | #         if not os.path.splitext(filepath)[1]:
193 | #             filepath += ".json"
194 | #         with open(filepath, "r") as fh:
195 | #             text = fh.read()
196 | #             fh.close()
197 | #         assert text
198 | #     else:
199 | #         assert False
200 | 


--------------------------------------------------------------------------------
/tests/test_converter.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import shutil
  3 | import pytest
  4 | import mwtab
  5 | from json import loads
  6 | from mwtab.converter import Converter
  7 | 
  8 | 
# Section names that test_convert_mwtab_to_json() compares via compare_item_sections(),
# i.e. sections treated as plain key-value mappings.
# "METABOLOMICS WORKBENCH" is deliberately left commented out and therefore not compared.
ITEM_SECTIONS = {
    # "METABOLOMICS WORKBENCH",
    "PROJECT",
    "STUDY",
    "ANALYSIS",
    "SUBJECT",
    "COLLECTION",
    "TREATMENT",
    "SAMPLEPREP",
    "CHROMATOGRAPHY",
    "MS",
    "NMR",
}
 22 | 
 23 | 
 24 | def teardown_module(module):
 25 |     if os.path.exists("tests/example_data/tmp"):
 26 |         shutil.rmtree("tests/example_data/tmp")
 27 | 
 28 | 
 29 | def compare_item_sections(dict1, dict2):
 30 |     """
 31 |     Method for comparing the item sections of two given dictionaries.
 32 | 
 33 |     Helper method which asserts two item sections (dictionaries), section which only contain key-value item pairs, from
 34 |     two different `~mwtab.mwtab.MWTabFile` objects are equal.
 35 | 
 36 |     :param dict1: First dictionary representing mwTab file section containing key-value item pairs.
 37 |     :type dict1: :py:class:`collections.OrderedDict` or :py:class:`dict`
 38 |     :param dict2: Second dictionary representing mwTab file section containing key-value item pairs.
 39 |     :type dict2: :py:class:`collections.OrderedDict` or :py:class:`dict`
 40 |     """
 41 |     keys1 = set(dict1.keys())
 42 |     keys2 = set(dict2.keys())
 43 | 
 44 |     assert not keys1 ^ keys2
 45 | 
 46 |     for key in keys1 & keys2:
 47 |         assert dict1[key] == dict2[key]
 48 | 
 49 | 
 50 | @pytest.mark.parametrize("mwtab_file_path, json_file_path", [
 51 |     ("tests/example_data/mwtab_files/ST000122_AN000204.txt", "tests/example_data/mwtab_files/ST000122_AN000204.json")
 52 | ])
 53 | def test_convert_mwtab_to_json(mwtab_file_path, json_file_path):
 54 |     """
 55 | 
 56 |     """
 57 |     # convert given mwTab file to JSON
 58 |     mwfile = next(mwtab.read_files(mwtab_file_path))
 59 |     if not os.path.exists("tests/example_data/tmp/"):
 60 |         os.makedirs("tests/example_data/tmp/")
 61 |     with open("tests/example_data/tmp/tmp.json", "w") as f:
 62 |         mwfile.write(f, file_format="json")
 63 |         f.close()
 64 | 
 65 |     # open files
 66 |     with open("tests/example_data/tmp/tmp.json", "r") as f:
 67 |         mwtab_file = loads(f.read())
 68 |     with open(json_file_path, "r") as f:
 69 |         json_file = loads(f.read())
 70 | 
 71 |     # assert both files contain the same sections
 72 |     assert not set(mwtab_file.keys()) ^ set(json_file.keys())
 73 | 
 74 |     # Assert item sections are equal
 75 |     for section_key in ITEM_SECTIONS:
 76 |         if section_key in set(mwtab_file.keys()) & set(json_file.keys()):
 77 |             compare_item_sections(mwtab_file[section_key], json_file[section_key])
 78 | 
 79 |     # assert MS_METABOLITE_DATA or NMR_METABOLITE_DATA sections are the same
 80 | 
 81 | 
 82 | @pytest.mark.parametrize("from_path, to_path, from_format, to_format", [
 83 |     # one-to-one file conversions
 84 |     ("tests/example_data/mwtab_files/ST000122_AN000204.txt", "tests/example_data/tmp/json/ST000122_AN000204.json", "mwtab", "json"),
 85 |     ("tests/example_data/mwtab_files/ST000122_AN000204.txt", "tests/example_data/tmp/json/ST000122_AN000204.json.gz", "mwtab", "json"),
 86 |     ("tests/example_data/mwtab_files/ST000122_AN000204.txt", "tests/example_data/tmp/json/ST000122_AN000204.json.bz2", "mwtab", "json"),
 87 |     ("tests/example_data/tmp/json/ST000122_AN000204.json", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt", "json", "mwtab"),
 88 |     ("tests/example_data/tmp/json/ST000122_AN000204.json", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt.gz", "json", "mwtab"),
 89 |     ("tests/example_data/tmp/json/ST000122_AN000204.json", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt.bz2", "json", "mwtab"),
 90 |     ("tests/example_data/tmp/json/ST000122_AN000204.json.gz", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt", "json", "mwtab"),
 91 |     ("tests/example_data/tmp/json/ST000122_AN000204.json.gz", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt.gz", "json", "mwtab"),
 92 |     ("tests/example_data/tmp/json/ST000122_AN000204.json.gz", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt.bz2", "json", "mwtab"),
 93 |     ("tests/example_data/tmp/json/ST000122_AN000204.json.bz2", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt", "json", "mwtab"),
 94 |     ("tests/example_data/tmp/json/ST000122_AN000204.json.bz2", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt.gz", "json", "mwtab"),
 95 |     ("tests/example_data/tmp/json/ST000122_AN000204.json.bz2", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt.bz2", "json", "mwtab"),
 96 |     ("tests/example_data/mwtab_files/ST000122_AN000204.json", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt", "json", "mwtab"),
 97 |     ("tests/example_data/mwtab_files/ST000122_AN000204.json", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt.gz", "json", "mwtab"),
 98 |     ("tests/example_data/mwtab_files/ST000122_AN000204.json", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt.bz2", "json", "mwtab"),
 99 |     ("tests/example_data/tmp/mwtab/ST000122_AN000204.txt", "tests/example_data/tmp/json/ST000122_AN000204.json", "mwtab", "json"),
100 |     ("tests/example_data/tmp/mwtab/ST000122_AN000204.txt", "tests/example_data/tmp/json/ST000122_AN000204.json.gz", "mwtab", "json"),
101 |     ("tests/example_data/tmp/mwtab/ST000122_AN000204.txt", "tests/example_data/tmp/json/ST000122_AN000204.json.bz2", "mwtab", "json"),
102 |     ("tests/example_data/tmp/mwtab/ST000122_AN000204.txt.gz", "tests/example_data/tmp/json/ST000122_AN000204.json", "mwtab", "json"),
103 |     ("tests/example_data/tmp/mwtab/ST000122_AN000204.txt.gz", "tests/example_data/tmp/json/ST000122_AN000204.json.gz", "mwtab", "json"),
104 |     ("tests/example_data/tmp/mwtab/ST000122_AN000204.txt.gz", "tests/example_data/tmp/json/ST000122_AN000204.json.bz2", "mwtab", "json"),
105 |     ("tests/example_data/tmp/mwtab/ST000122_AN000204.txt.bz2", "tests/example_data/tmp/json/ST000122_AN000204.json", "mwtab", "json"),
106 |     ("tests/example_data/tmp/mwtab/ST000122_AN000204.txt.bz2", "tests/example_data/tmp/json/ST000122_AN000204.json.gz", "mwtab", "json"),
107 |     ("tests/example_data/tmp/mwtab/ST000122_AN000204.txt.bz2", "tests/example_data/tmp/json/ST000122_AN000204.json.bz2", "mwtab", "json"),
108 |     # many-to-many file conversions
109 |     ("tests/example_data/mwtab_files", "tests/example_data/tmp/json/dir/mwtab_files_json", "mwtab", "json"),
110 |     ("tests/example_data/mwtab_files", "tests/example_data/tmp/json/dir/mwtab_files_json.zip", "mwtab", "json"),
111 |     ("tests/example_data/mwtab_files", "tests/example_data/tmp/json/dir/mwtab_files_json.tar", "mwtab", "json"),
112 |     ("tests/example_data/mwtab_files", "tests/example_data/tmp/json/dir/mwtab_files_json.tar.gz", "mwtab", "json"),
113 |     ("tests/example_data/mwtab_files", "tests/example_data/tmp/json/dir/mwtab_files_json.tar.bz2", "mwtab", "json"),
114 |     ("tests/example_data/tmp/json/dir/mwtab_files_json.zip", "tests/example_data/tmp/mwtab/zip/mwtab_files_mwtab", "json", "mwtab"),
115 |     ("tests/example_data/tmp/json/dir/mwtab_files_json.zip", "tests/example_data/tmp/mwtab/zip/mwtab_files_mwtab.zip", "json", "mwtab"),
116 |     ("tests/example_data/tmp/json/dir/mwtab_files_json.zip", "tests/example_data/tmp/mwtab/zip/mwtab_files_mwtab.tar", "json", "mwtab"),
117 |     ("tests/example_data/tmp/json/dir/mwtab_files_json.zip", "tests/example_data/tmp/mwtab/zip/mwtab_files_mwtab.tar.gz", "json", "mwtab"),
118 |     ("tests/example_data/tmp/json/dir/mwtab_files_json.zip", "tests/example_data/tmp/mwtab/zip/mwtab_files_mwtab.tar.bz2", "json", "mwtab"),
119 |     ("tests/example_data/tmp/json/dir/mwtab_files_json.tar", "tests/example_data/tmp/mwtab/tar/mwtab_files_mwtab", "json", "mwtab"),
120 |     ("tests/example_data/tmp/json/dir/mwtab_files_json.tar", "tests/example_data/tmp/mwtab/tar/mwtab_files_mwtab.zip", "json", "mwtab"),
121 |     ("tests/example_data/tmp/json/dir/mwtab_files_json.tar", "tests/example_data/tmp/mwtab/tar/mwtab_files_mwtab.tar", "json", "mwtab"),
122 |     ("tests/example_data/tmp/json/dir/mwtab_files_json.tar", "tests/example_data/tmp/mwtab/tar/mwtab_files_mwtab.tar.gz", "json", "mwtab"),
123 |     ("tests/example_data/tmp/json/dir/mwtab_files_json.tar", "tests/example_data/tmp/mwtab/tar/mwtab_files_mwtab.tar.bz2", "json", "mwtab"),
124 |     ("tests/example_data/tmp/json/dir/mwtab_files_json.tar.gz", "tests/example_data/tmp/mwtab/targz/mwtab_files_mwtab", "json", "mwtab"),
125 |     ("tests/example_data/tmp/json/dir/mwtab_files_json.tar.gz", "tests/example_data/tmp/mwtab/targz/mwtab_files_mwtab.zip", "json", "mwtab"),
126 |     ("tests/example_data/tmp/json/dir/mwtab_files_json.tar.gz", "tests/example_data/tmp/mwtab/targz/mwtab_files_mwtab.tar", "json", "mwtab"),
127 |     ("tests/example_data/tmp/json/dir/mwtab_files_json.tar.gz", "tests/example_data/tmp/mwtab/targz/mwtab_files_mwtab.tar.gz", "json", "mwtab"),
128 |     ("tests/example_data/tmp/json/dir/mwtab_files_json.tar.gz", "tests/example_data/tmp/mwtab/targz/mwtab_files_mwtab.tar.bz2", "json", "mwtab"),
129 |     ("tests/example_data/tmp/json/dir/mwtab_files_json.tar.bz2", "tests/example_data/tmp/mwtab/tarbz2/mwtab_files_mwtab", "json", "mwtab"),
130 |     ("tests/example_data/tmp/json/dir/mwtab_files_json.tar.bz2", "tests/example_data/tmp/mwtab/tarbz2/mwtab_files_mwtab.zip", "json", "mwtab"),
131 |     ("tests/example_data/tmp/json/dir/mwtab_files_json.tar.bz2", "tests/example_data/tmp/mwtab/tarbz2/mwtab_files_mwtab.tar", "json", "mwtab"),
132 |     ("tests/example_data/tmp/json/dir/mwtab_files_json.tar.bz2", "tests/example_data/tmp/mwtab/tarbz2/mwtab_files_mwtab.tar.gz", "json", "mwtab"),
133 |     ("tests/example_data/tmp/json/dir/mwtab_files_json.tar.bz2", "tests/example_data/tmp/mwtab/tarbz2/mwtab_files_mwtab.tar.bz2", "json", "mwtab")
134 | ])
135 | def test_converter_module(from_path, to_path, from_format, to_format):
136 |     converter = Converter(from_path=from_path,
137 |                           to_path=to_path,
138 |                           from_format=from_format,
139 |                           to_format=to_format)
140 |     converter.convert()
141 | 
142 |     mwtabfile_generator = mwtab.read_files(to_path)
143 |     mwtabfiles_list = list(mwtabfile_generator)
144 |     mwtabfiles_study_ids_set = set(mwf.study_id for mwf in mwtabfiles_list)
145 |     mwtabfiles_analysis_ids_set = set(mwf.analysis_id for mwf in mwtabfiles_list)
146 |     assert mwtabfiles_study_ids_set.issubset({"ST000122"})
147 |     assert mwtabfiles_analysis_ids_set.issubset({"AN000204"})
148 | 


--------------------------------------------------------------------------------
/tests/test_mwextract.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import mwtab
3 | 
4 | 


--------------------------------------------------------------------------------
/tests/test_mwrest.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | from mwtab.mwrest import BASE_URL, GenericMWURL, analysis_ids, study_ids
 3 | 
 4 | 
 5 | def test_study_analysis():
 6 |     an_ids = analysis_ids()
 7 |     assert an_ids
 8 |     st_ids = study_ids()
 9 |     assert st_ids
10 | 
11 | 
@pytest.mark.parametrize("kwds", [
    ({"context": "study",
      "input_item": "analysis_id",
      "input_value": "AN000002",
      "output_item": "mwtab",
      'output_format': "txt"}),
    ({"context": "study",
      "input_item": "study_id",
      "input_value": "ST000001",
      "output_item": "mwtab",
      'output_format': "txt"}),
    ({"base_url": "https://www.test.org/rest/",
      "context": "study",
      "input_item": "study_id",
      "input_value": "ST000001",
      "output_item": "mwtab",
      'output_format': "txt"}),
])
def test_mwrest(kwds):
    """Check that ``GenericMWURL`` builds its URL from the given keyword values.

    The expected URL is the object's ``base_url`` followed by the context, input item, input value,
    output item and output format joined with ``/``.

    :param dict kwds: Keyword dictionary handed to ``GenericMWURL``.
    """
    mwurl = GenericMWURL(kwds)
    url_parts = [kwds[field] for field in ("context", "input_item", "input_value", "output_item")]
    # a missing or empty output format contributes an empty trailing path element
    url_parts.append(kwds.get("output_format") or "")
    assert mwurl.url == mwurl.base_url + "/".join(url_parts)
39 | 
40 | 
@pytest.mark.parametrize("kwds", [
    ({"context": "study",
      "input_item": "analysis_id",
      "input_value": "ST000001",
      "output_item": "mwtab",
      'output_format': "txt"}),
    ({"context": "moverz",
      "input_item": "LIPIDS",
      "m/z_value": 49,
      "ion_type_value": "M+H",
      "m/z_tolerance_value": 0.1,
      'output_format': "txt"}),
    ({"context": "exactmass",
      "LIPID_abbreviation": "Test",
      "ion_type_value": "M+H"}),
])
def test_fail_mwrest(kwds):
    """Check that ``GenericMWURL`` rejects each keyword set with a :py:class:`ValueError`.

    ``pytest.raises`` is used instead of a ``try``/``except Exception`` block so that a missing
    exception is reported as "did not raise" rather than as a confusing type mismatch on the
    ``AssertionError`` produced by ``assert False``.

    :param dict kwds: Keyword dictionary expected to be refused by ``GenericMWURL``.
    """
    with pytest.raises(ValueError) as exc_info:
        GenericMWURL(kwds)
    # keep the original strictness: exactly ValueError, not a subclass
    assert exc_info.type is ValueError
63 | 


--------------------------------------------------------------------------------
/tests/test_reading.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import mwtab
 3 | 
 4 | 
 5 | @pytest.mark.parametrize("files_source", [
 6 |     "204",
 7 |     "AN000204",
 8 |     "https://www.metabolomicsworkbench.org/rest/study/analysis_id/AN000204/mwtab/txt",
 9 |     "tests/example_data/mwtab_files/ST000122_AN000204.txt",
10 |     "tests/example_data/mwtab_files/ST000122_AN000204.json",
11 | ])
12 | def test_single_file_reading(files_source):
13 |     mwtabfile_generator = mwtab.read_files(files_source)
14 |     mwtabfile = next(mwtabfile_generator)
15 |     assert mwtabfile.study_id == "ST000122"
16 |     assert mwtabfile.analysis_id == "AN000204"
17 | 
18 | 
@pytest.mark.parametrize("files_source", [
    "tests/example_data/mwtab_files",
    "tests/example_data/mwtab_files.zip",
    "tests/example_data/mwtab_files.tar.gz",
    "tests/example_data/mwtab_files.tar.bz2"
])
def test_multiple_reading(files_source):
    """Read every file from a directory or archive and check the collected study and analysis IDs.

    :param str files_source: Path to a directory or archive handed to ``mwtab.read_files``.
    """
    loaded_files = list(mwtab.read_files(files_source))
    study_ids = {loaded.study_id for loaded in loaded_files}
    analysis_ids = {loaded.analysis_id for loaded in loaded_files}
    assert study_ids == {"ST000122"}
    assert analysis_ids == {"AN000204"}
32 | 


--------------------------------------------------------------------------------
/tests/test_validator.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import mwtab
 3 | 
 4 | 
 5 | @pytest.mark.parametrize("files_source", [
 6 |     "tests/example_data/mwtab_files/ST000122_AN000204.json",
 7 |     "tests/example_data/mwtab_files/ST000122_AN000204.txt"
 8 | ])
 9 | def test_validate(files_source):
10 |     """Test method for validating passing mwTab and JSON files from Metabolomics Workbench.
11 |     :param files_source: File path to Metabolomics Workbench file to be validated.
12 |     :type files_source: :py:class:`str` or
13 |     """
14 |     mwfile = next(mwtab.read_files(files_source))
15 |     _, validation_log = mwtab.validate_file(mwfile, metabolites=False)
16 |     assert len(validation_log.split('\n')) == 9
17 | 
18 | 
@pytest.mark.parametrize("file_source", [
    "tests/example_data/validation_files/ST000122_AN000204_error_1.txt",
    "tests/example_data/validation_files/ST000122_AN000204_error_1.json"
])
def test_validate_subject_sample_factors(file_source):
    """Validate the ``error_1`` files and check that the log reports the expected missing entries.

    :param str file_source: Path to the file to validate.
    """
    mwfile = next(mwtab.read_files(file_source))
    _, validation_log = mwtab.validate_file(mwfile, metabolites=False)
    expected_messages = (
        "missing Subject ID",
        "missing Sample ID",
        "missing value for Factor",
    )
    for expected_message in expected_messages:
        assert expected_message in validation_log
29 | 
30 | 
@pytest.mark.parametrize("file_source", [
    "tests/example_data/validation_files/ST000122_AN000204_error_2.txt",
    "tests/example_data/validation_files/ST000122_AN000204_error_2.json"
])
def test_validate_subject_sample_factors_missing_sample_ids(file_source):
    """Validate the ``error_2`` files and check that missing sample IDs are reported.

    This test carries its own name: it previously reused ``test_validate_subject_sample_factors``,
    which rebound that module-level name and kept the earlier test of the same name from ever
    being collected.

    :param str file_source: Path to the file to validate.
    """
    mwfile = next(mwtab.read_files(file_source))
    _, validation_log = mwtab.validate_file(mwfile, metabolites=False)
    # assert "Section missing data entry for sample(s):" in validation_log
    assert "SUBJECT_SAMPLE_FACTORS: Section missing sample ID(s)" in validation_log
40 | 
41 | 
@pytest.mark.parametrize("file_source", [
    "tests/example_data/validation_files/ST000122_AN000204_error_3.txt",
    "tests/example_data/validation_files/ST000122_AN000204_error_3.json"
])
def test_validate_metabolites(file_source):
    """Validate the ``error_3`` files and check for the "commonly used field name" message.

    :param str file_source: Path to the file to validate.
    """
    loaded_file = next(mwtab.read_files(file_source))
    validation_log = mwtab.validate_file(loaded_file)[1]
    assert "which matches a commonly used field name" in validation_log
50 | 
51 | 
@pytest.mark.parametrize("file_source", [
    "tests/example_data/validation_files/ST000122_AN000204_error_4.txt",
    "tests/example_data/validation_files/ST000122_AN000204_error_4.json"
])
def test_validate_schema(file_source):
    """Validate the ``error_4`` files and check that a schema mismatch is reported.

    :param str file_source: Path to the file to validate.
    """
    loaded_file = next(mwtab.read_files(file_source))
    validation_log = mwtab.validate_file(loaded_file)[1]
    assert "does not match the allowed schema" in validation_log
60 | 
61 | 
@pytest.mark.parametrize("file_source", [
    "tests/example_data/mwtab_files/ST000122_AN000204.json"
])
def test_validation_log_local(file_source):
    """Validate a local file and check the header lines written to the validation log.

    :param str file_source: Path to the local file to validate.
    """
    mwfile = next(mwtab.read_files(file_source))
    _, validation_log = mwtab.validate_file(mwfile)
    # assert "mwtab version: {}".format(mwtab.__version__) in validation_log
    expected_header_lines = (
        "Source:        {}".format(file_source),
        "Study ID:      {}".format("ST000122"),
        "Analysis ID:   {}".format("AN000204"),
        "File format:   {}".format("json"),
    )
    for header_line in expected_header_lines:
        assert header_line in validation_log
73 | 
74 | 
@pytest.mark.parametrize("file_source", [
    "2"
])
def test_validation_log_web(file_source):
    """Validate a file read via an analysis number and check the validation log header lines.

    The log is expected to name the Metabolomics Workbench REST URL for ``AN000002`` as its source.

    :param str file_source: Source identifier handed to ``mwtab.read_files``.
    """
    mwfile = next(mwtab.read_files(file_source))
    _, validation_log = mwtab.validate_file(mwfile, metabolites=False)
    # assert "mwtab version: {}".format(mwtab.__version__) in validation_log
    expected_header_lines = (
        "Source:        {}".format("https://www.metabolomicsworkbench.org/rest/study/analysis_id/AN000002/mwtab/txt"),
        "Study ID:      {}".format("ST000002"),
        "Analysis ID:   {}".format("AN000002"),
        "File format:   {}".format("txt"),
    )
    for header_line in expected_header_lines:
        assert header_line in validation_log


--------------------------------------------------------------------------------