├── .github └── workflows │ ├── build.yml │ └── codecov.yml ├── .gitignore ├── CHANGELOG.rst ├── CITATION.cff ├── LICENSE ├── MANIFEST.in ├── PUBLICATIONS.rst ├── README.rst ├── docs ├── Makefile ├── _static │ ├── images │ │ ├── mwtab_demo.gif │ │ └── mwtab_logo.png │ └── mwfiles │ │ ├── ST000017_AN000035.json │ │ ├── ST000017_AN000035.json.gz │ │ ├── ST000017_AN000035.txt │ │ ├── ST000017_AN000035.txt.gz │ │ ├── ST000040_AN000060.json │ │ ├── ST000040_AN000060.txt │ │ ├── diabetes │ │ ├── ST000048_AN000084.txt │ │ └── ST000057_AN000095.txt │ │ ├── mwfiles_dir_json │ │ ├── ST000017_AN000035.json │ │ └── ST000040_AN000060.json │ │ ├── mwfiles_dir_mwtab │ │ ├── ST000017_AN000035.txt │ │ └── ST000040_AN000060.txt │ │ ├── mwfiles_json.tar.gz │ │ ├── mwfiles_mwtab.zip │ │ └── out │ │ └── readme.txt ├── api.rst ├── conf.py ├── guide.rst ├── index.rst ├── license.rst ├── requirements-rtd.txt └── tutorial.ipynb ├── mwtab ├── __init__.py ├── __main__.py ├── cli.py ├── converter.py ├── fileio.py ├── mwextract.py ├── mwrest.py ├── mwschema.py ├── mwtab.py ├── tokenizer.py └── validator.py ├── requirements.txt ├── setup.py └── tests ├── example_data ├── mwtab_files.tar ├── mwtab_files.tar.bz2 ├── mwtab_files.tar.gz ├── mwtab_files.zip ├── mwtab_files │ ├── ST000122_AN000204.json │ └── ST000122_AN000204.txt └── validation_files │ ├── ST000122_AN000204_error_1.json │ ├── ST000122_AN000204_error_1.txt │ ├── ST000122_AN000204_error_2.json │ ├── ST000122_AN000204_error_2.txt │ ├── ST000122_AN000204_error_3.json │ ├── ST000122_AN000204_error_3.txt │ ├── ST000122_AN000204_error_4.json │ └── ST000122_AN000204_error_4.txt ├── test_cli.py ├── test_converter.py ├── test_mwextract.py ├── test_mwrest.py ├── test_reading.py └── test_validator.py /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | # This is a basic workflow to help you get started with Actions 2 | 3 | name: build 4 | 5 | # Controls when the workflow will run 6 | 
on: 7 | # Triggers the workflow on push or pull request events but only for the master branch 8 | push: 9 | branches: [ master ] 10 | pull_request: 11 | branches: [ master ] 12 | 13 | # Allows you to run this workflow manually from the Actions tab 14 | workflow_dispatch: 15 | 16 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel 17 | jobs: 18 | # This workflow contains a single job called "build" 19 | build: 20 | # The type of runner that the job will run on 21 | runs-on: ubuntu-latest 22 | strategy: 23 | matrix: 24 | python-version: ['3.5', '3.6', '3.7', '3.8', '3.9', '3.10'] 25 | 26 | # Steps represent a sequence of tasks that will be executed as part of the job 27 | steps: 28 | # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it 29 | - uses: actions/checkout@v2 30 | 31 | - name: Set up Python ${{ matrix.python-version }} 32 | uses: actions/setup-python@v2 33 | with: 34 | python-version: ${{ matrix.python-version }} 35 | 36 | # Install dependencies 37 | - name: Install dependencies 38 | run: | 39 | python -m pip install --upgrade pip 40 | pip install pytest-cov 41 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 42 | python setup.py install 43 | 44 | # Run pytest 45 | - name: Test with pytest 46 | run: pytest -------------------------------------------------------------------------------- /.github/workflows/codecov.yml: -------------------------------------------------------------------------------- 1 | name: codecov.io 2 | 3 | # Controls when the workflow will run 4 | on: 5 | # Triggers the workflow on push or pull request events but only for the master branch 6 | push: 7 | branches: [ master ] 8 | pull_request: 9 | branches: [ master ] 10 | 11 | # Allows you to run this workflow manually from the Actions tab 12 | workflow_dispatch: 13 | 14 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel 15 | jobs: 16 | # This workflow contains a 
single job called "build" 17 | build: 18 | # The type of runner that the job will run on 19 | runs-on: ubuntu-latest 20 | 21 | # Steps represent a sequence of tasks that will be executed as part of the job 22 | steps: 23 | - name: Checkout 24 | uses: actions/checkout@v2 25 | 26 | - name: Set up Python 3.9 27 | uses: actions/setup-python@v2 28 | with: 29 | python-version: 3.9 30 | 31 | - name: Install dependencies 32 | run: | 33 | pip install -r requirements.txt 34 | pip install pytest-cov 35 | python setup.py install 36 | 37 | - name: Run tests and collect coverage 38 | run: pytest --cov=./mwtab --cov-report=xml 39 | 40 | # codecov 41 | - name: "Upload coverage to Codecov" 42 | uses: codecov/codecov-action@v2 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | docs/_build 3 | venv/* 4 | data/* 5 | .DS_Store 6 | .ipynb_checkpoints 7 | .idea -------------------------------------------------------------------------------- /CHANGELOG.rst: -------------------------------------------------------------------------------- 1 | Release History 2 | =============== 3 | 4 | 5 | 1.2.5.post1 (2022-05-11) 6 | ~~~~~~~~~~~~~~~~~~~~~~~~ 7 | 8 | **Improvements** 9 | 10 | - Add citation information to GitHub repository. 11 | 12 | - Adds CITATION.cff file with citation info. 13 | 14 | 15 | 1.2.5 (2022-03-18) 16 | ~~~~~~~~~~~~~~~~~~ 17 | 18 | **Improvements** 19 | 20 | - Updates ``mwschema.py`` and ``validator.py`` modules to match Metabolomics Workbench's mwTab File Format 21 | Specification Version 1.5 (March 2022). 22 | 23 | - Adds optional NMR_RESULTS_FILE field to NMR block. 24 | 25 | - Adds optional MS_COMMENTS field to MS block. 26 | 27 | - Removes requirement for there to be data results for every sample listed in the Study Design 28 | (SUBJECT_SAMPLE_FACTORS). 
Allows for instances where samples have technical issues preventing data from being provided. 29 | 30 | 31 | 1.2.4 (2022-01-07) 32 | ~~~~~~~~~~~~~~~~~~ 33 | 34 | **Improvements** 35 | 36 | - Adds check for blank source files when parsing to create ``mwtab`` objects. 37 | 38 | 39 | 1.2.3 (2021-11-02) 40 | ~~~~~~~~~~~~~~~~~~ 41 | 42 | **Bugfixes** 43 | 44 | - Removes hard coding of version number in ``validator.validate_file()`` method. 45 | 46 | - Removes mention of Python 3.4 support in README. 47 | 48 | 49 | 1.2.2 (2021-10-22) 50 | ~~~~~~~~~~~~~~~~~~ 51 | 52 | **Improvements** 53 | 54 | - Migrates Continuous Integration (CI) from Travis CI to GitHub Actions. 55 | 56 | - Adds ``.github/workflows/`` folder which contains .yml files for workflows. 57 | 58 | - Adds ``build.yml`` to folder for testing build with pytest. 59 | 60 | - Adds ``codecov.yml`` to folder for generating/uploading code coverage info to codecov.io 61 | (https://app.codecov.io/gh/MoseleyBioinformaticsLab/mwtab). 62 | 63 | - Changes build and codecov badges to match new sources. 64 | 65 | 66 | 1.2.1 (2021-09-03) 67 | ~~~~~~~~~~~~~~~~~~ 68 | 69 | **Improvements** 70 | 71 | - Updates format of ``~mwtab.mwtab.validate_file()`` validation log generated during validation. 72 | 73 | - Includes metadata header in validation logs containing: datetime, mwtab version, file source, study id, analysis 74 | id, and file format. 75 | 76 | - Minor changes to error messages for MS(NMR)_METABOLITE_DATA, NMR_BINNED_DATA, and SUBJECT_SAMPLE_FACTORS sections. 77 | 78 | **Bugfixes** 79 | 80 | - Fixes error where pytests for ``~mwtab.mwtab.validate_file()`` method were repeatedly using the same text files for 81 | validation rather than both the test text and JSON files. 82 | 83 | - Verbose file validation enabled in commandline. 84 | 85 | - Default value given to ``base_url`` parameter in ``~mwtab.mwtab._pull_study_analysis()`` methods. 
86 | 87 | 88 | 1.0.1 (2021-03-06) 89 | ~~~~~~~~~~~~~~~~~~ 90 | 91 | **Improvements** 92 | 93 | - Updated ``~mwtab.mwtab.MWTabFile`` to match Metabolomics Workbench JSON 94 | format. 95 | 96 | - Internal dictionary representation now matches Metabolomics Workbench 97 | JSON format. 98 | - ``~mwtab.mwtab.MWTabFile.write()`` and 99 | ``~mwtab.mwtab.MWTabFile.write_str()`` methods now produce files 100 | consistent with Metabolomics Workbench's JSON format. 101 | 102 | - Updated ``mwschema.py`` to be consistent with Metabolomics Workbench's 103 | updated `mwTab` format specification. 104 | 105 | - Added ``mwrest.py`` module for working with Metabolomics Workbench's REST API. 106 | 107 | - Allows for additional data file to be requested through Metabolomics 108 | Workbench's REST API. 109 | 110 | - Added ``mwextract.py`` module for extracting metadata and metabolites from 111 | `mwTab` formatted files. 112 | 113 | - Updated ``validator.py``. 114 | 115 | - Validator now collects all present errors. 116 | - Performs detection of common field names in `#METABOLITES` blocks. 117 | 118 | - Updated ``docs/tutorial.ipynb`` to document improved and updated package 119 | functionality. 120 | 121 | - Updated `mwtab` package to include Python 3.8 support. 122 | 123 | 124 | 0.1.10 (2019-02-18) 125 | ~~~~~~~~~~~~~~~~~~~ 126 | 127 | **Bugfixes** 128 | 129 | - Metabolomics Workbench started using HTTPS, 130 | update reading from ANALYSIS_ID to address the change. 131 | 132 | 133 | 0.1.9 (2018-04-21) 134 | ~~~~~~~~~~~~~~~~~~ 135 | 136 | **Improvements** 137 | 138 | - Added citation link to `mwtab` package. 139 | 140 | 141 | 0.1.8 (2018-04-05) 142 | ~~~~~~~~~~~~~~~~~~ 143 | 144 | **Improvements** 145 | 146 | - Added `mwtab` package logo. 147 | - Minor update: Simplified section validation function. 
148 | 149 | 150 | 0.1.7 (2017-12-07) 151 | ~~~~~~~~~~~~~~~~~~ 152 | 153 | **Improvements** 154 | 155 | - Minor update: Included test for additional header line within `mwTab` files 156 | that may or may not be present. 157 | 158 | 159 | 0.1.4, 0.1.5, 0.1.6 (2017-11-13) 160 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 161 | 162 | **Improvements** 163 | 164 | - Minor update: package README file examples. 165 | - Minor update: update README to properly render on PyPI. 166 | 167 | 168 | 0.1.3 (2017-09-14) 169 | ~~~~~~~~~~~~~~~~~~ 170 | 171 | **Bugfixes** 172 | 173 | - Fixed bug in the command-line interface. 174 | - Fixed bug in ``mwschema.py`` module definition causing validation to fail. 175 | - Fixed validation optional argument (to ``read_files()`` generator) in order 176 | to validate mwTab formatted files before returning them. 177 | - Fixed Python2/3 compatibility bug that uses ``bz2`` Python module. 178 | - Fixed Python2/3 unicode/str compatibility bug in ``mwschema.py`` module. 179 | 180 | **Improvements** 181 | 182 | - Added Travis CI tests: https://travis-ci.org/MoseleyBioinformaticsLab/mwtab 183 | - Added code coverage reports: https://codecov.io/gh/MoseleyBioinformaticsLab/mwtab 184 | 185 | 186 | 0.1.2 (2017-09-14) 187 | ~~~~~~~~~~~~~~~~~~ 188 | 189 | **Bugfixes** 190 | 191 | - Fixed issue with mwTab formatted file printable representation. 192 | 193 | 194 | 0.1.1 (2017-09-12) 195 | ~~~~~~~~~~~~~~~~~~ 196 | 197 | **Improvements** 198 | 199 | - Improved README display on PyPI. 200 | 201 | 202 | 0.1.0 (2017-09-12) 203 | ~~~~~~~~~~~~~~~~~~ 204 | 205 | - Initial public release. 206 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.0.0 2 | message: "If you use this software, please cite it as below." 
3 | authors: 4 | - family-names: "Powell" 5 | given-names: "Christian" 6 | orcid: "https://orcid.org/0000-0002-4242-080X" 7 | - family-names: "Smelter" 8 | given-names: "Andrey" 9 | orcid: "https://orcid.org/0000-0003-3056-9225" 10 | - family-names: "Moseley" 11 | given-names: "Hunter" 12 | orcid: "https://orcid.org/0000-0003-3995-5368" 13 | title: "mwtab" 14 | version: 1.2.5 15 | date-released: 2022-03-18 16 | url: "https://github.com/MoseleyBioinformaticsLab/mwtab" 17 | preferred-citation: 18 | type: article 19 | authors: 20 | - family-names: "Powell" 21 | given-names: "Christian" 22 | orcid: "https://orcid.org/0000-0002-4242-080X" 23 | - family-names: "Moseley" 24 | given-names: "Hunter" 25 | orcid: "https://orcid.org/0000-0003-3995-5368" 26 | doi: "10.3390/metabo11030163" 27 | journal: "Metabolites" 28 | month: 3 29 | title: "The mwtab Python Library for RESTful Access and Enhanced Quality Control, Deposition, and Curation of the Metabolomics Workbench Data Repository" 30 | issue: 3 31 | volume: 11 32 | year: 2021 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The Clear BSD License 2 | 3 | Copyright (c) 2020, Christian D. Powell, Andrey Smelter, Hunter N.B. Moseley 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted (subject to the limitations in the disclaimer 8 | below) provided that the following conditions are met: 9 | 10 | * Redistributions of source code must retain the above copyright notice, 11 | this list of conditions and the following disclaimer. 12 | 13 | * Redistributions in binary form must reproduce the above copyright 14 | notice, this list of conditions and the following disclaimer in the 15 | documentation and/or other materials provided with the distribution. 
16 | 17 | * Neither the name of the copyright holder nor the names of its 18 | contributors may be used to endorse or promote products derived from this 19 | software without specific prior written permission. 20 | 21 | NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY 22 | THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND 23 | CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 25 | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 26 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 27 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 28 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 29 | BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER 30 | IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 31 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 32 | POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.rst LICENSE CHANGELOG.rst 2 | include requirements.txt 3 | include docs/Makefile 4 | recursive-include docs *.rst *.txt *.py *.png *.svg 5 | recursive-include mwtab *.py *.pyx *.c -------------------------------------------------------------------------------- /PUBLICATIONS.rst: -------------------------------------------------------------------------------- 1 | mwtab Publications 2 | ================== 3 | 4 | 5 | When using the ``mwtab`` package in published work, please cite the latest paper: 6 | 7 | 2. Powell, Christian D., and Hunter NB Moseley. 
"The mwtab Python Library for RESTful Access and Enhanced Quality 8 | Control, Deposition, and Curation of the Metabolomics Workbench Data Repository." *Metabolites* 11.3 (2021): 163. 9 | doi: `10.3390/metabo11030163`_. 10 | 11 | * Data available on FigShare: `10.6084/m9.figshare.12094104`_ 12 | 13 | 1. Smelter, Andrey and Hunter NB Moseley. "A Python library for FAIRer access and deposition to the Metabolomics 14 | Workbench Data Repository." *Metabolomics* 2018, 14(5): 64. doi: `10.1007/s11306-018-1356-6`_. 15 | 16 | * Data available on FigShare: `figshare.com/s/8d5a837cdc3f500fbcaa`_ 17 | 18 | .. _10.3390/metabo11030163: https://doi.org/10.3390/metabo11030163 19 | .. _10.6084/m9.figshare.12094104: https://doi.org/10.6084/m9.figshare.12094104 20 | .. _10.1007/s11306-018-1356-6: http://dx.doi.org/10.1007/s11306-018-1356-6 21 | .. _figshare.com/s/8d5a837cdc3f500fbcaa: https://figshare.com/s/8d5a837cdc3f500fbcaa 22 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | mwtab 2 | ===== 3 | 4 | .. image:: https://img.shields.io/pypi/l/mwtab.svg 5 | :target: https://choosealicense.com/licenses/bsd-3-clause-clear/ 6 | :alt: License information 7 | 8 | .. image:: https://img.shields.io/pypi/v/mwtab.svg 9 | :target: https://pypi.org/project/mwtab 10 | :alt: Current library version 11 | 12 | .. image:: https://img.shields.io/pypi/pyversions/mwtab.svg 13 | :target: https://pypi.org/project/mwtab 14 | :alt: Supported Python versions 15 | 16 | .. image:: https://readthedocs.org/projects/mwtab/badge/?version=latest 17 | :target: http://mwtab.readthedocs.io/en/latest/?badge=latest 18 | :alt: Documentation status 19 | 20 | .. image:: https://github.com/MoseleyBioinformaticsLab/mwtab/actions/workflows/build.yml/badge.svg 21 | :target: https://github.com/MoseleyBioinformaticsLab/mwtab/actions/workflows/build.yml 22 | :alt: Build status 23 | 24 | .. 
image:: https://codecov.io/gh/MoseleyBioinformaticsLab/mwtab/branch/master/graph/badge.svg?token=jhjMsP1qma 25 | :target: https://codecov.io/gh/MoseleyBioinformaticsLab/mwtab 26 | :alt: CodeCov 27 | 28 | .. image:: https://img.shields.io/badge/DOI-10.3390%2Fmetabo11030163-blue.svg 29 | :target: https://doi.org/10.3390/metabo11030163 30 | :alt: Citation link 31 | 32 | .. image:: https://img.shields.io/github/stars/MoseleyBioinformaticsLab/mwtab.svg?style=social&label=Star 33 | :target: https://github.com/MoseleyBioinformaticsLab/mwtab 34 | :alt: GitHub project 35 | 36 | | 37 | 38 | .. image:: https://raw.githubusercontent.com/MoseleyBioinformaticsLab/mwtab/master/docs/_static/images/mwtab_logo.png 39 | :width: 50% 40 | :align: center 41 | :target: http://mwtab.readthedocs.io/ 42 | 43 | 44 | The ``mwtab`` package is a Python library that facilitates reading and writing 45 | files in ``mwTab`` format used by the `Metabolomics Workbench`_ for archival of 46 | Mass Spectrometry (MS) and Nuclear Magnetic Resonance (NMR) experimental data. 47 | 48 | The ``mwtab`` package provides facilities to convert ``mwTab`` formatted files into 49 | their equivalent ``JSON`` ized representation and vice versa. ``JSON`` stands for JavaScript 50 | Object Notation, an open-standard format that uses human-readable text to transmit 51 | data objects consisting of attribute-value pairs. 52 | 53 | The ``mwtab`` package can be used in several ways: 54 | 55 | * As a library for accessing and manipulating data stored in ``mwTab`` format files. 56 | * As a command-line tool to convert between ``mwTab`` format and its equivalent 57 | ``JSON`` representation. 58 | 59 | 60 | Citation 61 | ~~~~~~~~ 62 | 63 | When using ``mwtab`` package in published work, please cite the following papers: 64 | 65 | * Powell, Christian D., and Hunter NB Moseley. "The mwtab Python Library for RESTful 66 | Access and Enhanced Quality Control, Deposition, and Curation of the Metabolomics 67 | Workbench Data Repository." 
*Metabolites* 11.3 (2021): 163. doi: 68 | `10.3390/metabo11030163`_. 69 | 70 | * Smelter, Andrey and Hunter NB Moseley. "A Python library for FAIRer access and 71 | deposition to the Metabolomics Workbench Data Repository." 72 | *Metabolomics* 2018, 14(5): 64. doi: `10.1007/s11306-018-1356-6`_. 73 | 74 | 75 | Links 76 | ~~~~~ 77 | 78 | * mwtab @ GitHub_ 79 | * mwtab @ PyPI_ 80 | * Documentation @ ReadTheDocs_ 81 | 82 | 83 | Installation 84 | ~~~~~~~~~~~~ 85 | 86 | The ``mwtab`` package runs under Python 3.5+. Use pip_ to install. 87 | Starting with Python 3.4, pip_ is included by default. 88 | 89 | 90 | Install on Linux, Mac OS X 91 | -------------------------- 92 | 93 | .. code:: bash 94 | 95 | python3 -m pip install mwtab 96 | 97 | 98 | Install on Windows 99 | ------------------ 100 | 101 | .. code:: bash 102 | 103 | py -3 -m pip install mwtab 104 | 105 | 106 | Upgrade on Linux, Mac OS X 107 | -------------------------- 108 | 109 | .. code:: bash 110 | 111 | python3 -m pip install mwtab --upgrade 112 | 113 | 114 | Upgrade on Windows 115 | ------------------ 116 | 117 | .. code:: bash 118 | 119 | py -3 -m pip install mwtab --upgrade 120 | 121 | 122 | Quickstart 123 | ~~~~~~~~~~ 124 | 125 | .. code:: python 126 | 127 | >>> import mwtab 128 | >>> 129 | >>> # Here we use ANALYSIS_ID of file to fetch data from URL 130 | >>> for mwfile in mwtab.read_files("1", "2"): 131 | ... print("STUDY_ID:", mwfile.study_id) 132 | ... print("ANALYSIS_ID:", mwfile.analysis_id) 133 | ... print("SOURCE:", mwfile.source) 134 | ... print("Blocks:", list(mwfile.keys())) 135 | >>> 136 | 137 | 138 | .. image:: https://raw.githubusercontent.com/MoseleyBioinformaticsLab/mwtab/master/docs/_static/images/mwtab_demo.gif 139 | :align: center 140 | 141 | 142 | .. note:: Read the User Guide and the ``mwtab`` Tutorial on ReadTheDocs_ 143 | to learn more and to see code examples on using the ``mwtab`` as a 144 | library and as a command-line tool. 
145 | 146 | 147 | License 148 | ~~~~~~~ 149 | 150 | This package is distributed under the BSD_ `license`. 151 | 152 | 153 | .. _Metabolomics Workbench: http://www.metabolomicsworkbench.org 154 | .. _GitHub: https://github.com/MoseleyBioinformaticsLab/mwtab 155 | .. _ReadTheDocs: http://mwtab.readthedocs.io 156 | .. _PyPI: https://pypi.org/project/mwtab 157 | .. _pip: https://pip.pypa.io 158 | .. _BSD: https://choosealicense.com/licenses/bsd-3-clause-clear/ 159 | .. _10.3390/metabo11030163: https://doi.org/10.3390/metabo11030163 160 | .. _10.1007/s11306-018-1356-6: http://dx.doi.org/10.1007/s11306-018-1356-6 161 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = mwtab 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/_static/images/mwtab_demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MoseleyBioinformaticsLab/mwtab/946cf1e85926ef32143eb5d5aff4da56127bb358/docs/_static/images/mwtab_demo.gif -------------------------------------------------------------------------------- /docs/_static/images/mwtab_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MoseleyBioinformaticsLab/mwtab/946cf1e85926ef32143eb5d5aff4da56127bb358/docs/_static/images/mwtab_logo.png -------------------------------------------------------------------------------- /docs/_static/mwfiles/ST000017_AN000035.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MoseleyBioinformaticsLab/mwtab/946cf1e85926ef32143eb5d5aff4da56127bb358/docs/_static/mwfiles/ST000017_AN000035.json.gz -------------------------------------------------------------------------------- /docs/_static/mwfiles/ST000017_AN000035.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MoseleyBioinformaticsLab/mwtab/946cf1e85926ef32143eb5d5aff4da56127bb358/docs/_static/mwfiles/ST000017_AN000035.txt.gz -------------------------------------------------------------------------------- /docs/_static/mwfiles/mwfiles_json.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MoseleyBioinformaticsLab/mwtab/946cf1e85926ef32143eb5d5aff4da56127bb358/docs/_static/mwfiles/mwfiles_json.tar.gz -------------------------------------------------------------------------------- /docs/_static/mwfiles/mwfiles_mwtab.zip: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/MoseleyBioinformaticsLab/mwtab/946cf1e85926ef32143eb5d5aff4da56127bb358/docs/_static/mwfiles/mwfiles_mwtab.zip -------------------------------------------------------------------------------- /docs/_static/mwfiles/out/readme.txt: -------------------------------------------------------------------------------- 1 | # Folder to collect all files generated by tutorial.ipynb 2 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | The mwtab API Reference 2 | ======================= 3 | 4 | 5 | .. automodule:: mwtab 6 | 7 | .. automodule:: mwtab.mwtab 8 | :member-order: bysource 9 | :members: 10 | 11 | 12 | .. automodule:: mwtab.cli 13 | 14 | .. autofunction:: cli 15 | 16 | 17 | .. automodule:: mwtab.tokenizer 18 | 19 | .. autofunction:: tokenizer 20 | 21 | 22 | .. automodule:: mwtab.fileio 23 | 24 | .. autofunction:: read_files 25 | 26 | 27 | .. automodule:: mwtab.converter 28 | :member-order: bysource 29 | :members: 30 | 31 | 32 | .. automodule:: mwtab.validator 33 | 34 | .. autofunction:: validate_section 35 | 36 | .. autofunction:: validate_file 37 | 38 | 39 | .. automodule:: mwtab.mwrest 40 | :member-order: bysource 41 | :members: 42 | 43 | 44 | .. automodule:: mwtab.mwextract 45 | :member-order: bysource 46 | :members: 47 | 48 | 49 | .. automodule:: mwtab.mwschema 50 | 51 | .. autodata:: metabolomics_workbench_schema 52 | :annotation: 53 | 54 | .. autodata:: project_schema 55 | :annotation: 56 | 57 | .. autodata:: study_schema 58 | :annotation: 59 | 60 | .. autodata:: analysis_schema 61 | :annotation: 62 | 63 | .. autodata:: subject_schema 64 | :annotation: 65 | 66 | .. autodata:: subject_sample_factors_schema 67 | :annotation: 68 | 69 | .. autodata:: collection_schema 70 | :annotation: 71 | 72 | .. 
autodata:: treatment_schema 73 | :annotation: 74 | 75 | .. autodata:: sampleprep_schema 76 | :annotation: 77 | 78 | .. autodata:: chromatography_schema 79 | :annotation: 80 | 81 | .. autodata:: ms_schema 82 | :annotation: 83 | 84 | .. autodata:: nmr_schema 85 | :annotation: 86 | 87 | .. autodata:: metabolites_schema 88 | :annotation: 89 | 90 | .. autodata:: ms_metabolite_data_schema 91 | :annotation: 92 | 93 | .. autodata:: nmr_binned_data_schema 94 | :annotation: 95 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # mwtab documentation build configuration file, created by 5 | # sphinx-quickstart on Mon Aug 21 15:32:57 2017. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | # 20 | import os 21 | import sys 22 | # sys.path.insert(0, os.path.abspath('.')) 23 | sys.path.insert(0, os.path.abspath('..')) 24 | 25 | from mwtab import __version__ 26 | 27 | # -- General configuration ------------------------------------------------ 28 | 29 | # If your documentation needs a minimal Sphinx version, state it here. 30 | # 31 | # needs_sphinx = '1.0' 32 | 33 | # Add any Sphinx extension module names here, as strings. They can be 34 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 35 | # ones. 
36 | extensions = ['sphinx.ext.autodoc', 37 | 'sphinx.ext.doctest', 38 | 'sphinx.ext.intersphinx', 39 | 'sphinx.ext.todo', 40 | 'sphinx.ext.coverage', 41 | 'sphinx.ext.mathjax', 42 | 'sphinx.ext.ifconfig', 43 | 'sphinx.ext.viewcode', 44 | 'nbsphinx'] 45 | 46 | # Add any paths that contain templates here, relative to this directory. 47 | templates_path = ['_templates'] 48 | 49 | # The suffix(es) of source filenames. 50 | # You can specify multiple suffix as a list of string: 51 | # 52 | # source_suffix = ['.rst', '.md'] 53 | source_suffix = '.rst' 54 | 55 | # The master toctree document. 56 | master_doc = 'index' 57 | 58 | # General information about the project. 59 | project = 'mwtab' 60 | copyright = '2020, Christian D. Powell, Andrey Smelter, Hunter N.B. Moseley' 61 | author = 'Christian D. Powell, Andrey Smelter, Hunter N.B. Moseley' 62 | 63 | # The version info for the project you're documenting, acts as replacement for 64 | # |version| and |release|, also used in various other places throughout the 65 | # built documents. 66 | # 67 | # The short X.Y version. 68 | version = __version__ 69 | # The full version, including alpha/beta/rc tags. 70 | release = __version__ 71 | 72 | # The language for content autogenerated by Sphinx. Refer to documentation 73 | # for a list of supported languages. 74 | # 75 | # This is also used if you do content translation via gettext catalogs. 76 | # Usually you set "language" from the command line for these cases. 77 | language = None 78 | 79 | # List of patterns, relative to source directory, that match files and 80 | # directories to ignore when looking for source files. 81 | # This patterns also effect to html_static_path and html_extra_path 82 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 83 | 84 | # The name of the Pygments (syntax highlighting) style to use. 85 | pygments_style = 'sphinx' 86 | 87 | # If true, `todo` and `todoList` produce output, else they produce nothing. 
88 | todo_include_todos = True 89 | 90 | 91 | # -- Options for HTML output ---------------------------------------------- 92 | 93 | # The theme to use for HTML and HTML Help pages. See the documentation for 94 | # a list of builtin themes. 95 | # 96 | html_theme = 'alabaster' 97 | 98 | # Theme options are theme-specific and customize the look and feel of a theme 99 | # further. For a list of options available for each theme, see the 100 | # documentation. 101 | # 102 | # html_theme_options = {} 103 | 104 | # Add any paths that contain custom static files (such as style sheets) here, 105 | # relative to this directory. They are copied after the builtin static files, 106 | # so a file named "default.css" will overwrite the builtin "default.css". 107 | html_static_path = ['_static'] 108 | 109 | 110 | # -- Options for HTMLHelp output ------------------------------------------ 111 | 112 | # Output file base name for HTML help builder. 113 | htmlhelp_basename = 'mwtabdoc' 114 | 115 | 116 | # -- Options for LaTeX output --------------------------------------------- 117 | 118 | latex_elements = { 119 | # The paper size ('letterpaper' or 'a4paper'). 120 | # 121 | # 'papersize': 'letterpaper', 122 | 123 | # The font size ('10pt', '11pt' or '12pt'). 124 | # 125 | # 'pointsize': '10pt', 126 | 127 | # Additional stuff for the LaTeX preamble. 128 | # 129 | # 'preamble': '', 130 | 131 | # Latex figure (float) alignment 132 | # 133 | # 'figure_align': 'htbp', 134 | } 135 | 136 | # Grouping the document tree into LaTeX files. List of tuples 137 | # (source start file, target name, title, 138 | # author, documentclass [howto, manual, or own class]). 139 | latex_documents = [ 140 | (master_doc, 'mwtab.tex', 'mwtab Documentation', 141 | 'Christian D. Powell, Andrey Smelter, Hunter N.B. Moseley', 'manual'), 142 | ] 143 | 144 | 145 | # -- Options for manual page output --------------------------------------- 146 | 147 | # One entry per manual page. 
List of tuples 148 | # (source start file, name, description, authors, manual section). 149 | man_pages = [ 150 | (master_doc, 'mwtab', 'mwtab Documentation', 151 | [author], 1) 152 | ] 153 | 154 | 155 | # -- Options for Texinfo output ------------------------------------------- 156 | 157 | # Grouping the document tree into Texinfo files. List of tuples 158 | # (source start file, target name, title, author, 159 | # dir menu entry, description, category) 160 | texinfo_documents = [ 161 | (master_doc, 'mwtab', 'mwtab Documentation', 162 | author, 'mwtab', 'One line description of project.', 163 | 'Miscellaneous'), 164 | ] 165 | 166 | 167 | 168 | # -- Options for Epub output ---------------------------------------------- 169 | 170 | # Bibliographic Dublin Core info. 171 | epub_title = project 172 | epub_author = author 173 | epub_publisher = author 174 | epub_copyright = copyright 175 | 176 | # The unique identifier of the text. This can be a ISBN number 177 | # or the project homepage. 178 | # 179 | # epub_identifier = '' 180 | 181 | # A unique identification for the text. 182 | # 183 | # epub_uid = '' 184 | 185 | # A list of files that should not be packed into the epub file. 186 | epub_exclude_files = ['search.html'] 187 | 188 | 189 | 190 | # Example configuration for intersphinx: refer to the Python standard library. 191 | intersphinx_mapping = {'https://docs.python.org/3': None} 192 | -------------------------------------------------------------------------------- /docs/guide.rst: -------------------------------------------------------------------------------- 1 | User Guide 2 | ========== 3 | 4 | Description 5 | ~~~~~~~~~~~ 6 | 7 | The ``mwtab`` package is a Python library that facilitates reading and writing 8 | files in ``mwTab`` format used by the `Metabolomics Workbench`_ for archival of 9 | Mass Spectrometry (MS) and Nuclear Magnetic Resonance (NMR) experimental data. 
10 | 11 | The ``mwtab`` package provides facilities to convert ``mwTab`` formatted files into 12 | their equivalent JSONized (JavaScript Object Notation, an open-standard format that 13 | uses human-readable text to transmit data objects consisting of attribute-value pairs) 14 | representation and vice versa. 15 | 16 | The ``mwtab`` package can be used in several ways: 17 | 18 | * As a library for accessing and manipulating data stored in ``mwTab`` format files. 19 | * As a command-line tool to convert between ``mwTab`` format and its equivalent 20 | ``JSON`` representation. 21 | 22 | Installation 23 | ~~~~~~~~~~~~ 24 | 25 | The :mod:`mwtab` package runs under Python 2.7 and Python 3.4+. 26 | Starting with Python 3.4, pip_ is included by default. To install 27 | system-wide with pip_ run the following: 28 | 29 | Install on Linux, Mac OS X 30 | -------------------------- 31 | 32 | .. code:: bash 33 | 34 | python3 -m pip install mwtab 35 | 36 | Install on Windows 37 | ------------------ 38 | 39 | .. code:: bash 40 | 41 | py -3 -m pip install mwtab 42 | 43 | Install inside virtualenv 44 | ------------------------- 45 | 46 | For an isolated install, you can run the same inside a virtualenv_. 47 | 48 | .. code:: bash 49 | 50 | $ virtualenv -p /usr/bin/python3 venv # create virtual environment, use python3 interpreter 51 | 52 | $ source venv/bin/activate # activate virtual environment 53 | 54 | $ python3 -m pip install mwtab # install mwtab as usual 55 | 56 | $ deactivate # if you are done working in the virtual environment 57 | 58 | Get the source code 59 | ~~~~~~~~~~~~~~~~~~~ 60 | 61 | Code is available on GitHub: https://github.com/MoseleyBioinformaticsLab/mwtab 62 | 63 | You can either clone the public repository: 64 | 65 | .. code:: bash 66 | 67 | $ git clone https://github.com/MoseleyBioinformaticsLab/mwtab.git 68 | 69 | Or, download the tarball and/or zipball: 70 | 71 | ..
code:: bash 72 | 73 | $ curl -OL https://github.com/MoseleyBioinformaticsLab/mwtab/tarball/master 74 | 75 | $ curl -OL https://github.com/MoseleyBioinformaticsLab/mwtab/zipball/master 76 | 77 | Once you have a copy of the source, you can embed it in your own Python package, 78 | or install it into your system site-packages easily: 79 | 80 | .. code:: bash 81 | 82 | $ python3 setup.py install 83 | 84 | Dependencies 85 | ~~~~~~~~~~~~ 86 | 87 | The :mod:`mwtab` package depends on several Python libraries. The ``pip`` command 88 | will install all dependencies automatically, but if you wish to install them manually, 89 | run the following commands: 90 | 91 | * docopt_ for creating :mod:`mwtab` command-line interface. 92 | * To install docopt_ run the following: 93 | 94 | .. code:: bash 95 | 96 | python3 -m pip install docopt # On Linux, Mac OS X 97 | py -3 -m pip install docopt # On Windows 98 | 99 | * schema_ for validating functionality of ``mwTab`` files based on ``JSON`` schema. 100 | * To install the schema_ Python library run the following: 101 | 102 | .. code:: bash 103 | 104 | python3 -m pip install schema # On Linux, Mac OS X 105 | py -3 -m pip install schema # On Windows 106 | 107 | 108 | Basic usage 109 | ~~~~~~~~~~~ 110 | 111 | The :mod:`mwtab` package can be used in several ways: 112 | 113 | * As a library for accessing and manipulating data stored in ``mwTab`` formatted files. 114 | 115 | * Create the :class:`~mwtab.mwtab.MWTabFile` generator function that will generate 116 | (yield) a single :class:`~mwtab.mwtab.MWTabFile` instance at a time. 117 | 118 | * Process each :class:`~mwtab.mwtab.MWTabFile` instance: 119 | 120 | * Process ``mwTab`` files in a for-loop, one file at a time. 121 | * Process as an iterator calling the :py:func:`next` built-in function. 122 | * Convert the generator into a :py:class:`list` of :class:`~mwtab.mwtab.MWTabFile` objects. 
123 | 124 | * As a command-line tool: 125 | 126 | * Convert from ``mwTab`` file format into its equivalent ``JSON`` file format and vice versa. 127 | * Validate data stored in ``mwTab`` file based on schema definition. 128 | 129 | .. note:: Read :doc:`tutorial` to learn more and see code examples on using the :mod:`mwtab` 130 | as a library and as a command-line tool. 131 | 132 | 133 | .. _pip: https://pip.pypa.io/ 134 | .. _virtualenv: https://virtualenv.pypa.io/ 135 | .. _docopt: https://pypi.org/project/docopt/ 136 | .. _schema: https://pypi.org/project/schema/ 137 | .. _Metabolomics Workbench: http://www.metabolomicsworkbench.org/ 138 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to mwtab's documentation! 2 | ================================= 3 | 4 | .. include:: ../README.rst 5 | 6 | Documentation index: 7 | ==================== 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | :caption: Contents: 12 | 13 | guide 14 | tutorial 15 | api 16 | license 17 | 18 | 19 | Indices and tables 20 | ================== 21 | 22 | * :ref:`genindex` 23 | * :ref:`modindex` 24 | * :ref:`search` 25 | -------------------------------------------------------------------------------- /docs/license.rst: -------------------------------------------------------------------------------- 1 | .. _license: 2 | 3 | License 4 | ======= 5 | 6 | .. 
include:: ../LICENSE 7 | -------------------------------------------------------------------------------- /docs/requirements-rtd.txt: -------------------------------------------------------------------------------- 1 | nbsphinx 2 | ipykernel 3 | mwtab -------------------------------------------------------------------------------- /mwtab/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | """Routines for working with ``mwTab`` format files used by the 5 | Metabolomics Workbench. 6 | 7 | This package includes the following modules: 8 | 9 | ``mwtab`` 10 | This module provides the :class:`~mwtab.mwtab.MWTabFile` class which is a python 11 | dictionary representation of a Metabolomics Workbench `mwtab` file. Data can be accessed 12 | directly from the :class:`~mwtab.mwtab.MWTabFile` instance using bracket accessors. 13 | 14 | ``cli`` 15 | This module provides command-line interface for the ``mwtab`` package. 16 | 17 | ``tokenizer`` 18 | This module provides the :func:`~mwtab.tokenizer.tokenizer` generator that generates 19 | tuples of key-value pairs from `mwtab` files. 20 | 21 | ``fileio`` 22 | This module provides the :func:`~mwtab.fileio.read_files` generator 23 | to open files from different sources (single file/multiple files on a local 24 | machine, directory/archive of files, URL address of a file). 25 | 26 | ``converter`` 27 | This module provides the :class:`~mwtab.converter.Converter` class that is 28 | responsible for the conversion of ``mwTab`` formated files into their JSON 29 | representation and vice versa. 30 | 31 | ``mwschema`` 32 | This module provides JSON schema definitions for the ``mwTab`` formatted files, 33 | i.e. specifies required and optional keys as well as data types. 
34 | 35 | ``validator`` 36 | This module provides routines to validate ``mwTab`` formatted files based 37 | on schema definitions as well as checks for file self-consistency. 38 | 39 | ``mwrest`` 40 | This module provides the :class:`~mwtab.mwrest.GenericMWURL` class which is a 41 | python dictionary representation of a Metabolomics Workbench REST URL. The class 42 | is used to validate query parameters and to generate a URL path which can be 43 | used to request data from Metabolomics Workbench through their REST API. 44 | """ 45 | 46 | from logging import getLogger, NullHandler 47 | from .fileio import read_files, read_mwrest 48 | from .validator import validate_file 49 | from .mwrest import GenericMWURL 50 | 51 | 52 | __version__ = "1.2.5.post1" 53 | 54 | 55 | # Setting default logging handler 56 | getLogger(__name__).addHandler(NullHandler()) 57 | -------------------------------------------------------------------------------- /mwtab/__main__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import docopt 5 | 6 | from . import cli 7 | from . 
def main():
    """Entry point for ``python -m mwtab``.

    Parses the process arguments with docopt against the usage text held in
    ``cli.__doc__`` (reporting ``__version__`` for ``--version``) and passes
    the resulting argument dictionary to ``cli.cli``.
    """
    parsed_args = docopt.docopt(cli.__doc__, version=__version__)
    return_value = cli.cli(parsed_args)
    # The dispatcher's result is intentionally discarded; main() returns None.
    del return_value
def check_filepath(filepath):
    """Make sure the directory that will contain ``filepath`` exists.

    Only the directory component of ``filepath`` is considered; the file itself
    is never created. A bare filename (empty directory component) is a no-op.

    :param str filepath: File path string.
    :return: None
    :rtype: :py:obj:`None`
    """
    dirname = path.dirname(filepath)
    if dirname and not path.exists(dirname):
        # exist_ok closes the window between the existence check above and the
        # creation here (another process may create the directory in between).
        makedirs(dirname, exist_ok=True)


def get_file_path(dir_path, filename, extension):
    """Build the path a downloaded/processed file should be written to.

    If ``dir_path`` is falsy the current working directory is used. If the
    resulting path carries a file extension and is not an existing directory,
    it is taken to be a complete file path and returned unchanged. Otherwise
    the filename is URL-quoted, its dots are replaced with underscores, the
    extension (default ``"txt"``) is appended, and the result is joined onto
    the directory.

    :param dir_path: Path to directory file is to be saved in (or a full file path).
    :type dir_path: :py:class:`str` or :py:class:`None`
    :param str filename: Filename processed file is to be saved as.
    :param extension: File extension; falls back to ``"txt"`` when falsy.
    :type extension: :py:class:`str` or :py:class:`None`
    :return: Complete file path.
    :rtype: :py:class:`str`
    """
    target = dir_path if dir_path else getcwd()

    # A dotted name is only a file path if it is not already a directory;
    # directories such as "results.v2" (or a dotted cwd) must still receive
    # a generated filename instead of being opened as a file.
    if path.splitext(target)[1] and not path.isdir(target):
        return target

    suffix = extension if extension else "txt"
    safe_name = quote_plus(filename).replace(".", "_")
    return join(target, ".".join([safe_name, suffix]))
140 | 141 | param dict cmdargs: dictionary of command line arguments. 142 | """ 143 | 144 | VERBOSE = cmdargs["--verbose"] 145 | fileio.VERBOSE = cmdargs["--verbose"] 146 | fileio.MWREST = cmdargs["--mw-rest"] 147 | mwrest.VERBOSE = cmdargs["--verbose"] 148 | 149 | # mwtab convert ... 150 | if cmdargs["convert"]: 151 | converter = Converter(from_path=cmdargs[""], 152 | to_path=cmdargs[""], 153 | from_format=cmdargs["--from-format"], 154 | to_format=cmdargs["--to-format"], 155 | validate=cmdargs["--validate"]) 156 | converter.convert() 157 | 158 | # mwtab validate ... 159 | elif cmdargs["validate"]: 160 | for mwfile in fileio.read_files(cmdargs[""], validate=cmdargs["--validate"]): 161 | validate_file( 162 | mwtabfile=mwfile, 163 | section_schema_mapping=section_schema_mapping, 164 | verbose=cmdargs.get("--verbose") 165 | ) 166 | 167 | # mwtab download ... 168 | elif cmdargs["download"]: 169 | 170 | # mwtab download url ... 171 | if cmdargs[""]: 172 | mwrestfile = next(fileio.read_mwrest(cmdargs[""])) 173 | with open(get_file_path( 174 | cmdargs["--to-path"], 175 | mwrestfile.source, 176 | OUTPUT_FORMATS[cmdargs.get("--output-format")]), 177 | "w", 178 | encoding="utf-8" 179 | ) as fh: 180 | mwrestfile.write(fh) 181 | 182 | # mwtab download study ... 183 | elif cmdargs["study"]: 184 | 185 | # mwtab download study all ... 186 | if cmdargs["all"]: 187 | # mwtab download study all ... 188 | # mwtab download study all --input-item=analysis_id ... 189 | # mwtab download study all --input-item=study_id ... 190 | # TODO: mwtab download study all --input-item=project_id ... 
191 | if not cmdargs["--input-item"] or cmdargs["--input-item"] in ("analysis_id", "study_id"): 192 | cmdargs[""] = cmdargs["--input-item"] 193 | 194 | id_list = list() 195 | if not cmdargs["--input-item"] or cmdargs["--input-item"] == "analysis_id": 196 | id_list = mwrest.analysis_ids() 197 | elif cmdargs["--input-item"] == "study_id": 198 | id_list = mwrest.study_ids() 199 | 200 | for count, input_id in enumerate(id_list): 201 | if VERBOSE: 202 | print("[{:4}/{:4}]".format(count+1, len(id_list)), input_id, datetime.datetime.now()) 203 | cmdargs[""] = input_id 204 | download("study", cmdargs) 205 | time.sleep(3) 206 | 207 | else: 208 | raise ValueError("Unknown \"--input-item\" {}".format(cmdargs["--input-item"])) 209 | 210 | # mwtab download study ... 211 | elif cmdargs[""] and not cmdargs[""]: 212 | if isfile(cmdargs[""]): 213 | with open(cmdargs[""], "r") as fh: 214 | id_list = json.loads(fh.read()) 215 | 216 | if VERBOSE: 217 | print("Found {} Files to be Downloaded".format(len(id_list))) 218 | for count, input_id in enumerate(id_list): 219 | if VERBOSE: 220 | print("[{:4}/{:4}]".format(count + 1, len(id_list)), input_id, datetime.datetime.now()) 221 | cmdargs[""] = input_id 222 | download("study", cmdargs) 223 | time.sleep(3) 224 | 225 | else: 226 | input_item = cmdargs.get("--input-item") 227 | input_value = cmdargs[""] 228 | if not input_item: 229 | if input_value.isdigit(): 230 | input_value = "AN{}".format(input_value.zfill(6)) 231 | input_item = "analysis_id" 232 | elif re.match(r'(AN[0-9]{6}$)', input_value): 233 | input_item = "analysis_id" 234 | elif re.match(r'(ST[0-9]{6}$)', input_value): 235 | input_item = "study_id" 236 | mwresturl = mwrest.GenericMWURL({ 237 | "context": "study", 238 | "input_item": input_item, 239 | "input_value": input_value, 240 | "output_item": cmdargs.get("--output-item") or "mwtab", 241 | "output_format": cmdargs["--output-format"], 242 | }, cmdargs["--mw-rest"]).url 243 | mwrestfile = next(fileio.read_mwrest(mwresturl)) 
244 | with open(cmdargs["--to-path"] or join(getcwd(), 245 | quote_plus(mwrestfile.source).replace(".", "_") + "." + cmdargs[ 246 | "--output-format"]), 247 | "w", encoding="utf-8") as fh: 248 | mwrestfile.write(fh) 249 | 250 | # mwtab download (study | ...) ... 251 | elif cmdargs[""]: 252 | download("study", cmdargs) 253 | 254 | # mwtab download (... compound | refmet | gene | protein) ... 255 | elif cmdargs["compound"]: 256 | download("compound", cmdargs) 257 | elif cmdargs["refmet"]: 258 | download("refmet", cmdargs) 259 | elif cmdargs["gene"]: 260 | download("gene", cmdargs) 261 | elif cmdargs["protein"]: 262 | download("protein", cmdargs) 263 | 264 | # mwtab download moverz [--verbose] 265 | elif cmdargs["moverz"]: 266 | mwresturl = mwrest.GenericMWURL({ 267 | "context": "moverz", 268 | "input_item": cmdargs[""], 269 | "m/z_value": cmdargs[""], 270 | "ion_type_value": cmdargs[""], 271 | "m/z_tolerance_value": cmdargs[""], 272 | }).url 273 | mwrestfile = next(fileio.read_mwrest(mwresturl)) 274 | with open(cmdargs["--to-path"] or join(getcwd(), quote_plus(mwrestfile.source).replace(".", "_") + ".txt"), 275 | "w") as fh: 276 | mwrestfile.write(fh) 277 | 278 | # mwtab download exactmass [--verbose] 279 | elif cmdargs["exactmass"]: 280 | mwresturl = mwrest.GenericMWURL({ 281 | "context": "exactmass", 282 | "LIPID_abbreviation": cmdargs[""], 283 | "ion_type_value": cmdargs[""], 284 | }).url 285 | mwrestfile = next(fileio.read_mwrest(mwresturl)) 286 | with open(cmdargs["--to-path"] or join(getcwd(), quote_plus(mwrestfile.source).replace(".", "_") + ".txt"), 287 | "w") as fh: 288 | mwrestfile.write(fh) 289 | 290 | # mwtab extract ... 
291 | elif cmdargs["extract"]: 292 | mwfile_generator = fileio.read_files(cmdargs[""]) 293 | if cmdargs["metabolites"]: 294 | metabolites_dict = mwextract.extract_metabolites( 295 | mwfile_generator, 296 | mwextract.generate_matchers( 297 | [(cmdargs[""][i], 298 | cmdargs[""][i] if not cmdargs[""][i][:2] == "r'" else re.compile(cmdargs[""][i][2:-1])) 299 | for i in range(len(cmdargs[""]))] 300 | ) 301 | ) 302 | 303 | if cmdargs[""] != "-": 304 | if cmdargs["--to-format"] == "csv": 305 | mwextract.write_metabolites_csv(cmdargs[""], metabolites_dict, cmdargs["--no-header"]) 306 | else: 307 | mwextract.write_json(cmdargs[""], metabolites_dict) 308 | else: 309 | print(json.dumps(metabolites_dict, indent=4, cls=mwextract.SetEncoder)) 310 | 311 | elif cmdargs["metadata"]: 312 | metadata = dict() 313 | for mwtabfile in mwfile_generator: 314 | extracted_values = mwextract.extract_metadata(mwtabfile, cmdargs[""]) 315 | [metadata.setdefault(key, set()).update(val) for (key, val) in extracted_values.items()] 316 | if cmdargs[""] != "-": 317 | if cmdargs["--to-format"] == "csv": 318 | mwextract.write_metadata_csv(cmdargs[""], metadata, cmdargs["--no-header"]) 319 | else: 320 | mwextract.write_json(cmdargs[""], metadata) 321 | else: 322 | print(metadata) 323 | -------------------------------------------------------------------------------- /mwtab/converter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | mwtab.converter 6 | ~~~~~~~~~~~~~~~ 7 | 8 | This module provides functionality for converting between the 9 | Metabolomics Workbench ``mwTab`` formatted file and its equivalent 10 | JSONized representation. 
11 | 12 | The following conversions are possible: 13 | 14 | Local files: 15 | * One-to-one file conversions: 16 | * textfile - to - textfile 17 | * textfile - to - textfile.gz 18 | * textfile - to - textfile.bz2 19 | * textfile.gz - to - textfile 20 | * textfile.gz - to - textfile.gz 21 | * textfile.gz - to - textfile.bz2 22 | * textfile.bz2 - to - textfile 23 | * textfile.bz2 - to - textfile.gz 24 | * textfile.bz2 - to - textfile.bz2 25 | * textfile / textfile.gz / textfile.bz2 - to - textfile.zip / textfile.tar / textfile.tar.gz / textfile.tar.bz2 (TypeError: One-to-many conversion) 26 | * Many-to-many files conversions: 27 | * Directories: 28 | * directory - to - directory 29 | * directory - to - directory.zip 30 | * directory - to - directory.tar 31 | * directory - to - directory.tar.bz2 32 | * directory - to - directory.tar.gz 33 | * directory - to - directory.gz / directory.bz2 (TypeError: Many-to-one conversion) 34 | * Zipfiles: 35 | * zipfile.zip - to - directory 36 | * zipfile.zip - to - zipfile.zip 37 | * zipfile.zip - to - tarfile.tar 38 | * zipfile.zip - to - tarfile.tar.gz 39 | * zipfile.zip - to - tarfile.tar.bz2 40 | * zipfile.zip - to - directory.gz / directory.bz2 (TypeError: Many-to-one conversion) 41 | * Tarfiles: 42 | * tarfile.tar - to - directory 43 | * tarfile.tar - to - zipfile.zip 44 | * tarfile.tar - to - tarfile.tar 45 | * tarfile.tar - to - tarfile.tar.gz 46 | * tarfile.tar - to - tarfile.tar.bz2 47 | * tarfile.tar - to - directory.gz / directory.bz2 (TypeError: Many-to-one conversion) 48 | * tarfile.tar.gz - to - directory 49 | * tarfile.tar.gz - to - zipfile.zip 50 | * tarfile.tar.gz - to - tarfile.tar 51 | * tarfile.tar.gz - to - tarfile.tar.gz 52 | * tarfile.tar.gz - to - tarfile.tar.bz2 53 | * tarfile.tar.gz - to - directory.gz / directory.bz2 (TypeError: Many-to-one conversion) 54 | * tarfile.tar.bz2 - to - directory 55 | * tarfile.tar.bz2 - to - zipfile.zip 56 | * tarfile.tar.bz2 - to - tarfile.tar 57 | * tarfile.tar.bz2 - to - 
tarfile.tar.gz 58 | * tarfile.tar.bz2 - to - tarfile.tar.bz2 59 | * tarfile.tar.bz2 - to - directory.gz / directory.bz2 (TypeError: Many-to-one conversion) 60 | URL files: 61 | * One-to-one file conversions: 62 | * analysis_id - to - textfile 63 | * analysis_id - to - textfile.gz 64 | * analysis_id - to - textfile.bz2 65 | * analysis_id - to - textfile.zip / textfile.tar / textfile.tar.gz / textfile.tar.bz2 (TypeError: One-to-many conversion) 66 | * textfileurl - to - textfile 67 | * textfileurl - to - textfile.gz 68 | * textfileurl - to - textfile.bz2 69 | * textfileurl.gz - to - textfile 70 | * textfileurl.gz - to - textfile.gz 71 | * textfileurl.gz - to - textfile.bz2 72 | * textfileurl.bz2 - to - textfile 73 | * textfileurl.bz2 - to - textfile.gz 74 | * textfileurl.bz2 - to - textfile.bz2 75 | * textfileurl / textfileurl.gz / textfileurl.bz2 - to - textfile.zip / textfile.tar / textfile.tar.gz / textfile.tar.bz2 (TypeError: One-to-many conversion) 76 | * Many-to-many files conversions: 77 | * Zipfiles: 78 | * zipfileurl.zip - to - directory 79 | * zipfileurl.zip - to - zipfile.zip 80 | * zipfileurl.zip - to - tarfile.tar 81 | * zipfileurl.zip - to - tarfile.tar.gz 82 | * zipfileurl.zip - to - tarfile.tar.bz2 83 | * zipfileurl.zip - to - directory.gz / directory.bz2 (TypeError: Many-to-one conversion) 84 | * Tarfiles: 85 | * tarfileurl.tar - to - directory 86 | * tarfileurl.tar - to - zipfile.zip 87 | * tarfileurl.tar - to - tarfile.tar 88 | * tarfileurl.tar - to - tarfile.tar.gz 89 | * tarfileurl.tar - to - tarfile.tar.bz2 90 | * tarfileurl.tar - to - directory.gz / directory.bz2 (TypeError: Many-to-one conversion) 91 | * tarfileurl.tar.gz - to - directory 92 | * tarfileurl.tar.gz - to - zipfile.zip 93 | * tarfileurl.tar.gz - to - tarfile.tar 94 | * tarfileurl.tar.gz - to - tarfile.tar.gz 95 | * tarfileurl.tar.gz - to - tarfile.tar.bz2 96 | * tarfileurl.tar.gz - to - directory.gz / directory.bz2 (TypeError: Many-to-one conversion) 97 | * tarfileurl.tar.bz2 - to 
class Translator(object):
    """Base translator: stores conversion settings; iteration is left to subclasses."""

    def __init__(self, from_path, to_path, from_format=None, to_format=None, validate=False):
        """Record the source/destination paths, formats and validation flag.

        The result of ``fileio.GenericFilePath.is_compressed`` is stored for
        both paths as ``from_path_compression`` and ``to_path_compression``.

        :param str from_path: Path to input file(s).
        :param str to_path: Path to output file(s).
        :param str from_format: Input format.
        :param str to_format: Output format.
        :param bool validate: whether to validate or not.
        """
        self.from_path, self.to_path = from_path, to_path
        self.from_format, self.to_format = from_format, to_format
        self.from_path_compression = fileio.GenericFilePath.is_compressed(self.from_path)
        self.to_path_compression = fileio.GenericFilePath.is_compressed(self.to_path)
        self.validate = validate

    def __iter__(self):
        """Not implemented here; concrete translators must override this."""
        raise NotImplementedError()


class MWTabFileToMWTabFile(Translator):
    """Concrete translator used for conversion between ``mwTab`` and ``JSON`` formats."""

    # Output file extension for each supported format name.
    file_extension = {
        "json": ".json",
        "mwtab": ".txt",
    }

    def __init__(self, from_path, to_path, from_format=None, to_format=None, validate=False):
        """Initialize by delegating every argument to :class:`Translator`.

        :param str from_path: Path to input file(s).
        :param str to_path: Path to output file(s).
        :param str from_format: Input format: `mwtab` or `json`.
        :param str to_format: Output format: `mwtab` or `json`.
        :param bool validate: whether to validate or not.
        """
        super(MWTabFileToMWTabFile, self).__init__(
            from_path,
            to_path,
            from_format=from_format,
            to_format=to_format,
            validate=validate,
        )

    def __iter__(self):
        """Yield each item produced by ``fileio.read_files`` for ``from_path``.

        :return: instance of :class:`~mwtab.mwtab.MWTabFile` object instance.
        :rtype: :class:`~mwtab.mwtab.MWTabFile`
        """
        yield from fileio.read_files(self.from_path, validate=self.validate)
178 | :return: None 179 | :rtype: :py:obj:`None` 180 | """ 181 | if not os.path.exists(os.path.dirname(self.file_generator.to_path)): 182 | dirname = os.path.dirname(self.file_generator.to_path) 183 | if dirname: 184 | os.makedirs(dirname) 185 | 186 | if os.path.isdir(self.file_generator.from_path): 187 | self._many_to_many() 188 | elif os.path.isfile(self.file_generator.from_path) or fileio.GenericFilePath.is_url(self.file_generator.from_path): 189 | if self.file_generator.from_path_compression in ("zip", "tar", "tar.gz", "tar.bz2"): 190 | self._many_to_many() 191 | elif self.file_generator.from_path_compression in ("gz", "bz2"): 192 | self._one_to_one() 193 | elif not self.file_generator.from_path_compression: 194 | self._one_to_one() 195 | elif self.file_generator.from_path.isdigit(): 196 | self._one_to_one() 197 | else: 198 | raise TypeError('Unknown input file format: "{}"'.format(self.file_generator.from_path)) 199 | 200 | def _many_to_many(self): 201 | """Perform many-to-many files conversion. 202 | :return: None 203 | :rtype: :py:obj:`None` 204 | """ 205 | if not self.file_generator.to_path_compression: 206 | self._to_dir(self.file_generator) 207 | elif self.file_generator.to_path_compression == "zip": 208 | self._to_zipfile(self.file_generator) 209 | elif self.file_generator.to_path_compression in ("tar", "tar.gz", "tar.bz2"): 210 | self._to_tarfile(self.file_generator) 211 | elif self.file_generator.to_path_compression in ("gz", "bz2"): 212 | raise TypeError('Many-to-one conversion, cannot convert "{}" into "{}"'.format(self.file_generator.from_path, 213 | self.file_generator.to_path)) 214 | else: 215 | raise TypeError('Unknown output file format: "{}"'.format(self.file_generator.to_path)) 216 | 217 | def _one_to_one(self): 218 | """Perform one-to-one file conversion. 
219 | :return: None 220 | :rtype: :py:obj:`None` 221 | """ 222 | if not self.file_generator.to_path_compression: 223 | self._to_textfile(self.file_generator) 224 | elif self.file_generator.to_path_compression == "gz": 225 | self._to_gzipfile(self.file_generator) 226 | elif self.file_generator.to_path_compression == "bz2": 227 | self._to_bz2file(self.file_generator) 228 | elif self.file_generator.to_path_compression in ("tar", "tar.gz", "tar.bz2", "zip"): 229 | raise TypeError('One-to-many conversion, cannot convert "{}" into "{}"'.format(self.file_generator.from_path, 230 | self.file_generator.to_path)) 231 | else: 232 | raise TypeError('Unknown format: "{}"'.format(self.file_generator.to_path)) 233 | 234 | def _to_dir(self, file_generator): 235 | """Convert files to directory. 236 | :return: None 237 | :rtype: :py:obj:`None` 238 | """ 239 | for f in file_generator: 240 | outpath = self._output_path(f.source, file_generator.to_format) 241 | 242 | if not os.path.exists(os.path.dirname(outpath)): 243 | os.makedirs(os.path.dirname(outpath)) 244 | 245 | with open(outpath, mode="w") as outfile: 246 | f.write(outfile, file_generator.to_format) 247 | 248 | def _to_zipfile(self, file_generator): 249 | """Convert files to zip archive. 250 | :return: None 251 | :rtype: :py:obj:`None` 252 | """ 253 | with zipfile.ZipFile(file_generator.to_path, mode="w", compression=zipfile.ZIP_DEFLATED) as outfile: 254 | for f in file_generator: 255 | outpath = self._output_path(f.source, file_generator.to_format, archive=True) 256 | outfile.writestr(outpath, f.writestr(file_generator.to_format)) 257 | 258 | def _to_tarfile(self, file_generator): 259 | """Convert files to tar archive. 
260 | :return: None 261 | :rtype: :py:obj:`None` 262 | """ 263 | if file_generator.to_path_compression == "tar": 264 | tar_mode = "w" 265 | elif file_generator.to_path_compression == "tar.gz": 266 | tar_mode = "w:gz" 267 | elif file_generator.to_path_compression == 'tar.bz2': 268 | tar_mode = "w:bz2" 269 | else: 270 | tar_mode = "w" 271 | 272 | with tarfile.open(file_generator.to_path, mode=tar_mode) as outfile: 273 | for f in file_generator: 274 | outpath = self._output_path(f.source, file_generator.to_format, archive=True) 275 | info = tarfile.TarInfo(outpath) 276 | data = f.writestr(file_generator.to_format).encode() 277 | info.size = len(data) 278 | outfile.addfile(tarinfo=info, fileobj=io.BytesIO(data)) 279 | 280 | def _to_bz2file(self, file_generator): 281 | """Convert file to bz2-compressed file. 282 | :return: None 283 | :rtype: :py:obj:`None` 284 | """ 285 | with bz2.BZ2File(file_generator.to_path, mode="wb") as outfile: 286 | for f in file_generator: 287 | outfile.write(f.writestr(file_generator.to_format).encode()) 288 | 289 | def _to_gzipfile(self, file_generator): 290 | """Convert file to gzip-compressed file. 291 | :return: None 292 | :rtype: :py:obj:`None` 293 | """ 294 | with gzip.GzipFile(file_generator.to_path, mode="wb") as outfile: 295 | for f in file_generator: 296 | outfile.write(f.writestr(file_generator.to_format).encode()) 297 | 298 | def _to_textfile(self, file_generator): 299 | """Convert file to regular text file. 
300 | :return: None 301 | :rtype: :py:obj:`None` 302 | """ 303 | to_path = file_generator.to_path \ 304 | if file_generator.to_path.endswith(file_generator.file_extension[file_generator.to_format]) \ 305 | else file_generator.to_path + file_generator.file_extension[file_generator.to_format] 306 | 307 | with open(to_path, mode="w") as outfile: 308 | for f in file_generator: 309 | outfile.write(f.writestr(file_generator.to_format)) 310 | 311 | def _output_path(self, input_path, to_format, archive=False): 312 | """Construct an output path string from an input path string. 313 | :param str input_path: Input path string. 314 | :return: Output path string. 315 | :rtype: :py:class:`str` 316 | """ 317 | indirpath, fname = os.path.split(os.path.abspath(os.path.normpath(input_path))) 318 | 319 | commonprefix = os.path.commonprefix([os.path.abspath(self.file_generator.from_path), 320 | os.path.abspath(indirpath)]) 321 | 322 | commonparts = commonprefix.split(os.sep) 323 | inparts = indirpath.split(os.sep) 324 | outparts = inparts[len(commonparts):] 325 | 326 | if archive: 327 | outdirpath = os.path.join(*outparts) if outparts else "" 328 | else: 329 | outdirpath = os.path.join(self.file_generator.to_path, *outparts) 330 | 331 | return os.path.join(outdirpath, fname + self.file_generator.file_extension[to_format]) -------------------------------------------------------------------------------- /mwtab/fileio.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | mwtab.fileio 6 | ~~~~~~~~~~~~ 7 | 8 | This module provides routines for reading ``mwTab`` formatted files 9 | from difference kinds of sources: 10 | 11 | * Single ``mwTab`` formatted file on a local machine. 12 | * Directory containing multiple ``mwTab`` formatted files. 13 | * Compressed zip/tar archive of ``mwTab`` formatted files. 14 | * URL address of ``mwTab`` formatted file. 
15 | * ``ANALYSIS_ID`` of ``mwTab`` formatted file. 16 | """ 17 | 18 | import os 19 | import io 20 | import zipfile 21 | import tarfile 22 | import bz2 23 | import gzip 24 | from re import match 25 | 26 | from . import mwtab 27 | from . import validator 28 | from . import mwschema 29 | from . import mwrest 30 | 31 | from urllib.request import urlopen 32 | from urllib.parse import urlparse 33 | 34 | 35 | VERBOSE = False 36 | 37 | 38 | def _generate_filenames(sources): 39 | """Generate filenames. 40 | 41 | :param tuple sources: Sequence of strings representing path to file(s). 42 | :return: Path to file(s). 43 | :rtype: :py:class:`str` 44 | """ 45 | for source in sources: 46 | if os.path.isdir(source): 47 | for path, _, filelist in os.walk(source): 48 | for fname in filelist: 49 | if os.path.splitext(fname)[1].lower() in {".csv", ".txt", ".json"}: 50 | if GenericFilePath.is_compressed(fname): 51 | if VERBOSE: 52 | print("Skipping compressed file: {}".format(os.path.abspath(fname))) 53 | continue 54 | else: 55 | yield os.path.join(path, fname) 56 | 57 | elif os.path.isfile(source): 58 | yield source 59 | 60 | elif source.isdigit(): 61 | yield next(mwrest.generate_mwtab_urls([source])) 62 | 63 | # TODO: Add ST parsing 64 | elif match(r"(AN[0-9]{6}$)", source): 65 | yield next(mwrest.generate_mwtab_urls([source])) 66 | 67 | elif GenericFilePath.is_url(source): 68 | yield source 69 | 70 | else: 71 | raise TypeError("Unknown file source.") 72 | 73 | 74 | def _generate_handles(filenames): 75 | """Open a sequence of filenames one at time producing file objects. 76 | The file is closed immediately when proceeding to the next iteration. 77 | 78 | :param generator filenames: Generator object that yields the path to each file, one at a time. 79 | :return: Filehandle to be processed into an instance. 
80 | """ 81 | for fname in filenames: 82 | path = GenericFilePath(fname) 83 | for filehandle, source in path.open(): 84 | yield filehandle, source 85 | filehandle.close() 86 | 87 | 88 | def read_files(*sources, **kwds): 89 | """Construct a generator that yields file instances. 90 | 91 | :param sources: One or more strings representing path to file(s). 92 | """ 93 | filenames = _generate_filenames(sources) 94 | filehandles = _generate_handles(filenames) 95 | for fh, source in filehandles: 96 | try: 97 | f = mwtab.MWTabFile(source) 98 | f.read(fh) 99 | 100 | if kwds.get('validate'): 101 | validator.validate_file(mwtabfile=f, 102 | section_schema_mapping=mwschema.section_schema_mapping) 103 | 104 | if VERBOSE: 105 | print("Processed file: {}".format(os.path.abspath(source))) 106 | 107 | yield f 108 | 109 | except Exception as e: 110 | if VERBOSE: 111 | print("Error processing file: ", os.path.abspath(source), "\nReason:", e) 112 | raise e 113 | 114 | 115 | def read_mwrest(*sources, **kwds): 116 | """Construct a generator that yields file instances. 117 | 118 | :param sources: One or more strings representing path to file(s). 119 | """ 120 | filenames = _generate_filenames(sources) 121 | filehandles = _generate_handles(filenames) 122 | for fh, source in filehandles: 123 | try: 124 | f = mwrest.MWRESTFile(source) 125 | f.read(fh) 126 | 127 | if VERBOSE: 128 | print("Processed url: {}".format(source)) 129 | 130 | yield f 131 | 132 | except Exception as e: 133 | if VERBOSE: 134 | print("Error processing url: ", source, "\nReason:", e) 135 | pass 136 | 137 | 138 | class GenericFilePath(object): 139 | """`GenericFilePath` class knows how to open local files or files over URL.""" 140 | 141 | def __init__(self, path): 142 | """Initialize path. 143 | 144 | :param str path: String representing a path to local file(s) or valid URL address of file(s). 
145 | """ 146 | self.path = path 147 | 148 | def open(self): 149 | """Generator that opens and yields filehandles using appropriate facilities: 150 | test if path represents a local file or file over URL, if file is compressed 151 | or not. 152 | 153 | :return: Filehandle to be processed into an instance. 154 | """ 155 | is_url = self.is_url(self.path) 156 | compression_type = self.is_compressed(self.path) 157 | 158 | if not compression_type: 159 | if is_url: 160 | filehandle = urlopen(self.path) 161 | else: 162 | filehandle = open(self.path, "r", encoding="utf-8") 163 | source = self.path 164 | yield filehandle, source 165 | filehandle.close() 166 | 167 | elif compression_type: 168 | if is_url: 169 | response = urlopen(self.path) 170 | path = response.read() 171 | response.close() 172 | else: 173 | path = self.path 174 | 175 | if compression_type == "zip": 176 | ziparchive = zipfile.ZipFile(io.BytesIO(path), "r") if is_url else zipfile.ZipFile(path) 177 | for name in ziparchive.infolist(): 178 | if not name.filename.endswith("/"): 179 | filehandle = ziparchive.open(name) 180 | source = self.path + "/" + name.filename 181 | yield filehandle, source 182 | filehandle.close() 183 | 184 | elif compression_type in ("tar", "tar.bz2", "tar.gz"): 185 | tararchive = tarfile.open(fileobj=io.BytesIO(path)) if is_url else tarfile.open(path) 186 | for name in tararchive: 187 | if name.isfile(): 188 | filehandle = tararchive.extractfile(name) 189 | source = self.path + "/" + name.name 190 | yield filehandle, source 191 | filehandle.close() 192 | 193 | elif compression_type == "bz2": 194 | filehandle = bz2.BZ2File(io.BytesIO(path)) if is_url else bz2.BZ2File(path) 195 | source = self.path 196 | yield filehandle, source 197 | filehandle.close() 198 | 199 | elif compression_type == "gz": 200 | filehandle = gzip.open(io.BytesIO(path)) if is_url else gzip.open(path) 201 | source = self.path 202 | yield filehandle, source 203 | filehandle.close() 204 | 205 | @staticmethod 206 | def 
is_compressed(path): 207 | """Test if path represents compressed file(s). 208 | 209 | :param str path: Path to file(s). 210 | :return: String specifying compression type if compressed, "" otherwise. 211 | :rtype: :py:class:`str` 212 | """ 213 | if path.endswith(".zip"): 214 | return "zip" 215 | elif path.endswith(".tar.gz"): 216 | return "tar.gz" 217 | elif path.endswith(".tar.bz2"): 218 | return "tar.bz2" 219 | elif path.endswith(".gz"): 220 | return "gz" 221 | elif path.endswith(".bz2"): 222 | return "bz2" 223 | elif path.endswith(".tar"): 224 | return "tar" 225 | return "" 226 | 227 | @staticmethod 228 | def is_url(path): 229 | """Test if path represents a valid URL. 230 | 231 | :param str path: Path to file. 232 | :return: True if path is valid url string, False otherwise. 233 | :rtype: :py:obj:`True` or :py:obj:`False` 234 | """ 235 | try: 236 | parse_result = urlparse(path) 237 | return all((parse_result.scheme, parse_result.netloc, parse_result.path)) 238 | except ValueError: 239 | return False 240 | -------------------------------------------------------------------------------- /mwtab/mwextract.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | mwtab.mwextract 6 | ~~~~~~~~~~~ 7 | 8 | This module provides a number of functions and classes for extracting and saving data and metadata 9 | stored in ``mwTab`` formatted files in the form of :class:`~mwtab.mwtab.MWTabFile`. 10 | """ 11 | import csv 12 | import json 13 | import os 14 | import re 15 | 16 | 17 | class ItemMatcher(object): 18 | """ItemMatcher class that can be called to match items from ``mwTab`` formatted files in the form of 19 | :class:`~mwtab.mwtab.MWTabFile`. 
class ItemMatcher(object):
    """ItemMatcher class that can be called to match items from ``mwTab`` formatted files in the form of
    :class:`~mwtab.mwtab.MWTabFile`.
    """

    # Two-letter section prefixes accepted in ``full_key`` and the section names they map to.
    section_conversion = {
        "PR": "PROJECT",
        "ST": "STUDY",
        "SU": "SUBJECT",
        "CO": "COLLECTION",
        "TR": "TREATMENT",
        "SP": "SAMPLEPREP",
        "CH": "CHROMATOGRAPHY",
        "AN": "ANALYSIS",
        "MS": "MS",
        "NM": "NMR",
    }

    def __init__(self, full_key, value_comparison):
        """ItemMatcher initializer.

        :param str full_key: Key to match in :class:`~mwtab.mwtab.MWTabFile`, in the form ``PREFIX:KEY``.
        :param value_comparison: Value to match in :class:`~mwtab.mwtab.MWTabFile`.
        :type value_comparison: :class:`re.Pattern` or :py:class:`str`
        :raises ValueError: if ``full_key`` does not contain exactly one ``:``.
        :raises KeyError: if the section prefix is not in ``section_conversion``.
        """
        self.full_key = full_key
        self.section, self.key = self.full_key.split(":")
        self.section = ItemMatcher.section_conversion[self.section]
        self.value_comparison = value_comparison

    def __call__(self, mwtabfile):
        """Match key value pair in :class:`~mwtab.mwtab.MWTabFile`.

        :param mwtabfile: Instance of :class:`~mwtab.mwtab.MWTabFile`.
        :type mwtabfile: :class:`~mwtab.mwtab.MWTabFile`
        :return: True if the stored value equals the expected value, False otherwise.
        :rtype: :py:obj:`True` or :py:obj:`False`
        :raises KeyError: if the section or key is absent from the file.
        """
        return mwtabfile[self.section][self.key] == self.value_comparison


class ReGeXMatcher(ItemMatcher):
    """ReGeXMatcher class that can be called to match items from ``mwTab`` formatted files in the form of
    :class:`~mwtab.mwtab.MWTabFile` using regular expressions.
    """

    def __init__(self, full_key, value_comparison):
        """ReGeXMatcher initializer.

        :param str full_key: Key to match in :class:`~mwtab.mwtab.MWTabFile`.
        :param value_comparison: Value, in the form of a regular expression, to match in
                                 :class:`~mwtab.mwtab.MWTabFile`.
        :type value_comparison: :class:`re.Pattern`
        """
        super(ReGeXMatcher, self).__init__(full_key, value_comparison)

    def __call__(self, mwtabfile):
        """Match key value pair in :class:`~mwtab.mwtab.MWTabFile`.

        :param mwtabfile: Instance of :class:`~mwtab.mwtab.MWTabFile`.
        :type mwtabfile: :class:`~mwtab.mwtab.MWTabFile`
        :return: True if the pattern is found in the stored value, False otherwise.
        :rtype: :py:obj:`True` or :py:obj:`False`
        """
        # bool() so the result is the documented True/False rather than a match object.
        return bool(re.search(self.value_comparison, mwtabfile[self.section][self.key]))


def generate_matchers(items):
    """Construct a generator that yields Matchers :class:`~mwtab.mwextract.ItemMatcher` or
    :class:`~mwtab.mwextract.ReGeXMatcher`.

    :param iterable items: Iterable object containing key value pairs to match.
    :return: Yields a Matcher object for each given item.
    :rtype: :class:`~mwtab.mwextract.ItemMatcher` or :class:`~mwtab.mwextract.ReGeXMatcher`
    """
    for full_key, value_comparison in ((item[0], item[1]) for item in items):
        if isinstance(value_comparison, re.Pattern):
            yield ReGeXMatcher(full_key, value_comparison)
        else:
            yield ItemMatcher(full_key, value_comparison)


def extract_metabolites(sources, matchers):
    """Extract metabolite data from ``mwTab`` formatted files in the form of :class:`~mwtab.mwtab.MWTabFile`.

    A sample is recorded for a metabolite when its value converts to a float
    greater than zero. Files that fail any matcher, or that have no data
    section, contribute nothing.

    :param generator sources: Generator of mwtab file objects (:class:`~mwtab.mwtab.MWTabFile`).
    :param generator matchers: Generator of matcher objects (:class:`~mwtab.mwextract.ItemMatcher` or
                               :class:`~mwtab.mwextract.ReGeXMatcher`).
    :return: Extracted metabolites dictionary, ``{metabolite: {study_id: {analysis_id: {sample, ...}}}}``.
    :rtype: :py:class:`dict`
    """
    # Matchers are applied to every file, so a one-shot generator must be
    # materialised; otherwise it is exhausted by the first file and every
    # following file passes ``all()`` vacuously.
    matchers = list(matchers)

    metabolites = dict()
    for mwtabfile in sources:
        if not all(matcher(mwtabfile) for matcher in matchers):
            continue

        data_section_keys = set(mwtabfile.keys()) & {"MS_METABOLITE_DATA", "NMR_METABOLITE_DATA", "NMR_BINNED_DATA"}
        if not data_section_keys:
            continue
        data_section_key = list(data_section_keys)[0]

        for data_list in mwtabfile[data_section_key]["Data"]:
            for test_key in (key for key in data_list.keys() if key != "Metabolite"):
                try:
                    if float(data_list[test_key]) > 0:
                        metabolites.setdefault(data_list["Metabolite"], dict())\
                            .setdefault(mwtabfile.study_id, dict())\
                            .setdefault(mwtabfile.analysis_id, set())\
                            .add(test_key)
                except Exception:
                    # Deliberate best-effort: non-numeric or otherwise unusable
                    # entries are skipped instead of aborting the extraction.
                    continue
    return metabolites


def extract_metadata(mwtabfile, keys):
    """Extract metadata data from ``mwTab`` formatted files in the form of :class:`~mwtab.mwtab.MWTabFile`.

    :param mwtabfile: mwTab file object for metadata to be extracted from.
    :type mwtabfile: :class:`~mwtab.mwtab.MWTabFile`
    :param list keys: List of metadata field keys for metadata values to be extracted.
    :return: Extracted metadata dictionary mapping each found key to the set of its values.
    :rtype: :py:class:`dict`
    """
    extracted_values = {}
    for section in mwtabfile:
        for metadata in mwtabfile[section]:
            for key in keys:
                if metadata == key:  # TODO: Allow for partial match, ReGeX, etc.
                    extracted_values.setdefault(key, set()).add(mwtabfile[section][metadata])

    return extracted_values


def _prepare_output_path(to_path, default_extension):
    """Create the parent directory of ``to_path`` if needed and add ``default_extension`` when the path has none.

    :param str to_path: Requested output path.
    :param str default_extension: Extension (with leading dot) appended to extension-less paths.
    :return: Output path to open for writing.
    :rtype: :py:class:`str`
    """
    dirname = os.path.dirname(to_path)
    if dirname and not os.path.exists(dirname):
        os.makedirs(dirname, exist_ok=True)

    if not os.path.splitext(to_path)[1]:
        to_path += default_extension
    return to_path


def write_metadata_csv(to_path, extracted_values, no_header=False):
    """Write extracted metadata :py:class:`dict` into csv file.

    Example:
    "metadata","value0","value1"
    "SUBJECT_TYPE","Human","Plant"

    :param str to_path: Path to output file.
    :param dict extracted_values: Metadata dictionary to be saved.
    :param bool no_header: If true header is not included, otherwise header is included.
    :return: None
    :rtype: :py:obj:`None`
    """
    to_path = _prepare_output_path(to_path, ".csv")

    with open(to_path, "w", newline="") as outfile:
        wr = csv.writer(outfile, quoting=csv.QUOTE_ALL)
        if not no_header:
            # default=0 keeps an empty dictionary from raising on max().
            max_value_num = max((len(values) for values in extracted_values.values()), default=0)
            wr.writerow(["metadata"] + ["value{}".format(num) for num in range(max_value_num)])
        for key in extracted_values:
            wr.writerow([key] + sorted(extracted_values[key]))


def write_metabolites_csv(to_path, extracted_values, no_header=False):
    """Write extracted metabolites data :py:class:`dict` into csv file.

    Example:
    "metabolite_name","num-studies","num_analyses","num_samples"
    "1,2,4-benzenetriol","1","1","24"
    "1-monostearin","1","1","24"
    ...

    :param str to_path: Path to output file.
    :param dict extracted_values: Metabolites data dictionary to be saved.
    :param bool no_header: If true header is not included, otherwise header is included.
    :return: None
    :rtype: :py:obj:`None`
    """
    csv_list = []
    for metabolite_key, studies in extracted_values.items():
        num_analyses = sum(len(analyses) for analyses in studies.values())
        num_samples = sum(len(samples) for analyses in studies.values() for samples in analyses.values())
        csv_list.append([metabolite_key, len(studies), num_analyses, num_samples])

    to_path = _prepare_output_path(to_path, ".csv")

    with open(to_path, "w", newline="") as outfile:
        wr = csv.writer(outfile, quoting=csv.QUOTE_ALL)
        if not no_header:
            wr.writerow(["metabolite_name", "num-studies", "num_analyses", "num_samples"])
        for line_list in csv_list:
            wr.writerow(line_list)


class SetEncoder(json.JSONEncoder):
    """SetEncoder class for encoding Python sets :py:class:`set` into json serializable objects :py:class:`list`.
    """

    def default(self, obj):
        """Method for encoding Python objects. If object passed is a set, converts the set to JSON serializable lists
        or calls base implementation.

        :param object obj: Python object to be json encoded.
        :return: JSON serializable object.
        :rtype: :py:class:`dict`, :py:class:`list`,
                :py:class:`tuple`, :py:class:`str`,
                :py:class:`int`, :py:class:`float`,
                :py:obj:`bool`, or :py:obj:`None`
        """
        if isinstance(obj, set):
            return list(obj)
        return json.JSONEncoder.default(self, obj)


def write_json(to_path, extracted_dict):
    """Write extracted data or metadata :py:class:`dict` into json file.

    Metabolites example:
    {
        "1,2,4-benzenetriol": {
            "ST000001": {
                "AN000001": [
                    "LabF_115816",
                    ...
                ]
            }
        }
    }

    Metadata example:
    {
        "SUBJECT_TYPE": [
            "Plant",
            "Human"
        ]
    }

    :param str to_path: Path to output file.
    :param dict extracted_dict: Metabolites data or metadata dictionary to be saved.
    :return: None
    :rtype: :py:obj:`None`
    """
    to_path = _prepare_output_path(to_path, ".json")

    with open(to_path, "w") as outfile:
        json.dump(extracted_dict, outfile, sort_keys=True, indent=4, cls=SetEncoder)
KeyValue = namedtuple("KeyValue", ["key", "value"])
KeyValueExtra = namedtuple("KeyValueExtra", ["key", "value", "extra"])


def _parse_pairs(text, item_separator, pair_separator):
    """Split ``text`` into a dictionary of stripped key/value pairs.

    Only the first ``pair_separator`` of each item separates key from value, so
    values may themselves contain the separator (e.g. ``Time:12:30``).

    :param str text: Text holding the pairs.
    :param str item_separator: Separator between pairs.
    :param str pair_separator: Separator between key and value of one pair.
    :return: Dictionary of pairs.
    :rtype: :py:class:`dict`
    :raises IndexError: if an item does not contain ``pair_separator``.
    """
    pairs = {}
    for item in text.split(item_separator):
        parts = item.split(pair_separator, 1)
        pairs[parts[0].strip()] = parts[1].strip()
    return pairs


def tokenizer(text):
    """A lexical analyzer for the `mwtab` formatted files.

    :param text: `mwTab` formatted text.
    :type text: py:class:`str`
    :return: Tuples of data.
    :rtype: py:class:`~collections.namedtuple`
    :raises IndexError: if a line has fewer fields than its kind requires or a data block is never closed.
    :raises ValueError: if a key-value line does not split into exactly two fields.
    """
    stream = deque(text.split("\n"))

    while len(stream) > 0:
        line = stream.popleft()
        try:

            # header
            if line.startswith("#METABOLOMICS WORKBENCH"):
                yield KeyValue("#METABOLOMICS WORKBENCH", "\n")
                for identifier in line.split(" "):
                    if ":" in identifier:
                        key, value = identifier.split(":")
                        yield KeyValue(key, value)

            # SUBJECT_SAMPLE_FACTORS header (reached new section)
            elif line.startswith("#SUBJECT_SAMPLE_FACTORS:"):
                yield KeyValue("#ENDSECTION", "\n")
                yield KeyValue("#SUBJECT_SAMPLE_FACTORS", "\n")

            # section header (reached new section)
            elif line.startswith("#"):
                yield KeyValue("#ENDSECTION", "\n")
                yield KeyValue(line.strip(), "\n")

            # SUBJECT_SAMPLE_FACTORS line
            elif line.startswith("SUBJECT_SAMPLE_FACTORS"):
                line_items = line.split("\t")
                subject_sample_factors_dict = OrderedDict([
                    ("Subject ID", line_items[1]),
                    ("Sample ID", line_items[2]),
                    ("Factors", _parse_pairs(line_items[3], "|", ":")),
                ])
                if line_items[4]:
                    subject_sample_factors_dict["Additional sample data"] = _parse_pairs(line_items[4], ";", "=")
                yield KeyValue(line_items[0].strip(), subject_sample_factors_dict)

            # data start header
            elif line.endswith("_START"):
                yield KeyValue(line, "\n")

                # tokenize lines in data section till line ending with "_END" is reached
                while not line.endswith("_END"):
                    line = stream.popleft()
                    if line.endswith("_END"):
                        yield KeyValue(line.strip(), "\n")
                    else:
                        data = line.split("\t")
                        yield KeyValue(data[0], tuple(data))

            # item line in item section (e.g. PROJECT, SUBJECT, etc..)
            elif line:
                if "_RESULTS_FILE" in line:
                    # Everything after the key (file name plus optional extra
                    # columns) is kept as one space-joined value.
                    line_items = line.split("\t")
                    yield KeyValue(line_items[0].strip()[3:], " ".join(line_items[1:]))
                else:
                    key, value = line.split("\t")
                    if ":" in key:
                        if ":UNITS" in key:
                            yield KeyValue("Units", value)
                        else:
                            # drop the three-character section prefix, e.g. "PR:"
                            yield KeyValue(key.strip()[3:], value)
                    else:
                        yield KeyValue(key.strip(), value)

        except IndexError as e:
            raise IndexError("LINE WITH ERROR:\n\t", repr(line), e)
        except ValueError as e:
            raise ValueError("LINE WITH ERROR:\n\t", repr(line), e)

    # end of file
    yield KeyValue("#ENDSECTION", "\n")
    yield KeyValue("!#ENDFILE", "\n")  # This is to ensure that tokenizer terminates when #END is missing.
117 | -------------------------------------------------------------------------------- /mwtab/validator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | mwtab.validator 6 | ~~~~~~~~~~~~~~~ 7 | 8 | This module contains routines to validate consistency of the ``mwTab`` 9 | formatted files, e.g. make sure that ``Samples`` and ``Factors`` 10 | identifiers are consistent across the file, make sure that all 11 | required key-value pairs are present. 12 | """ 13 | 14 | from copy import deepcopy 15 | from collections import OrderedDict 16 | from datetime import datetime 17 | from .mwschema import section_schema_mapping 18 | from re import match 19 | import io 20 | import sys 21 | import mwtab 22 | 23 | 24 | VERBOSE = False 25 | LOG = None 26 | 27 | METABOLITES_REGEXS = { 28 | "hmdb_id": { 29 | r"(?i)[\s|\S]{,}(HMDB)", 30 | r"(?i)(Human Metabolome D)[\S]{,}", 31 | }, 32 | "inchi_key": { 33 | r"(?i)(inchi)[\S]{,}", 34 | }, 35 | "kegg_id": { 36 | r"(?i)(kegg)$", 37 | r"(?i)(kegg)(\s|_)(i)", 38 | }, 39 | "moverz": { 40 | r"(?i)(m/z)", 41 | }, 42 | "moverz_quant": { 43 | r"(?i)(moverz)(\s|_)(quant)", 44 | r"(?i)(quan)[\S]{,}(\s|_)(m)[\S]{,}(z)", 45 | }, 46 | "other_id": { 47 | r"(?i)(other)(\s|_)(id)$", 48 | }, 49 | "other_id_type": { 50 | r"(?i)(other)(\s|_)(id)(\s|_)(type)$", 51 | }, 52 | "pubchem_id": { 53 | r"(?i)(pubchem)[\S]{,}", 54 | }, 55 | "retention_index": { 56 | r"(?i)(ri)$", 57 | r"(?i)(ret)[\s|\S]{,}(index)", 58 | }, 59 | "retention_index_type": { 60 | r"(?i)(ri)(\s|_)(type)", 61 | }, 62 | "retention_time": { 63 | r"(?i)(r)[\s|\S]{,}(time)[\S]{,}", 64 | }, 65 | } 66 | 67 | ITEM_SECTIONS = { 68 | "METABOLOMICS WORKBENCH", 69 | "PROJECT", 70 | "STUDY", 71 | "ANALYSIS", 72 | "SUBJECT", 73 | "COLLECTION", 74 | "TREATMENT", 75 | "SAMPLEPREP", 76 | "CHROMATOGRAPHY", 77 | "MS", 78 | "NM", 79 | } 80 | 81 | VALIDATION_LOG_HEADER = \ 82 | """Validation Log 83 | {} 84 | 
def validate_subject_samples_factors(mwtabfile):
    """Validate ``SUBJECT_SAMPLE_FACTORS`` section.

    Each entry must carry a non-empty ``Subject ID`` and ``Sample ID``, and every key listed under
    ``Factors`` and ``Additional sample data`` must map to a non-empty value.

    :param mwtabfile: Instance of :class:`~mwtab.mwtab.MWTabFile`.
    :type mwtabfile: :class:`~mwtab.mwtab.MWTabFile` or
                     :py:class:`collections.OrderedDict`
    :return: Validation error messages in entry order (empty list when nothing is wrong).
    :rtype: :py:class:`list`
    """
    subject_samples_factors_errors = []

    for entry_number, entry in enumerate(mwtabfile["SUBJECT_SAMPLE_FACTORS"], start=1):
        # ``.get`` so that an absent key is reported exactly like an empty value
        # instead of aborting the whole validation run with a KeyError.
        if not entry.get("Subject ID"):
            subject_samples_factors_errors.append(
                "SUBJECT_SAMPLE_FACTORS: Entry #{} missing Subject ID.".format(entry_number)
            )
        if not entry.get("Sample ID"):
            subject_samples_factors_errors.append(
                "SUBJECT_SAMPLE_FACTORS: Entry #{} missing Sample ID.".format(entry_number)
            )
        for factor_key, factor_value in (entry.get("Factors") or {}).items():
            if not factor_value:
                subject_samples_factors_errors.append(
                    "SUBJECT_SAMPLE_FACTORS: Entry #{} missing value for Factor {}.".format(entry_number, factor_key)
                )
        for additional_key, additional_value in (entry.get("Additional sample data") or {}).items():
            if not additional_value:
                subject_samples_factors_errors.append(
                    "SUBJECT_SAMPLE_FACTORS: Entry #{} missing value for Additional sample data {}.".format(
                        entry_number, additional_key
                    )
                )

    return subject_samples_factors_errors


def _declared_sample_ids(mwtabfile):
    """Return the set of ``Sample ID`` values present in the ``SUBJECT_SAMPLE_FACTORS`` section."""
    return {
        entry["Sample ID"]
        for entry in mwtabfile["SUBJECT_SAMPLE_FACTORS"]
        if "Sample ID" in entry
    }


def validate_data(mwtabfile, data_section_key, null_values):
    """Validates ``MS_METABOLITE_DATA``, ``NMR_METABOLITE_DATA``, and ``NMR_BINNED_DATA`` sections.

    Reports sample IDs that appear as columns of the data section but are not declared in
    ``SUBJECT_SAMPLE_FACTORS``. The opposite check (declared samples without data) is not
    performed; it was removed for mwTab File Spec. 1.5.

    :param mwtabfile: Instance of :class:`~mwtab.mwtab.MWTabFile`.
    :type mwtabfile: :class:`~mwtab.mwtab.MWTabFile` or
                     :py:class:`collections.OrderedDict`
    :param data_section_key: Section key (either MS_METABOLITE_DATA, NMR_METABOLITE_DATA, or NMR_BINNED_DATA)
    :type data_section_key: :py:class:`str`
    :param bool null_values: when true, every data point that cannot be converted with ``float`` is
                             replaced **in place** by ``""`` and reported as an error.
    :return: Validation error messages (empty list when nothing is wrong).
    :rtype: :py:class:`list`
    """
    data_errors = []

    declared_sample_ids = _declared_sample_ids(mwtabfile)
    data_rows = mwtabfile[data_section_key]["Data"]

    # The first key of a row is skipped (it is not a sample column); the remaining keys are
    # sample IDs. An empty data table has no columns to compare.
    data_sample_ids = set(list(data_rows[0].keys())[1:]) if data_rows else set()

    undeclared_sample_ids = data_sample_ids - declared_sample_ids
    if undeclared_sample_ids:
        data_errors.append("SUBJECT_SAMPLE_FACTORS: Section missing sample ID(s) {} found in {} section.".format(
            undeclared_sample_ids,
            data_section_key
        ))

    if null_values:
        for row_number, row in enumerate(data_rows, start=1):
            # Iterate over a snapshot of the keys because values are rewritten inside the loop.
            for data_point_key in list(row):
                if data_point_key == "Metabolite":
                    continue
                try:
                    float(row[data_point_key])
                except (TypeError, ValueError):
                    # TypeError covers ``None`` and other non-string values.
                    row[data_point_key] = ""
                    data_errors.append(
                        "{}: Data entry #{} contains non-numeric value converted to \"\".".format(
                            data_section_key, row_number
                        )
                    )

    return data_errors


def validate_metabolites(mwtabfile, data_section_key):
    """Validate ``METABOLITES`` section.

    Flags field names that are not one of the keys of ``METABOLITES_REGEXS`` but match one of
    the patterns registered for such a key.

    :param mwtabfile: Instance of :class:`~mwtab.mwtab.MWTabFile`.
    :type mwtabfile: :class:`~mwtab.mwtab.MWTabFile` or
                     :py:class:`collections.OrderedDict`
    :param data_section_key: Section key (either MS_METABOLITE_DATA, NMR_METABOLITE_DATA, or NMR_BINNED_DATA)
    :type data_section_key: :py:class:`str`
    :return: Validation error messages (empty list when nothing is wrong).
    :rtype: :py:class:`list`
    """
    metabolites_errors = []

    for entry_number, metabolite in enumerate(mwtabfile[data_section_key]["Metabolites"], start=1):
        # The first key is skipped -- presumably the metabolite name column (confirm against the parser).
        for field_key in list(metabolite.keys())[1:]:
            if field_key in METABOLITES_REGEXS:
                continue
            for regex_key, patterns in METABOLITES_REGEXS.items():
                if any(match(pattern, field_key) for pattern in patterns):
                    metabolites_errors.append(
                        "METABOLITES: Data entry #{} contains field name \"{}\" which matches a commonly used "
                        "field name \"{}\".".format(entry_number, field_key, regex_key)
                    )
                    # Report only the first matching key for this field.
                    break

    return metabolites_errors


def validate_extended(mwtabfile, data_section_key):
    """Validate ``EXTENDED_MS_METABOLITE_DATA``, ``EXTENDED_NMR_METABOLITE_DATA``, and ``EXTENDED_NMR_BINNED_DATA`` sections.

    Every entry of the ``Extended`` block must carry a ``sample_id`` that is declared in the
    ``SUBJECT_SAMPLE_FACTORS`` section.

    :param mwtabfile: Instance of :class:`~mwtab.mwtab.MWTabFile`.
    :type mwtabfile: :class:`~mwtab.mwtab.MWTabFile` or
                     :py:class:`collections.OrderedDict`
    :param data_section_key: Section key (either MS_METABOLITE_DATA, NMR_METABOLITE_DATA, or NMR_BINNED_DATA)
    :type data_section_key: :py:class:`str`
    :return: Validation error messages (empty list when nothing is wrong).
    :rtype: :py:class:`list`
    """
    extended_errors = []

    declared_sample_ids = _declared_sample_ids(mwtabfile)

    for entry_number, extended_data in enumerate(mwtabfile[data_section_key]["Extended"], start=1):
        if "sample_id" not in extended_data:
            extended_errors.append(
                "EXTENDED_{}: Data entry #{} missing Sample ID.".format(data_section_key, entry_number)
            )
        elif extended_data["sample_id"] not in declared_sample_ids:
            extended_errors.append(
                "EXTENDED_{}: Data entry #{} contains Sample ID \"{}\" not found in SUBJECT_SAMPLE_FACTORS section.".format(
                    data_section_key, entry_number, extended_data["sample_id"]
                ))

    return extended_errors
# Data section keys in the fixed order in which :func:`validate_file` looks them up.
_DATA_SECTION_KEYS = ("MS_METABOLITE_DATA", "NMR_METABOLITE_DATA", "NMR_BINNED_DATA")


def validate_section_schema(section, schema, section_key):
    """Validate section of ``mwTab`` formatted file.

    For sections listed in ``ITEM_SECTIONS`` every item with a falsy value is reported and
    removed from ``section`` **in place** before the schema is applied.

    :param section: Section of :class:`~mwtab.mwtab.MWTabFile`.
    :type section: :py:class:`collections.OrderedDict`
    :param schema: Schema definition.
    :type schema: :py:class:`~schema.schema`
    :param str section_key: Section key.

    :return: Tuple of the validated section (the result of ``schema.validate``) and the list of
             null-value error messages.
    :rtype: :py:class:`tuple`
    """
    schema_errors = []

    if section_key in ITEM_SECTIONS:
        # Iterate over a snapshot of the keys: deleting from the mapping while iterating its
        # live key view raises RuntimeError.
        for key in list(section.keys()):
            if not section[key]:
                schema_errors.append("{}: Contains item \"{}\" with null value.".format(section_key, key))
                del section[key]

    return schema.validate(section), schema_errors


def _source_file_format(source):
    """Derive the "File format" value of the validation log header from a file source string."""
    import os  # local import: the module-level import block is outside this block

    if "https://www.metabolomicsworkbench.org/" in source:
        # For Metabolomics Workbench URLs the last path component is used.
        return source.split("/")[-1]
    # Look only at the file name so that dots in directory names ("./x.txt", "../a.b/x.txt")
    # cannot be mistaken for the extension; a name without extension yields "".
    name_parts = os.path.basename(source).split(".")
    return name_parts[1] if len(name_parts) > 1 else ""


def validate_file(mwtabfile, section_schema_mapping=section_schema_mapping, verbose=False, metabolites=True):
    """Validate ``mwTab`` formatted file.

    The input is deep-copied; schema-validated sections are stored in the copy and the remaining
    checks are run against the copy. A validation log (header, status, and the list of errors) is
    written to standard output when ``verbose`` is true and collected into a string otherwise.

    :param mwtabfile: Instance of :class:`~mwtab.mwtab.MWTabFile`; its ``source``, ``study_id`` and
                      ``analysis_id`` attributes are read for the log header.
    :type mwtabfile: :class:`~mwtab.mwtab.MWTabFile`
    :param dict section_schema_mapping: Dictionary that provides mapping between section name and schema definition.
    :param bool verbose: whether to print the validation log to standard output.
    :param bool metabolites: whether to validate metabolites section.
    :return: Tuple of the validated file and the validation log text (``None`` when ``verbose`` is
             true, because the log has already been printed).
    :rtype: :py:class:`tuple`
    """
    # setup
    log_stream = sys.stdout if verbose else io.StringIO()
    validated_mwtabfile = deepcopy(OrderedDict(mwtabfile))

    # generate validation log header(s)
    print(VALIDATION_LOG_HEADER.format(
        str(datetime.now()),
        mwtab.__version__,
        mwtabfile.source,
        mwtabfile.study_id,
        mwtabfile.analysis_id,
        _source_file_format(mwtabfile.source)
    ), file=log_stream)

    # create list to collect validation errors
    errors = []

    # validate PROJECT, STUDY, ANALYSIS... and Schemas
    for section_key, section in mwtabfile.items():
        try:
            schema = section_schema_mapping[section_key]
            section, schema_errors = validate_section_schema(section, schema, section_key)
            errors.extend(schema_errors)
            validated_mwtabfile[section_key] = section
        except Exception as e:
            # Deliberately broad: any failure (unknown section key, schema mismatch, ...) is
            # turned into a log entry so that the remaining sections are still validated.
            errors.append("SCHEMA: Section \"{}\" does not match the allowed schema. ".format(section_key) + str(e))

    # validate SUBJECT_SAMPLE_FACTORS
    errors.extend(validate_subject_samples_factors(validated_mwtabfile))

    # validate ..._DATA sections; the fixed lookup order keeps the choice deterministic
    data_section_key = next((key for key in _DATA_SECTION_KEYS if key in validated_mwtabfile), None)
    if data_section_key:
        data_section = validated_mwtabfile[data_section_key]
        errors.extend(validate_data(validated_mwtabfile, data_section_key, False))

        if metabolites and data_section_key in ("MS_METABOLITE_DATA", "NMR_METABOLITE_DATA"):
            if "Metabolites" in data_section:
                errors.extend(validate_metabolites(validated_mwtabfile, data_section_key))
            else:
                errors.append("DATA: Missing METABOLITES section.")
        if "Extended" in data_section:
            errors.extend(validate_extended(validated_mwtabfile, data_section_key))

    else:
        # Without a data section the file must at least point to an external results file.
        if "MS" in validated_mwtabfile:
            if not validated_mwtabfile["MS"].get("MS_RESULTS_FILE"):
                errors.append("DATA: Missing MS_METABOLITE_DATA section or MS_RESULTS_FILE item in MS section.")
        elif "NM" in validated_mwtabfile:
            if not validated_mwtabfile["NM"].get("NMR_RESULTS_FILE"):
                errors.append("DATA: Missing either NMR_METABOLITE_DATA or NMR_BINNED_DATA section or NMR_RESULTS_FILE item in NM section.")

    # finish writing validation/error log
    if errors:
        print("Status: Contains Validation Errors", file=log_stream)
        print("Number Errors: {}\n".format(len(errors)), file=log_stream)
        print("Error Log:\n" + "\n".join(errors), file=log_stream)
    else:
        print("Status: Passing", file=log_stream)

    if verbose:
        return validated_mwtabfile, None
    return validated_mwtabfile, log_stream.getvalue()
'Programming Language :: Python :: 3.5', 57 | 'Programming Language :: Python :: 3.6', 58 | 'Programming Language :: Python :: 3.7', 59 | 'Programming Language :: Python :: 3.8', 60 | 'Programming Language :: Python :: 3.9', 61 | 'Programming Language :: Python :: 3.10', 62 | 'Topic :: Scientific/Engineering :: Bio-Informatics', 63 | 'Topic :: Software Development :: Libraries :: Python Modules', 64 | ], 65 | entry_points={"console_scripts": ["mwtab = mwtab.__main__:main"]}, 66 | ) 67 | -------------------------------------------------------------------------------- /tests/example_data/mwtab_files.tar.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MoseleyBioinformaticsLab/mwtab/946cf1e85926ef32143eb5d5aff4da56127bb358/tests/example_data/mwtab_files.tar.bz2 -------------------------------------------------------------------------------- /tests/example_data/mwtab_files.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MoseleyBioinformaticsLab/mwtab/946cf1e85926ef32143eb5d5aff4da56127bb358/tests/example_data/mwtab_files.tar.gz -------------------------------------------------------------------------------- /tests/example_data/mwtab_files.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MoseleyBioinformaticsLab/mwtab/946cf1e85926ef32143eb5d5aff4da56127bb358/tests/example_data/mwtab_files.zip -------------------------------------------------------------------------------- /tests/example_data/mwtab_files/ST000122_AN000204.txt: -------------------------------------------------------------------------------- 1 | #METABOLOMICS WORKBENCH STUDY_ID:ST000122 ANALYSIS_ID:AN000204 PROJECT_ID:PR000109 2 | VERSION 1 3 | CREATED_ON 2016-09-17 4 | #PROJECT 5 | PR:PROJECT_TITLE Perinatal DDT causes dysfunctional lipid metabolism underlying metabolic 6 | PR:PROJECT_TYPE 
Pilot and Feasibility Projects 7 | PR:PROJECT_SUMMARY - 8 | PR:INSTITUTE University of California, Davis 9 | PR:DEPARTMENT Nutrition 10 | PR:LABORATORY Gaikwad Lab 11 | PR:LAST_NAME Gaikwad 12 | PR:FIRST_NAME Nilesh 13 | PR:ADDRESS - 14 | PR:EMAIL nwgaikwad@ucdavis.edu 15 | PR:PHONE 530-752-2906 16 | PR:FUNDING_SOURCE NIH 1U24DK097154 ;  PI Fiehn, Oliver  ; UC Davis WEST COAST CENTRAL 17 | PR:FUNDING_SOURCE METABOLOMICS RESOURCE CORE (WC3MRC) 18 | #STUDY 19 | ST:STUDY_TITLE Perinatal DDT causes dysfunctional lipid metabolism underlying metabolic 20 | ST:STUDY_TYPE steroid panel 21 | ST:STUDY_SUMMARY - 22 | ST:INSTITUTE University of California, Davis 23 | ST:DEPARTMENT Nutrition 24 | ST:LABORATORY Gaikwad Lab 25 | ST:LAST_NAME Gaikwad 26 | ST:FIRST_NAME Nilesh 27 | ST:ADDRESS - 28 | ST:EMAIL nwgaikwad@ucdavis.edu 29 | ST:PHONE - 30 | ST:NUM_GROUPS NA 31 | #SUBJECT 32 | SU:SUBJECT_TYPE Human 33 | SU:SUBJECT_SPECIES Homo sapiens 34 | SU:TAXONOMY_ID 9606 35 | #SUBJECT_SAMPLE_FACTORS: SUBJECT(optional)[tab]SAMPLE[tab]FACTORS(NAME:VALUE pairs separated by |)[tab]Additional sample data 36 | SUBJECT_SAMPLE_FACTORS CER030_294717_ML_1 CER030_294717_ML_1 Tissue/Fluid:Serum 37 | SUBJECT_SAMPLE_FACTORS CER040_242995_ML_2 CER040_242995_ML_2 Tissue/Fluid:Serum 38 | SUBJECT_SAMPLE_FACTORS CER055_249947_ML_3 CER055_249947_ML_3 Tissue/Fluid:Serum 39 | SUBJECT_SAMPLE_FACTORS CER062_246153_ML_4 CER062_246153_ML_4 Tissue/Fluid:Serum 40 | SUBJECT_SAMPLE_FACTORS CER085_251176_ML_5 CER085_251176_ML_5 Tissue/Fluid:Serum 41 | SUBJECT_SAMPLE_FACTORS CER093_242931_ML_6 CER093_242931_ML_6 Tissue/Fluid:Serum 42 | SUBJECT_SAMPLE_FACTORS CER110_238825_ML_7 CER110_238825_ML_7 Tissue/Fluid:Serum 43 | SUBJECT_SAMPLE_FACTORS CER120_253690_ML_8 CER120_253690_ML_8 Tissue/Fluid:Serum 44 | SUBJECT_SAMPLE_FACTORS CER147_254803_ML_9 CER147_254803_ML_9 Tissue/Fluid:Serum 45 | SUBJECT_SAMPLE_FACTORS CER149_266689_ML_10 CER149_266689_ML_10 Tissue/Fluid:Serum 46 | SUBJECT_SAMPLE_FACTORS CER158_254231_ML_11 
CER158_254231_ML_11 Tissue/Fluid:Serum 47 | SUBJECT_SAMPLE_FACTORS CER165_287001_ML_12 CER165_287001_ML_12 Tissue/Fluid:Serum 48 | SUBJECT_SAMPLE_FACTORS CER178_295145_ML_13 CER178_295145_ML_13 Tissue/Fluid:Serum 49 | SUBJECT_SAMPLE_FACTORS CER181_244392_ML_14 CER181_244392_ML_14 Tissue/Fluid:Serum 50 | SUBJECT_SAMPLE_FACTORS CER188_250760_ML_15 CER188_250760_ML_15 Tissue/Fluid:Serum 51 | SUBJECT_SAMPLE_FACTORS CER192_254091_ML_16 CER192_254091_ML_16 Tissue/Fluid:Serum 52 | SUBJECT_SAMPLE_FACTORS CER201_244193_ML_17 CER201_244193_ML_17 Tissue/Fluid:Serum 53 | SUBJECT_SAMPLE_FACTORS CER216_242490_ML_18 CER216_242490_ML_18 Tissue/Fluid:Serum 54 | SUBJECT_SAMPLE_FACTORS CER220_274308_ML_19 CER220_274308_ML_19 Tissue/Fluid:Serum 55 | SUBJECT_SAMPLE_FACTORS CER223_264067_ML_20 CER223_264067_ML_20 Tissue/Fluid:Serum 56 | SUBJECT_SAMPLE_FACTORS CER226_254303_ML_21 CER226_254303_ML_21 Tissue/Fluid:Serum 57 | SUBJECT_SAMPLE_FACTORS CER277_255328_ML_22 CER277_255328_ML_22 Tissue/Fluid:Serum 58 | SUBJECT_SAMPLE_FACTORS CER287_248530_ML_23 CER287_248530_ML_23 Tissue/Fluid:Serum 59 | SUBJECT_SAMPLE_FACTORS CER303_253023_ML_24 CER303_253023_ML_24 Tissue/Fluid:Serum 60 | SUBJECT_SAMPLE_FACTORS CER315_282966_ML_25 CER315_282966_ML_25 Tissue/Fluid:Serum 61 | SUBJECT_SAMPLE_FACTORS CER324_285069_ML_26 CER324_285069_ML_26 Tissue/Fluid:Serum 62 | SUBJECT_SAMPLE_FACTORS CER340_244448_ML_27 CER340_244448_ML_27 Tissue/Fluid:Serum 63 | SUBJECT_SAMPLE_FACTORS CER346_246320_ML_28 CER346_246320_ML_28 Tissue/Fluid:Serum 64 | SUBJECT_SAMPLE_FACTORS CER356_269662_ML_29 CER356_269662_ML_29 Tissue/Fluid:Serum 65 | SUBJECT_SAMPLE_FACTORS CER368_250104_ML_30 CER368_250104_ML_30 Tissue/Fluid:Serum 66 | SUBJECT_SAMPLE_FACTORS CER369_276355_ML_31 CER369_276355_ML_31 Tissue/Fluid:Serum 67 | SUBJECT_SAMPLE_FACTORS CER384_264971_ML_32 CER384_264971_ML_32 Tissue/Fluid:Serum 68 | SUBJECT_SAMPLE_FACTORS CER445_286527_ML_33 CER445_286527_ML_33 Tissue/Fluid:Serum 69 | SUBJECT_SAMPLE_FACTORS 
CER452_240972_ML_34 CER452_240972_ML_34 Tissue/Fluid:Serum 70 | SUBJECT_SAMPLE_FACTORS CER463_271249_ML_35 CER463_271249_ML_35 Tissue/Fluid:Serum 71 | SUBJECT_SAMPLE_FACTORS CER465_265004_ML_36 CER465_265004_ML_36 Tissue/Fluid:Serum 72 | SUBJECT_SAMPLE_FACTORS CER483_294606_ML_37 CER483_294606_ML_37 Tissue/Fluid:Serum 73 | SUBJECT_SAMPLE_FACTORS CER488_274343_ML_38 CER488_274343_ML_38 Tissue/Fluid:Serum 74 | SUBJECT_SAMPLE_FACTORS CER530_249229_ML_39 CER530_249229_ML_39 Tissue/Fluid:Serum 75 | SUBJECT_SAMPLE_FACTORS CER540_240346_ML_40 CER540_240346_ML_40 Tissue/Fluid:Serum 76 | SUBJECT_SAMPLE_FACTORS CER552_241945_ML_41 CER552_241945_ML_41 Tissue/Fluid:Serum 77 | SUBJECT_SAMPLE_FACTORS CER555_251239_ML_42 CER555_251239_ML_42 Tissue/Fluid:Serum 78 | #COLLECTION 79 | CO:COLLECTION_SUMMARY - 80 | #TREATMENT 81 | TR:TREATMENT_SUMMARY - 82 | #SAMPLEPREP 83 | SP:SAMPLEPREP_SUMMARY Methanol: Water Extraction 84 | SP:SAMPLEPREP_PROTOCOL_FILENAME NIH_WCMC_LaMerrill_Method_GaikwadLab__SteroidAnalysis_2013-14.docx 85 | SP:PROCESSING_METHOD Homogenization and Solvent Removal w/ Speed Vac 86 | SP:PROCESSING_STORAGE_CONDITIONS On Ice 87 | SP:EXTRACTION_METHOD 1:1 Methanol: Water 88 | SP:EXTRACT_STORAGE -80C 89 | SP:SAMPLE_RESUSPENSION 150ul CH3OH/H2O 90 | SP:ORGAN Sprague-Dawley Maternal: Adrenal, liver, placenta, amniotic fluid 91 | SP:ORGAN Fetal: Male and female brain, male and female liver 92 | #CHROMATOGRAPHY 93 | CH:CHROMATOGRAPHY_SUMMARY Targeted UPLC-MS/MS 94 | CH:CHROMATOGRAPHY_TYPE Reversed phase 95 | CH:INSTRUMENT_NAME Waters Acquity 96 | CH:COLUMN_NAME Waters Acquity HSS T3 (150 x 2.1mm, 1.8um) 97 | CH:FLOW_GRADIENT 0-2 min 100% A (Water 0.1% formic acid) 0% B (CH3CN 0.1 % formic acid), 2-4 min 98 | CH:FLOW_GRADIENT A, 4-9mins 45% A, 9-11 mins 20% A, 11-12 mins 100% A 99 | CH:FLOW_RATE 0.15 ml/min 100 | CH:SAMPLE_INJECTION 10ul 101 | CH:SOLVENT_A Water 0.1% formic acid 102 | CH:SOLVENT_B CH3CN 0.1 % formic acid 103 | CH:ANALYTICAL_TIME 12 mins 104 | #ANALYSIS 105 | 
AN:ANALYSIS_TYPE MS 106 | AN:LABORATORY_NAME Gaikwad Laboratory 107 | AN:ACQUISITION_DATE 41716 108 | AN:SOFTWARE_VERSION Masslynx 109 | AN:OPERATOR_NAME Nilesh Gaikwad 110 | #MS 111 | MS:INSTRUMENT_NAME Waters Xevo-TQ 112 | MS:INSTRUMENT_TYPE Triple quadrupole 113 | MS:MS_TYPE ESI 114 | MS:ION_MODE POSITIVE 115 | MS:CAPILLARY_VOLTAGE 3.0 kV 116 | MS:COLLISION_GAS N2 117 | MS:IONIZATION Electrospray Ionization 118 | MS:SOURCE_TEMPERATURE 150C 119 | MS:DESOLVATION_GAS_FLOW 600 L/h 120 | MS:DESOLVATION_TEMPERATURE 350C 121 | MS:MS_COMMENTS UPLC-MS/MS 122 | #MS_METABOLITE_DATA 123 | MS_METABOLITE_DATA:UNITS pg/ml 124 | MS_METABOLITE_DATA_START 125 | Samples CER030_294717_ML_1 CER040_242995_ML_2 CER055_249947_ML_3 CER062_246153_ML_4 CER085_251176_ML_5 CER093_242931_ML_6 CER110_238825_ML_7 CER120_253690_ML_8 CER147_254803_ML_9 CER149_266689_ML_10 CER158_254231_ML_11 CER165_287001_ML_12 CER178_295145_ML_13 CER181_244392_ML_14 CER188_250760_ML_15 CER192_254091_ML_16 CER201_244193_ML_17 CER216_242490_ML_18 CER220_274308_ML_19 CER223_264067_ML_20 CER226_254303_ML_21 CER277_255328_ML_22 CER287_248530_ML_23 CER303_253023_ML_24 CER315_282966_ML_25 CER324_285069_ML_26 CER340_244448_ML_27 CER346_246320_ML_28 CER356_269662_ML_29 CER368_250104_ML_30 CER369_276355_ML_31 CER384_264971_ML_32 CER445_286527_ML_33 CER452_240972_ML_34 CER463_271249_ML_35 CER465_265004_ML_36 CER483_294606_ML_37 CER488_274343_ML_38 CER530_249229_ML_39 CER540_240346_ML_40 CER552_241945_ML_41 CER555_251239_ML_42 126 | Factors Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum 
Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum 127 | 17-hydroxypregnenolone 946.2500 0.0000 676.2500 0.0000 2251.2500 0.0000 0.0000 1134.7500 0.0000 0.0000 2016.7500 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1919.7500 0.0000 972.7500 0.0000 1542.2500 1687.7500 421.0000 0.0000 373.2500 0.0000 614.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 528.2500 128 | 17-hydroxyprogesterone 0.0000 2.0000 0.0000 0.0000 19.2500 0.0000 0.0000 27.0000 2.0000 120.7500 27.7500 83.0000 0.0000 8.0000 3.5000 274.0000 0.0000 0.0000 3.0000 3.2500 0.0000 43.7500 15.2500 25.7500 4.2500 0.0000 0.0000 49.5000 27.7500 14.0000 9.7500 35.2500 34.7500 4.5000 8.0000 17.2500 0.0000 24.7500 19.0000 0.0000 4.5000 132.0000 129 | Allodihydrotestosterone 80.0000 1181.0000 0.0000 0.0000 0.0000 112.2500 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 288.0000 0.0000 0.0000 374.7500 0.0000 27.5000 112.7500 247.7500 39.0000 0.0000 0.0000 0.0000 0.0000 0.0000 761.0000 245.5000 332.5000 52.7500 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 465.7500 159.0000 0.0000 77.0000 315.5000 466.0000 130 | Androstenedione 76.7500 57.0000 176.2500 399.5000 208.5000 37.0000 281.2500 79.7500 250.7500 420.5000 123.0000 186.2500 34.7500 224.5000 67.7500 335.0000 126.5000 277.0000 50.5000 153.7500 62.2500 107.0000 431.2500 167.5000 134.0000 60.7500 38.5000 42.0000 78.7500 43.0000 60.0000 114.7500 237.7500 53.5000 51.7500 298.0000 220.2500 15.0000 256.5000 172.5000 79.2500 52.5000 131 | Androstenolone (DHEA) 1779.7500 1409.2500 945.7500 748.2500 2284.0000 2351.0000 2183.7500 1916.5000 5079.5000 1474.0000 1338.5000 1646.0000 2051.7500 2039.7500 2618.0000 306.7500 574.5000 1794.2500 1429.0000 2293.2500 2066.2500 
2493.2500 918.0000 1579.2500 2042.2500 2645.7500 2393.7500 1913.0000 1641.5000 853.2500 586.5000 537.2500 562.5000 1887.2500 979.0000 678.5000 1357.2500 1526.2500 2300.7500 129.0000 409.2500 282.2500 132 | Cortexolone 0.0000 0.0000 0.0000 54.0000 0.0000 0.0000 0.0000 0.0000 215.7500 135.7500 72.7500 53.0000 11.7500 0.0000 0.0000 0.0000 0.0000 101.2500 11.2500 0.0000 0.0000 315.0000 181.2500 0.0000 7.7500 151.2500 0.0000 0.0000 104.0000 0.0000 0.0000 30.7500 94.2500 210.5000 33.2500 126.0000 0.0000 10.0000 17.0000 15.7500 0.0000 0.0000 133 | Cortexone 108.0000 16.0000 13.0000 117.5000 3.2500 63.2500 42.5000 146.7500 29.5000 204.2500 28.7500 67.0000 30.5000 103.0000 23.0000 416.7500 63.5000 32.5000 32.5000 127.2500 39.0000 84.2500 7.2500 16.2500 68.7500 27.0000 46.5000 21.7500 3.2500 14.7500 28.7500 67.0000 33.0000 40.7500 31.0000 32.2500 40.0000 13.7500 18.7500 0.0000 25.7500 29.0000 134 | Corticosterone_ DOC 0.0000 354.5000 0.0000 0.0000 322.5000 419.7500 420.7500 0.0000 0.0000 0.0000 393.2500 915.5000 0.0000 432.2500 1233.0000 0.0000 525.5000 1700.0000 0.0000 98.7500 285.5000 42.5000 428.2500 0.0000 427.5000 271.7500 254.7500 478.0000 303.5000 462.2500 532.0000 715.0000 1073.0000 836.2500 0.0000 1639.0000 601.7500 287.7500 0.0000 0.0000 435.2500 1602.2500 135 | Cortisol 7643.0000 39245.7500 11671.5000 20216.0000 14908.7500 14386.5000 16815.2500 7806.2500 27135.5000 7095.0000 12175.2500 36413.0000 2499.2500 15101.7500 22045.0000 24832.0000 13257.0000 19528.5000 4539.7500 7681.7500 9585.2500 19361.0000 24203.7500 5667.0000 19437.2500 10849.2500 11855.7500 7546.5000 3093.7500 19035.7500 18575.0000 14801.5000 22960.7500 22506.5000 8001.5000 31037.5000 18577.2500 15506.2500 8364.7500 2145.7500 5574.7500 19662.5000 136 | Estradiol 123992.2500 796595.7500 619110.0000 449415.7500 320835.5000 326124.2500 249087.2500 311589.2500 345598.5000 485857.0000 332055.2500 211831.0000 334929.7500 235466.7500 352555.0000 410500.0000 887955.0000 865791.7500 1648163.5000 856726.7500 
579044.2500 254013.2500 326272.7500 239893.7500 329553.2500 438715.5000 248489.0000 380251.0000 338965.5000 337231.2500 342754.5000 370657.2500 2028106.5000 733521.0000 399244.2500 321007.5000 634463.0000 231294.0000 349439.2500 75746.7500 399415.5000 303855.7500 137 | Estrone 484.5000 1663.7500 1680.7500 794.5000 557.2500 625.7500 669.7500 885.0000 715.0000 1225.5000 697.7500 478.2500 659.0000 575.5000 871.7500 1089.0000 1726.2500 2325.2500 3286.7500 1955.7500 1094.0000 486.2500 650.5000 574.2500 601.7500 842.7500 757.7500 732.7500 571.7500 693.7500 1004.2500 879.2500 3154.7500 1095.2500 22680.2500 637.2500 1108.2500 474.2500 810.2500 421.2500 680.7500 623.7500 138 | Pregnenolone 12.2500 0.0000 0.0000 0.0000 0.0000 144.2500 14.7500 807.2500 0.0000 30.0000 0.0000 0.0000 0.0000 0.0000 16.5000 139.5000 132.5000 0.0000 0.0000 13.7500 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 488.5000 0.0000 0.0000 0.0000 0.0000 280.7500 0.0000 0.0000 0.0000 0.0000 0.0000 205.5000 139 | Progesterone 28.2500 6.2500 725.2500 57.2500 767.0000 2.7500 388.0000 9.0000 19.5000 242.5000 4.0000 0.0000 94.5000 160.7500 0.0000 3214.5000 218.2500 1.0000 0.0000 20.0000 4.5000 55.7500 24.5000 57.0000 200.5000 138.7500 132.2500 120.5000 80.5000 59.5000 315.7500 247.2500 211.5000 198.5000 232.2500 241.0000 199.5000 282.5000 216.5000 358.5000 289.5000 199.2500 140 | Testosterone 75.7500 63.2500 42.7500 98.0000 24.2500 35.0000 165.7500 23.2500 73.7500 52.7500 118.7500 35.7500 65.2500 127.2500 14.2500 202.5000 110.7500 53.5000 54.2500 2.2500 105.2500 182.7500 116.0000 66.2500 52.5000 106.2500 43.2500 57.2500 97.2500 16.0000 192.0000 53.7500 182.5000 0.2500 11.5000 87.2500 33.7500 45.5000 26.2500 96.0000 17.5000 79.7500 141 | MS_METABOLITE_DATA_END 142 | #METABOLITES 143 | METABOLITES_START 144 | metabolite_name moverz_quant ri ri_type pubchem_id inchi_key kegg_id other_id other_id_type 145 | 17-hydroxypregnenolone 91451 2Q4710 UCDavis_Gaikwad_Lab_ID 146 | 
17-hydroxyprogesterone 6238 6Q3360 UCDavis_Gaikwad_Lab_ID 147 | Allodihydrotestosterone 10635 14A2570 UCDavis_Gaikwad_Lab_ID 148 | Androstenedione 6128 12A6030 UCDavis_Gaikwad_Lab_ID 149 | Androstenolone (DHEA) 5881 3A8500 UCDavis_Gaikwad_Lab_ID 150 | Cortexolone 440707 7Q1610 UCDavis_Gaikwad_Lab_ID 151 | Cortexone 6166 9Q3460 UCDavis_Gaikwad_Lab_ID 152 | Corticosterone, DOC 5753 10Q1550 UCDavis_Gaikwad_Lab_ID 153 | Cortisol 5754 8Q3880 UCDavis_Gaikwad_Lab_ID 154 | Estradiol 5757 16E0950 UCDavis_Gaikwad_Lab_ID 155 | Estrone 5870 15E2300 UCDavis_Gaikwad_Lab_ID 156 | Pregnenolone 8955 1Q5500 UCDavis_Gaikwad_Lab_ID 157 | Progesterone 5994 5Q2600 UCDavis_Gaikwad_Lab_ID 158 | Testosterone 6013 13A6950 UCDavis_Gaikwad_Lab_ID 159 | METABOLITES_END 160 | #END 161 | 162 | 163 | -------------------------------------------------------------------------------- /tests/example_data/validation_files/ST000122_AN000204_error_1.txt: -------------------------------------------------------------------------------- 1 | #METABOLOMICS WORKBENCH STUDY_ID:ST000122 ANALYSIS_ID:AN000204 PROJECT_ID:PR000109 2 | VERSION 1 3 | CREATED_ON 2016-09-17 4 | #PROJECT 5 | PR:PROJECT_TITLE Perinatal DDT causes dysfunctional lipid metabolism underlying metabolic 6 | PR:PROJECT_TYPE Pilot and Feasibility Projects 7 | PR:PROJECT_SUMMARY - 8 | PR:INSTITUTE University of California, Davis 9 | PR:DEPARTMENT Nutrition 10 | PR:LABORATORY Gaikwad Lab 11 | PR:LAST_NAME Gaikwad 12 | PR:FIRST_NAME Nilesh 13 | PR:ADDRESS - 14 | PR:EMAIL nwgaikwad@ucdavis.edu 15 | PR:PHONE 530-752-2906 16 | PR:FUNDING_SOURCE NIH 1U24DK097154 ;  PI Fiehn, Oliver  ; UC Davis WEST COAST CENTRAL 17 | PR:FUNDING_SOURCE METABOLOMICS RESOURCE CORE (WC3MRC) 18 | #STUDY 19 | ST:STUDY_TITLE Perinatal DDT causes dysfunctional lipid metabolism underlying metabolic 20 | ST:STUDY_TYPE steroid panel 21 | ST:STUDY_SUMMARY - 22 | ST:INSTITUTE University of California, Davis 23 | ST:DEPARTMENT Nutrition 24 | ST:LABORATORY Gaikwad Lab 25 | 
ST:LAST_NAME Gaikwad 26 | ST:FIRST_NAME Nilesh 27 | ST:ADDRESS - 28 | ST:EMAIL nwgaikwad@ucdavis.edu 29 | ST:PHONE - 30 | ST:NUM_GROUPS NA 31 | #SUBJECT 32 | SU:SUBJECT_TYPE Human 33 | SU:SUBJECT_SPECIES Homo sapiens 34 | SU:TAXONOMY_ID 9606 35 | #SUBJECT_SAMPLE_FACTORS: SUBJECT(optional)[tab]SAMPLE[tab]FACTORS(NAME:VALUE pairs separated by |)[tab]Additional sample data 36 | SUBJECT_SAMPLE_FACTORS Tissue/Fluid: 37 | SUBJECT_SAMPLE_FACTORS CER040_242995_ML_2 CER040_242995_ML_2 Tissue/Fluid:Serum 38 | SUBJECT_SAMPLE_FACTORS CER055_249947_ML_3 CER055_249947_ML_3 Tissue/Fluid:Serum 39 | SUBJECT_SAMPLE_FACTORS CER062_246153_ML_4 CER062_246153_ML_4 Tissue/Fluid:Serum 40 | SUBJECT_SAMPLE_FACTORS CER085_251176_ML_5 CER085_251176_ML_5 Tissue/Fluid:Serum 41 | SUBJECT_SAMPLE_FACTORS CER093_242931_ML_6 CER093_242931_ML_6 Tissue/Fluid:Serum 42 | SUBJECT_SAMPLE_FACTORS CER110_238825_ML_7 CER110_238825_ML_7 Tissue/Fluid:Serum 43 | SUBJECT_SAMPLE_FACTORS CER120_253690_ML_8 CER120_253690_ML_8 Tissue/Fluid:Serum 44 | SUBJECT_SAMPLE_FACTORS CER147_254803_ML_9 CER147_254803_ML_9 Tissue/Fluid:Serum 45 | SUBJECT_SAMPLE_FACTORS CER149_266689_ML_10 CER149_266689_ML_10 Tissue/Fluid:Serum 46 | SUBJECT_SAMPLE_FACTORS CER158_254231_ML_11 CER158_254231_ML_11 Tissue/Fluid:Serum 47 | SUBJECT_SAMPLE_FACTORS CER165_287001_ML_12 CER165_287001_ML_12 Tissue/Fluid:Serum 48 | SUBJECT_SAMPLE_FACTORS CER178_295145_ML_13 CER178_295145_ML_13 Tissue/Fluid:Serum 49 | SUBJECT_SAMPLE_FACTORS CER181_244392_ML_14 CER181_244392_ML_14 Tissue/Fluid:Serum 50 | SUBJECT_SAMPLE_FACTORS CER188_250760_ML_15 CER188_250760_ML_15 Tissue/Fluid:Serum 51 | SUBJECT_SAMPLE_FACTORS CER192_254091_ML_16 CER192_254091_ML_16 Tissue/Fluid:Serum 52 | SUBJECT_SAMPLE_FACTORS CER201_244193_ML_17 CER201_244193_ML_17 Tissue/Fluid:Serum 53 | SUBJECT_SAMPLE_FACTORS CER216_242490_ML_18 CER216_242490_ML_18 Tissue/Fluid:Serum 54 | SUBJECT_SAMPLE_FACTORS CER220_274308_ML_19 CER220_274308_ML_19 Tissue/Fluid:Serum 55 | SUBJECT_SAMPLE_FACTORS 
CER223_264067_ML_20 CER223_264067_ML_20 Tissue/Fluid:Serum 56 | SUBJECT_SAMPLE_FACTORS CER226_254303_ML_21 CER226_254303_ML_21 Tissue/Fluid:Serum 57 | SUBJECT_SAMPLE_FACTORS CER277_255328_ML_22 CER277_255328_ML_22 Tissue/Fluid:Serum 58 | SUBJECT_SAMPLE_FACTORS CER287_248530_ML_23 CER287_248530_ML_23 Tissue/Fluid:Serum 59 | SUBJECT_SAMPLE_FACTORS CER303_253023_ML_24 CER303_253023_ML_24 Tissue/Fluid:Serum 60 | SUBJECT_SAMPLE_FACTORS CER315_282966_ML_25 CER315_282966_ML_25 Tissue/Fluid:Serum 61 | SUBJECT_SAMPLE_FACTORS CER324_285069_ML_26 CER324_285069_ML_26 Tissue/Fluid:Serum 62 | SUBJECT_SAMPLE_FACTORS CER340_244448_ML_27 CER340_244448_ML_27 Tissue/Fluid:Serum 63 | SUBJECT_SAMPLE_FACTORS CER346_246320_ML_28 CER346_246320_ML_28 Tissue/Fluid:Serum 64 | SUBJECT_SAMPLE_FACTORS CER356_269662_ML_29 CER356_269662_ML_29 Tissue/Fluid:Serum 65 | SUBJECT_SAMPLE_FACTORS CER368_250104_ML_30 CER368_250104_ML_30 Tissue/Fluid:Serum 66 | SUBJECT_SAMPLE_FACTORS CER369_276355_ML_31 CER369_276355_ML_31 Tissue/Fluid:Serum 67 | SUBJECT_SAMPLE_FACTORS CER384_264971_ML_32 CER384_264971_ML_32 Tissue/Fluid:Serum 68 | SUBJECT_SAMPLE_FACTORS CER445_286527_ML_33 CER445_286527_ML_33 Tissue/Fluid:Serum 69 | SUBJECT_SAMPLE_FACTORS CER452_240972_ML_34 CER452_240972_ML_34 Tissue/Fluid:Serum 70 | SUBJECT_SAMPLE_FACTORS CER463_271249_ML_35 CER463_271249_ML_35 Tissue/Fluid:Serum 71 | SUBJECT_SAMPLE_FACTORS CER465_265004_ML_36 CER465_265004_ML_36 Tissue/Fluid:Serum 72 | SUBJECT_SAMPLE_FACTORS CER483_294606_ML_37 CER483_294606_ML_37 Tissue/Fluid:Serum 73 | SUBJECT_SAMPLE_FACTORS CER488_274343_ML_38 CER488_274343_ML_38 Tissue/Fluid:Serum 74 | SUBJECT_SAMPLE_FACTORS CER530_249229_ML_39 CER530_249229_ML_39 Tissue/Fluid:Serum 75 | SUBJECT_SAMPLE_FACTORS CER540_240346_ML_40 CER540_240346_ML_40 Tissue/Fluid:Serum 76 | SUBJECT_SAMPLE_FACTORS CER552_241945_ML_41 CER552_241945_ML_41 Tissue/Fluid:Serum 77 | SUBJECT_SAMPLE_FACTORS CER555_251239_ML_42 CER555_251239_ML_42 Tissue/Fluid:Serum 78 | #COLLECTION 79 | 
CO:COLLECTION_SUMMARY - 80 | #TREATMENT 81 | TR:TREATMENT_SUMMARY - 82 | #SAMPLEPREP 83 | SP:SAMPLEPREP_SUMMARY Methanol: Water Extraction 84 | SP:SAMPLEPREP_PROTOCOL_FILENAME NIH_WCMC_LaMerrill_Method_GaikwadLab__SteroidAnalysis_2013-14.docx 85 | SP:PROCESSING_METHOD Homogenization and Solvent Removal w/ Speed Vac 86 | SP:PROCESSING_STORAGE_CONDITIONS On Ice 87 | SP:EXTRACTION_METHOD 1:1 Methanol: Water 88 | SP:EXTRACT_STORAGE -80C 89 | SP:SAMPLE_RESUSPENSION 150ul CH3OH/H2O 90 | SP:ORGAN Sprague-Dawley Maternal: Adrenal, liver, placenta, amniotic fluid 91 | SP:ORGAN Fetal: Male and female brain, male and female liver 92 | #CHROMATOGRAPHY 93 | CH:CHROMATOGRAPHY_SUMMARY Targeted UPLC-MS/MS 94 | CH:CHROMATOGRAPHY_TYPE Reversed phase 95 | CH:INSTRUMENT_NAME Waters Acquity 96 | CH:COLUMN_NAME Waters Acquity HSS T3 (150 x 2.1mm, 1.8um) 97 | CH:FLOW_GRADIENT 0-2 min 100% A (Water 0.1% formic acid) 0% B (CH3CN 0.1 % formic acid), 2-4 min 98 | CH:FLOW_GRADIENT A, 4-9mins 45% A, 9-11 mins 20% A, 11-12 mins 100% A 99 | CH:FLOW_RATE 0.15 ml/min 100 | CH:SAMPLE_INJECTION 10ul 101 | CH:SOLVENT_A Water 0.1% formic acid 102 | CH:SOLVENT_B CH3CN 0.1 % formic acid 103 | CH:ANALYTICAL_TIME 12 mins 104 | #ANALYSIS 105 | AN:ANALYSIS_TYPE MS 106 | AN:LABORATORY_NAME Gaikwad Laboratory 107 | AN:ACQUISITION_DATE 41716 108 | AN:SOFTWARE_VERSION Masslynx 109 | AN:OPERATOR_NAME Nilesh Gaikwad 110 | #MS 111 | MS:INSTRUMENT_NAME Waters Xevo-TQ 112 | MS:INSTRUMENT_TYPE Triple quadrupole 113 | MS:MS_TYPE ESI 114 | MS:ION_MODE POSITIVE 115 | MS:CAPILLARY_VOLTAGE 3.0 kV 116 | MS:COLLISION_GAS N2 117 | MS:IONIZATION Electrospray Ionization 118 | MS:SOURCE_TEMPERATURE 150C 119 | MS:DESOLVATION_GAS_FLOW 600 L/h 120 | MS:DESOLVATION_TEMPERATURE 350C 121 | MS:MS_COMMENTS UPLC-MS/MS 122 | #MS_METABOLITE_DATA 123 | MS_METABOLITE_DATA:UNITS pg/ml 124 | MS_METABOLITE_DATA_START 125 | Samples CER030_294717_ML_1 CER040_242995_ML_2 CER055_249947_ML_3 CER062_246153_ML_4 CER085_251176_ML_5 CER093_242931_ML_6 
CER110_238825_ML_7 CER120_253690_ML_8 CER147_254803_ML_9 CER149_266689_ML_10 CER158_254231_ML_11 CER165_287001_ML_12 CER178_295145_ML_13 CER181_244392_ML_14 CER188_250760_ML_15 CER192_254091_ML_16 CER201_244193_ML_17 CER216_242490_ML_18 CER220_274308_ML_19 CER223_264067_ML_20 CER226_254303_ML_21 CER277_255328_ML_22 CER287_248530_ML_23 CER303_253023_ML_24 CER315_282966_ML_25 CER324_285069_ML_26 CER340_244448_ML_27 CER346_246320_ML_28 CER356_269662_ML_29 CER368_250104_ML_30 CER369_276355_ML_31 CER384_264971_ML_32 CER445_286527_ML_33 CER452_240972_ML_34 CER463_271249_ML_35 CER465_265004_ML_36 CER483_294606_ML_37 CER488_274343_ML_38 CER530_249229_ML_39 CER540_240346_ML_40 CER552_241945_ML_41 CER555_251239_ML_42 126 | Factors Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum 127 | 17-hydroxypregnenolone 946.2500 0.0000 676.2500 0.0000 2251.2500 0.0000 0.0000 1134.7500 0.0000 0.0000 2016.7500 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1919.7500 0.0000 972.7500 0.0000 1542.2500 1687.7500 421.0000 0.0000 373.2500 0.0000 614.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 528.2500 128 | 17-hydroxyprogesterone 0.0000 2.0000 0.0000 0.0000 19.2500 0.0000 0.0000 27.0000 2.0000 120.7500 27.7500 
83.0000 0.0000 8.0000 3.5000 274.0000 0.0000 0.0000 3.0000 3.2500 0.0000 43.7500 15.2500 25.7500 4.2500 0.0000 0.0000 49.5000 27.7500 14.0000 9.7500 35.2500 34.7500 4.5000 8.0000 17.2500 0.0000 24.7500 19.0000 0.0000 4.5000 132.0000 129 | Allodihydrotestosterone 80.0000 1181.0000 0.0000 0.0000 0.0000 112.2500 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 288.0000 0.0000 0.0000 374.7500 0.0000 27.5000 112.7500 247.7500 39.0000 0.0000 0.0000 0.0000 0.0000 0.0000 761.0000 245.5000 332.5000 52.7500 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 465.7500 159.0000 0.0000 77.0000 315.5000 466.0000 130 | Androstenedione 76.7500 57.0000 176.2500 399.5000 208.5000 37.0000 281.2500 79.7500 250.7500 420.5000 123.0000 186.2500 34.7500 224.5000 67.7500 335.0000 126.5000 277.0000 50.5000 153.7500 62.2500 107.0000 431.2500 167.5000 134.0000 60.7500 38.5000 42.0000 78.7500 43.0000 60.0000 114.7500 237.7500 53.5000 51.7500 298.0000 220.2500 15.0000 256.5000 172.5000 79.2500 52.5000 131 | Androstenolone (DHEA) 1779.7500 1409.2500 945.7500 748.2500 2284.0000 2351.0000 2183.7500 1916.5000 5079.5000 1474.0000 1338.5000 1646.0000 2051.7500 2039.7500 2618.0000 306.7500 574.5000 1794.2500 1429.0000 2293.2500 2066.2500 2493.2500 918.0000 1579.2500 2042.2500 2645.7500 2393.7500 1913.0000 1641.5000 853.2500 586.5000 537.2500 562.5000 1887.2500 979.0000 678.5000 1357.2500 1526.2500 2300.7500 129.0000 409.2500 282.2500 132 | Cortexolone 0.0000 0.0000 0.0000 54.0000 0.0000 0.0000 0.0000 0.0000 215.7500 135.7500 72.7500 53.0000 11.7500 0.0000 0.0000 0.0000 0.0000 101.2500 11.2500 0.0000 0.0000 315.0000 181.2500 0.0000 7.7500 151.2500 0.0000 0.0000 104.0000 0.0000 0.0000 30.7500 94.2500 210.5000 33.2500 126.0000 0.0000 10.0000 17.0000 15.7500 0.0000 0.0000 133 | Cortexone 108.0000 16.0000 13.0000 117.5000 3.2500 63.2500 42.5000 146.7500 29.5000 204.2500 28.7500 67.0000 30.5000 103.0000 23.0000 416.7500 63.5000 32.5000 32.5000 127.2500 39.0000 84.2500 7.2500 16.2500 68.7500 27.0000 46.5000 21.7500 3.2500 
14.7500 28.7500 67.0000 33.0000 40.7500 31.0000 32.2500 40.0000 13.7500 18.7500 0.0000 25.7500 29.0000 134 | Corticosterone_ DOC 0.0000 354.5000 0.0000 0.0000 322.5000 419.7500 420.7500 0.0000 0.0000 0.0000 393.2500 915.5000 0.0000 432.2500 1233.0000 0.0000 525.5000 1700.0000 0.0000 98.7500 285.5000 42.5000 428.2500 0.0000 427.5000 271.7500 254.7500 478.0000 303.5000 462.2500 532.0000 715.0000 1073.0000 836.2500 0.0000 1639.0000 601.7500 287.7500 0.0000 0.0000 435.2500 1602.2500 135 | Cortisol 7643.0000 39245.7500 11671.5000 20216.0000 14908.7500 14386.5000 16815.2500 7806.2500 27135.5000 7095.0000 12175.2500 36413.0000 2499.2500 15101.7500 22045.0000 24832.0000 13257.0000 19528.5000 4539.7500 7681.7500 9585.2500 19361.0000 24203.7500 5667.0000 19437.2500 10849.2500 11855.7500 7546.5000 3093.7500 19035.7500 18575.0000 14801.5000 22960.7500 22506.5000 8001.5000 31037.5000 18577.2500 15506.2500 8364.7500 2145.7500 5574.7500 19662.5000 136 | Estradiol 123992.2500 796595.7500 619110.0000 449415.7500 320835.5000 326124.2500 249087.2500 311589.2500 345598.5000 485857.0000 332055.2500 211831.0000 334929.7500 235466.7500 352555.0000 410500.0000 887955.0000 865791.7500 1648163.5000 856726.7500 579044.2500 254013.2500 326272.7500 239893.7500 329553.2500 438715.5000 248489.0000 380251.0000 338965.5000 337231.2500 342754.5000 370657.2500 2028106.5000 733521.0000 399244.2500 321007.5000 634463.0000 231294.0000 349439.2500 75746.7500 399415.5000 303855.7500 137 | Estrone 484.5000 1663.7500 1680.7500 794.5000 557.2500 625.7500 669.7500 885.0000 715.0000 1225.5000 697.7500 478.2500 659.0000 575.5000 871.7500 1089.0000 1726.2500 2325.2500 3286.7500 1955.7500 1094.0000 486.2500 650.5000 574.2500 601.7500 842.7500 757.7500 732.7500 571.7500 693.7500 1004.2500 879.2500 3154.7500 1095.2500 22680.2500 637.2500 1108.2500 474.2500 810.2500 421.2500 680.7500 623.7500 138 | Pregnenolone 12.2500 0.0000 0.0000 0.0000 0.0000 144.2500 14.7500 807.2500 0.0000 30.0000 0.0000 0.0000 0.0000 0.0000 
16.5000 139.5000 132.5000 0.0000 0.0000 13.7500 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 488.5000 0.0000 0.0000 0.0000 0.0000 280.7500 0.0000 0.0000 0.0000 0.0000 0.0000 205.5000 139 | Progesterone 28.2500 6.2500 725.2500 57.2500 767.0000 2.7500 388.0000 9.0000 19.5000 242.5000 4.0000 0.0000 94.5000 160.7500 0.0000 3214.5000 218.2500 1.0000 0.0000 20.0000 4.5000 55.7500 24.5000 57.0000 200.5000 138.7500 132.2500 120.5000 80.5000 59.5000 315.7500 247.2500 211.5000 198.5000 232.2500 241.0000 199.5000 282.5000 216.5000 358.5000 289.5000 199.2500 140 | Testosterone 75.7500 63.2500 42.7500 98.0000 24.2500 35.0000 165.7500 23.2500 73.7500 52.7500 118.7500 35.7500 65.2500 127.2500 14.2500 202.5000 110.7500 53.5000 54.2500 2.2500 105.2500 182.7500 116.0000 66.2500 52.5000 106.2500 43.2500 57.2500 97.2500 16.0000 192.0000 53.7500 182.5000 0.2500 11.5000 87.2500 33.7500 45.5000 26.2500 96.0000 17.5000 79.7500 141 | MS_METABOLITE_DATA_END 142 | #METABOLITES 143 | METABOLITES_START 144 | metabolite_name moverz_quant ri ri_type pubchem_id inchi_key kegg_id other_id other_id_type 145 | 17-hydroxypregnenolone 91451 2Q4710 UCDavis_Gaikwad_Lab_ID 146 | 17-hydroxyprogesterone 6238 6Q3360 UCDavis_Gaikwad_Lab_ID 147 | Allodihydrotestosterone 10635 14A2570 UCDavis_Gaikwad_Lab_ID 148 | Androstenedione 6128 12A6030 UCDavis_Gaikwad_Lab_ID 149 | Androstenolone (DHEA) 5881 3A8500 UCDavis_Gaikwad_Lab_ID 150 | Cortexolone 440707 7Q1610 UCDavis_Gaikwad_Lab_ID 151 | Cortexone 6166 9Q3460 UCDavis_Gaikwad_Lab_ID 152 | Corticosterone, DOC 5753 10Q1550 UCDavis_Gaikwad_Lab_ID 153 | Cortisol 5754 8Q3880 UCDavis_Gaikwad_Lab_ID 154 | Estradiol 5757 16E0950 UCDavis_Gaikwad_Lab_ID 155 | Estrone 5870 15E2300 UCDavis_Gaikwad_Lab_ID 156 | Pregnenolone 8955 1Q5500 UCDavis_Gaikwad_Lab_ID 157 | Progesterone 5994 5Q2600 UCDavis_Gaikwad_Lab_ID 158 | Testosterone 6013 13A6950 UCDavis_Gaikwad_Lab_ID 159 | METABOLITES_END 160 | #END 161 | 162 | 163 | 
-------------------------------------------------------------------------------- /tests/example_data/validation_files/ST000122_AN000204_error_2.txt: -------------------------------------------------------------------------------- 1 | #METABOLOMICS WORKBENCH STUDY_ID:ST000122 ANALYSIS_ID:AN000204 PROJECT_ID:PR000109 2 | VERSION 1 3 | CREATED_ON 2016-09-17 4 | #PROJECT 5 | PR:PROJECT_TITLE Perinatal DDT causes dysfunctional lipid metabolism underlying metabolic 6 | PR:PROJECT_TYPE Pilot and Feasibility Projects 7 | PR:PROJECT_SUMMARY - 8 | PR:INSTITUTE University of California, Davis 9 | PR:DEPARTMENT Nutrition 10 | PR:LABORATORY Gaikwad Lab 11 | PR:LAST_NAME Gaikwad 12 | PR:FIRST_NAME Nilesh 13 | PR:ADDRESS - 14 | PR:EMAIL nwgaikwad@ucdavis.edu 15 | PR:PHONE 530-752-2906 16 | PR:FUNDING_SOURCE NIH 1U24DK097154 ;  PI Fiehn, Oliver  ; UC Davis WEST COAST CENTRAL 17 | PR:FUNDING_SOURCE METABOLOMICS RESOURCE CORE (WC3MRC) 18 | #STUDY 19 | ST:STUDY_TITLE Perinatal DDT causes dysfunctional lipid metabolism underlying metabolic 20 | ST:STUDY_TYPE steroid panel 21 | ST:STUDY_SUMMARY - 22 | ST:INSTITUTE University of California, Davis 23 | ST:DEPARTMENT Nutrition 24 | ST:LABORATORY Gaikwad Lab 25 | ST:LAST_NAME Gaikwad 26 | ST:FIRST_NAME Nilesh 27 | ST:ADDRESS - 28 | ST:EMAIL nwgaikwad@ucdavis.edu 29 | ST:PHONE - 30 | ST:NUM_GROUPS NA 31 | #SUBJECT 32 | SU:SUBJECT_TYPE Human 33 | SU:SUBJECT_SPECIES Homo sapiens 34 | SU:TAXONOMY_ID 9606 35 | #SUBJECT_SAMPLE_FACTORS: SUBJECT(optional)[tab]SAMPLE[tab]FACTORS(NAME:VALUE pairs separated by |)[tab]Additional sample data 36 | SUBJECT_SAMPLE_FACTORS CER030_294717_ML_1 TEST Tissue/Fluid:Serum 37 | SUBJECT_SAMPLE_FACTORS CER040_242995_ML_2 CER040_242995_ML_2 Tissue/Fluid:Serum 38 | SUBJECT_SAMPLE_FACTORS CER055_249947_ML_3 CER055_249947_ML_3 Tissue/Fluid:Serum 39 | SUBJECT_SAMPLE_FACTORS CER062_246153_ML_4 CER062_246153_ML_4 Tissue/Fluid:Serum 40 | SUBJECT_SAMPLE_FACTORS CER085_251176_ML_5 CER085_251176_ML_5 Tissue/Fluid:Serum 41 | 
SUBJECT_SAMPLE_FACTORS CER093_242931_ML_6 CER093_242931_ML_6 Tissue/Fluid:Serum 42 | SUBJECT_SAMPLE_FACTORS CER110_238825_ML_7 CER110_238825_ML_7 Tissue/Fluid:Serum 43 | SUBJECT_SAMPLE_FACTORS CER120_253690_ML_8 CER120_253690_ML_8 Tissue/Fluid:Serum 44 | SUBJECT_SAMPLE_FACTORS CER147_254803_ML_9 CER147_254803_ML_9 Tissue/Fluid:Serum 45 | SUBJECT_SAMPLE_FACTORS CER149_266689_ML_10 CER149_266689_ML_10 Tissue/Fluid:Serum 46 | SUBJECT_SAMPLE_FACTORS CER158_254231_ML_11 CER158_254231_ML_11 Tissue/Fluid:Serum 47 | SUBJECT_SAMPLE_FACTORS CER165_287001_ML_12 CER165_287001_ML_12 Tissue/Fluid:Serum 48 | SUBJECT_SAMPLE_FACTORS CER178_295145_ML_13 CER178_295145_ML_13 Tissue/Fluid:Serum 49 | SUBJECT_SAMPLE_FACTORS CER181_244392_ML_14 CER181_244392_ML_14 Tissue/Fluid:Serum 50 | SUBJECT_SAMPLE_FACTORS CER188_250760_ML_15 CER188_250760_ML_15 Tissue/Fluid:Serum 51 | SUBJECT_SAMPLE_FACTORS CER192_254091_ML_16 CER192_254091_ML_16 Tissue/Fluid:Serum 52 | SUBJECT_SAMPLE_FACTORS CER201_244193_ML_17 CER201_244193_ML_17 Tissue/Fluid:Serum 53 | SUBJECT_SAMPLE_FACTORS CER216_242490_ML_18 CER216_242490_ML_18 Tissue/Fluid:Serum 54 | SUBJECT_SAMPLE_FACTORS CER220_274308_ML_19 CER220_274308_ML_19 Tissue/Fluid:Serum 55 | SUBJECT_SAMPLE_FACTORS CER223_264067_ML_20 CER223_264067_ML_20 Tissue/Fluid:Serum 56 | SUBJECT_SAMPLE_FACTORS CER226_254303_ML_21 CER226_254303_ML_21 Tissue/Fluid:Serum 57 | SUBJECT_SAMPLE_FACTORS CER277_255328_ML_22 CER277_255328_ML_22 Tissue/Fluid:Serum 58 | SUBJECT_SAMPLE_FACTORS CER287_248530_ML_23 CER287_248530_ML_23 Tissue/Fluid:Serum 59 | SUBJECT_SAMPLE_FACTORS CER303_253023_ML_24 CER303_253023_ML_24 Tissue/Fluid:Serum 60 | SUBJECT_SAMPLE_FACTORS CER315_282966_ML_25 CER315_282966_ML_25 Tissue/Fluid:Serum 61 | SUBJECT_SAMPLE_FACTORS CER324_285069_ML_26 CER324_285069_ML_26 Tissue/Fluid:Serum 62 | SUBJECT_SAMPLE_FACTORS CER340_244448_ML_27 CER340_244448_ML_27 Tissue/Fluid:Serum 63 | SUBJECT_SAMPLE_FACTORS CER346_246320_ML_28 CER346_246320_ML_28 Tissue/Fluid:Serum 64 | 
SUBJECT_SAMPLE_FACTORS CER356_269662_ML_29 CER356_269662_ML_29 Tissue/Fluid:Serum 65 | SUBJECT_SAMPLE_FACTORS CER368_250104_ML_30 CER368_250104_ML_30 Tissue/Fluid:Serum 66 | SUBJECT_SAMPLE_FACTORS CER369_276355_ML_31 CER369_276355_ML_31 Tissue/Fluid:Serum 67 | SUBJECT_SAMPLE_FACTORS CER384_264971_ML_32 CER384_264971_ML_32 Tissue/Fluid:Serum 68 | SUBJECT_SAMPLE_FACTORS CER445_286527_ML_33 CER445_286527_ML_33 Tissue/Fluid:Serum 69 | SUBJECT_SAMPLE_FACTORS CER452_240972_ML_34 CER452_240972_ML_34 Tissue/Fluid:Serum 70 | SUBJECT_SAMPLE_FACTORS CER463_271249_ML_35 CER463_271249_ML_35 Tissue/Fluid:Serum 71 | SUBJECT_SAMPLE_FACTORS CER465_265004_ML_36 CER465_265004_ML_36 Tissue/Fluid:Serum 72 | SUBJECT_SAMPLE_FACTORS CER483_294606_ML_37 CER483_294606_ML_37 Tissue/Fluid:Serum 73 | SUBJECT_SAMPLE_FACTORS CER488_274343_ML_38 CER488_274343_ML_38 Tissue/Fluid:Serum 74 | SUBJECT_SAMPLE_FACTORS CER530_249229_ML_39 CER530_249229_ML_39 Tissue/Fluid:Serum 75 | SUBJECT_SAMPLE_FACTORS CER540_240346_ML_40 CER540_240346_ML_40 Tissue/Fluid:Serum 76 | SUBJECT_SAMPLE_FACTORS CER552_241945_ML_41 CER552_241945_ML_41 Tissue/Fluid:Serum 77 | SUBJECT_SAMPLE_FACTORS CER555_251239_ML_42 CER555_251239_ML_42 Tissue/Fluid:Serum 78 | #COLLECTION 79 | CO:COLLECTION_SUMMARY - 80 | #TREATMENT 81 | TR:TREATMENT_SUMMARY - 82 | #SAMPLEPREP 83 | SP:SAMPLEPREP_SUMMARY Methanol: Water Extraction 84 | SP:SAMPLEPREP_PROTOCOL_FILENAME NIH_WCMC_LaMerrill_Method_GaikwadLab__SteroidAnalysis_2013-14.docx 85 | SP:PROCESSING_METHOD Homogenization and Solvent Removal w/ Speed Vac 86 | SP:PROCESSING_STORAGE_CONDITIONS On Ice 87 | SP:EXTRACTION_METHOD 1:1 Methanol: Water 88 | SP:EXTRACT_STORAGE -80C 89 | SP:SAMPLE_RESUSPENSION 150ul CH3OH/H2O 90 | SP:ORGAN Sprague-Dawley Maternal: Adrenal, liver, placenta, amniotic fluid 91 | SP:ORGAN Fetal: Male and female brain, male and female liver 92 | #CHROMATOGRAPHY 93 | CH:CHROMATOGRAPHY_SUMMARY Targeted UPLC-MS/MS 94 | CH:CHROMATOGRAPHY_TYPE Reversed phase 95 | 
CH:INSTRUMENT_NAME Waters Acquity 96 | CH:COLUMN_NAME Waters Acquity HSS T3 (150 x 2.1mm, 1.8um) 97 | CH:FLOW_GRADIENT 0-2 min 100% A (Water 0.1% formic acid) 0% B (CH3CN 0.1 % formic acid), 2-4 min 98 | CH:FLOW_GRADIENT A, 4-9mins 45% A, 9-11 mins 20% A, 11-12 mins 100% A 99 | CH:FLOW_RATE 0.15 ml/min 100 | CH:SAMPLE_INJECTION 10ul 101 | CH:SOLVENT_A Water 0.1% formic acid 102 | CH:SOLVENT_B CH3CN 0.1 % formic acid 103 | CH:ANALYTICAL_TIME 12 mins 104 | #ANALYSIS 105 | AN:ANALYSIS_TYPE MS 106 | AN:LABORATORY_NAME Gaikwad Laboratory 107 | AN:ACQUISITION_DATE 41716 108 | AN:SOFTWARE_VERSION Masslynx 109 | AN:OPERATOR_NAME Nilesh Gaikwad 110 | #MS 111 | MS:INSTRUMENT_NAME Waters Xevo-TQ 112 | MS:INSTRUMENT_TYPE Triple quadrupole 113 | MS:MS_TYPE ESI 114 | MS:ION_MODE POSITIVE 115 | MS:CAPILLARY_VOLTAGE 3.0 kV 116 | MS:COLLISION_GAS N2 117 | MS:IONIZATION Electrospray Ionization 118 | MS:SOURCE_TEMPERATURE 150C 119 | MS:DESOLVATION_GAS_FLOW 600 L/h 120 | MS:DESOLVATION_TEMPERATURE 350C 121 | MS:MS_COMMENTS UPLC-MS/MS 122 | #MS_METABOLITE_DATA 123 | MS_METABOLITE_DATA:UNITS pg/ml 124 | MS_METABOLITE_DATA_START 125 | Samples CER030_294717_ML_1 CER040_242995_ML_2 CER055_249947_ML_3 CER062_246153_ML_4 CER085_251176_ML_5 CER093_242931_ML_6 CER110_238825_ML_7 CER120_253690_ML_8 CER147_254803_ML_9 CER149_266689_ML_10 CER158_254231_ML_11 CER165_287001_ML_12 CER178_295145_ML_13 CER181_244392_ML_14 CER188_250760_ML_15 CER192_254091_ML_16 CER201_244193_ML_17 CER216_242490_ML_18 CER220_274308_ML_19 CER223_264067_ML_20 CER226_254303_ML_21 CER277_255328_ML_22 CER287_248530_ML_23 CER303_253023_ML_24 CER315_282966_ML_25 CER324_285069_ML_26 CER340_244448_ML_27 CER346_246320_ML_28 CER356_269662_ML_29 CER368_250104_ML_30 CER369_276355_ML_31 CER384_264971_ML_32 CER445_286527_ML_33 CER452_240972_ML_34 CER463_271249_ML_35 CER465_265004_ML_36 CER483_294606_ML_37 CER488_274343_ML_38 CER530_249229_ML_39 CER540_240346_ML_40 CER552_241945_ML_41 CER555_251239_ML_42 126 | Factors 
Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum 127 | 17-hydroxypregnenolone 946.2500 0.0000 676.2500 0.0000 2251.2500 0.0000 0.0000 1134.7500 0.0000 0.0000 2016.7500 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1919.7500 0.0000 972.7500 0.0000 1542.2500 1687.7500 421.0000 0.0000 373.2500 0.0000 614.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 528.2500 128 | 17-hydroxyprogesterone 0.0000 2.0000 0.0000 0.0000 19.2500 0.0000 0.0000 27.0000 2.0000 120.7500 27.7500 83.0000 0.0000 8.0000 3.5000 274.0000 0.0000 0.0000 3.0000 3.2500 0.0000 43.7500 15.2500 25.7500 4.2500 0.0000 0.0000 49.5000 27.7500 14.0000 9.7500 35.2500 34.7500 4.5000 8.0000 17.2500 0.0000 24.7500 19.0000 0.0000 4.5000 132.0000 129 | Allodihydrotestosterone 80.0000 1181.0000 0.0000 0.0000 0.0000 112.2500 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 288.0000 0.0000 0.0000 374.7500 0.0000 27.5000 112.7500 247.7500 39.0000 0.0000 0.0000 0.0000 0.0000 0.0000 761.0000 245.5000 332.5000 52.7500 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 465.7500 159.0000 0.0000 77.0000 315.5000 466.0000 130 | Androstenedione 76.7500 57.0000 176.2500 399.5000 208.5000 37.0000 281.2500 79.7500 250.7500 420.5000 123.0000 186.2500 34.7500 224.5000 
67.7500 335.0000 126.5000 277.0000 50.5000 153.7500 62.2500 107.0000 431.2500 167.5000 134.0000 60.7500 38.5000 42.0000 78.7500 43.0000 60.0000 114.7500 237.7500 53.5000 51.7500 298.0000 220.2500 15.0000 256.5000 172.5000 79.2500 52.5000 131 | Androstenolone (DHEA) 1779.7500 1409.2500 945.7500 748.2500 2284.0000 2351.0000 2183.7500 1916.5000 5079.5000 1474.0000 1338.5000 1646.0000 2051.7500 2039.7500 2618.0000 306.7500 574.5000 1794.2500 1429.0000 2293.2500 2066.2500 2493.2500 918.0000 1579.2500 2042.2500 2645.7500 2393.7500 1913.0000 1641.5000 853.2500 586.5000 537.2500 562.5000 1887.2500 979.0000 678.5000 1357.2500 1526.2500 2300.7500 129.0000 409.2500 282.2500 132 | Cortexolone 0.0000 0.0000 0.0000 54.0000 0.0000 0.0000 0.0000 0.0000 215.7500 135.7500 72.7500 53.0000 11.7500 0.0000 0.0000 0.0000 0.0000 101.2500 11.2500 0.0000 0.0000 315.0000 181.2500 0.0000 7.7500 151.2500 0.0000 0.0000 104.0000 0.0000 0.0000 30.7500 94.2500 210.5000 33.2500 126.0000 0.0000 10.0000 17.0000 15.7500 0.0000 0.0000 133 | Cortexone 108.0000 16.0000 13.0000 117.5000 3.2500 63.2500 42.5000 146.7500 29.5000 204.2500 28.7500 67.0000 30.5000 103.0000 23.0000 416.7500 63.5000 32.5000 32.5000 127.2500 39.0000 84.2500 7.2500 16.2500 68.7500 27.0000 46.5000 21.7500 3.2500 14.7500 28.7500 67.0000 33.0000 40.7500 31.0000 32.2500 40.0000 13.7500 18.7500 0.0000 25.7500 29.0000 134 | Corticosterone_ DOC 0.0000 354.5000 0.0000 0.0000 322.5000 419.7500 420.7500 0.0000 0.0000 0.0000 393.2500 915.5000 0.0000 432.2500 1233.0000 0.0000 525.5000 1700.0000 0.0000 98.7500 285.5000 42.5000 428.2500 0.0000 427.5000 271.7500 254.7500 478.0000 303.5000 462.2500 532.0000 715.0000 1073.0000 836.2500 0.0000 1639.0000 601.7500 287.7500 0.0000 0.0000 435.2500 1602.2500 135 | Cortisol 7643.0000 39245.7500 11671.5000 20216.0000 14908.7500 14386.5000 16815.2500 7806.2500 27135.5000 7095.0000 12175.2500 36413.0000 2499.2500 15101.7500 22045.0000 24832.0000 13257.0000 19528.5000 4539.7500 7681.7500 9585.2500 19361.0000 
24203.7500 5667.0000 19437.2500 10849.2500 11855.7500 7546.5000 3093.7500 19035.7500 18575.0000 14801.5000 22960.7500 22506.5000 8001.5000 31037.5000 18577.2500 15506.2500 8364.7500 2145.7500 5574.7500 19662.5000 136 | Estradiol 123992.2500 796595.7500 619110.0000 449415.7500 320835.5000 326124.2500 249087.2500 311589.2500 345598.5000 485857.0000 332055.2500 211831.0000 334929.7500 235466.7500 352555.0000 410500.0000 887955.0000 865791.7500 1648163.5000 856726.7500 579044.2500 254013.2500 326272.7500 239893.7500 329553.2500 438715.5000 248489.0000 380251.0000 338965.5000 337231.2500 342754.5000 370657.2500 2028106.5000 733521.0000 399244.2500 321007.5000 634463.0000 231294.0000 349439.2500 75746.7500 399415.5000 303855.7500 137 | Estrone 484.5000 1663.7500 1680.7500 794.5000 557.2500 625.7500 669.7500 885.0000 715.0000 1225.5000 697.7500 478.2500 659.0000 575.5000 871.7500 1089.0000 1726.2500 2325.2500 3286.7500 1955.7500 1094.0000 486.2500 650.5000 574.2500 601.7500 842.7500 757.7500 732.7500 571.7500 693.7500 1004.2500 879.2500 3154.7500 1095.2500 22680.2500 637.2500 1108.2500 474.2500 810.2500 421.2500 680.7500 623.7500 138 | Pregnenolone 12.2500 0.0000 0.0000 0.0000 0.0000 144.2500 14.7500 807.2500 0.0000 30.0000 0.0000 0.0000 0.0000 0.0000 16.5000 139.5000 132.5000 0.0000 0.0000 13.7500 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 488.5000 0.0000 0.0000 0.0000 0.0000 280.7500 0.0000 0.0000 0.0000 0.0000 0.0000 205.5000 139 | Progesterone 28.2500 6.2500 725.2500 57.2500 767.0000 2.7500 388.0000 9.0000 19.5000 242.5000 4.0000 0.0000 94.5000 160.7500 0.0000 3214.5000 218.2500 1.0000 0.0000 20.0000 4.5000 55.7500 24.5000 57.0000 200.5000 138.7500 132.2500 120.5000 80.5000 59.5000 315.7500 247.2500 211.5000 198.5000 232.2500 241.0000 199.5000 282.5000 216.5000 358.5000 289.5000 199.2500 140 | Testosterone 75.7500 63.2500 42.7500 98.0000 24.2500 35.0000 165.7500 23.2500 73.7500 52.7500 118.7500 35.7500 65.2500 127.2500 14.2500 202.5000 
110.7500 53.5000 54.2500 2.2500 105.2500 182.7500 116.0000 66.2500 52.5000 106.2500 43.2500 57.2500 97.2500 16.0000 192.0000 53.7500 182.5000 0.2500 11.5000 87.2500 33.7500 45.5000 26.2500 96.0000 17.5000 79.7500 141 | MS_METABOLITE_DATA_END 142 | #METABOLITES 143 | METABOLITES_START 144 | metabolite_name moverz_quant ri ri_type pubchem_id inchi_key kegg_id other_id other_id_type 145 | 17-hydroxypregnenolone 91451 2Q4710 UCDavis_Gaikwad_Lab_ID 146 | 17-hydroxyprogesterone 6238 6Q3360 UCDavis_Gaikwad_Lab_ID 147 | Allodihydrotestosterone 10635 14A2570 UCDavis_Gaikwad_Lab_ID 148 | Androstenedione 6128 12A6030 UCDavis_Gaikwad_Lab_ID 149 | Androstenolone (DHEA) 5881 3A8500 UCDavis_Gaikwad_Lab_ID 150 | Cortexolone 440707 7Q1610 UCDavis_Gaikwad_Lab_ID 151 | Cortexone 6166 9Q3460 UCDavis_Gaikwad_Lab_ID 152 | Corticosterone, DOC 5753 10Q1550 UCDavis_Gaikwad_Lab_ID 153 | Cortisol 5754 8Q3880 UCDavis_Gaikwad_Lab_ID 154 | Estradiol 5757 16E0950 UCDavis_Gaikwad_Lab_ID 155 | Estrone 5870 15E2300 UCDavis_Gaikwad_Lab_ID 156 | Pregnenolone 8955 1Q5500 UCDavis_Gaikwad_Lab_ID 157 | Progesterone 5994 5Q2600 UCDavis_Gaikwad_Lab_ID 158 | Testosterone 6013 13A6950 UCDavis_Gaikwad_Lab_ID 159 | METABOLITES_END 160 | #END 161 | 162 | 163 | -------------------------------------------------------------------------------- /tests/example_data/validation_files/ST000122_AN000204_error_3.txt: -------------------------------------------------------------------------------- 1 | #METABOLOMICS WORKBENCH STUDY_ID:ST000122 ANALYSIS_ID:AN000204 PROJECT_ID:PR000109 2 | VERSION 1 3 | CREATED_ON 2016-09-17 4 | #PROJECT 5 | PR:PROJECT_TITLE Perinatal DDT causes dysfunctional lipid metabolism underlying metabolic 6 | PR:PROJECT_TYPE Pilot and Feasibility Projects 7 | PR:PROJECT_SUMMARY - 8 | PR:INSTITUTE University of California, Davis 9 | PR:DEPARTMENT Nutrition 10 | PR:LABORATORY Gaikwad Lab 11 | PR:LAST_NAME Gaikwad 12 | PR:FIRST_NAME Nilesh 13 | PR:ADDRESS - 14 | PR:EMAIL nwgaikwad@ucdavis.edu 15 | 
PR:PHONE 530-752-2906 16 | PR:FUNDING_SOURCE NIH 1U24DK097154 ;  PI Fiehn, Oliver  ; UC Davis WEST COAST CENTRAL 17 | PR:FUNDING_SOURCE METABOLOMICS RESOURCE CORE (WC3MRC) 18 | #STUDY 19 | ST:STUDY_TITLE Perinatal DDT causes dysfunctional lipid metabolism underlying metabolic 20 | ST:STUDY_TYPE steroid panel 21 | ST:STUDY_SUMMARY - 22 | ST:INSTITUTE University of California, Davis 23 | ST:DEPARTMENT Nutrition 24 | ST:LABORATORY Gaikwad Lab 25 | ST:LAST_NAME Gaikwad 26 | ST:FIRST_NAME Nilesh 27 | ST:ADDRESS - 28 | ST:EMAIL nwgaikwad@ucdavis.edu 29 | ST:PHONE - 30 | ST:NUM_GROUPS NA 31 | #SUBJECT 32 | SU:SUBJECT_TYPE Human 33 | SU:SUBJECT_SPECIES Homo sapiens 34 | SU:TAXONOMY_ID 9606 35 | #SUBJECT_SAMPLE_FACTORS: SUBJECT(optional)[tab]SAMPLE[tab]FACTORS(NAME:VALUE pairs separated by |)[tab]Additional sample data 36 | SUBJECT_SAMPLE_FACTORS CER030_294717_ML_1 CER030_294717_ML_1 Tissue/Fluid:Serum 37 | SUBJECT_SAMPLE_FACTORS CER040_242995_ML_2 CER040_242995_ML_2 Tissue/Fluid:Serum 38 | SUBJECT_SAMPLE_FACTORS CER055_249947_ML_3 CER055_249947_ML_3 Tissue/Fluid:Serum 39 | SUBJECT_SAMPLE_FACTORS CER062_246153_ML_4 CER062_246153_ML_4 Tissue/Fluid:Serum 40 | SUBJECT_SAMPLE_FACTORS CER085_251176_ML_5 CER085_251176_ML_5 Tissue/Fluid:Serum 41 | SUBJECT_SAMPLE_FACTORS CER093_242931_ML_6 CER093_242931_ML_6 Tissue/Fluid:Serum 42 | SUBJECT_SAMPLE_FACTORS CER110_238825_ML_7 CER110_238825_ML_7 Tissue/Fluid:Serum 43 | SUBJECT_SAMPLE_FACTORS CER120_253690_ML_8 CER120_253690_ML_8 Tissue/Fluid:Serum 44 | SUBJECT_SAMPLE_FACTORS CER147_254803_ML_9 CER147_254803_ML_9 Tissue/Fluid:Serum 45 | SUBJECT_SAMPLE_FACTORS CER149_266689_ML_10 CER149_266689_ML_10 Tissue/Fluid:Serum 46 | SUBJECT_SAMPLE_FACTORS CER158_254231_ML_11 CER158_254231_ML_11 Tissue/Fluid:Serum 47 | SUBJECT_SAMPLE_FACTORS CER165_287001_ML_12 CER165_287001_ML_12 Tissue/Fluid:Serum 48 | SUBJECT_SAMPLE_FACTORS CER178_295145_ML_13 CER178_295145_ML_13 Tissue/Fluid:Serum 49 | SUBJECT_SAMPLE_FACTORS CER181_244392_ML_14 
CER181_244392_ML_14 Tissue/Fluid:Serum 50 | SUBJECT_SAMPLE_FACTORS CER188_250760_ML_15 CER188_250760_ML_15 Tissue/Fluid:Serum 51 | SUBJECT_SAMPLE_FACTORS CER192_254091_ML_16 CER192_254091_ML_16 Tissue/Fluid:Serum 52 | SUBJECT_SAMPLE_FACTORS CER201_244193_ML_17 CER201_244193_ML_17 Tissue/Fluid:Serum 53 | SUBJECT_SAMPLE_FACTORS CER216_242490_ML_18 CER216_242490_ML_18 Tissue/Fluid:Serum 54 | SUBJECT_SAMPLE_FACTORS CER220_274308_ML_19 CER220_274308_ML_19 Tissue/Fluid:Serum 55 | SUBJECT_SAMPLE_FACTORS CER223_264067_ML_20 CER223_264067_ML_20 Tissue/Fluid:Serum 56 | SUBJECT_SAMPLE_FACTORS CER226_254303_ML_21 CER226_254303_ML_21 Tissue/Fluid:Serum 57 | SUBJECT_SAMPLE_FACTORS CER277_255328_ML_22 CER277_255328_ML_22 Tissue/Fluid:Serum 58 | SUBJECT_SAMPLE_FACTORS CER287_248530_ML_23 CER287_248530_ML_23 Tissue/Fluid:Serum 59 | SUBJECT_SAMPLE_FACTORS CER303_253023_ML_24 CER303_253023_ML_24 Tissue/Fluid:Serum 60 | SUBJECT_SAMPLE_FACTORS CER315_282966_ML_25 CER315_282966_ML_25 Tissue/Fluid:Serum 61 | SUBJECT_SAMPLE_FACTORS CER324_285069_ML_26 CER324_285069_ML_26 Tissue/Fluid:Serum 62 | SUBJECT_SAMPLE_FACTORS CER340_244448_ML_27 CER340_244448_ML_27 Tissue/Fluid:Serum 63 | SUBJECT_SAMPLE_FACTORS CER346_246320_ML_28 CER346_246320_ML_28 Tissue/Fluid:Serum 64 | SUBJECT_SAMPLE_FACTORS CER356_269662_ML_29 CER356_269662_ML_29 Tissue/Fluid:Serum 65 | SUBJECT_SAMPLE_FACTORS CER368_250104_ML_30 CER368_250104_ML_30 Tissue/Fluid:Serum 66 | SUBJECT_SAMPLE_FACTORS CER369_276355_ML_31 CER369_276355_ML_31 Tissue/Fluid:Serum 67 | SUBJECT_SAMPLE_FACTORS CER384_264971_ML_32 CER384_264971_ML_32 Tissue/Fluid:Serum 68 | SUBJECT_SAMPLE_FACTORS CER445_286527_ML_33 CER445_286527_ML_33 Tissue/Fluid:Serum 69 | SUBJECT_SAMPLE_FACTORS CER452_240972_ML_34 CER452_240972_ML_34 Tissue/Fluid:Serum 70 | SUBJECT_SAMPLE_FACTORS CER463_271249_ML_35 CER463_271249_ML_35 Tissue/Fluid:Serum 71 | SUBJECT_SAMPLE_FACTORS CER465_265004_ML_36 CER465_265004_ML_36 Tissue/Fluid:Serum 72 | SUBJECT_SAMPLE_FACTORS 
CER483_294606_ML_37 CER483_294606_ML_37 Tissue/Fluid:Serum 73 | SUBJECT_SAMPLE_FACTORS CER488_274343_ML_38 CER488_274343_ML_38 Tissue/Fluid:Serum 74 | SUBJECT_SAMPLE_FACTORS CER530_249229_ML_39 CER530_249229_ML_39 Tissue/Fluid:Serum 75 | SUBJECT_SAMPLE_FACTORS CER540_240346_ML_40 CER540_240346_ML_40 Tissue/Fluid:Serum 76 | SUBJECT_SAMPLE_FACTORS CER552_241945_ML_41 CER552_241945_ML_41 Tissue/Fluid:Serum 77 | SUBJECT_SAMPLE_FACTORS CER555_251239_ML_42 CER555_251239_ML_42 Tissue/Fluid:Serum 78 | #COLLECTION 79 | CO:COLLECTION_SUMMARY - 80 | #TREATMENT 81 | TR:TREATMENT_SUMMARY - 82 | #SAMPLEPREP 83 | SP:SAMPLEPREP_SUMMARY Methanol: Water Extraction 84 | SP:SAMPLEPREP_PROTOCOL_FILENAME NIH_WCMC_LaMerrill_Method_GaikwadLab__SteroidAnalysis_2013-14.docx 85 | SP:PROCESSING_METHOD Homogenization and Solvent Removal w/ Speed Vac 86 | SP:PROCESSING_STORAGE_CONDITIONS On Ice 87 | SP:EXTRACTION_METHOD 1:1 Methanol: Water 88 | SP:EXTRACT_STORAGE -80C 89 | SP:SAMPLE_RESUSPENSION 150ul CH3OH/H2O 90 | SP:ORGAN Sprague-Dawley Maternal: Adrenal, liver, placenta, amniotic fluid 91 | SP:ORGAN Fetal: Male and female brain, male and female liver 92 | #CHROMATOGRAPHY 93 | CH:CHROMATOGRAPHY_SUMMARY Targeted UPLC-MS/MS 94 | CH:CHROMATOGRAPHY_TYPE Reversed phase 95 | CH:INSTRUMENT_NAME Waters Acquity 96 | CH:COLUMN_NAME Waters Acquity HSS T3 (150 x 2.1mm, 1.8um) 97 | CH:FLOW_GRADIENT 0-2 min 100% A (Water 0.1% formic acid) 0% B (CH3CN 0.1 % formic acid), 2-4 min 98 | CH:FLOW_GRADIENT A, 4-9mins 45% A, 9-11 mins 20% A, 11-12 mins 100% A 99 | CH:FLOW_RATE 0.15 ml/min 100 | CH:SAMPLE_INJECTION 10ul 101 | CH:SOLVENT_A Water 0.1% formic acid 102 | CH:SOLVENT_B CH3CN 0.1 % formic acid 103 | CH:ANALYTICAL_TIME 12 mins 104 | #ANALYSIS 105 | AN:ANALYSIS_TYPE MS 106 | AN:LABORATORY_NAME Gaikwad Laboratory 107 | AN:ACQUISITION_DATE 41716 108 | AN:SOFTWARE_VERSION Masslynx 109 | AN:OPERATOR_NAME Nilesh Gaikwad 110 | #MS 111 | MS:INSTRUMENT_NAME Waters Xevo-TQ 112 | MS:INSTRUMENT_TYPE Triple quadrupole 
113 | MS:MS_TYPE ESI 114 | MS:ION_MODE POSITIVE 115 | MS:CAPILLARY_VOLTAGE 3.0 kV 116 | MS:COLLISION_GAS N2 117 | MS:IONIZATION Electrospray Ionization 118 | MS:SOURCE_TEMPERATURE 150C 119 | MS:DESOLVATION_GAS_FLOW 600 L/h 120 | MS:DESOLVATION_TEMPERATURE 350C 121 | MS:MS_COMMENTS UPLC-MS/MS 122 | #MS_METABOLITE_DATA 123 | MS_METABOLITE_DATA:UNITS pg/ml 124 | MS_METABOLITE_DATA_START 125 | Samples CER030_294717_ML_1 CER040_242995_ML_2 CER055_249947_ML_3 CER062_246153_ML_4 CER085_251176_ML_5 CER093_242931_ML_6 CER110_238825_ML_7 CER120_253690_ML_8 CER147_254803_ML_9 CER149_266689_ML_10 CER158_254231_ML_11 CER165_287001_ML_12 CER178_295145_ML_13 CER181_244392_ML_14 CER188_250760_ML_15 CER192_254091_ML_16 CER201_244193_ML_17 CER216_242490_ML_18 CER220_274308_ML_19 CER223_264067_ML_20 CER226_254303_ML_21 CER277_255328_ML_22 CER287_248530_ML_23 CER303_253023_ML_24 CER315_282966_ML_25 CER324_285069_ML_26 CER340_244448_ML_27 CER346_246320_ML_28 CER356_269662_ML_29 CER368_250104_ML_30 CER369_276355_ML_31 CER384_264971_ML_32 CER445_286527_ML_33 CER452_240972_ML_34 CER463_271249_ML_35 CER465_265004_ML_36 CER483_294606_ML_37 CER488_274343_ML_38 CER530_249229_ML_39 CER540_240346_ML_40 CER552_241945_ML_41 CER555_251239_ML_42 126 | Factors Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum 
Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum 127 | 17-hydroxypregnenolone 946.2500 0.0000 676.2500 0.0000 2251.2500 0.0000 0.0000 1134.7500 0.0000 0.0000 2016.7500 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1919.7500 0.0000 972.7500 0.0000 1542.2500 1687.7500 421.0000 0.0000 373.2500 0.0000 614.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 528.2500 128 | 17-hydroxyprogesterone 0.0000 2.0000 0.0000 0.0000 19.2500 0.0000 0.0000 27.0000 2.0000 120.7500 27.7500 83.0000 0.0000 8.0000 3.5000 274.0000 0.0000 0.0000 3.0000 3.2500 0.0000 43.7500 15.2500 25.7500 4.2500 0.0000 0.0000 49.5000 27.7500 14.0000 9.7500 35.2500 34.7500 4.5000 8.0000 17.2500 0.0000 24.7500 19.0000 0.0000 4.5000 132.0000 129 | Allodihydrotestosterone 80.0000 1181.0000 0.0000 0.0000 0.0000 112.2500 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 288.0000 0.0000 0.0000 374.7500 0.0000 27.5000 112.7500 247.7500 39.0000 0.0000 0.0000 0.0000 0.0000 0.0000 761.0000 245.5000 332.5000 52.7500 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 465.7500 159.0000 0.0000 77.0000 315.5000 466.0000 130 | Androstenedione 76.7500 57.0000 176.2500 399.5000 208.5000 37.0000 281.2500 79.7500 250.7500 420.5000 123.0000 186.2500 34.7500 224.5000 67.7500 335.0000 126.5000 277.0000 50.5000 153.7500 62.2500 107.0000 431.2500 167.5000 134.0000 60.7500 38.5000 42.0000 78.7500 43.0000 60.0000 114.7500 237.7500 53.5000 51.7500 298.0000 220.2500 15.0000 256.5000 172.5000 79.2500 52.5000 131 | Androstenolone (DHEA) 1779.7500 1409.2500 945.7500 748.2500 2284.0000 2351.0000 2183.7500 1916.5000 5079.5000 1474.0000 1338.5000 1646.0000 2051.7500 2039.7500 2618.0000 306.7500 574.5000 1794.2500 1429.0000 2293.2500 2066.2500 2493.2500 918.0000 1579.2500 2042.2500 2645.7500 2393.7500 1913.0000 1641.5000 853.2500 586.5000 537.2500 562.5000 1887.2500 979.0000 678.5000 1357.2500 1526.2500 2300.7500 129.0000 409.2500 282.2500 132 | Cortexolone 0.0000 0.0000 0.0000 54.0000 0.0000 0.0000 0.0000 
0.0000 215.7500 135.7500 72.7500 53.0000 11.7500 0.0000 0.0000 0.0000 0.0000 101.2500 11.2500 0.0000 0.0000 315.0000 181.2500 0.0000 7.7500 151.2500 0.0000 0.0000 104.0000 0.0000 0.0000 30.7500 94.2500 210.5000 33.2500 126.0000 0.0000 10.0000 17.0000 15.7500 0.0000 0.0000 133 | Cortexone 108.0000 16.0000 13.0000 117.5000 3.2500 63.2500 42.5000 146.7500 29.5000 204.2500 28.7500 67.0000 30.5000 103.0000 23.0000 416.7500 63.5000 32.5000 32.5000 127.2500 39.0000 84.2500 7.2500 16.2500 68.7500 27.0000 46.5000 21.7500 3.2500 14.7500 28.7500 67.0000 33.0000 40.7500 31.0000 32.2500 40.0000 13.7500 18.7500 0.0000 25.7500 29.0000 134 | Corticosterone_ DOC 0.0000 354.5000 0.0000 0.0000 322.5000 419.7500 420.7500 0.0000 0.0000 0.0000 393.2500 915.5000 0.0000 432.2500 1233.0000 0.0000 525.5000 1700.0000 0.0000 98.7500 285.5000 42.5000 428.2500 0.0000 427.5000 271.7500 254.7500 478.0000 303.5000 462.2500 532.0000 715.0000 1073.0000 836.2500 0.0000 1639.0000 601.7500 287.7500 0.0000 0.0000 435.2500 1602.2500 135 | Cortisol 7643.0000 39245.7500 11671.5000 20216.0000 14908.7500 14386.5000 16815.2500 7806.2500 27135.5000 7095.0000 12175.2500 36413.0000 2499.2500 15101.7500 22045.0000 24832.0000 13257.0000 19528.5000 4539.7500 7681.7500 9585.2500 19361.0000 24203.7500 5667.0000 19437.2500 10849.2500 11855.7500 7546.5000 3093.7500 19035.7500 18575.0000 14801.5000 22960.7500 22506.5000 8001.5000 31037.5000 18577.2500 15506.2500 8364.7500 2145.7500 5574.7500 19662.5000 136 | Estradiol 123992.2500 796595.7500 619110.0000 449415.7500 320835.5000 326124.2500 249087.2500 311589.2500 345598.5000 485857.0000 332055.2500 211831.0000 334929.7500 235466.7500 352555.0000 410500.0000 887955.0000 865791.7500 1648163.5000 856726.7500 579044.2500 254013.2500 326272.7500 239893.7500 329553.2500 438715.5000 248489.0000 380251.0000 338965.5000 337231.2500 342754.5000 370657.2500 2028106.5000 733521.0000 399244.2500 321007.5000 634463.0000 231294.0000 349439.2500 75746.7500 399415.5000 303855.7500 137 | 
Estrone 484.5000 1663.7500 1680.7500 794.5000 557.2500 625.7500 669.7500 885.0000 715.0000 1225.5000 697.7500 478.2500 659.0000 575.5000 871.7500 1089.0000 1726.2500 2325.2500 3286.7500 1955.7500 1094.0000 486.2500 650.5000 574.2500 601.7500 842.7500 757.7500 732.7500 571.7500 693.7500 1004.2500 879.2500 3154.7500 1095.2500 22680.2500 637.2500 1108.2500 474.2500 810.2500 421.2500 680.7500 623.7500 138 | Pregnenolone 12.2500 0.0000 0.0000 0.0000 0.0000 144.2500 14.7500 807.2500 0.0000 30.0000 0.0000 0.0000 0.0000 0.0000 16.5000 139.5000 132.5000 0.0000 0.0000 13.7500 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 488.5000 0.0000 0.0000 0.0000 0.0000 280.7500 0.0000 0.0000 0.0000 0.0000 0.0000 205.5000 139 | Progesterone 28.2500 6.2500 725.2500 57.2500 767.0000 2.7500 388.0000 9.0000 19.5000 242.5000 4.0000 0.0000 94.5000 160.7500 0.0000 3214.5000 218.2500 1.0000 0.0000 20.0000 4.5000 55.7500 24.5000 57.0000 200.5000 138.7500 132.2500 120.5000 80.5000 59.5000 315.7500 247.2500 211.5000 198.5000 232.2500 241.0000 199.5000 282.5000 216.5000 358.5000 289.5000 199.2500 140 | Testosterone 75.7500 63.2500 42.7500 98.0000 24.2500 35.0000 165.7500 23.2500 73.7500 52.7500 118.7500 35.7500 65.2500 127.2500 14.2500 202.5000 110.7500 53.5000 54.2500 2.2500 105.2500 182.7500 116.0000 66.2500 52.5000 106.2500 43.2500 57.2500 97.2500 16.0000 192.0000 53.7500 182.5000 0.2500 11.5000 87.2500 33.7500 45.5000 26.2500 96.0000 17.5000 79.7500 141 | MS_METABOLITE_DATA_END 142 | #METABOLITES 143 | METABOLITES_START 144 | metabolite_name moverz_quant ri ri_type pubchem_id inchi_key kegg_id other_id other_id_type 145 | 17-hydroxypregnenolone 91451 2Q4710 UCDavis_Gaikwad_Lab_ID 146 | 17-hydroxyprogesterone 6238 6Q3360 UCDavis_Gaikwad_Lab_ID 147 | Allodihydrotestosterone 10635 14A2570 UCDavis_Gaikwad_Lab_ID 148 | Androstenedione 6128 12A6030 UCDavis_Gaikwad_Lab_ID 149 | Androstenolone (DHEA) 5881 3A8500 UCDavis_Gaikwad_Lab_ID 150 | Cortexolone 440707 7Q1610 
UCDavis_Gaikwad_Lab_ID 151 | Cortexone 6166 9Q3460 UCDavis_Gaikwad_Lab_ID 152 | Corticosterone, DOC 5753 10Q1550 UCDavis_Gaikwad_Lab_ID 153 | Cortisol 5754 8Q3880 UCDavis_Gaikwad_Lab_ID 154 | Estradiol 5757 16E0950 UCDavis_Gaikwad_Lab_ID 155 | Estrone 5870 15E2300 UCDavis_Gaikwad_Lab_ID 156 | Pregnenolone 8955 1Q5500 UCDavis_Gaikwad_Lab_ID 157 | Progesterone 5994 5Q2600 UCDavis_Gaikwad_Lab_ID 158 | Testosterone 6013 13A6950 UCDavis_Gaikwad_Lab_ID 159 | METABOLITES_END 160 | #END 161 | 162 | 163 | -------------------------------------------------------------------------------- /tests/example_data/validation_files/ST000122_AN000204_error_4.txt: -------------------------------------------------------------------------------- 1 | #METABOLOMICS WORKBENCH STUDY_ID:ST000122 ANALYSIS_ID:AN000204 PROJECT_ID:PR000109 2 | VERSION 1 3 | CREATED_ON 2016-09-17 4 | #PROJECT 5 | PR:PROJECT_TYPE Pilot and Feasibility Projects 6 | PR:PROJECT_SUMMARY - 7 | PR:INSTITUTE University of California, Davis 8 | PR:DEPARTMENT Nutrition 9 | PR:LABORATORY Gaikwad Lab 10 | PR:LAST_NAME Gaikwad 11 | PR:FIRST_NAME Nilesh 12 | PR:ADDRESS - 13 | PR:EMAIL nwgaikwad@ucdavis.edu 14 | PR:PHONE 530-752-2906 15 | PR:FUNDING_SOURCE NIH 1U24DK097154 ;  PI Fiehn, Oliver  ; UC Davis WEST COAST CENTRAL 16 | PR:FUNDING_SOURCE METABOLOMICS RESOURCE CORE (WC3MRC) 17 | #STUDY 18 | ST:STUDY_TITLE Perinatal DDT causes dysfunctional lipid metabolism underlying metabolic 19 | ST:STUDY_TYPE steroid panel 20 | ST:STUDY_SUMMARY - 21 | ST:INSTITUTE University of California, Davis 22 | ST:DEPARTMENT Nutrition 23 | ST:LABORATORY Gaikwad Lab 24 | ST:LAST_NAME Gaikwad 25 | ST:FIRST_NAME Nilesh 26 | ST:ADDRESS - 27 | ST:EMAIL nwgaikwad@ucdavis.edu 28 | ST:PHONE - 29 | ST:NUM_GROUPS NA 30 | #SUBJECT 31 | SU:SUBJECT_TYPE Human 32 | SU:SUBJECT_SPECIES Homo sapiens 33 | SU:TAXONOMY_ID 9606 34 | #SUBJECT_SAMPLE_FACTORS: SUBJECT(optional)[tab]SAMPLE[tab]FACTORS(NAME:VALUE pairs separated by |)[tab]Additional sample data 35 | 
SUBJECT_SAMPLE_FACTORS CER030_294717_ML_1 CER030_294717_ML_1 Tissue/Fluid:Serum 36 | SUBJECT_SAMPLE_FACTORS CER040_242995_ML_2 CER040_242995_ML_2 Tissue/Fluid:Serum 37 | SUBJECT_SAMPLE_FACTORS CER055_249947_ML_3 CER055_249947_ML_3 Tissue/Fluid:Serum 38 | SUBJECT_SAMPLE_FACTORS CER062_246153_ML_4 CER062_246153_ML_4 Tissue/Fluid:Serum 39 | SUBJECT_SAMPLE_FACTORS CER085_251176_ML_5 CER085_251176_ML_5 Tissue/Fluid:Serum 40 | SUBJECT_SAMPLE_FACTORS CER093_242931_ML_6 CER093_242931_ML_6 Tissue/Fluid:Serum 41 | SUBJECT_SAMPLE_FACTORS CER110_238825_ML_7 CER110_238825_ML_7 Tissue/Fluid:Serum 42 | SUBJECT_SAMPLE_FACTORS CER120_253690_ML_8 CER120_253690_ML_8 Tissue/Fluid:Serum 43 | SUBJECT_SAMPLE_FACTORS CER147_254803_ML_9 CER147_254803_ML_9 Tissue/Fluid:Serum 44 | SUBJECT_SAMPLE_FACTORS CER149_266689_ML_10 CER149_266689_ML_10 Tissue/Fluid:Serum 45 | SUBJECT_SAMPLE_FACTORS CER158_254231_ML_11 CER158_254231_ML_11 Tissue/Fluid:Serum 46 | SUBJECT_SAMPLE_FACTORS CER165_287001_ML_12 CER165_287001_ML_12 Tissue/Fluid:Serum 47 | SUBJECT_SAMPLE_FACTORS CER178_295145_ML_13 CER178_295145_ML_13 Tissue/Fluid:Serum 48 | SUBJECT_SAMPLE_FACTORS CER181_244392_ML_14 CER181_244392_ML_14 Tissue/Fluid:Serum 49 | SUBJECT_SAMPLE_FACTORS CER188_250760_ML_15 CER188_250760_ML_15 Tissue/Fluid:Serum 50 | SUBJECT_SAMPLE_FACTORS CER192_254091_ML_16 CER192_254091_ML_16 Tissue/Fluid:Serum 51 | SUBJECT_SAMPLE_FACTORS CER201_244193_ML_17 CER201_244193_ML_17 Tissue/Fluid:Serum 52 | SUBJECT_SAMPLE_FACTORS CER216_242490_ML_18 CER216_242490_ML_18 Tissue/Fluid:Serum 53 | SUBJECT_SAMPLE_FACTORS CER220_274308_ML_19 CER220_274308_ML_19 Tissue/Fluid:Serum 54 | SUBJECT_SAMPLE_FACTORS CER223_264067_ML_20 CER223_264067_ML_20 Tissue/Fluid:Serum 55 | SUBJECT_SAMPLE_FACTORS CER226_254303_ML_21 CER226_254303_ML_21 Tissue/Fluid:Serum 56 | SUBJECT_SAMPLE_FACTORS CER277_255328_ML_22 CER277_255328_ML_22 Tissue/Fluid:Serum 57 | SUBJECT_SAMPLE_FACTORS CER287_248530_ML_23 CER287_248530_ML_23 Tissue/Fluid:Serum 58 | 
SUBJECT_SAMPLE_FACTORS CER303_253023_ML_24 CER303_253023_ML_24 Tissue/Fluid:Serum 59 | SUBJECT_SAMPLE_FACTORS CER315_282966_ML_25 CER315_282966_ML_25 Tissue/Fluid:Serum 60 | SUBJECT_SAMPLE_FACTORS CER324_285069_ML_26 CER324_285069_ML_26 Tissue/Fluid:Serum 61 | SUBJECT_SAMPLE_FACTORS CER340_244448_ML_27 CER340_244448_ML_27 Tissue/Fluid:Serum 62 | SUBJECT_SAMPLE_FACTORS CER346_246320_ML_28 CER346_246320_ML_28 Tissue/Fluid:Serum 63 | SUBJECT_SAMPLE_FACTORS CER356_269662_ML_29 CER356_269662_ML_29 Tissue/Fluid:Serum 64 | SUBJECT_SAMPLE_FACTORS CER368_250104_ML_30 CER368_250104_ML_30 Tissue/Fluid:Serum 65 | SUBJECT_SAMPLE_FACTORS CER369_276355_ML_31 CER369_276355_ML_31 Tissue/Fluid:Serum 66 | SUBJECT_SAMPLE_FACTORS CER384_264971_ML_32 CER384_264971_ML_32 Tissue/Fluid:Serum 67 | SUBJECT_SAMPLE_FACTORS CER445_286527_ML_33 CER445_286527_ML_33 Tissue/Fluid:Serum 68 | SUBJECT_SAMPLE_FACTORS CER452_240972_ML_34 CER452_240972_ML_34 Tissue/Fluid:Serum 69 | SUBJECT_SAMPLE_FACTORS CER463_271249_ML_35 CER463_271249_ML_35 Tissue/Fluid:Serum 70 | SUBJECT_SAMPLE_FACTORS CER465_265004_ML_36 CER465_265004_ML_36 Tissue/Fluid:Serum 71 | SUBJECT_SAMPLE_FACTORS CER483_294606_ML_37 CER483_294606_ML_37 Tissue/Fluid:Serum 72 | SUBJECT_SAMPLE_FACTORS CER488_274343_ML_38 CER488_274343_ML_38 Tissue/Fluid:Serum 73 | SUBJECT_SAMPLE_FACTORS CER530_249229_ML_39 CER530_249229_ML_39 Tissue/Fluid:Serum 74 | SUBJECT_SAMPLE_FACTORS CER540_240346_ML_40 CER540_240346_ML_40 Tissue/Fluid:Serum 75 | SUBJECT_SAMPLE_FACTORS CER552_241945_ML_41 CER552_241945_ML_41 Tissue/Fluid:Serum 76 | SUBJECT_SAMPLE_FACTORS CER555_251239_ML_42 CER555_251239_ML_42 Tissue/Fluid:Serum 77 | #COLLECTION 78 | CO:COLLECTION_SUMMARY - 79 | #TREATMENT 80 | TR:TREATMENT_SUMMARY - 81 | #SAMPLEPREP 82 | SP:SAMPLEPREP_SUMMARY Methanol: Water Extraction 83 | SP:SAMPLEPREP_PROTOCOL_FILENAME NIH_WCMC_LaMerrill_Method_GaikwadLab__SteroidAnalysis_2013-14.docx 84 | SP:PROCESSING_METHOD Homogenization and Solvent Removal w/ Speed Vac 85 | 
SP:PROCESSING_STORAGE_CONDITIONS On Ice 86 | SP:EXTRACTION_METHOD 1:1 Methanol: Water 87 | SP:EXTRACT_STORAGE -80C 88 | SP:SAMPLE_RESUSPENSION 150ul CH3OH/H2O 89 | SP:ORGAN Sprague-Dawley Maternal: Adrenal, liver, placenta, amniotic fluid 90 | SP:ORGAN Fetal: Male and female brain, male and female liver 91 | #CHROMATOGRAPHY 92 | CH:CHROMATOGRAPHY_SUMMARY Targeted UPLC-MS/MS 93 | CH:CHROMATOGRAPHY_TYPE Reversed phase 94 | CH:INSTRUMENT_NAME Waters Acquity 95 | CH:COLUMN_NAME Waters Acquity HSS T3 (150 x 2.1mm, 1.8um) 96 | CH:FLOW_GRADIENT 0-2 min 100% A (Water 0.1% formic acid) 0% B (CH3CN 0.1 % formic acid), 2-4 min 97 | CH:FLOW_GRADIENT A, 4-9mins 45% A, 9-11 mins 20% A, 11-12 mins 100% A 98 | CH:FLOW_RATE 0.15 ml/min 99 | CH:SAMPLE_INJECTION 10ul 100 | CH:SOLVENT_A Water 0.1% formic acid 101 | CH:SOLVENT_B CH3CN 0.1 % formic acid 102 | CH:ANALYTICAL_TIME 12 mins 103 | #ANALYSIS 104 | AN:ANALYSIS_TYPE MS 105 | AN:LABORATORY_NAME Gaikwad Laboratory 106 | AN:ACQUISITION_DATE 41716 107 | AN:SOFTWARE_VERSION Masslynx 108 | AN:OPERATOR_NAME Nilesh Gaikwad 109 | #MS 110 | MS:INSTRUMENT_NAME Waters Xevo-TQ 111 | MS:INSTRUMENT_TYPE Triple quadrupole 112 | MS:MS_TYPE ESI 113 | MS:ION_MODE POSITIVE 114 | MS:CAPILLARY_VOLTAGE 3.0 kV 115 | MS:COLLISION_GAS N2 116 | MS:IONIZATION Electrospray Ionization 117 | MS:SOURCE_TEMPERATURE 150C 118 | MS:DESOLVATION_GAS_FLOW 600 L/h 119 | MS:DESOLVATION_TEMPERATURE 350C 120 | MS:MS_COMMENTS UPLC-MS/MS 121 | #MS_METABOLITE_DATA 122 | MS_METABOLITE_DATA:UNITS pg/ml 123 | MS_METABOLITE_DATA_START 124 | Samples CER030_294717_ML_1 CER040_242995_ML_2 CER055_249947_ML_3 CER062_246153_ML_4 CER085_251176_ML_5 CER093_242931_ML_6 CER110_238825_ML_7 CER120_253690_ML_8 CER147_254803_ML_9 CER149_266689_ML_10 CER158_254231_ML_11 CER165_287001_ML_12 CER178_295145_ML_13 CER181_244392_ML_14 CER188_250760_ML_15 CER192_254091_ML_16 CER201_244193_ML_17 CER216_242490_ML_18 CER220_274308_ML_19 CER223_264067_ML_20 CER226_254303_ML_21 CER277_255328_ML_22 
CER287_248530_ML_23 CER303_253023_ML_24 CER315_282966_ML_25 CER324_285069_ML_26 CER340_244448_ML_27 CER346_246320_ML_28 CER356_269662_ML_29 CER368_250104_ML_30 CER369_276355_ML_31 CER384_264971_ML_32 CER445_286527_ML_33 CER452_240972_ML_34 CER463_271249_ML_35 CER465_265004_ML_36 CER483_294606_ML_37 CER488_274343_ML_38 CER530_249229_ML_39 CER540_240346_ML_40 CER552_241945_ML_41 CER555_251239_ML_42 125 | Factors Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum Tissue/Fluid:Serum 126 | 17-hydroxypregnenolone 946.2500 0.0000 676.2500 0.0000 2251.2500 0.0000 0.0000 1134.7500 0.0000 0.0000 2016.7500 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1919.7500 0.0000 972.7500 0.0000 1542.2500 1687.7500 421.0000 0.0000 373.2500 0.0000 614.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 528.2500 127 | 17-hydroxyprogesterone 0.0000 2.0000 0.0000 0.0000 19.2500 0.0000 0.0000 27.0000 2.0000 120.7500 27.7500 83.0000 0.0000 8.0000 3.5000 274.0000 0.0000 0.0000 3.0000 3.2500 0.0000 43.7500 15.2500 25.7500 4.2500 0.0000 0.0000 49.5000 27.7500 14.0000 9.7500 35.2500 34.7500 4.5000 8.0000 17.2500 0.0000 24.7500 19.0000 0.0000 4.5000 132.0000 128 | Allodihydrotestosterone 80.0000 1181.0000 0.0000 0.0000 0.0000 112.2500 0.0000 
0.0000 0.0000 0.0000 0.0000 0.0000 288.0000 0.0000 0.0000 374.7500 0.0000 27.5000 112.7500 247.7500 39.0000 0.0000 0.0000 0.0000 0.0000 0.0000 761.0000 245.5000 332.5000 52.7500 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 465.7500 159.0000 0.0000 77.0000 315.5000 466.0000 129 | Androstenedione 76.7500 57.0000 176.2500 399.5000 208.5000 37.0000 281.2500 79.7500 250.7500 420.5000 123.0000 186.2500 34.7500 224.5000 67.7500 335.0000 126.5000 277.0000 50.5000 153.7500 62.2500 107.0000 431.2500 167.5000 134.0000 60.7500 38.5000 42.0000 78.7500 43.0000 60.0000 114.7500 237.7500 53.5000 51.7500 298.0000 220.2500 15.0000 256.5000 172.5000 79.2500 52.5000 130 | Androstenolone (DHEA) 1779.7500 1409.2500 945.7500 748.2500 2284.0000 2351.0000 2183.7500 1916.5000 5079.5000 1474.0000 1338.5000 1646.0000 2051.7500 2039.7500 2618.0000 306.7500 574.5000 1794.2500 1429.0000 2293.2500 2066.2500 2493.2500 918.0000 1579.2500 2042.2500 2645.7500 2393.7500 1913.0000 1641.5000 853.2500 586.5000 537.2500 562.5000 1887.2500 979.0000 678.5000 1357.2500 1526.2500 2300.7500 129.0000 409.2500 282.2500 131 | Cortexolone 0.0000 0.0000 0.0000 54.0000 0.0000 0.0000 0.0000 0.0000 215.7500 135.7500 72.7500 53.0000 11.7500 0.0000 0.0000 0.0000 0.0000 101.2500 11.2500 0.0000 0.0000 315.0000 181.2500 0.0000 7.7500 151.2500 0.0000 0.0000 104.0000 0.0000 0.0000 30.7500 94.2500 210.5000 33.2500 126.0000 0.0000 10.0000 17.0000 15.7500 0.0000 0.0000 132 | Cortexone 108.0000 16.0000 13.0000 117.5000 3.2500 63.2500 42.5000 146.7500 29.5000 204.2500 28.7500 67.0000 30.5000 103.0000 23.0000 416.7500 63.5000 32.5000 32.5000 127.2500 39.0000 84.2500 7.2500 16.2500 68.7500 27.0000 46.5000 21.7500 3.2500 14.7500 28.7500 67.0000 33.0000 40.7500 31.0000 32.2500 40.0000 13.7500 18.7500 0.0000 25.7500 29.0000 133 | Corticosterone_ DOC 0.0000 354.5000 0.0000 0.0000 322.5000 419.7500 420.7500 0.0000 0.0000 0.0000 393.2500 915.5000 0.0000 432.2500 1233.0000 0.0000 525.5000 1700.0000 0.0000 98.7500 285.5000 42.5000 428.2500 
0.0000 427.5000 271.7500 254.7500 478.0000 303.5000 462.2500 532.0000 715.0000 1073.0000 836.2500 0.0000 1639.0000 601.7500 287.7500 0.0000 0.0000 435.2500 1602.2500 134 | Cortisol 7643.0000 39245.7500 11671.5000 20216.0000 14908.7500 14386.5000 16815.2500 7806.2500 27135.5000 7095.0000 12175.2500 36413.0000 2499.2500 15101.7500 22045.0000 24832.0000 13257.0000 19528.5000 4539.7500 7681.7500 9585.2500 19361.0000 24203.7500 5667.0000 19437.2500 10849.2500 11855.7500 7546.5000 3093.7500 19035.7500 18575.0000 14801.5000 22960.7500 22506.5000 8001.5000 31037.5000 18577.2500 15506.2500 8364.7500 2145.7500 5574.7500 19662.5000 135 | Estradiol 123992.2500 796595.7500 619110.0000 449415.7500 320835.5000 326124.2500 249087.2500 311589.2500 345598.5000 485857.0000 332055.2500 211831.0000 334929.7500 235466.7500 352555.0000 410500.0000 887955.0000 865791.7500 1648163.5000 856726.7500 579044.2500 254013.2500 326272.7500 239893.7500 329553.2500 438715.5000 248489.0000 380251.0000 338965.5000 337231.2500 342754.5000 370657.2500 2028106.5000 733521.0000 399244.2500 321007.5000 634463.0000 231294.0000 349439.2500 75746.7500 399415.5000 303855.7500 136 | Estrone 484.5000 1663.7500 1680.7500 794.5000 557.2500 625.7500 669.7500 885.0000 715.0000 1225.5000 697.7500 478.2500 659.0000 575.5000 871.7500 1089.0000 1726.2500 2325.2500 3286.7500 1955.7500 1094.0000 486.2500 650.5000 574.2500 601.7500 842.7500 757.7500 732.7500 571.7500 693.7500 1004.2500 879.2500 3154.7500 1095.2500 22680.2500 637.2500 1108.2500 474.2500 810.2500 421.2500 680.7500 623.7500 137 | Pregnenolone 12.2500 0.0000 0.0000 0.0000 0.0000 144.2500 14.7500 807.2500 0.0000 30.0000 0.0000 0.0000 0.0000 0.0000 16.5000 139.5000 132.5000 0.0000 0.0000 13.7500 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 488.5000 0.0000 0.0000 0.0000 0.0000 280.7500 0.0000 0.0000 0.0000 0.0000 0.0000 205.5000 138 | Progesterone 28.2500 6.2500 725.2500 57.2500 767.0000 2.7500 388.0000 9.0000 19.5000 242.5000 4.0000 
0.0000 94.5000 160.7500 0.0000 3214.5000 218.2500 1.0000 0.0000 20.0000 4.5000 55.7500 24.5000 57.0000 200.5000 138.7500 132.2500 120.5000 80.5000 59.5000 315.7500 247.2500 211.5000 198.5000 232.2500 241.0000 199.5000 282.5000 216.5000 358.5000 289.5000 199.2500 139 | Testosterone 75.7500 63.2500 42.7500 98.0000 24.2500 35.0000 165.7500 23.2500 73.7500 52.7500 118.7500 35.7500 65.2500 127.2500 14.2500 202.5000 110.7500 53.5000 54.2500 2.2500 105.2500 182.7500 116.0000 66.2500 52.5000 106.2500 43.2500 57.2500 97.2500 16.0000 192.0000 53.7500 182.5000 0.2500 11.5000 87.2500 33.7500 45.5000 26.2500 96.0000 17.5000 79.7500 140 | MS_METABOLITE_DATA_END 141 | #METABOLITES 142 | METABOLITES_START 143 | metabolite_name moverz_quant ri ri_type pubchem_id inchi_key kegg_id other_id other_id_type 144 | 17-hydroxypregnenolone 91451 2Q4710 UCDavis_Gaikwad_Lab_ID 145 | 17-hydroxyprogesterone 6238 6Q3360 UCDavis_Gaikwad_Lab_ID 146 | Allodihydrotestosterone 10635 14A2570 UCDavis_Gaikwad_Lab_ID 147 | Androstenedione 6128 12A6030 UCDavis_Gaikwad_Lab_ID 148 | Androstenolone (DHEA) 5881 3A8500 UCDavis_Gaikwad_Lab_ID 149 | Cortexolone 440707 7Q1610 UCDavis_Gaikwad_Lab_ID 150 | Cortexone 6166 9Q3460 UCDavis_Gaikwad_Lab_ID 151 | Corticosterone, DOC 5753 10Q1550 UCDavis_Gaikwad_Lab_ID 152 | Cortisol 5754 8Q3880 UCDavis_Gaikwad_Lab_ID 153 | Estradiol 5757 16E0950 UCDavis_Gaikwad_Lab_ID 154 | Estrone 5870 15E2300 UCDavis_Gaikwad_Lab_ID 155 | Pregnenolone 8955 1Q5500 UCDavis_Gaikwad_Lab_ID 156 | Progesterone 5994 5Q2600 UCDavis_Gaikwad_Lab_ID 157 | Testosterone 6013 13A6950 UCDavis_Gaikwad_Lab_ID 158 | METABOLITES_END 159 | #END 160 | 161 | 162 | -------------------------------------------------------------------------------- /tests/test_cli.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | import csv 3 | import json 4 | import mwtab 5 | import os 6 | import pytest 7 | import shutil 8 | 9 | 10 | def 
def teardown_module(module):
    """Remove the temporary output directory once this module's tests are done.

    pytest calls this xunit-style hook after the last test of the module.

    :param module: The test module object supplied by pytest (unused).
    """
    if os.path.exists("tests/example_data/tmp/"):
        shutil.rmtree("tests/example_data/tmp")


@pytest.mark.parametrize("files_source", [
    "204",
    "AN000204",
    "https://www.metabolomicsworkbench.org/rest/study/analysis_id/AN000204/mwtab/txt",
    "tests/example_data/mwtab_files/ST000122_AN000204.txt",
    "tests/example_data/mwtab_files/ST000122_AN000204.json",
    "tests/example_data/mwtab_files",
    "tests/example_data/mwtab_files.zip",
    "tests/example_data/mwtab_files.tar",
    "tests/example_data/mwtab_files.tar.gz",
    "tests/example_data/mwtab_files.tar.bz2"
])
def test_validate_command(files_source):
    """Run ``python -m mwtab validate`` on a source and require exit status 0.

    :param str files_source: Analysis id, URL, file, directory or archive path to validate.
    """
    command = "python -m mwtab validate {}".format(files_source)
    assert os.system(command) == 0


# NOTE: the order of the cases below matters. Several cases read from
# "tests/example_data/tmp/...", i.e. from files written by earlier cases.
@pytest.mark.parametrize("from_path, to_path, from_format, to_format", [
    # one-to-one file conversions
    ("tests/example_data/mwtab_files/ST000122_AN000204.txt", "tests/example_data/tmp/json/ST000122_AN000204.json", "mwtab", "json"),
    ("tests/example_data/mwtab_files/ST000122_AN000204.txt", "tests/example_data/tmp/json/ST000122_AN000204.json.gz", "mwtab", "json"),
    ("tests/example_data/mwtab_files/ST000122_AN000204.txt", "tests/example_data/tmp/json/ST000122_AN000204.json.bz2", "mwtab", "json"),
    ("tests/example_data/tmp/json/ST000122_AN000204.json", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt", "json", "mwtab"),
    ("tests/example_data/tmp/json/ST000122_AN000204.json", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt.gz", "json", "mwtab"),
    ("tests/example_data/tmp/json/ST000122_AN000204.json", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt.bz2", "json", "mwtab"),
    ("tests/example_data/tmp/json/ST000122_AN000204.json.gz", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt", "json", "mwtab"),
    ("tests/example_data/tmp/json/ST000122_AN000204.json.gz", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt.gz", "json", "mwtab"),
    ("tests/example_data/tmp/json/ST000122_AN000204.json.gz", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt.bz2", "json", "mwtab"),
    ("tests/example_data/tmp/json/ST000122_AN000204.json.bz2", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt", "json", "mwtab"),
    ("tests/example_data/tmp/json/ST000122_AN000204.json.bz2", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt.gz", "json", "mwtab"),
    ("tests/example_data/tmp/json/ST000122_AN000204.json.bz2", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt.bz2", "json", "mwtab"),
    ("tests/example_data/mwtab_files/ST000122_AN000204.json", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt", "json", "mwtab"),
    ("tests/example_data/mwtab_files/ST000122_AN000204.json", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt.gz", "json", "mwtab"),
    ("tests/example_data/mwtab_files/ST000122_AN000204.json", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt.bz2", "json", "mwtab"),
    ("tests/example_data/tmp/mwtab/ST000122_AN000204.txt", "tests/example_data/tmp/json/ST000122_AN000204.json", "mwtab", "json"),
    ("tests/example_data/tmp/mwtab/ST000122_AN000204.txt", "tests/example_data/tmp/json/ST000122_AN000204.json.gz", "mwtab", "json"),
    ("tests/example_data/tmp/mwtab/ST000122_AN000204.txt", "tests/example_data/tmp/json/ST000122_AN000204.json.bz2", "mwtab", "json"),
    ("tests/example_data/tmp/mwtab/ST000122_AN000204.txt.gz", "tests/example_data/tmp/json/ST000122_AN000204.json", "mwtab", "json"),
    ("tests/example_data/tmp/mwtab/ST000122_AN000204.txt.gz", "tests/example_data/tmp/json/ST000122_AN000204.json.gz", "mwtab", "json"),
    ("tests/example_data/tmp/mwtab/ST000122_AN000204.txt.gz", "tests/example_data/tmp/json/ST000122_AN000204.json.bz2", "mwtab", "json"),
    ("tests/example_data/tmp/mwtab/ST000122_AN000204.txt.bz2", "tests/example_data/tmp/json/ST000122_AN000204.json", "mwtab", "json"),
    ("tests/example_data/tmp/mwtab/ST000122_AN000204.txt.bz2", "tests/example_data/tmp/json/ST000122_AN000204.json.gz", "mwtab", "json"),
    ("tests/example_data/tmp/mwtab/ST000122_AN000204.txt.bz2", "tests/example_data/tmp/json/ST000122_AN000204.json.bz2", "mwtab", "json"),
    # many-to-many file conversions
    ("tests/example_data/mwtab_files", "tests/example_data/tmp/json/dir/mwtab_files_json", "mwtab", "json"),
    ("tests/example_data/mwtab_files", "tests/example_data/tmp/json/dir/mwtab_files_json.zip", "mwtab", "json"),
    ("tests/example_data/mwtab_files", "tests/example_data/tmp/json/dir/mwtab_files_json.tar", "mwtab", "json"),
    ("tests/example_data/mwtab_files", "tests/example_data/tmp/json/dir/mwtab_files_json.tar.gz", "mwtab", "json"),
    ("tests/example_data/mwtab_files", "tests/example_data/tmp/json/dir/mwtab_files_json.tar.bz2", "mwtab", "json"),
    ("tests/example_data/tmp/json/dir/mwtab_files_json.zip", "tests/example_data/tmp/mwtab/zip/mwtab_files_mwtab", "json", "mwtab"),
    ("tests/example_data/tmp/json/dir/mwtab_files_json.zip", "tests/example_data/tmp/mwtab/zip/mwtab_files_mwtab.zip", "json", "mwtab"),
    ("tests/example_data/tmp/json/dir/mwtab_files_json.zip", "tests/example_data/tmp/mwtab/zip/mwtab_files_mwtab.tar", "json", "mwtab"),
    ("tests/example_data/tmp/json/dir/mwtab_files_json.zip", "tests/example_data/tmp/mwtab/zip/mwtab_files_mwtab.tar.gz", "json", "mwtab"),
    ("tests/example_data/tmp/json/dir/mwtab_files_json.zip", "tests/example_data/tmp/mwtab/zip/mwtab_files_mwtab.tar.bz2", "json", "mwtab"),
    ("tests/example_data/tmp/json/dir/mwtab_files_json.tar", "tests/example_data/tmp/mwtab/tar/mwtab_files_mwtab", "json", "mwtab"),
    ("tests/example_data/tmp/json/dir/mwtab_files_json.tar", "tests/example_data/tmp/mwtab/tar/mwtab_files_mwtab.zip", "json", "mwtab"),
    ("tests/example_data/tmp/json/dir/mwtab_files_json.tar", "tests/example_data/tmp/mwtab/tar/mwtab_files_mwtab.tar", "json", "mwtab"),
    ("tests/example_data/tmp/json/dir/mwtab_files_json.tar", "tests/example_data/tmp/mwtab/tar/mwtab_files_mwtab.tar.gz", "json", "mwtab"),
    ("tests/example_data/tmp/json/dir/mwtab_files_json.tar", "tests/example_data/tmp/mwtab/tar/mwtab_files_mwtab.tar.bz2", "json", "mwtab"),
    ("tests/example_data/tmp/json/dir/mwtab_files_json.tar.gz", "tests/example_data/tmp/mwtab/targz/mwtab_files_mwtab", "json", "mwtab"),
    ("tests/example_data/tmp/json/dir/mwtab_files_json.tar.gz", "tests/example_data/tmp/mwtab/targz/mwtab_files_mwtab.zip", "json", "mwtab"),
    ("tests/example_data/tmp/json/dir/mwtab_files_json.tar.gz", "tests/example_data/tmp/mwtab/targz/mwtab_files_mwtab.tar", "json", "mwtab"),
    ("tests/example_data/tmp/json/dir/mwtab_files_json.tar.gz", "tests/example_data/tmp/mwtab/targz/mwtab_files_mwtab.tar.gz", "json", "mwtab"),
    ("tests/example_data/tmp/json/dir/mwtab_files_json.tar.gz", "tests/example_data/tmp/mwtab/targz/mwtab_files_mwtab.tar.bz2", "json", "mwtab"),
    ("tests/example_data/tmp/json/dir/mwtab_files_json.tar.bz2", "tests/example_data/tmp/mwtab/tarbz2/mwtab_files_mwtab", "json", "mwtab"),
    ("tests/example_data/tmp/json/dir/mwtab_files_json.tar.bz2", "tests/example_data/tmp/mwtab/tarbz2/mwtab_files_mwtab.zip", "json", "mwtab"),
    ("tests/example_data/tmp/json/dir/mwtab_files_json.tar.bz2", "tests/example_data/tmp/mwtab/tarbz2/mwtab_files_mwtab.tar", "json", "mwtab"),
    ("tests/example_data/tmp/json/dir/mwtab_files_json.tar.bz2", "tests/example_data/tmp/mwtab/tarbz2/mwtab_files_mwtab.tar.gz", "json", "mwtab"),
    ("tests/example_data/tmp/json/dir/mwtab_files_json.tar.bz2", "tests/example_data/tmp/mwtab/tarbz2/mwtab_files_mwtab.tar.bz2", "json", "mwtab")
])
def test_convert_command(from_path, to_path, from_format, to_format):
    """Run ``python -m mwtab convert`` and read the converted output back.

    :param str from_path: File, directory or archive to convert.
    :param str to_path: Destination file, directory or archive.
    :param str from_format: Format of the input, "mwtab" or "json".
    :param str to_format: Format of the output, "mwtab" or "json".
    """
    command = "python -m mwtab convert {} {} --from-format={} --to-format={}".format(
        from_path, to_path, from_format, to_format
    )
    assert os.system(command) == 0

    converted_files = list(mwtab.read_files(to_path))
    study_ids = {mwf.study_id for mwf in converted_files}
    analysis_ids = {mwf.analysis_id for mwf in converted_files}
    # NOTE(review): issubset() also holds for an empty set, so these checks pass
    # when nothing could be read back from to_path - confirm that is intended.
    assert study_ids.issubset({"ST000122"})
    assert analysis_ids.issubset({"AN000204"})


@pytest.mark.parametrize("command", [
    # download by url
    "python -m mwtab download url https://www.metabolomicsworkbench.org/rest/study/study_id/ST000001/summary --to-path=tests/example_data/tmp/tmp.txt",
    # download by study methods
    "python -m mwtab download study 2 --to-path=tests/example_data/tmp/tmp.txt --output-format=txt",
    "python -m mwtab download study ST000002 --to-path=tests/example_data/tmp/tmp.txt --output-format=txt",
    "python -m mwtab download study study_id ST000002 summary --to-path=tests/example_data/tmp/tmp.txt",
    "python -m mwtab download study study_id ST analysis --to-path=tests/example_data/tmp/tmp.txt",
    # download compound | refmet | gene | protein
    "python -m mwtab download compound regno 11 name --to-path=tests/example_data/tmp/tmp.txt",
    "python -m mwtab download refmet name Cholesterol all --to-path=tests/example_data/tmp/tmp.txt",
    "python -m mwtab download gene gene_symbol acaca all --to-path=tests/example_data/tmp/tmp.txt",
    "python -m mwtab download protein uniprot_id Q13085 all --to-path=tests/example_data/tmp/tmp.txt",
    # download moverz
    "python -m mwtab download moverz MB 635.52 M+H 0.5 --to-path=tests/example_data/tmp/tmp.txt",
    "python -m mwtab download moverz LIPIDS 513.45 M-2H 0.2 --to-path=tests/example_data/tmp/tmp.txt",
    "python -m mwtab download moverz REFMET 255.2 M+H 0.2 --to-path=tests/example_data/tmp/tmp.txt",
    # download exactmass
    "python -m mwtab download exactmass \"PC(34:1)\" M+H --to-path=tests/example_data/tmp/tmp.txt",
    "python -m mwtab download exactmass \"GlcCer(d42:2)\" M-H --to-path=tests/example_data/tmp/tmp.txt",
])
def test_download_command(command):
    """Run a ``python -m mwtab download`` command and require non-empty output.

    :param str command: Complete shell command; every case writes to the same tmp.txt.
    """
    assert os.system(command) == 0

    output_path = "tests/example_data/tmp/tmp.txt"
    with open(output_path, "r") as fh:
        file_str = fh.read()
    # Empty the shared output file before asserting, so that the next case
    # cannot pass on content left behind by this one.
    with open(output_path, "w"):
        pass
    assert file_str


@pytest.mark.parametrize("from_path, to_path, key, to_format, no_header", [
    ("tests/example_data/mwtab_files/", "tests/example_data/tmp/test_extract_metadata", "SUBJECT_TYPE", "csv", " --no-header"),
    ("tests/example_data/mwtab_files/", "tests/example_data/tmp/test_extract_metadata", "SUBJECT_TYPE", "csv", ""),
    ("tests/example_data/mwtab_files/", "tests/example_data/tmp/test_extract_metadata", "SUBJECT_TYPE", "json", "")
])
def test_extract_metadata_command(from_path, to_path, key, to_format, no_header):
    """Run ``python -m mwtab extract metadata`` and check the extracted values.

    :param str from_path: Directory of mwTab files to extract from.
    :param str to_path: Output path without extension; the format is appended as extension.
    :param str key: Metadata key to extract.
    :param str to_format: Output format, "csv" or "json".
    :param str no_header: Either " --no-header" or an empty string, appended to the command.
    """
    command = "python -m mwtab extract metadata {} {} {} --to-format={}{}".format(
        from_path, to_path, key, to_format, no_header
    )
    assert os.system(command) == 0

    with open(".".join([to_path, to_format]), "r") as f:
        if to_format == "csv":
            rows = list(csv.reader(f))
            if no_header:
                assert set(rows[0]) == {"SUBJECT_TYPE", "Human"}
            else:
                assert set(rows[0]) == {"metadata", "value0"}
                assert set(rows[1]) == {"SUBJECT_TYPE", "Human"}
        elif to_format == "json":
            data = json.load(f)
            # Compare as a set so the order of the extracted values is irrelevant.
            data["SUBJECT_TYPE"] = set(data["SUBJECT_TYPE"])
            assert data == {"SUBJECT_TYPE": {"Human"}}
        else:
            pytest.fail("Unexpected output format: {}".format(to_format))


# @pytest.mark.parametrize("from_path, to_path, key, value, to_format, no_header", [
#     ("tests/example_data/mwtab_files/", "tests/example_data/tmp/test_extract_metabolites", "SU:SUBJECT_TYPE", "Plant", "csv", " --no-header"),
#     ("tests/example_data/mwtab_files/", "tests/example_data/tmp/test_extract_metabolites", "SU:SUBJECT_TYPE",
"Plant", "csv", ""), 163 | # ("tests/example_data/mwtab_files/", "tests/example_data/tmp/test_extract_metabolites", "SU:SUBJECT_TYPE", "Plant", "json", ""), 164 | # ("tests/example_data/mwtab_files/", "tests/example_data/tmp/test_extract_metabolites.csv", "SU:SUBJECT_TYPE", "Plant", "csv", ""), 165 | # ("tests/example_data/mwtab_files/", "tests/example_data/tmp/test_extract_metabolites.json", "SU:SUBJECT_TYPE", "Plant", "json", ""), 166 | # ("tests/example_data/mwtab_files/", "tests/example_data/tmp/test_extract_metabolites", "SU:SUBJECT_TYPE", "\"r'(Plant)'\"", "csv", " --no-header"), 167 | # ("tests/example_data/mwtab_files/", "tests/example_data/tmp/test_extract_metabolites", "SU:SUBJECT_TYPE", "\"r'(Plant)'\"", "csv", ""), 168 | # ("tests/example_data/mwtab_files/", "tests/example_data/tmp/test_extract_metabolites", "SU:SUBJECT_TYPE", "\"r'(Plant)'\"", "json", "") 169 | # ]) 170 | # def test_extract_metabolites_command(from_path, to_path, key, value, to_format, no_header): 171 | # command = "python -m mwtab extract metabolites {} {} {} {} --to-format={}{}".format( 172 | # from_path, to_path, key, value, to_format, no_header 173 | # ) 174 | # assert os.system(command) == 0 175 | # 176 | # if to_format == "csv": 177 | # filepath = to_path 178 | # if not os.path.splitext(filepath)[1]: 179 | # filepath += ".csv" 180 | # with open(filepath, "r") as fh: 181 | # data = list(csv.reader(fh)) 182 | # if bool(no_header): 183 | # assert set(data[0]) == {"1,2,4-benzenetriol", "1", "1", "24"} 184 | # assert len(data) == 191 185 | # else: 186 | # assert set(data[0]) == {"metabolite_name", "num-studies", "num_analyses", "num_samples"} 187 | # assert set(data[1]) == {"1,2,4-benzenetriol", "1", "1", "24"} 188 | # assert len(data) == 192 189 | # fh.close() 190 | # elif to_format == 'json': 191 | # filepath = to_path 192 | # if not os.path.splitext(filepath)[1]: 193 | # filepath += ".json" 194 | # with open(filepath, "r") as fh: 195 | # text = fh.read() 196 | # fh.close() 197 | # 
#         assert text
#     else:
#         assert False

# --------------------------------------------------------------------------------
# /tests/test_converter.py:
# --------------------------------------------------------------------------------
import os
import shutil
import pytest
import mwtab
from json import loads
from mwtab.converter import Converter


# Names of the sections compared key-by-key by ``compare_item_sections``.
ITEM_SECTIONS = {
    # "METABOLOMICS WORKBENCH",
    "PROJECT",
    "STUDY",
    "ANALYSIS",
    "SUBJECT",
    "COLLECTION",
    "TREATMENT",
    "SAMPLEPREP",
    "CHROMATOGRAPHY",
    "MS",
    "NMR",
}


def teardown_module(module):
    """Remove the temporary output directory written by the tests in this module.

    :param module: Module object supplied by pytest (unused).
    """
    if os.path.exists("tests/example_data/tmp"):
        shutil.rmtree("tests/example_data/tmp")


def compare_item_sections(dict1, dict2):
    """
    Method for comparing the item sections of two given dictionaries.

    Helper method which asserts that two item sections (dictionaries), i.e. sections which only contain key-value
    item pairs, from two different `~mwtab.mwtab.MWTabFile` objects are equal.

    :param dict1: First dictionary representing mwTab file section containing key-value item pairs.
    :type dict1: :py:class:`collections.OrderedDict` or :py:class:`dict`
    :param dict2: Second dictionary representing mwTab file section containing key-value item pairs.
    :type dict2: :py:class:`collections.OrderedDict` or :py:class:`dict`
    """
    keys1 = set(dict1.keys())
    keys2 = set(dict2.keys())

    # The symmetric difference is empty only when both sections have exactly the same keys.
    assert not keys1 ^ keys2

    for key in keys1 & keys2:
        assert dict1[key] == dict2[key]


@pytest.mark.parametrize("mwtab_file_path, json_file_path", [
    ("tests/example_data/mwtab_files/ST000122_AN000204.txt", "tests/example_data/mwtab_files/ST000122_AN000204.json")
])
def test_convert_mwtab_to_json(mwtab_file_path, json_file_path):
    """Write an mwTab file out as JSON and compare it with the reference JSON file.

    :param str mwtab_file_path: Path to the mwTab formatted input file.
    :param str json_file_path: Path to the reference JSON file the conversion is compared against.
    """
    # convert given mwTab file to JSON
    mwfile = next(mwtab.read_files(mwtab_file_path))
    os.makedirs("tests/example_data/tmp/", exist_ok=True)
    # The ``with`` statement closes the handle; no explicit ``close()`` is needed.
    with open("tests/example_data/tmp/tmp.json", "w") as f:
        mwfile.write(f, file_format="json")

    # open files
    with open("tests/example_data/tmp/tmp.json", "r") as f:
        converted = loads(f.read())
    with open(json_file_path, "r") as f:
        expected = loads(f.read())

    # assert both files contain the same sections
    assert not set(converted.keys()) ^ set(expected.keys())

    # Assert item sections are equal
    shared_sections = set(converted.keys()) & set(expected.keys())
    for section_key in ITEM_SECTIONS:
        if section_key in shared_sections:
            compare_item_sections(converted[section_key], expected[section_key])

    # TODO: assert MS_METABOLITE_DATA or NMR_METABOLITE_DATA sections are the same (not implemented yet).


# NOTE: the order of the cases matters. Later cases use paths under ``tests/example_data/tmp`` as their
# ``from_path``, and those paths are the ``to_path`` of earlier cases.
@pytest.mark.parametrize("from_path, to_path, from_format, to_format", [
    # one-to-one file conversions
    ("tests/example_data/mwtab_files/ST000122_AN000204.txt", "tests/example_data/tmp/json/ST000122_AN000204.json", "mwtab", "json"),
    ("tests/example_data/mwtab_files/ST000122_AN000204.txt", "tests/example_data/tmp/json/ST000122_AN000204.json.gz", "mwtab", "json"),
    ("tests/example_data/mwtab_files/ST000122_AN000204.txt", "tests/example_data/tmp/json/ST000122_AN000204.json.bz2", "mwtab", "json"),
    ("tests/example_data/tmp/json/ST000122_AN000204.json", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt", "json", "mwtab"),
    ("tests/example_data/tmp/json/ST000122_AN000204.json", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt.gz", "json", "mwtab"),
    ("tests/example_data/tmp/json/ST000122_AN000204.json", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt.bz2", "json", "mwtab"),
    ("tests/example_data/tmp/json/ST000122_AN000204.json.gz", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt", "json", "mwtab"),
    ("tests/example_data/tmp/json/ST000122_AN000204.json.gz", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt.gz", "json", "mwtab"),
    ("tests/example_data/tmp/json/ST000122_AN000204.json.gz", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt.bz2", "json", "mwtab"),
    ("tests/example_data/tmp/json/ST000122_AN000204.json.bz2", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt", "json", "mwtab"),
    ("tests/example_data/tmp/json/ST000122_AN000204.json.bz2", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt.gz", "json", "mwtab"),
    ("tests/example_data/tmp/json/ST000122_AN000204.json.bz2", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt.bz2", "json", "mwtab"),
    ("tests/example_data/mwtab_files/ST000122_AN000204.json", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt", "json", "mwtab"),
    ("tests/example_data/mwtab_files/ST000122_AN000204.json", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt.gz", "json", "mwtab"),
    ("tests/example_data/mwtab_files/ST000122_AN000204.json", "tests/example_data/tmp/mwtab/ST000122_AN000204.txt.bz2", "json", "mwtab"),
    ("tests/example_data/tmp/mwtab/ST000122_AN000204.txt", "tests/example_data/tmp/json/ST000122_AN000204.json", "mwtab", "json"),
    ("tests/example_data/tmp/mwtab/ST000122_AN000204.txt", "tests/example_data/tmp/json/ST000122_AN000204.json.gz", "mwtab", "json"),
    ("tests/example_data/tmp/mwtab/ST000122_AN000204.txt", "tests/example_data/tmp/json/ST000122_AN000204.json.bz2", "mwtab", "json"),
    ("tests/example_data/tmp/mwtab/ST000122_AN000204.txt.gz", "tests/example_data/tmp/json/ST000122_AN000204.json", "mwtab", "json"),
    ("tests/example_data/tmp/mwtab/ST000122_AN000204.txt.gz", "tests/example_data/tmp/json/ST000122_AN000204.json.gz", "mwtab", "json"),
    ("tests/example_data/tmp/mwtab/ST000122_AN000204.txt.gz", "tests/example_data/tmp/json/ST000122_AN000204.json.bz2", "mwtab", "json"),
    ("tests/example_data/tmp/mwtab/ST000122_AN000204.txt.bz2", "tests/example_data/tmp/json/ST000122_AN000204.json", "mwtab", "json"),
    ("tests/example_data/tmp/mwtab/ST000122_AN000204.txt.bz2", "tests/example_data/tmp/json/ST000122_AN000204.json.gz", "mwtab", "json"),
    ("tests/example_data/tmp/mwtab/ST000122_AN000204.txt.bz2", "tests/example_data/tmp/json/ST000122_AN000204.json.bz2", "mwtab", "json"),
    # many-to-many file conversions
    ("tests/example_data/mwtab_files", "tests/example_data/tmp/json/dir/mwtab_files_json", "mwtab", "json"),
    ("tests/example_data/mwtab_files", "tests/example_data/tmp/json/dir/mwtab_files_json.zip", "mwtab", "json"),
    ("tests/example_data/mwtab_files", "tests/example_data/tmp/json/dir/mwtab_files_json.tar", "mwtab", "json"),
    ("tests/example_data/mwtab_files", "tests/example_data/tmp/json/dir/mwtab_files_json.tar.gz", "mwtab", "json"),
    ("tests/example_data/mwtab_files", "tests/example_data/tmp/json/dir/mwtab_files_json.tar.bz2", "mwtab", "json"),
    ("tests/example_data/tmp/json/dir/mwtab_files_json.zip", "tests/example_data/tmp/mwtab/zip/mwtab_files_mwtab", "json", "mwtab"),
    ("tests/example_data/tmp/json/dir/mwtab_files_json.zip", "tests/example_data/tmp/mwtab/zip/mwtab_files_mwtab.zip", "json", "mwtab"),
    ("tests/example_data/tmp/json/dir/mwtab_files_json.zip", "tests/example_data/tmp/mwtab/zip/mwtab_files_mwtab.tar", "json", "mwtab"),
    ("tests/example_data/tmp/json/dir/mwtab_files_json.zip", "tests/example_data/tmp/mwtab/zip/mwtab_files_mwtab.tar.gz", "json", "mwtab"),
    ("tests/example_data/tmp/json/dir/mwtab_files_json.zip", "tests/example_data/tmp/mwtab/zip/mwtab_files_mwtab.tar.bz2", "json", "mwtab"),
    ("tests/example_data/tmp/json/dir/mwtab_files_json.tar", "tests/example_data/tmp/mwtab/tar/mwtab_files_mwtab", "json", "mwtab"),
    ("tests/example_data/tmp/json/dir/mwtab_files_json.tar", "tests/example_data/tmp/mwtab/tar/mwtab_files_mwtab.zip", "json", "mwtab"),
    ("tests/example_data/tmp/json/dir/mwtab_files_json.tar", "tests/example_data/tmp/mwtab/tar/mwtab_files_mwtab.tar", "json", "mwtab"),
    ("tests/example_data/tmp/json/dir/mwtab_files_json.tar", "tests/example_data/tmp/mwtab/tar/mwtab_files_mwtab.tar.gz", "json", "mwtab"),
    ("tests/example_data/tmp/json/dir/mwtab_files_json.tar", "tests/example_data/tmp/mwtab/tar/mwtab_files_mwtab.tar.bz2", "json", "mwtab"),
    ("tests/example_data/tmp/json/dir/mwtab_files_json.tar.gz", "tests/example_data/tmp/mwtab/targz/mwtab_files_mwtab", "json", "mwtab"),
    ("tests/example_data/tmp/json/dir/mwtab_files_json.tar.gz", "tests/example_data/tmp/mwtab/targz/mwtab_files_mwtab.zip", "json", "mwtab"),
    ("tests/example_data/tmp/json/dir/mwtab_files_json.tar.gz", "tests/example_data/tmp/mwtab/targz/mwtab_files_mwtab.tar", "json", "mwtab"),
    ("tests/example_data/tmp/json/dir/mwtab_files_json.tar.gz", "tests/example_data/tmp/mwtab/targz/mwtab_files_mwtab.tar.gz", "json", "mwtab"),
    ("tests/example_data/tmp/json/dir/mwtab_files_json.tar.gz", "tests/example_data/tmp/mwtab/targz/mwtab_files_mwtab.tar.bz2", "json", "mwtab"),
    ("tests/example_data/tmp/json/dir/mwtab_files_json.tar.bz2", "tests/example_data/tmp/mwtab/tarbz2/mwtab_files_mwtab", "json", "mwtab"),
    ("tests/example_data/tmp/json/dir/mwtab_files_json.tar.bz2", "tests/example_data/tmp/mwtab/tarbz2/mwtab_files_mwtab.zip", "json", "mwtab"),
    ("tests/example_data/tmp/json/dir/mwtab_files_json.tar.bz2", "tests/example_data/tmp/mwtab/tarbz2/mwtab_files_mwtab.tar", "json", "mwtab"),
    ("tests/example_data/tmp/json/dir/mwtab_files_json.tar.bz2", "tests/example_data/tmp/mwtab/tarbz2/mwtab_files_mwtab.tar.gz", "json", "mwtab"),
    ("tests/example_data/tmp/json/dir/mwtab_files_json.tar.bz2", "tests/example_data/tmp/mwtab/tarbz2/mwtab_files_mwtab.tar.bz2", "json", "mwtab")
])
def test_converter_module(from_path, to_path, from_format, to_format):
    """Convert ``from_path`` to ``to_path`` and check the IDs of the files read back from ``to_path``.

    :param str from_path: Source file, directory or archive to convert.
    :param str to_path: Destination file, directory or archive.
    :param str from_format: Format of the source (``"mwtab"`` or ``"json"``).
    :param str to_format: Format of the destination (``"mwtab"`` or ``"json"``).
    """
    converter = Converter(from_path=from_path,
                          to_path=to_path,
                          from_format=from_format,
                          to_format=to_format)
    converter.convert()

    mwtabfiles_list = list(mwtab.read_files(to_path))
    mwtabfiles_study_ids_set = {mwf.study_id for mwf in mwtabfiles_list}
    mwtabfiles_analysis_ids_set = {mwf.analysis_id for mwf in mwtabfiles_list}
    # NOTE(review): ``issubset`` also holds when nothing could be read back from ``to_path``;
    # confirm whether an empty result is meant to be accepted here.
    assert mwtabfiles_study_ids_set.issubset({"ST000122"})
    assert mwtabfiles_analysis_ids_set.issubset({"AN000204"})

# --------------------------------------------------------------------------------
# /tests/test_mwextract.py:
# --------------------------------------------------------------------------------
import pytest
import mwtab


# --------------------------------------------------------------------------------
# /tests/test_mwrest.py:
# --------------------------------------------------------------------------------
import pytest
from mwtab.mwrest import BASE_URL, GenericMWURL, analysis_ids, study_ids


def test_study_analysis():
    """Check that ``analysis_ids()`` and ``study_ids()`` both return a non-empty (truthy) result."""
    an_ids = analysis_ids()
    assert an_ids
    st_ids = study_ids()
    assert st_ids


@pytest.mark.parametrize("kwds", [
    ({"context": "study",
      "input_item": "analysis_id",
      "input_value": "AN000002",
      "output_item": "mwtab",
      'output_format': "txt"}),
    ({"context": "study",
      "input_item": "study_id",
      "input_value": "ST000001",
      "output_item": "mwtab",
      'output_format': "txt"}),
    ({"base_url": "https://www.test.org/rest/",
      "context": "study",
      "input_item": "study_id",
      "input_value": "ST000001",
      "output_item": "mwtab",
      'output_format': "txt"}),
])
def test_mwrest(kwds):
    """Check that ``GenericMWURL`` joins the given components, in order, onto its base URL.

    :param dict kwds: Keyword dictionary passed to ``GenericMWURL``.
    """
    test_mwurl = GenericMWURL(kwds)
    assert test_mwurl.url == test_mwurl.base_url + "/".join([
        kwds["context"],
        kwds["input_item"],
        kwds["input_value"],
        kwds["output_item"],
        kwds.get("output_format") or ""
    ])


@pytest.mark.parametrize("kwds", [
    ({"context": "study",
      "input_item": "analysis_id",
      "input_value": "ST000001",
      "output_item": "mwtab",
      'output_format': "txt"}),
    ({"context": "moverz",
      "input_item": "LIPIDS",
      "m/z_value": 49,
      "ion_type_value": "M+H",
      "m/z_tolerance_value": 0.1,
      'output_format': "txt"}),
    ({"context": "exactmass",
      "LIPID_abbreviation": "Test",
      "ion_type_value": "M+H"}),
])
def test_fail_mwrest(kwds):
    """Check that ``GenericMWURL`` rejects each of these keyword dictionaries with a ``ValueError``.

    :param dict kwds: Keyword dictionary passed to ``GenericMWURL``.
    """
    with pytest.raises(ValueError) as exc_info:
        GenericMWURL(kwds)
    # Keep the exact-type requirement: a subclass of ValueError is not accepted.
    assert type(exc_info.value) is ValueError

# --------------------------------------------------------------------------------
# /tests/test_reading.py:
# --------------------------------------------------------------------------------
import pytest
import mwtab


@pytest.mark.parametrize("files_source", [
    "204",
    "AN000204",
    "https://www.metabolomicsworkbench.org/rest/study/analysis_id/AN000204/mwtab/txt",
    "tests/example_data/mwtab_files/ST000122_AN000204.txt",
    "tests/example_data/mwtab_files/ST000122_AN000204.json",
])
def test_single_file_reading(files_source):
    """Read the first file from ``files_source`` and check its study and analysis IDs.

    :param str files_source: Source string handed to ``mwtab.read_files``.
    """
    mwtabfile_generator = mwtab.read_files(files_source)
    mwtabfile = next(mwtabfile_generator)
    assert mwtabfile.study_id == "ST000122"
    assert mwtabfile.analysis_id == "AN000204"


@pytest.mark.parametrize("files_source", [
    "tests/example_data/mwtab_files",
    "tests/example_data/mwtab_files.zip",
    "tests/example_data/mwtab_files.tar.gz",
    "tests/example_data/mwtab_files.tar.bz2"
])
def test_multiple_reading(files_source):
    """Read every file from a directory or archive and check the set of study and analysis IDs.

    :param str files_source: Directory or archive path handed to ``mwtab.read_files``.
    """
    mwtabfiles_list = list(mwtab.read_files(files_source))
    mwtabfiles_study_ids_set = {mwf.study_id for mwf in mwtabfiles_list}
    mwtabfiles_analysis_ids_set = {mwf.analysis_id for mwf in mwtabfiles_list}
    assert mwtabfiles_study_ids_set == {"ST000122"}
    assert mwtabfiles_analysis_ids_set == {"AN000204"}

# --------------------------------------------------------------------------------
# /tests/test_validator.py:
# --------------------------------------------------------------------------------
import pytest
import mwtab


@pytest.mark.parametrize("files_source", [
    "tests/example_data/mwtab_files/ST000122_AN000204.json",
    "tests/example_data/mwtab_files/ST000122_AN000204.txt"
])
def test_validate(files_source):
    """Test method for validating passing mwTab and JSON files from Metabolomics Workbench.

    :param files_source: File path to Metabolomics Workbench file to be validated.
    :type files_source: :py:class:`str`
    """
    mwfile = next(mwtab.read_files(files_source))
    _, validation_log = mwtab.validate_file(mwfile, metabolites=False)
    assert len(validation_log.split('\n')) == 9


# This test used to be named ``test_validate_subject_sample_factors``, the same name as the test below.
# The later definition rebound the name, so this one was never collected by pytest. It now has a unique name.
@pytest.mark.parametrize("file_source", [
    "tests/example_data/validation_files/ST000122_AN000204_error_1.txt",
    "tests/example_data/validation_files/ST000122_AN000204_error_1.json"
])
def test_validate_subject_sample_factors_missing_values(file_source):
    """Check that missing subject IDs, sample IDs and factor values are reported in the validation log.

    :param str file_source: Path to the file to validate.
    """
    mwfile = next(mwtab.read_files(file_source))
    _, validation_log = mwtab.validate_file(mwfile, metabolites=False)
    assert "missing Subject ID" in validation_log
    assert "missing Sample ID" in validation_log
    assert "missing value for Factor" in validation_log


@pytest.mark.parametrize("file_source", [
    "tests/example_data/validation_files/ST000122_AN000204_error_2.txt",
    "tests/example_data/validation_files/ST000122_AN000204_error_2.json"
])
def test_validate_subject_sample_factors(file_source):
    """Check that sample IDs missing from SUBJECT_SAMPLE_FACTORS are reported in the validation log.

    :param str file_source: Path to the file to validate.
    """
    mwfile = next(mwtab.read_files(file_source))
    _, validation_log = mwtab.validate_file(mwfile, metabolites=False)
    # assert "Section missing data entry for sample(s):" in validation_log
    assert "SUBJECT_SAMPLE_FACTORS: Section missing sample ID(s)" in validation_log


@pytest.mark.parametrize("file_source", [
    "tests/example_data/validation_files/ST000122_AN000204_error_3.txt",
    "tests/example_data/validation_files/ST000122_AN000204_error_3.json"
])
def test_validate_metabolites(file_source):
    """Check that the validation log flags a name that matches a commonly used field name.

    :param str file_source: Path to the file to validate.
    """
    mwfile = next(mwtab.read_files(file_source))
    _, validation_log = mwtab.validate_file(mwfile)
    assert "which matches a commonly used field name" in validation_log


@pytest.mark.parametrize("file_source", [
    "tests/example_data/validation_files/ST000122_AN000204_error_4.txt",
    "tests/example_data/validation_files/ST000122_AN000204_error_4.json"
])
def test_validate_schema(file_source):
    """Check that the validation log reports content that does not match the allowed schema.

    :param str file_source: Path to the file to validate.
    """
    mwfile = next(mwtab.read_files(file_source))
    _, validation_log = mwtab.validate_file(mwfile)
    assert "does not match the allowed schema" in validation_log


@pytest.mark.parametrize("file_source", [
    "tests/example_data/mwtab_files/ST000122_AN000204.json"
])
def test_validation_log_local(file_source):
    """Check the header fields of the validation log for a local JSON file.

    :param str file_source: Path to the local file to validate.
    """
    mwfile = next(mwtab.read_files(file_source))
    _, validation_log = mwtab.validate_file(mwfile)
    # assert "mwtab version: {}".format(mwtab.__version__) in validation_log
    assert "Source: {}".format(file_source) in validation_log
    assert "Study ID: {}".format("ST000122") in validation_log
    assert "Analysis ID: {}".format("AN000204") in validation_log
    assert "File format: {}".format("json") in validation_log


@pytest.mark.parametrize("file_source", [
    "2"
])
def test_validation_log_web(file_source):
    """Check the header fields of the validation log for a file identified by the source string ``"2"``.

    The assertions expect the log to name the Metabolomics Workbench REST URL for analysis ``AN000002`` as the
    source.

    :param str file_source: Source string handed to ``mwtab.read_files``.
    """
    mwfile = next(mwtab.read_files(file_source))
    _, validation_log = mwtab.validate_file(mwfile, metabolites=False)
    # assert "mwtab version: {}".format(mwtab.__version__) in validation_log
    expected_source = "https://www.metabolomicsworkbench.org/rest/study/analysis_id/AN000002/mwtab/txt"
    assert "Source: {}".format(expected_source) in validation_log
    assert "Study ID: {}".format("ST000002") in validation_log
    assert "Analysis ID: {}".format("AN000002") in validation_log
    assert "File format: {}".format("txt") in validation_log

# --------------------------------------------------------------------------------