├── .coveragerc ├── .github └── workflows │ └── build-test.yml ├── .gitignore ├── LICENSE ├── MANIFEST.in ├── README.rst ├── beamspy ├── __init__.py ├── __main__.py ├── annotation.py ├── auxiliary.py ├── data │ ├── adducts.txt │ ├── databases │ │ ├── biocyc_chlamycyc_20180702_v1.sql.gz │ │ ├── chebi_complete_3star_rel195_v1.sql.gz │ │ ├── chebi_complete_rel195_v1.sql.gz │ │ ├── databases.txt │ │ ├── hmdb_csf_v4_0_20200910_v1.sql.gz │ │ ├── hmdb_feces_v4_0_20200910_v1.sql.gz │ │ ├── hmdb_full_v4_0_20200909_v1.sql.gz │ │ ├── hmdb_saliva_v4_0_20200910_v1.sql.gz │ │ ├── hmdb_serum_v4_0_20200910_v1.sql.gz │ │ ├── hmdb_sweat_v4_0_20200910_v1.sql.gz │ │ ├── hmdb_urine_v4_0_20200910_v1.sql.gz │ │ ├── kegg_dpx_20210111_v1.sql.gz │ │ ├── kegg_full_20210111_v1.sql.gz │ │ ├── kegg_hsa_20210111_v1.sql.gz │ │ ├── lipidmaps_fattyacyls_20201001_v1.sql.gz │ │ ├── lipidmaps_full_20201001_v1.sql.gz │ │ ├── lipidmaps_glycerolipids_20201001_v1.sql.gz │ │ ├── lipidmaps_sacccharolipids_20201001_v1.sql.gz │ │ ├── lipidmaps_slycerophospholipids_20201001_v1.sql.gz │ │ ├── lipidmaps_solyketides_20201001_v1.sql.gz │ │ ├── lipidmaps_sphingolipids_20201001_v1.sql.gz │ │ ├── lipidmaps_srenollipids_20201001_v1.sql.gz │ │ └── lipidmaps_sterollipids_20201001_v1.sql.gz │ ├── db_compounds.txt │ ├── db_mf.txt │ ├── isotopes.txt │ ├── multiple_charged_ions.txt │ ├── neutral_losses.txt │ └── nist_database.txt ├── db_parsers.py ├── grouping.py ├── gui.py ├── in_out.py ├── libraries.py ├── plots.py ├── qt │ ├── __init__.py │ ├── form.py │ ├── form.ui │ └── ui_to_py.bat └── statistics.py ├── codecov.yml ├── docs ├── Makefile ├── make.bat └── source │ ├── beams.rst │ ├── cli.rst │ ├── conf.py │ ├── galaxy.rst │ ├── gui.rst │ ├── index.rst │ ├── introduction.rst │ └── quickstart.rst ├── environment.yml ├── examples ├── run.py └── run.sh ├── notebooks └── examples.ipynb ├── requirements.txt ├── setup.py └── tests ├── __init__.py ├── test_annotation.py ├── test_auxiliary.py ├── test_data ├── biocyc_record.txt 
├── dataMatrix.txt ├── dataMatrix_dims_theoretical.txt ├── dataMatrix_lcms_theoretical.txt ├── dataMatrix_lcms_theoretical_mc_o.txt ├── dataMatrix_lcms_theoretical_nls.txt ├── hmdb_record.xml ├── kegg_record.txt ├── peaklist_dims_pos_theoretical.txt ├── peaklist_lcms_pos_theoretical.txt ├── peaklist_lcms_pos_theoretical_mc_o.txt ├── peaklist_lcms_pos_theoretical_mn.txt ├── peaklist_lcms_pos_theoretical_nls.txt ├── peaklist_lcms_pos_theoretical_no_name.txt ├── results_annotation.sqlite ├── results_annotation_excl_pattern.sqlite ├── results_annotation_graph.sqlite ├── results_annotation_mc_o.sqlite ├── results_annotation_nls.sqlite ├── results_mfdb.sqlite ├── results_mfdb_excl_hrules.sqlite ├── results_pearson.sqlite ├── results_pearson_all.sqlite ├── results_spearman.sqlite ├── sdf_record.sdf ├── summary_mr_mc.txt ├── summary_mr_mc_graphs.txt ├── summary_mr_mc_nls.txt ├── summary_sr_mc.txt ├── summary_sr_sc.txt ├── tab_delimited_record.txt ├── variableMetadata.txt └── xset_matrix.txt ├── test_db_parsers.py ├── test_grouping.py ├── test_in_out.py ├── test_libraries.py ├── test_plots.py ├── test_results └── .gitignore ├── test_statistics.py └── utils.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit = tests/*,setup.py,beamspy/__main__.py,beamspy/qt/form.py,beamspy/gui.py -------------------------------------------------------------------------------- /.github/workflows/build-test.yml: -------------------------------------------------------------------------------- 1 | name: beamspy 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | build: 7 | runs-on: ${{ matrix.os }} 8 | 9 | strategy: 10 | matrix: 11 | os: [ubuntu-latest, windows-latest, macos-latest] 12 | python-version: ['3.8', '3.9', '3.10'] 13 | 14 | env: 15 | OS: ${{ matrix.os }} 16 | PYTHON: ${{ matrix.python-version }} 17 | 18 | steps: 19 | - uses: actions/checkout@v2 20 | 21 | - name: Setup conda - Python ${{ matrix.python-version }} 22 | 
uses: s-weigand/setup-conda@v1 23 | with: 24 | update-conda: true 25 | python-version: ${{ matrix.python-version }} 26 | conda-channels: conda-forge, bioconda 27 | 28 | - name: Install dependencies 29 | run: | 30 | 31 | python --version 32 | conda env update --file environment.yml --name __setup_conda 33 | 34 | - name: Lint with flake8 35 | run: | 36 | 37 | conda install flake8 38 | 39 | # stop build if there are Python syntax errors or undefined names 40 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 41 | 42 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 43 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 44 | 45 | - name: Test with pytest-cov 46 | run: | 47 | conda install pytest codecov pytest-cov -c conda-forge 48 | 49 | python -m pip install --no-deps -e . 50 | beamspy --help 51 | 52 | pytest --cov ./ --cov-config=.coveragerc --cov-report=xml 53 | 54 | - name: Upload code coverage to codecov 55 | uses: codecov/codecov-action@v1 56 | if: matrix.os == 'ubuntu-latest' 57 | with: 58 | flags: unittests 59 | env_vars: OS,PYTHON 60 | fail_ci_if_error: true 61 | verbose: true 62 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *,cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # dotenv 80 | .env 81 | 82 | # virtualenv 83 | .venv 84 | venv/ 85 | ENV/ 86 | 87 | # PyCharm 88 | # User-specific stuff: 89 | .idea/ 90 | 91 | # R-lanaguage 92 | # History files 93 | .Rhistory 94 | .Rapp.history 95 | 96 | # Session Data files 97 | .RData 98 | 99 | #macOS 100 | *.DS_Store 101 | 102 | *~ 103 | 104 | 105 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.rst 2 | include LICENSE 3 | include requirements.txt 4 | include examples/run.py 5 | include examples/run.sh 6 | include tests/*.py 7 | include tests/test_data/*.* 8 | include tests/test_results/.gitignore 9 | include beamspy/data/*.txt 10 | include beamspy/data/databases/*.sql.gz 11 | include beamspy/data/databases/databases.txt -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | BEAMSpy - Birmingham mEtabolite Annotation for Mass Spectrometry (Python package) 2 | ================================================================================== 3 | |Version| |Py versions| |Git| |Bioconda| |Build 
Status| |License| |RTD doc| |codecov| |mybinder| 4 | 5 | BEAMSpy (Birmingham mEtabolite Annotation for Mass Spectrometry) is a Python package that includes several automated and 6 | seamless computational modules that are applied to putatively annotate metabolites detected in untargeted ultra (high) 7 | performance liquid chromatography-mass spectrometry or untargeted direct infusion mass spectrometry metabolomic assays. 8 | All reported metabolites are annotated to level 2 or 3 of the Metabolomics Standards 9 | Initiative (MSI) reporting standards (Metabolomics. 2007 Sep; 3(3): 211–221. `doi: 10.1007/s11306-007-0082-2 `_). 10 | The package is highly flexible to suit the diversity of sample types studied and mass spectrometers applied in 11 | untargeted metabolomics studies. The user can use the standard reference files included in the package or can develop 12 | their own reference files. 13 | 14 | 15 | - `Documentation (Read the Docs) `_ 16 | - `Bug reports `_ 17 | 18 | 19 | Quick installation 20 | ------------------- 21 | 22 | Conda_ 23 | ~~~~~~~ 24 | 25 | 1. Install `Miniconda `_. Follow the steps described `here `__. 26 | 2. Run the following commands to install BEAMSpy. 27 | 28 | Windows-64, Linux-64 and OSx 29 | 30 | :: 31 | 32 | $ conda create -n beamspy beamspy -c conda-forge -c bioconda -c computational-metabolomics 33 | $ activate beamspy 34 | 35 | Linux-64 and OSx 36 | 37 | :: 38 | 39 | $ conda create -n beamspy beamspy -c conda-forge -c bioconda -c computational-metabolomics 40 | $ source activate beamspy 41 | 42 | 43 | Usage 44 | ------------------------ 45 | 46 | Command line interface (CLI) 47 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 48 | 49 | :: 50 | 51 | $ beamspy --help 52 | 53 | Graphical user interface (GUI) 54 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 55 | 56 | :: 57 | 58 | $ beamspy start-gui 59 | 60 | 61 | Bug reports 62 | ------------------------ 63 | 64 | Please report any bugs that you find `here `__. 
65 | Or fork the repository on `GitHub `_ 66 | and create a pull request (PR). We welcome all contributions, and we will help you to make the PR if you are new to `git `_. 67 | 68 | 69 | Credits 70 | ------- 71 | - `Team (University of Birmingham and EMBL-EBI) `__ 72 | 73 | **Code base** 74 | - Ralf J. M. Weber (r.j.weber@bham.ac.uk) - `University of Birmingham (UK) `__ 75 | 76 | 77 | License 78 | ------------------------ 79 | 80 | Released under the GNU General Public License v3.0 (see `LICENSE `_) 81 | 82 | .. |Build Status| image:: https://github.com/computational-metabolomics/beamspy/workflows/beamspy/badge.svg 83 | :target: https://github.com/computational-metabolomics/beamspy/actions 84 | 85 | .. |Py versions| image:: https://img.shields.io/pypi/pyversions/beamspy.svg?style=flat&maxAge=3600 86 | :target: https://pypi.python.org/pypi/beamspy/ 87 | 88 | .. |Version| image:: https://img.shields.io/pypi/v/beamspy.svg?style=flat&maxAge=3600 89 | :target: https://pypi.python.org/pypi/beamspy/ 90 | 91 | .. |Git| image:: https://img.shields.io/badge/repository-GitHub-blue.svg?style=flat&maxAge=3600 92 | :target: https://github.com/computational-metabolomics/beamspy 93 | 94 | .. |Bioconda| image:: https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat&maxAge=3600 95 | :target: http://bioconda.github.io/recipes/beamspy/README.html 96 | 97 | .. |License| image:: https://img.shields.io/badge/License-GPL%20v3-blue.svg 98 | :target: https://www.gnu.org/licenses/gpl-3.0.html 99 | 100 | .. |RTD doc| image:: https://img.shields.io/badge/documentation-RTD-71B360.svg?style=flat&maxAge=3600 101 | :target: https://beamspy.readthedocs.io/en/latest/ 102 | 103 | .. |codecov| image:: https://codecov.io/gh/computational-metabolomics/beamspy/branch/master/graph/badge.svg 104 | :target: https://codecov.io/gh/computational-metabolomics/beamspy 105 | 106 | .. 
def map_delimiter(delimiter):
    """
    Translate a delimiter keyword into the literal separator character.

    :param delimiter: the keyword "comma" or "tab", or any other value.
    :return: "," for "comma", a tab character for "tab"; every other value is
        handed back unchanged.
    """
    named_separators = {"tab": "\t", "comma": ","}
    return named_separators.get(delimiter, delimiter)
metabolites.') 40 | 41 | parser_sr = subparsers.add_parser('summary-results', help='Summarise results.') 42 | 43 | parser_gui = subparsers.add_parser('start-gui', help='Start GUI.') 44 | 45 | 46 | ################################# 47 | # GROUP FEATURES 48 | ################################# 49 | 50 | parser_gf.add_argument('-l', '--peaklist', 51 | type=str, required=True, help="Tab-delimited peaklist.") 52 | 53 | parser_gf.add_argument('-i', '--intensity-matrix', 54 | type=str, required=True, help="Tab-delimited intensity matrix.") 55 | 56 | #parser_gf.add_argument('-x', '--xset-matrix', 57 | # type=str, required=False, help="Tab-delimited intensity matrix") 58 | 59 | parser_gf.add_argument('-d', '--db', type=str, required=True, 60 | help="Sqlite database to write results.") 61 | 62 | parser_gf.add_argument('-r', '--max-rt-diff', default=5.0, type=float, required=True, 63 | help="Maximum difference in retention time between two peaks.") 64 | 65 | parser_gf.add_argument('-m', '--method', default="pearson", choices=["pearson", "spearman"], required=True, 66 | help="Method to apply for grouping features.") 67 | 68 | parser_gf.add_argument('-c', '--coeff-threshold', default=0.7, type=float, required=True, 69 | help="Threshold for correlation coefficient.") 70 | 71 | parser_gf.add_argument('-p', '--pvalue-threshold', default=0.01, type=float, required=True, 72 | help="Threshold for p-value.") 73 | 74 | parser_gf.add_argument('-o', '--positive', action="store_true", 75 | help="Use positive correlation only otherwise use both positive and negative correlation.") 76 | 77 | parser_gf.add_argument('-g', '--gml-file', type=str, required=True, 78 | help="Write graph to GraphML format.") 79 | 80 | parser_gf.add_argument('-n', '--ncpus', type=int, required=False, 81 | help="Number of central processing units (CPUs).") 82 | 83 | ################################# 84 | # ANNOTATE PEAK PATTERS 85 | ################################# 86 | 87 | parser_app.add_argument('-l', 
'--peaklist', type=str, required=True, 88 | help="Tab-delimited peaklist.") 89 | 90 | parser_app.add_argument('-i', '--intensity-matrix', type=str, required=False, 91 | help="Tab-delimited intensity matrix.") 92 | 93 | parser_app.add_argument('-g', '--gml-file', type=str, required=False, 94 | help="Correlation graph in GraphML format.") 95 | 96 | parser_app.add_argument('-d', '--db', type=str, required=True, 97 | help="Sqlite database to write results.") 98 | 99 | parser_app.add_argument('-a', '--adducts', action='store_true', required=False, 100 | help="Annotate adducts.") 101 | 102 | parser_app.add_argument('-b', '--adducts-library', type=str, default=None, required=False, 103 | help="List of adducts.") 104 | 105 | parser_app.add_argument('-e', '--isotopes', action='store_true', required=False, 106 | help="Annotate isotopes.") 107 | 108 | parser_app.add_argument('-f', '--isotopes-library', required=False, 109 | help="List of isotopes.") 110 | 111 | parser_app.add_argument('-o', '--oligomers', action='store_true', required=False, 112 | help="Annotate oligomers.") 113 | 114 | parser_app.add_argument('-n', '--neutral-losses', action='store_true', required=False, 115 | help="Annotate neutral losses.") 116 | 117 | parser_app.add_argument('-s', '--neutral-losses-library', required=False, 118 | help="List of neutral losses.") 119 | 120 | parser_app.add_argument('-m', '--ion-mode', choices=["pos", "neg"], required=True, 121 | help="Ion mode of the libraries.") 122 | 123 | parser_app.add_argument('-p', '--ppm', default=3.0, type=float, required=True, 124 | help="Mass tolerance in parts per million.") 125 | 126 | parser_app.add_argument('-u', '--max-monomer-units', default=2, type=int, required=False, 127 | help="Maximum number of monomer units.") 128 | 129 | 130 | ################################# 131 | # ANNOTATE MOLECULAR FORMULAE 132 | ################################# 133 | 134 | parser_amf.add_argument('-l', '--peaklist', type=str, required=True, 135 | 
help="Tab-delimited peaklist.") 136 | 137 | parser_amf.add_argument('-i', '--intensity-matrix', type=str, required=False, 138 | help="Tab-delimited intensity matrix.") 139 | 140 | parser_amf.add_argument('-d', '--db', type=str, required=True, 141 | help="Sqlite database to write results.") 142 | 143 | parser_amf.add_argument('-c', '--db-mf', type=str, default="http://mfdb.bham.ac.uk", 144 | help="Molecular formulae database (reference).") 145 | 146 | parser_amf.add_argument('-a', '--adducts-library', type=str, default=None, required=False, 147 | help="List of adducts to search for.") 148 | 149 | parser_amf.add_argument('-m', '--ion-mode', choices=["pos", "neg"], required=True, 150 | help="Ion mode of the libraries.") 151 | 152 | parser_amf.add_argument('-p', '--ppm', default=3.0, type=float, required=True, 153 | help="Mass tolerance in parts per million.") 154 | 155 | parser_amf.add_argument('-e', '--skip-patterns', action="store_false", 156 | help="Skip applying/using peak patterns (e.g. 
adduct and isotope patterns) to filter annotations.") 157 | 158 | parser_amf.add_argument('-r', '--skip-rules', action="store_false", 159 | help="Skip heuritic rules to filter annotations.") 160 | 161 | parser_amf.add_argument('-z', '--max-mz', type=float, required=False, default=500.0, 162 | help="Maximum m/z value to assign molecular formula(e).") 163 | 164 | 165 | ################################# 166 | # ANNOTATE METABOLITES 167 | ################################# 168 | 169 | parser_am.add_argument('-l', '--peaklist', type=str, required=True, 170 | help="Tab-delimited peaklist.") 171 | 172 | parser_am.add_argument('-i', '--intensity-matrix', type=str, required=False, 173 | help="Tab-delimited intensity matrix.") 174 | 175 | parser_am.add_argument('-d', '--db', type=str, required=True, 176 | help="Sqlite database to write results.") 177 | 178 | parser_am.add_argument('-c', '--db-compounds', type=str, default="", required=False, 179 | help="Metabolite database (reference).") 180 | 181 | parser_am.add_argument('-n', '--db-name', type=str, default="", required=True, 182 | help="Name compound / metabolite database (within --db-compounds).") 183 | 184 | parser_am.add_argument('-a', '--adducts-library', type=str, default=None, required=False, 185 | help="List of adducts to search for.") 186 | 187 | parser_am.add_argument('-m', '--ion-mode', choices=["pos", "neg"], required=True, 188 | help="Ion mode of the libraries.") 189 | 190 | parser_am.add_argument('-p', '--ppm', default=3.0, type=float, required=True, 191 | help="Mass tolerance in parts per million.") 192 | 193 | parser_am.add_argument('-e', '--skip-patterns', action="store_false", 194 | help="Skip applying/using peak patterns (e.g. 
adduct and isotope patterns) to filter annotations.") 195 | 196 | parser_am.add_argument('-r', '--rt', default=None, type=float, 197 | help="Retention time tolerance in seconds.") 198 | 199 | ################################# 200 | # SUMMARY RESULTS 201 | ################################# 202 | 203 | parser_sr.add_argument('-l', '--peaklist', type=str, required=True, 204 | help="Tab-delimited peaklist") 205 | 206 | parser_sr.add_argument('-i', '--intensity-matrix', type=str, required=False, 207 | help="Tab-delimited intensity matrix.") 208 | 209 | parser_sr.add_argument('-o', '--output', type=str, required=True, 210 | help="Output file for the summary") 211 | 212 | parser_sr.add_argument('-p', '--pdf', type=str, required=False, 213 | help="Output pdf file for the summary plots") 214 | 215 | parser_sr.add_argument('-d', '--db', type=str, required=True, 216 | help="Sqlite database that contains the results from the previous steps.") 217 | 218 | parser_sr.add_argument('-s', '--sep', default="tab", choices=["tab", "comma"], required=True, 219 | help="Values on each line of the output are separated by this character.") 220 | 221 | parser_sr.add_argument('-r', '--single-row', action="store_true", 222 | help="Concatenate the annotations for each spectral feature and represent in a single row.") 223 | 224 | parser_sr.add_argument('-c', '--single-column', action="store_true", 225 | help="Concatenate the annotations for each spectral feature and keep seperate columns for molecular formula, adduct, name, etc.") 226 | 227 | parser_sr.add_argument('-n', '--ndigits-mz', default=None, type=int, required=False, 228 | help="Digits after the decimal point for m/z values.") 229 | 230 | parser_sr.add_argument('-t', '--convert-rt', default=None, choices=["sec", "min", None], 231 | required=False, help="Covert the retention time to seconds or minutes. 
An additional column will be added.") 232 | 233 | args = parser.parse_args() 234 | 235 | print(args) 236 | 237 | separators = {"tab": "\t", "comma": ","} 238 | 239 | if args.step == "group-features": 240 | df = in_out.combine_peaklist_matrix(args.peaklist, args.intensity_matrix) 241 | graph = grouping.group_features(df, db_out=args.db, max_rt_diff=args.max_rt_diff, 242 | coeff_thres=args.coeff_threshold, pvalue_thres=args.pvalue_threshold, 243 | method=args.method, positive=args.positive, ncpus=args.ncpus) 244 | nx.write_gml(graph, str(args.gml_file)) 245 | 246 | if args.step == "annotate-peak-patterns": 247 | 248 | if args.gml_file: 249 | inp = nx.read_gml(args.gml_file) 250 | elif args.intensity_matrix: 251 | inp = in_out.combine_peaklist_matrix(args.peaklist, args.intensity_matrix) 252 | else: 253 | inp = in_out.read_peaklist(args.peaklist) 254 | 255 | if args.adducts: 256 | if args.adducts_library: 257 | lib = in_out.read_adducts(args.adducts_library, args.ion_mode) 258 | else: 259 | path = 'data/adducts.txt' 260 | p = os.path.join(os.path.dirname(os.path.abspath(__file__)), path) 261 | lib = in_out.read_adducts(p, args.ion_mode) 262 | annotation.annotate_adducts(inp, db_out=args.db, ppm=args.ppm, lib=lib, add=False) 263 | 264 | if args.isotopes: 265 | if args.isotopes_library: 266 | lib = in_out.read_isotopes(args.isotopes_library, args.ion_mode) 267 | else: 268 | path = 'data/isotopes.txt' 269 | p = os.path.join(os.path.dirname(os.path.abspath(__file__)), path) 270 | lib = in_out.read_isotopes(p, args.ion_mode) 271 | annotation.annotate_isotopes(inp, db_out=args.db, ppm=args.ppm, lib=lib) 272 | 273 | if args.neutral_losses: 274 | if args.neutral_losses_library: 275 | lib = in_out.read_neutral_losses(args.neutral_losses_library) 276 | else: 277 | path = 'data/neutral_losses.txt' 278 | p = os.path.join(os.path.dirname(os.path.abspath(__file__)), path) 279 | lib = in_out.read_neutral_losses(p) 280 | annotation.annotate_neutral_losses(inp, db_out=args.db, 
ppm=args.ppm, lib=lib) 281 | 282 | if args.oligomers: 283 | if args.adducts_library: 284 | lib = in_out.read_adducts(args.adducts_library, args.ion_mode) 285 | else: 286 | path = 'data/adducts.txt' 287 | p = os.path.join(os.path.dirname(os.path.abspath(__file__)), path) 288 | lib = in_out.read_adducts(p, args.ion_mode) 289 | 290 | annotation.annotate_oligomers(inp, db_out=args.db, ppm=args.ppm, lib=lib, maximum=args.max_monomer_units) 291 | 292 | if args.step == "annotate-mf": 293 | 294 | if args.intensity_matrix: 295 | df = in_out.combine_peaklist_matrix(args.peaklist, args.intensity_matrix) 296 | else: 297 | df = in_out.read_peaklist(args.peaklist) 298 | 299 | if args.adducts_library: 300 | lib = in_out.read_adducts(args.adducts_library, args.ion_mode) 301 | else: 302 | path = 'data/adducts.txt' 303 | p = os.path.join(os.path.dirname(os.path.abspath(__file__)), path) 304 | lib = in_out.read_adducts(p, args.ion_mode) 305 | annotation.annotate_molecular_formulae(df, ppm=args.ppm, lib_adducts=lib, db_out=args.db, db_in=args.db_mf, 306 | patterns=args.skip_patterns, rules=args.skip_rules, max_mz=args.max_mz) 307 | 308 | if args.step == "annotate-compounds": 309 | 310 | if args.intensity_matrix: 311 | df = in_out.combine_peaklist_matrix(args.peaklist, args.intensity_matrix) 312 | else: 313 | df = in_out.read_peaklist(args.peaklist) 314 | 315 | if args.adducts_library: 316 | lib = in_out.read_adducts(args.adducts_library, args.ion_mode) 317 | else: 318 | path = 'data/adducts.txt' 319 | p = os.path.join(os.path.dirname(os.path.abspath(__file__)), path) 320 | lib = in_out.read_adducts(p, args.ion_mode) 321 | annotation.annotate_compounds(df, lib_adducts=lib, ppm=args.ppm, db_out=args.db, db_name=args.db_name, patterns=args.skip_patterns, db_in=args.db_compounds, rt_tol=args.rt) 322 | 323 | if args.step == "summary-results": 324 | 325 | if args.intensity_matrix: 326 | df = in_out.combine_peaklist_matrix(args.peaklist, args.intensity_matrix) 327 | else: 328 | df = 
def order_composition_by_hill(composition):
    """
    Yield the element symbols of a composition: 'C' first, 'H' second, then
    all remaining symbols in alphabetical order.

    Note that 'H' is emitted ahead of the alphabetical part even when 'C' is
    not present.

    :param composition: iterable of element symbols (e.g. a dict keyed by symbol).
    :return: generator over the unique symbols in the order described above.
    """
    remaining = set(composition)
    for leading in ('C', 'H'):
        if leading in remaining:
            remaining.discard(leading)
            yield leading
    yield from sorted(remaining)


def composition_to_string(composition):
    """
    Build a molecular formula string from an element -> count mapping.

    Symbols are ordered by order_composition_by_hill. A count of exactly 1 is
    written without a number, counts greater than 1 are appended to the symbol
    and any other count (zero or negative) is left out.

    :param composition: mapping of element symbol to count.
    :return: molecular formula as a string (empty for an empty composition).
    """
    pieces = []
    for symbol in order_composition_by_hill(composition):
        count = composition[symbol]
        if count > 1:
            pieces.append(symbol + str(count))
        elif count == 1:
            pieces.append(symbol)
    return "".join(pieces)
def HC_HNOPS_rules(molecular_formula):
    """
    Evaluate the element-ratio heuristics (H/C and N, O, P, S to C) for a
    molecular formula.

    :param molecular_formula: molecular formula string; it is parsed with
        pyteomics_mass.Composition.
    :return: dictionary with the keys "HC" and "NOPSC". A value of 1 means the
        rule is satisfied, 0 means it is not:

        * "HC" is 1 only when both C and H are present and 0 < H/C < 6.
        * "NOPSC" is 1 when N/C <= 4, O/C <= 3, P/C <= 2 and S/C <= 3 (and none
          of the ratios is negative). A ratio counts as 0 when the element or
          carbon is absent, so a formula without carbon passes this rule.
    """
    # Upper limits for the element-to-carbon ratios.
    # NOTE(review): these look like the extended ranges of the "seven golden
    # rules" heuristics - confirm against the intended reference.
    max_ratio_to_carbon = (("N", 4.0), ("O", 3.0), ("P", 2.0), ("S", 3.0))

    composition = pyteomics_mass.Composition(molecular_formula)
    has_carbon = "C" in composition

    rules = {"HC": 0, "NOPSC": 0}

    # The H/C ratio is only defined when both elements are present. The former
    # "neither C nor H" branch could never be reached and has been removed.
    if has_carbon and "H" in composition:
        hc_ratio = float(composition["H"]) / float(composition["C"])
        if 0 < hc_ratio < 6:
            rules["HC"] = 1

    within_limits = []
    for element, limit in max_ratio_to_carbon:
        if has_carbon and element in composition:
            ratio = float(composition[element]) / float(composition["C"])
        else:
            ratio = 0.0
        within_limits.append(0.0 <= ratio <= limit)

    if all(within_limits):
        rules["NOPSC"] = 1

    return rules
def convert_sql_to_text(path_sql, table_name, path_out, separator="\t"):
    """
    Export a single table from an SQL dump to a delimited text file.

    The dump is replayed into an in-memory SQLite database, the requested table
    is read into a pandas DataFrame and written with DataFrame.to_csv (the
    DataFrame index is written as the first column).

    :param path_sql: path (str) to the SQL dump; a name ending in ".gz" is read
        as a gzip-compressed file. The content must be UTF-8 encoded.
    :param table_name: name of the table to export. It is inserted verbatim
        into the query (SQLite cannot bind identifiers), so only pass trusted
        names.
    :param path_out: path of the text file to write.
    :param separator: field separator used in the output file.
    :return: None
    """
    opener = gzip.open if path_sql.endswith(".gz") else open

    # Read the whole dump up front so the file handle is released even when
    # replaying the script fails.
    with opener(path_sql, mode="rb") as db_dump:
        sql_script = db_dump.read().decode("utf-8")

    conn = sqlite3.connect(":memory:")
    try:
        conn.executescript(sql_script)
        conn.commit()
        df = pd.read_sql_query("select * from " + table_name, conn)
    finally:
        # Always close the connection, also when the dump or the query is invalid.
        conn.close()

    df.to_csv(path_out, sep=separator)
-------------------------------------------------------------------------------- /beamspy/data/databases/databases.txt: -------------------------------------------------------------------------------- 1 | database organism_or_subset category source_url description source_filename source_format source_version released_on license login_required beams_db_version database_name id molecular_formula inchi inchi_key pubchem_id smiles name 2 | BioCyc Chlamydomonas_reinhardtii ChlamyCyc ftp.dpb.carnegiescience.edu//Pathways/Data_dumps/PMN13_July2018/compounds/chlamycyc_compounds.20180702 PlantCyc - ChlamyCyc (Chlamydomonas reinhardtii) | 2018-07-02 | v1 ChlamyCyc_compounds.20180702 tab 2018-07-02 2018-07-02 - 0 v1 biocyc_chlamycyc_20180702_v1 Compound_id Chemical_formula Smiles Compound_common_name 3 | CHEBI complete ftp://ftp.ebi.ac.uk/pub/databases/chebi/archive/rel195/SDF/ChEBI_complete.sdf.gz CHEBI - Complete database | rel195 | v1 ChEBI_complete.sdf sdf rel195 2021-01-01 https://creativecommons.org/licenses/by/4.0/ 0 v1 chebi_complete_rel195_v1 ChEBI ID Formulae InChI InChIKey PubChem Database Links SMILES ChEBI Name 4 | CHEBI complete_3star ftp://ftp.ebi.ac.uk/pub/databases/chebi/archive/rel195/SDF/ChEBI_complete_3star.sdf.gz CHEBI - Complete 3star records | rel195 | v1 ChEBI_complete_3star.sdf sdf rel195 2021-01-01 https://creativecommons.org/licenses/by/4.0/ 0 v1 chebi_complete_3star_rel195_v1 ChEBI ID Formulae InChI InChIKey PubChem Database Links SMILES ChEBI Name 5 | HMDB Human urine http://www.hmdb.ca/system/downloads/current/urine_metabolites.zip HMDB - The Urine Metabolome Database | v4.0 2020-09-10 | v1 urine_metabolites.xml xml v4.0 2020-09-10 http://www.hmdb.ca/downloads 0 v1 hmdb_urine_v4_0_20200910_v1 accession chemical_formula inchi inchikey pubchem_compound_id smiles name 6 | HMDB Human serum http://www.hmdb.ca/system/downloads/current/serum_metabolites.zip HMDB - The Serum Metabolome Database | v4.0 2020-09-10 | v1 serum_metabolites.xml xml v4.0 
2020-09-10 http://www.hmdb.ca/downloads 0 v1 hmdb_serum_v4_0_20200910_v1 accession chemical_formula inchi inchikey pubchem_compound_id smiles name 7 | HMDB Human csf http://www.hmdb.ca/system/downloads/current/csf_metabolites.zip HMDB - The cerebrospinal fluid (CSF) Metabolome Database | v4.0 2020-09-10 | v1 csf_metabolites.xml xml v4.0 2020-09-10 http://www.hmdb.ca/downloads 0 v1 hmdb_csf_v4_0_20200910_v1 accession chemical_formula inchi inchikey pubchem_compound_id smiles name 8 | HMDB Human saliva http://www.hmdb.ca/system/downloads/current/saliva_metabolites.zip HMDB - The Saliva Metabolome Database | v4.0 2020-09-10 | v1 saliva_metabolites.xml xml v4.0 2020-09-10 http://www.hmdb.ca/downloads 0 v1 hmdb_saliva_v4_0_20200910_v1 accession chemical_formula inchi inchikey pubchem_compound_id smiles name 9 | HMDB Human faces http://www.hmdb.ca/system/downloads/current/feces_metabolites.zip HMDB - The Fecal Metabolome Database | v4.0 2020-09-10 | v1 feces_metabolites.xml xml v4.0 2020-09-10 http://www.hmdb.ca/downloads 0 v1 hmdb_feces_v4_0_20200910_v1 accession chemical_formula inchi inchikey pubchem_compound_id smiles name 10 | HMDB Human sweat http://www.hmdb.ca/system/downloads/current/sweat_metabolites.zip HMDB - The Sweat Metabolome Database | v4.0 2020-09-10 | v1 sweat_metabolites.xml xml v4.0 2020-09-10 http://www.hmdb.ca/downloads 0 v1 hmdb_sweat_v4_0_20200910_v1 accession chemical_formula inchi inchikey pubchem_compound_id smiles name 11 | HMDB Human full http://www.hmdb.ca/system/downloads/current/hmdb_metabolites.zip HMDB - The Human Metabolome Database | v4.0 2020-09-09 | v1 hmdb_metabolites.xml xml v4.0 2020-09-09 http://www.hmdb.ca/downloads 0 v1 hmdb_full_v4_0_20200909_v1 accession chemical_formula inchi inchikey pubchem_compound_id smiles name 12 | KEGG Daphnia_pulex dpx https://www.kegg.jp/kegg/rest/keggapi.html KEGG - Daphnia pulex (dpx) | 2021-01-11 | v1 API API 2021-01-11 | v1 2021-01-11 - 0 v1 kegg_dpx_20210111_v1 entry formula name 13 | KEGG 
Human hsa https://www.kegg.jp/kegg/rest/keggapi.html KEGG - Human (hsa) | 2021-01-11 | v1 API API 2021-01-11 | v1 2021-01-11 - 0 v1 kegg_hsa_20210111_v1 entry formula name 14 | KEGG full https://www.kegg.jp/kegg/rest/keggapi.html KEGG - Full database | 2021-01-11 | v1 API API 2021-01-11 | v1 2021-01-11 - 0 v1 kegg_full_20210111_v1 entry formula name 15 | LIPID_MAPS Fatty Acyls [FA] https://www.lipidmaps.org/files/?file=LMSD&ext=sdf.zip LIPID_MAPS - Fatty Acyls [FA] | 2020-10-01 | v1 structures.sdf sdf 2020-10-01 2020-10-01 - 0 v1 lipidmaps_fattyacyls_20201001_v1 LM_ID FORMULA INCHI INCHI_KEY PUBCHEM_CID SMILES NAME 16 | LIPID_MAPS Glycerolipids [GL] https://www.lipidmaps.org/files/?file=LMSD&ext=sdf.zip LIPID_MAPS - Glycerolipids [GL] | 2020-10-01 | v1 structures.sdf sdf 2020-10-01 2020-10-01 - 0 v1 lipidmaps_glycerolipids_20201001_v1 LM_ID FORMULA INCHI INCHI_KEY PUBCHEM_CID SMILES NAME 17 | LIPID_MAPS Glycerophospholipids [GP] https://www.lipidmaps.org/files/?file=LMSD&ext=sdf.zip LIPID_MAPS - Glycerophospholipids [GP] | 2020-10-01 | v1 structures.sdf sdf 2020-10-01 2020-10-01 - 0 v1 lipidmaps_slycerophospholipids_20201001_v1 LM_ID FORMULA INCHI INCHI_KEY PUBCHEM_CID SMILES NAME 18 | LIPID_MAPS Polyketides [PK] https://www.lipidmaps.org/files/?file=LMSD&ext=sdf.zip LIPID_MAPS - Polyketides [PK] | 2020-10-01 | v1 structures.sdf sdf 2020-10-01 2020-10-01 - 0 v1 lipidmaps_solyketides_20201001_v1 LM_ID FORMULA INCHI INCHI_KEY PUBCHEM_CID SMILES NAME 19 | LIPID_MAPS Prenol Lipids [PR] https://www.lipidmaps.org/files/?file=LMSD&ext=sdf.zip LIPID_MAPS - Prenol Lipids [PR] | 2020-10-01 | v1 structures.sdf sdf 2020-10-01 2020-10-01 - 0 v1 lipidmaps_srenollipids_20201001_v1 LM_ID FORMULA INCHI INCHI_KEY PUBCHEM_CID SMILES NAME 20 | LIPID_MAPS Saccharolipids [SL] https://www.lipidmaps.org/files/?file=LMSD&ext=sdf.zip LIPID_MAPS - Sacccharolipids [SL]| 2020-10-01 | v1 structures.sdf sdf 2020-10-01 2020-10-01 - 0 v1 lipidmaps_sacccharolipids_20201001_v1 LM_ID FORMULA INCHI 
INCHI_KEY PUBCHEM_CID SMILES NAME 21 | LIPID_MAPS Sphingolipids [SP] https://www.lipidmaps.org/files/?file=LMSD&ext=sdf.zip LIPID_MAPS - Sphingolipids [SP] | 2020-10-01 | v1 structures.sdf sdf 2020-10-01 2020-10-01 - 0 v1 lipidmaps_sphingolipids_20201001_v1 LM_ID FORMULA INCHI INCHI_KEY PUBCHEM_CID SMILES NAME 22 | LIPID_MAPS Sterol Lipids [ST] https://www.lipidmaps.org/files/?file=LMSD&ext=sdf.zip LIPID_MAPS - Sterol Lipids [ST] | 2020-10-01 | v1 structures.sdf sdf 2020-10-01 2020-10-01 - 0 v1 lipidmaps_sterollipids_20201001_v1 LM_ID FORMULA INCHI INCHI_KEY PUBCHEM_CID SMILES NAME 23 | LIPID_MAPS full https://www.lipidmaps.org/files/?file=LMSD&ext=sdf.zip LIPID_MAPS - Full database | 2020-10-01 | v1 structures.sdf sdf 2020-10-01 2020-10-01 - 0 v1 lipidmaps_full_20201001_v1 LM_ID FORMULA INCHI INCHI_KEY PUBCHEM_CID SMILES NAME 24 | -------------------------------------------------------------------------------- /beamspy/data/databases/hmdb_csf_v4_0_20200910_v1.sql.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/computational-metabolomics/beamspy/66c74c16fe744582fed730cde843fc2b6e9f061f/beamspy/data/databases/hmdb_csf_v4_0_20200910_v1.sql.gz -------------------------------------------------------------------------------- /beamspy/data/databases/hmdb_feces_v4_0_20200910_v1.sql.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/computational-metabolomics/beamspy/66c74c16fe744582fed730cde843fc2b6e9f061f/beamspy/data/databases/hmdb_feces_v4_0_20200910_v1.sql.gz -------------------------------------------------------------------------------- /beamspy/data/databases/hmdb_full_v4_0_20200909_v1.sql.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/computational-metabolomics/beamspy/66c74c16fe744582fed730cde843fc2b6e9f061f/beamspy/data/databases/hmdb_full_v4_0_20200909_v1.sql.gz -------------------------------------------------------------------------------- /beamspy/data/databases/hmdb_saliva_v4_0_20200910_v1.sql.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/computational-metabolomics/beamspy/66c74c16fe744582fed730cde843fc2b6e9f061f/beamspy/data/databases/hmdb_saliva_v4_0_20200910_v1.sql.gz -------------------------------------------------------------------------------- /beamspy/data/databases/hmdb_serum_v4_0_20200910_v1.sql.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/computational-metabolomics/beamspy/66c74c16fe744582fed730cde843fc2b6e9f061f/beamspy/data/databases/hmdb_serum_v4_0_20200910_v1.sql.gz -------------------------------------------------------------------------------- /beamspy/data/databases/hmdb_sweat_v4_0_20200910_v1.sql.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/computational-metabolomics/beamspy/66c74c16fe744582fed730cde843fc2b6e9f061f/beamspy/data/databases/hmdb_sweat_v4_0_20200910_v1.sql.gz -------------------------------------------------------------------------------- /beamspy/data/databases/hmdb_urine_v4_0_20200910_v1.sql.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/computational-metabolomics/beamspy/66c74c16fe744582fed730cde843fc2b6e9f061f/beamspy/data/databases/hmdb_urine_v4_0_20200910_v1.sql.gz -------------------------------------------------------------------------------- /beamspy/data/databases/kegg_dpx_20210111_v1.sql.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/computational-metabolomics/beamspy/66c74c16fe744582fed730cde843fc2b6e9f061f/beamspy/data/databases/kegg_dpx_20210111_v1.sql.gz -------------------------------------------------------------------------------- /beamspy/data/databases/kegg_full_20210111_v1.sql.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/computational-metabolomics/beamspy/66c74c16fe744582fed730cde843fc2b6e9f061f/beamspy/data/databases/kegg_full_20210111_v1.sql.gz -------------------------------------------------------------------------------- /beamspy/data/databases/kegg_hsa_20210111_v1.sql.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/computational-metabolomics/beamspy/66c74c16fe744582fed730cde843fc2b6e9f061f/beamspy/data/databases/kegg_hsa_20210111_v1.sql.gz -------------------------------------------------------------------------------- /beamspy/data/databases/lipidmaps_fattyacyls_20201001_v1.sql.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/computational-metabolomics/beamspy/66c74c16fe744582fed730cde843fc2b6e9f061f/beamspy/data/databases/lipidmaps_fattyacyls_20201001_v1.sql.gz -------------------------------------------------------------------------------- /beamspy/data/databases/lipidmaps_full_20201001_v1.sql.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/computational-metabolomics/beamspy/66c74c16fe744582fed730cde843fc2b6e9f061f/beamspy/data/databases/lipidmaps_full_20201001_v1.sql.gz -------------------------------------------------------------------------------- /beamspy/data/databases/lipidmaps_glycerolipids_20201001_v1.sql.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/computational-metabolomics/beamspy/66c74c16fe744582fed730cde843fc2b6e9f061f/beamspy/data/databases/lipidmaps_glycerolipids_20201001_v1.sql.gz -------------------------------------------------------------------------------- /beamspy/data/databases/lipidmaps_sacccharolipids_20201001_v1.sql.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/computational-metabolomics/beamspy/66c74c16fe744582fed730cde843fc2b6e9f061f/beamspy/data/databases/lipidmaps_sacccharolipids_20201001_v1.sql.gz -------------------------------------------------------------------------------- /beamspy/data/databases/lipidmaps_slycerophospholipids_20201001_v1.sql.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/computational-metabolomics/beamspy/66c74c16fe744582fed730cde843fc2b6e9f061f/beamspy/data/databases/lipidmaps_slycerophospholipids_20201001_v1.sql.gz -------------------------------------------------------------------------------- /beamspy/data/databases/lipidmaps_solyketides_20201001_v1.sql.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/computational-metabolomics/beamspy/66c74c16fe744582fed730cde843fc2b6e9f061f/beamspy/data/databases/lipidmaps_solyketides_20201001_v1.sql.gz -------------------------------------------------------------------------------- /beamspy/data/databases/lipidmaps_sphingolipids_20201001_v1.sql.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/computational-metabolomics/beamspy/66c74c16fe744582fed730cde843fc2b6e9f061f/beamspy/data/databases/lipidmaps_sphingolipids_20201001_v1.sql.gz -------------------------------------------------------------------------------- /beamspy/data/databases/lipidmaps_srenollipids_20201001_v1.sql.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/computational-metabolomics/beamspy/66c74c16fe744582fed730cde843fc2b6e9f061f/beamspy/data/databases/lipidmaps_srenollipids_20201001_v1.sql.gz -------------------------------------------------------------------------------- /beamspy/data/databases/lipidmaps_sterollipids_20201001_v1.sql.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/computational-metabolomics/beamspy/66c74c16fe744582fed730cde843fc2b6e9f061f/beamspy/data/databases/lipidmaps_sterollipids_20201001_v1.sql.gz -------------------------------------------------------------------------------- /beamspy/data/isotopes.txt: -------------------------------------------------------------------------------- 1 | label_x label_y mass_difference abundance_x abundance_y ion_mode charge 2 | C (13C) 1.003355 98.93 1.07 both 1 3 | C (13C) 0.5016775 98.93 1.07 both 2 4 | S (34S) 1.995796 94.99 4.25 both 1 5 | K (41K) 1.998119 93.25 6.73 pos 1 6 | Cl (37Cl) 1.99705 75.76 24.24 neg 1 -------------------------------------------------------------------------------- /beamspy/data/multiple_charged_ions.txt: -------------------------------------------------------------------------------- 1 | label exact_mass charge ion_mode 2 | [M+H]+ 1.007276 1 pos 3 | [M+Na]+ 22.989221 1 pos 4 | [M+2H]2+ 1.007276 2 pos 5 | [M+H+Na]2+ 11.9982485 2 pos 6 | -------------------------------------------------------------------------------- /beamspy/data/neutral_losses.txt: -------------------------------------------------------------------------------- 1 | label mass_difference 2 | H2O 18.010565 3 | CO 27.994915 4 | -------------------------------------------------------------------------------- /beamspy/db_parsers.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import sys 
def parse_sdf(source):

    """
    Iterate over the records of an SDF file.

    Single and double quote characters are removed from every line. A record ends at a line
    containing "$$$$"; lines after the last such delimiter are not yielded.

    :param source: path to the SDF file
    :return: generator of ordered dictionaries, one per record. Each data item ("> <FIELD>" header
        followed by its value) is stored under its field name with trailing whitespace removed;
        the text that precedes the first data item is stored under the key "SDF".
    """

    with open(source, "r") as inp:
        lines = []  # collected with join() instead of repeated string concatenation
        for line in inp:
            line = line.replace("'", "").replace('"', "")
            if "$$$$" not in line:
                lines.append(line)
                continue

            chunks = "".join(lines).split("> <")
            record_out = OrderedDict()
            for chunk in chunks[1:]:
                # Split on the first ">\n" only, so a value line that ends with ">" is kept intact.
                name, sep, value = chunk.partition(">\n")
                if sep:
                    record_out[name] = value.rstrip()
                elif record_out:
                    # No header terminator: the value itself contained "> <", so this fragment
                    # continues the last field that was stored.
                    record_out[next(reversed(record_out))] += name.rstrip()
                # A fragment without header terminator and without a preceding field cannot be
                # attributed to anything and is skipped.
            record_out["SDF"] = chunks[0]
            yield record_out
            lines = []
def parse_nist_database(fn, skip_lines=10):

    """
    Iterate over the isotope records of a NIST "Linearized ASCII Output" text file.

    Records are separated by blank lines and consist of "key = value" lines. Empty chunks
    (e.g. caused by trailing or repeated blank lines) and lines without a " =" separator are ignored.

    :param fn: text file (NISTs Linearized ASCII Output)
    :param skip_lines: the number of lines of the data file to skip before beginning to read data.
    :return: generator of ordered dictionaries containing the parsed records.
        "Atomic Number" and "Mass Number" are integers;
        "Relative Atomic Mass" and "Isotopic Composition" are lists [value (float), uncertainty (int or None)];
        "Standard Atomic Weight" is a list of the numbers found, as strings;
        every other value is the stripped text.
    :raises IndexError: if a "Relative Atomic Mass" line contains no number
    """

    number = re.compile(r'\d+(?:\.\d+)?')  # compiled once, used for every numeric field

    def value_and_uncertainty(text):
        # "1.00782503223(9)" -> [1.00782503223, 9]; "1" -> [1.0, None]; no number -> []
        matches = number.findall(text)
        if matches:
            matches[0] = float(matches[0])
            if len(matches) > 1:
                matches[1] = int(matches[1])
            else:
                matches.append(None)
        return matches

    with open(fn, "r") as inp:
        for _ in range(skip_lines):
            inp.readline()
        for chunk in inp.read().split("\n\n"):
            chunk = chunk.strip()
            if not chunk:
                continue  # nothing between two separators
            record = OrderedDict()
            for line in chunk.split("\n"):
                key, sep, value = line.partition(" =")
                if not sep:
                    continue  # not a "key = value" line
                if key == "Relative Atomic Mass":
                    parsed = value_and_uncertainty(value)
                    if not parsed:
                        raise IndexError("No numeric value for 'Relative Atomic Mass': {!r}".format(line))
                    record[key] = parsed
                elif key == "Isotopic Composition":
                    # The composition is empty for isotopes without a natural abundance.
                    record[key] = value_and_uncertainty(value) or [0.0, None]
                elif key == "Atomic Number" or key == "Mass Number":
                    record[key] = int(value)
                elif key == "Standard Atomic Weight":
                    record[key] = number.findall(value)
                else:
                    record[key] = value.strip()
            if record:
                yield record
degree_b INTEGER DEFAULT NULL, 22 | r_value REAL DEFAULT NULL, 23 | p_value REAL DEFAULT NULL, 24 | rt_diff REAL DEFAULT NULL, 25 | mz_diff REAL DEFAULT NULL, 26 | PRIMARY KEY (peak_id_a, peak_id_b));""") 27 | 28 | df_coeffs = statistics.correlation_coefficients(df, max_rt_diff, coeff_thres, pvalue_thres, method, positive, block, ncpus) 29 | graph = statistics.correlation_graphs(df_coeffs, df) 30 | sub_graphs = list(graph.subgraph(c) for c in nx.weakly_connected_components(graph)) 31 | for i in range(len(sub_graphs)): 32 | sub_graphs[i].graph["groupid"] = i + 1 # not stored in output - place holder 33 | sub_graph_edges = [] 34 | # sort edges 35 | edges = sorted(sub_graphs[i].edges(data=True), key=lambda e: (e[0], e[1])) 36 | for edge in edges: 37 | sub_graph_edges.append((i+1, 38 | str(edge[0]), str(edge[1]), 39 | sub_graphs[i].degree(edge[0]), sub_graphs[i].degree(edge[1]), 40 | round(float(edge[2]["rvalue"]), 2), float(edge[2]["pvalue"]), 41 | float(edge[2]["rtdiff"]), float(edge[2]["mzdiff"]))) 42 | cursor.executemany("""insert into groups (group_id, peak_id_a, peak_id_b, degree_a, degree_b, 43 | r_value, p_value, rt_diff, mz_diff) values (?,?,?,?,?,?,?,?,?)""", sub_graph_edges) 44 | conn.commit() 45 | conn.close() 46 | return graph 47 | -------------------------------------------------------------------------------- /beamspy/in_out.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import copy 5 | import os 6 | import collections 7 | import numpy as np 8 | from pandas import read_csv 9 | import pandas as pd 10 | from pyteomics import mass as pyteomics_mass 11 | from beamspy import libraries 12 | from beamspy.auxiliary import nist_database_to_pyteomics 13 | from beamspy.auxiliary import order_composition_by_hill 14 | from beamspy.auxiliary import composition_to_string 15 | from beamspy.auxiliary import double_bond_equivalents 16 | from beamspy.auxiliary import 
def read_molecular_formulae(filename, separator="\t", calculate=True, filename_atoms=""):

    """
    Read a delimited text file with molecular formulae and build one record per formula.

    :param filename: delimited text file with a "molecular_formula" column
        (and an "exact_mass" column when calculate is False)
    :param separator: field delimiter of the input file
    :param calculate: if True the exact mass is calculated from the formula using the NIST database
        shipped in the package's "data" directory; otherwise the "exact_mass" column is used
    :param filename_atoms: not used by this function
    :return: list of ordered dictionaries with the keys "composition" (ordered by Hill notation), "CHNOPS",
        "exact_mass", the entries returned by HC_HNOPS_rules and lewis_senior_rules, and "double_bond_equivalents".
        Rows for which the composition is empty are skipped and reported with a warning.
    """

    import warnings  # local import: only needed to report skipped rows

    if calculate:
        path_nist_database = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data', 'nist_database.txt')
        nist_database = nist_database_to_pyteomics(path_nist_database)

    df = read_csv(filename, sep=separator, float_precision="round_trip")
    records = []
    for index, row in df.iterrows():
        formula = str(row.molecular_formula)
        comp = pyteomics_mass.Composition(formula)
        if not comp:
            # Warning(...) alone only creates an exception object; warnings.warn actually reports the row.
            warnings.warn("{} Skipped".format(row))
            continue

        record = collections.OrderedDict()
        record["composition"] = collections.OrderedDict((k, comp[k]) for k in order_composition_by_hill(comp.keys()))
        sum_CHNOPS = sum(comp[e] for e in comp if e in ("C", "H", "N", "O", "P", "S"))
        # True when the formula consists of C, H, N, O, P and S atoms only
        record["CHNOPS"] = sum_CHNOPS == sum(comp.values())
        if calculate:
            # same entry point as used in read_compounds
            record["exact_mass"] = round(pyteomics_mass.calculate_mass(formula=formula, mass_data=nist_database), 6)
        else:
            record["exact_mass"] = float(row.exact_mass)
        record.update(HC_HNOPS_rules(formula))
        record.update(lewis_senior_rules(formula))
        record["double_bond_equivalents"] = double_bond_equivalents(record["composition"])
        records.append(record)

    return records
record["exact_mass"] = float(row.exact_mass) 99 | 100 | record["compound_id"] = row.compound_id 101 | record["compound_name"] = row.compound_name 102 | comp = pyteomics_mass.Composition(str(row.molecular_formula)) 103 | record["molecular_formula"] = composition_to_string(comp) 104 | 105 | if "retention_time" in df.columns: 106 | record["retention_time"] = row.retention_time 107 | elif "rt" in df.columns: 108 | record["retention_time"] = row.rt 109 | if "adduct" in df.columns: 110 | record["adduct"] = row.adduct 111 | if lib_adducts and calculate: 112 | record["exact_mass"] += lib_adducts.lib[row.adduct]["mass"] 113 | 114 | records.append(record) 115 | else: 116 | Warning("{} Skipped".format(row)) 117 | 118 | return records 119 | 120 | 121 | def read_mass_differences(filename, ion_mode, separator="\t"): 122 | df = read_csv(filename, sep=separator, float_precision="round_trip") 123 | mass_differences = libraries.MassDifferences() 124 | for index, row in df.iterrows(): 125 | if "charge_x" in row: 126 | charge_x = row["charge_x"] 127 | charge_y = row["charge_y"] 128 | else: 129 | charge_x = 1 130 | charge_y = 1 131 | if "ion_mode" not in row: 132 | mass_differences.add(row["label_x"], row["label_y"], row["mass_difference"], charge_x, charge_y) 133 | elif (row["ion_mode"] == "pos" or row["ion_mode"] == "both") and ion_mode == "pos": 134 | mass_differences.add(row["label_x"], row["label_y"], row["mass_difference"], charge_x, charge_y) 135 | elif (row["ion_mode"] == "neg" or row["ion_mode"] == "both") and ion_mode == "neg": 136 | mass_differences.add(row["label_x"], row["label_y"], row["mass_difference"], charge_x, charge_y) 137 | return mass_differences 138 | 139 | 140 | def read_neutral_losses(filename, separator="\t"): 141 | df = read_csv(filename, sep=separator, float_precision="round_trip") 142 | nls = libraries.NeutralLosses() 143 | for index, row in df.iterrows(): 144 | nls.add(row["label"], row["mass_difference"]) 145 | return nls 146 | 147 | 148 | def 
def read_xset_matrix(fn_matrix, first_sample, separator="\t", mapping={"mz": "mz", "rt": "rt", "name": "name"}, samples_in_columns=True):
    """Read a combined peak table / intensity matrix from one delimited file.

    Args:
        fn_matrix: Path or buffer accepted by ``pandas.read_csv``.
        first_sample: Name of the first sample column; that column and every
            column to its right form the intensity matrix.
        separator: Column separator of the file.
        mapping: Column names in the file for the keys ``"name"``, ``"mz"``
            and ``"rt"``. All three keys are required. Only read, never
            mutated.
        samples_in_columns: When False the table is transposed after reading.

    Returns:
        pandas.DataFrame: columns ``name``, ``mz``, ``rt``, ``intensity``
        (row-wise median of the sample columns, ignoring missing values)
        followed by the sample columns.

    Raises:
        ValueError: If ``mapping`` lacks any of ``name``, ``mz`` or ``rt``.
    """
    # All three keys are dereferenced below, so each one is mandatory. The
    # original joined the tests with "and" and only failed when *all* were
    # missing, which surfaced later as a bare KeyError.
    if any(key not in mapping for key in ("mz", "rt", "name")):
        raise ValueError("Incorrect column mapping: provide column names for mz, and name")

    df = pd.read_csv(fn_matrix, header=0, sep=separator, dtype={"name": str}, float_precision="round_trip")
    # Zeros are treated as missing values across the whole table.
    df.replace(0, np.nan, inplace=True)

    if not samples_in_columns:
        df = df.T

    df_peaklist = df[[mapping["name"], mapping["mz"], mapping["rt"]]]
    df_matrix = df.iloc[:, df.columns.get_loc(first_sample):]
    # Assign positionally so the result does not depend on the row index labels.
    median_intensity = df_matrix.median(axis=1, skipna=True).values
    df_peaklist = df_peaklist.assign(intensity=median_intensity)
    df_peaklist.columns = ["name", "mz", "rt", "intensity"]
    return pd.concat([df_peaklist, df_matrix], axis=1)
def read_peaklist(fn_peaklist, separator="\t",
                  mapping={"name": "name", "mz": "mz", "rt": "rt", "intensity": "intensity"}):
    """Read a peak list and normalise it to ``name``, ``mz``, ``rt``, ``intensity``.

    Args:
        fn_peaklist: Path or seekable buffer accepted by ``pandas.read_csv``.
        separator: Column separator of the file.
        mapping: Column names in the file. ``"mz"`` and ``"intensity"`` are
            required; ``"rt"`` and ``"name"`` are optional. Only read, never
            mutated.

    Returns:
        pandas.DataFrame: columns ``name``, ``mz``, ``rt``, ``intensity`` with
        the three numeric columns cast to float.

        * Without a retention-time column ``rt`` is filled with ``0.0``.
        * Without a name column and without ``rt``, the name is the m/z text
          from the file with ``.`` replaced by ``_``.
        * Without a name column but with ``rt``, the name is
          ``M<rounded mz>T<rounded rt>``; repeated names get ``_1``, ``_2``,
          ... appended in order of appearance.

    Raises:
        ValueError: If the mapped ``mz`` or ``intensity`` column is missing.
    """
    df_peaklist = pd.read_csv(fn_peaklist, header=0, sep=separator, dtype={"name": str}, float_precision="round_trip")
    columns = set(df_peaklist.columns)
    mz_col, intensity_col = mapping["mz"], mapping["intensity"]
    if mz_col not in columns or intensity_col not in columns:
        raise ValueError("Incorrect mapping of columns: {}".format(str(mapping)))

    name_col = mapping.get("name")
    rt_col = mapping.get("rt")
    has_name = name_col in columns
    has_rt = rt_col in columns

    if not has_rt:
        if not has_name:
            # Re-read with m/z kept as text so the generated names preserve
            # the digits exactly as written in the file.
            if hasattr(fn_peaklist, "seek"):
                fn_peaklist.seek(0)  # buffers were consumed by the first read
            df_peaklist = pd.read_csv(fn_peaklist, header=0, sep=separator, dtype={mz_col: str})
            df_peaklist = df_peaklist[[mz_col, intensity_col]].copy()
            df_peaklist.columns = ["mz", "intensity"]
            # The columns were just renamed, so index by the new name.
            df_peaklist.insert(0, "name", [str(x).replace(".", "_") for x in df_peaklist["mz"]])
        else:
            df_peaklist = df_peaklist[[name_col, mz_col, intensity_col]].copy()
            df_peaklist.columns = ["name", "mz", "intensity"]
        df_peaklist.insert(2, "rt", 0.0)
    elif has_name:
        df_peaklist = df_peaklist[[name_col, mz_col, rt_col, intensity_col]].copy()
        df_peaklist.columns = ["name", "mz", "rt", "intensity"]
    else:
        df_peaklist = df_peaklist[[mz_col, rt_col, intensity_col]].copy()
        df_peaklist.columns = ["mz", "rt", "intensity"]

        mz_part = df_peaklist["mz"].round().astype(int).astype(str)
        rt_part = df_peaklist["rt"].round().astype(int).astype(str)
        names = "M" + mz_part + "T" + rt_part

        # Number every member of a group of identical names (1-based, in
        # order of appearance); unique names are left untouched.
        repeated = names.duplicated(keep=False)
        position = (names.groupby(names).cumcount() + 1).astype(str)
        names = names.where(~repeated, names + "_" + position)
        df_peaklist.insert(0, "name", names)

    df_peaklist["mz"] = df_peaklist["mz"].astype(float)
    df_peaklist["rt"] = df_peaklist["rt"].astype(float)
    df_peaklist["intensity"] = df_peaklist["intensity"].astype(float)

    return df_peaklist
def _drop_matching_pairs(lib, label_x, label_y):
    """Return ``lib`` without the entries keyed by ``label_x`` or ``label_y``."""
    kept = [item for item in lib if label_x not in item and label_y not in item]
    if len(kept) == len(lib):
        print("Entry not in library")
    return kept


class Adducts:
    """Adduct library: ``lib`` maps a name to ``{"mass", "charge"}``, sorted by mass."""

    def __init__(self, ion_mode=None, e=0.0005486):
        """Create an empty library.

        Args:
            ion_mode: Accepted for interface compatibility; every value
                yields an empty library.
            e: Stored as ``self.e`` (presumably the electron mass in Da;
                it is not used inside this class).
        """
        self.e = e
        # Always initialise the container: the original left ``lib`` unset
        # for any ion_mode other than "pos", "neg" or None.
        self.lib = OrderedDict()

    def add(self, name, mass, charge):
        """Insert or replace an adduct and keep the library sorted by mass."""
        self.lib[name] = OrderedDict([("mass", float(mass)), ("charge", int(charge))])
        self.lib = OrderedDict(sorted(self.lib.items(), key=lambda x: x[1]['mass']))

    def remove(self, name):
        """Remove the adduct ``name``; ``"*"`` empties the library.

        Raises:
            IOError: If ``name`` is not in the library.
        """
        if name == "*":
            self.lib = OrderedDict()
        elif name in self.lib:
            # OrderedDict has no ``remove`` method; delete by key instead.
            del self.lib[name]
        else:
            raise IOError("Entry not in library: {}".format(name))

    def __str__(self):
        out = "Adducts in library\n"
        out += "-----------------\n"
        out += "name\texact_mass\n"
        for key in self.lib:
            out += "%s\t%s\n" % (key, self.lib[key])
        return out


class Isotopes:
    """Isotope-pair library: a list of entries sorted by mass difference.

    Each entry is an ``OrderedDict`` whose first two keys are the labels of
    the pair (each mapping to ``{"abundance": float}``), followed by
    ``"mass_difference"`` and ``"charge"``.
    """

    def __init__(self, ion_mode=None):
        """Create the default library; "pos" adds K/(41K), "neg" adds Cl/(37Cl)."""
        self.ion_mode = ion_mode
        self.lib = [OrderedDict([("C", {"abundance": 100.0}), ("(13C)", {"abundance": 1.1}),
                                 ("mass_difference", 1.003355),
                                 ("charge", 1)]),
                    OrderedDict([("S", {"abundance": 100.0}), ("(34S)", {"abundance": 4.21}),
                                 ("mass_difference", 1.995796),
                                 ("charge", 1)])]

        if self.ion_mode == "pos":
            self.lib.append(OrderedDict([("K", {"abundance": 100.0}), ("(41K)", {"abundance": 6.73}),
                                         ("mass_difference", 1.998117), ("charge", 1)]))
            #self.lib.append(OrderedDict([("(6Li)", {"abundance": 7.42}), ("Li", {"abundance": 1.0}), ("mass_difference", 1.000882)]))

        elif self.ion_mode == "neg":
            self.lib.append(OrderedDict([("Cl", {"abundance": 100.0}), ("(37Cl)", {"abundance": 24.23}),
                                         ("mass_difference", 1.997050), ("charge", 1)]))
        self.lib = sorted(self.lib, key=lambda k: k['mass_difference'])

    def add(self, label_x, label_y, mx_abundance, my_abundance, mass_difference, charge):
        """Append an isotope pair and keep the library sorted by mass difference."""
        self.lib.append(OrderedDict([(label_x, {"abundance": float(mx_abundance)}),
                                     (label_y, {"abundance": float(my_abundance)}),
                                     ("mass_difference", float(mass_difference)),
                                     ("charge", int(charge))]))
        self.lib = sorted(self.lib, key=lambda k: k['mass_difference'])

    def remove(self, label_x="*", label_y="*"):
        """Remove every entry keyed by either label; two ``"*"`` empty the library.

        Prints ``Entry not in library`` once when nothing matched.
        """
        if label_x == "*" and label_y == "*":
            self.lib = []
        else:
            # Build a new list: removing from the list while iterating over
            # it skipped the entry that followed each removed one.
            self.lib = _drop_matching_pairs(self.lib, label_x, label_y)

    def __str__(self):
        out = "Isotopes in library:\n"
        out += "--------------------------------------------\n"
        out += "label_x\tlabel_y\tmass_difference\tcharge\tabundance_x\tabundance_y\n"
        for item in self.lib:
            # The first two keys of an entry are the pair labels.
            label_x, label_y = list(item)[0], list(item)[1]
            out += "{}\t{}\t{}\t{}\t{}\t{}\n".format(label_x, label_y,
                                                     item["mass_difference"],
                                                     item["charge"],
                                                     item[label_x]["abundance"], item[label_y]["abundance"])
        return out


class NeutralLosses:
    """Neutral-loss library: a list of ``{"label", "mass_difference"}`` sorted by mass difference."""

    def __init__(self):
        self.lib = []

    def add(self, label, mass_difference):
        """Append a neutral loss and keep the library sorted by mass difference."""
        self.lib.append(OrderedDict([("label", label), ("mass_difference", mass_difference)]))
        self.lib = sorted(self.lib, key=lambda k: k['mass_difference'])

    def remove(self, label="*"):
        """Remove every entry labelled ``label``; ``"*"`` empties the library.

        Prints ``Entry not in library`` once when nothing matched.
        """
        if label == "*":
            self.lib = []
            return
        # Compare against each entry's label: the original tested
        # ``label in self.lib`` (a list of dicts), which is never true.
        kept = [item for item in self.lib if item["label"] != label]
        if len(kept) == len(self.lib):
            print("Entry not in library")
        self.lib = kept

    def __str__(self):
        out = "Neutral losses in library:\n"
        out += "--------------------------------------------\n"
        out += "label\tmass_difference\n"
        for d in self.lib:
            out += "{}\t{}\n".format(d["label"], d["mass_difference"])
        return out


class MassDifferences:
    """Mass-difference library: a list of entries sorted by mass difference.

    Each entry is an ``OrderedDict`` whose first two keys are the pair labels
    (each mapping to ``{"charge": float}``), followed by ``"mass_difference"``.
    """

    def __init__(self, ion_mode=None):
        self.ion_mode = ion_mode
        self.lib = []

    def add(self, label_x, label_y, mass_difference, charge_x=1, charge_y=1):
        """Append a labelled pair and keep the library sorted by mass difference."""
        self.lib.append(OrderedDict([(label_x, {"charge": float(charge_x)}),
                                     (label_y, {"charge": float(charge_y)}),
                                     ("mass_difference", mass_difference)]))
        self.lib = sorted(self.lib, key=lambda k: k['mass_difference'])

    def remove(self, label_x="*", label_y="*"):
        """Remove every entry keyed by either label; two ``"*"`` empty the library.

        Prints ``Entry not in library`` once when nothing matched.
        """
        if label_x == "*" and label_y == "*":
            self.lib = []
        else:
            self.lib = _drop_matching_pairs(self.lib, label_x, label_y)

    def __str__(self):
        out = "Mass differences in library:\n"
        out += "--------------------------------------------\n"
        out += "label_x\tlabel_y\tmass_difference\tcharge_x\tcharge_y\n"
        for item in self.lib:
            label_x, label_y = list(item)[0], list(item)[1]
            out += "{}\t{}\t{}\t{}\t{}\n".format(label_x, label_y, item["mass_difference"],
                                                 item[label_x]["charge"], item[label_y]["charge"])
        return out
def plot_correlations(column_corr, column_pvalue, df):
    """Draw a hexbin of correlation coefficient vs p-value with marginal histograms.

    Args:
        column_corr: Name of the column in ``df`` holding correlation
            coefficients (plotted on the x axis over [-1, 1]).
        column_pvalue: Name of the column in ``df`` holding p-values
            (plotted on the y axis from 0 to the largest value).
        df: Table providing both columns.

    Returns:
        The ``matplotlib.pyplot`` module, with the new figure as the
        current figure.
    """
    fig = plt.figure(figsize=(8, 8))
    fig.set_size_inches(8.27, 11.69)

    # 3x3 grid: main panel bottom-left, x histogram on top, y histogram right.
    gs = gridspec.GridSpec(3, 3)
    ax_main = plt.subplot(gs[1:3, :2])
    ax_x_dist = plt.subplot(gs[0, :2], sharex=ax_main)
    ax_y_dist = plt.subplot(gs[1:3, 2], sharey=ax_main)

    ax_main.grid(linestyle='dashed')
    ax_x_dist.grid(linestyle='dashed')
    ax_y_dist.grid(linestyle='dashed')

    ax_main.set_axisbelow(True)
    ax_x_dist.set_axisbelow(True)
    ax_y_dist.set_axisbelow(True)

    max_pvalue = df[column_pvalue].max()
    # A largest p-value of zero (or NaN for an empty column) would give a
    # bin width of 0/NaN and make np.arange raise; fall back to the natural
    # upper bound of a p-value.
    if not max_pvalue > 0:
        max_pvalue = 1.0
    bin_size_pvalue = max_pvalue / 10.0

    hb = ax_main.hexbin(x=column_corr, y=column_pvalue, data=df, gridsize=(40, 40), mincnt=1, extent=[-1, 1.0, 0, max_pvalue])
    ax_main.set(xlabel="Correlation coefficient (R)", ylabel="P-value",
                xticks=np.arange(-1, 1.1, 0.1), yticks=np.arange(0.0, max_pvalue * 1.1, bin_size_pvalue))
    ax_main.ticklabel_format(style='sci', axis='y', scilimits=(0, 0), useMathText=True)

    # Marginal distribution of the correlation coefficients.
    bins = np.arange(-1, 1.05, 0.05)
    ax_x_dist.hist(x=column_corr, data=df, bins=bins, align='mid', color="lightblue")
    ax_x_dist.set(ylabel='Frequency', xlim=(-1.1, 1.1))
    ax_x_dist.axvline(0, color='k', linestyle='dashed', linewidth=1)
    ax_x_dist.tick_params(axis="x", labelsize=7.5)

    # Cumulative, normalised curve on a secondary y axis.
    ax_xcum_dist = ax_x_dist.twinx()
    ax_xcum_dist.hist(x=column_corr, data=df, bins=bins, cumulative=True, histtype='step',
                      density=True, color='darkblue', align='mid')
    ax_xcum_dist.set(xlim=(-1.1, 1.1))
    ax_xcum_dist.tick_params(axis="y", colors='darkblue')
    ax_xcum_dist.set_ylabel('cumulative', color='darkblue')
    ax_xcum_dist.set(yticks=np.arange(0.0, 1.2, 0.2))

    # Marginal distribution of the p-values, drawn horizontally.
    bins = np.arange(0, max_pvalue + bin_size_pvalue, bin_size_pvalue)
    ax_y_dist.hist(x=column_pvalue, data=df, bins=bins, orientation='horizontal',
                   align='mid', color="lightblue")
    ax_y_dist.set(xlabel='Frequency')
    ax_ycum_dist = ax_y_dist.twiny()
    ax_ycum_dist.hist(x=column_pvalue, data=df, bins=bins, cumulative=True, histtype='step',
                      density=True, color='darkblue', align='mid', orientation='horizontal')
    ax_ycum_dist.tick_params(axis="x", colors='darkblue')
    ax_ycum_dist.set_xlabel('cumulative', color='darkblue')
    ax_ycum_dist.set(xticks=np.arange(0.0, 1.2, 0.2), ylim=(-bin_size_pvalue, max_pvalue * 1.1))

    plt.setp(ax_y_dist.get_yticklabels(), visible=False)
    plt.setp(ax_x_dist.get_xticklabels(), rotation=90)
    plt.setp(ax_main.get_xticklabels(), rotation=90)
    plt.setp(ax_y_dist.get_xticklabels(), rotation=90)
    plt.setp(ax_ycum_dist.get_xticklabels(), rotation=90)

    # Leave room on the right for the hexbin colour bar.
    fig.subplots_adjust(top=0.85, right=0.85)
    cbar_ax = fig.add_axes([0.87, 0.15, 0.03, 0.4])

    cb = plt.colorbar(hb, cax=cbar_ax)
    cb.set_label('Frequency')

    return plt
def report(db, pdf_out, column_corr, column_pvalue, column_ppm_error, column_adducts):
    """Write a PDF summary of the grouping and annotation tables in a SQLite file.

    One page is produced for the ``groups`` table (via ``plot_correlations``)
    and one for every table whose name starts with ``compounds_`` (via
    ``plot_annotations``). The first page written carries the report title.

    Args:
        db: Path to the SQLite database.
        pdf_out: Path of the PDF file to create.
        column_corr: Column of ``groups`` holding correlation coefficients.
        column_pvalue: Column of ``groups`` holding p-values.
        column_ppm_error: Column of each ``compounds_*`` table holding ppm errors.
        column_adducts: Column of each ``compounds_*`` table holding adduct labels.

    Note:
        Column and table names are interpolated into the SQL text as-is, so
        they must come from a trusted source.
    """
    with PdfPages(pdf_out) as pdf:

        conn = sqlite3.connect(db)
        try:
            cursor = conn.cursor()
            cursor.execute("""SELECT name FROM sqlite_master WHERE type='table';""")
            title = "Summary - BEAMSpy\n\n\n"
            for table in cursor.fetchall():
                table_name = str(table[0])
                if table_name == "groups":

                    df = pd.read_sql_query("SELECT {}, {} FROM groups".format(column_corr, column_pvalue), conn)

                    pyplot = plot_correlations(column_corr, column_pvalue, df)
                    pyplot.suptitle('{}Grouping features'.format(title), fontsize=20)

                elif table_name.startswith("compounds_"):

                    df = pd.read_sql_query("SELECT {}, {} FROM {}".format(column_ppm_error, column_adducts, table_name), conn)

                    # Use the caller's column names; the original passed the
                    # literals "ppm_error" and "adduct" and ignored them.
                    pyplot = plot_annotations(column_ppm_error, column_adducts, df)
                    pyplot.suptitle('{}Compound Annotation\nDatabase: {}'.format(title, table_name.replace("compounds_", "")), fontsize=20)

                else:
                    continue

                # Only the first page carries the title; later pages keep the
                # same vertical offset with blank lines.
                title = "\n\n\n"
                pdf.savefig(dpi=300)
                pyplot.close()
        finally:
            # Release the database handle even when a query or plot fails.
            conn.close()
def _filter_coefficients(peaks, coeffs, coeff_thres, pvalue_thres):
    """Return ``[name_a, name_b, r, p]`` rows for the pairs that pass both thresholds."""
    rows = []
    for peak, coeff in zip(peaks, coeffs):
        r_value, p_value = coeff[0], coeff[1]
        # "not x > y" (rather than "x <= y") also rejects NaN coefficients.
        if not abs(r_value) > coeff_thres:
            continue
        # Check for None first: comparing a float with None raises TypeError.
        if pvalue_thres is not None and not abs(p_value) < pvalue_thres:
            continue
        rows.append([peak[0], peak[1], round(r_value, 2), p_value])
    return rows


def correlation_coefficients(df, max_rt_diff=5.0, coeff_thres=0.7, pvalue_thres=0.05, method="pearson", positive=True, block=5000, ncpus=None):
    """Correlate the intensity profiles of feature pairs and keep the significant ones.

    Args:
        df: Either a table whose first three columns are ``name``, ``mz``,
            ``rt`` (profiles are read from the fifth column onwards; the
            fourth is presumably the summary ``intensity`` column), or a
            table whose first column is ``mz`` with no ``rt`` column
            (profiles are read from the second column onwards).
        max_rt_diff: Largest retention-time difference for a pair to be
            compared. ``None`` disables the retention-time filter so every
            pair is compared.
        coeff_thres: Pairs are kept when ``|r|`` is strictly greater.
        pvalue_thres: Pairs are kept when the p-value is strictly smaller;
            ``None`` disables the p-value filter.
        method: ``"pearson"`` or ``"spearman"`` (forwarded to ``_cc_pp_``).
        positive: When True only pairs with ``r > 0`` are returned.
        block: Pairs are processed in batches of ``block * ncpus``.
        ncpus: Number of worker processes; ``None`` uses the CPU count minus
            one (at least one).

    Returns:
        pandas.DataFrame: columns ``name_a``, ``name_b``, ``r_value``
        (rounded to two decimals) and ``p_value``.

    Raises:
        ValueError: If the leading columns match neither accepted layout.
    """
    if ["name", "mz", "rt"] == list(df.columns.values[0:3]):
        ncols = 4
        # Sorting by rt lets the inner loop stop at the first pair that is
        # too far apart in retention time.
        df = df.sort_values(['rt', 'mz']).reset_index(drop=True)
    elif "mz" == df.columns.values[0] and "rt" not in df.columns.values:
        ncols = 1
    else:
        raise ValueError("Incorrect column names: [name, mz, rt] or [mz, intensity]")

    if ncpus is None:
        ncpus = cpu_count()
        if ncpus > 1:
            ncpus -= 1

    column_names = ["name_a", "name_b", "r_value", "p_value"]

    pairs, peaks, rows = [], [], []
    n = len(df.iloc[:, 0])

    # Only show a progress bar for larger inputs.
    disable_tqdm = n < 100

    for i in tqdm.trange(n, disable=disable_tqdm):

        intens_i = df.iloc[i, ncols:].values

        # At least four observed intensities are needed for a correlation.
        if pd.notnull(intens_i).sum() < 4:
            continue

        for j in range(i + 1, n):

            if max_rt_diff is not None:
                rt_diff = abs(float(df.loc[j, "rt"] - df.loc[i, "rt"]))
                if not rt_diff <= max_rt_diff:
                    break
            else:
                rt_diff = 0.0  # Direct Infusion - no retention time available

            intens_j = df.iloc[j, ncols:].values
            # Keep only the samples in which both features were observed.
            nas = np.logical_or(pd.isnull(intens_i), pd.isnull(intens_j))
            intens_filt_i, intens_filt_j = intens_i[~nas], intens_j[~nas]

            if len(intens_filt_i) > 3 and len(intens_filt_j) > 3:
                peaks.append([df.iloc[i, 0], df.iloc[j, 0], rt_diff])
                pairs.append([intens_filt_i, intens_filt_j])
                if len(pairs) == block * ncpus:
                    coeffs = _cc_pp_(pairs, method, ncpus)
                    rows.extend(_filter_coefficients(peaks, coeffs, coeff_thres, pvalue_thres))
                    pairs, peaks = [], []

    # Process the final, partially filled batch.
    if len(pairs) > 0:
        coeffs = _cc_pp_(pairs, method, ncpus)
        rows.extend(_filter_coefficients(peaks, coeffs, coeff_thres, pvalue_thres))

    df_coeffs = pd.DataFrame(rows, columns=column_names)

    if positive:
        df_coeffs = df_coeffs[df_coeffs['r_value'] > 0].reset_index(drop=True)

    return df_coeffs
intensity=row["intensity_y"], rt=row["rt_y"]) 144 | 145 | mz_diff = row["mz_x"] - row["mz_y"] 146 | 147 | if mz_diff < 0: 148 | graphs.add_edge(str(row["name_a"]), str(row["name_b"]), rvalue=row["r_value"], pvalue=row["p_value"], 149 | mzdiff=abs(row["mz_x"]-row["mz_y"]), rtdiff=abs(row["rt_x"]-row["rt_y"])) 150 | else: 151 | graphs.add_edge(str(row["name_b"]), str(row["name_a"]), rvalue=row["r_value"], pvalue=row["p_value"], 152 | mzdiff=abs(row["mz_x"] - row["mz_y"]), rtdiff=abs(row["rt_x"] - row["rt_y"])) 153 | return graphs 154 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | coverage: 2 | status: 3 | patch: 4 | default: 5 | target: 80% 6 | project: 7 | default: 8 | threshold: 5% 9 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/source/beams.rst: -------------------------------------------------------------------------------- 1 | BEAMSpy package 2 | ================================== 3 | 4 | beamspy.annotation module 5 | ----------------------- 6 | 7 | .. automodule:: beamspy.annotation 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | beamspy.auxiliary module 13 | ---------------------- 14 | 15 | .. automodule:: beamspy.auxiliary 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | 20 | beamspy.db\_parsers module 21 | ------------------------ 22 | 23 | .. 
automodule:: beamspy.db_parsers 24 | :members: 25 | :undoc-members: 26 | :show-inheritance: 27 | 28 | beamspy.grouping module 29 | --------------------- 30 | 31 | .. automodule:: beamspy.grouping 32 | :members: 33 | :undoc-members: 34 | :show-inheritance: 35 | 36 | beamspy.gui module 37 | ---------------- 38 | 39 | .. automodule:: beamspy.gui 40 | :members: 41 | :undoc-members: 42 | :show-inheritance: 43 | 44 | beamspy.in\_out module 45 | -------------------- 46 | 47 | .. automodule:: beamspy.in_out 48 | :members: 49 | :undoc-members: 50 | :show-inheritance: 51 | 52 | beamspy.libraries module 53 | ---------------------- 54 | 55 | .. automodule:: beamspy.libraries 56 | :members: 57 | :undoc-members: 58 | :show-inheritance: 59 | 60 | beamspy.plots module 61 | ------------------ 62 | 63 | .. automodule:: beamspy.plots 64 | :members: 65 | :undoc-members: 66 | :show-inheritance: 67 | 68 | beamspy.statistics module 69 | ----------------------- 70 | 71 | .. automodule:: beamspy.statistics 72 | :members: 73 | :undoc-members: 74 | :show-inheritance: 75 | 76 | -------------------------------------------------------------------------------- /docs/source/cli.rst: -------------------------------------------------------------------------------- 1 | Command line interface (CLI) 2 | ================================== 3 | 4 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # http://www.sphinx-doc.org/en/master/config 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. 
If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | import os 14 | import sys 15 | sys.path.insert(0, os.path.abspath('..')) 16 | 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | project = 'BEAMSpy' 21 | copyright = '2019, Ralf Weber' 22 | author = 'Ralf Weber' 23 | 24 | # The full version, including alpha/beta/rc tags 25 | release = '0.1.0' 26 | 27 | 28 | # -- General configuration --------------------------------------------------- 29 | 30 | # Add any Sphinx extension module names here, as strings. They can be 31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 32 | # ones. 33 | extensions = [ 34 | 'sphinx.ext.autodoc', 35 | 'sphinx.ext.doctest', 36 | 'sphinx.ext.viewcode', 37 | 'sphinx.ext.napoleon', 38 | 'sphinx.ext.todo', 39 | 'sphinx.ext.mathjax' 40 | ] 41 | 42 | # Add any paths that contain templates here, relative to this directory. 43 | templates_path = ['_templates'] 44 | 45 | # The master toctree document. 46 | master_doc = 'index' 47 | 48 | # List of patterns, relative to source directory, that match files and 49 | # directories to ignore when looking for source files. 50 | # This pattern also affects html_static_path and html_extra_path. 51 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 52 | 53 | 54 | # -- Options for HTML output ------------------------------------------------- 55 | 56 | # The theme to use for HTML and HTML Help pages. See the documentation for 57 | # a list of builtin themes. 58 | # 59 | html_theme = 'sphinx_rtd_theme' 60 | 61 | # Add any paths that contain custom static files (such as style sheets) here, 62 | # relative to this directory. They are copied after the builtin static files, 63 | # so a file named "default.css" will overwrite the builtin "default.css". 
64 | html_static_path = ['_static'] 65 | -------------------------------------------------------------------------------- /docs/source/galaxy.rst: -------------------------------------------------------------------------------- 1 | Galaxy tools & workflows 2 | ================================== 3 | 4 | -------------------------------------------------------------------------------- /docs/source/gui.rst: -------------------------------------------------------------------------------- 1 | Graphical user interface (GUI) 2 | ================================== 3 | 4 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. BEAMSpy documentation master file, created by 2 | sphinx-quickstart on Sun Aug 11 22:50:29 2019. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to BEAMS's documentation! 7 | ================================= 8 | 9 | .. 
toctree:: 10 | :maxdepth: 2 11 | :caption: Contents: 12 | 13 | introduction 14 | quickstart 15 | cli 16 | gui 17 | galaxy 18 | beams 19 | 20 | 21 | 22 | Indices and tables 23 | ================== 24 | 25 | * :ref:`genindex` 26 | * :ref:`modindex` 27 | * :ref:`search` 28 | -------------------------------------------------------------------------------- /docs/source/introduction.rst: -------------------------------------------------------------------------------- 1 | Introduction 2 | ============= 3 | 4 | -------------------------------------------------------------------------------- /docs/source/quickstart.rst: -------------------------------------------------------------------------------- 1 | Quick Start 2 | ================================== 3 | 4 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: beamspy 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | dependencies: 6 | - numpy 7 | - scipy 8 | - requests 9 | - networkx<=2.5 10 | - pandas<=1.5.3 11 | - matplotlib 12 | - seaborn 13 | - pyteomics<=4.4.1 14 | - biopython<=1.78 15 | - pyside2 16 | - tqdm 17 | -------------------------------------------------------------------------------- /examples/run.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | import os 5 | from beamspy import in_out 6 | import networkx as nx 7 | from beamspy.grouping import group_features 8 | from beamspy.annotation import annotate_adducts 9 | from beamspy.annotation import annotate_isotopes 10 | from beamspy.annotation import annotate_oligomers 11 | from beamspy.annotation import annotate_compounds 12 | from beamspy.annotation import annotate_molecular_formulae 13 | from beamspy.annotation import summary 14 | from beamspy import plots 15 | 16 | 17 | def main(): 18 | 19 | path = "../tests/test_data/" 20 | 
fn_peaklist = os.path.join(path, "peaklist_lcms_pos_theoretical.txt") 21 | fn_matrix = os.path.join(path, "dataMatrix_lcms_theoretical.txt") 22 | 23 | df = in_out.combine_peaklist_matrix(fn_peaklist, fn_matrix) 24 | 25 | ion_mode = "pos" 26 | db_out = "results.sqlite".format(ion_mode) 27 | 28 | # graphs = group_features(df, db_out, max_rt_diff=5.0, coeff_thres=0.7, pvalue_thres=0.01, method="pearson") 29 | graphs = group_features(df, db_out, max_rt_diff=5.0, coeff_thres=0.7, pvalue_thres=0.01, method="pearson", positive=False) 30 | 31 | nx.write_gml(graphs, "graphs.gml") 32 | # graphs = nx.read_gml("graphs.gml") 33 | 34 | path = "../beamspy/data" 35 | lib_isotopes = in_out.read_isotopes(os.path.join(path, "isotopes.txt"), ion_mode) 36 | lib_adducts = in_out.read_adducts(os.path.join(path, "adducts.txt"), ion_mode) 37 | 38 | print(lib_isotopes) 39 | print(lib_adducts) 40 | 41 | ppm = 5.0 42 | 43 | annotate_adducts(graphs, db_out, ppm, lib_adducts) 44 | annotate_isotopes(graphs, db_out, ppm, lib_isotopes) 45 | 46 | # annotate_molecular_formulae(df, lib_adducts, ppm, db_out) 47 | annotate_compounds(df, lib_adducts, ppm, db_out, "hmdb_full_v4_0_20200909_v1") 48 | 49 | df_out = summary(df, db_out) 50 | fn_out = "summary.txt" 51 | df_out.to_csv(fn_out, sep="\t", index=False, encoding="utf-8") 52 | 53 | pdf_out = "report.pdf" 54 | plots.report(db=db_out, pdf_out=pdf_out, column_corr="r_value", column_pvalue="p_value", 55 | column_ppm_error="ppm_error", column_adducts="adduct") 56 | 57 | 58 | if __name__ == '__main__': 59 | main() 60 | -------------------------------------------------------------------------------- /examples/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | beamspy group-features \ 4 | --peaklist ../tests/test_data/peaklist_lcms_pos_theoretical.txt \ 5 | --intensity-matrix ../tests/test_data/dataMatrix_lcms_theoretical.txt \ 6 | --gml graph.gml \ 7 | --db results.sqlite \ 8 | --max-rt-diff 5.0 
\ 9 | --method pearson \ 10 | --coeff-threshold 0.7 \ 11 | --pvalue-threshold 0.01 \ 12 | --positive 13 | 14 | beamspy annotate-peak-patterns \ 15 | --peaklist ../tests/test_data/peaklist_lcms_pos_theoretical.txt \ 16 | --intensity-matrix ../tests/test_data/dataMatrix_lcms_theoretical.txt \ 17 | --gml graph.gml \ 18 | --db results.sqlite \ 19 | --adducts \ 20 | --adducts-library ../beamspy/data/adducts.txt \ 21 | --isotopes \ 22 | --isotopes-library ../beamspy/data/isotopes.txt \ 23 | --ion-mode pos \ 24 | --ppm 5.0 25 | 26 | beamspy annotate-compounds \ 27 | --peaklist ../tests/test_data/peaklist_lcms_pos_theoretical.txt \ 28 | --intensity-matrix ../tests/test_data/dataMatrix_lcms_theoretical.txt \ 29 | --db results.sqlite \ 30 | --db-name hmdb_full_v4_0_20200909_v1 \ 31 | --adducts-library ../beamspy/data/adducts.txt \ 32 | --ion-mode pos \ 33 | --ppm 3.0 34 | 35 | beamspy summary-results \ 36 | --peaklist ../tests/test_data/peaklist_lcms_pos_theoretical.txt \ 37 | --intensity-matrix ../tests/test_data/dataMatrix_lcms_theoretical.txt \ 38 | --db results.sqlite \ 39 | --output summary.txt \ 40 | --sep tab 41 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | scipy 3 | requests 4 | networkx<=2.5 5 | pandas<=1.5.3 6 | matplotlib 7 | seaborn 8 | pyteomics<=4.4.1 9 | biopython<=1.78 10 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import setuptools 5 | import beamspy 6 | 7 | 8 | def main(): 9 | 10 | install_requires = open("requirements.txt").read().splitlines() 11 | 12 | setuptools.setup(name="beamspy", 13 | version=beamspy.__version__, 14 | description="Putative annotation of metabolites for mass spectrometry-based metabolomics 
datasets.", 15 | long_description=open("README.rst").read(), 16 | long_description_content_type="text/x-rst", 17 | author="Ralf Weber", 18 | author_email="r.j.weber@bham.ac.uk", 19 | url="https://github.com/computational-metabolomics/beamspy", 20 | license="GPLv3", 21 | platforms=["Windows, UNIX"], 22 | keywords=["Metabolomics", "Mass spectrometry", "Liquid-Chromatography Mass Spectrometry", "Metabolite Annotation"], 23 | packages=setuptools.find_packages(), 24 | python_requires=">=3.8", 25 | test_suite="tests.suite", 26 | install_requires=install_requires, 27 | include_package_data=True, 28 | classifiers=[ 29 | "Programming Language :: Python :: 3", 30 | "Programming Language :: Python :: 3.8", 31 | "Programming Language :: Python :: 3.9", 32 | "Programming Language :: Python :: 3.10", 33 | "Topic :: Scientific/Engineering :: Bio-Informatics", 34 | "Topic :: Scientific/Engineering :: Chemistry", 35 | "Topic :: Utilities", 36 | "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", 37 | "Operating System :: OS Independent", 38 | ], 39 | entry_points={ 40 | "console_scripts": [ 41 | "beamspy = beamspy.__main__:main" 42 | ] 43 | } 44 | ) 45 | 46 | 47 | if __name__ == "__main__": 48 | main() 49 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | 4 | def suite(): 5 | test_loader = unittest.TestLoader() 6 | test_suite = test_loader.discover('.', pattern='test_*.py') 7 | return test_suite 8 | -------------------------------------------------------------------------------- /tests/test_auxiliary.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import os 5 | import unittest 6 | from collections import OrderedDict 7 | from beamspy.auxiliary import * 8 | 9 | 10 | class AuxiliaryTestCase(unittest.TestCase): 11 
| 12 | def setUp(self): 13 | self.path, f = os.path.split(os.path.dirname(os.path.abspath(__file__))) 14 | 15 | def test_order_composition_by_hill(self): 16 | composition = OrderedDict([('C', 6), ('O', 6), ('H', 12)]) 17 | hill = order_composition_by_hill(composition) 18 | self.assertEqual(list(hill), ['C', 'H', 'O']) 19 | 20 | def test_composition_to_string(self): 21 | composition = OrderedDict([('C', 6), ('O', 6), ('H', 12)]) 22 | mf = composition_to_string(composition) 23 | self.assertEqual(mf, "C6H12O6") 24 | 25 | def test_double_bond_equivalents(self): 26 | composition = OrderedDict([('C', 6), ('H', 12), ('O', 6)]) 27 | dbe = double_bond_equivalents(composition) 28 | self.assertEqual(dbe, 1) 29 | 30 | composition = OrderedDict([('C', 6), ('H', 24), ('O', 12)]) 31 | dbe = double_bond_equivalents(composition) 32 | self.assertEqual(dbe, -5.0) 33 | 34 | def test_HC_HNOPS_rules(self): 35 | molecular_formula = "C6H12O6" 36 | rules = HC_HNOPS_rules(molecular_formula) 37 | self.assertEqual(rules, {"HC": 1, "NOPSC": 1}) 38 | 39 | molecular_formula = "C6H36O6" 40 | rules = HC_HNOPS_rules(molecular_formula) 41 | self.assertEqual(rules, {"HC": 0, "NOPSC": 1}) 42 | 43 | molecular_formula = "C6H12O32" 44 | rules = HC_HNOPS_rules(molecular_formula) 45 | self.assertEqual(rules, {"HC": 1, "NOPSC": 0}) 46 | 47 | def test_lewis_senior_rules(self): 48 | molecular_formula = "C6H12O6" 49 | rules = lewis_senior_rules(molecular_formula) 50 | self.assertEqual(rules, {"lewis": 1, "senior": 1}) 51 | 52 | molecular_formula = "C6H24O12" 53 | rules = lewis_senior_rules(molecular_formula) 54 | self.assertEqual(rules, {"lewis": 1, "senior": 0}) 55 | 56 | 57 | if __name__ == '__main__': 58 | unittest.main() 59 | -------------------------------------------------------------------------------- /tests/test_data/biocyc_record.txt: -------------------------------------------------------------------------------- 1 | UNIQUE-ID - PANTOTHENATE 2 | TYPES - Compounds 3 | COMMON-NAME - (R)-pantothenate 4 
| ATOM-CHARGES - (4 -1) 5 | CHEMICAL-FORMULA - (H 16) 6 | CHEMICAL-FORMULA - (N 1) 7 | CHEMICAL-FORMULA - (C 9) 8 | CHEMICAL-FORMULA - (O 5) 9 | DBLINKS - (PUBCHEM "167945" NIL |taltman| 3451921010 NIL NIL) 10 | DBLINKS - (|Wikipedia| "Vitamin_B5" NIL |caspi| 3496684943 NIL NIL) 11 | DBLINKS - (PUBCHEM "167945" NIL |taltman| 3466375285 NIL NIL) 12 | DBLINKS - (CHEBI "29032" NIL |taltman| 3452363569 NIL NIL) 13 | DBLINKS - (LIGAND-CPD "C00864" NIL |kr| 3346617700 NIL NIL) 14 | DBLINKS - (CAS "79-83-4") 15 | IN-MIXTURE - MIX66-4 16 | INCHI - InChI=1/C9H17NO5/c1-9(2,5-11)7(14)8(15)10-4-3-6(12)13/h7,11,14H,3-5H2,1-2H3,(H,10,15)(H,12,13)/t7-/m0/s1/f/h10,12H 17 | MOLECULAR-WEIGHT - 218.229 18 | MONOISOTOPIC-MW - 219.11067266139997 19 | SMILES - C(CC(=O)[O-])NC(=O)C(O)C(C)(C)CO 20 | SYNONYMS - vitamin B5 21 | SYNONYMS - (R)-pantothenic acid 22 | SYNONYMS - D-pantothenic acid 23 | SYSTEMATIC-NAME - beta-alanine, (R)-N-(2,4-dihydroxy-3,3-dimethyl-oxobutyl)- 24 | // 25 | -------------------------------------------------------------------------------- /tests/test_data/dataMatrix_dims_theoretical.txt: -------------------------------------------------------------------------------- 1 | mz sample01 sample02 sample03 sample04 sample05 sample06 sample07 sample08 sample09 sample10 sample11 sample12 sample13 sample14 sample15 sample16 sample17 sample18 sample19 sample20 sample21 sample22 sample23 sample24 sample25 sample26 sample27 sample28 sample29 sample30 sample31 sample32 sample33 sample34 sample35 sample36 sample37 sample38 sample39 sample40 sample41 sample42 sample43 sample44 sample45 sample46 sample47 sample48 sample49 sample50 2 | 126.979204 1348.35 1468.50 1348.35 1441.80 1361.70 1428.45 1428.45 1468.50 1455.15 1388.40 1468.50 1468.50 1388.40 1375.05 1428.45 1348.35 1428.45 1428.45 1441.80 1428.45 1375.05 1428.45 1415.10 1468.50 1361.70 1348.35 1388.40 1415.10 1415.10 1361.70 1428.45 1401.75 1428.45 1441.80 1375.05 1468.50 1455.15 1455.15 1388.40 1348.35 1428.45 1468.50 
1375.05 1375.05 1415.10 1401.75 1361.70 1361.70 1468.50 1455.15 3 | 135.028801 2606.04 2581.91 2654.30 2606.04 2654.30 2485.39 2509.52 2606.04 2606.04 2654.30 2654.30 2630.17 2557.78 2630.17 2437.13 2606.04 2630.17 2581.91 2606.04 2533.65 2581.91 2581.91 2557.78 2485.39 2485.39 2581.91 2630.17 2509.52 2606.04 2606.04 2461.26 2533.65 2485.39 2485.39 2557.78 2437.13 2630.17 2509.52 2606.04 2533.65 2533.65 2437.13 2630.17 2654.30 2630.17 2485.39 2485.39 2606.04 2461.26 2557.78 4 | 139.000181 3796.58 3907.16 3944.02 3944.02 3870.30 3796.58 3833.44 3907.16 4017.74 3980.88 3907.16 3944.02 3944.02 3796.58 3980.88 3759.72 3870.30 3759.72 3944.02 3870.30 4054.60 4017.74 3796.58 3833.44 4017.74 3833.44 3759.72 3870.30 4054.60 3907.16 3833.44 3980.88 3722.86 3944.02 3870.30 3944.02 3759.72 3907.16 3796.58 3944.02 3944.02 3980.88 3759.72 3722.86 3833.44 3796.58 3907.16 3796.58 3796.58 3833.44 5 | 147.028801 2268.06 2400.18 2268.06 2290.08 2246.04 2312.10 2422.20 2268.06 2246.04 2422.20 2422.20 2224.02 2290.08 2378.16 2400.18 2268.06 2356.14 2290.08 2246.04 2422.20 2422.20 2378.16 2378.16 2290.08 2400.18 2400.18 2334.12 2400.18 2246.04 2312.10 2422.20 2224.02 2400.18 2246.04 2356.14 2268.06 2334.12 2400.18 2290.08 2268.06 2400.18 2400.18 2268.06 2400.18 2400.18 2312.10 2268.06 2334.12 2334.12 2400.18 6 | 154.995096 2296.90 2430.70 2274.60 2319.20 2252.30 2296.90 2252.30 2453.00 2453.00 2252.30 2453.00 2274.60 2408.40 2319.20 2363.80 2430.70 2408.40 2252.30 2341.50 2408.40 2453.00 2319.20 2386.10 2408.40 2341.50 2363.80 2341.50 2296.90 2319.20 2408.40 2386.10 2363.80 2319.20 2252.30 2252.30 2341.50 2453.00 2274.60 2363.80 2341.50 2341.50 2252.30 2252.30 2453.00 2319.20 2430.70 2319.20 2430.70 2319.20 2430.70 7 | 156.989769 1363.74 1457.33 1377.11 1363.74 1417.22 1350.37 1443.96 1417.22 1457.33 1457.33 1430.59 1443.96 1377.11 1443.96 1377.11 1470.70 1403.85 1430.59 1443.96 1363.74 1417.22 1457.33 1470.70 1363.74 1363.74 1457.33 1417.22 1377.11 1403.85 1443.96 1417.22 1377.11 
1417.22 1350.37 1443.96 1377.11 1350.37 1350.37 1403.85 1417.22 1377.11 1470.70 1470.70 1350.37 1350.37 1443.96 1443.96 1430.59 1377.11 1363.74 8 | 168.989654 515.00 515.00 525.00 550.00 510.00 505.00 520.00 515.00 525.00 545.00 515.00 525.00 525.00 530.00 540.00 520.00 520.00 510.00 525.00 520.00 540.00 510.00 550.00 520.00 510.00 520.00 505.00 525.00 550.00 530.00 510.00 530.00 510.00 525.00 505.00 510.00 530.00 525.00 510.00 550.00 525.00 545.00 545.00 515.00 520.00 520.00 550.00 520.00 540.00 505.00 9 | 336.972032 1030.00 1030.00 1050.00 1100.00 1020.00 1010.00 1040.00 1030.00 1050.00 1090.00 1030.00 1050.00 1050.00 1060.00 1080.00 1040.00 1040.00 1020.00 1050.00 1040.00 1080.00 1020.00 1100.00 1040.00 1020.00 1040.00 1010.00 1050.00 1100.00 1060.00 1020.00 1060.00 1020.00 1050.00 1010.00 1020.00 1060.00 1050.00 1020.00 1100.00 1050.00 1090.00 1090.00 1030.00 1040.00 1040.00 1100.00 1040.00 1080.00 1010.00 10 | 504.954410 386.25 386.25 393.75 412.50 382.50 378.75 390.00 386.25 393.75 408.75 386.25 393.75 393.75 397.50 405.00 390.00 390.00 382.50 393.75 390.00 405.00 382.50 412.50 390.00 382.50 390.00 378.75 393.75 412.50 397.50 382.50 397.50 382.50 393.75 378.75 382.50 397.50 393.75 382.50 412.50 393.75 408.75 408.75 386.25 390.00 390.00 412.50 390.00 405.00 378.75 11 | 197.005661 5160.61 5160.61 4919.46 4967.69 4919.46 5112.38 5208.84 5112.38 5160.61 5160.61 5015.92 5257.07 5112.38 5208.84 5064.15 4871.23 5305.30 5112.38 5160.61 5160.61 5112.38 4871.23 4919.46 5160.61 5015.92 5257.07 5015.92 4967.69 4967.69 5305.30 4967.69 5160.61 5208.84 4967.69 5112.38 5305.30 4967.69 5208.84 5015.92 5160.61 5208.84 4919.46 5257.07 4967.69 4871.23 5160.61 5112.38 4967.69 5257.07 5305.30 12 | 213.000576 974.65 1013.25 984.30 1061.50 1061.50 974.65 1032.55 1032.55 1051.85 1042.20 1022.90 1061.50 1003.60 984.30 1051.85 1003.60 1042.20 1013.25 984.30 1061.50 1061.50 993.95 1003.60 1042.20 1003.60 1051.85 984.30 1013.25 1051.85 984.30 1042.20 1032.55 974.65 1051.85 1013.25 984.30 
993.95 1003.60 1042.20 974.65 1003.60 984.30 1032.55 1032.55 1051.85 993.95 1061.50 1042.20 984.30 1042.20 13 | 215.016226 3859.80 3786.28 3970.08 3823.04 4006.84 3823.04 3970.08 4006.84 3859.80 3896.56 3749.52 3749.52 3786.28 3970.08 3823.04 3859.80 3970.08 3933.32 3970.08 3823.04 3970.08 3896.56 3823.04 3712.76 4006.84 3712.76 3970.08 3823.04 4006.84 3896.56 3823.04 3896.56 3970.08 3786.28 3712.76 3712.76 3970.08 3859.80 3933.32 3786.28 3786.28 3786.28 3970.08 3712.76 3749.52 3712.76 4006.84 4043.60 3859.80 3933.32 14 | 230.990164 1929.90 1893.14 1985.04 1911.52 2003.42 1911.52 1985.04 2003.42 1929.90 1948.28 1874.76 1874.76 1893.14 1985.04 1911.52 1929.90 1985.04 1966.66 1985.04 1911.52 1985.04 1948.28 1911.52 1856.38 2003.42 1856.38 1985.04 1911.52 2003.42 1948.28 1911.52 1948.28 1985.04 1893.14 1856.38 1856.38 1985.04 1929.90 1966.66 1893.14 1893.14 1893.14 1985.04 1856.38 1874.76 1856.38 2003.42 2021.80 1929.90 1966.66 15 | 426.052250 1144.50 1155.40 1199.00 1122.70 1155.40 1133.60 1122.70 1188.10 1111.80 1144.50 1133.60 1133.60 1166.30 1166.30 1144.50 1166.30 1122.70 1166.30 1155.40 1177.20 1155.40 1144.50 1199.00 1177.20 1100.90 1133.60 1177.20 1144.50 1177.20 1177.20 1122.70 1100.90 1177.20 1177.20 1166.30 1111.80 1177.20 1199.00 1144.50 1122.70 1100.90 1111.80 1122.70 1188.10 1144.50 1111.80 1144.50 1199.00 1177.20 1155.40 16 | 492.060410 1080.00 1100.00 1090.00 1060.00 1100.00 1070.00 1090.00 1030.00 1080.00 1050.00 1070.00 1060.00 1040.00 1100.00 1080.00 1100.00 1100.00 1090.00 1020.00 1060.00 1050.00 1080.00 1020.00 1060.00 1100.00 1040.00 1040.00 1030.00 1020.00 1030.00 1060.00 1010.00 1100.00 1030.00 1090.00 1090.00 1060.00 1050.00 1060.00 1060.00 1090.00 1060.00 1080.00 1080.00 1080.00 1030.00 1070.00 1040.00 1060.00 1060.00 17 | 493.063765 166.41 169.49 167.95 163.33 169.49 164.87 167.95 158.71 166.41 161.79 164.87 163.33 160.25 169.49 166.41 169.49 169.49 167.95 157.16 163.33 161.79 166.41 157.16 163.33 169.49 160.25 160.25 158.71 157.16 158.71 
163.33 155.62 169.49 158.71 167.95 167.95 163.33 161.79 163.33 163.33 167.95 163.33 166.41 166.41 166.41 158.71 164.87 160.25 163.33 163.33 18 | 550.065890 4766.30 4506.32 4419.66 4549.65 4722.97 4376.33 4462.99 4419.66 4549.65 4636.31 4376.33 4679.64 4722.97 4376.33 4549.65 4679.64 4376.33 4722.97 4679.64 4462.99 4766.30 4766.30 4766.30 4419.66 4506.32 4636.31 4419.66 4766.30 4419.66 4592.98 4506.32 4766.30 4462.99 4419.66 4549.65 4766.30 4679.64 4419.66 4766.30 4549.65 4592.98 4376.33 4419.66 4419.66 4592.98 4549.65 4766.30 4636.31 4636.31 4462.99 19 | -------------------------------------------------------------------------------- /tests/test_data/dataMatrix_lcms_theoretical.txt: -------------------------------------------------------------------------------- 1 | name sample01 sample02 sample03 sample04 sample05 sample06 sample07 sample08 sample09 sample10 sample11 sample12 sample13 sample14 sample15 sample16 sample17 sample18 sample19 sample20 sample21 sample22 sample23 sample24 sample25 sample26 sample27 sample28 sample29 sample30 sample31 sample32 sample33 sample34 sample35 sample36 sample37 sample38 sample39 sample40 sample41 sample42 sample43 sample44 sample45 sample46 sample47 sample48 sample49 sample50 2 | M127T60 1348.35 1468.50 1348.35 1441.80 1361.70 1428.45 1428.45 1468.50 1455.15 1388.40 1468.50 1468.50 1388.40 1375.05 1428.45 1348.35 1428.45 1428.45 1441.80 1428.45 1375.05 1428.45 1415.10 1468.50 1361.70 1348.35 1388.40 1415.10 1415.10 1361.70 1428.45 1401.75 1428.45 1441.80 1375.05 1468.50 1455.15 1455.15 1388.40 1348.35 1428.45 1468.50 1375.05 1375.05 1415.10 1401.75 1361.70 1361.70 1468.50 1455.15 3 | M135T70 2606.04 2581.91 2654.30 2606.04 2654.30 2485.39 2509.52 2606.04 2606.04 2654.30 2654.30 2630.17 2557.78 2630.17 2437.13 2606.04 2630.17 2581.91 2606.04 2533.65 2581.91 2581.91 2557.78 2485.39 2485.39 2581.91 2630.17 2509.52 2606.04 2606.04 2461.26 2533.65 2485.39 2485.39 2557.78 2437.13 2630.17 2509.52 2606.04 2533.65 2533.65 2437.13 2630.17 
2654.30 2630.17 2485.39 2485.39 2606.04 2461.26 2557.78 4 | M139T80 3796.58 3907.16 3944.02 3944.02 3870.30 3796.58 3833.44 3907.16 4017.74 3980.88 3907.16 3944.02 3944.02 3796.58 3980.88 3759.72 3870.30 3759.72 3944.02 3870.30 4054.60 4017.74 3796.58 3833.44 4017.74 3833.44 3759.72 3870.30 4054.60 3907.16 3833.44 3980.88 3722.86 3944.02 3870.30 3944.02 3759.72 3907.16 3796.58 3944.02 3944.02 3980.88 3759.72 3722.86 3833.44 3796.58 3907.16 3796.58 3796.58 3833.44 5 | M147T90 2268.06 2400.18 2268.06 2290.08 2246.04 2312.10 2422.20 2268.06 2246.04 2422.20 2422.20 2224.02 2290.08 2378.16 2400.18 2268.06 2356.14 2290.08 2246.04 2422.20 2422.20 2378.16 2378.16 2290.08 2400.18 2400.18 2334.12 2400.18 2246.04 2312.10 2422.20 2224.02 2400.18 2246.04 2356.14 2268.06 2334.12 2400.18 2290.08 2268.06 2400.18 2400.18 2268.06 2400.18 2400.18 2312.10 2268.06 2334.12 2334.12 2400.18 6 | M155T100 2296.90 2430.70 2274.60 2319.20 2252.30 2296.90 2252.30 2453.00 2453.00 2252.30 2453.00 2274.60 2408.40 2319.20 2363.80 2430.70 2408.40 2252.30 2341.50 2408.40 2453.00 2319.20 2386.10 2408.40 2341.50 2363.80 2341.50 2296.90 2319.20 2408.40 2386.10 2363.80 2319.20 2252.30 2252.30 2341.50 2453.00 2274.60 2363.80 2341.50 2341.50 2252.30 2252.30 2453.00 2319.20 2430.70 2319.20 2430.70 2319.20 2430.70 7 | M157T110 1363.74 1457.33 1377.11 1363.74 1417.22 1350.37 1443.96 1417.22 1457.33 1457.33 1430.59 1443.96 1377.11 1443.96 1377.11 1470.70 1403.85 1430.59 1443.96 1363.74 1417.22 1457.33 1470.70 1363.74 1363.74 1457.33 1417.22 1377.11 1403.85 1443.96 1417.22 1377.11 1417.22 1350.37 1443.96 1377.11 1350.37 1350.37 1403.85 1417.22 1377.11 1470.70 1470.70 1350.37 1350.37 1443.96 1443.96 1430.59 1377.11 1363.74 8 | M169T120 515.00 515.00 525.00 550.00 510.00 505.00 520.00 515.00 525.00 545.00 515.00 525.00 525.00 530.00 540.00 520.00 520.00 510.00 525.00 520.00 540.00 510.00 550.00 520.00 510.00 520.00 505.00 525.00 550.00 530.00 510.00 530.00 510.00 525.00 505.00 510.00 530.00 525.00 510.00 550.00 
525.00 545.00 545.00 515.00 520.00 520.00 550.00 520.00 540.00 505.00 9 | M337T121 1030.00 1030.00 1050.00 1100.00 1020.00 1010.00 1040.00 1030.00 1050.00 1090.00 1030.00 1050.00 1050.00 1060.00 1080.00 1040.00 1040.00 1020.00 1050.00 1040.00 1080.00 1020.00 1100.00 1040.00 1020.00 1040.00 1010.00 1050.00 1100.00 1060.00 1020.00 1060.00 1020.00 1050.00 1010.00 1020.00 1060.00 1050.00 1020.00 1100.00 1050.00 1090.00 1090.00 1030.00 1040.00 1040.00 1100.00 1040.00 1080.00 1010.00 10 | M505T122 386.25 386.25 393.75 412.50 382.50 378.75 390.00 386.25 393.75 408.75 386.25 393.75 393.75 397.50 405.00 390.00 390.00 382.50 393.75 390.00 405.00 382.50 412.50 390.00 382.50 390.00 378.75 393.75 412.50 397.50 382.50 397.50 382.50 393.75 378.75 382.50 397.50 393.75 382.50 412.50 393.75 408.75 408.75 386.25 390.00 390.00 412.50 390.00 405.00 378.75 11 | M197T150 5160.61 5160.61 4919.46 4967.69 4919.46 5112.38 5208.84 5112.38 5160.61 5160.61 5015.92 5257.07 5112.38 5208.84 5064.15 4871.23 5305.30 5112.38 5160.61 5160.61 5112.38 4871.23 4919.46 5160.61 5015.92 5257.07 5015.92 4967.69 4967.69 5305.30 4967.69 5160.61 5208.84 4967.69 5112.38 5305.30 4967.69 5208.84 5015.92 5160.61 5208.84 4919.46 5257.07 4967.69 4871.23 5160.61 5112.38 4967.69 5257.07 5305.30 12 | M213T160 974.65 1013.25 984.30 1061.50 1061.50 974.65 1032.55 1032.55 1051.85 1042.20 1022.90 1061.50 1003.60 984.30 1051.85 1003.60 1042.20 1013.25 984.30 1061.50 1061.50 993.95 1003.60 1042.20 1003.60 1051.85 984.30 1013.25 1051.85 984.30 1042.20 1032.55 974.65 1051.85 1013.25 984.30 993.95 1003.60 1042.20 974.65 1003.60 984.30 1032.55 1032.55 1051.85 993.95 1061.50 1042.20 984.30 1042.20 13 | M215T170 3859.80 3786.28 3970.08 3823.04 4006.84 3823.04 3970.08 4006.84 3859.80 3896.56 3749.52 3749.52 3786.28 3970.08 3823.04 3859.80 3970.08 3933.32 3970.08 3823.04 3970.08 3896.56 3823.04 3712.76 4006.84 3712.76 3970.08 3823.04 4006.84 3896.56 3823.04 3896.56 3970.08 3786.28 3712.76 3712.76 3970.08 3859.80 3933.32 3786.28 
3786.28 3786.28 3970.08 3712.76 3749.52 3712.76 4006.84 4043.60 3859.80 3933.32 14 | M231T174 1929.90 1893.14 1985.04 1911.52 2003.42 1911.52 1985.04 2003.42 1929.90 1948.28 1874.76 1874.76 1893.14 1985.04 1911.52 1929.90 1985.04 1966.66 1985.04 1911.52 1985.04 1948.28 1911.52 1856.38 2003.42 1856.38 1985.04 1911.52 2003.42 1948.28 1911.52 1948.28 1985.04 1893.14 1856.38 1856.38 1985.04 1929.90 1966.66 1893.14 1893.14 1893.14 1985.04 1856.38 1874.76 1856.38 2003.42 2021.80 1929.90 1966.66 15 | M426T180 1144.50 1155.40 1199.00 1122.70 1155.40 1133.60 1122.70 1188.10 1111.80 1144.50 1133.60 1133.60 1166.30 1166.30 1144.50 1166.30 1122.70 1166.30 1155.40 1177.20 1155.40 1144.50 1199.00 1177.20 1100.90 1133.60 1177.20 1144.50 1177.20 1177.20 1122.70 1100.90 1177.20 1177.20 1166.30 1111.80 1177.20 1199.00 1144.50 1122.70 1100.90 1111.80 1122.70 1188.10 1144.50 1111.80 1144.50 1199.00 1177.20 1155.40 16 | M492T190 1080.00 1100.00 1090.00 1060.00 1100.00 1070.00 1090.00 1030.00 1080.00 1050.00 1070.00 1060.00 1040.00 1100.00 1080.00 1100.00 1100.00 1090.00 1020.00 1060.00 1050.00 1080.00 1020.00 1060.00 1100.00 1040.00 1040.00 1030.00 1020.00 1030.00 1060.00 1010.00 1100.00 1030.00 1090.00 1090.00 1060.00 1050.00 1060.00 1060.00 1090.00 1060.00 1080.00 1080.00 1080.00 1030.00 1070.00 1040.00 1060.00 1060.00 17 | M493T192 166.41 169.49 167.95 163.33 169.49 164.87 167.95 158.71 166.41 161.79 164.87 163.33 160.25 169.49 166.41 169.49 169.49 167.95 157.16 163.33 161.79 166.41 157.16 163.33 169.49 160.25 160.25 158.71 157.16 158.71 163.33 155.62 169.49 158.71 167.95 167.95 163.33 161.79 163.33 163.33 167.95 163.33 166.41 166.41 166.41 158.71 164.87 160.25 163.33 163.33 18 | M550T200 4766.30 4506.32 4419.66 4549.65 4722.97 4376.33 4462.99 4419.66 4549.65 4636.31 4376.33 4679.64 4722.97 4376.33 4549.65 4679.64 4376.33 4722.97 4679.64 4462.99 4766.30 4766.30 4766.30 4419.66 4506.32 4636.31 4419.66 4766.30 4419.66 4592.98 4506.32 4766.30 4462.99 4419.66 4549.65 4766.30 4679.64 
4419.66 4766.30 4549.65 4592.98 4376.33 4419.66 4419.66 4592.98 4549.65 4766.30 4636.31 4636.31 4462.99 19 | -------------------------------------------------------------------------------- /tests/test_data/dataMatrix_lcms_theoretical_mc_o.txt: -------------------------------------------------------------------------------- 1 | name sample01 sample02 sample03 sample04 sample05 sample06 sample07 sample08 sample09 sample10 sample11 sample12 sample13 sample14 sample15 sample16 sample17 sample18 sample19 sample20 sample21 sample22 sample23 sample24 sample25 sample26 sample27 sample28 sample29 sample30 sample31 sample32 sample33 sample34 sample35 sample36 sample37 sample38 sample39 sample40 sample41 sample42 sample43 sample44 sample45 sample46 sample47 sample48 sample49 sample50 M127T60 1348.35 1468.5 1348.35 1441.8 1361.7 1428.45 1428.45 1468.5 1455.15 1388.4 1468.5 1468.5 1388.4 1375.05 1428.45 1348.35 1428.45 1428.45 1441.8 1428.45 1375.05 1428.45 1415.1 1468.5 1361.7 1348.35 1388.4 1415.1 1415.1 1361.7 1428.45 1401.75 1428.45 1441.8 1375.05 1468.5 1455.15 1455.15 1388.4 1348.35 1428.45 1468.5 1375.05 1375.05 1415.1 1401.75 1361.7 1361.7 1468.5 1455.15 M135T70 2606.04 2581.91 2654.3 2606.04 2654.3 2485.39 2509.52 2606.04 2606.04 2654.3 2654.3 2630.17 2557.78 2630.17 2437.13 2606.04 2630.17 2581.91 2606.04 2533.65 2581.91 2581.91 2557.78 2485.39 2485.39 2581.91 2630.17 2509.52 2606.04 2606.04 2461.26 2533.65 2485.39 2485.39 2557.78 2437.13 2630.17 2509.52 2606.04 2533.65 2533.65 2437.13 2630.17 2654.3 2630.17 2485.39 2485.39 2606.04 2461.26 2557.78 M139T80 3796.58 3907.16 3944.02 3944.02 3870.3 3796.58 3833.44 3907.16 4017.74 3980.88 3907.16 3944.02 3944.02 3796.58 3980.88 3759.72 3870.3 3759.72 3944.02 3870.3 4054.6 4017.74 3796.58 3833.44 4017.74 3833.44 3759.72 3870.3 4054.6 3907.16 3833.44 3980.88 3722.86 3944.02 3870.3 3944.02 3759.72 3907.16 3796.58 3944.02 3944.02 3980.88 3759.72 3722.86 3833.44 3796.58 3907.16 3796.58 3796.58 3833.44 M147T90 2268.06 2400.18 
2268.06 2290.08 2246.04 2312.1 2422.2 2268.06 2246.04 2422.2 2422.2 2224.02 2290.08 2378.16 2400.18 2268.06 2356.14 2290.08 2246.04 2422.2 2422.2 2378.16 2378.16 2290.08 2400.18 2400.18 2334.12 2400.18 2246.04 2312.1 2422.2 2224.02 2400.18 2246.04 2356.14 2268.06 2334.12 2400.18 2290.08 2268.06 2400.18 2400.18 2268.06 2400.18 2400.18 2312.1 2268.06 2334.12 2334.12 2400.18 M155T100 2296.9 2430.7 2274.6 2319.2 2252.3 2296.9 2252.3 2453 2453 2252.3 2453 2274.6 2408.4 2319.2 2363.8 2430.7 2408.4 2252.3 2341.5 2408.4 2453 2319.2 2386.1 2408.4 2341.5 2363.8 2341.5 2296.9 2319.2 2408.4 2386.1 2363.8 2319.2 2252.3 2252.3 2341.5 2453 2274.6 2363.8 2341.5 2341.5 2252.3 2252.3 2453 2319.2 2430.7 2319.2 2430.7 2319.2 2430.7 M157T110 1363.74 1457.33 1377.11 1363.74 1417.22 1350.37 1443.96 1417.22 1457.33 1457.33 1430.59 1443.96 1377.11 1443.96 1377.11 1470.7 1403.85 1430.59 1443.96 1363.74 1417.22 1457.33 1470.7 1363.74 1363.74 1457.33 1417.22 1377.11 1403.85 1443.96 1417.22 1377.11 1417.22 1350.37 1443.96 1377.11 1350.37 1350.37 1403.85 1417.22 1377.11 1470.7 1470.7 1350.37 1350.37 1443.96 1443.96 1430.59 1377.11 1363.74 M169T120 515 515 525 550 510 505 520 515 525 545 515 525 525 530 540 520 520 510 525 520 540 510 550 520 510 520 505 525 550 530 510 530 510 525 505 510 530 525 510 550 525 545 545 515 520 520 550 520 540 505 M337T121 1030 1030 1050 1100 1020 1010 1040 1030 1050 1090 1030 1050 1050 1060 1080 1040 1040 1020 1050 1040 1080 1020 1100 1040 1020 1040 1010 1050 1100 1060 1020 1060 1020 1050 1010 1020 1060 1050 1020 1100 1050 1090 1090 1030 1040 1040 1100 1040 1080 1010 M505T122 386.25 386.25 393.75 412.5 382.5 378.75 390 386.25 393.75 408.75 386.25 393.75 393.75 397.5 405 390 390 382.5 393.75 390 405 382.5 412.5 390 382.5 390 378.75 393.75 412.5 397.5 382.5 397.5 382.5 393.75 378.75 382.5 397.5 393.75 382.5 412.5 393.75 408.75 408.75 386.25 390 390 412.5 390 405 378.75 M197T150 5160.61 5160.61 4919.46 4967.69 4919.46 5112.38 5208.84 5112.38 5160.61 5160.61 5015.92 
5257.07 5112.38 5208.84 5064.15 4871.23 5305.3 5112.38 5160.61 5160.61 5112.38 4871.23 4919.46 5160.61 5015.92 5257.07 5015.92 4967.69 4967.69 5305.3 4967.69 5160.61 5208.84 4967.69 5112.38 5305.3 4967.69 5208.84 5015.92 5160.61 5208.84 4919.46 5257.07 4967.69 4871.23 5160.61 5112.38 4967.69 5257.07 5305.3 M213T160 974.65 1013.25 984.3 1061.5 1061.5 974.65 1032.55 1032.55 1051.85 1042.2 1022.9 1061.5 1003.6 984.3 1051.85 1003.6 1042.2 1013.25 984.3 1061.5 1061.5 993.95 1003.6 1042.2 1003.6 1051.85 984.3 1013.25 1051.85 984.3 1042.2 1032.55 974.65 1051.85 1013.25 984.3 993.95 1003.6 1042.2 974.65 1003.6 984.3 1032.55 1032.55 1051.85 993.95 1061.5 1042.2 984.3 1042.2 M214T181 1144.5 1155.4 1199 1122.7 1155.4 1133.6 1122.7 1188.1 1111.8 1144.5 1133.6 1133.6 1166.3 1166.3 1144.5 1166.3 1122.7 1166.3 1155.4 1177.2 1155.4 1144.5 1199 1177.2 1100.9 1133.6 1177.2 1144.5 1177.2 1177.2 1122.7 1100.9 1177.2 1177.2 1166.3 1111.8 1177.2 1199 1144.5 1122.7 1100.9 1111.8 1122.7 1188.1 1144.5 1111.8 1144.5 1199 1177.2 1155.4 M214T182 1144.5 1155.4 1199 1122.7 1155.4 1133.6 1122.7 1188.1 1111.8 1144.5 1133.6 1133.6 1166.3 1166.3 1144.5 1166.3 1122.7 1166.3 1155.4 1177.2 1155.4 1144.5 1199 1177.2 1100.9 1133.6 1177.2 1144.5 1177.2 1177.2 1122.7 1100.9 1177.2 1177.2 1166.3 1111.8 1177.2 1199 1144.5 1122.7 1100.9 1111.8 1122.7 1188.1 1144.5 1111.8 1144.5 1199 1177.2 1155.4 M215T170 3859.8 3786.28 3970.08 3823.04 4006.84 3823.04 3970.08 4006.84 3859.8 3896.56 3749.52 3749.52 3786.28 3970.08 3823.04 3859.8 3970.08 3933.32 3970.08 3823.04 3970.08 3896.56 3823.04 3712.76 4006.84 3712.76 3970.08 3823.04 4006.84 3896.56 3823.04 3896.56 3970.08 3786.28 3712.76 3712.76 3970.08 3859.8 3933.32 3786.28 3786.28 3786.28 3970.08 3712.76 3749.52 3712.76 4006.84 4043.6 3859.8 3933.32 M225T182 1144.5 1155.4 1199 1122.7 1155.4 1133.6 1122.7 1188.1 1111.8 1144.5 1133.6 1133.6 1166.3 1166.3 1144.5 1166.3 1122.7 1166.3 1155.4 1177.2 1155.4 1144.5 1199 1177.2 1100.9 1133.6 1177.2 1144.5 1177.2 1177.2 
1122.7 1100.9 1177.2 1177.2 1166.3 1111.8 1177.2 1199 1144.5 1122.7 1100.9 1111.8 1122.7 1188.1 1144.5 1111.8 1144.5 1199 1177.2 1155.4 M231T174 1929.9 1893.14 1985.04 1911.52 2003.42 1911.52 1985.04 2003.42 1929.9 1948.28 1874.76 1874.76 1893.14 1985.04 1911.52 1929.9 1985.04 1966.66 1985.04 1911.52 1985.04 1948.28 1911.52 1856.38 2003.42 1856.38 1985.04 1911.52 2003.42 1948.28 1911.52 1948.28 1985.04 1893.14 1856.38 1856.38 1985.04 1929.9 1966.66 1893.14 1893.14 1893.14 1985.04 1856.38 1874.76 1856.38 2003.42 2021.8 1929.9 1966.66 M426T180 1144.5 1155.4 1199 1122.7 1155.4 1133.6 1122.7 1188.1 1111.8 1144.5 1133.6 1133.6 1166.3 1166.3 1144.5 1166.3 1122.7 1166.3 1155.4 1177.2 1155.4 1144.5 1199 1177.2 1100.9 1133.6 1177.2 1144.5 1177.2 1177.2 1122.7 1100.9 1177.2 1177.2 1166.3 1111.8 1177.2 1199 1144.5 1122.7 1100.9 1111.8 1122.7 1188.1 1144.5 1111.8 1144.5 1199 1177.2 1155.4 M492T190 1080 1100 1090 1060 1100 1070 1090 1030 1080 1050 1070 1060 1040 1100 1080 1100 1100 1090 1020 1060 1050 1080 1020 1060 1100 1040 1040 1030 1020 1030 1060 1010 1100 1030 1090 1090 1060 1050 1060 1060 1090 1060 1080 1080 1080 1030 1070 1040 1060 1060 M493T192 166.41 169.49 167.95 163.33 169.49 164.87 167.95 158.71 166.41 161.79 164.87 163.33 160.25 169.49 166.41 169.49 169.49 167.95 157.16 163.33 161.79 166.41 157.16 163.33 169.49 160.25 160.25 158.71 157.16 158.71 163.33 155.62 169.49 158.71 167.95 167.95 163.33 161.79 163.33 163.33 167.95 163.33 166.41 166.41 166.41 158.71 164.87 160.25 163.33 163.33 M550T200 4766.3 4506.32 4419.66 4549.65 4722.97 4376.33 4462.99 4419.66 4549.65 4636.31 4376.33 4679.64 4722.97 4376.33 4549.65 4679.64 4376.33 4722.97 4679.64 4462.99 4766.3 4766.3 4766.3 4419.66 4506.32 4636.31 4419.66 4766.3 4419.66 4592.98 4506.32 4766.3 4462.99 4419.66 4549.65 4766.3 4679.64 4419.66 4766.3 4549.65 4592.98 4376.33 4419.66 4419.66 4592.98 4549.65 4766.3 4636.31 4636.31 4462.99 -------------------------------------------------------------------------------- 
/tests/test_data/dataMatrix_lcms_theoretical_nls.txt: -------------------------------------------------------------------------------- 1 | name sample01 sample02 sample03 sample04 sample05 sample06 sample07 sample08 sample09 sample10 sample11 sample12 sample13 sample14 sample15 sample16 sample17 sample18 sample19 sample20 sample21 sample22 sample23 sample24 sample25 sample26 sample27 sample28 sample29 sample30 sample31 sample32 sample33 sample34 sample35 sample36 sample37 sample38 sample39 sample40 sample41 sample42 sample43 sample44 sample45 sample46 sample47 sample48 sample49 sample50 M117T80 3451.436364 3551.963636 3585.472727 3585.472727 3518.454545 3451.436364 3484.945455 3551.963636 3652.490909 3618.981818 3551.963636 3585.472727 3585.472727 3451.436364 3618.981818 3417.927273 3518.454545 3417.927273 3585.472727 3518.454545 3686 3652.490909 3451.436364 3484.945455 3652.490909 3484.945455 3417.927273 3518.454545 3686 3551.963636 3484.945455 3618.981818 3384.418182 3585.472727 3518.454545 3585.472727 3417.927273 3551.963636 3451.436364 3585.472727 3585.472727 3618.981818 3417.927273 3384.418182 3484.945455 3451.436364 3551.963636 3451.436364 3451.436364 3484.945455 M121T80 3416.922 3516.444 3549.618 3549.618 3483.27 3416.922 3450.096 3516.444 3615.966 3582.792 3516.444 3549.618 3549.618 3416.922 3582.792 3383.748 3483.27 3383.748 3549.618 3483.27 3649.14 3615.966 3416.922 3450.096 3615.966 3450.096 3383.748 3483.27 3649.14 3516.444 3450.096 3582.792 3350.574 3549.618 3483.27 3549.618 3383.748 3516.444 3416.922 3549.618 3549.618 3582.792 3383.748 3350.574 3450.096 3416.922 3516.444 3416.922 3416.922 3450.096 M122T80 136.67688 140.65776 141.98472 141.98472 139.3308 136.67688 138.00384 140.65776 144.63864 143.31168 140.65776 141.98472 141.98472 136.67688 143.31168 135.34992 139.3308 135.34992 141.98472 139.3308 145.9656 144.63864 136.67688 138.00384 144.63864 138.00384 135.34992 139.3308 145.9656 140.65776 138.00384 143.31168 134.02296 141.98472 139.3308 141.98472 
135.34992 140.65776 136.67688 141.98472 141.98472 143.31168 135.34992 134.02296 138.00384 136.67688 140.65776 136.67688 136.67688 138.00384 M139T80 3796.58 3907.16 3944.02 3944.02 3870.3 3796.58 3833.44 3907.16 4017.74 3980.88 3907.16 3944.02 3944.02 3796.58 3980.88 3759.72 3870.3 3759.72 3944.02 3870.3 4054.6 4017.74 3796.58 3833.44 4017.74 3833.44 3759.72 3870.3 4054.6 3907.16 3833.44 3980.88 3722.86 3944.02 3870.3 3944.02 3759.72 3907.16 3796.58 3944.02 3944.02 3980.88 3759.72 3722.86 3833.44 3796.58 3907.16 3796.58 3796.58 3833.44 M140T80 151.8632 156.2864 157.7608 157.7608 154.812 151.8632 153.3376 156.2864 160.7096 159.2352 156.2864 157.7608 157.7608 151.8632 159.2352 150.3888 154.812 150.3888 157.7608 154.812 162.184 160.7096 151.8632 153.3376 160.7096 153.3376 150.3888 154.812 162.184 156.2864 153.3376 159.2352 148.9144 157.7608 154.812 157.7608 150.3888 156.2864 151.8632 157.7608 157.7608 159.2352 150.3888 148.9144 153.3376 151.8632 156.2864 151.8632 151.8632 153.3376 M157T80 2657.606 2735.012 2760.814 2760.814 2709.21 2657.606 2683.408 2735.012 2812.418 2786.616 2735.012 2760.814 2760.814 2657.606 2786.616 2631.804 2709.21 2631.804 2760.814 2709.21 2838.22 2812.418 2657.606 2683.408 2812.418 2683.408 2631.804 2709.21 2838.22 2735.012 2683.408 2786.616 2606.002 2760.814 2709.21 2760.814 2631.804 2735.012 2657.606 2760.814 2760.814 2786.616 2631.804 2606.002 2683.408 2657.606 2735.012 2657.606 2657.606 2683.408 M158T80 106.30424 109.40048 110.43256 110.43256 108.3684 106.30424 107.33632 109.40048 112.49672 111.46464 109.40048 110.43256 110.43256 106.30424 111.46464 105.27216 108.3684 105.27216 110.43256 108.3684 113.5288 112.49672 106.30424 107.33632 112.49672 107.33632 105.27216 108.3684 113.5288 109.40048 107.33632 111.46464 104.24008 110.43256 108.3684 110.43256 105.27216 109.40048 106.30424 110.43256 110.43256 111.46464 105.27216 104.24008 107.33632 106.30424 109.40048 106.30424 106.30424 107.33632 
-------------------------------------------------------------------------------- /tests/test_data/peaklist_dims_pos_theoretical.txt: -------------------------------------------------------------------------------- 1 | mz intensity names 2 | 126.979204 1421.78 Pyruvate (C00022) 3 | 135.028801 2581.91 (S)-Malate (C00149) 4 | 139.000181 3870.30 Fumarate (C00122) 5 | 147.028801 2334.12 2-Oxoglutarate (C00026) 6 | 154.995096 2341.50 Oxaloacetate (C00036) 7 | 156.989769 1417.22 Succinate (C00042) 8 | 168.989654 520.00 Phosphoenolpyruvate (C00074) 9 | 336.972032 1040.00 Phosphoenolpyruvate (Dimer) (C00074) 10 | 504.954410 390.00 Phosphoenolpyruvate (Trimer) (C00074) 11 | 197.005661 5112.38 cis-Aconitate (C00417) 12 | 213.000576 1018.08 Oxalosuccinate (C05379) 13 | 215.016226 3859.80 Isocitrate (C00311) 14 | 230.990164 1929.90 Citrate (C00158) 15 | 426.052250 1149.95 Thiamin diphosphate (C00068) 16 | 492.060410 1060.00 2-(alpha-Hydroxyethyl)thiamine diphosphate (C05125) 17 | 493.063765 163.33 2-(alpha-Hydroxyethyl)thiamine diphosphate (C05125) 18 | 550.065890 4549.65 3-Carboxy-1-hydroxypropyl-ThPP (C05381) 19 | -------------------------------------------------------------------------------- /tests/test_data/peaklist_lcms_pos_theoretical.txt: -------------------------------------------------------------------------------- 1 | name mz rt intensity 2 | M127T60 126.979204 60 1421.78 3 | M135T70 135.028801 70 2581.91 4 | M139T80 139.000181 80 3870.30 5 | M147T90 147.028801 90 2334.12 6 | M155T100 154.995096 100 2341.50 7 | M157T110 156.989769 110 1417.22 8 | M169T120 168.989654 120 520.00 9 | M337T121 336.972032 121 1040.00 10 | M505T122 504.954410 122.5 390.00 11 | M197T150 197.005661 150 5112.38 12 | M213T160 213.000576 160 1018.08 13 | M215T170 215.016226 170 3859.80 14 | M231T174 230.990164 173.5 1929.90 15 | M426T180 426.052250 180 1149.95 16 | M492T190 492.060410 190 1060.00 17 | M493T192 493.063765 192.5 163.33 18 | M550T200 550.065890 200 4549.65 19 | 
-------------------------------------------------------------------------------- /tests/test_data/peaklist_lcms_pos_theoretical_mc_o.txt: -------------------------------------------------------------------------------- 1 | name mz rt intensity M127T60 126.979204 60 1421.78 M135T70 135.028801 70 2581.91 M139T80 139.000181 80 3870.30 M147T90 147.028801 90 2334.12 M155T100 154.995096 100 2341.50 M157T110 156.989769 110 1417.22 M169T120 168.989654 120 520.00 M337T121 336.972032 121 1040.00 M505T122 504.954410 122.5 390.00 M197T150 197.005661 150 5112.38 M213T160 213.000576 160 1018.08 M214T181 213.529763 181 530.00 M214T182 214.031441 182 81.50 M215T170 215.016226 170 3859.8 M225T182 224.520736 182 300.00 M231T174 230.990164 173.5 1929.90 M426T180 426.052250 180 1149.95 M492T190 492.060410 190 1060.00 M493T192 493.063765 192.5 163.33 M550T200 550.065890 200 4549.65 -------------------------------------------------------------------------------- /tests/test_data/peaklist_lcms_pos_theoretical_mn.txt: -------------------------------------------------------------------------------- 1 | name mz rt intensity names 2 | M127T60 126.979204 60 1421.78 Pyruvate (C00022) 3 | M135T70 135.028801 70 2581.91 (S)-Malate (C00149) 4 | M139T80 139.000181 80 3870.30 Fumarate (C00122) 5 | M147T90 147.028801 90 2334.12 2-Oxoglutarate (C00026) 6 | M155T100 154.995096 100 2341.50 Oxaloacetate (C00036) 7 | M157T110 156.989769 110 1417.22 Succinate (C00042) 8 | M169T120 168.989654 120 520.00 Phosphoenolpyruvate (C00074) 9 | M337T121 336.972032 121 1040.00 Phosphoenolpyruvate (Dimer) (C00074) 10 | M505T122 504.954410 122.5 390.00 Phosphoenolpyruvate (Trimer) (C00074) 11 | M197T150 197.005661 150 5112.38 cis-Aconitate (C00417) 12 | M213T160 213.000576 160 1018.08 Oxalosuccinate (C05379) 13 | M215T170 215.016226 170 3859.80 Isocitrate (C00311) 14 | M231T174 230.990164 173.5 1929.90 Citrate (C00158) 15 | M426T180 426.052250 180 1149.95 Thiamin diphosphate (C00068) 16 | M492T190 492.060410 190 
1060.00 2-(alpha-Hydroxyethyl)thiamine diphosphate (C05125) 17 | M493T192 493.063765 192.5 163.33 2-(alpha-Hydroxyethyl)thiamine diphosphate (C05125) 18 | M550T200 550.065890 200 4549.65 3-Carboxy-1-hydroxypropyl-ThPP (C05381) 19 | -------------------------------------------------------------------------------- /tests/test_data/peaklist_lcms_pos_theoretical_nls.txt: -------------------------------------------------------------------------------- 1 | name mz rt intensity M117T80 117.018236 80 1000 M121T80 120.989616 80 100 M122T80 121.992971 80 101 M139T80 139.000181 80 3870.3 M140T80 140.003536 80 387.03 M157T80 157.010746 80 2715.92 M158T80 158.014101 80 271.592 -------------------------------------------------------------------------------- /tests/test_data/peaklist_lcms_pos_theoretical_no_name.txt: -------------------------------------------------------------------------------- 1 | mz rt intensity names 2 | 126.979204 60 1421.78 Pyruvate (C00022) 3 | 135.028801 70 2581.91 (S)-Malate (C00149) 4 | 139.000181 80 3870.30 Fumarate (C00122) 5 | 147.028801 90 2334.12 2-Oxoglutarate (C00026) 6 | 154.995096 100 2341.50 Oxaloacetate (C00036) 7 | 156.989769 110 1417.22 Succinate (C00042) 8 | 168.989654 120 520.00 Phosphoenolpyruvate (C00074) 9 | 336.972032 121 1040.00 Phosphoenolpyruvate (Dimer) (C00074) 10 | 504.954410 122.5 390.00 Phosphoenolpyruvate (Trimer) (C00074) 11 | 197.005661 150 5112.38 cis-Aconitate (C00417) 12 | 213.000576 160 1018.08 Oxalosuccinate (C05379) 13 | 215.016226 170 3859.80 Isocitrate (C00311) 14 | 230.990164 173.5 1929.90 Citrate (C00158) 15 | 426.052250 180 1149.95 Thiamin diphosphate (C00068) 16 | 492.060410 190 1060.00 2-(alpha-Hydroxyethyl)thiamine diphosphate (C05125) 17 | 493.063765 192.5 163.33 2-(alpha-Hydroxyethyl)thiamine diphosphate (C05125) 18 | 550.065890 200 4549.65 3-Carboxy-1-hydroxypropyl-ThPP (C05381) 19 | -------------------------------------------------------------------------------- /tests/test_data/results_annotation.sqlite: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/computational-metabolomics/beamspy/66c74c16fe744582fed730cde843fc2b6e9f061f/tests/test_data/results_annotation.sqlite -------------------------------------------------------------------------------- /tests/test_data/results_annotation_excl_pattern.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/computational-metabolomics/beamspy/66c74c16fe744582fed730cde843fc2b6e9f061f/tests/test_data/results_annotation_excl_pattern.sqlite -------------------------------------------------------------------------------- /tests/test_data/results_annotation_graph.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/computational-metabolomics/beamspy/66c74c16fe744582fed730cde843fc2b6e9f061f/tests/test_data/results_annotation_graph.sqlite -------------------------------------------------------------------------------- /tests/test_data/results_annotation_mc_o.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/computational-metabolomics/beamspy/66c74c16fe744582fed730cde843fc2b6e9f061f/tests/test_data/results_annotation_mc_o.sqlite -------------------------------------------------------------------------------- /tests/test_data/results_annotation_nls.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/computational-metabolomics/beamspy/66c74c16fe744582fed730cde843fc2b6e9f061f/tests/test_data/results_annotation_nls.sqlite -------------------------------------------------------------------------------- /tests/test_data/results_mfdb.sqlite: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/computational-metabolomics/beamspy/66c74c16fe744582fed730cde843fc2b6e9f061f/tests/test_data/results_mfdb.sqlite -------------------------------------------------------------------------------- /tests/test_data/results_mfdb_excl_hrules.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/computational-metabolomics/beamspy/66c74c16fe744582fed730cde843fc2b6e9f061f/tests/test_data/results_mfdb_excl_hrules.sqlite -------------------------------------------------------------------------------- /tests/test_data/results_pearson.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/computational-metabolomics/beamspy/66c74c16fe744582fed730cde843fc2b6e9f061f/tests/test_data/results_pearson.sqlite -------------------------------------------------------------------------------- /tests/test_data/results_pearson_all.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/computational-metabolomics/beamspy/66c74c16fe744582fed730cde843fc2b6e9f061f/tests/test_data/results_pearson_all.sqlite -------------------------------------------------------------------------------- /tests/test_data/results_spearman.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/computational-metabolomics/beamspy/66c74c16fe744582fed730cde843fc2b6e9f061f/tests/test_data/results_spearman.sqlite -------------------------------------------------------------------------------- /tests/test_data/sdf_record.sdf: -------------------------------------------------------------------------------- 1 | 2 | Marvin 01211310252D 3 | 4 | 22 24 0 0 0 0 999 V2000 5 | -2.8644 -0.2905 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 6 | -2.8656 -1.1176 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 7 | -2.1509 -1.5304 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 8 | 
-2.1527 0.1221 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 9 | -1.4377 -0.2868 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 10 | -1.4343 -1.1151 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 11 | -0.7192 -1.5240 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 12 | -0.0030 -1.1092 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0 13 | -0.0064 -0.2809 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0 14 | -0.7260 0.1325 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 15 | 0.7066 0.1336 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 16 | 1.4211 -0.2790 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 17 | 2.1336 0.1349 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 18 | 2.1315 0.9606 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 19 | 1.4109 1.3705 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 20 | 0.7014 0.9543 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 21 | 2.8489 -0.2755 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 22 | 2.8438 1.3760 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 23 | 0.7124 -1.5197 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 24 | -2.1504 -2.3552 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 25 | -3.5788 0.1217 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 26 | -0.0064 0.5438 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 27 | 4 1 1 0 0 0 0 28 | 1 21 1 0 0 0 0 29 | 1 2 2 0 0 0 0 30 | 2 3 1 0 0 0 0 31 | 3 20 1 0 0 0 0 32 | 3 6 2 0 0 0 0 33 | 5 4 2 0 0 0 0 34 | 5 10 1 0 0 0 0 35 | 5 6 1 0 0 0 0 36 | 6 7 1 0 0 0 0 37 | 7 8 1 0 0 0 0 38 | 8 19 1 6 0 0 0 39 | 8 9 1 0 0 0 0 40 | 9 10 1 0 0 0 0 41 | 9 11 1 0 0 0 0 42 | 9 22 1 1 0 0 0 43 | 16 11 1 0 0 0 0 44 | 11 12 2 0 0 0 0 45 | 12 13 1 0 0 0 0 46 | 13 17 1 0 0 0 0 47 | 13 14 2 0 0 0 0 48 | 14 18 1 0 0 0 0 49 | 14 15 1 0 0 0 0 50 | 15 16 2 0 0 0 0 51 | M END 52 | > 53 | CHEBI:90 54 | 55 | > 56 | (-)-epicatechin 57 | 58 | > 59 | 3 60 | 61 | > 62 | A catechin with (2R,3R)-configuration. 
63 | 64 | > 65 | CHEBI:18484 66 | 67 | > 68 | [H][C@@]1(Oc2cc(O)cc(O)c2C[C@H]1O)c1ccc(O)c(O)c1 69 | 70 | > 71 | PFTAWBLQPZVEMU-UKRRQHHQSA-N 72 | 73 | > 74 | InChI=1S/C15H14O6/c16-8-4-11(18)9-6-13(20)15(21-14(9)5-8)7-1-2-10(17)12(19)3-7/h1-5,13,15-20H,6H2/t13-,15-/m1/s1 75 | 76 | > 77 | C15H14O6 78 | 79 | > 80 | 0 81 | 82 | > 83 | 290.26810 84 | 85 | > 86 | 290.079 87 | 88 | > 89 | (2R,3R)-2-(3,4-dihydroxyphenyl)-3,4-dihydro-2H-chromene-3,5,7-triol 90 | 91 | > 92 | (-)-Epicatechin 93 | 94 | > 95 | 490-46-0 96 | 97 | > 98 | C09727 99 | 100 | > 101 | LSM-20956 102 | 103 | > 104 | CPD-7630 105 | 106 | > 107 | C00000956 108 | 109 | > 110 | LMPK12020003 111 | 112 | > 113 | CID: 72276 114 | SID: 160709352 115 | 116 | > 117 | 10427682 118 | 7655336 119 | 120 | > 121 | Q9SEV0 122 | 123 | > 124 | 15 Feb 2018 125 | 126 | $$$$ -------------------------------------------------------------------------------- /tests/test_data/summary_mr_mc.txt: -------------------------------------------------------------------------------- 1 | name mz rt intensity label charge oligomer isotope_labels_a isotope_ids isotope_labels_b isotope_charges atoms exact_mass ppm_error rt_diff adduct C H N O P S molecular_formula compound_name compound_id compound_count compounds_hmdb_full_v4_0_20200909_v1 compounds_test compounds_test_rt 2 | M127T60 126.979204 60.0 1421.775 126.97920199999999 0.01575061094777821 [M+K]+ 3 4 0 3 0 0 C3H4O3 2-hydroxyacrylic Acid HMDB0062676 1 1 0 0 3 | M127T60 126.979204 60.0 1421.775 126.97920199999999 0.01575061094777821 [M+K]+ 3 4 0 3 0 0 C3H4O3 3-Hydroxypropenoate 8947 1 0 1 0 4 | M127T60 126.979204 60.0 1421.775 126.97920199999999 0.01575061094777821 [M+K]+ 3 4 0 3 0 0 C3H4O3 3-Oxopropanoate 721 1 0 1 0 5 | M127T60 126.979204 60.0 1421.775 126.97920199999999 0.01575061094777821 [M+K]+ 3 4 0 3 0 0 C3H4O3 Glucosereductone HMDB0040261 1 1 0 0 6 | M127T60 126.979204 60.0 1421.775 126.97920199999999 0.01575061094777821 [M+K]+ 3 4 0 3 0 0 C3H4O3 Malonic semialdehyde HMDB0011111 
1 1 0 0 7 | M127T60 126.979204 60.0 1421.775 126.97920199999999 0.01575061094777821 [M+K]+ 3 4 0 3 0 0 C3H4O3 Pyruvate 578 1 0 1 0 8 | M127T60 126.979204 60.0 1421.775 126.97920199999999 0.01575061094777821 [M+K]+ 3 4 0 3 0 0 C3H4O3 Pyruvic acid HMDB0000243 1 1 0 0 9 | M127T60 126.979204 60.0 1421.775 126.97908600000001 0.9292868904907359 [M+H]+ 1 3 0 5 1 0 CH3O5P Formyl phosphate 1969 1 0 1 0 10 | M127T60 126.979204 60.0 1421.775 126.97908600000001 0.9292868904907359 [M+H]+ 1 3 0 5 1 0 CH3O5P Foscarnet HMDB0014670 1 1 0 0 11 | M127T60 126.979204 60.0 1421.775 126.97908600000001 0.9292868904907359 [M+H]+ 1 3 0 5 1 0 CH3O5P Phosphonoformate 4022 1 0 1 0 12 | M135T70 135.028801 70.0 2581.91 135.028799 0.01481165506737926 [M+H]+ 4 6 0 5 0 0 C4H6O5 3-Dehydro-L-threonate 2300 1 0 1 0 13 | M135T70 135.028801 70.0 2581.91 135.028799 0.01481165506737926 [M+H]+ 4 6 0 5 0 0 C4H6O5 D-Malic acid HMDB0031518 1 1 0 0 14 | M135T70 135.028801 70.0 2581.91 135.028799 0.01481165506737926 [M+H]+ 4 6 0 5 0 0 C4H6O5 Malate 1018 1 0 1 0 15 | M135T70 135.028801 70.0 2581.91 135.028799 0.01481165506737926 [M+H]+ 4 6 0 5 0 0 C4H6O5 Malic acid HMDB0000156 1 1 0 0 16 | M135T70 135.028801 70.0 2581.91 135.028799 0.01481165506737926 [M+H]+ 4 6 0 5 0 0 C4H6O5 Velcorin HMDB0032872 1 1 0 0 17 | M139T80 139.000181 80.0 3870.3 139.00018 0.007194235269876936 [M+Na]+ 4 4 0 4 0 0 C4H4O4 Formylpyruvate 1832 1 0 1 0 18 | M139T80 139.000181 80.0 3870.3 139.00018 0.007194235269876936 [M+Na]+ 4 4 0 4 0 0 C4H4O4 Fumarate 652 1 0 1 0 19 | M139T80 139.000181 80.0 3870.3 139.00018 0.007194235269876936 [M+Na]+ 4 4 0 4 0 0 C4H4O4 Fumaric acid HMDB0000134 1 1 0 0 20 | M139T80 139.000181 80.0 3870.3 139.00018 0.007194235269876936 [M+Na]+ 4 4 0 4 0 0 C4H4O4 Maleic acid 1414 1 0 1 0 21 | M139T80 139.000181 80.0 3870.3 139.00018 0.007194235269876936 [M+Na]+ 4 4 0 4 0 0 C4H4O4 Maleic acid HMDB0000176 1 1 0 0 22 | M147T90 147.028801 90.0 2334.12 147.028799 0.013602777201155575 [M+H]+ 5 6 0 5 0 0 C5H6O5 2-Oxoglutarate 
582 1 0 1 0 23 | M147T90 147.028801 90.0 2334.12 147.028799 0.013602777201155575 [M+H]+ 5 6 0 5 0 0 C5H6O5 3-Oxoglutaric acid HMDB0013701 1 1 0 0 24 | M147T90 147.028801 90.0 2334.12 147.028799 0.013602777201155575 [M+H]+ 5 6 0 5 0 0 C5H6O5 D-erythro-Ascorbate 239 1 0 1 0 25 | M147T90 147.028801 90.0 2334.12 147.028799 0.013602777201155575 [M+H]+ 5 6 0 5 0 0 C5H6O5 Dehydro-D-arabinono-1,4-lactone 3919 1 0 1 0 26 | M147T90 147.028801 90.0 2334.12 147.028799 0.013602777201155575 [M+H]+ 5 6 0 5 0 0 C5H6O5 Methyloxaloacetate 3778 1 0 1 0 27 | M147T90 147.028801 90.0 2334.12 147.028799 0.013602777201155575 [M+H]+ 5 6 0 5 0 0 C5H6O5 Oxaloacetate 4-methyl ester 2636 1 0 1 0 28 | M147T90 147.028801 90.0 2334.12 147.028799 0.013602777201155575 [M+H]+ 5 6 0 5 0 0 C5H6O5 Oxoglutaric acid HMDB0000208 1 1 0 0 29 | M155T100 154.995096 100.0 2341.5 154.995094 0.012903634194708677 [M+Na]+ 4 4 0 5 0 0 C4H4O5 2-Hydroxyethylenedicarboxylate 2770 1 0 1 0 30 | M155T100 154.995096 100.0 2341.5 154.995094 0.012903634194708677 [M+Na]+ 4 4 0 5 0 0 C4H4O5 Oxalacetic acid HMDB0000223 1 1 0 0 31 | M155T100 154.995096 100.0 2341.5 154.995094 0.012903634194708677 [M+Na]+ 4 4 0 5 0 0 C4H4O5 Oxaloacetate 589 1 0 1 0 32 | M155T100 154.995096 100.0 2341.5 154.995094 0.012903634194708677 [M+Na]+ 4 4 0 5 0 0 C4H4O5 enol-oxaloacetate 19221 1 0 1 0 33 | M155T100 154.995096 100.0 2341.5 154.995094 0.012903634194708677 [M+Na]+ 4 4 0 5 0 0 C4H4O5 trans-2,3-Epoxysuccinate 2551 1 0 1 0 34 | M157T110 156.989769 110.0 1417.22 156.989767 0.012739683822516187 [M+K]+ 4 6 0 4 0 0 C4H6O4 3-methoxy-3-oxopropanoic acid HMDB0130020 1 1 0 0 35 | M157T110 156.989769 110.0 1417.22 156.989767 0.012739683822516187 [M+K]+ 4 6 0 4 0 0 C4H6O4 4-Hydroxy-2-oxobutanoic acid HMDB0031204 1 1 0 0 36 | M157T110 156.989769 110.0 1417.22 156.989767 0.012739683822516187 [M+K]+ 4 6 0 4 0 0 C4H6O4 D,L-malic semialdehyde 18220 1 0 1 0 37 | M157T110 156.989769 110.0 1417.22 156.989767 0.012739683822516187 [M+K]+ 4 6 0 4 0 0 C4H6O4 
Erythrono-1,4-lactone 11996 1 0 1 0 38 | M157T110 156.989769 110.0 1417.22 156.989767 0.012739683822516187 [M+K]+ 4 6 0 4 0 0 C4H6O4 Erythrono-1,4-lactone HMDB0000349 1 1 0 0 39 | M157T110 156.989769 110.0 1417.22 156.989767 0.012739683822516187 [M+K]+ 4 6 0 4 0 0 C4H6O4 Methyl oxalate 7890 1 0 1 0 40 | M157T110 156.989769 110.0 1417.22 156.989767 0.012739683822516187 [M+K]+ 4 6 0 4 0 0 C4H6O4 Methylmalonate 1845 1 0 1 0 41 | M157T110 156.989769 110.0 1417.22 156.989767 0.012739683822516187 [M+K]+ 4 6 0 4 0 0 C4H6O4 Methylmalonic acid HMDB0000202 1 1 0 0 42 | M157T110 156.989769 110.0 1417.22 156.989767 0.012739683822516187 [M+K]+ 4 6 0 4 0 0 C4H6O4 Succinate 592 1 0 1 0 43 | M157T110 156.989769 110.0 1417.22 156.989767 0.012739683822516187 [M+K]+ 4 6 0 4 0 0 C4H6O4 Succinic acid HMDB0000254 1 1 0 0 44 | M157T110 156.989769 110.0 1417.22 156.989767 0.012739683822516187 [M+K]+ 4 6 0 4 0 0 C4H6O4 Threonolactone HMDB0000940 1 1 0 0 45 | M157T110 156.989769 110.0 1417.22 156.989767 0.012739683822516187 [M+K]+ 4 6 0 4 0 0 C4H6O4 xi-3-Hydroxy-2-oxobutanoic acid HMDB0039324 1 1 0 0 46 | M157T110 156.989769 110.0 1417.22 156.98965099999998 0.7516419029093673 [M+H]+ 2 5 0 6 1 0 C2H5O6P 2-Phosphoglycolate 1183 1 0 1 0 47 | M157T110 156.989769 110.0 1417.22 156.98965099999998 0.7516419029093673 [M+H]+ 2 5 0 6 1 0 C2H5O6P Phosphoglycolic acid HMDB0000816 1 1 0 0 48 | M169T120 168.989654 120.0 520.0 [M+H]+ 1 1 168.98965099999998 0.017752566521647164 [M+H]+ 3 5 0 6 1 0 C3H5O6P 3-Phosphonopyruvate 2162 3 0 1 0 49 | M169T120 168.989654 120.0 520.0 [M+H]+ 1 1 168.98965099999998 0.017752566521647164 [M+H]+ 3 5 0 6 1 0 C3H5O6P Phosphoenolpyruvate 614 3 0 1 0 50 | M169T120 168.989654 120.0 520.0 [M+H]+ 1 1 168.98965099999998 0.017752566521647164 [M+H]+ 3 5 0 6 1 0 C3H5O6P Phosphoenolpyruvic acid HMDB0000263 3 1 0 0 51 | M169T120 168.989654 120.0 520.0 [M+H]+ 1 1 168.98965099999998 0.017752566521647164 2.0 [M+H]+ 3 5 0 6 1 0 C3H5O6P Phosphoenolpyruvic acid HMDB0000263 3 0 0 1 52 | 
M337T121 336.972032 121.0 1040.0 [2M+H]+ 1 2 336.97202599999997 0.017805632452394953 [2M+H]+ 3 5 0 6 1 0 C3H5O6P 3-Phosphonopyruvate 2162 3 0 1 0 53 | M337T121 336.972032 121.0 1040.0 [2M+H]+ 1 2 336.97202599999997 0.017805632452394953 [2M+H]+ 3 5 0 6 1 0 C3H5O6P Phosphoenolpyruvate 614 3 0 1 0 54 | M337T121 336.972032 121.0 1040.0 [2M+H]+ 1 2 336.97202599999997 0.017805632452394953 [2M+H]+ 3 5 0 6 1 0 C3H5O6P Phosphoenolpyruvic acid HMDB0000263 3 1 0 0 55 | M505T122 504.95441 122.5 390.0 [3M+H]+ 1 3 504.95440099999996 0.017823391609811126 [3M+H]+ 3 5 0 6 1 0 C3H5O6P 3-Phosphonopyruvate 2162 3 0 1 0 56 | M505T122 504.95441 122.5 390.0 [3M+H]+ 1 3 504.95440099999996 0.017823391609811126 [3M+H]+ 3 5 0 6 1 0 C3H5O6P Phosphoenolpyruvate 614 3 0 1 0 57 | M505T122 504.95441 122.5 390.0 [3M+H]+ 1 3 504.95440099999996 0.017823391609811126 [3M+H]+ 3 5 0 6 1 0 C3H5O6P Phosphoenolpyruvic acid HMDB0000263 3 1 0 0 58 | M197T150 197.005661 150.0 5112.38 197.00565899999998 0.010151992757589745 [M+Na]+ 6 6 0 6 0 0 C6H6O6 Aconitate Ion 38282 1 0 1 0 59 | M197T150 197.005661 150.0 5112.38 197.00565899999998 0.010151992757589745 [M+Na]+ 6 6 0 6 0 0 C6H6O6 Dehydroascorbic acid 12423 1 0 1 0 60 | M197T150 197.005661 150.0 5112.38 197.00565899999998 0.010151992757589745 [M+Na]+ 6 6 0 6 0 0 C6H6O6 Dehydroascorbic acid HMDB0001264 1 1 0 0 61 | M197T150 197.005661 150.0 5112.38 197.00565899999998 0.010151992757589745 [M+Na]+ 6 6 0 6 0 0 C6H6O6 Dehydroascorbide(1-) HMDB0062706 1 1 0 0 62 | M197T150 197.005661 150.0 5112.38 197.00565899999998 0.010151992757589745 [M+Na]+ 6 6 0 6 0 0 C6H6O6 cis-Aconitate 843 1 0 1 0 63 | M197T150 197.005661 150.0 5112.38 197.00565899999998 0.010151992757589745 [M+Na]+ 6 6 0 6 0 0 C6H6O6 cis-Aconitic acid HMDB0000072 1 1 0 0 64 | M197T150 197.005661 150.0 5112.38 197.00565899999998 0.010151992757589745 [M+Na]+ 6 6 0 6 0 0 C6H6O6 trans-Aconitate 1934 1 0 1 0 65 | M197T150 197.005661 150.0 5112.38 197.00565899999998 0.010151992757589745 [M+Na]+ 6 6 0 6 0 0 
C6H6O6 trans-Aconitic acid HMDB0000958 1 1 0 0 66 | M213T160 213.000576 160.0 1018.075 213.00057400000003 0.009389645900807648 [M+Na]+ 6 6 0 7 0 0 C6H6O7 4-Hydroxy-Aconitate Ion 41990 1 0 1 0 67 | M213T160 213.000576 160.0 1018.075 213.00057400000003 0.009389645900807648 [M+Na]+ 6 6 0 7 0 0 C6H6O7 Oxalosuccinate 3402 1 0 1 0 68 | M213T160 213.000576 160.0 1018.075 213.00057400000003 0.009389645900807648 [M+Na]+ 6 6 0 7 0 0 C6H6O7 Oxalosuccinic acid HMDB0003974 1 1 0 0 69 | M215T170 215.016226 170.0 3859.8 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 (1R,2R)-Isocitric acid HMDB0033717 2 1 0 0 70 | M215T170 215.016226 170.0 3859.8 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 (1R,2S)-1-Hydroxypropane-1,2,3-tricarboxylate 863 2 0 1 0 71 | M215T170 215.016226 170.0 3859.8 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 (1S,2S)-1-Hydroxypropane-1,2,3-tricarboxylate 3090 2 0 1 0 72 | M215T170 215.016226 170.0 3859.8 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 (4R,5S)-4,5,6-Trihydroxy-2,3-dioxohexanoate 3064 2 0 1 0 73 | M215T170 215.016226 170.0 3859.8 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 2,3-Dihydroxy-5-Oxo-Hexanedioate 37494 2 0 1 0 74 | M215T170 215.016226 170.0 3859.8 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 2,3-Diketo-L-gulonate HMDB0006511 2 1 0 0 75 | M215T170 215.016226 170.0 3859.8 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 2,3-Dioxo-L-gulonate 13506 2 0 1 0 76 | M215T170 215.016226 170.0 3859.8 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 2,3-diketo-L-gulonate 20293 2 0 1 0 77 | M215T170 215.016226 170.0 3859.8 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 2,3-diketogulonate HMDB0062803 2 1 0 0 78 | M215T170 215.016226 170.0 3859.8 [M+Na]+ 1 1 
215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 2,5-Didehydro-D-gluconate 2148 2 0 1 0 79 | M215T170 215.016226 170.0 3859.8 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 2-Dehydro-3-deoxy-D-glucarate 2740 2 0 1 0 80 | M215T170 215.016226 170.0 3859.8 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 4,5-Dehydro-D-Glucuronic Acid 36800 2 0 1 0 81 | M215T170 215.016226 170.0 3859.8 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 5-Dehydro-4-deoxy-D-glucarate 1000 2 0 1 0 82 | M215T170 215.016226 170.0 3859.8 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 5-keto-4-deoxy-D-glucarate 20308 2 0 1 0 83 | M215T170 215.016226 170.0 3859.8 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 Carboxymethyloxysuccinate 2580 2 0 1 0 84 | M215T170 215.016226 170.0 3859.8 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 Citrate 675 2 0 1 0 85 | M215T170 215.016226 170.0 3859.8 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 Citric acid HMDB0000094 2 1 0 0 86 | M215T170 215.016226 170.0 3859.8 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 D-Glucaro-1,4-lactone HMDB0041862 2 1 0 0 87 | M215T170 215.016226 170.0 3859.8 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 D-threo-Isocitric acid HMDB0001874 2 1 0 0 88 | M215T170 215.016226 170.0 3859.8 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 Diketogulonic acid HMDB0005971 2 1 0 0 89 | M215T170 215.016226 170.0 3859.8 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 Isocitrate 781 2 0 1 0 90 | M215T170 215.016226 170.0 3859.8 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 Isocitric acid HMDB0000193 2 1 0 0 91 | M231T174 230.990164 173.5 1929.9 [M+K]+ 
1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 (1R,2R)-Isocitric acid HMDB0033717 2 1 0 0 92 | M231T174 230.990164 173.5 1929.9 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 (1R,2S)-1-Hydroxypropane-1,2,3-tricarboxylate 863 2 0 1 0 93 | M231T174 230.990164 173.5 1929.9 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 (1S,2S)-1-Hydroxypropane-1,2,3-tricarboxylate 3090 2 0 1 0 94 | M231T174 230.990164 173.5 1929.9 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 (4R,5S)-4,5,6-Trihydroxy-2,3-dioxohexanoate 3064 2 0 1 0 95 | M231T174 230.990164 173.5 1929.9 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 2,3-Dihydroxy-5-Oxo-Hexanedioate 37494 2 0 1 0 96 | M231T174 230.990164 173.5 1929.9 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 2,3-Diketo-L-gulonate HMDB0006511 2 1 0 0 97 | M231T174 230.990164 173.5 1929.9 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 2,3-Dioxo-L-gulonate 13506 2 0 1 0 98 | M231T174 230.990164 173.5 1929.9 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 2,3-diketo-L-gulonate 20293 2 0 1 0 99 | M231T174 230.990164 173.5 1929.9 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 2,3-diketogulonate HMDB0062803 2 1 0 0 100 | M231T174 230.990164 173.5 1929.9 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 2,5-Didehydro-D-gluconate 2148 2 0 1 0 101 | M231T174 230.990164 173.5 1929.9 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 2-Dehydro-3-deoxy-D-glucarate 2740 2 0 1 0 102 | M231T174 230.990164 173.5 1929.9 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 4,5-Dehydro-D-Glucuronic Acid 36800 2 0 1 0 103 | M231T174 230.990164 173.5 1929.9 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 5-Dehydro-4-deoxy-D-glucarate 1000 2 0 1 0 104 | M231T174 230.990164 173.5 1929.9 [M+K]+ 1 1 230.990161 
0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 5-keto-4-deoxy-D-glucarate 20308 2 0 1 0 105 | M231T174 230.990164 173.5 1929.9 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 Carboxymethyloxysuccinate 2580 2 0 1 0 106 | M231T174 230.990164 173.5 1929.9 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 Citrate 675 2 0 1 0 107 | M231T174 230.990164 173.5 1929.9 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 Citric acid HMDB0000094 2 1 0 0 108 | M231T174 230.990164 173.5 1929.9 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 D-Glucaro-1,4-lactone HMDB0041862 2 1 0 0 109 | M231T174 230.990164 173.5 1929.9 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 D-threo-Isocitric acid HMDB0001874 2 1 0 0 110 | M231T174 230.990164 173.5 1929.9 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 Diketogulonic acid HMDB0005971 2 1 0 0 111 | M231T174 230.990164 173.5 1929.9 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 Isocitrate 781 2 0 1 0 112 | M231T174 230.990164 173.5 1929.9 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 Isocitric acid HMDB0000193 2 1 0 0 113 | M426T180 426.05225 180.0 1149.95 426.05224499999997 0.011735650035642068 [M+H]+ 12 19 4 7 2 1 C12H19N4O7P2S Thiamine pyrophosphate HMDB0001372 1 1 0 0 114 | M426T180 426.05225 180.0 1149.95 426.05231399999997 -0.15021629468746972 [M+H]+ 14 19 1 10 0 2 C14H19NO10S2 Glucosinalbin HMDB0038401 1 1 0 0 115 | M426T180 426.05225 180.0 1149.95 426.05231399999997 -0.15021629468746972 [M+H]+ 14 19 1 10 0 2 C14H19NO10S2 Sinalbin 40568 1 0 1 0 116 | M492T190 492.06041 190.0 1060.0 C M493T192 (13C) 1 14.4 492.060405 0.010161354046311069 [M+Na]+ 14 23 4 8 2 1 C14H23N4O8P2S 2-(a-Hydroxyethyl)thiamine diphosphate HMDB0003904 2 1 0 0 117 | M493T192 493.063765 192.5 163.33 (13C) M492T190 C 1 14.4 493.06376 0.010140676303965665 [M+Na]+ 14 23 4 8 2 1 C14H23N4O8P2S 
2-(a-Hydroxyethyl)thiamine diphosphate HMDB0003904 2 1 0 0 118 | M550T200 550.06589 200.0 4549.65 550.0658840000001 0.01090778404131062 [M+Na]+ 16 25 4 10 2 1 C16H25N4O10P2S 3-Carboxy-1-hydroxypropylthiamine diphosphate HMDB0006744 1 1 0 0 119 | -------------------------------------------------------------------------------- /tests/test_data/summary_mr_mc_graphs.txt: -------------------------------------------------------------------------------- 1 | name mz rt intensity group_id degree_cor sub_group_id degree n_nodes n_edges label charge oligomer isotope_labels_a isotope_ids isotope_labels_b isotope_charges atoms exact_mass ppm_error rt_diff adduct C H N O P S molecular_formula compound_name compound_id compound_count compounds_hmdb_full_v4_0_20200909_v1 2 | M127T60 126.979204 60.0 1421.775 126.97920199999999 0.01575061094777821 [M+K]+ 3 4 0 3 0 0 C3H4O3 2-hydroxyacrylic Acid HMDB0062676 1 1 3 | M127T60 126.979204 60.0 1421.775 126.97920199999999 0.01575061094777821 [M+K]+ 3 4 0 3 0 0 C3H4O3 Glucosereductone HMDB0040261 1 1 4 | M127T60 126.979204 60.0 1421.775 126.97920199999999 0.01575061094777821 [M+K]+ 3 4 0 3 0 0 C3H4O3 Malonic semialdehyde HMDB0011111 1 1 5 | M127T60 126.979204 60.0 1421.775 126.97920199999999 0.01575061094777821 [M+K]+ 3 4 0 3 0 0 C3H4O3 Pyruvic acid HMDB0000243 1 1 6 | M127T60 126.979204 60.0 1421.775 126.97908600000001 0.9292868904907359 [M+H]+ 1 3 0 5 1 0 CH3O5P Foscarnet HMDB0014670 1 1 7 | M135T70 135.028801 70.0 2581.91 135.028799 0.01481165506737926 [M+H]+ 4 6 0 5 0 0 C4H6O5 D-Malic acid HMDB0031518 1 1 8 | M135T70 135.028801 70.0 2581.91 135.028799 0.01481165506737926 [M+H]+ 4 6 0 5 0 0 C4H6O5 Malic acid HMDB0000156 1 1 9 | M135T70 135.028801 70.0 2581.91 135.028799 0.01481165506737926 [M+H]+ 4 6 0 5 0 0 C4H6O5 Velcorin HMDB0032872 1 1 10 | M139T80 139.000181 80.0 3870.3 139.00018 0.007194235269876936 [M+Na]+ 4 4 0 4 0 0 C4H4O4 Fumaric acid HMDB0000134 1 1 11 | M139T80 139.000181 80.0 3870.3 139.00018 0.007194235269876936 [M+Na]+ 4 4 
0 4 0 0 C4H4O4 Maleic acid HMDB0000176 1 1 12 | M147T90 147.028801 90.0 2334.12 147.028799 0.013602777201155575 [M+H]+ 5 6 0 5 0 0 C5H6O5 3-Oxoglutaric acid HMDB0013701 1 1 13 | M147T90 147.028801 90.0 2334.12 147.028799 0.013602777201155575 [M+H]+ 5 6 0 5 0 0 C5H6O5 Oxoglutaric acid HMDB0000208 1 1 14 | M155T100 154.995096 100.0 2341.5 154.995094 0.012903634194708677 [M+Na]+ 4 4 0 5 0 0 C4H4O5 Oxalacetic acid HMDB0000223 1 1 15 | M157T110 156.989769 110.0 1417.22 156.989767 0.012739683822516187 [M+K]+ 4 6 0 4 0 0 C4H6O4 3-methoxy-3-oxopropanoic acid HMDB0130020 1 1 16 | M157T110 156.989769 110.0 1417.22 156.989767 0.012739683822516187 [M+K]+ 4 6 0 4 0 0 C4H6O4 4-Hydroxy-2-oxobutanoic acid HMDB0031204 1 1 17 | M157T110 156.989769 110.0 1417.22 156.989767 0.012739683822516187 [M+K]+ 4 6 0 4 0 0 C4H6O4 Erythrono-1,4-lactone HMDB0000349 1 1 18 | M157T110 156.989769 110.0 1417.22 156.989767 0.012739683822516187 [M+K]+ 4 6 0 4 0 0 C4H6O4 Methylmalonic acid HMDB0000202 1 1 19 | M157T110 156.989769 110.0 1417.22 156.989767 0.012739683822516187 [M+K]+ 4 6 0 4 0 0 C4H6O4 Succinic acid HMDB0000254 1 1 20 | M157T110 156.989769 110.0 1417.22 156.989767 0.012739683822516187 [M+K]+ 4 6 0 4 0 0 C4H6O4 Threonolactone HMDB0000940 1 1 21 | M157T110 156.989769 110.0 1417.22 156.989767 0.012739683822516187 [M+K]+ 4 6 0 4 0 0 C4H6O4 xi-3-Hydroxy-2-oxobutanoic acid HMDB0039324 1 1 22 | M157T110 156.989769 110.0 1417.22 156.98965099999998 0.7516419029093673 [M+H]+ 2 5 0 6 1 0 C2H5O6P Phosphoglycolic acid HMDB0000816 1 1 23 | M169T120 168.989654 120.0 520.0 1 2 1 2 3 2 [M+H]+ 1 1 168.98965099999998 0.017752566521647164 [M+H]+ 3 5 0 6 1 0 C3H5O6P Phosphoenolpyruvic acid HMDB0000263 3 1 24 | M337T121 336.972032 121.0 1040.0 1 2 1 1 3 2 [2M+H]+ 1 2 336.97202599999997 0.017805632452394953 [2M+H]+ 3 5 0 6 1 0 C3H5O6P Phosphoenolpyruvic acid HMDB0000263 3 1 25 | M505T122 504.95441 122.5 390.0 1 2 1 1 3 2 [3M+H]+ 1 3 504.95440099999996 0.017823391609811126 [3M+H]+ 3 5 0 6 1 0 C3H5O6P 
Phosphoenolpyruvic acid HMDB0000263 3 1 26 | M197T150 197.005661 150.0 5112.38 197.00565899999998 0.010151992757589745 [M+Na]+ 6 6 0 6 0 0 C6H6O6 Dehydroascorbic acid HMDB0001264 1 1 27 | M197T150 197.005661 150.0 5112.38 197.00565899999998 0.010151992757589745 [M+Na]+ 6 6 0 6 0 0 C6H6O6 Dehydroascorbide(1-) HMDB0062706 1 1 28 | M197T150 197.005661 150.0 5112.38 197.00565899999998 0.010151992757589745 [M+Na]+ 6 6 0 6 0 0 C6H6O6 cis-Aconitic acid HMDB0000072 1 1 29 | M197T150 197.005661 150.0 5112.38 197.00565899999998 0.010151992757589745 [M+Na]+ 6 6 0 6 0 0 C6H6O6 trans-Aconitic acid HMDB0000958 1 1 30 | M213T160 213.000576 160.0 1018.075 213.00057400000003 0.009389645900807648 [M+Na]+ 6 6 0 7 0 0 C6H6O7 Oxalosuccinic acid HMDB0003974 1 1 31 | M215T170 215.016226 170.0 3859.8 2 1 2 1 2 1 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 (1R,2R)-Isocitric acid HMDB0033717 2 1 32 | M215T170 215.016226 170.0 3859.8 2 1 2 1 2 1 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 2,3-Diketo-L-gulonate HMDB0006511 2 1 33 | M215T170 215.016226 170.0 3859.8 2 1 2 1 2 1 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 2,3-diketogulonate HMDB0062803 2 1 34 | M215T170 215.016226 170.0 3859.8 2 1 2 1 2 1 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 Citric acid HMDB0000094 2 1 35 | M215T170 215.016226 170.0 3859.8 2 1 2 1 2 1 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 D-Glucaro-1,4-lactone HMDB0041862 2 1 36 | M215T170 215.016226 170.0 3859.8 2 1 2 1 2 1 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 D-threo-Isocitric acid HMDB0001874 2 1 37 | M215T170 215.016226 170.0 3859.8 2 1 2 1 2 1 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 Diketogulonic acid HMDB0005971 2 1 38 | M215T170 215.016226 170.0 3859.8 2 1 2 1 2 1 [M+Na]+ 1 1 215.01622400000002 
0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 Isocitric acid HMDB0000193 2 1 39 | M231T174 230.990164 173.5 1929.9 2 1 2 1 2 1 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 (1R,2R)-Isocitric acid HMDB0033717 2 1 40 | M231T174 230.990164 173.5 1929.9 2 1 2 1 2 1 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 2,3-Diketo-L-gulonate HMDB0006511 2 1 41 | M231T174 230.990164 173.5 1929.9 2 1 2 1 2 1 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 2,3-diketogulonate HMDB0062803 2 1 42 | M231T174 230.990164 173.5 1929.9 2 1 2 1 2 1 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 Citric acid HMDB0000094 2 1 43 | M231T174 230.990164 173.5 1929.9 2 1 2 1 2 1 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 D-Glucaro-1,4-lactone HMDB0041862 2 1 44 | M231T174 230.990164 173.5 1929.9 2 1 2 1 2 1 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 D-threo-Isocitric acid HMDB0001874 2 1 45 | M231T174 230.990164 173.5 1929.9 2 1 2 1 2 1 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 Diketogulonic acid HMDB0005971 2 1 46 | M231T174 230.990164 173.5 1929.9 2 1 2 1 2 1 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 Isocitric acid HMDB0000193 2 1 47 | M426T180 426.05225 180.0 1149.95 426.05224499999997 0.011735650035642068 [M+H]+ 12 19 4 7 2 1 C12H19N4O7P2S Thiamine pyrophosphate HMDB0001372 1 1 48 | M426T180 426.05225 180.0 1149.95 426.05231399999997 -0.15021629468746972 [M+H]+ 14 19 1 10 0 2 C14H19NO10S2 Glucosinalbin HMDB0038401 1 1 49 | M492T190 492.06041 190.0 1060.0 3 1 3 1 2 1 C M493T192 (13C) 1 14.4 492.060405 0.010161354046311069 [M+Na]+ 14 23 4 8 2 1 C14H23N4O8P2S 2-(a-Hydroxyethyl)thiamine diphosphate HMDB0003904 2 1 50 | M493T192 493.063765 192.5 163.33 3 1 3 1 2 1 (13C) M492T190 C 1 14.4 493.06376 0.010140676303965665 [M+Na]+ 14 23 4 8 2 1 C14H23N4O8P2S 2-(a-Hydroxyethyl)thiamine diphosphate HMDB0003904 2 
1 51 | M550T200 550.06589 200.0 4549.65 550.0658840000001 0.01090778404131062 [M+Na]+ 16 25 4 10 2 1 C16H25N4O10P2S 3-Carboxy-1-hydroxypropylthiamine diphosphate HMDB0006744 1 1 52 | -------------------------------------------------------------------------------- /tests/test_data/summary_mr_mc_nls.txt: -------------------------------------------------------------------------------- 1 | name mz rt intensity label charge oligomer isotope_labels_a isotope_ids isotope_labels_b isotope_charges atoms nl_labels nl_ids exact_mass ppm_error rt_diff adduct C H N O P S molecular_formula compound_name compound_id compound_count compounds_hmdb_full_v4_0_20200909_v1 2 | M117T80 117.018236 80 3518.454545 [M+H]+ 1 1 117.018235 0.008545676641552855 [M+H]+ 4 4 0 4 0 0 C4H4O4 Fumaric acid HMDB0000134 7.0 1 3 | M117T80 117.018236 80 3518.454545 [M+H]+ 1 1 117.018235 0.008545676641552855 [M+H]+ 4 4 0 4 0 0 C4H4O4 Maleic acid HMDB0000176 7.0 1 4 | M121T80 120.989616 80 3483.27 C M122T80 (13C) 1 3.7 H2O M139T80 120.989615 0.008265172159405936 [M+Na]+ 4 4 0 4 0 0 C4H4O4 Fumaric acid HMDB0000134 7.0 1 5 | M121T80 120.989616 80 3483.27 C M122T80 (13C) 1 3.7 H2O M139T80 120.989615 0.008265172159405936 [M+Na]+ 4 4 0 4 0 0 C4H4O4 Maleic acid HMDB0000176 7.0 1 6 | M121T80 120.989616 80 3483.27 C M122T80 (13C) 1 3.7 H2O M139T80 120.989615 0.008265172159405936 [M+Na]+ 4 2 0 3 0 0 C4H2O3 7 | M122T80 121.992971 80 139.3308 (13C) M121T80 C 1 3.7 H2O M140T80 121.99297 0.008197193637266498 [M+Na]+ 4 4 0 4 0 0 C4H4O4 Fumaric acid HMDB0000134 7.0 1 8 | M122T80 121.992971 80 139.3308 (13C) M121T80 C 1 3.7 H2O M140T80 121.99297 0.008197193637266498 [M+Na]+ 4 4 0 4 0 0 C4H4O4 Maleic acid HMDB0000176 7.0 1 9 | M122T80 121.992971 80 139.3308 (13C) M121T80 C 1 3.7 H2O M140T80 121.99297 0.008197193637266498 [M+Na]+ 4 2 0 3 0 0 C4H2O3 10 | M139T80 139.000181 80 3870.3 [M+Na]+ 1 1 C M140T80 (13C) 1 3.7 H2O,H2O M121T80,M157T80 139.00018 0.007194235269876936 [M+Na]+ 4 4 0 4 0 0 C4H4O4 Fumaric acid HMDB0000134 7.0 
1 11 | M139T80 139.000181 80 3870.3 [M+Na]+ 1 1 C M140T80 (13C) 1 3.7 H2O,H2O M121T80,M157T80 139.00018 0.007194235269876936 [M+Na]+ 4 4 0 4 0 0 C4H4O4 Maleic acid HMDB0000176 7.0 1 12 | M139T80 139.000181 80 3870.3 [M+Na]+ 1 1 C M140T80 (13C) 1 3.7 H2O,H2O M121T80,M157T80 139.00018 0.007194235269876936 [M+Na]+ 4 2 0 3 0 0 C4H2O3 13 | M139T80 139.000181 80 3870.3 [M+Na]+ 1 1 C M140T80 (13C) 1 3.7 H2O,H2O M121T80,M157T80 139.000179 0.014388470643267915 [M+Na]+ 4 6 0 5 0 0 C4H6O5 D-Malic acid HMDB0031518 4.0 1 14 | M139T80 139.000181 80 3870.3 [M+Na]+ 1 1 C M140T80 (13C) 1 3.7 H2O,H2O M121T80,M157T80 139.000179 0.014388470643267915 [M+Na]+ 4 6 0 5 0 0 C4H6O5 Malic acid HMDB0000156 4.0 1 15 | M139T80 139.000181 80 3870.3 [M+Na]+ 1 1 C M140T80 (13C) 1 3.7 H2O,H2O M121T80,M157T80 139.000179 0.014388470643267915 [M+Na]+ 4 6 0 5 0 0 C4H6O5 Velcorin HMDB0032872 4.0 1 16 | M139T80 139.000181 80 3870.3 C M140T80 (13C) 1 3.7 H2O,H2O M121T80,M157T80 17 | M140T80 140.003536 80 154.812 (13C) M139T80 C 1 3.7 H2O,H2O M122T80,M158T80 140.003535 0.007142676772234664 [M+Na]+ 4 4 0 4 0 0 C4H4O4 Fumaric acid HMDB0000134 7.0 1 18 | M140T80 140.003536 80 154.812 (13C) M139T80 C 1 3.7 H2O,H2O M122T80,M158T80 140.003535 0.007142676772234664 [M+Na]+ 4 4 0 4 0 0 C4H4O4 Maleic acid HMDB0000176 7.0 1 19 | M140T80 140.003536 80 154.812 (13C) M139T80 C 1 3.7 H2O,H2O M122T80,M158T80 140.003535 0.007142676772234664 [M+Na]+ 4 2 0 3 0 0 C4H2O3 20 | M140T80 140.003536 80 154.812 (13C) M139T80 C 1 3.7 H2O,H2O M122T80,M158T80 140.003534 0.014285353646504993 [M+Na]+ 4 6 0 5 0 0 C4H6O5 D-Malic acid HMDB0031518 4.0 1 21 | M140T80 140.003536 80 154.812 (13C) M139T80 C 1 3.7 H2O,H2O M122T80,M158T80 140.003534 0.014285353646504993 [M+Na]+ 4 6 0 5 0 0 C4H6O5 Malic acid HMDB0000156 4.0 1 22 | M140T80 140.003536 80 154.812 (13C) M139T80 C 1 3.7 H2O,H2O M122T80,M158T80 140.003534 0.014285353646504993 [M+Na]+ 4 6 0 5 0 0 C4H6O5 Velcorin HMDB0032872 4.0 1 23 | M157T80 157.010746 80 2709.21 C M158T80 (13C) 1 3.7 
H2O M139T80 157.010745 0.006368991026040621 [M+Na]+ 4 4 0 4 0 0 C4H4O4 Fumaric acid HMDB0000134 7.0 1 24 | M157T80 157.010746 80 2709.21 C M158T80 (13C) 1 3.7 H2O M139T80 157.010745 0.006368991026040621 [M+Na]+ 4 4 0 4 0 0 C4H4O4 Maleic acid HMDB0000176 7.0 1 25 | M157T80 157.010746 80 2709.21 C M158T80 (13C) 1 3.7 H2O M139T80 157.010744 0.012737981952191725 [M+Na]+ 4 6 0 5 0 0 C4H6O5 D-Malic acid HMDB0031518 4.0 1 26 | M157T80 157.010746 80 2709.21 C M158T80 (13C) 1 3.7 H2O M139T80 157.010744 0.012737981952191725 [M+Na]+ 4 6 0 5 0 0 C4H6O5 Malic acid HMDB0000156 4.0 1 27 | M157T80 157.010746 80 2709.21 C M158T80 (13C) 1 3.7 H2O M139T80 157.010744 0.012737981952191725 [M+Na]+ 4 6 0 5 0 0 C4H6O5 Velcorin HMDB0032872 4.0 1 28 | M158T80 158.014101 80 108.3684 (13C) M157T80 C 1 3.7 H2O M140T80 158.01409999999998 0.006328549325009302 [M+Na]+ 4 4 0 4 0 0 C4H4O4 Fumaric acid HMDB0000134 7.0 1 29 | M158T80 158.014101 80 108.3684 (13C) M157T80 C 1 3.7 H2O M140T80 158.01409999999998 0.006328549325009302 [M+Na]+ 4 4 0 4 0 0 C4H4O4 Maleic acid HMDB0000176 7.0 1 30 | M158T80 158.014101 80 108.3684 (13C) M157T80 C 1 3.7 H2O M140T80 158.014099 0.012657098550251489 [M+Na]+ 4 6 0 5 0 0 C4H6O5 D-Malic acid HMDB0031518 4.0 1 31 | M158T80 158.014101 80 108.3684 (13C) M157T80 C 1 3.7 H2O M140T80 158.014099 0.012657098550251489 [M+Na]+ 4 6 0 5 0 0 C4H6O5 Malic acid HMDB0000156 4.0 1 32 | M158T80 158.014101 80 108.3684 (13C) M157T80 C 1 3.7 H2O M140T80 158.014099 0.012657098550251489 [M+Na]+ 4 6 0 5 0 0 C4H6O5 Velcorin HMDB0032872 4.0 1 33 | -------------------------------------------------------------------------------- /tests/test_data/summary_sr_mc.txt: -------------------------------------------------------------------------------- 1 | name mz rt intensity label_charge_oligomer isotope_labels_a isotope_ids isotope_labels_b isotope_charges atoms molecular_formula adduct compound_name compound_id compound_count exact_mass ppm_error rt_diff 2 | M127T60 126.979204 60.0 1421.775 
C3H4O3||C3H4O3||C3H4O3||C3H4O3||C3H4O3||C3H4O3||C3H4O3||CH3O5P||CH3O5P||CH3O5P [M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+H]+||[M+H]+||[M+H]+ 2-hydroxyacrylic Acid||3-Hydroxypropenoate||3-Oxopropanoate||Glucosereductone||Malonic semialdehyde||Pyruvate||Pyruvic acid||Formyl phosphate||Foscarnet||Phosphonoformate HMDB0062676||8947||721||HMDB0040261||HMDB0011111||578||HMDB0000243||1969||HMDB0014670||4022 1||1||1||1||1||1||1||1||1||1 126.979202||126.979202||126.979202||126.979202||126.979202||126.979202||126.979202||126.979086||126.979086||126.979086 0.02||0.02||0.02||0.02||0.02||0.02||0.02||0.93||0.93||0.93 None||None||None||None||None||None||None||None||None||None 3 | M135T70 135.028801 70.0 2581.91 C4H6O5||C4H6O5||C4H6O5||C4H6O5||C4H6O5 [M+H]+||[M+H]+||[M+H]+||[M+H]+||[M+H]+ 3-Dehydro-L-threonate||D-Malic acid||Malate||Malic acid||Velcorin 2300||HMDB0031518||1018||HMDB0000156||HMDB0032872 1||1||1||1||1 135.028799||135.028799||135.028799||135.028799||135.028799 0.01||0.01||0.01||0.01||0.01 None||None||None||None||None 4 | M139T80 139.000181 80.0 3870.3 C4H4O4||C4H4O4||C4H4O4||C4H4O4||C4H4O4 [M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+ Formylpyruvate||Fumarate||Fumaric acid||Maleic acid||Maleic acid 1832||652||HMDB0000134||1414||HMDB0000176 1||1||1||1||1 139.00018||139.00018||139.00018||139.00018||139.00018 0.01||0.01||0.01||0.01||0.01 None||None||None||None||None 5 | M147T90 147.028801 90.0 2334.12 C5H6O5||C5H6O5||C5H6O5||C5H6O5||C5H6O5||C5H6O5||C5H6O5 [M+H]+||[M+H]+||[M+H]+||[M+H]+||[M+H]+||[M+H]+||[M+H]+ 2-Oxoglutarate||3-Oxoglutaric acid||D-erythro-Ascorbate||Dehydro-D-arabinono-1,4-lactone||Methyloxaloacetate||Oxaloacetate 4-methyl ester||Oxoglutaric acid 582||HMDB0013701||239||3919||3778||2636||HMDB0000208 1||1||1||1||1||1||1 147.028799||147.028799||147.028799||147.028799||147.028799||147.028799||147.028799 0.01||0.01||0.01||0.01||0.01||0.01||0.01 None||None||None||None||None||None||None 6 | M155T100 154.995096 100.0 2341.5 
C4H4O5||C4H4O5||C4H4O5||C4H4O5||C4H4O5 [M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+ 2-Hydroxyethylenedicarboxylate||Oxalacetic acid||Oxaloacetate||enol-oxaloacetate||trans-2,3-Epoxysuccinate 2770||HMDB0000223||589||19221||2551 1||1||1||1||1 154.995094||154.995094||154.995094||154.995094||154.995094 0.01||0.01||0.01||0.01||0.01 None||None||None||None||None 7 | M157T110 156.989769 110.0 1417.22 C4H6O4||C4H6O4||C4H6O4||C4H6O4||C4H6O4||C4H6O4||C4H6O4||C4H6O4||C4H6O4||C4H6O4||C4H6O4||C4H6O4||C2H5O6P||C2H5O6P [M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+H]+||[M+H]+ 3-methoxy-3-oxopropanoic acid||4-Hydroxy-2-oxobutanoic acid||D,L-malic semialdehyde||Erythrono-1,4-lactone||Erythrono-1,4-lactone||Methyl oxalate||Methylmalonate||Methylmalonic acid||Succinate||Succinic acid||Threonolactone||xi-3-Hydroxy-2-oxobutanoic acid||2-Phosphoglycolate||Phosphoglycolic acid HMDB0130020||HMDB0031204||18220||11996||HMDB0000349||7890||1845||HMDB0000202||592||HMDB0000254||HMDB0000940||HMDB0039324||1183||HMDB0000816 1||1||1||1||1||1||1||1||1||1||1||1||1||1 156.989767||156.989767||156.989767||156.989767||156.989767||156.989767||156.989767||156.989767||156.989767||156.989767||156.989767||156.989767||156.989651||156.989651 0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.75||0.75 None||None||None||None||None||None||None||None||None||None||None||None||None||None 8 | M169T120 168.989654 120.0 520.0 [M+H]+::1::1 C3H5O6P||C3H5O6P||C3H5O6P||C3H5O6P [M+H]+||[M+H]+||[M+H]+||[M+H]+ 3-Phosphonopyruvate||Phosphoenolpyruvate||Phosphoenolpyruvic acid||Phosphoenolpyruvic acid 2162||614||HMDB0000263||HMDB0000263 3||3||3||3 168.989651||168.989651||168.989651||168.989651 0.02||0.02||0.02||0.02 None||None||None||2.0 9 | M337T121 336.972032 121.0 1040.0 [2M+H]+::1::2 C3H5O6P||C3H5O6P||C3H5O6P [2M+H]+||[2M+H]+||[2M+H]+ 3-Phosphonopyruvate||Phosphoenolpyruvate||Phosphoenolpyruvic acid 2162||614||HMDB0000263 3||3||3 
336.972026||336.972026||336.972026 0.02||0.02||0.02 None||None||None 10 | M505T122 504.95441 122.5 390.0 [3M+H]+::1::3 C3H5O6P||C3H5O6P||C3H5O6P [3M+H]+||[3M+H]+||[3M+H]+ 3-Phosphonopyruvate||Phosphoenolpyruvate||Phosphoenolpyruvic acid 2162||614||HMDB0000263 3||3||3 504.954401||504.954401||504.954401 0.02||0.02||0.02 None||None||None 11 | M197T150 197.005661 150.0 5112.38 C6H6O6||C6H6O6||C6H6O6||C6H6O6||C6H6O6||C6H6O6||C6H6O6||C6H6O6 [M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+ Aconitate Ion||Dehydroascorbic acid||Dehydroascorbic acid||Dehydroascorbide(1-)||cis-Aconitate||cis-Aconitic acid||trans-Aconitate||trans-Aconitic acid 38282||12423||HMDB0001264||HMDB0062706||843||HMDB0000072||1934||HMDB0000958 1||1||1||1||1||1||1||1 197.005659||197.005659||197.005659||197.005659||197.005659||197.005659||197.005659||197.005659 0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01 None||None||None||None||None||None||None||None 12 | M213T160 213.000576 160.0 1018.075 C6H6O7||C6H6O7||C6H6O7 [M+Na]+||[M+Na]+||[M+Na]+ 4-Hydroxy-Aconitate Ion||Oxalosuccinate||Oxalosuccinic acid 41990||3402||HMDB0003974 1||1||1 213.000574||213.000574||213.000574 0.01||0.01||0.01 None||None||None 13 | M215T170 215.016226 170.0 3859.8 [M+Na]+::1::1 C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7 [M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+ (1R,2R)-Isocitric acid||(1R,2S)-1-Hydroxypropane-1,2,3-tricarboxylate||(1S,2S)-1-Hydroxypropane-1,2,3-tricarboxylate||(4R,5S)-4,5,6-Trihydroxy-2,3-dioxohexanoate||2,3-Dihydroxy-5-Oxo-Hexanedioate||2,3-Diketo-L-gulonate||2,3-Dioxo-L-gulonate||2,3-diketo-L-gulonate||2,3-diketogulonate||2,5-Didehydro-D-gluconate||2-Dehydro-3-deoxy-D-glucarate||4,5-Dehydro-D-Glucuronic 
Acid||5-Dehydro-4-deoxy-D-glucarate||5-keto-4-deoxy-D-glucarate||Carboxymethyloxysuccinate||Citrate||Citric acid||D-Glucaro-1,4-lactone||D-threo-Isocitric acid||Diketogulonic acid||Isocitrate||Isocitric acid HMDB0033717||863||3090||3064||37494||HMDB0006511||13506||20293||HMDB0062803||2148||2740||36800||1000||20308||2580||675||HMDB0000094||HMDB0041862||HMDB0001874||HMDB0005971||781||HMDB0000193 2||2||2||2||2||2||2||2||2||2||2||2||2||2||2||2||2||2||2||2||2||2 215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224 0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01 None||None||None||None||None||None||None||None||None||None||None||None||None||None||None||None||None||None||None||None||None||None 14 | M231T174 230.990164 173.5 1929.9 [M+K]+::1::1 C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7 [M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+ (1R,2R)-Isocitric acid||(1R,2S)-1-Hydroxypropane-1,2,3-tricarboxylate||(1S,2S)-1-Hydroxypropane-1,2,3-tricarboxylate||(4R,5S)-4,5,6-Trihydroxy-2,3-dioxohexanoate||2,3-Dihydroxy-5-Oxo-Hexanedioate||2,3-Diketo-L-gulonate||2,3-Dioxo-L-gulonate||2,3-diketo-L-gulonate||2,3-diketogulonate||2,5-Didehydro-D-gluconate||2-Dehydro-3-deoxy-D-glucarate||4,5-Dehydro-D-Glucuronic Acid||5-Dehydro-4-deoxy-D-glucarate||5-keto-4-deoxy-D-glucarate||Carboxymethyloxysuccinate||Citrate||Citric acid||D-Glucaro-1,4-lactone||D-threo-Isocitric acid||Diketogulonic acid||Isocitrate||Isocitric acid 
HMDB0033717||863||3090||3064||37494||HMDB0006511||13506||20293||HMDB0062803||2148||2740||36800||1000||20308||2580||675||HMDB0000094||HMDB0041862||HMDB0001874||HMDB0005971||781||HMDB0000193 2||2||2||2||2||2||2||2||2||2||2||2||2||2||2||2||2||2||2||2||2||2 230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161 0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01 None||None||None||None||None||None||None||None||None||None||None||None||None||None||None||None||None||None||None||None||None||None 15 | M426T180 426.05225 180.0 1149.95 C12H19N4O7P2S||C14H19NO10S2||C14H19NO10S2 [M+H]+||[M+H]+||[M+H]+ Thiamine pyrophosphate||Glucosinalbin||Sinalbin HMDB0001372||HMDB0038401||40568 1||1||1 426.052245||426.052314||426.052314 0.01||-0.15||-0.15 None||None||None 16 | M492T190 492.06041 190.0 1060.0 C M493T192 (13C) 1 14.4 C14H23N4O8P2S [M+Na]+ 2-(a-Hydroxyethyl)thiamine diphosphate HMDB0003904 2 492.060405 0.01 None 17 | M493T192 493.063765 192.5 163.33 (13C) M492T190 C 1 14.4 C14H23N4O8P2S [M+Na]+ 2-(a-Hydroxyethyl)thiamine diphosphate HMDB0003904 2 493.06376 0.01 None 18 | M550T200 550.06589 200.0 4549.65 C16H25N4O10P2S [M+Na]+ 3-Carboxy-1-hydroxypropylthiamine diphosphate HMDB0006744 1 550.065884 0.01 None 19 | -------------------------------------------------------------------------------- /tests/test_data/summary_sr_sc.txt: -------------------------------------------------------------------------------- 1 | name mz rt intensity label_charge_oligomer isotope_labels_a isotope_ids isotope_labels_b isotope_charges atoms compounds_hmdb_full_v4_0_20200909_v1 compounds_test compounds_test_rt 2 | M127T60 126.979204 60.0 1421.775 C3H4O3::[M+K]+::2-hydroxyacrylic 
Acid::HMDB0062676::1::126.979202::0.02||C3H4O3::[M+K]+::Glucosereductone::HMDB0040261::1::126.979202::0.02||C3H4O3::[M+K]+::Malonic semialdehyde::HMDB0011111::1::126.979202::0.02||C3H4O3::[M+K]+::Pyruvic acid::HMDB0000243::1::126.979202::0.02||CH3O5P::[M+H]+::Foscarnet::HMDB0014670::1::126.979086::0.93 C3H4O3::[M+K]+::3-Hydroxypropenoate::8947::1::126.979202::0.02||C3H4O3::[M+K]+::3-Oxopropanoate::721::1::126.979202::0.02||C3H4O3::[M+K]+::Pyruvate::578::1::126.979202::0.02||CH3O5P::[M+H]+::Formyl phosphate::1969::1::126.979086::0.93||CH3O5P::[M+H]+::Phosphonoformate::4022::1::126.979086::0.93 3 | M135T70 135.028801 70.0 2581.91 C4H6O5::[M+H]+::D-Malic acid::HMDB0031518::1::135.028799::0.01||C4H6O5::[M+H]+::Malic acid::HMDB0000156::1::135.028799::0.01||C4H6O5::[M+H]+::Velcorin::HMDB0032872::1::135.028799::0.01 C4H6O5::[M+H]+::3-Dehydro-L-threonate::2300::1::135.028799::0.01||C4H6O5::[M+H]+::Malate::1018::1::135.028799::0.01 4 | M139T80 139.000181 80.0 3870.3 C4H4O4::[M+Na]+::Fumaric acid::HMDB0000134::1::139.00018::0.01||C4H4O4::[M+Na]+::Maleic acid::HMDB0000176::1::139.00018::0.01 C4H4O4::[M+Na]+::Formylpyruvate::1832::1::139.00018::0.01||C4H4O4::[M+Na]+::Fumarate::652::1::139.00018::0.01||C4H4O4::[M+Na]+::Maleic acid::1414::1::139.00018::0.01 5 | M147T90 147.028801 90.0 2334.12 C5H6O5::[M+H]+::3-Oxoglutaric acid::HMDB0013701::1::147.028799::0.01||C5H6O5::[M+H]+::Oxoglutaric acid::HMDB0000208::1::147.028799::0.01 C5H6O5::[M+H]+::2-Oxoglutarate::582::1::147.028799::0.01||C5H6O5::[M+H]+::D-erythro-Ascorbate::239::1::147.028799::0.01||C5H6O5::[M+H]+::Dehydro-D-arabinono-1,4-lactone::3919::1::147.028799::0.01||C5H6O5::[M+H]+::Methyloxaloacetate::3778::1::147.028799::0.01||C5H6O5::[M+H]+::Oxaloacetate 4-methyl ester::2636::1::147.028799::0.01 6 | M155T100 154.995096 100.0 2341.5 C4H4O5::[M+Na]+::Oxalacetic acid::HMDB0000223::1::154.995094::0.01 
C4H4O5::[M+Na]+::2-Hydroxyethylenedicarboxylate::2770::1::154.995094::0.01||C4H4O5::[M+Na]+::Oxaloacetate::589::1::154.995094::0.01||C4H4O5::[M+Na]+::enol-oxaloacetate::19221::1::154.995094::0.01||C4H4O5::[M+Na]+::trans-2,3-Epoxysuccinate::2551::1::154.995094::0.01 7 | M157T110 156.989769 110.0 1417.22 C4H6O4::[M+K]+::3-methoxy-3-oxopropanoic acid::HMDB0130020::1::156.989767::0.01||C4H6O4::[M+K]+::4-Hydroxy-2-oxobutanoic acid::HMDB0031204::1::156.989767::0.01||C4H6O4::[M+K]+::Erythrono-1,4-lactone::HMDB0000349::1::156.989767::0.01||C4H6O4::[M+K]+::Methylmalonic acid::HMDB0000202::1::156.989767::0.01||C4H6O4::[M+K]+::Succinic acid::HMDB0000254::1::156.989767::0.01||C4H6O4::[M+K]+::Threonolactone::HMDB0000940::1::156.989767::0.01||C4H6O4::[M+K]+::xi-3-Hydroxy-2-oxobutanoic acid::HMDB0039324::1::156.989767::0.01||C2H5O6P::[M+H]+::Phosphoglycolic acid::HMDB0000816::1::156.989651::0.75 C4H6O4::[M+K]+::D,L-malic semialdehyde::18220::1::156.989767::0.01||C4H6O4::[M+K]+::Erythrono-1,4-lactone::11996::1::156.989767::0.01||C4H6O4::[M+K]+::Methyl oxalate::7890::1::156.989767::0.01||C4H6O4::[M+K]+::Methylmalonate::1845::1::156.989767::0.01||C4H6O4::[M+K]+::Succinate::592::1::156.989767::0.01||C2H5O6P::[M+H]+::2-Phosphoglycolate::1183::1::156.989651::0.75 8 | M169T120 168.989654 120.0 520.0 [M+H]+::1::1 C3H5O6P::[M+H]+::Phosphoenolpyruvic acid::HMDB0000263::3::168.989651::0.02 C3H5O6P::[M+H]+::3-Phosphonopyruvate::2162::3::168.989651::0.02||C3H5O6P::[M+H]+::Phosphoenolpyruvate::614::3::168.989651::0.02 C3H5O6P::[M+H]+::Phosphoenolpyruvic acid::HMDB0000263::3::168.989651::0.02::2.0 9 | M337T121 336.972032 121.0 1040.0 [2M+H]+::1::2 C3H5O6P::[2M+H]+::Phosphoenolpyruvic acid::HMDB0000263::3::336.972026::0.02 C3H5O6P::[2M+H]+::3-Phosphonopyruvate::2162::3::336.972026::0.02||C3H5O6P::[2M+H]+::Phosphoenolpyruvate::614::3::336.972026::0.02 10 | M505T122 504.95441 122.5 390.0 [3M+H]+::1::3 C3H5O6P::[3M+H]+::Phosphoenolpyruvic acid::HMDB0000263::3::504.954401::0.02 
C3H5O6P::[3M+H]+::3-Phosphonopyruvate::2162::3::504.954401::0.02||C3H5O6P::[3M+H]+::Phosphoenolpyruvate::614::3::504.954401::0.02 11 | M197T150 197.005661 150.0 5112.38 C6H6O6::[M+Na]+::Dehydroascorbic acid::HMDB0001264::1::197.005659::0.01||C6H6O6::[M+Na]+::Dehydroascorbide(1-)::HMDB0062706::1::197.005659::0.01||C6H6O6::[M+Na]+::cis-Aconitic acid::HMDB0000072::1::197.005659::0.01||C6H6O6::[M+Na]+::trans-Aconitic acid::HMDB0000958::1::197.005659::0.01 C6H6O6::[M+Na]+::Aconitate Ion::38282::1::197.005659::0.01||C6H6O6::[M+Na]+::Dehydroascorbic acid::12423::1::197.005659::0.01||C6H6O6::[M+Na]+::cis-Aconitate::843::1::197.005659::0.01||C6H6O6::[M+Na]+::trans-Aconitate::1934::1::197.005659::0.01 12 | M213T160 213.000576 160.0 1018.075 C6H6O7::[M+Na]+::Oxalosuccinic acid::HMDB0003974::1::213.000574::0.01 C6H6O7::[M+Na]+::4-Hydroxy-Aconitate Ion::41990::1::213.000574::0.01||C6H6O7::[M+Na]+::Oxalosuccinate::3402::1::213.000574::0.01 13 | M215T170 215.016226 170.0 3859.8 [M+Na]+::1::1 C6H8O7::[M+Na]+::(1R,2R)-Isocitric acid::HMDB0033717::2::215.016224::0.01||C6H8O7::[M+Na]+::2,3-Diketo-L-gulonate::HMDB0006511::2::215.016224::0.01||C6H8O7::[M+Na]+::2,3-diketogulonate::HMDB0062803::2::215.016224::0.01||C6H8O7::[M+Na]+::Citric acid::HMDB0000094::2::215.016224::0.01||C6H8O7::[M+Na]+::D-Glucaro-1,4-lactone::HMDB0041862::2::215.016224::0.01||C6H8O7::[M+Na]+::D-threo-Isocitric acid::HMDB0001874::2::215.016224::0.01||C6H8O7::[M+Na]+::Diketogulonic acid::HMDB0005971::2::215.016224::0.01||C6H8O7::[M+Na]+::Isocitric acid::HMDB0000193::2::215.016224::0.01 
C6H8O7::[M+Na]+::(1R,2S)-1-Hydroxypropane-1,2,3-tricarboxylate::863::2::215.016224::0.01||C6H8O7::[M+Na]+::(1S,2S)-1-Hydroxypropane-1,2,3-tricarboxylate::3090::2::215.016224::0.01||C6H8O7::[M+Na]+::(4R,5S)-4,5,6-Trihydroxy-2,3-dioxohexanoate::3064::2::215.016224::0.01||C6H8O7::[M+Na]+::2,3-Dihydroxy-5-Oxo-Hexanedioate::37494::2::215.016224::0.01||C6H8O7::[M+Na]+::2,3-Dioxo-L-gulonate::13506::2::215.016224::0.01||C6H8O7::[M+Na]+::2,3-diketo-L-gulonate::20293::2::215.016224::0.01||C6H8O7::[M+Na]+::2,5-Didehydro-D-gluconate::2148::2::215.016224::0.01||C6H8O7::[M+Na]+::2-Dehydro-3-deoxy-D-glucarate::2740::2::215.016224::0.01||C6H8O7::[M+Na]+::4,5-Dehydro-D-Glucuronic Acid::36800::2::215.016224::0.01||C6H8O7::[M+Na]+::5-Dehydro-4-deoxy-D-glucarate::1000::2::215.016224::0.01||C6H8O7::[M+Na]+::5-keto-4-deoxy-D-glucarate::20308::2::215.016224::0.01||C6H8O7::[M+Na]+::Carboxymethyloxysuccinate::2580::2::215.016224::0.01||C6H8O7::[M+Na]+::Citrate::675::2::215.016224::0.01||C6H8O7::[M+Na]+::Isocitrate::781::2::215.016224::0.01 14 | M231T174 230.990164 173.5 1929.9 [M+K]+::1::1 C6H8O7::[M+K]+::(1R,2R)-Isocitric acid::HMDB0033717::2::230.990161::0.01||C6H8O7::[M+K]+::2,3-Diketo-L-gulonate::HMDB0006511::2::230.990161::0.01||C6H8O7::[M+K]+::2,3-diketogulonate::HMDB0062803::2::230.990161::0.01||C6H8O7::[M+K]+::Citric acid::HMDB0000094::2::230.990161::0.01||C6H8O7::[M+K]+::D-Glucaro-1,4-lactone::HMDB0041862::2::230.990161::0.01||C6H8O7::[M+K]+::D-threo-Isocitric acid::HMDB0001874::2::230.990161::0.01||C6H8O7::[M+K]+::Diketogulonic acid::HMDB0005971::2::230.990161::0.01||C6H8O7::[M+K]+::Isocitric acid::HMDB0000193::2::230.990161::0.01 
C6H8O7::[M+K]+::(1R,2S)-1-Hydroxypropane-1,2,3-tricarboxylate::863::2::230.990161::0.01||C6H8O7::[M+K]+::(1S,2S)-1-Hydroxypropane-1,2,3-tricarboxylate::3090::2::230.990161::0.01||C6H8O7::[M+K]+::(4R,5S)-4,5,6-Trihydroxy-2,3-dioxohexanoate::3064::2::230.990161::0.01||C6H8O7::[M+K]+::2,3-Dihydroxy-5-Oxo-Hexanedioate::37494::2::230.990161::0.01||C6H8O7::[M+K]+::2,3-Dioxo-L-gulonate::13506::2::230.990161::0.01||C6H8O7::[M+K]+::2,3-diketo-L-gulonate::20293::2::230.990161::0.01||C6H8O7::[M+K]+::2,5-Didehydro-D-gluconate::2148::2::230.990161::0.01||C6H8O7::[M+K]+::2-Dehydro-3-deoxy-D-glucarate::2740::2::230.990161::0.01||C6H8O7::[M+K]+::4,5-Dehydro-D-Glucuronic Acid::36800::2::230.990161::0.01||C6H8O7::[M+K]+::5-Dehydro-4-deoxy-D-glucarate::1000::2::230.990161::0.01||C6H8O7::[M+K]+::5-keto-4-deoxy-D-glucarate::20308::2::230.990161::0.01||C6H8O7::[M+K]+::Carboxymethyloxysuccinate::2580::2::230.990161::0.01||C6H8O7::[M+K]+::Citrate::675::2::230.990161::0.01||C6H8O7::[M+K]+::Isocitrate::781::2::230.990161::0.01 15 | M426T180 426.05225 180.0 1149.95 C12H19N4O7P2S::[M+H]+::Thiamine pyrophosphate::HMDB0001372::1::426.052245::0.01||C14H19NO10S2::[M+H]+::Glucosinalbin::HMDB0038401::1::426.052314::-0.15 C14H19NO10S2::[M+H]+::Sinalbin::40568::1::426.052314::-0.15 16 | M492T190 492.06041 190.0 1060.0 C M493T192 (13C) 1 14.4 C14H23N4O8P2S::[M+Na]+::2-(a-Hydroxyethyl)thiamine diphosphate::HMDB0003904::2::492.060405::0.01 17 | M493T192 493.063765 192.5 163.33 (13C) M492T190 C 1 14.4 C14H23N4O8P2S::[M+Na]+::2-(a-Hydroxyethyl)thiamine diphosphate::HMDB0003904::2::493.06376::0.01 18 | M550T200 550.06589 200.0 4549.65 C16H25N4O10P2S::[M+Na]+::3-Carboxy-1-hydroxypropylthiamine diphosphate::HMDB0006744::1::550.065884::0.01 19 | -------------------------------------------------------------------------------- /tests/test_data/tab_delimited_record.txt: -------------------------------------------------------------------------------- 1 | Compound_id Compound_common_name Compound_synonyms 
Molecular_weight Chemical_formula Smiles Links EC Reaction_equation Pathway 2 | 2-METHYL-6-SOLANYL-14-BENZOQUINONE 2-methyl-6-all-trans-nonaprenyl-1,4-benzoquinol 2-Methyl-6-nonaprenyl-benzene-1,4-diol*MSBQ*2-methyl-6-solanesyl-1,4-benzoquinol*2-methyl-6--all-trans-nonaprenyl-benzene-1,4-diol*2-methyl-6-solanyl-1,4-benzoquinol 737.203 C52 H80 O2 CC(=CCCC(C)=CCCC(=CCCC(C)=CCCC(C)=CCCC(C)=CCCC(C)=CCCC(=CCCC(=CCC1(=C(O)C(C)=CC(O)=C1))C)C)C)C CHEBI:75402*PUBCHEM:44237185*LIGAND-CPD:C17570*PUBCHEM:25245481 EC-2.1.1.295 S-adenosyl-L-methionine + 2-methyl-6-all-trans-nonaprenyl-1,4-benzoquinol -> S-adenosyl-L-homocysteine + plastoquinol-9 + H+ plastoquinol-9 biosynthesis I 3 | 5-METHYLTHIOADENOSINE S-methyl-5'-thioadenosine S-methyl-5'-thioadenosine*methylthioadenosine*5'-methylthioadenosine*5'-S-methyl-5'-thioadenosine*MTA*S-methyl-adenosine 297.331 C11 H15 N5 O3 S1 CSCC1(OC(C(O)C(O)1)N3(C=NC2(=C(N)N=CN=C23))) REFMET:5'-Methylthioadenosine*BIGG:5mta*METABOLIGHTS:MTBLC17509*HMDB:HMDB01173*DRUGBANK:DB02282*BIGG:34127*CHEMSPIDER:388321*PUBCHEM:439176*CHEBI:17509*L -------------------------------------------------------------------------------- /tests/test_data/xset_matrix.txt: -------------------------------------------------------------------------------- 1 | name mz rt sample01 sample02 sample03 sample04 sample05 sample06 sample07 sample08 sample09 sample10 sample11 sample12 sample13 sample14 sample15 sample16 sample17 sample18 sample19 sample20 sample21 sample22 sample23 sample24 sample25 sample26 sample27 sample28 sample29 sample30 sample31 sample32 sample33 sample34 sample35 sample36 sample37 sample38 sample39 sample40 sample41 sample42 sample43 sample44 sample45 sample46 sample47 sample48 sample49 sample50 2 | M127T60 126.979204 60 1348.35 1468.5 1348.35 1441.8 1361.7 1428.45 1428.45 1468.5 1455.15 1388.4 1468.5 1468.5 1388.4 1375.05 1428.45 1348.35 1428.45 1428.45 1441.8 1428.45 1375.05 1428.45 1415.1 1468.5 1361.7 1348.35 1388.4 1415.1 1415.1 1361.7 1428.45 1401.75 
1428.45 1441.8 1375.05 1468.5 1455.15 1455.15 1388.4 1348.35 1428.45 1468.5 1375.05 1375.05 1415.1 1401.75 1361.7 1361.7 1468.5 1455.15 3 | M135T70 135.028801 70 2606.04 2581.91 2654.3 2606.04 2654.3 2485.39 2509.52 2606.04 2606.04 2654.3 2654.3 2630.17 2557.78 2630.17 2437.13 2606.04 2630.17 2581.91 2606.04 2533.65 2581.91 2581.91 2557.78 2485.39 2485.39 2581.91 2630.17 2509.52 2606.04 2606.04 2461.26 2533.65 2485.39 2485.39 2557.78 2437.13 2630.17 2509.52 2606.04 2533.65 2533.65 2437.13 2630.17 2654.3 2630.17 2485.39 2485.39 2606.04 2461.26 2557.78 4 | M139T80 139.000181 80 3796.58 3907.16 3944.02 3944.02 3870.3 3796.58 3833.44 3907.16 4017.74 3980.88 3907.16 3944.02 3944.02 3796.58 3980.88 3759.72 3870.3 3759.72 3944.02 3870.3 4054.6 4017.74 3796.58 3833.44 4017.74 3833.44 3759.72 3870.3 4054.6 3907.16 3833.44 3980.88 3722.86 3944.02 3870.3 3944.02 3759.72 3907.16 3796.58 3944.02 3944.02 3980.88 3759.72 3722.86 3833.44 3796.58 3907.16 3796.58 3796.58 3833.44 5 | M147T90 147.028801 90 2268.06 2400.18 2268.06 2290.08 2246.04 2312.1 2422.2 2268.06 2246.04 2422.2 2422.2 2224.02 2290.08 2378.16 2400.18 2268.06 2356.14 2290.08 2246.04 2422.2 2422.2 2378.16 2378.16 2290.08 2400.18 2400.18 2334.12 2400.18 2246.04 2312.1 2422.2 2224.02 2400.18 2246.04 2356.14 2268.06 2334.12 2400.18 2290.08 2268.06 2400.18 2400.18 2268.06 2400.18 2400.18 2312.1 2268.06 2334.12 2334.12 2400.18 6 | M155T100 154.995096 100 2296.9 2430.7 2274.6 2319.2 2252.3 2296.9 2252.3 2453 2453 2252.3 2453 2274.6 2408.4 2319.2 2363.8 2430.7 2408.4 2252.3 2341.5 2408.4 2453 2319.2 2386.1 2408.4 2341.5 2363.8 2341.5 2296.9 2319.2 2408.4 2386.1 2363.8 2319.2 2252.3 2252.3 2341.5 2453 2274.6 2363.8 2341.5 2341.5 2252.3 2252.3 2453 2319.2 2430.7 2319.2 2430.7 2319.2 2430.7 7 | M157T110 156.989769 110 1363.74 1457.33 1377.11 1363.74 1417.22 1350.37 1443.96 1417.22 1457.33 1457.33 1430.59 1443.96 1377.11 1443.96 1377.11 1470.7 1403.85 1430.59 1443.96 1363.74 1417.22 1457.33 1470.7 1363.74 1363.74 1457.33 
1417.22 1377.11 1403.85 1443.96 1417.22 1377.11 1417.22 1350.37 1443.96 1377.11 1350.37 1350.37 1403.85 1417.22 1377.11 1470.7 1470.7 1350.37 1350.37 1443.96 1443.96 1430.59 1377.11 1363.74 8 | M169T120 168.989654 120 515 515 525 550 510 505 520 515 525 545 515 525 525 530 540 520 520 510 525 520 540 510 550 520 510 520 505 525 550 530 510 530 510 525 505 510 530 525 510 550 525 545 545 515 520 520 550 520 540 505 9 | M337T121 336.972032 121 1030 1030 1050 1100 1020 1010 1040 1030 1050 1090 1030 1050 1050 1060 1080 1040 1040 1020 1050 1040 1080 1020 1100 1040 1020 1040 1010 1050 1100 1060 1020 1060 1020 1050 1010 1020 1060 1050 1020 1100 1050 1090 1090 1030 1040 1040 1100 1040 1080 1010 10 | M505T122 504.954410 122.5 386.25 386.25 393.75 412.5 382.5 378.75 390 386.25 393.75 408.75 386.25 393.75 393.75 397.5 405 390 390 382.5 393.75 390 405 382.5 412.5 390 382.5 390 378.75 393.75 412.5 397.5 382.5 397.5 382.5 393.75 378.75 382.5 397.5 393.75 382.5 412.5 393.75 408.75 408.75 386.25 390 390 412.5 390 405 378.75 11 | M197T150 197.005661 150 5160.61 5160.61 4919.46 4967.69 4919.46 5112.38 5208.84 5112.38 5160.61 5160.61 5015.92 5257.07 5112.38 5208.84 5064.15 4871.23 5305.3 5112.38 5160.61 5160.61 5112.38 4871.23 4919.46 5160.61 5015.92 5257.07 5015.92 4967.69 4967.69 5305.3 4967.69 5160.61 5208.84 4967.69 5112.38 5305.3 4967.69 5208.84 5015.92 5160.61 5208.84 4919.46 5257.07 4967.69 4871.23 5160.61 5112.38 4967.69 5257.07 5305.3 12 | M213T160 213.000576 160 974.65 1013.25 984.3 1061.5 1061.5 974.65 1032.55 1032.55 1051.85 1042.2 1022.9 1061.5 1003.6 984.3 1051.85 1003.6 1042.2 1013.25 984.3 1061.5 1061.5 993.95 1003.6 1042.2 1003.6 1051.85 984.3 1013.25 1051.85 984.3 1042.2 1032.55 974.65 1051.85 1013.25 984.3 993.95 1003.6 1042.2 974.65 1003.6 984.3 1032.55 1032.55 1051.85 993.95 1061.5 1042.2 984.3 1042.2 13 | M215T170 215.016226 170 3859.8 3786.28 3970.08 3823.04 4006.84 3823.04 3970.08 4006.84 3859.8 3896.56 3749.52 3749.52 3786.28 3970.08 3823.04 3859.8 3970.08 
3933.32 3970.08 3823.04 3970.08 3896.56 3823.04 3712.76 4006.84 3712.76 3970.08 3823.04 4006.84 3896.56 3823.04 3896.56 3970.08 3786.28 3712.76 3712.76 3970.08 3859.8 3933.32 3786.28 3786.28 3786.28 3970.08 3712.76 3749.52 3712.76 4006.84 4043.6 3859.8 3933.32 14 | M231T174 230.990164 173.5 1929.9 1893.14 1985.04 1911.52 2003.42 1911.52 1985.04 2003.42 1929.9 1948.28 1874.76 1874.76 1893.14 1985.04 1911.52 1929.9 1985.04 1966.66 1985.04 1911.52 1985.04 1948.28 1911.52 1856.38 2003.42 1856.38 1985.04 1911.52 2003.42 1948.28 1911.52 1948.28 1985.04 1893.14 1856.38 1856.38 1985.04 1929.9 1966.66 1893.14 1893.14 1893.14 1985.04 1856.38 1874.76 1856.38 2003.42 2021.8 1929.9 1966.66 15 | M426T180 426.052250 180 1144.5 1155.4 1199 1122.7 1155.4 1133.6 1122.7 1188.1 1111.8 1144.5 1133.6 1133.6 1166.3 1166.3 1144.5 1166.3 1122.7 1166.3 1155.4 1177.2 1155.4 1144.5 1199 1177.2 1100.9 1133.6 1177.2 1144.5 1177.2 1177.2 1122.7 1100.9 1177.2 1177.2 1166.3 1111.8 1177.2 1199 1144.5 1122.7 1100.9 1111.8 1122.7 1188.1 1144.5 1111.8 1144.5 1199 1177.2 1155.4 16 | M492T190 492.060410 190 1080 1100 1090 1060 1100 1070 1090 1030 1080 1050 1070 1060 1040 1100 1080 1100 1100 1090 1020 1060 1050 1080 1020 1060 1100 1040 1040 1030 1020 1030 1060 1010 1100 1030 1090 1090 1060 1050 1060 1060 1090 1060 1080 1080 1080 1030 1070 1040 1060 1060 17 | M493T192 493.063765 192.5 166.41 169.49 167.95 163.33 169.49 164.87 167.95 158.71 166.41 161.79 164.87 163.33 160.25 169.49 166.41 169.49 169.49 167.95 157.16 163.33 161.79 166.41 157.16 163.33 169.49 160.25 160.25 158.71 157.16 158.71 163.33 155.62 169.49 158.71 167.95 167.95 163.33 161.79 163.33 163.33 167.95 163.33 166.41 166.41 166.41 158.71 164.87 160.25 163.33 163.33 18 | M550T200 550.065890 200 4766.3 4506.32 4419.66 4549.65 4722.97 4376.33 4462.99 4419.66 4549.65 4636.31 4376.33 4679.64 4722.97 4376.33 4549.65 4679.64 4376.33 4722.97 4679.64 4462.99 4766.3 4766.3 4766.3 4419.66 4506.32 4636.31 4419.66 4766.3 4419.66 4592.98 4506.32 4766.3 
4462.99 4419.66 4549.65 4766.3 4679.64 4419.66 4766.3 4549.65 4592.98 4376.33 4419.66 4419.66 4592.98 4549.65 4766.3 4636.31 4636.31 4462.99 19 | -------------------------------------------------------------------------------- /tests/test_db_parsers.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import os 5 | import unittest 6 | from collections import OrderedDict 7 | from beamspy.db_parsers import * 8 | from tests.utils import to_test_data 9 | import numpy as np 10 | 11 | 12 | class DbParsersTestCase(unittest.TestCase): 13 | 14 | def setUp(self): 15 | self.path, f = os.path.split(os.path.dirname(os.path.abspath(__file__))) 16 | 17 | def test_parse_biocyc(self): 18 | records = list(parse_biocyc(to_test_data("biocyc_record.txt"))) 19 | self.assertEqual(records[0]['UNIQUE-ID'], 'PANTOTHENATE') 20 | 21 | def test_parse_sdf(self): 22 | records = list(parse_sdf(to_test_data("sdf_record.sdf"))) 23 | self.assertEqual(records[0]['ChEBI ID'], 'CHEBI:90') 24 | self.assertEqual(records[0]['ChEBI Name'], '(-)-epicatechin') 25 | 26 | def test_parse_xml(self): 27 | records = list(parse_xml(to_test_data("hmdb_record.xml"))) 28 | self.assertEqual(records[0]['accession'], 'HMDB0000001') 29 | 30 | def test_parse_kegg_compound(self): 31 | records = list(parse_kegg_compound(to_test_data("kegg_record.txt"))) 32 | self.assertEqual(records[0]['ENTRY'], 'C00001') 33 | 34 | def test_parse_delimited(self): 35 | records = list(parse_delimited(to_test_data("tab_delimited_record.txt"), "\t")) 36 | self.assertEqual(records[0]["Compound_id"], "2-METHYL-6-SOLANYL-14-BENZOQUINONE") 37 | 38 | def parse_nist_database(self): 39 | records = parse_nist_database(os.path.join(self.path, "beamspy", "data", "nist_database.txt")) 40 | self.assertEqual(records[0]["Atomic Number"], 1) 41 | self.assertEqual(records[0]["Atomic Symbol"], "H") 42 | self.assertEqual(records[0]["Mass Number"], 1) 43 | 
class GroupFeaturesTestCase(unittest.TestCase):
    """Regression tests comparing ``group_features`` output with stored reference databases."""

    def setUp(self):
        # Combined peaklist / data-matrix frame used as input for every grouping run.
        self.df = combine_peaklist_matrix(to_test_data("peaklist_lcms_pos_theoretical.txt"),
                                          to_test_data("dataMatrix_lcms_theoretical.txt"))

    def _assert_groups_match_reference(self, fn_sql, **kwargs):
        """Run ``group_features`` and compare its ``groups`` table with the reference.

        The result is written to ``test_results/<fn_sql>`` and compared row by
        row with ``test_data/<fn_sql>``: the first six columns must be equal,
        the remaining columns are compared with ``assert_almost_equal``.

        :param fn_sql: file name shared by the output and the reference database
        :param kwargs: keyword arguments forwarded unchanged to ``group_features``
        """
        db_out = to_test_results(fn_sql)
        group_features(self.df, db_out, **kwargs)

        records = sqlite_records(db_out, "groups")
        records_comp = sqlite_records(to_test_data(fn_sql), "groups")

        # Without this check an empty or truncated result table would pass,
        # because the row-wise loop below would simply run fewer iterations.
        self.assertEqual(len(records), len(records_comp))
        for record, record_comp in zip(records, records_comp):
            self.assertEqual(record[0:6], record_comp[0:6])
            np.testing.assert_almost_equal(record[6:], record_comp[6:])

    def test_group_features(self):
        self._assert_groups_match_reference(
            "results_pearson.sqlite",
            max_rt_diff=5.0, coeff_thres=0.7, pvalue_thres=1.0,
            method="pearson", positive=False, block=5000, ncpus=None)

        self._assert_groups_match_reference(
            "results_pearson_all.sqlite",
            max_rt_diff=200.0, coeff_thres=0.0, pvalue_thres=1.0,
            method="pearson", positive=False, block=5000, ncpus=None)

        # Same reference as above, but with a small block size and a single CPU.
        self._assert_groups_match_reference(
            "results_pearson_all.sqlite",
            max_rt_diff=200.0, coeff_thres=0.0, pvalue_thres=1.0,
            method="pearson", positive=False, block=20, ncpus=1)

        self._assert_groups_match_reference(
            "results_spearman.sqlite",
            max_rt_diff=5.0, coeff_thres=0.7, pvalue_thres=1.0,
            method="spearman", positive=False, block=5000, ncpus=None)
self.assertEqual(self.df_peaklist["name"].iloc[0], "M127T60") 21 | self.assertEqual(self.df_peaklist["name"].iloc[-1], "M550T200") 22 | 23 | self.assertEqual(self.df_peaklist["mz"].iloc[0], 126.979204) 24 | self.assertEqual(self.df_peaklist["mz"].iloc[-1], 550.065890) 25 | 26 | self.assertEqual(self.df_peaklist["rt"].iloc[0], 60) 27 | self.assertEqual(self.df_peaklist["rt"].iloc[-1], 200) 28 | 29 | self.assertEqual(self.df_peaklist["intensity"].iloc[0], 1421.78) 30 | self.assertEqual(self.df_peaklist["intensity"].iloc[-1], 4549.65) 31 | 32 | self.df_peaklist = read_peaklist(to_test_data("peaklist_lcms_pos_theoretical_no_name.txt")) 33 | 34 | self.assertEqual(self.df_peaklist["name"].iloc[0], "M127T60") 35 | self.assertEqual(self.df_peaklist["name"].iloc[-1], "M550T200") 36 | 37 | self.assertEqual(self.df_peaklist["mz"].iloc[0], 126.979204) 38 | self.assertEqual(self.df_peaklist["mz"].iloc[-1], 550.065890) 39 | 40 | self.assertEqual(self.df_peaklist["rt"].iloc[0], 60) 41 | self.assertEqual(self.df_peaklist["rt"].iloc[-1], 200) 42 | 43 | self.assertEqual(self.df_peaklist["intensity"].iloc[0], 1421.78) 44 | self.assertEqual(self.df_peaklist["intensity"].iloc[-1], 4549.65) 45 | 46 | self.df_peaklist = read_peaklist(to_test_data("peaklist_dims_pos_theoretical.txt")) 47 | 48 | self.assertEqual(self.df_peaklist["name"].iloc[0], "126_979204") 49 | self.assertEqual(self.df_peaklist["name"].iloc[-1], "550_065890") 50 | 51 | self.assertEqual(self.df_peaklist["mz"].iloc[0], 126.979204) 52 | self.assertEqual(self.df_peaklist["mz"].iloc[-1], 550.065890) 53 | 54 | self.assertEqual(self.df_peaklist["intensity"].iloc[0], 1421.78) 55 | self.assertEqual(self.df_peaklist["intensity"].iloc[-1], 4549.65) 56 | 57 | def test_combine_peaklist_matrix(self): 58 | df = combine_peaklist_matrix(to_test_data("peaklist_lcms_pos_theoretical.txt"), to_test_data("dataMatrix_lcms_theoretical.txt")) 59 | 60 | self.assertEqual(df["name"].iloc[0], "M127T60") 61 | self.assertEqual(df["name"].iloc[-1], 
"M550T200") 62 | 63 | self.assertEqual(df["mz"].iloc[0], 126.979204) 64 | self.assertEqual(df["mz"].iloc[-1], 550.065890) 65 | 66 | self.assertEqual(df["rt"].iloc[0], 60) 67 | self.assertEqual(df["rt"].iloc[-1], 200) 68 | 69 | self.assertEqual(df["intensity"].iloc[0], 1421.775) 70 | self.assertEqual(df["intensity"].iloc[-1], 4549.65) 71 | 72 | df = combine_peaklist_matrix(to_test_data("peaklist_dims_pos_theoretical.txt"), to_test_data("dataMatrix_dims_theoretical.txt")) 73 | 74 | self.assertEqual(df["name"].iloc[0], "126_979204") 75 | self.assertEqual(df["name"].iloc[-1], "550_065890") 76 | 77 | self.assertEqual(df["mz"].iloc[0], 126.979204) 78 | self.assertEqual(df["mz"].iloc[-1], 550.065890) 79 | 80 | self.assertEqual(df["intensity"].iloc[0], 1421.78) 81 | self.assertEqual(df["intensity"].iloc[-1], 4549.65) 82 | 83 | def test_read_xset_matrix(self): 84 | df = read_xset_matrix(to_test_data("xset_matrix.txt"), "sample01") 85 | 86 | self.assertEqual(df["name"].iloc[0], "M127T60") 87 | self.assertEqual(df["name"].iloc[-1], "M550T200") 88 | 89 | np.testing.assert_almost_equal(df["mz"].iloc[0], 126.979204, 8) 90 | np.testing.assert_almost_equal(df["mz"].iloc[-1], 550.065890, 8) 91 | 92 | self.assertEqual(df["rt"].iloc[0], 60) 93 | self.assertEqual(df["rt"].iloc[-1], 200) 94 | 95 | np.testing.assert_almost_equal(df["intensity"].iloc[0], 1421.775, 8) 96 | np.testing.assert_almost_equal(df["intensity"].iloc[-1], 4549.65, 8) 97 | 98 | def test_read_molecular_formulae(self): 99 | 100 | db_molecular_formula = os.path.join(self.path, "beamspy", "data", "db_mf.txt") 101 | records = read_molecular_formulae(db_molecular_formula, separator="\t") 102 | self.assertEqual(len(records), 13061) 103 | 104 | record_01 = [("composition", OrderedDict([('C', 1), ('H', 2), ('O', 1)])), ('CHNOPS', True), 105 | ('exact_mass', 30.010565), 106 | ('HC', 1), ('NOPSC', 1), ('lewis', 1), ('senior', 1), ('double_bond_equivalents', 1.0)] 107 | record_02 = [("composition", OrderedDict([('C', 17), ('H', 
19), ('Cl', 1), ('N', 2), ('O', 1), ('S', 1)])), ('CHNOPS', False), 108 | ('exact_mass', 334.090662), 109 | ('HC', 1), ('NOPSC', 1), ('lewis', 0), ('senior', 1), ('double_bond_equivalents', 9.0)] 110 | record_03 = [("composition", OrderedDict([('C', 48), ('H', 86), ('O', 18), ('P', 2)])), ('CHNOPS', True), 111 | ('exact_mass', 1012.528940), 112 | ('HC', 1), ('NOPSC', 1), ('lewis', 1), ('senior', 1), ('double_bond_equivalents', 6.0)] 113 | 114 | self.assertEqual(records[0], OrderedDict(record_01)) 115 | self.assertEqual(records[5000], OrderedDict(record_02)) 116 | self.assertEqual(records[-1], OrderedDict(record_03)) 117 | 118 | def test_read_compounds(self): 119 | 120 | db_compounds = os.path.join(self.path, "beamspy", "data", "db_compounds.txt") 121 | records = read_compounds(db_compounds, separator="\t") 122 | self.assertEqual(len(records), 31644) 123 | record_01 = [("composition", OrderedDict([('C', 10), ('Cl', 10), ('O', 1)])), ('CHNOPS', False), 124 | ('exact_mass', 485.683441), 125 | ('compound_id', 1638), 126 | ('compound_name', 'Chlordecone'), 127 | ('molecular_formula', 'C10Cl10O')] 128 | record_02 = [("composition", OrderedDict([('C', 24), ('H', 42), ('O', 21)])), ('CHNOPS', True), 129 | ('exact_mass', 666.221858), 130 | ('compound_id', 17543), 131 | ('compound_name', '6G,6-kestotetraose'), 132 | ('molecular_formula', 'C24H42O21')] 133 | record_03 = [("composition", OrderedDict([('H', 1), ('N', 1), ('O', 3)])), ('CHNOPS', True), 134 | ('exact_mass', 62.995643), 135 | ('compound_id', 40762), 136 | ('compound_name', 'Peroxynitrite'), 137 | ('molecular_formula', 'HNO3')] 138 | 139 | self.assertEqual(records[0], OrderedDict(record_01)) 140 | self.assertEqual(records[14000], OrderedDict(record_02)) 141 | self.assertEqual(records[-1], OrderedDict(record_03)) 142 | 143 | 144 | def test_read_adducts(self): 145 | adducts_lib = os.path.join(self.path, "beamspy", "data", "adducts.txt") 146 | records_pos = read_adducts(adducts_lib, "pos") 147 | records_pos_comp = 
OrderedDict([('[M+H]+', OrderedDict([('mass', 1.007276), ('charge', 1)])), 148 | ('[M+Na]+', OrderedDict([('mass', 22.989221), ('charge', 1)])), 149 | ('[M+K]+', OrderedDict([('mass', 38.963158), ('charge', 1)]))]) 150 | self.assertEqual(records_pos.lib, OrderedDict(records_pos_comp)) 151 | records_neg = read_adducts(adducts_lib, "neg") 152 | records_neg_comp = OrderedDict([('[M-H]-', OrderedDict([('mass', -1.007276), ('charge', 1)])), 153 | ('[M+Na-2H]-', OrderedDict([('mass', 20.974668), ('charge', 1)])), 154 | ('[M+Cl]-', OrderedDict([('mass', 34.969401), ('charge', 1)])), 155 | ('[M+K-2H]-', OrderedDict([('mass', 36.948605), ('charge', 1)])), 156 | ('[M+Hac-H]-', OrderedDict([('mass', 59.013853), ('charge', 1)]))]) 157 | self.assertEqual(records_neg.lib, OrderedDict(records_neg_comp)) 158 | 159 | def test_read_isotopes(self): 160 | isotopes_lib = os.path.join(self.path, "beamspy", "data", "isotopes.txt") 161 | records_pos = read_isotopes(isotopes_lib, "pos") 162 | records_pos_comp = [OrderedDict([('C', {'abundance': 98.93}), ('(13C)', {'abundance': 1.07}), ('mass_difference', 0.5016775), ('charge', 2)]), 163 | OrderedDict([('C', {'abundance': 98.93}), ('(13C)', {'abundance': 1.07}), ('mass_difference', 1.003355), ('charge', 1)]), 164 | OrderedDict([('S', {'abundance': 94.99}), ('(34S)', {'abundance': 4.25}), ('mass_difference', 1.995796), ('charge', 1)]), 165 | OrderedDict([('K', {'abundance': 93.25}), ('(41K)', {'abundance': 6.73}), ('mass_difference', 1.998119), ('charge', 1)])] 166 | self.assertEqual(records_pos.lib, records_pos_comp) 167 | records_neg = read_isotopes(isotopes_lib, "neg") 168 | records_neg_comp = [OrderedDict([('C', {'abundance': 98.93}), ('(13C)', {'abundance': 1.07}), ('mass_difference', 0.5016775), ('charge', 2)]), 169 | OrderedDict([('C', {'abundance': 98.93}), ('(13C)', {'abundance': 1.07}), ('mass_difference', 1.003355), ('charge', 1)]), 170 | OrderedDict([('S', {'abundance': 94.99}), ('(34S)', {'abundance': 4.25}), ('mass_difference', 
class LibrariesTestCase(unittest.TestCase):
    """Tests for the isotope / adduct library objects and ``nist_database_to_pyteomics``."""

    def setUp(self):
        # Parent of the directory that contains this test module; the data
        # files used below are resolved as <path>/beamspy/data/<file>.
        self.path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

    def test_read_isotopes(self):
        lib_isotopes = read_isotopes(os.path.join(self.path, "beamspy", "data", "isotopes.txt"), "pos")
        # assertIn reports both operands on failure, unlike assertTrue(a in b).
        self.assertIn("in library", str(lib_isotopes))

    def test_read_adducts(self):
        lib_adducts = read_adducts(os.path.join(self.path, "beamspy", "data", "adducts.txt"), "pos")
        self.assertIn("in library", str(lib_adducts))

        # An added entry is retrievable through ``lib`` under its name.
        lib_adducts.add("test", 100.0, 1)
        self.assertEqual(lib_adducts.lib["test"]["mass"], 100.0)
        self.assertEqual(lib_adducts.lib["test"]["charge"], 1)

        # Removing with "*" is expected to leave an empty library.
        lib_adducts.remove("*")
        self.assertEqual(lib_adducts.lib, OrderedDict())

    # def test_mass_differences(self):
    #     lib_differences = read_mass_differences(os.path.join(self.path, "beamspy", "data", "adducts_differences.txt"), "pos")
    #     self.assertTrue("in library" in lib_differences.__str__())
    #
    #     lib_differences.remove("*", "*")
    #     self.assertEqual(lib_differences.lib, [])

    def test_nist_database_to_pyteomics(self):
        nist_database = nist_database_to_pyteomics(os.path.join(self.path, "beamspy", "data", "nist_database.txt"))
        # Expected entry stored under index 0 for each element.
        expected = [("C", (12.0, 1.0)),
                    ("H", (1.00782503223, 1.0)),
                    ("N", (14.00307400443, 1.0)),
                    ("O", (15.99491461957, 1.0)),
                    ("P", (30.97376199842, 1.0)),
                    ("S", (31.9720711744, 1.0))]
        for element, entry in expected:
            self.assertEqual(nist_database[element][0], entry)
report(to_test_data("results_pearson_all.sqlite"), to_test_results("test_report_02.pdf"), 23 | "r_value", "p_value", "ppm_error", "adduct") 24 | statinfo = os.stat(to_test_results("test_report_02.pdf")) 25 | # print(statinfo.st_size) 26 | self.assertTrue(statinfo.st_size > 10000) 27 | 28 | if __name__ == '__main__': 29 | unittest.main() 30 | -------------------------------------------------------------------------------- /tests/test_results/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /tests/test_statistics.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import unittest 5 | 6 | import numpy as np 7 | import pandas as pd 8 | 9 | from beamspy.in_out import combine_peaklist_matrix 10 | from beamspy.statistics import correlation_coefficients, correlation_graphs 11 | from tests.utils import to_test_data 12 | 13 | 14 | class StatisticsTestCase(unittest.TestCase): 15 | 16 | def setUp(self): 17 | self.df = combine_peaklist_matrix(to_test_data("peaklist_lcms_pos_theoretical.txt"), to_test_data("dataMatrix_lcms_theoretical.txt")) 18 | 19 | def test_correlation_coefficients(self): 20 | df_coeffs_comp = pd.DataFrame({"name_a": ["M169T120", "M169T120", "M337T121", "M215T170", "M492T190"], 21 | "name_b": ["M337T121", "M505T122", "M505T122", "M231T174", "M493T192"], 22 | "r_value": [np.float64(1.0), np.float64(1.0), np.float64(1.0), np.float64(1.0), np.float64(1.0)], 23 | "p_value": [np.float64(0.0), np.float64(0.0), np.float64(0.0), np.float64(0.0), np.float64(5.85415087865495e-157)]}, columns=["name_a", "name_b", "r_value", "p_value"]) 24 | df_coeffs = correlation_coefficients(self.df, max_rt_diff=5.0, coeff_thres=0.7, pvalue_thres=1.0, method="pearson", positive=False, block=5000, ncpus=None) 25 | 
pd.testing.assert_frame_equal(df_coeffs, df_coeffs_comp) 26 | 27 | df_coeffs_comp = pd.DataFrame({"name_a": ["M169T120", "M169T120", "M337T121", "M215T170", "M492T190"], 28 | "name_b": ["M337T121", "M505T122", "M505T122", "M231T174", "M493T192"], 29 | "r_value": [np.float64(1.0), np.float64(1.0), np.float64(1.0), np.float64(1.0), np.float64(1.0)], 30 | "p_value": [np.float64(0.0), np.float64(0.0), np.float64(0.0), np.float64(0.0), np.float64(0.0)]}, columns=["name_a", "name_b", "r_value", "p_value"]) 31 | 32 | df_coeffs = correlation_coefficients(self.df, max_rt_diff=5.0, coeff_thres=0.7, pvalue_thres=1.0, method="spearman", positive=False, block=5000, ncpus=None) 33 | pd.testing.assert_frame_equal(df_coeffs, df_coeffs_comp) 34 | 35 | df_coeffs = correlation_coefficients(self.df, max_rt_diff=50000.0, coeff_thres=0.0, pvalue_thres=1.0, method="pearson", positive=False, block=5000, ncpus=None) 36 | self.assertEqual(df_coeffs.shape, (136, 4)) 37 | 38 | def test_correlation_graphs(self): 39 | df_coeffs = correlation_coefficients(self.df, max_rt_diff=5.0, coeff_thres=0.7, pvalue_thres=1.0, method="pearson", positive=False, block=5000, ncpus=None) 40 | graph = correlation_graphs(df_coeffs, self.df) 41 | 42 | n0 = list(graph.nodes(data=True))[0] 43 | n1 = list(graph.nodes(data=True))[-1] 44 | 45 | e0 = list(graph.edges(data=True))[0] 46 | e1 = list(graph.edges(data=True))[-1] 47 | 48 | # order is different between python 2 and 3 49 | np.testing.assert_almost_equal([n0[1]["mz"], n0[1]["intensity"], n0[1]["rt"]], [168.989654, 520.0, 120.0]) 50 | np.testing.assert_almost_equal([n1[1]["mz"], n1[1]["intensity"], n1[1]["rt"]], [493.063765, 163.33, 192.5]) 51 | np.testing.assert_almost_equal([e0[2]["rvalue"], e0[2]["pvalue"], e0[2]["mzdiff"], e0[2]["rtdiff"]], [1.0, 0.0, 167.982378, 1.0]) 52 | np.testing.assert_almost_equal([e1[2]["rvalue"], e1[2]["pvalue"], e1[2]["mzdiff"], e1[2]["rtdiff"]], [1.0, 0.0, 1.003355, 2.5]) 53 | 54 | #nx.write_gml(graph, 
def to_test_data(*args):
    """Return the path built from ``args`` below the ``test_data`` directory next to this module."""
    return os.path.join(os.path.dirname(os.path.realpath(__file__)), "test_data", *args)


def to_test_results(*args):
    """Return the path built from ``args`` below the ``test_results`` directory next to this module."""
    return os.path.join(os.path.dirname(os.path.realpath(__file__)), "test_results", *args)


def sqlite_records(db, table):
    """Return all rows of ``table`` in the SQLite database file ``db``.

    :param db: path of the SQLite database
    :param table: table name; it is interpolated into the statement (SQL
        identifiers cannot be bound as parameters), so only pass trusted names
    :return: list of row tuples as returned by ``cursor.fetchall()``
    :raises sqlite3.Error: propagated from the query, e.g. for a missing table
    """
    conn = sqlite3.connect(db)
    try:
        cursor = conn.cursor()
        cursor.execute("select * from {}".format(table))
        return cursor.fetchall()
    finally:
        # Close even when the query fails, so the database file is not left open.
        conn.close()


def sqlite_count(db, table):
    """Return the number of rows of ``table`` in the SQLite database file ``db``.

    :param db: path of the SQLite database
    :param table: table name; it is interpolated into the statement (SQL
        identifiers cannot be bound as parameters), so only pass trusted names
    :return: row count reported by ``select count(*)``
    :raises sqlite3.Error: propagated from the query, e.g. for a missing table
    """
    conn = sqlite3.connect(db)
    try:
        cursor = conn.cursor()
        cursor.execute("select count(*) from {}".format(table))
        return cursor.fetchone()[0]
    finally:
        # Close even when the query fails, so the database file is not left open.
        conn.close()