├── .DS_Store
├── .github
└── workflows
│ └── ci.yml
├── .gitignore
├── .readthedocs.yaml
├── .spr.yml
├── LICENSE.txt
├── MoleculeResolver.png
├── README.md
├── apply.py
├── docs
├── Makefile
├── make.bat
├── requirements.txt
└── source
│ ├── conf.py
│ └── index.rst
├── molecule_test_set
├── benchmark_molecule_names.json
└── get_data_generate_identifiers.py
├── moleculeresolver
├── SqliteMoleculeCache.py
├── __init__.py
├── molecule.py
├── moleculeresolver.py
├── opsin-cli-2.8.0-jar-with-dependencies.jar
└── rdkitmods.py
├── poetry.lock
├── pyproject.toml
└── tests
├── benchmark_component_molecules.json
├── benchmark_component_molecules_iupac.json
├── get_iupac.py
├── rdkit_normalization_exceptions.py
└── test_integration.py
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MoleculeResolver/molecule-resolver/15253f702a3ca823c476e0551ff4f895f017cc68/.DS_Store
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
# CI pipeline: build the package once, test the built wheel on every supported
# Python version, then tag/release and publish when the built version differs
# from the latest version published on PyPI.
on:
  push:
  pull_request:
    branches:
      # Branches from forks have the form 'user:branch-name' so we only run
      # this job on pull_request events for branches that look like fork
      # branches. Without this we would end up running this job twice for non
      # forked PRs, once for the push and then once for opening the PR.
      - '**:**'


jobs:
  # Build the package
  build:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Install python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'

      - name: Install poetry
        uses: Gr1N/setup-poetry@v8

      - name: Build package
        run: poetry build

      - name: Save built package
        uses: actions/upload-artifact@v4
        with:
          name: dist
          path: dist/
          retention-days: 1


  # Run pytest using built package
  test:
    needs: build
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Quoted so that 3.10 is not parsed as the float 3.1
        python: ["3.10", "3.11", "3.12", "3.13"]

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Install python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python }}
          cache: 'pip'
          cache-dependency-path: "poetry.lock"

      - name: Download built package
        uses: actions/download-artifact@v4
        with:
          name: dist

      - name: Install package and pytest
        shell: bash
        run: |
          WHL_NAME=$(ls molecule_resolver-*.whl)
          pip install "${WHL_NAME}" pytest

      - name: Run tests
        shell: bash
        run: pytest

  # Tag and release the package
  release:
    needs: test
    runs-on: ubuntu-latest
    permissions:
      contents: write

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Install python
        uses: actions/setup-python@v4
        with:
          python-version: '3.10'

      - name: Download built package
        uses: actions/download-artifact@v4
        with:
          name: dist
          path: dist/

      - name: Determine the version for this release from the build
        id: current
        shell: bash
        run: |
          BUILD_VER="$(ls dist/molecule_resolver-*.tar.gz)"
          echo "Path: $BUILD_VER"
          # Derive the version from the sdist file name by stripping the
          # directory, the '.tar.gz' suffix and the 'molecule_resolver-' prefix.
          # This works for versions of any length (e.g. 0.3.10 or 1.10.2).
          VERSION="$(basename "$BUILD_VER" .tar.gz)"
          VERSION="${VERSION#molecule_resolver-}"
          if [[ $VERSION =~ ^[0-9] ]]; then
            echo "version=$VERSION" >> "$GITHUB_OUTPUT"
            echo "Version of build: $VERSION"
          else
            # Fail here: continuing would tag and release with an empty version.
            echo "No version found"
            exit 1
          fi

      - name: Install coveo-pypi-cli
        run: pip install coveo-pypi-cli

      - name: Get latest published version
        id: published
        run: |
          PUB_VER="$(pypi current-version molecule-resolver)"
          echo "version=$PUB_VER" >> "$GITHUB_OUTPUT"
          echo "Latest published version: $PUB_VER"

      - name: Tag repository
        shell: bash
        id: get-next-tag
        if: (steps.current.outputs.version != steps.published.outputs.version)
        env:
          # Passed through the environment instead of being interpolated into
          # the script text, so the values are never parsed as shell code.
          TAG_NAME: ${{ steps.current.outputs.version }}
          COMMIT_SHA: ${{ github.sha }}
        run: |
          echo "tag-name=$TAG_NAME" >> "$GITHUB_OUTPUT"
          echo "This release will be tagged as $TAG_NAME"
          git config user.name "github-actions"
          git config user.email "actions@users.noreply.github.com"
          git tag --annotate --message="Automated tagging system" "$TAG_NAME" "$COMMIT_SHA"

      - name: Push the tag
        if: (steps.current.outputs.version != steps.published.outputs.version)
        id: push_tag
        shell: bash
        env:
          TAG_NAME: ${{ steps.current.outputs.version }}
          REF_NAME: ${{ github.ref_name }}
        run: |
          if [[ "$REF_NAME" == 'main' ]]; then
            git push origin "$TAG_NAME"
            echo "should_release=true" >> "$GITHUB_OUTPUT"
          else
            echo "If this was the main branch, I would push a new tag named $TAG_NAME"
            echo "should_release=false" >> "$GITHUB_OUTPUT"
          fi

      # Only worth waiting when a tag was actually pushed
      - name: Wait for GitHub to register the tag
        if: ${{ steps.push_tag.outputs.should_release == 'true' }}
        run: sleep 10

      - name: Release
        uses: softprops/action-gh-release@v2
        if: ${{ steps.push_tag.outputs.should_release == 'true' }}
        with:
          tag_name: ${{ steps.current.outputs.version }}
          files: dist/molecule_resolver-${{ steps.current.outputs.version }}.tar.gz

    outputs:
      publish_pypi: ${{ steps.push_tag.outputs.should_release }}
      publish_test_pypi: ${{ (steps.current.outputs.version != steps.published.outputs.version) && github.ref_name != 'main' }}

  # Publish the package to pypi
  publish:
    needs: release
    runs-on: ubuntu-latest
    permissions:
      id-token: write  # IMPORTANT: this permission is mandatory for trusted publishing

    steps:
      - name: Download built package
        uses: actions/download-artifact@v4
        with:
          name: dist
          path: dist/

      - name: Publish to pypi if new version
        env:
          should_publish: ${{ needs.release.outputs.publish_pypi }}
        if: ${{ env.should_publish == 'true' }}
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          packages-dir: dist/

      - name: Publish to test pypi if new version not on main
        env:
          should_publish: ${{ needs.release.outputs.publish_test_pypi }}
        if: ${{ env.should_publish == 'true' }}
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          packages-dir: dist/
          repository-url: https://test.pypi.org/legacy/
          skip-existing: true

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | test.py
2 | .vscode
3 | __pycache__/
4 | *.egg-info
5 | moleculeresolver/molecule_cache.*
6 | .venv/
7 | *.db*
8 | debug.log
9 | *build/
--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
2 |
3 | # Required
4 | version: 2
5 |
6 | # Set the OS, Python version, and other tools you might need
7 | build:
8 | os: ubuntu-24.04
9 | tools:
10 | python: "3.13"
11 |
12 | # Build documentation in the "docs/" directory with Sphinx
13 | sphinx:
14 | configuration: docs/source/conf.py
15 |
16 | # Optionally, but recommended,
17 | # declare the Python requirements required to build your documentation
18 | # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
19 | python:
20 | install:
21 | - requirements: docs/requirements.txt
22 |
--------------------------------------------------------------------------------
/.spr.yml:
--------------------------------------------------------------------------------
1 | githubRepoOwner: MoleculeResolver
2 | githubRepoName: molecule-resolver
3 | githubHost: github.com
4 | githubRemote: origin
5 | githubBranch: main
6 | requireChecks: true
7 | requireApproval: true
8 | mergeMethod: rebase
9 | mergeQueue: false
10 | forceFetchTags: false
11 | showPrTitlesInStack: false
12 | branchPushIndividually: false
13 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Copyright 2022 Simon Müller
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy
4 | of this software and associated documentation files (the "Software"), to deal
5 | in the Software without restriction, including without limitation the rights
6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | copies of the Software, and to permit persons to whom the Software is
8 | furnished to do so, subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in all
11 | copies or substantial portions of the Software.
12 |
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 | SOFTWARE.
--------------------------------------------------------------------------------
/MoleculeResolver.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MoleculeResolver/molecule-resolver/15253f702a3ca823c476e0551ff4f895f017cc68/MoleculeResolver.png
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | # MoleculeResolver
6 |
7 | The **moleculeresolver** was born out of the need to quickly annotate large datasets with accurate structural information and to crosscheck whether the given metadata (name, SMILES) are consistent with each other. It also makes it possible to efficiently check which structures are present in both of two large datasets.
8 |
9 | In short it's a Python module that allows you to retrieve molecular structures from multiple chemical databases, perform crosschecks to ensure data reliability, and standardize the best representation of molecules. It also provides functions for comparing molecules and sets of molecules based on specific configurations. This makes it a useful tool for researchers, chemists, or anyone working in computational chemistry / cheminformatics who needs to ensure they are working with the best available data for a molecule.
10 |
11 |
12 | ## Installation
13 |
14 | The package is available on [pypi](https://pypi.org/project/molecule-resolver/):
15 |
16 | ```sh
17 | pip install molecule-resolver
18 | ```
19 | While the source code is available here: [https://github.com/MoleculeResolver/molecule-resolver](https://github.com/MoleculeResolver/molecule-resolver)
20 |
21 | ## Features
22 |
23 | - **🔍 Retrieve Molecular Structures**: Fetch molecular structures from different chemical databases, including PubChem, Comptox, Chemeo, and others.
24 | - **🆔 Support for Different Identifier Types**: Retrieve molecular structures using a variety of identifier types, including CAS numbers, SMILES, InChI, InChIkey and common names.
25 | - **✅ Cross-check Capabilities**: Use data from multiple sources to verify molecular structures and identify the best representation.
26 | - **🔄 Molecule Comparison**: Compare molecules or sets of molecules based on their structure, properties, and specified ⚙️ configurations.
27 | - **⚙️ Standardization**: Standardize molecular structures, including handling isomers, tautomers, and isotopes.
28 | - **💾 Caching Mechanism**: Use local caching to store molecules and reduce the number of repeated requests to external services, improving performance and reducing latency.
29 |
30 | ## Services used
31 | At this moment, the following services are used to get the best structure for a given identifier. In the future, this list might be revised to improve performance by adding new services or removing some.
32 | In case you want to add an additional service, open an issue or a pull request.
33 |
34 | The MoleculeResolver does not offer all of the options/configurations that the dedicated repositories for each service provide, as it focuses on getting the structure based on the identifiers, and on doing so as accurately as possible while still being fast by using parallelization under the hood.
35 | | Service | Name | CAS | Formula | SMILES | InChI | InChIKey | CID | Batch search | Repos |
36 | |-------------------------------------------------------------------------|------|-----|---------|--------|-------|----------|-----|--------------------|------------------------------------------------------------------------------|
37 | | [cas_registry](https://commonchemistry.cas.org/) | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | |
38 | | [chebi](https://www.ebi.ac.uk/chebi/) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | |
39 | | [chemeo](https://www.chemeo.com/) | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | |
40 | | [cir](https://cactus.nci.nih.gov/chemical/structure) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | - [CIRpy](https://github.com/mcs07/CIRpy "wrapper for the CIR. FYI, CIR uses OPSIN under the hood, unless specified otherwise.") |
41 | | [comptox](https://comptox.epa.gov/dashboard) | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ✅ | |
42 | | [cts](https://cts.fiehnlab.ucdavis.edu/) | (✅) | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | |
43 | | [nist](https://webbook.nist.gov/chemistry/) | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | - [NistChemPy](https://github.com/IvanChernyshov/NistChemPy "unofficial wrapper for search and data extraction of the NIST Webbook.") |
44 | | [opsin](https://opsin.ch.cam.ac.uk/) | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | - [py2opsin](https://github.com/JacksonBurns/py2opsin "lightweight OPSIN wrapper only depending on having Java installed.")<br>- [pyopsin](https://github.com/Dingyun-Huang/pyopsin "lightweight OPSIN wrapper depending on having Java installed + additional dependencies.") |
45 | | [pubchem](https://pubchem.ncbi.nlm.nih.gov/) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | - [PubChemPy](https://github.com/mcs07/PubChemPy "wrapper for the pubchem PUG API") |
46 | | [srs](https://cdxapps.epa.gov/oms-substance-registry-services/search) | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | |
47 |
48 | ChemSpider was not used as it is already included in CIR [[1]](https://matt-swain.com/blog/2012-03-20-cirpy-python-nci-chemical-identifier-resolver) [[2]](https://cactus.nci.nih.gov/blog/?p=1456) [[3]](https://github.com/mcs07/ChemSpiPy). ChemIDplus and the Drug Information Portal were retired in 2022 [[4]](https://www.nlm.nih.gov/pubs/techbull/ja22/ja22_pubchem.html).
49 |
50 | ## 🚀 Usage
51 |
52 | ### Initialization
53 |
54 | To use **Molecule Resolver**, first import and initialize the `MoleculeResolver` class. It is intended to be used as a context manager:
55 |
56 | ```python
57 | from moleculeresolver import MoleculeResolver
58 |
59 | with MoleculeResolver(available_service_API_keys={"chemeo": "YOUR_API_KEY"}) as mr:
60 | ...
61 | ```
62 |
63 | ### Retrieve and Compare Molecules by Name and CAS
64 |
65 | Retrieve a molecule using both its common name and CAS number, then compare the two to ensure they represent the same structure:
66 |
67 | ```python
68 | from rdkit import Chem
69 | from moleculeresolver import MoleculeResolver
70 |
71 | with MoleculeResolver(available_service_API_keys={"chemeo": "YOUR_API_KEY"}) as mr:
72 | molecule_name = mr.find_single_molecule(["aspirin"], ["name"])
73 | molecule_cas = mr.find_single_molecule(["50-78-2"], ["cas"])
74 |
75 | are_same = mr.are_equal(Chem.MolFromSmiles(molecule_name.SMILES),
76 | Chem.MolFromSmiles(molecule_cas.SMILES))
77 | print(f"Are the molecules the same? {are_same}")
78 | ```
79 |
80 | ### Parallelized Molecule Retrieval and Saving to JSON
81 |
82 | Use the parallelized version to retrieve multiple molecules. If a large number of molecules is searched, moleculeresolver will try to use batch download capabilities whenever the database supports this.
83 |
84 | ```python
85 | import json
86 | from moleculeresolver import MoleculeResolver
87 |
88 | molecule_names = ["aspirin", "propanol", "ibuprofen", "non-exixtent-name"]
89 | not_found_molecules = []
90 | molecules_dicts = {}
91 |
92 | with MoleculeResolver(available_service_API_keys={"chemeo": "YOUR_API_KEY"}) as mr:
93 | molecules = mr.find_multiple_molecules_parallelized(molecule_names, [["name"]] * len(molecule_names))
94 | for name, molecule in zip(molecule_names, molecules):
95 | if molecule:
96 | molecules_dicts[name] = molecule.to_dict(found_molecules='remove')
97 | else:
98 | not_found_molecules.append(name)
99 |
100 | with open("molecules.json", "w") as json_file:
101 | json.dump(molecules_dicts, json_file, indent=4)
102 |
103 | print(f"Molecules not found: {not_found_molecules}")
104 | ```
105 |
106 | ## ⚙️ Configuration
107 |
108 | The `MoleculeResolver` class allows users to configure various options like:
109 |
110 | - **API Keys**: Set API keys for accessing different molecular databases. Currently only chemeo needs one.
111 | - **Standardization Options**: Choose how to handle molecular standardization (e.g., normalizing functional groups, disconnecting metals, handling isomers, etc.).
112 | - **Differentiation Settings**: Options for distinguishing between isomers, tautomers, and isotopes.
113 |
114 | ## ⚠️ Warning
115 |
116 | **InChI** is included in the set of valid identifiers for various [services](#services-used). You should be aware that converting InChI to SMILES with RDKit is not the most robust approach. You can read more about it [here](https://github.com/rdkit/rdkit/issues/542).
117 |
118 | ## 🤝 Contributing
119 |
120 | Contributions are welcome! If you have suggestions for improving the Molecule Resolver or want to add new features, feel free to submit an issue or a pull request on GitHub.
121 |
122 | ## 📚 Citing
123 |
124 | If you use MoleculeResolver in your research, please cite as follows:
125 |
126 | **Müller, S.**
127 | *How to crack a SMILES: automatic crosschecked chemical structure resolution across multiple services using MoleculeResolver*
128 | **Journal of Cheminformatics**, 17:117 (2025).
129 | DOI: [10.1186/s13321-025-01064-7](https://doi.org/10.1186/s13321-025-01064-7)
130 |
131 | ```bibtex
132 | @article{Muller2025MoleculeResolver,
133 | author = {Müller, Simon},
134 | title = {How to crack a SMILES: automatic crosschecked chemical structure resolution across multiple services using MoleculeResolver},
135 | journal = {Journal of Cheminformatics},
136 | year = {2025},
137 | volume = {17},
138 | pages = {117},
139 | doi = {10.1186/s13321-025-01064-7},
140 | url = {https://doi.org/10.1186/s13321-025-01064-7}
141 | }
142 | ```
143 |
--------------------------------------------------------------------------------
/apply.py:
--------------------------------------------------------------------------------
"""Example script: resolve molecules by name and by CAS number.

Demonstrates the parallelized search, the sequential crosschecked search by
name, and the crosschecked search by CAS number.
"""
from moleculeresolver import MoleculeResolver

# if you specify molecule_cache_db_path, you will have a separate cache for
# the found molecules in each folder. If you leave the default, all instances
# of MoleculeResolver will share the same cache

with MoleculeResolver(
    available_service_API_keys={"chemeo": "YOUR_CHEMEO_API_KEY"},
    molecule_cache_db_path="test.db",
) as mr:
    names_to_find = ["2-bromobutane", "ethanol", "methanol", "propane", "butane"]

    # search for the names in parallel
    all_names = []
    all_modes = []
    for name in names_to_find:
        # every molecule is searched with all heuristic variants of its name
        all_names.append(mr.expand_name_heuristically(name))
        all_modes.append(["name"])

    molecules_found_in_parallel = mr.find_multiple_molecules_parallelized(
        all_names, all_modes
    )
    print("all_found_in_parallel:", all(molecules_found_in_parallel))

    # search for the names sequentially
    molecules = []
    for name in names_to_find:
        names = mr.expand_name_heuristically(name)
        molecule = mr.find_single_molecule_crosschecked(names, "name")
        molecules.append(molecule)
    print("all_found:", all(molecules))

    # search for CAS numbers
    molecules_found_by_CAS = []
    CAS_numbers = ["7732-18-5", "78-76-2", "64-17-5", "67-56-1", "74-98-6", "106-97-8"]
    for CAS in CAS_numbers:
        molecule = mr.find_single_molecule_crosschecked(CAS, "CAS")
        molecules_found_by_CAS.append(molecule)

    print("all_found_by_CAS:", all(molecules_found_by_CAS))
43 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = source
9 | BUILDDIR = build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 |
13 | %SPHINXBUILD% >NUL 2>NUL
14 | if errorlevel 9009 (
15 | echo.
16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
17 | echo.installed, then set the SPHINXBUILD environment variable to point
18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
19 | echo.may add the Sphinx directory to PATH.
20 | echo.
21 | echo.If you don't have Sphinx installed, grab it from
22 | echo.https://www.sphinx-doc.org/
23 | exit /b 1
24 | )
25 |
26 | if "%1" == "" goto help
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 |
34 | :end
35 | popd
36 |
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx-autoapi>=3.4.0
--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
1 | # Configuration file for the Sphinx documentation builder.
2 | #
3 | # For the full list of built-in configuration values, see the documentation:
4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
5 |
6 | # -- Project information -----------------------------------------------------
7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
8 |
9 | project = 'MoleculeResolver'
10 | copyright = '2025, Simon Muller and Kobi Felton'
11 | author = 'Simon Muller and Kobi Felton'
12 | release = '0.3.2'
13 |
14 | # -- General configuration ---------------------------------------------------
15 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
16 |
17 | extensions = ['autoapi.extension']
18 |
19 | templates_path = ['_templates']
20 | exclude_patterns = []
21 |
22 | autoapi_dirs = ['../../moleculeresolver']
23 |
24 | # -- Options for HTML output -------------------------------------------------
25 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
26 |
27 | html_theme = 'alabaster'
28 | html_static_path = ['_static']
29 |
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
1 | .. MoleculeResolver documentation master file, created by
2 | sphinx-quickstart on Mon Jan 13 19:01:52 2025.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | MoleculeResolver documentation
7 | ==============================
8 |
9 | Add your content using ``reStructuredText`` syntax. See the
10 | `reStructuredText <https://www.sphinx-doc.org/en/master/usage/restructuredtext/index.html>`_
11 | documentation for details.
12 |
13 |
14 | .. toctree::
15 | :maxdepth: 2
16 | :caption: Contents:
17 |
18 |
--------------------------------------------------------------------------------
/molecule_test_set/benchmark_molecule_names.json:
--------------------------------------------------------------------------------
1 | [
2 | "Acetic Acid",
3 | "Acetone",
4 | "Acetylene",
5 | "Acetylsalicylic Acid",
6 | "Adenine",
7 | "Adenosine Triphosphate",
8 | "Adipic Acid",
9 | "Aluminum(III) Oxide",
10 | "Ammonia",
11 | "Ascorbic Acid",
12 | "Aspartame",
13 | "Benzene",
14 | "Benzoic Acid",
15 | "Biphenyl ",
16 | "Butane",
17 | "Butene",
18 | "Butyric Acid",
19 | "Caffeine",
20 | "Calcium Carbonate",
21 | "Calcium Oxide",
22 | "Calcium Sulfate",
23 | "Carbon Dioxide",
24 | "Carbon Monoxide",
25 | "Chloroform",
26 | "Chlorophyll",
27 | "Cholesterol",
28 | "Citric Acid",
29 | "Cocaine",
30 | "Cytosine",
31 | "DDT",
32 | "DEET",
33 | "R-12",
34 | "Dopamine",
35 | "Adrenaline",
36 | "Ethane",
37 | "Ethylene",
38 | "Ether",
39 | "Ethanol",
40 | "Ethylenediaminetetraacetic Acid",
41 | "Fluoxetine",
42 | "Formaldehyde",
43 | "Formic Acid",
44 | "Glucose",
45 | "Glycerol",
46 | "Guanine",
47 | "Hydrochloric Acid",
48 | "Hydrogen Peroxide",
49 | "Hydrogen Sulfide",
50 | "Ibuprofen",
51 | "Indigo",
52 | "Insulin",
53 | "Iron(III) Oxide",
54 | "Isooctane",
55 | "Isoprene",
56 | "l-Dopa",
57 | "Methane",
58 | "Methyl Alcohol",
59 | "Methylphenidate",
60 | "Monosodium Glutamate",
61 | "Morphine",
62 | "Naphthalene",
63 | "Nicotine",
64 | "Nitric Acid",
65 | "Nitric Oxide",
66 | "Nitrogen Dioxide",
67 | "Nitrous Oxide",
68 | "Nitroglycerin",
69 | "Norethindrone",
70 | "Penicillin",
71 | "Phenol",
72 | "Phosphoric Acid",
73 | "Piperine",
74 | "Potassium Carbonate",
75 | "Potassium Nitrate",
76 | "Propane",
77 | "Propylene",
78 | "Quinine",
79 | "Saccharin",
80 | "Silicon Dioxide",
81 | "Sodium Bicarbonate",
82 | "Sodium Carbonate",
83 | "Sodium Chloride",
84 | "Sodium Hydroxide",
85 | "Sodium Hypochlorite",
86 | "Strychnine",
87 | "Styrene",
88 | "Sucrose",
89 | "Sulfuric Acid",
90 | "Tetrafluoroethylene",
91 | "Tetrahydrocannabinol",
92 | "Thymine",
93 | "Trinitrotoluene",
94 | "Toluene",
95 | "Triuranium Octaoxide",
96 | "Uracil",
97 | "Urea",
98 | "Vanillin",
99 | "Vinyl Chloride",
100 | "Water",
101 | "Xylene"
102 | ]
--------------------------------------------------------------------------------
/molecule_test_set/get_data_generate_identifiers.py:
--------------------------------------------------------------------------------
1 |
"""Generate ``benchmark_component_molecules.json`` from the benchmark names.

Reads ``benchmark_molecule_names.json`` from the current working directory,
resolves every name with MoleculeResolver and stores the resolved molecule
together with derived identifiers (PubChem CID, formulas, InChI, InChIKey).
Nothing is done when the output file already exists.
"""
import json
import os
from moleculeresolver import MoleculeResolver
from rdkit import Chem

with open('benchmark_molecule_names.json', 'r') as f:
    benchmark_molecule_names = json.load(f)

if not os.path.exists('benchmark_component_molecules.json'):

    benchmark_component_molecules = {}
    names_not_found = []
    with MoleculeResolver(available_service_API_keys={"chemeo": 'YOUR_KEY'}, molecule_cache_db_path='molecule_cache.db') as mr:
        mr._available_services.remove('cts')
        temp = mr.find_multiple_molecules_parallelized(benchmark_molecule_names, [['name']] * len(benchmark_molecule_names))

        for name, molecule in zip(benchmark_molecule_names, temp, strict=True):
            # Names that could not be resolved come back as a falsy entry;
            # skip them instead of crashing on the attribute access below.
            if not molecule:
                names_not_found.append(name)
                continue

            molecule.found_molecules = []
            molecule = molecule.__dict__
            mol = Chem.MolFromSmiles(molecule['SMILES'])

            # additional_information is a ';'-separated list; the pubchem
            # entry ends with ':<cid>'.
            pubchem_cid = [v.strip() for v in molecule['additional_information'].split(';') if 'pubchem' in v]
            if pubchem_cid:
                pubchem_cid = int(pubchem_cid[0].split(':')[-1])
            else:
                pubchem_cid = None

            molecule['pubchem_cid'] = pubchem_cid
            molecule['formula'] = Chem.rdMolDescriptors.CalcMolFormula(mol)
            molecule['hill_formula'] = mr.to_hill_formula(mol)
            molecule['inchi'] = Chem.MolToInchi(mol)
            molecule['inchikey'] = Chem.InchiToInchiKey(molecule['inchi'])
            benchmark_component_molecules[name] = molecule

    with open('benchmark_component_molecules.json', 'w') as f:
        json.dump(benchmark_component_molecules, f, indent=4)

    if names_not_found:
        print(f"Molecules not found: {names_not_found}")

36 |
--------------------------------------------------------------------------------
/moleculeresolver/SqliteMoleculeCache.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime
2 | import os
3 | import sqlite3
4 | from typing import Optional, Union
5 | import threading
6 | import uuid
7 |
8 | from moleculeresolver.molecule import Molecule
9 |
10 |
11 | class SqliteMoleculeCache:
12 | """
13 | A class for caching molecule information using SQLite.
14 |
15 | This class provides methods to initialize, manage, and query a SQLite database
16 | for storing molecule information. It supports multi-threading and implements
17 | context management for proper resource handling.
18 |
19 | Attributes:
20 | db_path (str): Path to the SQLite database file. Defaults to ":memory:".
21 |
22 | expiration_datetime (Optional[datetime]): Expiration date for cached entries.
23 |
24 | _connections (dict): Thread-specific database connections.
25 |
26 | _main_thread_id (int): ID of the main thread.
27 | """
28 |
29 | def __init__(
30 | self, db_path: Optional[str] = ":memory:", expiration_datetime: Optional[datetime] = None
31 | ):
32 | """
33 | Initialize a new SqliteMoleculeCache instance.
34 |
35 | Args:
36 | db_path (Optional[str]): Path to the SQLite database file. Defaults to ":memory:".
37 |
38 | expiration_datetime (Optional[datetime]): Expiration date for cached entries.
39 | """
40 | self.db_path = db_path
41 | self.expiration_datetime = expiration_datetime
42 | self._connections = {}
43 | self._main_thread_id = threading.get_ident()
44 |
45 | def __enter__(self) -> "SqliteMoleculeCache":
46 | """
47 | Enter the runtime context related to this object.
48 |
49 | Creates tables and deletes expired entries.
50 |
51 | Returns:
52 | SqliteMoleculeCache: The instance of the class.
53 | """
54 | self._create_tables()
55 | self.delete_expired()
56 | return self
57 |
58 | def close_child_connections(self) -> None:
59 | """
60 | Close all child thread database connections.
61 | """
62 | for thread_id, thread_connection in self._connections.items():
63 | if thread_id != self._main_thread_id:
64 | if thread_connection:
65 | thread_connection.close()
66 | self._connections[thread_id] = None
67 |
68 | def __exit__(self, exception_type, exception_value, exception_traceback) -> None:
69 | """
70 | Exit the runtime context and close all database connections.
71 |
72 | Closes all child thread connections and optimizes the main thread's connection before closing.
73 | """
74 | self.close_child_connections()
75 |
76 | # Close the connection from the main thread
77 | this_thread_id = threading.get_ident()
78 | if this_thread_id == self._main_thread_id:
79 | if self._main_thread_id in self._connections:
80 | main_thread_connection = self._connections[self._main_thread_id]
81 | main_thread_connection.execute("PRAGMA analysis_limit=8192")
82 | main_thread_connection.execute("PRAGMA optimize")
83 | main_thread_connection.close()
84 | self._connections.clear()
85 |
def get_connection(self) -> sqlite3.Connection:
    """
    Get or create the database connection belonging to the calling thread.

    A new connection is opened when the thread has no entry yet or when its
    entry was reset to None by close_child_connections (which also covers a
    reused thread id). New connections enable foreign keys, WAL journaling,
    NORMAL synchronous mode and in-memory temp storage.

    Returns:
        sqlite3.Connection: A SQLite database connection for the current thread.
    """
    thread_id = threading.get_ident()
    connection = self._connections.get(thread_id)
    if connection is None:
        connection = sqlite3.connect(self.db_path, check_same_thread=False)
        try:
            for pragma in (
                "PRAGMA foreign_keys = 1",
                "PRAGMA journal_mode=WAL",
                "PRAGMA synchronous=NORMAL",
                "PRAGMA temp_store=MEMORY",
            ):
                connection.execute(pragma)
        except Exception:
            # Do not register (or leak) a connection that could not be configured.
            connection.close()
            raise
        self._connections[thread_id] = connection

    return connection
104 |
105 | def _create_tables(self) -> None:
106 | """
107 | Create the necessary tables in the SQLite database if they don't exist.
108 | """
109 | this_thread_connection = self.get_connection()
110 | with this_thread_connection:
111 | this_thread_connection.execute(
112 | """
113 | CREATE TABLE IF NOT EXISTS molecules (
114 | id INTEGER PRIMARY KEY,
115 | service TEXT NOT NULL,
116 | identifier_mode TEXT NOT NULL,
117 | identifier TEXT NOT NULL,
118 | SMILES TEXT,
119 | additional_information TEXT,
120 | datetime_added DATETIME DEFAULT CURRENT_TIMESTAMP
121 | )
122 | """
123 | )
124 | this_thread_connection.execute(
125 | """
126 | CREATE TABLE IF NOT EXISTS synonyms (
127 | id INTEGER PRIMARY KEY,
128 | molecule_id INTEGER NOT NULL,
129 | synonym_index INTEGER NOT NULL,
130 | synonym TEXT NOT NULL COLLATE NOCASE,
131 | CONSTRAINT fk_molecules_synonyms
132 | FOREIGN KEY (molecule_id)
133 | REFERENCES molecules(id)
134 | ON DELETE CASCADE
135 | )
136 | """
137 | )
138 | this_thread_connection.execute(
139 | """
140 | CREATE TABLE IF NOT EXISTS cas_numbers (
141 | id INTEGER PRIMARY KEY,
142 | molecule_id INTEGER NOT NULL,
143 | cas_number_index INTEGER NOT NULL,
144 | cas_number TEXT NOT NULL,
145 | CONSTRAINT fk_molecules_cas_numbers
146 | FOREIGN KEY (molecule_id)
147 | REFERENCES molecules(id)
148 | ON DELETE CASCADE
149 | )
150 | """
151 | )
152 | this_thread_connection.execute(
153 | """
154 | CREATE INDEX IF NOT EXISTS idx_molecules_service_identifier_mode_identifier
155 | ON molecules(service, identifier_mode, identifier)
156 | """
157 | )
158 | this_thread_connection.execute(
159 | """
160 | CREATE INDEX IF NOT EXISTS idx_covering_synonyms ON synonyms (molecule_id, synonym COLLATE NOCASE, synonym_index)
161 | """
162 | )
163 | this_thread_connection.execute(
164 | """
165 | CREATE INDEX IF NOT EXISTS idx_covering_cas_number ON cas_numbers (molecule_id, cas_number, cas_number_index)
166 | """
167 | )
168 |
def save(
    self,
    service: Union[str, list[str]],
    identifier_mode: Union[str, list[str]],
    identifier: Union[str, list[str]],
    molecules: Union[Molecule, list[Molecule]],
) -> None:
    """
    Save molecule information to the database.

    Saves one or multiple Molecule objects (or None placeholders, stored as
    rows without SMILES) together with their service, identifier_mode and
    identifier. Everything is written in a single transaction.

    Args:
        service (Union[str, list[str]]): The service(s) associated with the molecule(s).

        identifier_mode (Union[str, list[str]]): The identifier mode(s) for the molecule(s).

        identifier (Union[str, list[str]]): The identifier(s) for the molecule(s).

        molecules (Union[Molecule, list[Molecule]]): The molecule(s) to be saved.
            A single Molecule or None is treated as a one-element list.

    Each of service, identifier_mode and identifier may independently be a
    single string (applied to every molecule) or a list with one entry per
    molecule.

    Raises:
        ValueError: If a molecule's synonyms contain a pipe symbol, if the
            argument lengths do not match the number of molecules, or if
            molecule properties don't match the input values.
    """
    if isinstance(molecules, Molecule) or molecules is None:
        molecules = [molecules]

    # The pipe is the separator used when synonyms are read back with GROUP_CONCAT.
    for molecule in molecules:
        if molecule and any(
            "|" in synonym for synonym in (molecule.synonyms or [])
        ):
            raise ValueError(
                'molecule names i.e. synonyms must not contain pipe symbols: "|"'
            )

    # Broadcast each scalar argument on its own, so mixed str/list input works.
    number_of_molecules = len(molecules)
    if isinstance(service, str):
        service = [service] * number_of_molecules
    if isinstance(identifier_mode, str):
        identifier_mode = [identifier_mode] * number_of_molecules
    if isinstance(identifier, str):
        identifier = [identifier] * number_of_molecules

    # zip() would silently drop trailing entries on a length mismatch.
    if not (
        len(service)
        == len(identifier_mode)
        == len(identifier)
        == number_of_molecules
    ):
        raise ValueError(
            "service, identifier_mode, identifier and molecules must have matching lengths."
        )

    this_thread_connection = self.get_connection()
    with this_thread_connection:
        # unfortunately it seems, that python sqlite3 does not support executemany while returning
        # the inserted rows. And even if it would be supported, the order of returned ids is not
        # guaranteed to be the same order of insertion. Therefore we have to do it one by one.
        # https://discuss.python.org/t/sqlite3-executemany-with-returning-clauses/26291
        molecule_ids = []
        for s, m, i, molecule in zip(
            service, identifier_mode, identifier, molecules
        ):
            if molecule is None:
                this_data = (s, m, i, None, None)
            else:
                if (
                    molecule.service != s
                    or molecule.mode != m
                    or molecule.identifier != i
                ):
                    raise ValueError(
                        "The molecule properties do not match the input values to the save function."
                    )

                this_data = (
                    molecule.service.strip(),
                    molecule.mode.strip(),
                    i,
                    molecule.SMILES.strip(),
                    (
                        str(molecule.additional_information).strip()
                        if molecule.additional_information
                        else None
                    ),
                )

            cursor = this_thread_connection.execute(
                """
                INSERT INTO molecules (service, identifier_mode, identifier, SMILES, additional_information)
                VALUES (?, ?, ?, ?, ?)
                """,
                this_data,
            )
            molecule_ids.append(cursor.lastrowid)

        # Child rows carry their list position so the original order can be restored on read.
        name_rows_to_insert = []
        cas_number_rows_to_insert = []
        for molecule_id, molecule in zip(molecule_ids, molecules):
            if not molecule:
                continue
            if molecule.synonyms:
                name_rows_to_insert.extend(
                    (molecule_id, synonym_index, synonym.strip())
                    for synonym_index, synonym in enumerate(molecule.synonyms)
                )
            if molecule.CAS:
                cas_number_rows_to_insert.extend(
                    (molecule_id, cas_number_index, cas_number.strip())
                    for cas_number_index, cas_number in enumerate(molecule.CAS)
                )

        this_thread_connection.executemany(
            """
            INSERT INTO synonyms (molecule_id, synonym_index, synonym)
            VALUES (?, ?, ?)
            """,
            name_rows_to_insert,
        )

        this_thread_connection.executemany(
            """
            INSERT INTO cas_numbers (molecule_id, cas_number_index, cas_number)
            VALUES (?, ?, ?)
            """,
            cas_number_rows_to_insert,
        )
286 |
def _search(
    self,
    service: Union[str, list[str]],
    identifier_mode: Union[str, list[str]],
    identifier: Union[str, list[str]],
    only_check_for_existence: Optional[bool] = False,
) -> Union[
    Optional[list[Molecule]], list[Optional[list[Molecule]]], bool, list[bool]
]:
    """
    Search for molecules in the database based on the provided criteria.

    Supports single and multiple molecule searches. It can either return the
    full molecule information or just check for existence. Identifiers are
    compared case-insensitively when the identifier mode is "name".

    Args:
        service (Union[str, list[str]]): The service(s) to search in.

        identifier_mode (Union[str, list[str]]): The mode(s) of identification (e.g., 'name', 'cas').

        identifier (Union[str, list[str]]): The identifier(s) to search for.
            A non-string value selects the multiple-search path.

        only_check_for_existence (Optional[bool]): If True, only check if the molecule exists. Defaults to False.

    In a multiple search, service and identifier_mode may each be a single
    string (applied to every identifier) or a list as long as identifier.
    All identifier modes of one multiple search must be equal.

    Returns:

        Union[Optional[list[Molecule]], list[Optional[list[Molecule]]], bool, list[bool]]:
        - If searching for a single molecule:
            - If only_check_for_existence is False: returns Optional[list[Molecule]]
              (None when nothing is cached, an empty list when only a row
              without SMILES is cached)
            - If only_check_for_existence is True: returns bool
        - If searching for multiple molecules:
            - If only_check_for_existence is False: returns list[Optional[list[Molecule]]]
            - If only_check_for_existence is True: returns list[bool]
        An empty identifier list yields an empty list.

    Raises:
        ValueError: If the input parameters are inconsistent or invalid for the selected search.
    """
    if not isinstance(identifier, str):
        search_mode = "multiple"
        number_of_searches = len(identifier)
        # Broadcast scalars; otherwise a str would be iterated character by character below.
        if isinstance(service, str):
            service = [service] * number_of_searches
        if isinstance(identifier_mode, str):
            identifier_mode = [identifier_mode] * number_of_searches
        if (
            len(service) != number_of_searches
            or len(identifier_mode) != number_of_searches
        ):
            raise ValueError(
                "When searching for multiple molecules, service, mode and identifier all must be provided as str or same sized lists."
            )
    else:
        search_mode = "single"
        if not (isinstance(identifier_mode, str) and isinstance(service, str)):
            raise ValueError(
                "When searching for a single molecule, service, mode and identifier all must be provided as strings."
            )

    def parse_indexed_values(concatenated):
        # GROUP_CONCAT does not preserve order, so every value is stored as
        # "index|value" joined by "||" and re-sorted by index here.
        if not concatenated:
            return []
        indexed_values = {}
        for chunk in concatenated.split("||"):
            index, value = chunk.split("|", 1)
            indexed_values[int(index)] = value
        return [indexed_values[index] for index in sorted(indexed_values)]

    def rows_to_molecules(service_, identifier_mode_, identifier_, rows):
        # Rows without SMILES are cached misses and produce no Molecule.
        molecules = []
        for SMILES, additional_information, temp_synonyms, temp_cas_numbers in rows:
            if not SMILES:
                continue
            molecules.append(
                Molecule(
                    SMILES,
                    parse_indexed_values(temp_synonyms),
                    parse_indexed_values(temp_cas_numbers),
                    additional_information if additional_information else "",
                    identifier_mode_,
                    service_,
                    1,
                    identifier_,
                )
            )
        return molecules

    this_thread_connection = self.get_connection()

    if search_mode == "single":
        identifier_clause = "identifier = ?"
        if identifier_mode == "name":
            identifier_clause = "identifier = ? COLLATE NOCASE"

        sql = f"""
            SELECT molecules.id,
                SMILES,
                additional_information,
                GROUP_CONCAT(synonym_index || '|' || synonym, '||'),
                GROUP_CONCAT(cas_number_index || '|' || cas_number, '||')
            FROM molecules
            LEFT JOIN synonyms ON molecules.id = synonyms.molecule_id
            LEFT JOIN cas_numbers ON molecules.id = cas_numbers.molecule_id
            WHERE service = ? AND identifier_mode = ? AND {identifier_clause}
            GROUP BY molecules.id
        """
        with this_thread_connection:
            cursor = this_thread_connection.execute(
                sql, (service, identifier_mode, identifier)
            )
            molecule_rows = [row[1:] for row in cursor if row[0]]

        if only_check_for_existence:
            return len(molecule_rows) != 0

        if not molecule_rows:
            return None

        return rows_to_molecules(
            service, identifier_mode, identifier, molecule_rows
        )

    # ---- multiple search ----
    if number_of_searches == 0:
        return []

    # Validate before creating the temporary table so nothing is left behind.
    if len(set(identifier_mode)) != 1:
        raise ValueError(
            "This class expects all identifier modes to be the same."
        )

    if only_check_for_existence:
        optional_columns = ""
    else:
        optional_columns = """,
                SMILES,
                additional_information,
                GROUP_CONCAT(synonym_index || '|' || synonym, '||'),
                GROUP_CONCAT(cas_number_index || '|' || cas_number, '||')
        """

    # Values are bound as parameters instead of being pasted into the SQL text.
    query_parameters = [identifier_mode[0]]
    # Distinction makes queries run much faster
    if len(set(service)) == 1:
        molecule_join_on_service = "?"
        query_parameters.append(service[0])
    else:
        molecule_join_on_service = "t.service"

    collation = "COLLATE NOCASE" if identifier_mode[0] == "name" else ""

    this_transaction_unique_temp_table_name = f"tmp_{uuid.uuid4().hex}"
    try:
        with this_thread_connection:
            this_thread_connection.execute(
                f"""
                CREATE TEMPORARY TABLE {this_transaction_unique_temp_table_name} (
                    search_index INTEGER NOT NULL,
                    service TEXT NOT NULL,
                    identifier TEXT NOT NULL
                )
                """
            )

            this_thread_connection.executemany(
                f"""
                INSERT INTO {this_transaction_unique_temp_table_name} (search_index, service, identifier)
                VALUES (?, ?, ?)
                """,
                list(zip(range(number_of_searches), service, identifier)),
            )

            cursor = this_thread_connection.execute(
                f"""
                SELECT search_index,
                    m.id{optional_columns}
                FROM {this_transaction_unique_temp_table_name} AS t
                INNER JOIN molecules AS m
                    ON m.identifier_mode = ?
                    AND m.service = {molecule_join_on_service}
                LEFT JOIN synonyms AS s
                    ON m.id = s.molecule_id
                LEFT JOIN cas_numbers AS c
                    ON m.id = c.molecule_id
                WHERE m.identifier = t.identifier {collation}
                GROUP BY search_index, m.id
                """,
                query_parameters,
            )
            # TODO: search also the synonyms and cas_numbers tables
            rows = cursor.fetchall()
    finally:
        # Dropped after the transaction above has ended, so a rollback cannot
        # bring the scratch table back; otherwise it would live as long as
        # the connection does.
        with this_thread_connection:
            this_thread_connection.execute(
                f"DROP TABLE IF EXISTS {this_transaction_unique_temp_table_name}"
            )

    if only_check_for_existence:
        # Misses are reported as False, matching the documented list[bool].
        results = [False] * number_of_searches
        for search_index, molecule_id in rows:
            results[search_index] = molecule_id is not None
        return results

    results = [None] * number_of_searches
    rows_by_search_index = {}
    for (
        search_index,
        molecule_id,
        SMILES,
        additional_information,
        temp_synonyms,
        temp_cas_numbers,
    ) in rows:
        if molecule_id is None:
            continue
        # A cached entry without SMILES still registers an (empty) result list.
        found_rows = rows_by_search_index.setdefault(search_index, [])
        if SMILES:
            found_rows.append(
                (SMILES, additional_information, temp_synonyms, temp_cas_numbers)
            )

    for search_index, found_rows in rows_by_search_index.items():
        results[search_index] = rows_to_molecules(
            service[search_index],
            identifier_mode[search_index],
            identifier[search_index],
            found_rows,
        )

    return results
538 |
def exists(
    self,
    service: Union[str, list[str]],
    identifier_mode: Union[str, list[str]],
    identifier: Union[str, list[str]],
) -> Union[bool, list[bool]]:
    """
    Report whether the cache holds an entry for the given lookup(s).

    Works for one lookup (all arguments are strings) as well as for a batch
    of lookups (sequences of strings).

    Args:
        service (Union[str, list[str]]): Service name(s) the entry was stored under.

        identifier_mode (Union[str, list[str]]): Identification mode(s), e.g. 'name' or 'cas'.

        identifier (Union[str, list[str]]): Identifier(s) to look for.

    Returns:

        Union[bool, list[bool]]:

        - Single lookup: one boolean.
        - Batch lookup: one result per requested identifier, in request order.

    Note:
        Delegates to _search with only_check_for_existence=True.
    """
    existence = self._search(
        service,
        identifier_mode,
        identifier,
        only_check_for_existence=True,
    )
    return existence
573 |
def search(
    self,
    service: Union[str, list[str]],
    identifier_mode: Union[str, list[str]],
    identifier: Union[str, list[str]],
) -> Union[Optional[list[Molecule]], list[Optional[list[Molecule]]]]:
    """
    Look up cached molecules for the given service, mode and identifier.

    Works for one lookup (all arguments are strings) as well as for a batch
    of lookups (sequences of strings).

    Args:
        service (Union[str, list[str]]): Service name(s) the entry was stored under.

        identifier_mode (Union[str, list[str]]): Identification mode(s), e.g. 'name' or 'cas'.

        identifier (Union[str, list[str]]): Identifier(s) to look for.

    Returns:

        Union[Optional[list[Molecule]], list[Optional[list[Molecule]]]]:

        - Single lookup: None or a list of Molecule objects.
        - Batch lookup: a list with one such result per requested identifier.

    Note:
        Delegates to _search, leaving only_check_for_existence at its default.
    """
    found = self._search(service, identifier_mode, identifier)
    return found
608 |
def delete_expired(self) -> None:
    """
    Purge cache entries older than the configured expiration datetime.

    Deletes every row of the molecules table whose datetime_added is
    earlier than self.expiration_datetime.

    Note:
        Does nothing (and opens no connection) when self.expiration_datetime is not set.
    """
    cutoff = self.expiration_datetime
    if not cutoff:
        return

    connection = self.get_connection()
    with connection:
        connection.execute(
            """
            DELETE FROM molecules
            WHERE datetime_added < ?
            """,
            (cutoff,),
        )
628 |
def delete_by_service(self, service: str, mode: Optional[str] = "%") -> None:
    """
    Remove the cached molecules of one service, optionally limited by identifier mode.

    Args:
        service (str): Exact name of the service whose molecules are deleted.

        mode (Optional[str]): Pattern matched against identifier_mode with SQL LIKE.
            The default "%" matches every mode.
    """
    deletion_sql = """
        DELETE FROM molecules
        WHERE service = ? AND identifier_mode LIKE ?
    """
    parameters = (service, mode)

    connection = self.get_connection()
    with connection:
        connection.execute(deletion_sql, parameters)
646 |
def recreate_all_tables(self) -> None:
    """
    Recreate all tables in the database.

    Closes the existing connection (if any), deletes the database file and
    its "-shm" / "-wal" companions when they exist, and then recreates the
    tables. Use with caution, as it will result in data loss.

    Entries that close_child_connections reset to None do not count as
    open connections. The one open connection is closed directly, whichever
    thread it was registered for, so it cannot be left open on a file that
    is about to be removed.

    Raises:
        RuntimeError: If more than one connection is currently open.
    """
    open_connections = [
        connection
        for connection in self._connections.values()
        if connection is not None
    ]
    if len(open_connections) > 1:
        raise RuntimeError(
            "Cannot delete cache files in a multi-threaded environment."
        )

    for connection in open_connections:
        connection.close()
    self._connections.clear()

    files = [self.db_path, f"{self.db_path}-shm", f"{self.db_path}-wal"]
    for file in files:
        if os.path.exists(file):
            os.remove(file)

    self._create_tables()
674 |
def count(self, service: Optional[str] = None) -> int:
    """
    Return how many rows the molecules table holds.

    Args:
        service (Optional[str]): When given (and non-empty), only rows stored
            for this service are counted; otherwise all rows are counted.

    Returns:
        int: Number of matching rows.
    """
    if service:
        query = """
            SELECT COUNT(*)
            FROM molecules
            WHERE service = ?
        """
        parameters = (service,)
    else:
        query = """
            SELECT COUNT(*)
            FROM molecules
        """
        parameters = ()

    connection = self.get_connection()
    with connection:
        (number_of_rows,) = connection.execute(query, parameters).fetchone()

    return number_of_rows
705 |
--------------------------------------------------------------------------------
/moleculeresolver/__init__.py:
--------------------------------------------------------------------------------
1 | from .moleculeresolver import Molecule
2 | from .moleculeresolver import MoleculeResolver
3 |
from importlib.metadata import PackageNotFoundError, version

# Take the version from the installed distribution's metadata; when the
# distribution "molecule-resolver" is not installed, use a placeholder.
try:
    __version__ = version("molecule-resolver")
except PackageNotFoundError:
    __version__ = "dev"
--------------------------------------------------------------------------------
/moleculeresolver/molecule.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass, field
2 | from typing import Optional, List, Dict, Any
3 | import copy
4 |
5 |
@dataclass
class Molecule:
    """
    Container for one resolved molecule and the metadata of its lookup.

    Attributes:
        SMILES (Optional[str]): SMILES string of the molecule.

        synonyms (Optional[list[str]]): Alternative names of the molecule.

        CAS (Optional[list[str]]): CAS registry numbers of the molecule.

        additional_information (Optional[str]): Free-form extra information.

        mode (Optional[str]): The mode associated with the molecule.

        service (Optional[str]): The service associated with the molecule.

        number_of_crosschecks (Optional[int]): Number of cross-checks recorded for the molecule.

        identifier (Optional[str]): Identifier associated with the molecule.

        found_molecules (Optional[list]): Related molecules; to_dict treats each
            item as a mapping whose first value is a list of Molecule objects.
    """

    SMILES: Optional[str] = None
    synonyms: Optional[List[str]] = field(default_factory=list)
    CAS: Optional[List[str]] = field(default_factory=list)
    additional_information: Optional[str] = ""
    mode: Optional[str] = ""
    service: Optional[str] = ""
    number_of_crosschecks: Optional[int] = 1
    identifier: Optional[str] = ""
    found_molecules: Optional[list] = field(default_factory=list)

    def to_dict(self, found_molecules: Optional[str] = "recursive") -> Dict[str, Any]:
        """
        Build a dictionary from a deep copy of this object's attributes.

        Args:
            found_molecules (Optional[str]): How the 'found_molecules' entry is treated.
                - 'remove': the entry is left out of the result.
                - 'recursive': each group is rebuilt as {first key: [dicts of its molecules]}.
                - anything else: the deep-copied entry is returned unchanged.

        Returns:
            Dict[str, Any]: The dictionary representation.
        """
        snapshot = copy.deepcopy(self.__dict__)

        if found_molecules == "remove":
            snapshot.pop("found_molecules", None)
            return snapshot

        if found_molecules == "recursive" and "found_molecules" in snapshot:
            converted_groups = []
            for grouped_item in snapshot["found_molecules"]:
                # Only the first key/value pair of each group is used.
                group_key, group_members = list(grouped_item.items())[0]
                converted_groups.append(
                    {
                        group_key: [
                            member.to_dict("recursive") for member in group_members
                        ]
                    }
                )
            snapshot["found_molecules"] = converted_groups

        return snapshot
73 |
--------------------------------------------------------------------------------
/moleculeresolver/opsin-cli-2.8.0-jar-with-dependencies.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MoleculeResolver/molecule-resolver/15253f702a3ca823c476e0551ff4f895f017cc68/moleculeresolver/opsin-cli-2.8.0-jar-with-dependencies.jar
--------------------------------------------------------------------------------
/moleculeresolver/rdkitmods.py:
--------------------------------------------------------------------------------
1 | from contextlib import ContextDecorator
2 | from rdkit import rdBase
3 |
4 |
class disabling_rdkit_logger(ContextDecorator):
    """
    A context manager for disabling RDKit logging
    based on https://github.com/rdkit/rdkit/issues/2320#issuecomment-731261149

    This class allows for temporary suppression of RDKit log messages at various levels
    (error, warning, info, debug) to reduce noise in the output during specific operations.
    It can be used in a ``with`` statement or as a function decorator.

    Attributes:
        previous_status (dict): The log status captured most recently (at construction
            and again on every entry into the context).
        desired_status (dict): The log status desired during the context manager's execution.
    """

    def __init__(
        self,
        mute_errors: bool = True,
        mute_warning: bool = True,
        mute_info: bool = True,
        mute_debug: bool = True,
    ) -> None:
        """
        Initializes the disabling_rdkit_logger context manager.

        Args:
            mute_errors (bool): If True, suppress error messages. Defaults to True.
            mute_warning (bool): If True, suppress warning messages. Defaults to True.
            mute_info (bool): If True, suppress info messages. Defaults to True.
            mute_debug (bool): If True, suppress debug messages. Defaults to True.
        """
        self.previous_status = self._get_log_status()
        # One snapshot per active entry, so re-entrant use restores correctly.
        self._saved_statuses = []

        self.desired_status = {
            "rdApp.error": not mute_errors,
            "rdApp.warning": not mute_warning,
            "rdApp.debug": not mute_debug,
            "rdApp.info": not mute_info,
        }

    def _get_log_status(self) -> dict[str, bool]:
        """
        Get the current log status of RDKit logs.

        Parses the "name:state" lines returned by rdBase.LogStatus(); lines
        without a colon (e.g. blank lines) are skipped.

        Returns:
            dict[str, bool]: A dictionary indicating the log status (enabled/disabled) for each log level.
        """
        log_status = {}
        for line in rdBase.LogStatus().split("\n"):
            name, separator, state = line.partition(":")
            if not separator:
                continue
            log_status[name.strip()] = state.strip() == "enabled"
        return log_status

    def _apply_log_status(self, log_status: dict[str, bool]) -> None:
        """
        Apply an RDKit log status.

        Args:
            log_status (dict[str, bool]): A dictionary with log levels as keys and their desired status (True/False) as values.
        """
        for k, v in log_status.items():
            if v:
                rdBase.EnableLog(k)
            else:
                rdBase.DisableLog(k)

    def __enter__(self) -> "disabling_rdkit_logger":
        """
        Enter the runtime context related to this object.

        Records the log status that is active right now and then applies the
        desired one. Capturing it here rather than only in ``__init__`` matters
        for decorator use, where one instance created at decoration time is
        entered on every call.

        Returns:
            disabling_rdkit_logger: The context manager itself.
        """
        self.previous_status = self._get_log_status()
        self._saved_statuses.append(self.previous_status)
        self._apply_log_status(self.desired_status)
        return self

    def __exit__(self, exception_type, exception_value, exception_traceback) -> None:
        """
        Exit the runtime context related to this object.

        Restores the log status recorded by the matching ``__enter__``.
        """
        if self._saved_statuses:
            status_to_restore = self._saved_statuses.pop()
        else:
            # __exit__ without a matching __enter__: fall back to the last snapshot.
            status_to_restore = self.previous_status
        self._apply_log_status(status_to_restore)
90 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "molecule-resolver"
3 | version = "0.3.9"
4 | description = "A package to use several web services to find molecule structures, synonyms and CAS."
5 | authors = [
6 | "Simon Muller ",
7 | "Kobi Felton ",
8 | ]
9 | license = "MIT"
10 | readme = "README.md"
11 | packages = [{ include = "moleculeresolver" }]
12 |
13 | [tool.poetry.dependencies]
14 | python = "^3.10,<3.14"
15 | prompt-toolkit = "^3.0.39"
16 | regex = "^2023.10.3"
17 | rdkit = ">=2023.3.3"
18 | requests = "^2.31.0"
19 | openpyxl = "^3.1.2"
20 | tqdm = "^4.66.3"
21 | urllib3 = "^2.0.6"
22 | xmltodict = "^0.13.0"
23 |
24 |
25 | [tool.poetry.group.dev.dependencies]
26 | pytest = "^7.4.3"
27 | pytest-mock = "^3.12.0"
28 | sphinx = "^8.1.3"
29 | sphinx-autobuild = "^2024.10.3"
30 | sphinx-autoapi = "^3.4.0"
31 |
32 | [build-system]
33 | requires = ["poetry-core"]
34 | build-backend = "poetry.core.masonry.api"
35 |
--------------------------------------------------------------------------------
/tests/benchmark_component_molecules_iupac.json:
--------------------------------------------------------------------------------
1 | {
2 | "Acetic Acid": {
3 | "SMILES": "CC(=O)O",
4 | "synonyms": [
5 | "Acetic Acid",
6 | "Ethanoic acid",
7 | "Ethylic acid",
8 | "Glacial acetic acid",
9 | "Methanecarboxylic acid"
10 | ],
11 | "CAS": [
12 | "64-19-7"
13 | ],
14 | "additional_information": "cas_registry; chemeo id: 51-574-8; cir; comptox id: DTXSID5024394|QC_LEVEL:1.0; nist id: C1186523; opsin; pubchem id: 176; srs id: 3848",
15 | "mode": "name; name; name; name; name; name; name; name",
16 | "service": "cas_registry; chemeo; cir; comptox; nist; opsin; pubchem; srs",
17 | "number_of_crosschecks": 8,
18 | "identifier": "Acetic Acid",
19 | "found_molecules": [],
20 | "pubchem_cid": 176,
21 | "formula": "C2H4O2",
22 | "hill_formula": "C2H4O2",
23 | "inchi": "InChI=1S/C2H4O2/c1-2(3)4/h1H3,(H,3,4)",
24 | "inchikey": "QTBSBXVTEAMEQO-UHFFFAOYSA-N",
25 | "iupac_name": "Ethanoic acid"
26 | },
27 | "Acetone": {
28 | "SMILES": "CC(C)=O",
29 | "synonyms": [
30 | "Acetone",
31 | "2-Propanone",
32 | "Dimethyl ketone",
33 | "propan-2-one",
34 | "Methyl ketone"
35 | ],
36 | "CAS": [
37 | "67-64-1"
38 | ],
39 | "additional_information": "cas_registry; chebi id: 15347; chemeo id: 50-301-1; cir; comptox id: DTXSID8021482|QC_LEVEL:1.0; nist id: C666524; opsin; pubchem id: 180; srs id: 4309",
40 | "mode": "name; name; name; name; name; name; name; name; name",
41 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs",
42 | "number_of_crosschecks": 9,
43 | "identifier": "Acetone",
44 | "found_molecules": [],
45 | "pubchem_cid": 180,
46 | "formula": "C3H6O",
47 | "hill_formula": "C3H6O",
48 | "inchi": "InChI=1S/C3H6O/c1-3(2)4/h1-2H3",
49 | "inchikey": "CSCPPACGZOOCGX-UHFFFAOYSA-N",
50 | "iupac_name": "propan-2-one"
51 | },
52 | "Acetylsalicylic Acid": {
53 | "SMILES": "CC(=O)Oc1ccccc1C(=O)O",
54 | "synonyms": [
55 | "Acetylsalicylic Acid",
56 | "Aspirin",
57 | "2-(acetyloxy)-Benzoic acid",
58 | "2-(acetyloxy)benzoic acid",
59 | "Rhodine"
60 | ],
61 | "CAS": [
62 | "50-78-2"
63 | ],
64 | "additional_information": "cas_registry; chebi id: 15365; chemeo id: 43-227-2; cir; comptox id: DTXSID5020108|QC_LEVEL:1.0; nist id: C50782; opsin; pubchem id: 2244; srs id: 1198",
65 | "mode": "name; name; name; name; name; name; name; name; name",
66 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs",
67 | "number_of_crosschecks": 9,
68 | "identifier": "Acetylsalicylic Acid",
69 | "found_molecules": [],
70 | "pubchem_cid": 2244,
71 | "formula": "C9H8O4",
72 | "hill_formula": "C9H8O4",
73 | "inchi": "InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)",
74 | "inchikey": "BSYNRYMUTXBXSQ-UHFFFAOYSA-N",
75 | "iupac_name": "2-acetyloxybenzoic acid"
76 | },
77 | "Adenine": {
78 | "SMILES": "Nc1ncnc2nc[nH]c12",
79 | "synonyms": [
80 | "Adenine",
81 | "1H-Purin-6-amine",
82 | "9H-Purin-6-amine",
83 | "6-Aminopurine",
84 | "Adeninimine"
85 | ],
86 | "CAS": [
87 | "73-24-5"
88 | ],
89 | "additional_information": "cas_registry; cir; opsin; pubchem id: 190",
90 | "mode": "name; name; name; name",
91 | "service": "cas_registry; cir; opsin; pubchem",
92 | "number_of_crosschecks": 4,
93 | "identifier": "Adenine",
94 | "found_molecules": [],
95 | "pubchem_cid": 190,
96 | "formula": "C5H5N5",
97 | "hill_formula": "C5H5N5",
98 | "inchi": "InChI=1S/C5H5N5/c6-4-3-5(9-1-7-3)10-2-8-4/h1-2H,(H3,6,7,8,9,10)",
99 | "inchikey": "GFFGJBXGBJISGV-UHFFFAOYSA-N",
100 | "iupac_name": "7H-purin-6-amine"
101 | },
102 | "Adenosine Triphosphate": {
103 | "SMILES": "Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O",
104 | "synonyms": [
105 | "Adenosine Triphosphate",
106 | "atp",
107 | "[[(2R,3S,4R,5R)-5-(6-aminopurin-9-yl)-3,4-dihydroxyoxolan-2-yl]methoxy-hydroxyphosphoryl] phosphono hydrogen phosphate",
108 | "Adenosine 5'-triphosphate",
109 | "Triphosphaden"
110 | ],
111 | "CAS": [
112 | "56-65-5"
113 | ],
114 | "additional_information": "cir; comptox id: DTXSID6022559|QC_LEVEL:1.0; opsin; pubchem id: 5957; srs id: 2220",
115 | "mode": "name; name; name; name; name",
116 | "service": "cir; comptox; opsin; pubchem; srs",
117 | "number_of_crosschecks": 5,
118 | "identifier": "Adenosine Triphosphate",
119 | "found_molecules": [],
120 | "pubchem_cid": 5957,
121 | "formula": "C10H16N5O13P3",
122 | "hill_formula": "C10H16N5O13P3",
123 | "inchi": "InChI=1S/C10H16N5O13P3/c11-8-5-9(13-2-12-8)15(3-14-5)10-7(17)6(16)4(26-10)1-25-30(21,22)28-31(23,24)27-29(18,19)20/h2-4,6-7,10,16-17H,1H2,(H,21,22)(H,23,24)(H2,11,12,13)(H2,18,19,20)/t4-,6-,7-,10-/m1/s1",
124 | "inchikey": "ZKHQWZAMYRWXGA-KQYNXXCUSA-N",
125 | "iupac_name": "[[(2R,3S,4R,5R)-5-(6-aminopurin-9-yl)-3,4-dihydroxyoxolan-2-yl]methoxy-hydroxyphosphoryl] phosphono hydrogen phosphate"
126 | },
127 | "Adipic Acid": {
128 | "SMILES": "O=C(O)CCCCC(=O)O",
129 | "synonyms": [
130 | "Adipic Acid",
131 | "Hexanedioic acid",
132 | "1,4-Butanedicarboxylic acid",
133 | "Adipinic acid",
134 | "1,6-Hexanedioic acid"
135 | ],
136 | "CAS": [
137 | "124-04-9"
138 | ],
139 | "additional_information": "cas_registry; chebi id: 30832; chemeo id: 12-837-9; cir; comptox id: DTXSID7021605|QC_LEVEL:1.0; nist id: C124049; opsin; pubchem id: 196; srs id: 33340",
140 | "mode": "name; name; name; name; name; name; name; name; name",
141 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs",
142 | "number_of_crosschecks": 9,
143 | "identifier": "Adipic Acid",
144 | "found_molecules": [],
145 | "pubchem_cid": 196,
146 | "formula": "C6H10O4",
147 | "hill_formula": "C6H10O4",
148 | "inchi": "InChI=1S/C6H10O4/c7-5(8)3-1-2-4-6(9)10/h1-4H2,(H,7,8)(H,9,10)",
149 | "inchikey": "WNLRTRBMVRJNCN-UHFFFAOYSA-N",
150 | "iupac_name": "hexanedioic acid"
151 | },
152 | "Aluminum(III) Oxide": {
153 | "SMILES": "[Al+3].[Al+3].[O-2].[O-2].[O-2]",
154 | "synonyms": [
155 | "Aluminum(III) Oxide"
156 | ],
157 | "CAS": [],
158 | "additional_information": "opsin",
159 | "mode": "name",
160 | "service": "opsin",
161 | "number_of_crosschecks": 1,
162 | "identifier": "Aluminum(III) Oxide",
163 | "found_molecules": [],
164 | "pubchem_cid": null,
165 | "formula": "Al2O3",
166 | "hill_formula": "Al2O3",
167 | "inchi": "InChI=1S/2Al.3O/q2*+3;3*-2",
168 | "inchikey": "PNEYBMLMFCGWSK-UHFFFAOYSA-N",
169 | "iupac_name": "Aluminum(III) Oxide"
170 | },
171 | "Ammonia": {
172 | "SMILES": "N",
173 | "synonyms": [
174 | "Ammonia",
175 | "Ammonia gas",
176 | "Nitro-Sil",
177 | "Spirit of Hartshorn",
178 | "azane"
179 | ],
180 | "CAS": [
181 | "7664-41-7"
182 | ],
183 | "additional_information": "cas_registry; chebi id: 16134; chemeo id: 22-992-6; cir; comptox id: DTXSID0023872|QC_LEVEL:1.0; nist id: C84796145; opsin; pubchem id: 222; srs id: 152389",
184 | "mode": "name; name; name; name; name; name; name; name; name",
185 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs",
186 | "number_of_crosschecks": 9,
187 | "identifier": "Ammonia",
188 | "found_molecules": [],
189 | "pubchem_cid": 222,
190 | "formula": "H3N",
191 | "hill_formula": "H3N",
192 | "inchi": "InChI=1S/H3N/h1H3",
193 | "inchikey": "QGZKDVFQNNGYKY-UHFFFAOYSA-N",
194 | "iupac_name": "azane"
195 | },
196 | "Ascorbic Acid": {
197 | "SMILES": "O=C1O[C@H]([C@@H](O)CO)C(O)=C1O",
198 | "synonyms": [
199 | "Ascorbic Acid",
200 | "(2R)-2-[(1S)-1,2-dihydroxyethyl]-3,4-dihydroxy-2H-furan-5-one",
201 | "l-ascorbic acid",
202 | "vitamin C",
203 | "L-Xyloascorbic acid"
204 | ],
205 | "CAS": [
206 | "50-81-7"
207 | ],
208 | "additional_information": "opsin; pubchem id: 54670067; srs id: 1214",
209 | "mode": "name; name; name",
210 | "service": "opsin; pubchem; srs",
211 | "number_of_crosschecks": 3,
212 | "identifier": "Ascorbic Acid",
213 | "found_molecules": [],
214 | "pubchem_cid": 54670067,
215 | "formula": "C6H8O6",
216 | "hill_formula": "C6H8O6",
217 | "inchi": "InChI=1S/C6H8O6/c7-1-2(8)5-3(9)4(10)6(11)12-5/h2,5,7-10H,1H2/t2-,5+/m0/s1",
218 | "inchikey": "CIWBSHSKHKDKBQ-JLAZNSOCSA-N",
219 | "iupac_name": "(2R)-2-[(1S)-1,2-dihydroxyethyl]-3,4-dihydroxy-2H-furan-5-one"
220 | },
221 | "Aspartame": {
222 | "SMILES": "COC(=O)C(Cc1ccccc1)NC(=O)C(N)CC(=O)O",
223 | "synonyms": [
224 | "Aspartame",
225 | "L-Phenylalanine, L-\u03b1-aspartyl-, 2-methyl ester",
226 | "Succinamic acid, 3-amino-N-(\u03b1-carboxyphenethyl)-, N-methyl ester, stereoisomer",
227 | "L-Phenylalanine, N-L-\u03b1-aspartyl-, 1-methyl ester",
228 | "L-Aspartyl-L-phenylalanine methyl ester"
229 | ],
230 | "CAS": [
231 | "22839-47-0"
232 | ],
233 | "additional_information": "cas_registry; cir; srs id: 222604",
234 | "mode": "name; name; name",
235 | "service": "cas_registry; cir; srs",
236 | "number_of_crosschecks": 3,
237 | "identifier": "Aspartame",
238 | "found_molecules": [],
239 | "pubchem_cid": null,
240 | "formula": "C14H18N2O5",
241 | "hill_formula": "C14H18N2O5",
242 | "inchi": "InChI=1S/C14H18N2O5/c1-21-14(20)11(7-9-5-3-2-4-6-9)16-13(19)10(15)8-12(17)18/h2-6,10-11H,7-8,15H2,1H3,(H,16,19)(H,17,18)",
243 | "inchikey": "IAOZJIPTCAWIRG-UHFFFAOYSA-N",
244 | "iupac_name": "3-amino-4-[(1-methoxy-1-oxo-3-phenylpropan-2-yl)amino]-4-oxobutanoic acid"
245 | },
246 | "Benzene": {
247 | "SMILES": "c1ccccc1",
248 | "synonyms": [
249 | "Benzene",
250 | "Benzol",
251 | "Cyclohexatriene",
252 | "Benzole",
253 | "Coal naphtha"
254 | ],
255 | "CAS": [
256 | "71-43-2"
257 | ],
258 | "additional_information": "cas_registry; chebi id: 16716; chemeo id: 12-667-8; cir; comptox id: DTXSID3039242|QC_LEVEL:1.0; nist id: C1076433; opsin; pubchem id: 241; srs id: 4754",
259 | "mode": "name; name; name; name; name; name; name; name; name",
260 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs",
261 | "number_of_crosschecks": 9,
262 | "identifier": "Benzene",
263 | "found_molecules": [],
264 | "pubchem_cid": 241,
265 | "formula": "C6H6",
266 | "hill_formula": "C6H6",
267 | "inchi": "InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H",
268 | "inchikey": "UHOVQNZJYSORNB-UHFFFAOYSA-N",
269 | "iupac_name": "benzene"
270 | },
271 | "Benzoic Acid": {
272 | "SMILES": "O=C(O)c1ccccc1",
273 | "synonyms": [
274 | "Benzoic Acid",
275 | "Benzenecarboxylic acid",
276 | "Benzeneformic acid",
277 | "Carboxybenzene",
278 | "Dracylic acid"
279 | ],
280 | "CAS": [
281 | "65-85-0"
282 | ],
283 | "additional_information": "cas_registry; chemeo id: 18-337-8; cir; comptox id: DTXSID6020143|QC_LEVEL:1.0; nist id: C65850; opsin; pubchem id: 243; srs id: 4044",
284 | "mode": "name; name; name; name; name; name; name; name",
285 | "service": "cas_registry; chemeo; cir; comptox; nist; opsin; pubchem; srs",
286 | "number_of_crosschecks": 8,
287 | "identifier": "Benzoic Acid",
288 | "found_molecules": [],
289 | "pubchem_cid": 243,
290 | "formula": "C7H6O2",
291 | "hill_formula": "C7H6O2",
292 | "inchi": "InChI=1S/C7H6O2/c8-7(9)6-4-2-1-3-5-6/h1-5H,(H,8,9)",
293 | "inchikey": "WPYMKLBDIGXBTP-UHFFFAOYSA-N",
294 | "iupac_name": "benzoic acid"
295 | },
296 | "Biphenyl ": {
297 | "SMILES": "c1ccc(-c2ccccc2)cc1",
298 | "synonyms": [
299 | "Biphenyl",
300 | "1,1'-biphenyl",
301 | "Diphenyl",
302 | "Phenylbenzene",
303 | "Bibenzene"
304 | ],
305 | "CAS": [
306 | "92-52-4"
307 | ],
308 | "additional_information": "cas_registry; chebi id: 17097; chemeo id: 41-472-2; cir; comptox id: DTXSID4020161|QC_LEVEL:1.0; nist id: C1486017; opsin; pubchem id: 7095; srs id: 14183",
309 | "mode": "name; name; name; name; name; name; name; name; name",
310 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs",
311 | "number_of_crosschecks": 9,
312 | "identifier": "Biphenyl",
313 | "found_molecules": [],
314 | "pubchem_cid": 7095,
315 | "formula": "C12H10",
316 | "hill_formula": "C12H10",
317 | "inchi": "InChI=1S/C12H10/c1-3-7-11(8-4-1)12-9-5-2-6-10-12/h1-10H",
318 | "inchikey": "ZUOUZKKEUPVFJK-UHFFFAOYSA-N",
319 | "iupac_name": "1,1'-biphenyl"
320 | },
321 | "Butane": {
322 | "SMILES": "CCCC",
323 | "synonyms": [
324 | "Butane",
325 | "n-Butane",
326 | "Diethyl",
327 | "lpg",
328 | "Liquefied petroleum gas"
329 | ],
330 | "CAS": [
331 | "106-97-8"
332 | ],
333 | "additional_information": "cas_registry; chebi id: 37808; chemeo id: 26-823-9; cir; comptox id: DTXSID7024665|QC_LEVEL:1.0; nist id: C106978; opsin; pubchem id: 7843; srs id: 24026",
334 | "mode": "name; name; name; name; name; name; name; name; name",
335 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs",
336 | "number_of_crosschecks": 9,
337 | "identifier": "Butane",
338 | "found_molecules": [],
339 | "pubchem_cid": 7843,
340 | "formula": "C4H10",
341 | "hill_formula": "C4H10",
342 | "inchi": "InChI=1S/C4H10/c1-3-4-2/h3-4H2,1-2H3",
343 | "inchikey": "IJDNQMDRQITEOD-UHFFFAOYSA-N",
344 | "iupac_name": "butane"
345 | },
346 | "Butene": {
347 | "SMILES": "C=CCC",
348 | "synonyms": [
349 | "Butene",
350 | "but-1-ene",
351 | "1-butene",
352 | "Ethylethylene",
353 | "1-Butylene"
354 | ],
355 | "CAS": [
356 | "106-98-9"
357 | ],
358 | "additional_information": "cir; pubchem id: 7844; srs id: 230482",
359 | "mode": "name; name; name",
360 | "service": "cir; pubchem; srs",
361 | "number_of_crosschecks": 3,
362 | "identifier": "Butene",
363 | "found_molecules": [],
364 | "pubchem_cid": 7844,
365 | "formula": "C4H8",
366 | "hill_formula": "C4H8",
367 | "inchi": "InChI=1S/C4H8/c1-3-4-2/h3H,1,4H2,2H3",
368 | "inchikey": "VXNZUUAINFGPBY-UHFFFAOYSA-N",
369 | "iupac_name": "but-1-ene"
370 | },
371 | "Butyric Acid": {
372 | "SMILES": "CCCC(=O)O",
373 | "synonyms": [
374 | "Butyric Acid",
375 | "Butanoic acid",
376 | "n-Butyric acid",
377 | "Ethylacetic acid",
378 | "1-Propanecarboxylic acid"
379 | ],
380 | "CAS": [
381 | "107-92-6"
382 | ],
383 | "additional_information": "cas_registry; chebi id: 30772; chemeo id: 41-010-4; cir; comptox id: DTXSID8021515|QC_LEVEL:1.0; nist id: C107926; opsin; pubchem id: 264; srs id: 24729",
384 | "mode": "name; name; name; name; name; name; name; name; name",
385 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs",
386 | "number_of_crosschecks": 9,
387 | "identifier": "Butyric Acid",
388 | "found_molecules": [],
389 | "pubchem_cid": 264,
390 | "formula": "C4H8O2",
391 | "hill_formula": "C4H8O2",
392 | "inchi": "InChI=1S/C4H8O2/c1-2-3-4(5)6/h2-3H2,1H3,(H,5,6)",
393 | "inchikey": "FERIUCNNQQJTOY-UHFFFAOYSA-N",
394 | "iupac_name": "butanoic acid"
395 | },
396 | "Caffeine": {
397 | "SMILES": "Cn1c(=O)c2c(ncn2C)n(C)c1=O",
398 | "synonyms": [
399 | "Caffeine",
400 | "3,7-dihydro-1,3,7-trimethyl-1H-Purine-2,6-dione",
401 | "Guaranine",
402 | "1,3,7-Trimethylxanthine",
403 | "1,3,7-trimethyl-3,7-dihydro-1H-purine-2,6-dione"
404 | ],
405 | "CAS": [
406 | "58-08-2"
407 | ],
408 | "additional_information": "cas_registry; chebi id: 27732; chemeo id: 39-014-3; cir; comptox id: DTXSID0020232|QC_LEVEL:1.0; nist id: C58082; opsin; pubchem id: 2519; srs id: 2741",
409 | "mode": "name; name; name; name; name; name; name; name; name",
410 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs",
411 | "number_of_crosschecks": 9,
412 | "identifier": "Caffeine",
413 | "found_molecules": [],
414 | "pubchem_cid": 2519,
415 | "formula": "C8H10N4O2",
416 | "hill_formula": "C8H10N4O2",
417 | "inchi": "InChI=1S/C8H10N4O2/c1-10-4-9-6-5(10)7(13)12(3)8(14)11(6)2/h4H,1-3H3",
418 | "inchikey": "RYYVLZVUVIJVGH-UHFFFAOYSA-N",
419 | "iupac_name": "1,3,7-Trimethyl-3,7-dihydro-1H-purine-2,6-dione"
420 | },
421 |
422 | "Chloroform": {
423 | "SMILES": "ClC(Cl)Cl",
424 | "synonyms": [
425 | "Chloroform",
426 | "Trichloromethane",
427 | "trichloro-Methane",
428 | "Formyl trichloride",
429 | "Trichloroform"
430 | ],
431 | "CAS": [
432 | "67-66-3"
433 | ],
434 | "additional_information": "cas_registry; chebi id: 35255; chemeo id: 21-368-0; cir; comptox id: DTXSID1020306|QC_LEVEL:1.0; nist id: B6007499; opsin; pubchem id: 6212; srs id: 4317",
435 | "mode": "name; name; name; name; name; name; name; name; name",
436 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs",
437 | "number_of_crosschecks": 9,
438 | "identifier": "Chloroform",
439 | "found_molecules": [],
440 | "pubchem_cid": 6212,
441 | "formula": "CHCl3",
442 | "hill_formula": "CHCl3",
443 | "inchi": "InChI=1S/CHCl3/c2-1(3)4/h1H",
444 | "inchikey": "HEDRZPFGACZZDS-UHFFFAOYSA-N",
445 | "iupac_name": "chloroform"
446 | },
447 | "Cholesterol": {
448 | "SMILES": "CC(C)CCC[C@@H](C)[C@H]1CC[C@H]2[C@@H]3CC=C4C[C@@H](O)CC[C@]4(C)[C@H]3CC[C@]12C",
449 | "synonyms": [
450 | "Cholesterol",
451 | "cholest-5-en-3beta-ol",
452 | "Cholesterin",
453 | "(3beta,14beta,17alpha)-cholest-5-en-3-ol",
454 | "(3S,8S,9S,10R,13R,14S,17R)-10,13-dimethyl-17-[(2R)-6-methylheptan-2-yl]-2,3,4,7,8,9,11,12,14,15,16,17-dodecahydro-1H-cyclopenta[a]phenanthren-3-ol"
455 | ],
456 | "CAS": [
457 | "57-88-5"
458 | ],
459 | "additional_information": "chebi id: 16113; cir; comptox id: DTXSID3022401|QC_LEVEL:1.0; opsin; pubchem id: 5997; srs id: 2717",
460 | "mode": "name; name; name; name; name; name",
461 | "service": "chebi; cir; comptox; opsin; pubchem; srs",
462 | "number_of_crosschecks": 6,
463 | "identifier": "Cholesterol",
464 | "found_molecules": [],
465 | "pubchem_cid": 5997,
466 | "formula": "C27H46O",
467 | "hill_formula": "C27H46O",
468 | "inchi": "InChI=1S/C27H46O/c1-18(2)7-6-8-19(3)23-11-12-24-22-10-9-20-17-21(28)13-15-26(20,4)25(22)14-16-27(23,24)5/h9,18-19,21-25,28H,6-8,10-17H2,1-5H3/t19-,21+,22+,23-,24+,25+,26+,27-/m1/s1",
469 | "inchikey": "HVYWMOMLDIMFJA-DPAQBDIFSA-N",
470 | "iupac_name": "(3S,8S,9S,10R,13R,14S,17R)-10,13-dimethyl-17-[(2R)-6-methylheptan-2-yl]-2,3,4,7,8,9,11,12,14,15,16,17-dodecahydro-1H-cyclopenta[a]phenanthren-3-ol"
471 | },
472 | "Citric Acid": {
473 | "SMILES": "O=C(O)CC(O)(CC(=O)O)C(=O)O",
474 | "synonyms": [
475 | "Citric Acid",
476 | "2-hydroxy-1,2,3-Propanetricarboxylic acid",
477 | "2-hydroxypropane-1,2,3-tricarboxylic acid",
478 | "Aciletten",
479 | "Citretten"
480 | ],
481 | "CAS": [
482 | "77-92-9"
483 | ],
484 | "additional_information": "cas_registry; chebi id: 30769; cir; comptox id: DTXSID3020332|QC_LEVEL:1.0; nist id: C77929; opsin; pubchem id: 311; srs id: 6775",
485 | "mode": "name; name; name; name; name; name; name; name",
486 | "service": "cas_registry; chebi; cir; comptox; nist; opsin; pubchem; srs",
487 | "number_of_crosschecks": 8,
488 | "identifier": "Citric Acid",
489 | "found_molecules": [],
490 | "pubchem_cid": 311,
491 | "formula": "C6H8O7",
492 | "hill_formula": "C6H8O7",
493 | "inchi": "InChI=1S/C6H8O7/c7-3(8)1-6(13,5(11)12)2-4(9)10/h13H,1-2H2,(H,7,8)(H,9,10)(H,11,12)",
494 | "inchikey": "KRKNYBCHXYNGOX-UHFFFAOYSA-N",
495 | "iupac_name": "2-hydroxypropane-1,2,3-tricarboxylic acid"
496 | },
497 | "Cocaine": {
498 | "SMILES": "COC(=O)[C@H]1[C@@H](OC(=O)c2ccccc2)C[C@@H]2CC[C@H]1N2C",
499 | "synonyms": [
500 | "Cocaine",
501 | "methyl (1R,2R,3S,5S)-3-(benzoyloxy)-8-methyl-8-azabicyclo[3.2.1]octane-2-carboxylate",
502 | "Kokain",
503 | "(1R,2R,3S,5S)-2-(methoxycarbonyl)tropan-3-yl benzoate",
504 | "(-)-cocaine"
505 | ],
506 | "CAS": [
507 | "50-36-2"
508 | ],
509 | "additional_information": "chebi id: 27958; comptox id: DTXSID2038443|QC_LEVEL:1.0; pubchem id: 446220",
510 | "mode": "name; name; name",
511 | "service": "chebi; comptox; pubchem",
512 | "number_of_crosschecks": 3,
513 | "identifier": "Cocaine",
514 | "found_molecules": [],
515 | "pubchem_cid": 446220,
516 | "formula": "C17H21NO4",
517 | "hill_formula": "C17H21NO4",
518 | "inchi": "InChI=1S/C17H21NO4/c1-18-12-8-9-13(18)15(17(20)21-2)14(10-12)22-16(19)11-6-4-3-5-7-11/h3-7,12-15H,8-10H2,1-2H3/t12-,13+,14-,15+/m0/s1",
519 | "inchikey": "ZPUCINDJVBIVPJ-LJISPDSOSA-N",
520 | "iupac_name": "methyl (1R,2R,3S,5S)-3-benzoyloxy-8-methyl-8-azabicyclo[3.2.1]octane-2-carboxylate"
521 | },
522 | "DDT": {
523 | "SMILES": "Clc1ccc(C(c2ccc(Cl)cc2)C(Cl)(Cl)Cl)cc1",
524 | "synonyms": [
525 | "ddt",
526 | "p,p'-DDT",
527 | "1,1,1-trichloro-2,2-bis(p-chlorophenyl)-Ethane",
528 | "\u03b1,\u03b1-Bis(p-chlorophenyl)-\u03b2,\u03b2,\u03b2-trichlorethane",
529 | "Dichlorodiphenyltrichloroethane"
530 | ],
531 | "CAS": [
532 | "50-29-3"
533 | ],
534 | "additional_information": "cas_registry; chebi id: 16130; chemeo id: 82-476-3; cir; comptox id: DTXSID4020375|QC_LEVEL:1.0; nist id: C50293; pubchem id: 3036; srs id: 1107",
535 | "mode": "name; name; name; name; name; name; name; name",
536 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; pubchem; srs",
537 | "number_of_crosschecks": 8,
538 | "identifier": "DDT",
539 | "found_molecules": [],
540 | "pubchem_cid": 3036,
541 | "formula": "C14H9Cl5",
542 | "hill_formula": "C14H9Cl5",
543 | "inchi": "InChI=1S/C14H9Cl5/c15-11-5-1-9(2-6-11)13(14(17,18)19)10-3-7-12(16)8-4-10/h1-8,13H",
544 | "inchikey": "YVGGHNCTFXOJCH-UHFFFAOYSA-N",
545 | "iupac_name": "1-chloro-4-[2,2,2-trichloro-1-(4-chlorophenyl)ethyl]benzene"
546 | },
547 | "DEET": {
548 | "SMILES": "CCN(CC)C(=O)c1cccc(C)c1",
549 | "synonyms": [
550 | "deet",
551 | "N,N-Diethyl-m-toluamide",
552 | "N,N-diethyl-3-methyl-Benzamide",
553 | "N,N-Diethyl-3-methylbenzamide",
554 | "diethyltoluamide"
555 | ],
556 | "CAS": [
557 | "134-62-3"
558 | ],
559 | "additional_information": "cas_registry; chebi id: 7071; chemeo id: 13-638-9; cir; comptox id: DTXSID2021995|QC_LEVEL:1.0; nist id: C134623; pubchem id: 4284; srs id: 36137",
560 | "mode": "name; name; name; name; name; name; name; name",
561 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; pubchem; srs",
562 | "number_of_crosschecks": 8,
563 | "identifier": "DEET",
564 | "found_molecules": [],
565 | "pubchem_cid": 4284,
566 | "formula": "C12H17NO",
567 | "hill_formula": "C12H17NO",
568 | "inchi": "InChI=1S/C12H17NO/c1-4-13(5-2)12(14)11-8-6-7-10(3)9-11/h6-9H,4-5H2,1-3H3",
569 | "inchikey": "MMOXZBCLCQITDF-UHFFFAOYSA-N",
570 | "iupac_name": "N,N-diethyl-3-methylbenzamide"
571 | },
572 | "R-12": {
573 | "SMILES": "FC(F)(Cl)Cl",
574 | "synonyms": [
575 | "Difluorodichloromethane",
576 | "Dwuchlorodwufluorometan",
577 | "dichlorodifluoro-Methane",
578 | "R 12, Refrigerant",
579 | "Dichlorodifluoromethane"
580 | ],
581 | "CAS": [
582 | "75-71-8"
583 | ],
584 | "additional_information": "chemeo id: 13-346-3; nist id: C75718",
585 | "mode": "name; name",
586 | "service": "chemeo; nist",
587 | "number_of_crosschecks": 2,
588 | "identifier": "R-12",
589 | "found_molecules": [],
590 | "pubchem_cid": null,
591 | "formula": "CCl2F2",
592 | "hill_formula": "CCl2F2",
593 | "inchi": "InChI=1S/CCl2F2/c2-1(3,4)5",
594 | "inchikey": "PXBRQCKWGAHEHS-UHFFFAOYSA-N",
595 | "iupac_name": "dichloro(difluoro)methane"
596 | },
597 | "Dopamine": {
598 | "SMILES": "NCCc1ccc(O)c(O)c1",
599 | "synonyms": [
600 | "Dopamine",
601 | "4-(2-aminoethyl)-1,2-Benzenediol",
602 | "4-(2-aminoethyl)-Pyrocatechol",
603 | "4-(2-aminoethyl)benzene-1,2-diol",
604 | "Dopamin"
605 | ],
606 | "CAS": [
607 | "51-61-6"
608 | ],
609 | "additional_information": "cas_registry; chebi id: 18243; chemeo id: 36-704-0; cir; comptox id: DTXSID6022420|QC_LEVEL:1.0; nist id: C51616; opsin; pubchem id: 681; srs id: 17163551",
610 | "mode": "name; name; name; name; name; name; name; name; name",
611 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs",
612 | "number_of_crosschecks": 9,
613 | "identifier": "Dopamine",
614 | "found_molecules": [],
615 | "pubchem_cid": 681,
616 | "formula": "C8H11NO2",
617 | "hill_formula": "C8H11NO2",
618 | "inchi": "InChI=1S/C8H11NO2/c9-4-3-6-1-2-7(10)8(11)5-6/h1-2,5,10-11H,3-4,9H2",
619 | "inchikey": "VYFYYTLLBUKUHU-UHFFFAOYSA-N",
620 | "iupac_name": "4-(2-aminoethyl)benzene-1,2-diol"
621 | },
622 | "Adrenaline": {
623 | "SMILES": "CNC[C@H](O)c1ccc(O)c(O)c1",
624 | "synonyms": [
625 | "Adrenaline",
626 | "Epinephrine",
627 | "4-[(1R)-1-Hydroxy-2-(methylamino)ethyl]benzene-1,2-diol",
628 | "(-)-Epinephrine",
629 | "(R)-4-[1-hydroxy-2-(methylamino)ethyl]-1,2-Benzenediol"
630 | ],
631 | "CAS": [
632 | "51-43-4"
633 | ],
634 | "additional_information": "cir; comptox id: DTXSID5022986|QC_LEVEL:1.0; nist id: C51434; pubchem id: 5816",
635 | "mode": "name; name; name; name",
636 | "service": "cir; comptox; nist; pubchem",
637 | "number_of_crosschecks": 4,
638 | "identifier": "Adrenaline",
639 | "found_molecules": [],
640 | "pubchem_cid": 5816,
641 | "formula": "C9H13NO3",
642 | "hill_formula": "C9H13NO3",
643 | "inchi": "InChI=1S/C9H13NO3/c1-10-5-9(13)6-2-3-7(11)8(12)4-6/h2-4,9-13H,5H2,1H3/t9-/m0/s1",
644 | "inchikey": "UCTWMZQNUQWSLP-VIFPVBQESA-N",
645 | "iupac_name": "4-[(1R)-1-hydroxy-2-(methylamino)ethyl]benzene-1,2-diol"
646 | },
647 | "Ethane": {
648 | "SMILES": "CC",
649 | "synonyms": [
650 | "Ethane",
651 | "Bimethyl",
652 | "Dimethyl",
653 | "Methylmethane",
654 | "Ethyl hydride"
655 | ],
656 | "CAS": [
657 | "74-84-0"
658 | ],
659 | "additional_information": "cas_registry; chebi id: 42266; chemeo id: 31-101-4; cir; comptox id: DTXSID6026377|QC_LEVEL:1.0; nist id: R633468; opsin; pubchem id: 6324; srs id: 5082",
660 | "mode": "name; name; name; name; name; name; name; name; name",
661 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs",
662 | "number_of_crosschecks": 9,
663 | "identifier": "Ethane",
664 | "found_molecules": [],
665 | "pubchem_cid": 6324,
666 | "formula": "C2H6",
667 | "hill_formula": "C2H6",
668 | "inchi": "InChI=1S/C2H6/c1-2/h1-2H3",
669 | "inchikey": "OTMSDBZUPAUEDD-UHFFFAOYSA-N",
670 | "iupac_name": "ethane"
671 | },
672 | "Ethylene": {
673 | "SMILES": "C=C",
674 | "synonyms": [
675 | "Ethylene",
676 | "Ethene",
677 | "Acetene",
678 | "Elayl",
679 | "Bicarburretted hydrogen"
680 | ],
681 | "CAS": [
682 | "74-85-1"
683 | ],
684 | "additional_information": "cas_registry; chemeo id: 56-863-2; cir; comptox id: DTXSID1026378|QC_LEVEL:1.0; nist id: C2813629; opsin; pubchem id: 6325; srs id: 5090",
685 | "mode": "name; name; name; name; name; name; name; name",
686 | "service": "cas_registry; chemeo; cir; comptox; nist; opsin; pubchem; srs",
687 | "number_of_crosschecks": 8,
688 | "identifier": "Ethylene",
689 | "found_molecules": [],
690 | "pubchem_cid": 6325,
691 | "formula": "C2H4",
692 | "hill_formula": "C2H4",
693 | "inchi": "InChI=1S/C2H4/c1-2/h1-2H2",
694 | "inchikey": "VGGSQFUCUMXWEO-UHFFFAOYSA-N",
695 | "iupac_name": "ethene"
696 | },
697 | "Ether": {
698 | "SMILES": "CCOCC",
699 | "synonyms": [
700 | "Ether",
701 | "Ethyl ether",
702 | "Diethyl ether",
703 | "1,1'-oxybis-ethane",
704 | "Anaesthetic ether"
705 | ],
706 | "CAS": [
707 | "60-29-7"
708 | ],
709 | "additional_information": "cas_registry; chemeo id: 52-430-6; cir; nist id: C60297; pubchem id: 3283; srs id: 3335",
710 | "mode": "name; name; name; name; name; name",
711 | "service": "cas_registry; chemeo; cir; nist; pubchem; srs",
712 | "number_of_crosschecks": 6,
713 | "identifier": "Ether",
714 | "found_molecules": [],
715 | "pubchem_cid": 3283,
716 | "formula": "C4H10O",
717 | "hill_formula": "C4H10O",
718 | "inchi": "InChI=1S/C4H10O/c1-3-5-4-2/h3-4H2,1-2H3",
719 | "inchikey": "RTZKZFJDLAIYFH-UHFFFAOYSA-N",
720 | "iupac_name": "ethoxyethane"
721 | },
722 | "Ethanol": {
723 | "SMILES": "CCO",
724 | "synonyms": [
725 | "Ethanol",
726 | "Alcohol",
727 | "Ethyl alcohol",
728 | "Algrain",
729 | "Anhydrol"
730 | ],
731 | "CAS": [
732 | "64-17-5"
733 | ],
734 | "additional_information": "cas_registry; chebi id: 16236; chemeo id: 35-653-8; cir; comptox id: DTXSID9020584|QC_LEVEL:1.0; nist id: C1516081; opsin; pubchem id: 702; srs id: 3822",
735 | "mode": "name; name; name; name; name; name; name; name; name",
736 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs",
737 | "number_of_crosschecks": 9,
738 | "identifier": "Ethanol",
739 | "found_molecules": [],
740 | "pubchem_cid": 702,
741 | "formula": "C2H6O",
742 | "hill_formula": "C2H6O",
743 | "inchi": "InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3",
744 | "inchikey": "LFQSCWFLJHTTHZ-UHFFFAOYSA-N",
745 | "iupac_name": "ethanol"
746 | },
747 | "Ethylenediaminetetraacetic Acid": {
748 | "SMILES": "O=C(O)CN(CCN(CC(=O)O)CC(=O)O)CC(=O)O",
749 | "synonyms": [
750 | "Ethylenediaminetetraacetic Acid",
751 | "(ethylenedinitrilo)tetra-Acetic acid",
752 | "Edetic Acid",
753 | "N,N'-1,2-ethanediylbis[N-(carboxymethyl)-Glycine",
754 | "2,2',2'',2'''-(ethane-1,2-diyldinitrilo)tetraacetic acid"
755 | ],
756 | "CAS": [
757 | "60-00-4"
758 | ],
759 | "additional_information": "cas_registry; chebi id: 4735; chemeo id: 119-689-6; cir; comptox id: DTXSID6022977|QC_LEVEL:1.0; nist id: C60004; opsin; pubchem id: 6049; srs id: 3228",
760 | "mode": "name; name; name; name; name; name; name; name; name",
761 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs",
762 | "number_of_crosschecks": 9,
763 | "identifier": "Ethylenediaminetetraacetic Acid",
764 | "found_molecules": [],
765 | "pubchem_cid": 6049,
766 | "formula": "C10H16N2O8",
767 | "hill_formula": "C10H16N2O8",
768 | "inchi": "InChI=1S/C10H16N2O8/c13-7(14)3-11(4-8(15)16)1-2-12(5-9(17)18)6-10(19)20/h1-6H2,(H,13,14)(H,15,16)(H,17,18)(H,19,20)",
769 | "inchikey": "KCXVZYZYPLLWCC-UHFFFAOYSA-N",
770 | "iupac_name": "2-[2-[bis(carboxymethyl)amino]ethyl-(carboxymethyl)amino]acetic acid"
771 | },
772 | "Fluoxetine": {
773 | "SMILES": "CNCCC(Oc1ccc(C(F)(F)F)cc1)c1ccccc1",
774 | "synonyms": [
775 | "Fluoxetine",
776 | "N-Methyl-3-phenyl-3-[4-(trifluoromethyl)phenoxy]propan-1-amine",
777 | "Prozac",
778 | "N-methyl-\u03b3-[4-(trifluoromethyl)phenoxy]-Benzenepropanamine",
779 | "(\u00b1)-N-methyl-\u03b3-[4-(trifluoromethyl)phenoxy]-Benzenepropanamine"
780 | ],
781 | "CAS": [
782 | "54910-89-3"
783 | ],
784 | "additional_information": "cas_registry; chemeo id: 106-550-3; cir; comptox id: DTXSID7023067|QC_LEVEL:1.0; nist id: C54910893; pubchem id: 3386; srs id: 1735870",
785 | "mode": "name; name; name; name; name; name; name",
786 | "service": "cas_registry; chemeo; cir; comptox; nist; pubchem; srs",
787 | "number_of_crosschecks": 7,
788 | "identifier": "Fluoxetine",
789 | "found_molecules": [],
790 | "pubchem_cid": 3386,
791 | "formula": "C17H18F3NO",
792 | "hill_formula": "C17H18F3NO",
793 | "inchi": "InChI=1S/C17H18F3NO/c1-21-12-11-16(13-5-3-2-4-6-13)22-15-9-7-14(8-10-15)17(18,19)20/h2-10,16,21H,11-12H2,1H3",
794 | "inchikey": "RTHCYVBBDHJXIQ-UHFFFAOYSA-N",
795 | "iupac_name": "N-methyl-3-phenyl-3-[4-(trifluoromethyl)phenoxy]propan-1-amine"
796 | },
797 | "Formaldehyde": {
798 | "SMILES": "C=O",
799 | "synonyms": [
800 | "Formaldehyde",
801 | "Formalin",
802 | "Methanal",
803 | "bfv",
804 | "Fannoform"
805 | ],
806 | "CAS": [
807 | "50-00-0"
808 | ],
809 | "additional_information": "cas_registry; chebi id: 16842; chemeo id: 44-234-3; cir; comptox id: DTXSID7020637|QC_LEVEL:1.0; nist id: C1664999; opsin; pubchem id: 712; srs id: 1008",
810 | "mode": "name; name; name; name; name; name; name; name; name",
811 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs",
812 | "number_of_crosschecks": 9,
813 | "identifier": "Formaldehyde",
814 | "found_molecules": [],
815 | "pubchem_cid": 712,
816 | "formula": "CH2O",
817 | "hill_formula": "CH2O",
818 | "inchi": "InChI=1S/CH2O/c1-2/h1H2",
819 | "inchikey": "WSFSSNUMVMOOMR-UHFFFAOYSA-N",
820 | "iupac_name": "formaldehyde"
821 | },
822 | "Formic Acid": {
823 | "SMILES": "O=CO",
824 | "synonyms": [
825 | "Formic Acid",
826 | "Aminic acid",
827 | "Methanoic acid",
828 | "Formylic acid",
829 | "bilorin"
830 | ],
831 | "CAS": [
832 | "64-18-6"
833 | ],
834 | "additional_information": "cas_registry; chebi id: 30751; chemeo id: 39-377-1; cir; comptox id: DTXSID2024115|QC_LEVEL:1.0; nist id: C64186; opsin; pubchem id: 284; srs id: 3830",
835 | "mode": "name; name; name; name; name; name; name; name; name",
836 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs",
837 | "number_of_crosschecks": 9,
838 | "identifier": "Formic Acid",
839 | "found_molecules": [],
840 | "pubchem_cid": 284,
841 | "formula": "CH2O2",
842 | "hill_formula": "CH2O2",
843 | "inchi": "InChI=1S/CH2O2/c2-1-3/h1H,(H,2,3)",
844 | "inchikey": "BDAGIHXWWSANSR-UHFFFAOYSA-N",
845 | "iupac_name": "formic acid"
846 | },
847 | "Glucose": {
848 | "SMILES": "O=C[C@H](O)[C@@H](O)[C@H](O)[C@H](O)CO",
849 | "synonyms": [
850 | "Glucose",
851 | "D-Glucose",
852 | "DL-Glucose",
853 | "Cartose",
854 | "Cerelose"
855 | ],
856 | "CAS": [
857 | "58367-01-4"
858 | ],
859 | "additional_information": "comptox id: DTXSID4048729|QC_LEVEL:1.0; nist id: C3458284; opsin",
860 | "mode": "name; name; name",
861 | "service": "comptox; nist; opsin",
862 | "number_of_crosschecks": 3,
863 | "identifier": "Glucose",
864 | "found_molecules": [],
865 | "pubchem_cid": null,
866 | "formula": "C6H12O6",
867 | "hill_formula": "C6H12O6",
868 | "inchi": "InChI=1S/C6H12O6/c7-1-3(9)5(11)6(12)4(10)2-8/h1,3-6,8-12H,2H2/t3-,4+,5+,6+/m0/s1",
869 | "inchikey": "GZCGUPFRVQAUEE-SLPGGIOYSA-N",
870 | "iupac_name": "(2R,3S,4R,5R)-2,3,4,5,6-pentahydroxyhexanal"
871 | },
872 | "Glycerol": {
873 | "SMILES": "OCC(O)CO",
874 | "synonyms": [
875 | "Glycerol",
876 | "1,2,3-Propanetriol",
877 | "Glycerin",
878 | "Glycerine",
879 | "Propane-1,2,3-triol"
880 | ],
881 | "CAS": [
882 | "56-81-5"
883 | ],
884 | "additional_information": "cas_registry; chemeo id: 47-118-9; cir; comptox id: DTXSID9020663|QC_LEVEL:1.0; nist id: C56815; opsin; pubchem id: 753; srs id: 2279",
885 | "mode": "name; name; name; name; name; name; name; name",
886 | "service": "cas_registry; chemeo; cir; comptox; nist; opsin; pubchem; srs",
887 | "number_of_crosschecks": 8,
888 | "identifier": "Glycerol",
889 | "found_molecules": [],
890 | "pubchem_cid": 753,
891 | "formula": "C3H8O3",
892 | "hill_formula": "C3H8O3",
893 | "inchi": "InChI=1S/C3H8O3/c4-1-3(6)2-5/h3-6H,1-2H2",
894 | "inchikey": "PEDCQBHIVMGVHV-UHFFFAOYSA-N",
895 | "iupac_name": "propane-1,2,3-triol"
896 | },
897 | "Guanine": {
898 | "SMILES": "Nc1nc2nc[nH]c2c(=O)[nH]1",
899 | "synonyms": [
900 | "Guanine",
901 | "2-amino-1,7-dihydro-6H-Purin-6-one",
902 | "2-amino-1,9-dihydro-6H-purin-6-one",
903 | "2-amino-1,7-dihydropurin-6-one",
904 | "2-amino-Hypoxanthine"
905 | ],
906 | "CAS": [
907 | "73-40-5"
908 | ],
909 | "additional_information": "opsin; pubchem id: 135398634; srs id: 5009",
910 | "mode": "name; name; name",
911 | "service": "opsin; pubchem; srs",
912 | "number_of_crosschecks": 3,
913 | "identifier": "Guanine",
914 | "found_molecules": [],
915 | "pubchem_cid": 135398634,
916 | "formula": "C5H5N5O",
917 | "hill_formula": "C5H5N5O",
918 | "inchi": "InChI=1S/C5H5N5O/c6-5-9-3-2(4(11)10-5)7-1-8-3/h1H,(H4,6,7,8,9,10,11)",
919 | "inchikey": "UYTPUPDQBNUYGX-UHFFFAOYSA-N",
920 | "iupac_name": "2-amino-1,7-dihydropurin-6-one"
921 | },
922 | "Hydrochloric Acid": {
923 | "SMILES": "Cl",
924 | "synonyms": [
925 | "Hydrochloric Acid",
926 | "Hydrogen chloride",
927 | "Chlorohydric acid",
928 | "Muriatic acid",
929 | "Dilute hydrochloric acid"
930 | ],
931 | "CAS": [
932 | "7647-01-0"
933 | ],
934 | "additional_information": "cas_registry; chemeo id: 57-172-8; comptox id: DTXSID2020711|QC_LEVEL:1.0; nist id: C7698057; opsin; pubchem id: 313; srs id: 152231",
935 | "mode": "name; name; name; name; name; name; name",
936 | "service": "cas_registry; chemeo; comptox; nist; opsin; pubchem; srs",
937 | "number_of_crosschecks": 7,
938 | "identifier": "Hydrochloric Acid",
939 | "found_molecules": [],
940 | "pubchem_cid": 313,
941 | "formula": "HCl",
942 | "hill_formula": "ClH",
943 | "inchi": "InChI=1S/ClH/h1H",
944 | "inchikey": "VEXZGXHMUGYJMC-UHFFFAOYSA-N",
945 | "iupac_name": "Hydrogen chloride"
946 | },
947 | "Hydrogen Peroxide": {
948 | "SMILES": "OO",
949 | "synonyms": [
950 | "Hydrogen Peroxide",
951 | "Albone",
952 | "Inhibine",
953 | "Perhydrol",
954 | "dihydrogen dioxide"
955 | ],
956 | "CAS": [
957 | "7722-84-1"
958 | ],
959 | "additional_information": "cas_registry; chebi id: 16240; chemeo id: 67-803-6; cir; comptox id: DTXSID2020715|QC_LEVEL:1.0; nist id: C7722841; opsin; pubchem id: 784; srs id: 153015",
960 | "mode": "name; name; name; name; name; name; name; name; name",
961 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs",
962 | "number_of_crosschecks": 9,
963 | "identifier": "Hydrogen Peroxide",
964 | "found_molecules": [],
965 | "pubchem_cid": 784,
966 | "formula": "H2O2",
967 | "hill_formula": "H2O2",
968 | "inchi": "InChI=1S/H2O2/c1-2/h1-2H",
969 | "inchikey": "MHAJPDPJQMAIIY-UHFFFAOYSA-N",
970 | "iupac_name": "hydrogen peroxide"
971 | },
972 | "Hydrogen Sulfide": {
973 | "SMILES": "S",
974 | "synonyms": [
975 | "Hydrogen Sulfide",
976 | "Hydrosulfuric acid",
977 | "dihydrogen monosulfide",
978 | "Stink damp",
979 | "Sulfur hydride"
980 | ],
981 | "CAS": [
982 | "7783-06-4"
983 | ],
984 | "additional_information": "cas_registry; chebi id: 16136; chemeo id: 51-105-8; cir; comptox id: DTXSID4024149|QC_LEVEL:1.0; nist id: C7783064; opsin; pubchem id: 402; srs id: 154518",
985 | "mode": "name; name; name; name; name; name; name; name; name",
986 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs",
987 | "number_of_crosschecks": 9,
988 | "identifier": "Hydrogen Sulfide",
989 | "found_molecules": [],
990 | "pubchem_cid": 402,
991 | "formula": "H2S",
992 | "hill_formula": "H2S",
993 | "inchi": "InChI=1S/H2S/h1H2",
994 | "inchikey": "RWSOTUBLDIXVET-UHFFFAOYSA-N",
995 | "iupac_name": "sulfane"
996 | },
997 | "Ibuprofen": {
998 | "SMILES": "CC(C)Cc1ccc(C(C)C(=O)O)cc1",
999 | "synonyms": [
1000 | "Ibuprofen",
1001 | "\u03b1-methyl-4-(2-methylpropyl)-Benzeneacetic acid",
1002 | "p-isobutyl-Hydratropic acid",
1003 | "2-[4-(2-Methylpropyl)phenyl]propanoic acid",
1004 | "\u03b1-Methyl-4-(2-methylpropyl)benzeneacetic acid"
1005 | ],
1006 | "CAS": [
1007 | "15687-27-1"
1008 | ],
1009 | "additional_information": "cas_registry; chemeo id: 20-897-4; cir; comptox id: DTXSID5020732|QC_LEVEL:1.0; nist id: T999947192; opsin; pubchem id: 3672; srs id: 200931",
1010 | "mode": "name; name; name; name; name; name; name; name",
1011 | "service": "cas_registry; chemeo; cir; comptox; nist; opsin; pubchem; srs",
1012 | "number_of_crosschecks": 8,
1013 | "identifier": "Ibuprofen",
1014 | "found_molecules": [],
1015 | "pubchem_cid": 3672,
1016 | "formula": "C13H18O2",
1017 | "hill_formula": "C13H18O2",
1018 | "inchi": "InChI=1S/C13H18O2/c1-9(2)8-11-4-6-12(7-5-11)10(3)13(14)15/h4-7,9-10H,8H2,1-3H3,(H,14,15)",
1019 | "inchikey": "HEFNNWSXXWATRW-UHFFFAOYSA-N",
1020 | "iupac_name": "2-[4-(2-methylpropyl)phenyl]propanoic acid"
1021 | },
1022 | "Indigo": {
1023 | "SMILES": "O=C1C(=C2Nc3ccccc3C2=O)Nc2ccccc21",
1024 | "synonyms": [
1025 | "Indigo",
1026 | "2-(1,3-dihydro-3-oxo-2H-indol-2-ylidene)-1,2-dihydro-3H-Indol-3-one",
1027 | "Indigo Blue",
1028 | "[\u03942,2\u2032-Biindoline]-3,3\u2032-dione",
1029 | "Indigo Pure BASF"
1030 | ],
1031 | "CAS": [
1032 | "482-89-3"
1033 | ],
1034 | "additional_information": "cas_registry; chemeo id: 114-901-4; comptox id: DTXSID3026279|QC_LEVEL:1.0; srs id: 47183",
1035 | "mode": "name; name; name; name",
1036 | "service": "cas_registry; chemeo; comptox; srs",
1037 | "number_of_crosschecks": 4,
1038 | "identifier": "Indigo",
1039 | "found_molecules": [],
1040 | "pubchem_cid": null,
1041 | "formula": "C16H10N2O2",
1042 | "hill_formula": "C16H10N2O2",
1043 | "inchi": "InChI=1S/C16H10N2O2/c19-15-9-5-1-3-7-11(9)17-13(15)14-16(20)10-6-2-4-8-12(10)18-14/h1-8,17-18H",
1044 | "inchikey": "COHYTHOBJLSHDF-UHFFFAOYSA-N",
1045 | "iupac_name": "[2,2'-biindolinylidene]-3,3'-dione"
1046 | },
1047 | "Isooctane": {
1048 | "SMILES": "CC(C)CC(C)(C)C",
1049 | "synonyms": [
1050 | "Isooctane",
1051 | "2,2,4-Trimethylpentane",
1052 | "Isobutyltrimethylmethane",
1053 | "2,2,4-trimethyl-Pentane",
1054 | "iso-Octane"
1055 | ],
1056 | "CAS": [
1057 | "540-84-1"
1058 | ],
1059 | "additional_information": "cas_registry; chebi id: 62805; chemeo id: 55-702-1; cir; nist id: C540841; opsin; pubchem id: 10907",
1060 | "mode": "name; name; name; name; name; name; name",
1061 | "service": "cas_registry; chebi; chemeo; cir; nist; opsin; pubchem",
1062 | "number_of_crosschecks": 7,
1063 | "identifier": "Isooctane",
1064 | "found_molecules": [],
1065 | "pubchem_cid": 10907,
1066 | "formula": "C8H18",
1067 | "hill_formula": "C8H18",
1068 | "inchi": "InChI=1S/C8H18/c1-7(2)6-8(3,4)5/h7H,6H2,1-5H3",
1069 | "inchikey": "NHTMVDHEPJAVLT-UHFFFAOYSA-N",
1070 | "iupac_name": "2,2,4-trimethylpentane"
1071 | },
1072 | "Isoprene": {
1073 | "SMILES": "C=CC(=C)C",
1074 | "synonyms": [
1075 | "Isoprene",
1076 | "2-methyl-1,3-Butadiene",
1077 | "2-Methylbutadiene",
1078 | "2-methylbuta-1,3-diene",
1079 | "Isopentadiene"
1080 | ],
1081 | "CAS": [
1082 | "78-79-5"
1083 | ],
1084 | "additional_information": "cas_registry; chebi id: 35194; chemeo id: 10-713-8; cir; comptox id: DTXSID2020761|QC_LEVEL:1.0; nist id: C78795; opsin; pubchem id: 6557; srs id: 7328",
1085 | "mode": "name; name; name; name; name; name; name; name; name",
1086 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs",
1087 | "number_of_crosschecks": 9,
1088 | "identifier": "Isoprene",
1089 | "found_molecules": [],
1090 | "pubchem_cid": 6557,
1091 | "formula": "C5H8",
1092 | "hill_formula": "C5H8",
1093 | "inchi": "InChI=1S/C5H8/c1-4-5(2)3/h4H,1-2H2,3H3",
1094 | "inchikey": "RRHGJUQNOFWUDK-UHFFFAOYSA-N",
1095 | "iupac_name": "2-methylbuta-1,3-diene"
1096 | },
1097 | "l-Dopa": {
1098 | "SMILES": "N[C@@H](Cc1ccc(O)c(O)c1)C(=O)O",
1099 | "synonyms": [
1100 | "l-Dopa",
1101 | "Levodopa",
1102 | "3-Hydroxy-L-tyrosine",
1103 | "(2S)-2-amino-3-(3,4-dihydroxyphenyl)propanoic acid",
1104 | "L-3-(3,4-dihydroxyphenyl)-Alanine"
1105 | ],
1106 | "CAS": [
1107 | "59-92-7"
1108 | ],
1109 | "additional_information": "chebi id: 15765; comptox id: DTXSID9023209|QC_LEVEL:1.0; nist id: C63843; opsin; pubchem id: 6047",
1110 | "mode": "name; name; name; name; name",
1111 | "service": "chebi; comptox; nist; opsin; pubchem",
1112 | "number_of_crosschecks": 5,
1113 | "identifier": "l-Dopa",
1114 | "found_molecules": [],
1115 | "pubchem_cid": 6047,
1116 | "formula": "C9H11NO4",
1117 | "hill_formula": "C9H11NO4",
1118 | "inchi": "InChI=1S/C9H11NO4/c10-6(9(13)14)3-5-1-2-7(11)8(12)4-5/h1-2,4,6,11-12H,3,10H2,(H,13,14)/t6-/m0/s1",
1119 | "inchikey": "WTDRDQBEARUVNC-LURJTMIESA-N",
1120 | "iupac_name": "(2S)-2-amino-3-(3,4-dihydroxyphenyl)propanoic acid"
1121 | },
1122 | "Methane": {
1123 | "SMILES": "C",
1124 | "synonyms": [
1125 | "Methane",
1126 | "Marsh gas",
1127 | "Methyl hydride",
1128 | "Biogas",
1129 | "Fire Damp"
1130 | ],
1131 | "CAS": [
1132 | "74-82-8"
1133 | ],
1134 | "additional_information": "cas_registry; chebi id: 16183; chemeo id: 27-471-9; cir; comptox id: DTXSID8025545|QC_LEVEL:1.0; nist id: C74828; opsin; pubchem id: 297; srs id: 5066",
1135 | "mode": "name; name; name; name; name; name; name; name; name",
1136 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs",
1137 | "number_of_crosschecks": 9,
1138 | "identifier": "Methane",
1139 | "found_molecules": [],
1140 | "pubchem_cid": 297,
1141 | "formula": "CH4",
1142 | "hill_formula": "CH4",
1143 | "inchi": "InChI=1S/CH4/h1H4",
1144 | "inchikey": "VNWKTOKETHGBQD-UHFFFAOYSA-N",
1145 | "iupac_name": "methane"
1146 | },
1147 | "Methyl Alcohol": {
1148 | "SMILES": "CO",
1149 | "synonyms": [
1150 | "Methyl Alcohol",
1151 | "Methanol",
1152 | "Carbinol",
1153 | "Methyl hydroxide",
1154 | "wood alcohol"
1155 | ],
1156 | "CAS": [
1157 | "67-56-1"
1158 | ],
1159 | "additional_information": "cas_registry; chemeo id: 69-136-5; cir; comptox id: DTXSID2021731|QC_LEVEL:1.0; nist id: C1849292; opsin; pubchem id: 887; srs id: 4283",
1160 | "mode": "name; name; name; name; name; name; name; name",
1161 | "service": "cas_registry; chemeo; cir; comptox; nist; opsin; pubchem; srs",
1162 | "number_of_crosschecks": 8,
1163 | "identifier": "Methyl Alcohol",
1164 | "found_molecules": [],
1165 | "pubchem_cid": 887,
1166 | "formula": "CH4O",
1167 | "hill_formula": "CH4O",
1168 | "inchi": "InChI=1S/CH4O/c1-2/h2H,1H3",
1169 | "inchikey": "OKKJLVBELUTLKV-UHFFFAOYSA-N",
1170 | "iupac_name": "methanol"
1171 | },
1172 | "Methylphenidate": {
1173 | "SMILES": "COC(=O)C(c1ccccc1)C1CCCCN1",
1174 | "synonyms": [
1175 | "Methylphenidate",
1176 | "Calocain",
1177 | "2-Piperidineacetic acid, \u03b1-phenyl-, methyl ester",
1178 | "Methylphenidan",
1179 | "Concerta"
1180 | ],
1181 | "CAS": [
1182 | "113-45-1"
1183 | ],
1184 | "additional_information": "cas_registry; chemeo id: 42-778-2; cir; comptox id: DTXSID5023299|QC_LEVEL:1.0; nist id: C113451; pubchem id: 4158",
1185 | "mode": "name; name; name; name; name; name",
1186 | "service": "cas_registry; chemeo; cir; comptox; nist; pubchem",
1187 | "number_of_crosschecks": 6,
1188 | "identifier": "Methylphenidate",
1189 | "found_molecules": [],
1190 | "pubchem_cid": 4158,
1191 | "formula": "C14H19NO2",
1192 | "hill_formula": "C14H19NO2",
1193 | "inchi": "InChI=1S/C14H19NO2/c1-17-14(16)13(11-7-3-2-4-8-11)12-9-5-6-10-15-12/h2-4,7-8,12-13,15H,5-6,9-10H2,1H3",
1194 | "inchikey": "DUGOZIWVEXMGBE-UHFFFAOYSA-N",
1195 | "iupac_name": "methyl 2-phenyl-2-piperidin-2-ylacetate"
1196 | },
1197 | "Naphthalene": {
1198 | "SMILES": "c1ccc2ccccc2c1",
1199 | "synonyms": [
1200 | "Naphthalene",
1201 | "Albocarbon",
1202 | "Tar camphor",
1203 | "White tar",
1204 | "Naphthalin"
1205 | ],
1206 | "CAS": [
1207 | "91-20-3"
1208 | ],
1209 | "additional_information": "cas_registry; chebi id: 16482; chemeo id: 69-516-3; cir; comptox id: DTXSID8020913|QC_LEVEL:1.0; nist id: C1146652; opsin; pubchem id: 931; srs id: 13326",
1210 | "mode": "name; name; name; name; name; name; name; name; name",
1211 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs",
1212 | "number_of_crosschecks": 9,
1213 | "identifier": "Naphthalene",
1214 | "found_molecules": [],
1215 | "pubchem_cid": 931,
1216 | "formula": "C10H8",
1217 | "hill_formula": "C10H8",
1218 | "inchi": "InChI=1S/C10H8/c1-2-6-10-8-4-3-7-9(10)5-1/h1-8H",
1219 | "inchikey": "UFWIBTONFRDIAS-UHFFFAOYSA-N",
1220 | "iupac_name": "naphthalene"
1221 | },
1222 | "Nicotine": {
1223 | "SMILES": "CN1CCC[C@H]1c1cccnc1",
1224 | "synonyms": [
1225 | "Nicotine",
1226 | "3-[(2S)-1-Methylpyrrolidin-2-yl]pyridine",
1227 | "(-)-Nicotine",
1228 | "1-methyl-2-(3-pyridal)-Pyrrolidine",
1229 | "1-methyl-2-(3-pyridal)-pyrrolidene"
1230 | ],
1231 | "CAS": [
1232 | "54-11-5"
1233 | ],
1234 | "additional_information": "cir; comptox id: DTXSID1020930|QC_LEVEL:1.0; pubchem id: 89594",
1235 | "mode": "name; name; name",
1236 | "service": "cir; comptox; pubchem",
1237 | "number_of_crosschecks": 3,
1238 | "identifier": "Nicotine",
1239 | "found_molecules": [],
1240 | "pubchem_cid": 89594,
1241 | "formula": "C10H14N2",
1242 | "hill_formula": "C10H14N2",
1243 | "inchi": "InChI=1S/C10H14N2/c1-12-7-3-5-10(12)9-4-2-6-11-8-9/h2,4,6,8,10H,3,5,7H2,1H3/t10-/m0/s1",
1244 | "inchikey": "SNICXCGAKADSCV-JTQLQIEISA-N",
1245 | "iupac_name": "3-[(2S)-1-methylpyrrolidin-2-yl]pyridine"
1246 | },
1247 | "Nitric Acid": {
1248 | "SMILES": "O=[N+]([O-])O",
1249 | "synonyms": [
1250 | "Nitric Acid",
1251 | "Aqua fortis",
1252 | "Azotic acid",
1253 | "Hydrogen nitrate",
1254 | "Nitryl hydroxide"
1255 | ],
1256 | "CAS": [
1257 | "7697-37-2"
1258 | ],
1259 | "additional_information": "cas_registry; chebi id: 48107; cir; comptox id: DTXSID5029685|QC_LEVEL:1.0; nist id: C7697372; opsin; pubchem id: 944; srs id: 152686",
1260 | "mode": "name; name; name; name; name; name; name; name",
1261 | "service": "cas_registry; chebi; cir; comptox; nist; opsin; pubchem; srs",
1262 | "number_of_crosschecks": 8,
1263 | "identifier": "Nitric Acid",
1264 | "found_molecules": [],
1265 | "pubchem_cid": 944,
1266 | "formula": "HNO3",
1267 | "hill_formula": "HNO3",
1268 | "inchi": "InChI=1S/HNO3/c2-1(3)4/h(H,2,3,4)",
1269 | "inchikey": "GRYLNZFGIOXLOG-UHFFFAOYSA-N",
1270 | "iupac_name": "nitric acid"
1271 | },
1272 | "Nitric Oxide": {
1273 | "SMILES": "[N]=O",
1274 | "synonyms": [
1275 | "Nitric Oxide",
1276 | "Nitrogen monoxide",
1277 | "Nitrosyl radical",
1278 | "nitrogen oxide",
1279 | "oxo-Amidogen"
1280 | ],
1281 | "CAS": [
1282 | "10102-43-9"
1283 | ],
1284 | "additional_information": "cas_registry; chemeo id: 26-216-3; comptox id: DTXSID1020938|QC_LEVEL:1.0; nist id: C10102439; pubchem id: 145068; srs id: 167916",
1285 | "mode": "name; name; name; name; name; name",
1286 | "service": "cas_registry; chemeo; comptox; nist; pubchem; srs",
1287 | "number_of_crosschecks": 6,
1288 | "identifier": "Nitric Oxide",
1289 | "found_molecules": [],
1290 | "pubchem_cid": 145068,
1291 | "formula": "NO",
1292 | "hill_formula": "NO",
1293 | "inchi": "InChI=1S/NO/c1-2",
1294 | "inchikey": "MWUXSHHQAYIFBG-UHFFFAOYSA-N",
1295 | "iupac_name": "nitrogen(II) oxide"
1296 | },
1297 |
1298 | "Nitroglycerin": {
1299 | "SMILES": "O=[N+]([O-])OCC(CO[N+](=O)[O-])O[N+](=O)[O-]",
1300 | "synonyms": [
1301 | "Nitroglycerin",
1302 | "1,2,3-Propanetriol, trinitrate",
1303 | "1,2,3-Propanetriol, 1,2,3-trinitrate",
1304 | "Anginine",
1305 | "1,2,3-propanetriyl nitrate"
1306 | ],
1307 | "CAS": [
1308 | "55-63-0"
1309 | ],
1310 | "additional_information": "cas_registry; chebi id: 28787; chemeo id: 41-624-3; cir; comptox id: DTXSID1021407|QC_LEVEL:1.0; nist id: C55630; opsin; pubchem id: 4510; srs id: 1941",
1311 | "mode": "name; name; name; name; name; name; name; name; name",
1312 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs",
1313 | "number_of_crosschecks": 9,
1314 | "identifier": "Nitroglycerin",
1315 | "found_molecules": [],
1316 | "pubchem_cid": 4510,
1317 | "formula": "C3H5N3O9",
1318 | "hill_formula": "C3H5N3O9",
1319 | "inchi": "InChI=1S/C3H5N3O9/c7-4(8)13-1-3(15-6(11)12)2-14-5(9)10/h3H,1-2H2",
1320 | "inchikey": "SNIOPGDIGTZGOP-UHFFFAOYSA-N",
1321 | "iupac_name": "1,3-dinitrooxypropan-2-yl nitrate"
1322 | },
1323 | "Penicillin": {
1324 | "SMILES": "CC1(C)SC2C(NC(=O)Cc3ccccc3)C(=O)N2C1C(=O)O",
1325 | "synonyms": [
1326 | "Penicillin",
1327 | "Penicillin G",
1328 | "3,3-dimethyl-7-oxo-6-[(2-phenylacetyl)amino]- (2S,5R,6R)-4-Thia-1-azabicyclo[3.2.0]heptane-2-carboxylic acid",
1329 | "3,3-dimethyl-7-oxo-6-(2-phenylacetamido)-4-Thia-1-azabicyclo[3.2.0]heptane-2-carboxylic acid",
1330 | "3,3-dimethyl-7-oxo-6-[(phenylacetyl)amino]- [2S-(2\u03b1,5\u03b1,6\u03b2)]-4-Thia-1-azabicyclo[3.2.0]heptane-2-carboxylic acid"
1331 | ],
1332 | "CAS": [
1333 | "61-33-6"
1334 | ],
1335 | "additional_information": "cas_registry; pubchem id: 2349",
1336 | "mode": "name; name",
1337 | "service": "cas_registry; pubchem",
1338 | "number_of_crosschecks": 2,
1339 | "identifier": "Penicillin",
1340 | "found_molecules": [],
1341 | "pubchem_cid": 2349,
1342 | "formula": "C16H18N2O4S",
1343 | "hill_formula": "C16H18N2O4S",
1344 | "inchi": "InChI=1S/C16H18N2O4S/c1-16(2)12(15(21)22)18-13(20)11(14(18)23-16)17-10(19)8-9-6-4-3-5-7-9/h3-7,11-12,14H,8H2,1-2H3,(H,17,19)(H,21,22)",
1345 | "inchikey": "JGSARLDLIJGVTE-UHFFFAOYSA-N",
1346 | "iupac_name": "3,3-dimethyl-7-oxo-6-[(2-phenylacetyl)amino]-4-thia-1-azabicyclo[3.2.0]heptane-2-carboxylic acid"
1347 | },
1348 | "Phenol": {
1349 | "SMILES": "Oc1ccccc1",
1350 | "synonyms": [
1351 | "Phenol",
1352 | "Carbolic acid",
1353 | "Hydroxybenzene",
1354 | "Phenic acid",
1355 | "acide carbolique"
1356 | ],
1357 | "CAS": [
1358 | "108-95-2"
1359 | ],
1360 | "additional_information": "cas_registry; chebi id: 15882; chemeo id: 69-137-4; cir; comptox id: DTXSID5021124|QC_LEVEL:1.0; opsin; pubchem id: 996; srs id: 25510",
1361 | "mode": "name; name; name; name; name; name; name; name",
1362 | "service": "cas_registry; chebi; chemeo; cir; comptox; opsin; pubchem; srs",
1363 | "number_of_crosschecks": 8,
1364 | "identifier": "Phenol",
1365 | "found_molecules": [],
1366 | "pubchem_cid": 996,
1367 | "formula": "C6H6O",
1368 | "hill_formula": "C6H6O",
1369 | "inchi": "InChI=1S/C6H6O/c7-6-4-2-1-3-5-6/h1-5,7H",
1370 | "inchikey": "ISWSIDIOOBJBQZ-UHFFFAOYSA-N",
1371 | "iupac_name": "phenol"
1372 | },
1373 | "Phosphoric Acid": {
1374 | "SMILES": "O=P(O)(O)O",
1375 | "synonyms": [
1376 | "Phosphoric Acid",
1377 | "Orthophosphoric acid",
1378 | "o-phosphoric acid",
1379 | "WC-Reiniger",
1380 | "acide phosphorique"
1381 | ],
1382 | "CAS": [
1383 | "7664-38-2"
1384 | ],
1385 | "additional_information": "cas_registry; chebi id: 26078; chemeo id: 125-325-2; cir; comptox id: DTXSID5024263|QC_LEVEL:1.0; nist id: C7664382; opsin; pubchem id: 1004; srs id: 152363",
1386 | "mode": "name; name; name; name; name; name; name; name; name",
1387 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs",
1388 | "number_of_crosschecks": 9,
1389 | "identifier": "Phosphoric Acid",
1390 | "found_molecules": [],
1391 | "pubchem_cid": 1004,
1392 | "formula": "H3O4P",
1393 | "hill_formula": "H3O4P",
1394 | "inchi": "InChI=1S/H3O4P/c1-5(2,3)4/h(H3,1,2,3,4)",
1395 | "inchikey": "NBIIXXVUZAFLBC-UHFFFAOYSA-N",
1396 | "iupac_name": "phosphoric acid"
1397 | },
1398 | "Propane": {
1399 | "SMILES": "CCC",
1400 | "synonyms": [
1401 | "Propane",
1402 | "Dimethylmethane",
1403 | "Propyl hydride",
1404 | "n-Propane",
1405 | "Liquefied petroleum gas"
1406 | ],
1407 | "CAS": [
1408 | "74-98-6"
1409 | ],
1410 | "additional_information": "cas_registry; chebi id: 32879; chemeo id: 13-317-5; cir; comptox id: DTXSID5026386|QC_LEVEL:1.0; nist id: C17251626; opsin; pubchem id: 6334; srs id: 5207",
1411 | "mode": "name; name; name; name; name; name; name; name; name",
1412 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs",
1413 | "number_of_crosschecks": 9,
1414 | "identifier": "Propane",
1415 | "found_molecules": [],
1416 | "pubchem_cid": 6334,
1417 | "formula": "C3H8",
1418 | "hill_formula": "C3H8",
1419 | "inchi": "InChI=1S/C3H8/c1-3-2/h3H2,1-2H3",
1420 | "inchikey": "ATUOYWHBWRKTHZ-UHFFFAOYSA-N",
1421 | "iupac_name": "propane"
1422 | },
1423 | "Propylene": {
1424 | "SMILES": "C=CC",
1425 | "synonyms": [
1426 | "Propylene",
1427 | "1-Propene",
1428 | "Propene",
1429 | "Methylethylene",
1430 | "1-Propylene"
1431 | ],
1432 | "CAS": [
1433 | "115-07-1"
1434 | ],
1435 | "additional_information": "cas_registry; chemeo id: 23-076-2; cir; comptox id: DTXSID5021205|QC_LEVEL:1.0; nist id: C1517528; opsin; pubchem id: 8252; srs id: 29009",
1436 | "mode": "name; name; name; name; name; name; name; name",
1437 | "service": "cas_registry; chemeo; cir; comptox; nist; opsin; pubchem; srs",
1438 | "number_of_crosschecks": 8,
1439 | "identifier": "Propylene",
1440 | "found_molecules": [],
1441 | "pubchem_cid": 8252,
1442 | "formula": "C3H6",
1443 | "hill_formula": "C3H6",
1444 | "inchi": "InChI=1S/C3H6/c1-3-2/h3H,1H2,2H3",
1445 | "inchikey": "QQONPFPTGQHPMA-UHFFFAOYSA-N",
1446 | "iupac_name": "prop-1-ene"
1447 | },
1448 | "Quinine": {
1449 | "SMILES": "C=CC1CN2CCC1CC2C(O)c1ccnc2ccc(OC)cc12",
1450 | "synonyms": [
1451 | "Quinine",
1452 | "(-)-Quinine",
1453 | "(8\u03b1,9R)-6\u2032-methoxy-Cinchonan-9-ol",
1454 | "(8\u03b1,9R)-6\u2032-Methoxycinchonan-9-ol",
1455 | "6\u2032-Methoxycinchonidine"
1456 | ],
1457 | "CAS": [
1458 | "130-95-0"
1459 | ],
1460 | "additional_information": "cas_registry; chemeo id: 68-320-1; srs id: 35261",
1461 | "mode": "name; name; name",
1462 | "service": "cas_registry; chemeo; srs",
1463 | "number_of_crosschecks": 3,
1464 | "identifier": "Quinine",
1465 | "found_molecules": [],
1466 | "pubchem_cid": null,
1467 | "formula": "C20H24N2O2",
1468 | "hill_formula": "C20H24N2O2",
1469 | "inchi": "InChI=1S/C20H24N2O2/c1-3-13-12-22-9-7-14(13)10-19(22)20(23)16-6-8-21-18-5-4-15(24-2)11-17(16)18/h3-6,8,11,13-14,19-20,23H,1,7,9-10,12H2,2H3",
1470 | "inchikey": "LOUPRKONTZGTKE-UHFFFAOYSA-N",
1471 | "iupac_name": "(5-ethenyl-1-azabicyclo[2.2.2]octan-2-yl)-(6-methoxyquinolin-4-yl)methanol"
1472 | },
1473 | "Saccharin": {
1474 | "SMILES": "O=C1NS(=O)(=O)c2ccccc21",
1475 | "synonyms": [
1476 | "Saccharin",
1477 | "1,2-Benzisothiazol-3(2H)-one, 1,1-dioxide",
1478 | "1,2-Benzisothiazolin-3-one, 1,1-dioxide",
1479 | "1,2-benzisothiazol-3(2H)-one 1,1-dioxide",
1480 | "1,1-Dioxo-1,2-benzisothiazol-3(2H)-one"
1481 | ],
1482 | "CAS": [
1483 | "81-07-2"
1484 | ],
1485 | "additional_information": "cas_registry; chebi id: 32111; chemeo id: 57-750-6; cir; comptox id: DTXSID5021251|QC_LEVEL:1.0; opsin; pubchem id: 5143; srs id: 8557",
1486 | "mode": "name; name; name; name; name; name; name; name",
1487 | "service": "cas_registry; chebi; chemeo; cir; comptox; opsin; pubchem; srs",
1488 | "number_of_crosschecks": 8,
1489 | "identifier": "Saccharin",
1490 | "found_molecules": [],
1491 | "pubchem_cid": 5143,
1492 | "formula": "C7H5NO3S",
1493 | "hill_formula": "C7H5NO3S",
1494 | "inchi": "InChI=1S/C7H5NO3S/c9-7-5-3-1-2-4-6(5)12(10,11)8-7/h1-4H,(H,8,9)",
1495 | "inchikey": "CVHZOJJKTDOEJC-UHFFFAOYSA-N",
1496 | "iupac_name": "1,1-dioxo-1,2-benzothiazol-3-one"
1497 | },
1498 | "Silicon Dioxide": {
1499 | "SMILES": "O=[Si]=O",
1500 | "synonyms": [
1501 | "Silicon Dioxide",
1502 | "Silica",
1503 | "Dioxosilane",
1504 | "Acticel",
1505 | "Aerosil"
1506 | ],
1507 | "CAS": [
1508 | "7631-86-9"
1509 | ],
1510 | "additional_information": "cas_registry; chebi id: 30563; chemeo id: 69-388-6; cir; comptox id: DTXSID1029677|QC_LEVEL:1.0; nist id: ['C14808607', 'C14808607']; opsin; pubchem id: 24261; srs id: 151977",
1511 | "mode": "name; name; name; name; name; ['name', 'name']; name; name; name",
1512 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs",
1513 | "number_of_crosschecks": 9,
1514 | "identifier": "Silicon Dioxide",
1515 | "found_molecules": [],
1516 | "pubchem_cid": 24261,
1517 | "formula": "O2Si",
1518 | "hill_formula": "O2Si",
1519 | "inchi": "InChI=1S/O2Si/c1-3-2",
1520 | "inchikey": "VYPSYNLAJGMNEJ-UHFFFAOYSA-N",
1521 | "iupac_name": "dioxosilane"
1522 | },
1523 |
1524 |
1525 | "Styrene": {
1526 | "SMILES": "C=Cc1ccccc1",
1527 | "synonyms": [
1528 | "Styrene",
1529 | "ethenyl-Benzene",
1530 | "Ethenylbenzene",
1531 | "Cinnamene",
1532 | "Phenethylene"
1533 | ],
1534 | "CAS": [
1535 | "100-42-5"
1536 | ],
1537 | "additional_information": "cas_registry; chebi id: 27452; chemeo id: 24-192-2; cir; comptox id: DTXSID2021284|QC_LEVEL:1.0; nist id: C100425; opsin; pubchem id: 7501; srs id: 19414",
1538 | "mode": "name; name; name; name; name; name; name; name; name",
1539 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs",
1540 | "number_of_crosschecks": 9,
1541 | "identifier": "Styrene",
1542 | "found_molecules": [],
1543 | "pubchem_cid": 7501,
1544 | "formula": "C8H8",
1545 | "hill_formula": "C8H8",
1546 | "inchi": "InChI=1S/C8H8/c1-2-8-6-4-3-5-7-8/h2-7H,1H2",
1547 | "inchikey": "PPBRXRYQALVLMV-UHFFFAOYSA-N",
1548 | "iupac_name": "styrene"
1549 | },
1550 | "Sucrose": {
1551 | "SMILES": "OC[C@H]1O[C@@](CO)(O[C@H]2O[C@H](CO)[C@@H](O)[C@H](O)[C@H]2O)[C@@H](O)[C@@H]1O",
1552 | "synonyms": [
1553 | "Sucrose",
1554 | "beta-D-fructofuranosyl alpha-D-glucopyranoside",
1555 | "Cane sugar",
1556 | "1-alpha-D-Glucopyranosyl-2-beta-D-fructofuranoside",
1557 | "beta-D-Fruf-(2<->1)-alpha-D-Glcp"
1558 | ],
1559 | "CAS": [
1560 | "57-50-1"
1561 | ],
1562 | "additional_information": "chebi id: 17992; cir; comptox id: DTXSID2021288|QC_LEVEL:1.0; pubchem id: 5988; srs id: 2568",
1563 | "mode": "name; name; name; name; name",
1564 | "service": "chebi; cir; comptox; pubchem; srs",
1565 | "number_of_crosschecks": 5,
1566 | "identifier": "Sucrose",
1567 | "found_molecules": [],
1568 | "pubchem_cid": 5988,
1569 | "formula": "C12H22O11",
1570 | "hill_formula": "C12H22O11",
1571 | "inchi": "InChI=1S/C12H22O11/c13-1-4-6(16)8(18)9(19)11(21-4)23-12(3-15)10(20)7(17)5(2-14)22-12/h4-11,13-20H,1-3H2/t4-,5-,6-,7-,8+,9-,10+,11-,12+/m1/s1",
1572 | "inchikey": "CZMRCDWAGMRECN-UGDNZRGBSA-N",
1573 | "iupac_name": "(2R,3R,4S,5S,6R)-2-[(2S,3S,4S,5R)-3,4-dihydroxy-2,5-bis(hydroxymethyl)oxolan-2-yl]oxy-6-(hydroxymethyl)oxane-3,4,5-triol"
1574 | },
1575 | "Sulfuric Acid": {
1576 | "SMILES": "O=S(=O)(O)O",
1577 | "synonyms": [
1578 | "Sulfuric Acid",
1579 | "Oil of vitriol",
1580 | "Sulphuric acid",
1581 | "bov",
1582 | "Dipping acid"
1583 | ],
1584 | "CAS": [
1585 | "7664-93-9"
1586 | ],
1587 | "additional_information": "cas_registry; chebi id: 26836; chemeo id: 24-837-6; cir; comptox id: DTXSID5029683|QC_LEVEL:1.0; nist id: C7664939; opsin; pubchem id: 1118; srs id: 152405",
1588 | "mode": "name; name; name; name; name; name; name; name; name",
1589 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs",
1590 | "number_of_crosschecks": 9,
1591 | "identifier": "Sulfuric Acid",
1592 | "found_molecules": [],
1593 | "pubchem_cid": 1118,
1594 | "formula": "H2O4S",
1595 | "hill_formula": "H2O4S",
1596 | "inchi": "InChI=1S/H2O4S/c1-5(2,3)4/h(H2,1,2,3,4)",
1597 | "inchikey": "QAOWNCQODCNURD-UHFFFAOYSA-N",
1598 | "iupac_name": "sulfuric acid"
1599 | },
1600 | "Tetrafluoroethylene": {
1601 | "SMILES": "FC(F)=C(F)F",
1602 | "synonyms": [
1603 | "Tetrafluoroethylene",
1604 | "tetrafluoro-Ethylene",
1605 | "1,1,2,2-tetrafluoro-Ethene",
1606 | "tetrafluoro-Ethene",
1607 | "Tetrafluoroethene"
1608 | ],
1609 | "CAS": [
1610 | "116-14-3"
1611 | ],
1612 | "additional_information": "cas_registry; chemeo id: 10-149-5; cir; comptox id: DTXSID6021325|QC_LEVEL:1.0; nist id: C116143; opsin; pubchem id: 8301; srs id: 29397",
1613 | "mode": "name; name; name; name; name; name; name; name",
1614 | "service": "cas_registry; chemeo; cir; comptox; nist; opsin; pubchem; srs",
1615 | "number_of_crosschecks": 8,
1616 | "identifier": "Tetrafluoroethylene",
1617 | "found_molecules": [],
1618 | "pubchem_cid": 8301,
1619 | "formula": "C2F4",
1620 | "hill_formula": "C2F4",
1621 | "inchi": "InChI=1S/C2F4/c3-1(4)2(5)6",
1622 | "inchikey": "BFKJFAAPBSQJPD-UHFFFAOYSA-N",
1623 | "iupac_name": "1,1,2,2-tetrafluoroethene"
1624 | },
1625 | "Tetrahydrocannabinol": {
1626 | "SMILES": "CCCCCc1cc(O)c2c(c1)OC(C)(C)[C@@H]1CCC(C)=C[C@@H]21",
1627 | "synonyms": [
1628 | "Tetrahydrocannabinol",
1629 | "Dronabinol",
1630 | "(6aR,10aR)-6,6,9-trimethyl-3-pentyl-6a,7,8,10a-tetrahydro-6H-benzo[c]chromen-1-ol",
1631 | "(-)-delta9-trans-Tetrahydrocannabinol",
1632 | "1-trans-delta-9-Tetrahydrocannabinol"
1633 | ],
1634 | "CAS": [
1635 | "1972-08-3"
1636 | ],
1637 | "additional_information": "chebi id: 66964; comptox id: DTXSID6021327|QC_LEVEL:1.0; pubchem id: 16078",
1638 | "mode": "name; name; name",
1639 | "service": "chebi; comptox; pubchem",
1640 | "number_of_crosschecks": 3,
1641 | "identifier": "Tetrahydrocannabinol",
1642 | "found_molecules": [],
1643 | "pubchem_cid": 16078,
1644 | "formula": "C21H30O2",
1645 | "hill_formula": "C21H30O2",
1646 | "inchi": "InChI=1S/C21H30O2/c1-5-6-7-8-15-12-18(22)20-16-11-14(2)9-10-17(16)21(3,4)23-19(20)13-15/h11-13,16-17,22H,5-10H2,1-4H3/t16-,17-/m1/s1",
1647 | "inchikey": "CYQFCXCEBYINGO-IAGOWNOFSA-N",
1648 | "iupac_name": "(6aR,10aR)-6,6,9-trimethyl-3-pentyl-6a,7,8,10a-tetrahydrobenzo[c]chromen-1-ol"
1649 | },
1650 | "Thymine": {
1651 | "SMILES": "Cc1c[nH]c(=O)[nH]c1=O",
1652 | "synonyms": [
1653 | "Thymine",
1654 | "5-methyl-2,4(1H,3H)-Pyrimidinedione",
1655 | "2,4-Dihydroxy-5-methylpyrimidine",
1656 | "5-Methyluracil",
1657 | "5-methylpyrimidine-2,4(1H,3H)-dione"
1658 | ],
1659 | "CAS": [
1660 | "65-71-4"
1661 | ],
1662 | "additional_information": "cas_registry; chebi id: 17821; chemeo id: 18-703-1; cir; comptox id: DTXSID4052342|QC_LEVEL:1.0; opsin; pubchem id: 1135; srs id: 4028",
1663 | "mode": "name; name; name; name; name; name; name; name",
1664 | "service": "cas_registry; chebi; chemeo; cir; comptox; opsin; pubchem; srs",
1665 | "number_of_crosschecks": 8,
1666 | "identifier": "Thymine",
1667 | "found_molecules": [],
1668 | "pubchem_cid": 1135,
1669 | "formula": "C5H6N2O2",
1670 | "hill_formula": "C5H6N2O2",
1671 | "inchi": "InChI=1S/C5H6N2O2/c1-3-2-6-5(9)7-4(3)8/h2H,1H3,(H2,6,7,8,9)",
1672 | "inchikey": "RWQNBRDOKXIBIV-UHFFFAOYSA-N",
1673 | "iupac_name": "5-methyl-1H-pyrimidine-2,4-dione"
1674 | },
1675 | "Trinitrotoluene": {
1676 | "SMILES": "Cc1c([N+](=O)[O-])cc([N+](=O)[O-])cc1[N+](=O)[O-]",
1677 | "synonyms": [
1678 | "Trinitrotoluene",
1679 | "2,4,6-Trinitrotoluene",
1680 | "2-Methyl-1,3,5-trinitrobenzene",
1681 | "2-methyl-1,3,5-trinitro-Benzene",
1682 | "2,4,6-trinitro-Toluene"
1683 | ],
1684 | "CAS": [
1685 | "118-96-7"
1686 | ],
1687 | "additional_information": "cas_registry; chebi id: 46053; chemeo id: 12-617-3; cir; comptox id: DTXSID7024372|QC_LEVEL:1.0; nist id: C118967; opsin; pubchem id: 8376; srs id: 30460",
1688 | "mode": "name; name; name; name; name; name; name; name; name",
1689 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs",
1690 | "number_of_crosschecks": 9,
1691 | "identifier": "Trinitrotoluene",
1692 | "found_molecules": [],
1693 | "pubchem_cid": 8376,
1694 | "formula": "C7H5N3O6",
1695 | "hill_formula": "C7H5N3O6",
1696 | "inchi": "InChI=1S/C7H5N3O6/c1-4-6(9(13)14)2-5(8(11)12)3-7(4)10(15)16/h2-3H,1H3",
1697 | "inchikey": "SPSSULHKWOKEEL-UHFFFAOYSA-N",
1698 | "iupac_name": "2-methyl-1,3,5-trinitrobenzene"
1699 | },
1700 | "Toluene": {
1701 | "SMILES": "Cc1ccccc1",
1702 | "synonyms": [
1703 | "Toluene",
1704 | "Methylbenzene",
1705 | "Methacide",
1706 | "methyl-Benzene",
1707 | "Toluol"
1708 | ],
1709 | "CAS": [
1710 | "108-88-3"
1711 | ],
1712 | "additional_information": "cas_registry; chebi id: 17578; chemeo id: 17-837-4; cir; comptox id: DTXSID7021360|QC_LEVEL:1.0; nist id: C1124181; opsin; pubchem id: 1140; srs id: 25452",
1713 | "mode": "name; name; name; name; name; name; name; name; name",
1714 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs",
1715 | "number_of_crosschecks": 9,
1716 | "identifier": "Toluene",
1717 | "found_molecules": [],
1718 | "pubchem_cid": 1140,
1719 | "formula": "C7H8",
1720 | "hill_formula": "C7H8",
1721 | "inchi": "InChI=1S/C7H8/c1-7-5-3-2-4-6-7/h2-6H,1H3",
1722 | "inchikey": "YXFVVABEGXRONW-UHFFFAOYSA-N",
1723 | "iupac_name": "methyl-benzene"
1724 | },
1725 | "Uracil": {
1726 | "SMILES": "O=c1cc[nH]c(=O)[nH]1",
1727 | "synonyms": [
1728 | "Uracil",
1729 | "2,4(1H,3H)-Pyrimidinedione",
1730 | "2,4-Dioxopyrimidine",
1731 | "2,4-Dihydroxypyrimidine",
1732 | "2,4-Pyrimidinediol"
1733 | ],
1734 | "CAS": [
1735 | "66-22-8"
1736 | ],
1737 | "additional_information": "cas_registry; chebi id: 17568; chemeo id: 31-883-7; cir; comptox id: DTXSID4021424|QC_LEVEL:1.0; opsin; pubchem id: 1174; srs id: 4069",
1738 | "mode": "name; name; name; name; name; name; name; name",
1739 | "service": "cas_registry; chebi; chemeo; cir; comptox; opsin; pubchem; srs",
1740 | "number_of_crosschecks": 8,
1741 | "identifier": "Uracil",
1742 | "found_molecules": [],
1743 | "pubchem_cid": 1174,
1744 | "formula": "C4H4N2O2",
1745 | "hill_formula": "C4H4N2O2",
1746 | "inchi": "InChI=1S/C4H4N2O2/c7-3-1-2-5-4(8)6-3/h1-2H,(H2,5,6,7,8)",
1747 | "inchikey": "ISAKRJDGNUQOIC-UHFFFAOYSA-N",
1748 | "iupac_name": "1H-pyrimidine-2,4-dione"
1749 | },
1750 | "Urea": {
1751 | "SMILES": "NC(N)=O",
1752 | "synonyms": [
1753 | "Urea",
1754 | "Carbamide",
1755 | "carbonyldiamide",
1756 | "Isourea",
1757 | "ur"
1758 | ],
1759 | "CAS": [
1760 | "57-13-6"
1761 | ],
1762 | "additional_information": "cas_registry; chebi id: 16199; chemeo id: 45-964-2; cir; comptox id: DTXSID4021426|QC_LEVEL:1.0; opsin; pubchem id: 1176; srs id: 2451",
1763 | "mode": "name; name; name; name; name; name; name; name",
1764 | "service": "cas_registry; chebi; chemeo; cir; comptox; opsin; pubchem; srs",
1765 | "number_of_crosschecks": 8,
1766 | "identifier": "Urea",
1767 | "found_molecules": [],
1768 | "pubchem_cid": 1176,
1769 | "formula": "CH4N2O",
1770 | "hill_formula": "CH4N2O",
1771 | "inchi": "InChI=1S/CH4N2O/c2-1(3)4/h(H4,2,3,4)",
1772 | "inchikey": "XSQUKJJJFZCRTK-UHFFFAOYSA-N",
1773 | "iupac_name": "urea"
1774 | },
1775 | "Vanillin": {
1776 | "SMILES": "COc1cc(C=O)ccc1O",
1777 | "synonyms": [
1778 | "Vanillin",
1779 | "4-Hydroxy-3-methoxybenzaldehyde",
1780 | "4-hydroxy-3-methoxy-Benzaldehyde",
1781 | "3-Methoxy-4-hydroxybenzaldehyde",
1782 | "Vanillic aldehyde"
1783 | ],
1784 | "CAS": [
1785 | "121-33-5"
1786 | ],
1787 | "additional_information": "cas_registry; chebi id: 18346; cir; comptox id: DTXSID0021969|QC_LEVEL:1.0; opsin; pubchem id: 1183; srs id: 31666",
1788 | "mode": "name; name; name; name; name; name; name",
1789 | "service": "cas_registry; chebi; cir; comptox; opsin; pubchem; srs",
1790 | "number_of_crosschecks": 7,
1791 | "identifier": "Vanillin",
1792 | "found_molecules": [],
1793 | "pubchem_cid": 1183,
1794 | "formula": "C8H8O3",
1795 | "hill_formula": "C8H8O3",
1796 | "inchi": "InChI=1S/C8H8O3/c1-11-8-4-6(5-9)2-3-7(8)10/h2-5,10H,1H3",
1797 | "inchikey": "MWOOGOJBHIARFG-UHFFFAOYSA-N",
1798 | "iupac_name": "4-hydroxy-3-methoxybenzaldehyde"
1799 | },
1800 | "Vinyl Chloride": {
1801 | "SMILES": "C=CCl",
1802 | "synonyms": [
1803 | "Vinyl Chloride",
1804 | "Chloroethene",
1805 | "Chloroethylene",
1806 | "chloro-Ethene",
1807 | "chloro-Ethylene"
1808 | ],
1809 | "CAS": [
1810 | "75-01-4"
1811 | ],
1812 | "additional_information": "cas_registry; chemeo id: 63-522-2; cir; comptox id: DTXSID8021434|QC_LEVEL:1.0; nist id: C75014; opsin; pubchem id: 6338; srs id: 5231",
1813 | "mode": "name; name; name; name; name; name; name; name",
1814 | "service": "cas_registry; chemeo; cir; comptox; nist; opsin; pubchem; srs",
1815 | "number_of_crosschecks": 8,
1816 | "identifier": "Vinyl Chloride",
1817 | "found_molecules": [],
1818 | "pubchem_cid": 6338,
1819 | "formula": "C2H3Cl",
1820 | "hill_formula": "C2H3Cl",
1821 | "inchi": "InChI=1S/C2H3Cl/c1-2-3/h2H,1H2",
1822 | "inchikey": "BZHJMEDXRYGGRV-UHFFFAOYSA-N",
1823 | "iupac_name": "chloroethene"
1824 | },
1825 | "Water": {
1826 | "SMILES": "O",
1827 | "synonyms": [
1828 | "Water",
1829 | "Distilled water",
1830 | "h2o",
1831 | "oxidane",
1832 | "dihydrogen oxide"
1833 | ],
1834 | "CAS": [
1835 | "7732-18-5"
1836 | ],
1837 | "additional_information": "cas_registry; chebi id: 15377; chemeo id: 69-886-3; cir; comptox id: DTXSID6026296|QC_LEVEL:1.0; nist id: B6010117; opsin; pubchem id: 962; srs id: 153171",
1838 | "mode": "name; name; name; name; name; name; name; name; name",
1839 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs",
1840 | "number_of_crosschecks": 9,
1841 | "identifier": "Water",
1842 | "found_molecules": [],
1843 | "pubchem_cid": 962,
1844 | "formula": "H2O",
1845 | "hill_formula": "H2O",
1846 | "inchi": "InChI=1S/H2O/h1H2",
1847 | "inchikey": "XLYOFNOQVPJJNP-UHFFFAOYSA-N",
1848 | "iupac_name": "oxidane"
1849 | },
1850 | "Xylene": {
1851 | "SMILES": "Cc1ccccc1C",
1852 | "synonyms": [
1853 | "Xylene",
1854 | "1,2-xylene",
1855 | "o-xylene",
1856 | "1,2-dimethyl-Benzene",
1857 | "1,2-Dimethylbenzene"
1858 | ],
1859 | "CAS": [
1860 | "95-47-6"
1861 | ],
1862 | "additional_information": "pubchem id: 7237; srs id: 84970",
1863 | "mode": "name; name",
1864 | "service": "pubchem; srs",
1865 | "number_of_crosschecks": 2,
1866 | "identifier": "Xylene",
1867 | "found_molecules": [],
1868 | "pubchem_cid": 7237,
1869 | "formula": "C8H10",
1870 | "hill_formula": "C8H10",
1871 | "inchi": "InChI=1S/C8H10/c1-7-5-3-4-6-8(7)2/h3-6H,1-2H3",
1872 | "inchikey": "CTQNGGLPUBDAKN-UHFFFAOYSA-N",
1873 | "iupac_name": "1,2-xylene"
1874 | }
1875 | }
--------------------------------------------------------------------------------
/tests/get_iupac.py:
--------------------------------------------------------------------------------
1 | import json
2 | import requests
3 | from lxml import etree
4 | from copy import deepcopy
5 | from tqdm import tqdm
6 |
def get_iupac(smiles: str):
    """
    Look up the preferred IUPAC name of a molecule through PubChem PUG REST.

    Parameters
    ----------
    smiles : str
        SMILES string of the molecule to look up.

    Returns
    -------
    str or None
        Text of the first "IUPAC Name" entry whose name type is "Preferred".
        None when the response status is not 200 or no such entry is found.

    Raises
    ------
    requests.exceptions.RequestException
        Propagated from ``requests.get`` (including a timeout after 30 s).
    """
    headers = {
        "user-agent": "MoleculeResolver/test (+https://github.com/MoleculeResolver/molecule-resolver)"
    }
    # The SMILES is sent as a query parameter instead of being pasted into the
    # URL path: '#', '/', '\\', '+' and '%' are legal SMILES characters but
    # would be read as URL syntax. requests percent-encodes the value.
    response = requests.get(
        "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/record/XML",
        params={"smiles": smiles},
        headers=headers,
        timeout=30,  # never hang the whole batch on one stalled connection
    )
    if response.status_code != 200:
        return None

    # Parse the raw bytes so that lxml itself handles any encoding declaration.
    root = etree.XML(response.content)

    for label in root.findall(".//{*}PC-Urn_label"):
        if label.text != "IUPAC Name":
            continue

        urn = label.getparent()
        name_type = urn.find(".//{*}PC-Urn_name")
        if name_type is None or name_type.text != "Preferred":
            continue

        # The string value is searched for below the grandparent of the URN element.
        info_data = urn.getparent().getparent()
        value = info_data.find(".//{*}PC-InfoData_value_sval")
        if value is not None:
            return value.text

    return None
28 |
def main():
    """
    Annotate the benchmark molecules with IUPAC names and write them to disk.

    Reads ``benchmark_component_molecules.json``, calls ``get_iupac`` with the
    SMILES of every entry and stores the returned name under ``"iupac_name"``.
    Entries for which no name is returned are left out of the output file
    ``benchmark_component_molecules_iupac.json``.
    """
    # Local import: both files are resolved next to this script, so the script
    # no longer depends on the current working directory.
    from pathlib import Path

    here = Path(__file__).resolve().parent

    with open(here / "benchmark_component_molecules.json", "r", encoding="utf-8") as f:
        benchmark = json.load(f)

    new_benchmark = {}
    for name, data in tqdm(benchmark.items()):
        iupac_name = get_iupac(data["SMILES"])
        if iupac_name is not None:
            data["iupac_name"] = iupac_name
            new_benchmark[name] = data

    with open(here / "benchmark_component_molecules_iupac.json", "w", encoding="utf-8") as f:
        json.dump(new_benchmark, f, indent=4)


if __name__ == "__main__":
    main()
45 |
46 |
--------------------------------------------------------------------------------
/tests/rdkit_normalization_exceptions.py:
--------------------------------------------------------------------------------
1 | from rdkit import Chem
2 | from rdkit.Chem.MolStandardize import rdMolStandardize
3 | from moleculeresolver import MoleculeResolver
4 |
if __name__ == "__main__":
    # Sulfoxide SMILES that are pushed through RDKit normalization and then
    # through the resolver; each one is expected to come back unchanged.
    smiles_list = [
        "CS(C)=O",
        "O=S1CCCC1",
        "O=S1CCOCC1",
        "CCS(=O)CC",
        "CCCS(C)=O",
        "CCCS(=O)CCC",
        "O=S(c1ccccc1)c1ccccc1",
        "O=S1CCCC1",
        "CCCS(=O)CCC",
        "CCS(=O)CC",
        "CCCS(C)=O",
        "O=S1CCCC1",
        "O=S1CCOCC1"
    ]

    # One resolver serves every molecule; constructing it does not depend on the SMILES.
    mr = MoleculeResolver()

    for smiles in smiles_list:
        print('-'*70)

        # Normalize molecule with rdkit
        mol = Chem.MolFromSmiles(smiles)
        normalized_mol = rdMolStandardize.Normalize(mol)
        normalized_smiles = Chem.MolToSmiles(normalized_mol)
        print(f"Original: {smiles} -> RDKit Normalized: {normalized_smiles}")

        # Convert zwitterionic form back to sulfinyl group
        corrected_mol = mr.convert_zwitterion_to_sulfynil(normalized_mol)
        corrected_smiles = Chem.MolToSmiles(corrected_mol)
        print(f"Original: {smiles} -> Corrected : {corrected_smiles}")

        # with MolResolver
        mr_smiles = mr.standardize_SMILES(smiles)
        print(f"Original: {smiles} -> MR smiles : {mr_smiles}")

        # All three representations have to agree; name them when they do not.
        assert smiles == mr_smiles == corrected_smiles, (
            f"SMILES mismatch: original={smiles!r}, "
            f"resolver={mr_smiles!r}, corrected={corrected_smiles!r}"
        )
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
--------------------------------------------------------------------------------
/tests/test_integration.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from moleculeresolver import MoleculeResolver
3 | import json
4 | import os
5 | from pathlib import Path
6 | from typing import Any, Callable, Dict, Optional
7 |
8 |
# Benchmark molecules annotated with IUPAC names; the file sits next to this module.
dir_path = Path(os.path.realpath(__file__)).parent
benchmark_path = dir_path / "benchmark_component_molecules_iupac.json"
with benchmark_path.open("r") as f:
    benchmark = json.load(f)

# Name of the result attribute that holds the SMILES string.
SMILES = "SMILES"
15 |
16 | # PATCH_STATE = "SAVE"
17 |
18 |
19 | # class PatchResilientRequest:
20 | # def __init__(self, json_data, patch_state):
21 | # self.json_data = json_data
22 | # self.patch_state = patch_state
23 |
24 | # def __call__(self, url: str, **kwargs) -> str:
25 | # if self.patch_state == "SAVE":
26 | # self.json_data[url] = kwargs["json"]
27 | # elif self.patch_state == "LOAD":
28 | # return self.json_data[url]
29 |
30 |
31 |
@pytest.mark.parametrize("data", benchmark.values())
class TestServices:
    """Service tests that are run once for every entry of the benchmark."""

    @staticmethod
    def _test_service(
        call_method: Callable,
        input_identifier: str,
        output_identifier_type: str,
        output_identifier,
        kwargs: Optional[Dict] = None,
    ):
        """
        Call a service with one identifier and compare one attribute of the
        result with the expected value.

        Parameters
        ----------
        call_method : Callable
            The service method to call
        input_identifier : str
            Identifier handed to ``call_method`` as first positional argument
        output_identifier_type : str
            Name of the attribute of the result that is compared
        output_identifier : str
            Value the attribute is expected to have
        kwargs : Optional[Dict], optional
            Extra keyword arguments for ``call_method``, by default None

        Raises
        ------
        ValueError
            If ``call_method`` returns None or the attribute differs from
            ``output_identifier``
        """
        call_kwargs = {} if kwargs is None else kwargs
        molecule = call_method(input_identifier, **call_kwargs)
        if molecule is None:
            raise ValueError(f"No molecule found for {input_identifier}")

        actual = molecule.__dict__[output_identifier_type]
        if actual != output_identifier:
            raise ValueError(f"Expected {output_identifier} but got {actual}")

    def test_opsin(self, data):
        """Resolve the entry's IUPAC name with OPSIN and expect the entry's SMILES."""
        with MoleculeResolver() as mr:
            self._test_service(
                mr.get_molecule_from_OPSIN,
                data["iupac_name"],
                SMILES,
                data["SMILES"],
            )
82 |
83 |
84 |
85 | # def test_opsin_batchmode():
86 | # names = [d["iupac_name"] for d in benchmark.values()]
87 | # smiles = [d["SMILES"] for d in benchmark.values()]
88 | # with MoleculeResolver() as mr:
89 | # res = mr.get_molecule_from_OPSIN_batchmode(names)
90 | # for i, r in enumerate(res):
91 | # if r[0].SMILES == smiles[i]:
92 | # continue
93 | # else:
94 | # raise ValueError("Expected " + smiles[i] + " but got " + r.SMILES)
95 |
96 |
def generate_data():
    """Placeholder: run each test with a patch of resilient request that saves response."""
    return None


if __name__ == "__main__":
    generate_data()
104 |
--------------------------------------------------------------------------------