├── .DS_Store ├── .github └── workflows │ └── ci.yml ├── .gitignore ├── .readthedocs.yaml ├── .spr.yml ├── LICENSE.txt ├── MoleculeResolver.png ├── README.md ├── apply.py ├── docs ├── Makefile ├── make.bat ├── requirements.txt └── source │ ├── conf.py │ └── index.rst ├── molecule_test_set ├── benchmark_molecule_names.json └── get_data_generate_identifiers.py ├── moleculeresolver ├── SqliteMoleculeCache.py ├── __init__.py ├── molecule.py ├── moleculeresolver.py ├── opsin-cli-2.8.0-jar-with-dependencies.jar └── rdkitmods.py ├── poetry.lock ├── pyproject.toml └── tests ├── benchmark_component_molecules.json ├── benchmark_component_molecules_iupac.json ├── get_iupac.py ├── rdkit_normalization_exceptions.py └── test_integration.py /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MoleculeResolver/molecule-resolver/15253f702a3ca823c476e0551ff4f895f017cc68/.DS_Store -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | pull_request: 4 | branches: 5 | # Branches from forks have the form 'user:branch-name' so we only run 6 | # this job on pull_request events for branches that look like fork 7 | # branches. Without this we would end up running this job twice for non 8 | # forked PRs, once for the push and then once for opening the PR. 9 | - '**:**' 10 | 11 | 12 | jobs: 13 | # Build the package 14 | build: 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - name: Checkout repository 19 | uses: actions/checkout@v4 20 | 21 | - name: Install python 22 | uses: actions/setup-python@v4 23 | with: 24 | python-version: '3.11' 25 | 26 | - name: Install poetry 27 | uses: Gr1N/setup-poetry@v8 28 | 29 | - name: Build package 30 | run: poetry build 31 | 32 | - name: Save built package 33 | uses: actions/upload-artifact@v4 34 | with: 35 | name: dist 36 | path: dist/ 37 | retention-days: 1 38 | 39 | 40 | # Run pytest using built package 41 | test: 42 | needs: build 43 | runs-on: ubuntu-latest 44 | strategy: 45 | matrix: 46 | python: ["3.10", "3.11", "3.12", "3.13"] 47 | 48 | steps: 49 | - name: Checkout repository 50 | uses: actions/checkout@v4 51 | 52 | - name: Install python 53 | uses: actions/setup-python@v4 54 | with: 55 | python-version: ${{ matrix.python }} 56 | cache: 'pip' 57 | cache-dependency-path: "poetry.lock" 58 | 59 | - name: Download built package 60 | uses: actions/download-artifact@v4 61 | with: 62 | name: dist 63 | 64 | - name: Install package and pytest 65 | shell: bash 66 | run: | 67 | WHL_NAME=$(ls molecule_resolver-*.whl) 68 | pip install ${WHL_NAME} pytest 69 | 70 | - name: Run tests 71 | shell: bash 72 | run: pytest 73 | 74 | # Tag and release the package 75 | release: 76 | needs: test 77 | runs-on: ubuntu-latest 78 | permissions: 79 | contents: write 80 | 81 | steps: 82 | - name: Checkout repository 83 | uses: actions/checkout@v4 84 | 85 | - name: Install python 86 | uses: actions/setup-python@v4 87 | with: 88 | python-version: '3.10' 89 | 90 | - name: Download built package 91 | uses: actions/download-artifact@v4 92 | with: 93 | name: dist 94 | path: dist/ 95 | 96 | - name: Determine the version for this release from the build 97 | id: current 98 | run: | 99 | BUILD_VER="$(ls dist/molecule_resolver-*.tar.gz)" 100 | echo "Path: $BUILD_VER" 101 | if [[ $BUILD_VER =~ (molecule_resolver-)([^,][0-9.]{4}) ]]; then 102 | echo "version=${BASH_REMATCH[2]}" >> $GITHUB_OUTPUT 103 | echo "Version of build: ${BASH_REMATCH[2]}" 104 | else 105 | echo "No version found found" 106 | fi 107 | 108 | - name: Install coveo-pypi-cli 109 | run: pip install coveo-pypi-cli 110 | 111 | - name: Get latest published version 112 | id: published 113 | run: | 114 | PUB_VER="$(pypi current-version molecule-resolver)" 115 | echo "version=$PUB_VER" >> $GITHUB_OUTPUT 116 | echo "Latest published version: $PUB_VER" 117 | 118 | - name: Tag repository 119 | shell: bash 120 | id: get-next-tag 121 | if: (steps.current.outputs.version != steps.published.outputs.version) 122 | run: | 123 | TAG_NAME=${{ steps.current.outputs.version }} 124 | echo "tag-name=$TAG_NAME" >> $GITHUB_OUTPUT 125 | echo "This release will be tagged as $TAG_NAME" 126 | git config user.name "github-actions" 127 | git config user.email "actions@users.noreply.github.com" 128 | git tag --annotate --message="Automated tagging system" $TAG_NAME ${{ github.sha }} 129 | 130 | - name: Push the tag 131 | if: (steps.current.outputs.version != steps.published.outputs.version) 132 | id: push_tag 133 | env: 134 | TAG_NAME: ${{ steps.current.outputs.version }} 135 | run: | 136 | if [[ ${{ github.ref_name }} == 'main' ]]; then 137 | git push origin $TAG_NAME 138 | echo "should_release=true" >> $GITHUB_OUTPUT 139 | else 140 | echo "If this was the main branch, I would push a new tag named $TAG_NAME" 141 | echo "should_release=false" >> $GITHUB_OUTPUT 142 | fi 143 | - name: Wait for GitHub to register the tag 144 | run: sleep 10 145 | 146 | - name: Release 147 | uses: softprops/action-gh-release@v2 148 | if: ${{ steps.push_tag.outputs.should_release == 'true' }} 149 | with: 150 | tag_name: ${{ steps.current.outputs.version }} 151 | files: dist/molecule_resolver-${{ steps.current.outputs.version }}.tar.gz 152 | 153 | outputs: 154 | publish_pypi: ${{ steps.push_tag.outputs.should_release }} 155 | publish_test_pypi: ${{ (steps.current.outputs.version != steps.published.outputs.version) && github.ref_name != 'main' }} 156 | 157 | # Publish the package to pypi 158 | publish: 159 | needs: release 160 | runs-on: ubuntu-latest 161 | permissions: 162 | id-token: write # IMPORTANT: this permission is mandatory for trusted publishing 163 | 164 | steps: 165 | - name: Download built package 166 | uses: actions/download-artifact@v4 167 | with: 168 | name: dist 169 | path: dist/ 170 | 171 | - name: Publish to pypi if new version 172 | env: 173 | should_publish: ${{ needs.release.outputs.publish_pypi }} 174 | if: ${{ env.should_publish == 'true' }} 175 | uses: pypa/gh-action-pypi-publish@release/v1 176 | with: 177 | packages-dir: dist/ 178 | 179 | - name: Publish to test pypi if new version not on main 180 | env: 181 | should_publish: ${{ needs.release.outputs.publish_test_pypi }} 182 | if: ${{ env.should_publish == 'true' }} 183 | uses: pypa/gh-action-pypi-publish@release/v1 184 | with: 185 | packages-dir: dist/ 186 | repository-url: https://test.pypi.org/legacy/ 187 | skip-existing: true 188 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | test.py 2 | .vscode 3 | __pycache__/ 4 | *.egg-info 5 | moleculeresolver/molecule_cache.* 6 | .venv/ 7 | *.db* 8 | debug.log 9 | *build/ -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 2 | 3 | # Required 4 | version: 2 5 | 6 | # Set the OS, Python version, and other tools you might need 7 | build: 8 | os: ubuntu-24.04 9 | tools: 10 | python: "3.13" 11 | 12 | # Build documentation in the "docs/" directory with Sphinx 13 | sphinx: 14 | configuration: docs/source/conf.py 15 | 16 | # Optionally, but recommended, 17 | # declare the Python requirements required to build your documentation 18 | # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html 19 | python: 20 | install: 21 | - requirements: docs/requirements.txt 22 | -------------------------------------------------------------------------------- /.spr.yml: -------------------------------------------------------------------------------- 1 | githubRepoOwner: MoleculeResolver 2 | githubRepoName: molecule-resolver 3 | githubHost: github.com 4 | githubRemote: origin 5 | githubBranch: main 6 | requireChecks: true 7 | requireApproval: true 8 | mergeMethod: rebase 9 | mergeQueue: false 10 | forceFetchTags: false 11 | showPrTitlesInStack: false 12 | branchPushIndividually: false 13 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright 2022 Simon Müller 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. -------------------------------------------------------------------------------- /MoleculeResolver.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MoleculeResolver/molecule-resolver/15253f702a3ca823c476e0551ff4f895f017cc68/MoleculeResolver.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | MoleculeResolver 3 |

4 | 5 | # MoleculeResolver 6 | 7 | The **moleculeresolver** was born out of the need to annotate large datasets with accurate structural information fast and to crosscheck whether given metadata (name, SMILES) agrees with each other. It also allows to efficiently compare whether structures are available in two large datasets. 8 | 9 | In short it's a Python module that allows you to retrieve molecular structures from multiple chemical databases, perform crosschecks to ensure data reliability, and standardize the best representation of molecules. It also provides functions for comparing molecules and sets of molecules based on specific configurations. This makes it a useful tool for researchers, chemists, or anyone working in computational chemistry / cheminformatics who needs to ensure they are working with the best available data for a molecule. 10 | 11 | 12 | ## Installation 13 | 14 | The package is available on [pypi](https://pypi.org/project/molecule-resolver/): 15 | 16 | ```sh 17 | pip install molecule-resolver 18 | ``` 19 | While the source code is available here: [https://github.com/MoleculeResolver/molecule-resolver](https://github.com/MoleculeResolver/molecule-resolver) 20 | 21 | ## Features 22 | 23 | - **🔍 Retrieve Molecular Structures**: Fetch molecular structures from different chemical databases, including PubChem, Comptox, Chemo, and others. 24 | - **🆔 Support for Different Identifier Types**: Retrieve molecular structures using a variety of identifier types, including CAS numbers, SMILES, InChI, InChIkey and common names. 25 | - **✅ Cross-check Capabilities**: Use data from multiple sources to verify molecular structures and identify the best representation. 26 | - **🔄 Molecule Comparison**: Compare molecules or sets of molecules based on their structure, properties, and specified ⚙️ configurations. 27 | - **⚙️ Standardization**: Standardize molecular structures, including handling isomers, tautomers, and isotopes. 28 | - **💾 Caching Mechanism**: Use local caching to store molecules and reduce the number of repeated requests to external services, improving performance and reducing latency. 29 | 30 | ## Services used 31 | At this moment, the following services are used to get the best structure for a given identifier. In the future, this list might be reviewed to improve perfomance, adding new services or removing some. 32 | In case you want to add an additional service, open an issue or a pull request. 33 | 34 | The MoleculeResolver does not offer all options/configurations for each service available with the specific related repos as it focusses on getting the structure based on the identifiers and doing so as accurate as possible while still being fast using parallelization under the hood. 35 | | Service | Name | CAS | Formula | SMILES | InChI | InChIKey | CID | Batch search | Repos | 36 | |-------------------------------------------------------------------------|------|-----|---------|--------|-------|----------|-----|--------------------|------------------------------------------------------------------------------| 37 | | [cas_registry](https://commonchemistry.cas.org/) | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | | 38 | | [chebi](https://www.ebi.ac.uk/chebi/) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | | 39 | | [chemeo](https://www.chemeo.com/) | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | | 40 | | [cir](https://cactus.nci.nih.gov/chemical/structure) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | - [CIRpy](https://github.com/mcs07/CIRpy "wrapper for the CIR. FYI, CIR uses OPSIN under the hood, unless specified otherwise.") | 41 | | [comptox](https://comptox.epa.gov/dashboard) | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ✅ | | 42 | | [cts](https://cts.fiehnlab.ucdavis.edu/) | (✅) | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | | 43 | | [nist](https://webbook.nist.gov/chemistry/) | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | - [NistChemPy](https://github.com/IvanChernyshov/NistChemPy "unofficial wrapper for search and data extraction of the NIST Webbook.") | 44 | | [opsin](https://opsin.ch.cam.ac.uk/) | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | - [py2opsin](https://github.com/JacksonBurns/py2opsin "lightweight OPSIN wrapper only depending on having Java installed.")
- [pyopsin](https://github.com/Dingyun-Huang/pyopsin "lightweight OPSIN wrapper depending on having Java installed + additional dependencies.") | 45 | | [pubchem](https://pubchem.ncbi.nlm.nih.gov/) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | - [PubChemPy](https://github.com/mcs07/PubChemPy "wrapper for the pubchem PUG API") | 46 | | [srs](https://cdxapps.epa.gov/oms-substance-registry-services/search) | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | | 47 | 48 | ChemSpider was not used as it is already included in CIR [[1]](https://matt-swain.com/blog/2012-03-20-cirpy-python-nci-chemical-identifier-resolver) [[2]](https://cactus.nci.nih.gov/blog/?p=1456) [[3]](https://github.com/mcs07/ChemSpiPy). ChemIDplus and the Drug Information Portal were retired in 2022 [[4]](https://www.nlm.nih.gov/pubs/techbull/ja22/ja22_pubchem.html). 49 | 50 | ## 🚀 Usage 51 | 52 | ### Initialization 53 | 54 | To use **Molecule Resolver**, first import and initialize the `MoleculeResolver` class. it is supposed to be used as a context manager: 55 | 56 | ```python 57 | from moleculeresolver import MoleculeResolver 58 | 59 | with MoleculeResolver(available_service_API_keys={"chemeo": "YOUR_API_KEY"}) as mr: 60 | ... 61 | ``` 62 | 63 | ### Retrieve and Compare Molecules by Name and CAS 64 | 65 | Retrieve a molecule using both its common name and CAS number, then compare the two to ensure they represent the same structure: 66 | 67 | ```python 68 | from rdkit import Chem 69 | from moleculeresolver import MoleculeResolver 70 | 71 | with MoleculeResolver(available_service_API_keys={"chemeo": "YOUR_API_KEY"}) as mr: 72 | molecule_name = mr.find_single_molecule(["aspirin"], ["name"]) 73 | molecule_cas = mr.find_single_molecule(["50-78-2"], ["cas"]) 74 | 75 | are_same = mr.are_equal(Chem.MolFromSmiles(molecule_name.SMILES), 76 | Chem.MolFromSmiles(molecule_cas.SMILES)) 77 | print(f"Are the molecules the same? {are_same}") 78 | ``` 79 | 80 | ### Parallelized Molecule Retrieval and Saving to JSON 81 | 82 | Use the parallelized version to retrieve multiple molecules. If a large number of molecules is searched, moleculeresolver will try to use batch download capabilities whenever the database supports this. 83 | 84 | ```python 85 | import json 86 | from moleculeresolver import MoleculeResolver 87 | 88 | molecule_names = ["aspirin", "propanol", "ibuprofen", "non-exixtent-name"] 89 | not_found_molecules = [] 90 | molecules_dicts = {} 91 | 92 | with MoleculeResolver(available_service_API_keys={"chemeo": "YOUR_API_KEY"}) as mr: 93 | molecules = mr.find_multiple_molecules_parallelized(molecule_names, [["name"]] * len(molecule_names)) 94 | for name, molecule in zip(molecule_names, molecules): 95 | if molecule: 96 | molecules_dicts[name] = molecule.to_dict(found_molecules='remove') 97 | else: 98 | not_found_molecules.append(name) 99 | 100 | with open("molecules.json", "w") as json_file: 101 | json.dump(molecules_dicts, json_file, indent=4) 102 | 103 | print(f"Molecules not found: {not_found_molecules}") 104 | ``` 105 | 106 | ## ⚙️ Configuration 107 | 108 | The `MoleculeResolver` class allows users to configure various options like: 109 | 110 | - **API Keys**: Set API keys for accessing different molecular databases. Currently only chemeo needs one. 111 | - **Standardization Options**: Choose how to handle molecular standardization (e.g., normalizing functional groups, disconnecting metals, handling isomers, etc.). 112 | - **Differentiation Settings**: Options for distinguishing between isomers, tautomers, and isotopes. 113 | 114 | ## ⚠️ Warning 115 | 116 | **Inchi** is included in the set of valid identifiers for various [services](#services-used). You should be aware that using Inchi to get SMILES using RDKit is not the most robust approach. You can read more about it [here](https://github.com/rdkit/rdkit/issues/542). 117 | 118 | ## 🤝 Contributing 119 | 120 | Contributions are welcome! If you have suggestions for improving the Molecule Resolver or want to add new features, feel free to submit an issue or a pull request on GitHub. 121 | 122 | ## 📚 Citing 123 | 124 | If you use MoleculeResolver in your research, please cite as follows: 125 | 126 | **Müller, S.** 127 | *How to crack a SMILES: automatic crosschecked chemical structure resolution across multiple services using MoleculeResolver* 128 | **Journal of Cheminformatics**, 17:117 (2025). 129 | DOI: [10.1186/s13321-025-01064-7](https://doi.org/10.1186/s13321-025-01064-7) 130 | 131 | ```bibtex 132 | @article{Muller2025MoleculeResolver, 133 | author = {Müller, Simon}, 134 | title = {How to crack a SMILES: automatic crosschecked chemical structure resolution across multiple services using MoleculeResolver}, 135 | journal = {Journal of Cheminformatics}, 136 | year = {2025}, 137 | volume = {17}, 138 | page = {117}, 139 | doi = {10.1186/s13321-025-01064-7}, 140 | url = {https://doi.org/10.1186/s13321-025-01064-7} 141 | } 142 | 143 | -------------------------------------------------------------------------------- /apply.py: -------------------------------------------------------------------------------- 1 | from moleculeresolver import MoleculeResolver 2 | 3 | # if you specify molecule_cache_db_path, you will have a separate cache for 4 | # the found molecules in each folder. If you leave the default, all instances 5 | # of MoleculeResolver will share the same cache 6 | 7 | with MoleculeResolver( 8 | available_service_API_keys={"chemeo": "YOUR_CHEMEO_API_KEY"}, 9 | molecule_cache_db_path="test.db", 10 | ) as mr: 11 | names_to_find = ["2-bromobutane", "ethanol", "methanol", "propane", "butane"] 12 | 13 | # search for the names in parallel 14 | all_names = [] 15 | all_modes = [] 16 | for name in names_to_find: 17 | names = mr.expand_name_heuristically(name) 18 | all_names.append(names) 19 | all_modes.append(["name"]) 20 | r = mr.get_molecule_from_OPSIN(name) 21 | 22 | molecules_found_in_parallel = mr.find_multiple_molecules_parallelized( 23 | all_names, all_modes 24 | ) 25 | print("all_found_in_parallel:", all(molecules_found_in_parallel)) 26 | 27 | # search for the names sequentially 28 | molecules = [] 29 | for name in names_to_find: 30 | names = mr.expand_name_heuristically(name) 31 | molecule = mr.find_single_molecule_crosschecked(names, "name") 32 | molecules.append(molecule) 33 | print("all_found:", all(molecules)) 34 | 35 | # search for CAS numbers 36 | molecules_found_by_CAS = [] 37 | CAS_numbers = ["7732-18-5", "78-76-2", "64-17-5", "67-56-1", "74-98-6", "106-97-8"] 38 | for CAS in CAS_numbers: 39 | molecule = mr.find_single_molecule_crosschecked(CAS, "CAS") 40 | molecules_found_by_CAS.append(molecule) 41 | 42 | print("all_found_by_CAS:", all(molecules_found_by_CAS)) 43 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx-autoapi>=3.4.0 -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # For the full list of built-in configuration values, see the documentation: 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 5 | 6 | # -- Project information ----------------------------------------------------- 7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 8 | 9 | project = 'MoleculeResolver' 10 | copyright = '2025, Simon Muller and Kobi Felton' 11 | author = 'Simon Muller and Kobi Felton' 12 | release = '0.3.2' 13 | 14 | # -- General configuration --------------------------------------------------- 15 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 16 | 17 | extensions = ['autoapi.extension'] 18 | 19 | templates_path = ['_templates'] 20 | exclude_patterns = [] 21 | 22 | autoapi_dirs = ['../../moleculeresolver'] 23 | 24 | # -- Options for HTML output ------------------------------------------------- 25 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 26 | 27 | html_theme = 'alabaster' 28 | html_static_path = ['_static'] 29 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. MoleculeResolver documentation master file, created by 2 | sphinx-quickstart on Mon Jan 13 19:01:52 2025. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | MoleculeResolver documentation 7 | ============================== 8 | 9 | Add your content using ``reStructuredText`` syntax. See the 10 | `reStructuredText `_ 11 | documentation for details. 12 | 13 | 14 | .. toctree:: 15 | :maxdepth: 2 16 | :caption: Contents: 17 | 18 | -------------------------------------------------------------------------------- /molecule_test_set/benchmark_molecule_names.json: -------------------------------------------------------------------------------- 1 | [ 2 | "Acetic Acid", 3 | "Acetone", 4 | "Acetylene", 5 | "Acetylsalicylic Acid", 6 | "Adenine", 7 | "Adenosine Triphosphate", 8 | "Adipic Acid", 9 | "Aluminum(III) Oxide", 10 | "Ammonia", 11 | "Ascorbic Acid", 12 | "Aspartame", 13 | "Benzene", 14 | "Benzoic Acid", 15 | "Biphenyl ", 16 | "Butane", 17 | "Butene", 18 | "Butyric Acid", 19 | "Caffeine", 20 | "Calcium Carbonate", 21 | "Calcium Oxide", 22 | "Calcium Sulfate", 23 | "Carbon Dioxide", 24 | "Carbon Monoxide", 25 | "Chloroform", 26 | "Chlorophyll", 27 | "Cholesterol", 28 | "Citric Acid", 29 | "Cocaine", 30 | "Cytosine", 31 | "DDT", 32 | "DEET", 33 | "R-12", 34 | "Dopamine", 35 | "Adrenaline", 36 | "Ethane", 37 | "Ethylene", 38 | "Ether", 39 | "Ethanol", 40 | "Ethylenediaminetetraacetic Acid", 41 | "Fluoxetine", 42 | "Formaldehyde", 43 | "Formic Acid", 44 | "Glucose", 45 | "Glycerol", 46 | "Guanine", 47 | "Hydrochloric Acid", 48 | "Hydrogen Peroxide", 49 | "Hydrogen Sulfide", 50 | "Ibuprofen", 51 | "Indigo", 52 | "Insulin", 53 | "Iron(III) Oxide", 54 | "Isooctane", 55 | "Isoprene", 56 | "l-Dopa", 57 | "Methane", 58 | "Methyl Alcohol", 59 | "Methylphenidate", 60 | "Monosodium Glutamate", 61 | "Morphine", 62 | "Naphthalene", 63 | "Nicotine", 64 | "Nitric Acid", 65 | "Nitric Oxide", 66 | "Nitrogen Dioxide", 67 | "Nitrous Oxide", 68 | "Nitroglycerin", 69 | "Norethindrone", 70 | "Penicillin", 71 | "Phenol", 72 | "Phosphoric Acid", 73 | "Piperine", 74 | "Potassium Carbonate", 75 | "Potassium Nitrate", 76 | "Propane", 77 | "Propylene", 78 | "Quinine", 79 | "Saccharin", 80 | "Silicon Dioxide", 81 | "Sodium Bicarbonate", 82 | "Sodium Carbonate", 83 | "Sodium Chloride", 84 | "Sodium Hydroxide", 85 | "Sodium Hypochlorite", 86 | "Strychnine", 87 | "Styrene", 88 | "Sucrose", 89 | "Sulfuric Acid", 90 | "Tetrafluoroethylene", 91 | "Tetrahydrocannabinol", 92 | "Thymine", 93 | "Trinitrotoluene", 94 | "Toluene", 95 | "Triuranium Octaoxide", 96 | "Uracil", 97 | "Urea", 98 | "Vanillin", 99 | "Vinyl Chloride", 100 | "Water", 101 | "Xylene" 102 | ] -------------------------------------------------------------------------------- /molecule_test_set/get_data_generate_identifiers.py: -------------------------------------------------------------------------------- 1 | 2 | import json 3 | import os 4 | from moleculeresolver import MoleculeResolver 5 | from rdkit import Chem 6 | 7 | with open('benchmark_molecule_names.json', 'r') as f: 8 | benchmark_molecule_names = json.load(f) 9 | 10 | if not os.path.exists('benchmark_component_molecules.json'): 11 | 12 | benchmark_component_molecules = {} 13 | with MoleculeResolver(available_service_API_keys={"chemeo": 'YOUR_KEY'}, molecule_cache_db_path='molecule_cache.db') as mr: 14 | mr._available_services.remove('cts') 15 | temp = mr.find_multiple_molecules_parallelized(benchmark_molecule_names, [['name']] * len(benchmark_molecule_names)) 16 | 17 | for name, molecule in zip(benchmark_molecule_names, temp, strict=True): 18 | molecule.found_molecules = [] 19 | molecule = molecule.__dict__ 20 | mol = Chem.MolFromSmiles(molecule['SMILES']) 21 | pubchem_cid = [v.strip() for v in molecule['additional_information'].split(';') if 'pubchem' in v] 22 | if pubchem_cid: 23 | pubchem_cid = int(pubchem_cid[0].split(':')[-1]) 24 | else: 25 | pubchem_cid = None 26 | 27 | molecule['pubchem_cid'] = pubchem_cid 28 | molecule['formula'] = Chem.rdMolDescriptors.CalcMolFormula(mol) 29 | molecule['hill_formula'] = mr.to_hill_formula(mol) 30 | molecule['inchi'] = Chem.MolToInchi(mol) 31 | molecule['inchikey'] = Chem.InchiToInchiKey(molecule['inchi']) 32 | benchmark_component_molecules[name] = molecule 33 | 34 | with open('benchmark_component_molecules.json', 'w') as f: 35 | json.dump(benchmark_component_molecules, f, indent=4) 36 | -------------------------------------------------------------------------------- /moleculeresolver/SqliteMoleculeCache.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | import os 3 | import sqlite3 4 | from typing import Optional, Union 5 | import threading 6 | import uuid 7 | 8 | from moleculeresolver.molecule import Molecule 9 | 10 | 11 | class SqliteMoleculeCache: 12 | """ 13 | A class for caching molecule information using SQLite. 14 | 15 | This class provides methods to initialize, manage, and query a SQLite database 16 | for storing molecule information. It supports multi-threading and implements 17 | context management for proper resource handling. 18 | 19 | Attributes: 20 | db_path (str): Path to the SQLite database file. Defaults to ":memory:". 21 | 22 | expiration_datetime (Optional[datetime]): Expiration date for cached entries. 23 | 24 | _connections (dict): Thread-specific database connections. 25 | 26 | _main_thread_id (int): ID of the main thread. 27 | """ 28 | 29 | def __init__( 30 | self, db_path: Optional[str] = ":memory:", expiration_datetime: Optional[datetime] = None 31 | ): 32 | """ 33 | Initialize a new SqliteMoleculeCache instance. 34 | 35 | Args: 36 | db_path (Optional[str]): Path to the SQLite database file. Defaults to ":memory:". 37 | 38 | expiration_datetime (Optional[datetime]): Expiration date for cached entries. 39 | """ 40 | self.db_path = db_path 41 | self.expiration_datetime = expiration_datetime 42 | self._connections = {} 43 | self._main_thread_id = threading.get_ident() 44 | 45 | def __enter__(self) -> "SqliteMoleculeCache": 46 | """ 47 | Enter the runtime context related to this object. 48 | 49 | Creates tables and deletes expired entries. 50 | 51 | Returns: 52 | SqliteMoleculeCache: The instance of the class. 53 | """ 54 | self._create_tables() 55 | self.delete_expired() 56 | return self 57 | 58 | def close_child_connections(self) -> None: 59 | """ 60 | Close all child thread database connections. 61 | """ 62 | for thread_id, thread_connection in self._connections.items(): 63 | if thread_id != self._main_thread_id: 64 | if thread_connection: 65 | thread_connection.close() 66 | self._connections[thread_id] = None 67 | 68 | def __exit__(self, exception_type, exception_value, exception_traceback) -> None: 69 | """ 70 | Exit the runtime context and close all database connections. 71 | 72 | Closes all child thread connections and optimizes the main thread's connection before closing. 73 | """ 74 | self.close_child_connections() 75 | 76 | # Close the connection from the main thread 77 | this_thread_id = threading.get_ident() 78 | if this_thread_id == self._main_thread_id: 79 | if self._main_thread_id in self._connections: 80 | main_thread_connection = self._connections[self._main_thread_id] 81 | main_thread_connection.execute("PRAGMA analysis_limit=8192") 82 | main_thread_connection.execute("PRAGMA optimize") 83 | main_thread_connection.close() 84 | self._connections.clear() 85 | 86 | def get_connection(self) -> sqlite3.Connection: 87 | """ 88 | Get or create a thread-specific database connection. 89 | 90 | Returns: 91 | sqlite3.Connection: A SQLite database connection for the current thread. 92 | """ 93 | thread_id = threading.get_ident() 94 | if thread_id not in self._connections: 95 | self._connections[thread_id] = sqlite3.connect( 96 | self.db_path, check_same_thread=False 97 | ) 98 | self._connections[thread_id].execute("PRAGMA foreign_keys = 1") 99 | self._connections[thread_id].execute("PRAGMA journal_mode=WAL") 100 | self._connections[thread_id].execute("PRAGMA synchronous=NORMAL") 101 | self._connections[thread_id].execute("PRAGMA temp_store=MEMORY") 102 | 103 | return self._connections[thread_id] 104 | 105 | def _create_tables(self) -> None: 106 | """ 107 | Create the necessary tables in the SQLite database if they don't exist. 108 | """ 109 | this_thread_connection = self.get_connection() 110 | with this_thread_connection: 111 | this_thread_connection.execute( 112 | """ 113 | CREATE TABLE IF NOT EXISTS molecules ( 114 | id INTEGER PRIMARY KEY, 115 | service TEXT NOT NULL, 116 | identifier_mode TEXT NOT NULL, 117 | identifier TEXT NOT NULL, 118 | SMILES TEXT, 119 | additional_information TEXT, 120 | datetime_added DATETIME DEFAULT CURRENT_TIMESTAMP 121 | ) 122 | """ 123 | ) 124 | this_thread_connection.execute( 125 | """ 126 | CREATE TABLE IF NOT EXISTS synonyms ( 127 | id INTEGER PRIMARY KEY, 128 | molecule_id INTEGER NOT NULL, 129 | synonym_index INTEGER NOT NULL, 130 | synonym TEXT NOT NULL COLLATE NOCASE, 131 | CONSTRAINT fk_molecules_synonyms 132 | FOREIGN KEY (molecule_id) 133 | REFERENCES molecules(id) 134 | ON DELETE CASCADE 135 | ) 136 | """ 137 | ) 138 | this_thread_connection.execute( 139 | """ 140 | CREATE TABLE IF NOT EXISTS cas_numbers ( 141 | id INTEGER PRIMARY KEY, 142 | molecule_id INTEGER NOT NULL, 143 | cas_number_index INTEGER NOT NULL, 144 | cas_number TEXT NOT NULL, 145 | CONSTRAINT fk_molecules_cas_numbers 146 | FOREIGN KEY (molecule_id) 147 | REFERENCES molecules(id) 148 | ON DELETE CASCADE 149 | ) 150 | """ 151 | ) 152 | this_thread_connection.execute( 153 | """ 154 | CREATE INDEX IF NOT EXISTS idx_molecules_service_identifier_mode_identifier 155 | ON molecules(service, identifier_mode, identifier) 156 | """ 157 | ) 158 | this_thread_connection.execute( 159 | """ 160 | CREATE INDEX IF NOT EXISTS idx_covering_synonyms ON synonyms (molecule_id, synonym COLLATE NOCASE, synonym_index) 161 | """ 162 | ) 163 | this_thread_connection.execute( 164 | """ 165 | CREATE INDEX IF NOT EXISTS idx_covering_cas_number ON cas_numbers (molecule_id, cas_number, cas_number_index) 166 | """ 167 | ) 168 | 169 | def save( 170 | self, 171 | service: Union[str, list[str]], 172 | identifier_mode: Union[str, list[str]], 173 | identifier: Union[str, list[str]], 174 | molecules: Union[Molecule, list[Molecule]], 175 | ) -> None: 176 | """ 177 | Save molecule information to the database. 178 | 179 | Saves one or multiple Molecule objects to the database, along with their associated service, identifier_mode, and identifier. 180 | 181 | Args: 182 | service (Union[str, list[str]]): The service(s) associated with the molecule(s). 183 | 184 | identifier_mode (Union[str, list[str]]): The identifier mode(s) for the molecule(s). 185 | 186 | identifier (Union[str, list[str]]): The identifier(s) for the molecule(s). 187 | 188 | molecules (Union[Molecule, list[Molecule]]): The molecule(s) to be saved. 189 | 190 | Raises: 191 | ValueError: If a molecule's synonyms contain a pipe symbol or if molecule properties don't match the input values. 192 | """ 193 | if isinstance(molecules, Molecule) or molecules is None: 194 | molecules = [molecules] 195 | 196 | for molecule in molecules: 197 | if molecule: 198 | if any(["|" in synonym for synonym in molecule.synonyms]): 199 | raise ValueError( 200 | 'molecule names i.e. synonyms must not contain pipe symbols: "|"' 201 | ) 202 | 203 | if isinstance(service, str): 204 | service = [service] * len(molecules) 205 | identifier_mode = [identifier_mode] * len(molecules) 206 | identifier = [identifier] * len(molecules) 207 | 208 | this_thread_connection = self.get_connection() 209 | with this_thread_connection: 210 | # unfortunately it seems, that python sqlite3 does not support executemany while returning 211 | # the inserted rows. And even if it would be supported, the order of returned ids is not 212 | # guaranteed to be the same order of insertion. Therefore we have to do it one by one. 213 | # https://discuss.python.org/t/sqlite3-executemany-with-returning-clauses/26291 214 | # It could be circumvented by constructing the insert statement manually, running with execute 215 | # and then matching the returned ids to the inserted data. Idk what is faster though. 216 | molecule_ids = [] 217 | for s, m, i, molecule in zip( 218 | service, identifier_mode, identifier, molecules 219 | ): 220 | if molecule is None: 221 | this_data = (s, m, i, None, None) 222 | else: 223 | if ( 224 | molecule.service != s 225 | or molecule.mode != m 226 | or molecule.identifier != i 227 | ): 228 | raise ValueError( 229 | "The molecule properties do not match the input values to the save function." 230 | ) 231 | 232 | this_data = ( 233 | molecule.service.strip(), 234 | molecule.mode.strip(), 235 | i, 236 | molecule.SMILES.strip(), 237 | ( 238 | str(molecule.additional_information).strip() 239 | if molecule.additional_information 240 | else None 241 | ), 242 | ) 243 | 244 | cursor = this_thread_connection.execute( 245 | """ 246 | INSERT INTO molecules (service, identifier_mode, identifier, SMILES, additional_information) 247 | VALUES (?, ?, ?, ?, ?) 248 | """, 249 | this_data, 250 | ) 251 | molecule_ids.append(cursor.lastrowid) 252 | 253 | name_rows_to_insert = [] 254 | cas_number_rows_to_insert = [] 255 | for molecule_id, molecule in zip(molecule_ids, molecules): 256 | if molecule: 257 | if molecule.synonyms: 258 | this_molecule_synonyms = [ 259 | (molecule_id, synonym_index, synonym.strip()) 260 | for synonym_index, synonym in enumerate(molecule.synonyms) 261 | ] 262 | name_rows_to_insert.extend(this_molecule_synonyms) 263 | 264 | if molecule.CAS: 265 | this_molecule_cas_numbers = [ 266 | (molecule_id, cas_number_index, cas_number.strip()) 267 | for cas_number_index, cas_number in enumerate(molecule.CAS) 268 | ] 269 | cas_number_rows_to_insert.extend(this_molecule_cas_numbers) 270 | 271 | this_thread_connection.executemany( 272 | """ 273 | INSERT INTO synonyms (molecule_id, synonym_index, synonym) 274 | VALUES (?, ?, ?) 275 | """, 276 | name_rows_to_insert, 277 | ) 278 | 279 | this_thread_connection.executemany( 280 | """ 281 | INSERT INTO cas_numbers (molecule_id, cas_number_index, cas_number) 282 | VALUES (?, ?, ?) 283 | """, 284 | cas_number_rows_to_insert, 285 | ) 286 | 287 | def _search( 288 | self, 289 | service: Union[str, list[str]], 290 | identifier_mode: Union[str, list[str]], 291 | identifier: Union[str, list[str]], 292 | only_check_for_existence: Optional[bool] = False, 293 | ) -> Union[ 294 | Optional[list[Molecule]], list[Optional[list[Molecule]]], bool, list[bool] 295 | ]: 296 | """ 297 | Search for molecules in the database based on the provided criteria. 298 | 299 | Supports single and multiple molecule searches. It can either return the full molecule information or just check for existence. 300 | 301 | Args: 302 | service (Union[str, list[str]]): The service(s) to search in. 303 | 304 | identifier_mode (Union[str, list[str]]): The mode(s) of identification (e.g., 'name', 'cas'). 305 | 306 | identifier (Union[str, list[str]]): The identifier(s) to search for. 307 | 308 | only_check_for_existence (Optional[bool]): If True, only check if the molecule exists. Defaults to False. 309 | 310 | Returns: 311 | 312 | Union[Optional[list[Molecule]], list[Optional[list[Molecule]]], bool, list[bool]]: 313 | - If searching for a single molecule: 314 | - If only_check_for_existence is False: returns Optional[list[Molecule]] 315 | - If only_check_for_existence is True: returns bool 316 | - If searching for multiple molecules: 317 | - If only_check_for_existence is False: returns list[Optional[list[Molecule]]] 318 | - If only_check_for_existence is True: returns list[bool] 319 | 320 | Raises: 321 | ValueError: If the input parameters are inconsistent or invalid for multiple searches. 322 | """ 323 | if not isinstance(identifier, str): 324 | search_mode = "multiple" 325 | if not (isinstance(identifier_mode, str) and isinstance(service, str)): 326 | if ( 327 | isinstance(service, str) 328 | or isinstance(identifier_mode, str) 329 | or len(service) != len(identifier_mode) 330 | or len(identifier_mode) != len(identifier) 331 | ): 332 | raise ValueError( 333 | "When searching for multiple molecules, service, mode and identifier all must be provided as str or same sized lists." 334 | ) 335 | else: 336 | search_mode = "single" 337 | if not (isinstance(identifier_mode, str) and isinstance(service, str)): 338 | raise ValueError( 339 | "When searching for a single molecule, service, mode and identifier all must be provided as strings." 340 | ) 341 | 342 | def rows_to_molecules(service_, identifier_mode_, identifier_, rows): 343 | molecules = [] 344 | for row in rows: 345 | SMILES, additional_information, temp_synonyms, temp_cas_numbers = row 346 | if SMILES: 347 | synonyms = [] 348 | cas_numbers = [] 349 | 350 | # Workaround as GROUP_CONCAT does not preserve order of the values 351 | if temp_synonyms: 352 | temp_synonyms = { 353 | int(k): v 354 | for k, v in ( 355 | kv.split("|") for kv in temp_synonyms.split("||") 356 | ) 357 | } 358 | synonyms = [ 359 | temp_synonyms[k] for k in sorted(temp_synonyms.keys()) 360 | ] 361 | if temp_cas_numbers: 362 | temp_cas_numbers = { 363 | int(k): v 364 | for k, v in ( 365 | kv.split("|") for kv in temp_cas_numbers.split("||") 366 | ) 367 | } 368 | cas_numbers = [ 369 | temp_cas_numbers[k] for k in sorted(temp_cas_numbers.keys()) 370 | ] 371 | 372 | molecules.append( 373 | Molecule( 374 | SMILES, 375 | synonyms, 376 | cas_numbers, 377 | additional_information if additional_information else "", 378 | identifier_mode_, 379 | service_, 380 | 1, 381 | identifier_, 382 | ) 383 | ) 384 | return molecules 385 | 386 | this_thread_connection = self.get_connection() 387 | with this_thread_connection: 388 | if search_mode == "single": 389 | identifier_clause = "identifier = ?" 390 | identifier_mode_clause = "identifier_mode = ? AND" 391 | values = (service, identifier_mode, identifier) 392 | 393 | if identifier_mode == "name": 394 | identifier_clause = "identifier = ? COLLATE NOCASE" 395 | if identifier_mode == "cas": 396 | identifier_clause = ( 397 | "identifier = ? " # "cas_numbers.cas_number = ?" 398 | ) 399 | 400 | sql = f""" 401 | SELECT molecules.id, 402 | SMILES, 403 | additional_information, 404 | GROUP_CONCAT(synonym_index || '|' || synonym, '||'), 405 | GROUP_CONCAT(cas_number_index || '|' || cas_number, '||') 406 | FROM molecules 407 | LEFT JOIN synonyms ON molecules.id = synonyms.molecule_id 408 | LEFT JOIN cas_numbers ON molecules.id = cas_numbers.molecule_id 409 | WHERE service = ? AND {identifier_mode_clause} {identifier_clause} 410 | GROUP BY molecules.id 411 | """ 412 | cursor = this_thread_connection.execute(sql, values) 413 | 414 | molecule_rows = [row[1:] for row in cursor if row[0]] 415 | 416 | if only_check_for_existence: 417 | return len(molecule_rows) != 0 418 | 419 | if not molecule_rows: 420 | return None 421 | 422 | return rows_to_molecules( 423 | service, identifier_mode, identifier, molecule_rows 424 | ) 425 | 426 | else: 427 | this_transaction_unique_temp_table_name = f"tmp_{uuid.uuid4().hex}" 428 | 429 | this_thread_connection.execute( 430 | f""" 431 | CREATE TEMPORARY TABLE {this_transaction_unique_temp_table_name} ( 432 | search_index INTEGER NOT NULL, 433 | service TEXT NOT NULL, 434 | identifier TEXT NOT NULL 435 | ) 436 | """ 437 | ) 438 | 439 | this_thread_connection.executemany( 440 | f""" 441 | INSERT INTO {this_transaction_unique_temp_table_name} (search_index, service, identifier) 442 | VALUES (?, ?, ?) 443 | """, 444 | list( 445 | zip( 446 | range(len(service)), 447 | service, 448 | identifier, 449 | ) 450 | ), 451 | ) 452 | 453 | if only_check_for_existence: 454 | optional_columns = "" 455 | else: 456 | optional_columns = """, 457 | SMILES, 458 | additional_information, 459 | GROUP_CONCAT(synonym_index || '|' || synonym, '||'), 460 | GROUP_CONCAT(cas_number_index || '|' || cas_number, '||') 461 | """ 462 | 463 | # Distinction makes queries run much faster 464 | all_one_service = len(set(service)) == 1 465 | molecule_join_on_service = "t.service" 466 | if all_one_service: 467 | molecule_join_on_service = f"'{service[0]}'" 468 | 469 | all_one_identifier_mode = len(set(identifier_mode)) == 1 470 | if not all_one_identifier_mode: 471 | raise ValueError( 472 | "This class expects all identifier modes to be the same." 473 | ) 474 | 475 | collation = "" 476 | if identifier_mode[0] == "name": 477 | collation = "COLLATE NOCASE" 478 | 479 | cursor = this_thread_connection.execute( 480 | f""" 481 | SELECT search_index, 482 | m.id{optional_columns} 483 | FROM {this_transaction_unique_temp_table_name} AS t 484 | INNER JOIN molecules AS m 485 | ON m.identifier_mode = '{identifier_mode[0]}' 486 | AND m.service = {molecule_join_on_service} 487 | LEFT JOIN synonyms AS s 488 | ON m.id = s.molecule_id 489 | LEFT JOIN cas_numbers AS c 490 | ON m.id = c.molecule_id 491 | WHERE m.identifier = t.identifier {collation} 492 | GROUP BY search_index, m.id 493 | """ 494 | ) 495 | # TODO: search also the synonyms and cas_numbers tables 496 | results = [None] * len(service) 497 | rows = cursor.fetchall() 498 | if only_check_for_existence: 499 | for row in rows: 500 | search_index, molecule_id = row 501 | results[search_index] = molecule_id is not None 502 | else: 503 | rows_by_search_index = {} 504 | for row in rows: 505 | ( 506 | search_index, 507 | molecule_id, 508 | SMILES, 509 | additional_information, 510 | temp_synonyms, 511 | temp_cas_numbers, 512 | ) = row 513 | 514 | entry_found = molecule_id is not None 515 | if entry_found: 516 | if search_index not in rows_by_search_index: 517 | rows_by_search_index[search_index] = [] 518 | 519 | if SMILES: 520 | rows_by_search_index[search_index].append( 521 | ( 522 | SMILES, 523 | additional_information, 524 | temp_synonyms, 525 | temp_cas_numbers, 526 | ) 527 | ) 528 | 529 | for search_index, rows in rows_by_search_index.items(): 530 | results[search_index] = rows_to_molecules( 531 | service[search_index], 532 | identifier_mode[search_index], 533 | identifier[search_index], 534 | rows, 535 | ) 536 | 537 | return results 538 | 539 | def exists( 540 | self, 541 | service: Union[str, list[str]], 542 | identifier_mode: Union[str, list[str]], 543 | identifier: Union[str, list[str]], 544 | ) -> Union[bool, list[bool]]: 545 | """ 546 | Check if molecule(s) exist in the database based on the provided criteria. 547 | 548 | Supports both single and multiple molecule existence checks. 549 | 550 | Args: 551 | service (Union[str, list[str]]): The service(s) to search in. 552 | Can be a single string or a sequence of strings for multiple checks. 553 | 554 | identifier_mode (Union[str, list[str]]): The mode(s) of identification (e.g., 'name', 'cas'). 555 | Can be a single string or a sequence of strings for multiple checks. 556 | 557 | identifier (Union[str, list[str]]): The identifier(s) to search for. 558 | Can be a single string or a sequence of strings for multiple checks. 559 | 560 | Returns: 561 | 562 | Union[bool, list[bool]]: 563 | 564 | - For a single check: A boolean indicating whether the molecule exists. 565 | - For multiple checks: A list of booleans, each indicating whether the corresponding molecule exists. 566 | 567 | Note: 568 | This method uses the internal _search method with the 'only_check_for_existence' flag set to True. 569 | """ 570 | return self._search( 571 | service, identifier_mode, identifier, only_check_for_existence=True 572 | ) 573 | 574 | def search( 575 | self, 576 | service: Union[str, list[str]], 577 | identifier_mode: Union[str, list[str]], 578 | identifier: Union[str, list[str]], 579 | ) -> Union[Optional[list[Molecule]], list[Optional[list[Molecule]]]]: 580 | """ 581 | Search for molecules based on the given parameters. 582 | 583 | Searches for molecules using the specified service, identifier mode, and identifier. 584 | Supports both single and multiple searches. 585 | 586 | Args: 587 | service (Union[str, list[str]]): The service(s) to use for the search. 588 | Can be a single string or a sequence of strings. 589 | 590 | identifier_mode (Union[str, list[str]]): The identifier mode(s) to use. 591 | Can be a single string or a sequence of strings. 592 | 593 | identifier (Union[str, list[str]]): The identifier(s) to search for. 594 | Can be a single string or a sequence of strings. 595 | 596 | Returns: 597 | 598 | Union[Optional[list[Molecule]], list[Optional[list[Molecule]]]]: 599 | 600 | - If a single search is performed, returns either None or a list of Molecule objects. 601 | - If multiple searches are performed, returns a list of results, where each result 602 | is either None or a list of Molecule objects. 603 | 604 | Note: 605 | This method internally calls the _search method to perform the actual search operation. 606 | """ 607 | return self._search(service, identifier_mode, identifier) 608 | 609 | def delete_expired(self) -> None: 610 | """ 611 | Delete expired molecules from the cache. 612 | 613 | Removes all molecules from the database that were added before the expiration datetime, if set. 614 | 615 | Note: 616 | This method only performs the deletion if 'self.expiration_datetime' is set. 617 | """ 618 | if self.expiration_datetime: 619 | this_thread_connection = self.get_connection() 620 | with this_thread_connection: 621 | this_thread_connection.execute( 622 | """ 623 | DELETE FROM molecules 624 | WHERE datetime_added < ? 625 | """, 626 | (self.expiration_datetime,), 627 | ) 628 | 629 | def delete_by_service(self, service: str, mode: Optional[str] = '%') -> None: 630 | """ 631 | Delete all molecules associated with a specific service from the cache. 632 | 633 | Args: 634 | service (str): The name of the service whose molecules should be deleted. 635 | """ 636 | this_thread_connection = self.get_connection() 637 | with this_thread_connection: 638 | sql = """ 639 | DELETE FROM molecules 640 | WHERE service = ? AND identifier_mode LIKE ? 641 | """ 642 | this_thread_connection.execute( 643 | sql, 644 | (service, mode), 645 | ) 646 | 647 | def recreate_all_tables(self) -> None: 648 | """ 649 | Recreate all tables in the database. 650 | 651 | Closes any existing connections, deletes the database files, 652 | and then recreates the tables. Use with caution, as it will 653 | result in data loss. 654 | 655 | Raises: 656 | RuntimeError: If called in a multi-threaded environment (more than one connection). 657 | """ 658 | if len(self._connections) > 1: 659 | raise RuntimeError( 660 | "Cannot delete cache files in a multi-threaded environment." 661 | ) 662 | else: 663 | if len(self._connections) == 1: 664 | this_thread_connection = self.get_connection() 665 | this_thread_connection.close() 666 | self._connections.clear() 667 | 668 | files = [self.db_path, f"{self.db_path}-shm", f"{self.db_path}-wal"] 669 | for file in files: 670 | if os.path.exists(file): 671 | os.remove(file) 672 | 673 | self._create_tables() 674 | 675 | def count(self, service: Optional[str] = None) -> int: 676 | """ 677 | Count the number of molecules in the database, optionally filtered by service. 678 | 679 | Args: 680 | service (Optional[str]): The service to filter by. If None, counts all molecules. 681 | 682 | Returns: 683 | int: The number of molecules matching the criteria. 684 | """ 685 | this_thread_connection = self.get_connection() 686 | with this_thread_connection: 687 | if service: 688 | cursor = this_thread_connection.execute( 689 | """ 690 | SELECT COUNT(*) 691 | FROM molecules 692 | WHERE service = ? 693 | """, 694 | (service,), 695 | ) 696 | else: 697 | cursor = this_thread_connection.execute( 698 | """ 699 | SELECT COUNT(*) 700 | FROM molecules 701 | """ 702 | ) 703 | 704 | return cursor.fetchone()[0] 705 | -------------------------------------------------------------------------------- /moleculeresolver/__init__.py: -------------------------------------------------------------------------------- 1 | from .moleculeresolver import Molecule 2 | from .moleculeresolver import MoleculeResolver 3 | 4 | from importlib.metadata import version, PackageNotFoundError 5 | try: 6 | __version__ = version("molecule-resolver") 7 | except PackageNotFoundError: 8 | __version__ = "dev" -------------------------------------------------------------------------------- /moleculeresolver/molecule.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from typing import Optional, List, Dict, Any 3 | import copy 4 | 5 | 6 | @dataclass 7 | class Molecule: 8 | """ 9 | Represents a molecule with various properties and identifiers. 10 | 11 | Attributes: 12 | SMILES (Optional[str]): The SMILES (Simplified Molecular Input Line Entry System) representation of the molecule. 13 | 14 | synonyms (Optional[list[str]]): A list of alternative names or synonyms for the molecule. 15 | 16 | CAS (Optional[list[str]]): A list of CAS (Chemical Abstracts Service) registry numbers for the molecule. 17 | 18 | additional_information (Optional[str]): Any additional information about the molecule. 19 | 20 | mode (Optional[str]): The mode associated with the molecule. 21 | 22 | service (Optional[str]): The service associated with the molecule. 23 | 24 | number_of_crosschecks (Optional[int]): The number of cross-checks performed on the molecule. 25 | 26 | identifier (Optional[str]): A unique identifier for the molecule. 27 | 28 | found_molecules (Optional[list]): A list of related molecules found during processing. 29 | """ 30 | 31 | SMILES: Optional[str] = None 32 | synonyms: Optional[List[str]] = field(default_factory=list) 33 | CAS: Optional[List[str]] = field(default_factory=list) 34 | additional_information: Optional[str] = "" 35 | mode: Optional[str] = "" 36 | service: Optional[str] = "" 37 | number_of_crosschecks: Optional[int] = 1 38 | identifier: Optional[str] = "" 39 | found_molecules: Optional[list] = field(default_factory=list) 40 | 41 | def to_dict(self, found_molecules: Optional[str] = 'recursive') -> Dict[str, Any]: 42 | """ 43 | Convert the Molecule object to a dictionary. 44 | 45 | Args: 46 | found_molecules (Optional[str]): Determines how 'found_molecules' are handled. 47 | - If 'remove', the 'found_molecules' field will be excluded. 48 | - If 'recursive', 'found_molecules' will be recursively converted to dictionaries. 49 | 50 | Returns: 51 | Dict[str, Any]: A dictionary representation of the Molecule object. 52 | 53 | Note: 54 | This method creates a deep copy of the object's `__dict__` attribute. 55 | Depending on the `found_molecules` parameter, it may exclude or recursively convert 56 | the 'found_molecules' field before returning the dictionary. 57 | """ 58 | d = copy.deepcopy(self.__dict__) 59 | if found_molecules == "remove": 60 | if "found_molecules" in d: 61 | d.pop("found_molecules") 62 | elif found_molecules == "recursive": 63 | if "found_molecules" in d: 64 | new_found_molecules = [] 65 | for grouped_item in d["found_molecules"]: 66 | key = list(grouped_item.keys())[0] 67 | value = list(grouped_item.values())[0] 68 | new_found_molecules.append( 69 | {key: [m.to_dict("recursive") for m in value]} 70 | ) 71 | d["found_molecules"] = new_found_molecules 72 | return d 73 | -------------------------------------------------------------------------------- /moleculeresolver/opsin-cli-2.8.0-jar-with-dependencies.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MoleculeResolver/molecule-resolver/15253f702a3ca823c476e0551ff4f895f017cc68/moleculeresolver/opsin-cli-2.8.0-jar-with-dependencies.jar -------------------------------------------------------------------------------- /moleculeresolver/rdkitmods.py: -------------------------------------------------------------------------------- 1 | from contextlib import ContextDecorator 2 | from rdkit import rdBase 3 | 4 | 5 | class disabling_rdkit_logger(ContextDecorator): 6 | """ 7 | A context manager for disabling RDKit logging 8 | based on https://github.com/rdkit/rdkit/issues/2320#issuecomment-731261149 9 | 10 | This class allows for temporary suppression of RDKit log messages at various levels 11 | (error, warning, info, debug) to reduce noise in the output during specific operations. 12 | 13 | Attributes: 14 | previous_status (dict): The log status before entering the context manager. 15 | desired_status (dict): The log status desired during the context manager's execution. 16 | """ 17 | 18 | def __init__( 19 | self, 20 | mute_errors: bool = True, 21 | mute_warning: bool = True, 22 | mute_info: bool = True, 23 | mute_debug: bool = True, 24 | ) -> None: 25 | """ 26 | Initializes the disabling_rdkit_logger context manager. 27 | 28 | Args: 29 | mute_errors (bool): If True, suppress error messages. Defaults to True. 30 | mute_warning (bool): If True, suppress warning messages. Defaults to True. 31 | mute_info (bool): If True, suppress info messages. Defaults to True. 32 | mute_debug (bool): If True, suppress debug messages. Defaults to True. 33 | """ 34 | 35 | self.previous_status = self._get_log_status() 36 | 37 | self.desired_status = { 38 | "rdApp.error": not mute_errors, 39 | "rdApp.warning": not mute_warning, 40 | "rdApp.debug": not mute_debug, 41 | "rdApp.info": not mute_info, 42 | } 43 | 44 | def _get_log_status(self) -> dict[str, bool]: 45 | """ 46 | Get the current log status of RDKit logs. 47 | 48 | Returns: 49 | dict[str, bool]: A dictionary indicating the log status (enabled/disabled) for each log level. 50 | """ 51 | log_status = rdBase.LogStatus() 52 | log_status = { 53 | st.split(":")[0]: st.split(":")[1] for st in log_status.split("\n") 54 | } 55 | log_status = {k: v == "enabled" for k, v in log_status.items()} 56 | return log_status 57 | 58 | def _apply_log_status(self, log_status: dict[str, bool]) -> None: 59 | """ 60 | Apply an RDKit log status. 61 | 62 | Args: 63 | log_status (dict[str, bool]): A dictionary with log levels as keys and their desired status (True/False) as values. 64 | """ 65 | for k, v in log_status.items(): 66 | if v: 67 | rdBase.EnableLog(k) 68 | else: 69 | rdBase.DisableLog(k) 70 | 71 | def __enter__(self) -> "disabling_rdkit_logger": 72 | """ 73 | Enter the runtime context related to this object. 74 | 75 | Applies the desired log status when entering the context. 76 | 77 | Returns: 78 | disabling_rdkit_logger: The context manager itself. 79 | """ 80 | self._apply_log_status(self.desired_status) 81 | return self 82 | 83 | def __exit__(self, exception_type, exception_value, exception_traceback) -> None: 84 | """ 85 | Exit the runtime context related to this object. 86 | 87 | Restores the previous log status when exiting the context. 88 | """ 89 | self._apply_log_status(self.previous_status) 90 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "molecule-resolver" 3 | version = "0.3.9" 4 | description = "A package to use several web services to find molecule structures, synonyms and CAS." 5 | authors = [ 6 | "Simon Muller ", 7 | "Kobi Felton ", 8 | ] 9 | license = "MIT" 10 | readme = "README.md" 11 | packages = [{ include = "moleculeresolver" }] 12 | 13 | [tool.poetry.dependencies] 14 | python = "^3.10,<3.14" 15 | prompt-toolkit = "^3.0.39" 16 | regex = "^2023.10.3" 17 | rdkit = ">=2023.3.3" 18 | requests = "^2.31.0" 19 | openpyxl = "^3.1.2" 20 | tqdm = "^4.66.3" 21 | urllib3 = "^2.0.6" 22 | xmltodict = "^0.13.0" 23 | 24 | 25 | [tool.poetry.group.dev.dependencies] 26 | pytest = "^7.4.3" 27 | pytest-mock = "^3.12.0" 28 | sphinx = "^8.1.3" 29 | sphinx-autobuild = "^2024.10.3" 30 | sphinx-autoapi = "^3.4.0" 31 | 32 | [build-system] 33 | requires = ["poetry-core"] 34 | build-backend = "poetry.core.masonry.api" 35 | -------------------------------------------------------------------------------- /tests/benchmark_component_molecules_iupac.json: -------------------------------------------------------------------------------- 1 | { 2 | "Acetic Acid": { 3 | "SMILES": "CC(=O)O", 4 | "synonyms": [ 5 | "Acetic Acid", 6 | "Ethanoic acid", 7 | "Ethylic acid", 8 | "Glacial acetic acid", 9 | "Methanecarboxylic acid" 10 | ], 11 | "CAS": [ 12 | "64-19-7" 13 | ], 14 | "additional_information": "cas_registry; chemeo id: 51-574-8; cir; comptox id: DTXSID5024394|QC_LEVEL:1.0; nist id: C1186523; opsin; pubchem id: 176; srs id: 3848", 15 | "mode": "name; name; name; name; name; name; name; name", 16 | "service": "cas_registry; chemeo; cir; comptox; nist; opsin; pubchem; srs", 17 | "number_of_crosschecks": 8, 18 | "identifier": "Acetic Acid", 19 | "found_molecules": [], 20 | "pubchem_cid": 176, 21 | "formula": "C2H4O2", 22 | "hill_formula": "C2H4O2", 23 | "inchi": "InChI=1S/C2H4O2/c1-2(3)4/h1H3,(H,3,4)", 24 | "inchikey": "QTBSBXVTEAMEQO-UHFFFAOYSA-N", 25 | "iupac_name": "Ethanoic acid" 26 | }, 27 | "Acetone": { 28 | "SMILES": "CC(C)=O", 29 | "synonyms": [ 30 | "Acetone", 31 | "2-Propanone", 32 | "Dimethyl ketone", 33 | "propan-2-one", 34 | "Methyl ketone" 35 | ], 36 | "CAS": [ 37 | "67-64-1" 38 | ], 39 | "additional_information": "cas_registry; chebi id: 15347; chemeo id: 50-301-1; cir; comptox id: DTXSID8021482|QC_LEVEL:1.0; nist id: C666524; opsin; pubchem id: 180; srs id: 4309", 40 | "mode": "name; name; name; name; name; name; name; name; name", 41 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs", 42 | "number_of_crosschecks": 9, 43 | "identifier": "Acetone", 44 | "found_molecules": [], 45 | "pubchem_cid": 180, 46 | "formula": "C3H6O", 47 | "hill_formula": "C3H6O", 48 | "inchi": "InChI=1S/C3H6O/c1-3(2)4/h1-2H3", 49 | "inchikey": "CSCPPACGZOOCGX-UHFFFAOYSA-N", 50 | "iupac_name": "propan-2-one" 51 | }, 52 | "Acetylsalicylic Acid": { 53 | "SMILES": "CC(=O)Oc1ccccc1C(=O)O", 54 | "synonyms": [ 55 | "Acetylsalicylic Acid", 56 | "Aspirin", 57 | "2-(acetyloxy)-Benzoic acid", 58 | "2-(acetyloxy)benzoic acid", 59 | "Rhodine" 60 | ], 61 | "CAS": [ 62 | "50-78-2" 63 | ], 64 | "additional_information": "cas_registry; chebi id: 15365; chemeo id: 43-227-2; cir; comptox id: DTXSID5020108|QC_LEVEL:1.0; nist id: C50782; opsin; pubchem id: 2244; srs id: 1198", 65 | "mode": "name; name; name; name; name; name; name; name; name", 66 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs", 67 | "number_of_crosschecks": 9, 68 | "identifier": "Acetylsalicylic Acid", 69 | "found_molecules": [], 70 | "pubchem_cid": 2244, 71 | "formula": "C9H8O4", 72 | "hill_formula": "C9H8O4", 73 | "inchi": "InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)", 74 | "inchikey": "BSYNRYMUTXBXSQ-UHFFFAOYSA-N", 75 | "iupac_name": "2-acetyloxybenzoic acid" 76 | }, 77 | "Adenine": { 78 | "SMILES": "Nc1ncnc2nc[nH]c12", 79 | "synonyms": [ 80 | "Adenine", 81 | "1H-Purin-6-amine", 82 | "9H-Purin-6-amine", 83 | "6-Aminopurine", 84 | "Adeninimine" 85 | ], 86 | "CAS": [ 87 | "73-24-5" 88 | ], 89 | "additional_information": "cas_registry; cir; opsin; pubchem id: 190", 90 | "mode": "name; name; name; name", 91 | "service": "cas_registry; cir; opsin; pubchem", 92 | "number_of_crosschecks": 4, 93 | "identifier": "Adenine", 94 | "found_molecules": [], 95 | "pubchem_cid": 190, 96 | "formula": "C5H5N5", 97 | "hill_formula": "C5H5N5", 98 | "inchi": "InChI=1S/C5H5N5/c6-4-3-5(9-1-7-3)10-2-8-4/h1-2H,(H3,6,7,8,9,10)", 99 | "inchikey": "GFFGJBXGBJISGV-UHFFFAOYSA-N", 100 | "iupac_name": "7H-purin-6-amine" 101 | }, 102 | "Adenosine Triphosphate": { 103 | "SMILES": "Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O", 104 | "synonyms": [ 105 | "Adenosine Triphosphate", 106 | "atp", 107 | "[[(2R,3S,4R,5R)-5-(6-aminopurin-9-yl)-3,4-dihydroxyoxolan-2-yl]methoxy-hydroxyphosphoryl] phosphono hydrogen phosphate", 108 | "Adenosine 5'-triphosphate", 109 | "Triphosphaden" 110 | ], 111 | "CAS": [ 112 | "56-65-5" 113 | ], 114 | "additional_information": "cir; comptox id: DTXSID6022559|QC_LEVEL:1.0; opsin; pubchem id: 5957; srs id: 2220", 115 | "mode": "name; name; name; name; name", 116 | "service": "cir; comptox; opsin; pubchem; srs", 117 | "number_of_crosschecks": 5, 118 | "identifier": "Adenosine Triphosphate", 119 | "found_molecules": [], 120 | "pubchem_cid": 5957, 121 | "formula": "C10H16N5O13P3", 122 | "hill_formula": "C10H16N5O13P3", 123 | "inchi": "InChI=1S/C10H16N5O13P3/c11-8-5-9(13-2-12-8)15(3-14-5)10-7(17)6(16)4(26-10)1-25-30(21,22)28-31(23,24)27-29(18,19)20/h2-4,6-7,10,16-17H,1H2,(H,21,22)(H,23,24)(H2,11,12,13)(H2,18,19,20)/t4-,6-,7-,10-/m1/s1", 124 | "inchikey": "ZKHQWZAMYRWXGA-KQYNXXCUSA-N", 125 | "iupac_name": "[[(2R,3S,4R,5R)-5-(6-aminopurin-9-yl)-3,4-dihydroxyoxolan-2-yl]methoxy-hydroxyphosphoryl] phosphono hydrogen phosphate" 126 | }, 127 | "Adipic Acid": { 128 | "SMILES": "O=C(O)CCCCC(=O)O", 129 | "synonyms": [ 130 | "Adipic Acid", 131 | "Hexanedioic acid", 132 | "1,4-Butanedicarboxylic acid", 133 | "Adipinic acid", 134 | "1,6-Hexanedioic acid" 135 | ], 136 | "CAS": [ 137 | "124-04-9" 138 | ], 139 | "additional_information": "cas_registry; chebi id: 30832; chemeo id: 12-837-9; cir; comptox id: DTXSID7021605|QC_LEVEL:1.0; nist id: C124049; opsin; pubchem id: 196; srs id: 33340", 140 | "mode": "name; name; name; name; name; name; name; name; name", 141 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs", 142 | "number_of_crosschecks": 9, 143 | "identifier": "Adipic Acid", 144 | "found_molecules": [], 145 | "pubchem_cid": 196, 146 | "formula": "C6H10O4", 147 | "hill_formula": "C6H10O4", 148 | "inchi": "InChI=1S/C6H10O4/c7-5(8)3-1-2-4-6(9)10/h1-4H2,(H,7,8)(H,9,10)", 149 | "inchikey": "WNLRTRBMVRJNCN-UHFFFAOYSA-N", 150 | "iupac_name": "hexanedioic acid" 151 | }, 152 | "Aluminum(III) Oxide": { 153 | "SMILES": "[Al+3].[Al+3].[O-2].[O-2].[O-2]", 154 | "synonyms": [ 155 | "Aluminum(III) Oxide" 156 | ], 157 | "CAS": [], 158 | "additional_information": "opsin", 159 | "mode": "name", 160 | "service": "opsin", 161 | "number_of_crosschecks": 1, 162 | "identifier": "Aluminum(III) Oxide", 163 | "found_molecules": [], 164 | "pubchem_cid": null, 165 | "formula": "Al2O3", 166 | "hill_formula": "Al2O3", 167 | "inchi": "InChI=1S/2Al.3O/q2*+3;3*-2", 168 | "inchikey": "PNEYBMLMFCGWSK-UHFFFAOYSA-N", 169 | "iupac_name": "Aluminum(III) Oxide" 170 | }, 171 | "Ammonia": { 172 | "SMILES": "N", 173 | "synonyms": [ 174 | "Ammonia", 175 | "Ammonia gas", 176 | "Nitro-Sil", 177 | "Spirit of Hartshorn", 178 | "azane" 179 | ], 180 | "CAS": [ 181 | "7664-41-7" 182 | ], 183 | "additional_information": "cas_registry; chebi id: 16134; chemeo id: 22-992-6; cir; comptox id: DTXSID0023872|QC_LEVEL:1.0; nist id: C84796145; opsin; pubchem id: 222; srs id: 152389", 184 | "mode": "name; name; name; name; name; name; name; name; name", 185 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs", 186 | "number_of_crosschecks": 9, 187 | "identifier": "Ammonia", 188 | "found_molecules": [], 189 | "pubchem_cid": 222, 190 | "formula": "H3N", 191 | "hill_formula": "H3N", 192 | "inchi": "InChI=1S/H3N/h1H3", 193 | "inchikey": "QGZKDVFQNNGYKY-UHFFFAOYSA-N", 194 | "iupac_name": "azane" 195 | }, 196 | "Ascorbic Acid": { 197 | "SMILES": "O=C1O[C@H]([C@@H](O)CO)C(O)=C1O", 198 | "synonyms": [ 199 | "Ascorbic Acid", 200 | "(2R)-2-[(1S)-1,2-dihydroxyethyl]-3,4-dihydroxy-2H-furan-5-one", 201 | "l-ascorbic acid", 202 | "vitamin C", 203 | "L-Xyloascorbic acid" 204 | ], 205 | "CAS": [ 206 | "50-81-7" 207 | ], 208 | "additional_information": "opsin; pubchem id: 54670067; srs id: 1214", 209 | "mode": "name; name; name", 210 | "service": "opsin; pubchem; srs", 211 | "number_of_crosschecks": 3, 212 | "identifier": "Ascorbic Acid", 213 | "found_molecules": [], 214 | "pubchem_cid": 54670067, 215 | "formula": "C6H8O6", 216 | "hill_formula": "C6H8O6", 217 | "inchi": "InChI=1S/C6H8O6/c7-1-2(8)5-3(9)4(10)6(11)12-5/h2,5,7-10H,1H2/t2-,5+/m0/s1", 218 | "inchikey": "CIWBSHSKHKDKBQ-JLAZNSOCSA-N", 219 | "iupac_name": "(2R)-2-[(1S)-1,2-dihydroxyethyl]-3,4-dihydroxy-2H-furan-5-one" 220 | }, 221 | "Aspartame": { 222 | "SMILES": "COC(=O)C(Cc1ccccc1)NC(=O)C(N)CC(=O)O", 223 | "synonyms": [ 224 | "Aspartame", 225 | "L-Phenylalanine, L-\u03b1-aspartyl-, 2-methyl ester", 226 | "Succinamic acid, 3-amino-N-(\u03b1-carboxyphenethyl)-, N-methyl ester, stereoisomer", 227 | "L-Phenylalanine, N-L-\u03b1-aspartyl-, 1-methyl ester", 228 | "L-Aspartyl-L-phenylalanine methyl ester" 229 | ], 230 | "CAS": [ 231 | "22839-47-0" 232 | ], 233 | "additional_information": "cas_registry; cir; srs id: 222604", 234 | "mode": "name; name; name", 235 | "service": "cas_registry; cir; srs", 236 | "number_of_crosschecks": 3, 237 | "identifier": "Aspartame", 238 | "found_molecules": [], 239 | "pubchem_cid": null, 240 | "formula": "C14H18N2O5", 241 | "hill_formula": "C14H18N2O5", 242 | "inchi": "InChI=1S/C14H18N2O5/c1-21-14(20)11(7-9-5-3-2-4-6-9)16-13(19)10(15)8-12(17)18/h2-6,10-11H,7-8,15H2,1H3,(H,16,19)(H,17,18)", 243 | "inchikey": "IAOZJIPTCAWIRG-UHFFFAOYSA-N", 244 | "iupac_name": "3-amino-4-[(1-methoxy-1-oxo-3-phenylpropan-2-yl)amino]-4-oxobutanoic acid" 245 | }, 246 | "Benzene": { 247 | "SMILES": "c1ccccc1", 248 | "synonyms": [ 249 | "Benzene", 250 | "Benzol", 251 | "Cyclohexatriene", 252 | "Benzole", 253 | "Coal naphtha" 254 | ], 255 | "CAS": [ 256 | "71-43-2" 257 | ], 258 | "additional_information": "cas_registry; chebi id: 16716; chemeo id: 12-667-8; cir; comptox id: DTXSID3039242|QC_LEVEL:1.0; nist id: C1076433; opsin; pubchem id: 241; srs id: 4754", 259 | "mode": "name; name; name; name; name; name; name; name; name", 260 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs", 261 | "number_of_crosschecks": 9, 262 | "identifier": "Benzene", 263 | "found_molecules": [], 264 | "pubchem_cid": 241, 265 | "formula": "C6H6", 266 | "hill_formula": "C6H6", 267 | "inchi": "InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H", 268 | "inchikey": "UHOVQNZJYSORNB-UHFFFAOYSA-N", 269 | "iupac_name": "benzene" 270 | }, 271 | "Benzoic Acid": { 272 | "SMILES": "O=C(O)c1ccccc1", 273 | "synonyms": [ 274 | "Benzoic Acid", 275 | "Benzenecarboxylic acid", 276 | "Benzeneformic acid", 277 | "Carboxybenzene", 278 | "Dracylic acid" 279 | ], 280 | "CAS": [ 281 | "65-85-0" 282 | ], 283 | "additional_information": "cas_registry; chemeo id: 18-337-8; cir; comptox id: DTXSID6020143|QC_LEVEL:1.0; nist id: C65850; opsin; pubchem id: 243; srs id: 4044", 284 | "mode": "name; name; name; name; name; name; name; name", 285 | "service": "cas_registry; chemeo; cir; comptox; nist; opsin; pubchem; srs", 286 | "number_of_crosschecks": 8, 287 | "identifier": "Benzoic Acid", 288 | "found_molecules": [], 289 | "pubchem_cid": 243, 290 | "formula": "C7H6O2", 291 | "hill_formula": "C7H6O2", 292 | "inchi": "InChI=1S/C7H6O2/c8-7(9)6-4-2-1-3-5-6/h1-5H,(H,8,9)", 293 | "inchikey": "WPYMKLBDIGXBTP-UHFFFAOYSA-N", 294 | "iupac_name": "benzoic acid" 295 | }, 296 | "Biphenyl ": { 297 | "SMILES": "c1ccc(-c2ccccc2)cc1", 298 | "synonyms": [ 299 | "Biphenyl", 300 | "1,1'-biphenyl", 301 | "Diphenyl", 302 | "Phenylbenzene", 303 | "Bibenzene" 304 | ], 305 | "CAS": [ 306 | "92-52-4" 307 | ], 308 | "additional_information": "cas_registry; chebi id: 17097; chemeo id: 41-472-2; cir; comptox id: DTXSID4020161|QC_LEVEL:1.0; nist id: C1486017; opsin; pubchem id: 7095; srs id: 14183", 309 | "mode": "name; name; name; name; name; name; name; name; name", 310 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs", 311 | "number_of_crosschecks": 9, 312 | "identifier": "Biphenyl", 313 | "found_molecules": [], 314 | "pubchem_cid": 7095, 315 | "formula": "C12H10", 316 | "hill_formula": "C12H10", 317 | "inchi": "InChI=1S/C12H10/c1-3-7-11(8-4-1)12-9-5-2-6-10-12/h1-10H", 318 | "inchikey": "ZUOUZKKEUPVFJK-UHFFFAOYSA-N", 319 | "iupac_name": "1,1'-biphenyl" 320 | }, 321 | "Butane": { 322 | "SMILES": "CCCC", 323 | "synonyms": [ 324 | "Butane", 325 | "n-Butane", 326 | "Diethyl", 327 | "lpg", 328 | "Liquefied petroleum gas" 329 | ], 330 | "CAS": [ 331 | "106-97-8" 332 | ], 333 | "additional_information": "cas_registry; chebi id: 37808; chemeo id: 26-823-9; cir; comptox id: DTXSID7024665|QC_LEVEL:1.0; nist id: C106978; opsin; pubchem id: 7843; srs id: 24026", 334 | "mode": "name; name; name; name; name; name; name; name; name", 335 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs", 336 | "number_of_crosschecks": 9, 337 | "identifier": "Butane", 338 | "found_molecules": [], 339 | "pubchem_cid": 7843, 340 | "formula": "C4H10", 341 | "hill_formula": "C4H10", 342 | "inchi": "InChI=1S/C4H10/c1-3-4-2/h3-4H2,1-2H3", 343 | "inchikey": "IJDNQMDRQITEOD-UHFFFAOYSA-N", 344 | "iupac_name": "butane" 345 | }, 346 | "Butene": { 347 | "SMILES": "C=CCC", 348 | "synonyms": [ 349 | "Butene", 350 | "but-1-ene", 351 | "1-butene", 352 | "Ethylethylene", 353 | "1-Butylene" 354 | ], 355 | "CAS": [ 356 | "106-98-9" 357 | ], 358 | "additional_information": "cir; pubchem id: 7844; srs id: 230482", 359 | "mode": "name; name; name", 360 | "service": "cir; pubchem; srs", 361 | "number_of_crosschecks": 3, 362 | "identifier": "Butene", 363 | "found_molecules": [], 364 | "pubchem_cid": 7844, 365 | "formula": "C4H8", 366 | "hill_formula": "C4H8", 367 | "inchi": "InChI=1S/C4H8/c1-3-4-2/h3H,1,4H2,2H3", 368 | "inchikey": "VXNZUUAINFGPBY-UHFFFAOYSA-N", 369 | "iupac_name": "but-1-ene" 370 | }, 371 | "Butyric Acid": { 372 | "SMILES": "CCCC(=O)O", 373 | "synonyms": [ 374 | "Butyric Acid", 375 | "Butanoic acid", 376 | "n-Butyric acid", 377 | "Ethylacetic acid", 378 | "1-Propanecarboxylic acid" 379 | ], 380 | "CAS": [ 381 | "107-92-6" 382 | ], 383 | "additional_information": "cas_registry; chebi id: 30772; chemeo id: 41-010-4; cir; comptox id: DTXSID8021515|QC_LEVEL:1.0; nist id: C107926; opsin; pubchem id: 264; srs id: 24729", 384 | "mode": "name; name; name; name; name; name; name; name; name", 385 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs", 386 | "number_of_crosschecks": 9, 387 | "identifier": "Butyric Acid", 388 | "found_molecules": [], 389 | "pubchem_cid": 264, 390 | "formula": "C4H8O2", 391 | "hill_formula": "C4H8O2", 392 | "inchi": "InChI=1S/C4H8O2/c1-2-3-4(5)6/h2-3H2,1H3,(H,5,6)", 393 | "inchikey": "FERIUCNNQQJTOY-UHFFFAOYSA-N", 394 | "iupac_name": "butanoic acid" 395 | }, 396 | "Caffeine": { 397 | "SMILES": "Cn1c(=O)c2c(ncn2C)n(C)c1=O", 398 | "synonyms": [ 399 | "Caffeine", 400 | "3,7-dihydro-1,3,7-trimethyl-1H-Purine-2,6-dione", 401 | "Guaranine", 402 | "1,3,7-Trimethylxanthine", 403 | "1,3,7-trimethyl-3,7-dihydro-1H-purine-2,6-dione" 404 | ], 405 | "CAS": [ 406 | "58-08-2" 407 | ], 408 | "additional_information": "cas_registry; chebi id: 27732; chemeo id: 39-014-3; cir; comptox id: DTXSID0020232|QC_LEVEL:1.0; nist id: C58082; opsin; pubchem id: 2519; srs id: 2741", 409 | "mode": "name; name; name; name; name; name; name; name; name", 410 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs", 411 | "number_of_crosschecks": 9, 412 | "identifier": "Caffeine", 413 | "found_molecules": [], 414 | "pubchem_cid": 2519, 415 | "formula": "C8H10N4O2", 416 | "hill_formula": "C8H10N4O2", 417 | "inchi": "InChI=1S/C8H10N4O2/c1-10-4-9-6-5(10)7(13)12(3)8(14)11(6)2/h4H,1-3H3", 418 | "inchikey": "RYYVLZVUVIJVGH-UHFFFAOYSA-N", 419 | "iupac_name": "1,3,7-Trimethyl-3,7-dihydro-1H-purine-2,6-dione" 420 | }, 421 | 422 | "Chloroform": { 423 | "SMILES": "ClC(Cl)Cl", 424 | "synonyms": [ 425 | "Chloroform", 426 | "Trichloromethane", 427 | "trichloro-Methane", 428 | "Formyl trichloride", 429 | "Trichloroform" 430 | ], 431 | "CAS": [ 432 | "67-66-3" 433 | ], 434 | "additional_information": "cas_registry; chebi id: 35255; chemeo id: 21-368-0; cir; comptox id: DTXSID1020306|QC_LEVEL:1.0; nist id: B6007499; opsin; pubchem id: 6212; srs id: 4317", 435 | "mode": "name; name; name; name; name; name; name; name; name", 436 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs", 437 | "number_of_crosschecks": 9, 438 | "identifier": "Chloroform", 439 | "found_molecules": [], 440 | "pubchem_cid": 6212, 441 | "formula": "CHCl3", 442 | "hill_formula": "CHCl3", 443 | "inchi": "InChI=1S/CHCl3/c2-1(3)4/h1H", 444 | "inchikey": "HEDRZPFGACZZDS-UHFFFAOYSA-N", 445 | "iupac_name": "chloroform" 446 | }, 447 | "Cholesterol": { 448 | "SMILES": "CC(C)CCC[C@@H](C)[C@H]1CC[C@H]2[C@@H]3CC=C4C[C@@H](O)CC[C@]4(C)[C@H]3CC[C@]12C", 449 | "synonyms": [ 450 | "Cholesterol", 451 | "cholest-5-en-3beta-ol", 452 | "Cholesterin", 453 | "(3beta,14beta,17alpha)-cholest-5-en-3-ol", 454 | "(3S,8S,9S,10R,13R,14S,17R)-10,13-dimethyl-17-[(2R)-6-methylheptan-2-yl]-2,3,4,7,8,9,11,12,14,15,16,17-dodecahydro-1H-cyclopenta[a]phenanthren-3-ol" 455 | ], 456 | "CAS": [ 457 | "57-88-5" 458 | ], 459 | "additional_information": "chebi id: 16113; cir; comptox id: DTXSID3022401|QC_LEVEL:1.0; opsin; pubchem id: 5997; srs id: 2717", 460 | "mode": "name; name; name; name; name; name", 461 | "service": "chebi; cir; comptox; opsin; pubchem; srs", 462 | "number_of_crosschecks": 6, 463 | "identifier": "Cholesterol", 464 | "found_molecules": [], 465 | "pubchem_cid": 5997, 466 | "formula": "C27H46O", 467 | "hill_formula": "C27H46O", 468 | "inchi": "InChI=1S/C27H46O/c1-18(2)7-6-8-19(3)23-11-12-24-22-10-9-20-17-21(28)13-15-26(20,4)25(22)14-16-27(23,24)5/h9,18-19,21-25,28H,6-8,10-17H2,1-5H3/t19-,21+,22+,23-,24+,25+,26+,27-/m1/s1", 469 | "inchikey": "HVYWMOMLDIMFJA-DPAQBDIFSA-N", 470 | "iupac_name": "(3S,8S,9S,10R,13R,14S,17R)-10,13-dimethyl-17-[(2R)-6-methylheptan-2-yl]-2,3,4,7,8,9,11,12,14,15,16,17-dodecahydro-1H-cyclopenta[a]phenanthren-3-ol" 471 | }, 472 | "Citric Acid": { 473 | "SMILES": "O=C(O)CC(O)(CC(=O)O)C(=O)O", 474 | "synonyms": [ 475 | "Citric Acid", 476 | "2-hydroxy-1,2,3-Propanetricarboxylic acid", 477 | "2-hydroxypropane-1,2,3-tricarboxylic acid", 478 | "Aciletten", 479 | "Citretten" 480 | ], 481 | "CAS": [ 482 | "77-92-9" 483 | ], 484 | "additional_information": "cas_registry; chebi id: 30769; cir; comptox id: DTXSID3020332|QC_LEVEL:1.0; nist id: C77929; opsin; pubchem id: 311; srs id: 6775", 485 | "mode": "name; name; name; name; name; name; name; name", 486 | "service": "cas_registry; chebi; cir; comptox; nist; opsin; pubchem; srs", 487 | "number_of_crosschecks": 8, 488 | "identifier": "Citric Acid", 489 | "found_molecules": [], 490 | "pubchem_cid": 311, 491 | "formula": "C6H8O7", 492 | "hill_formula": "C6H8O7", 493 | "inchi": "InChI=1S/C6H8O7/c7-3(8)1-6(13,5(11)12)2-4(9)10/h13H,1-2H2,(H,7,8)(H,9,10)(H,11,12)", 494 | "inchikey": "KRKNYBCHXYNGOX-UHFFFAOYSA-N", 495 | "iupac_name": "2-hydroxypropane-1,2,3-tricarboxylic acid" 496 | }, 497 | "Cocaine": { 498 | "SMILES": "COC(=O)[C@H]1[C@@H](OC(=O)c2ccccc2)C[C@@H]2CC[C@H]1N2C", 499 | "synonyms": [ 500 | "Cocaine", 501 | "methyl (1R,2R,3S,5S)-3-(benzoyloxy)-8-methyl-8-azabicyclo[3.2.1]octane-2-carboxylate", 502 | "Kokain", 503 | "(1R,2R,3S,5S)-2-(methoxycarbonyl)tropan-3-yl benzoate", 504 | "(-)-cocaine" 505 | ], 506 | "CAS": [ 507 | "50-36-2" 508 | ], 509 | "additional_information": "chebi id: 27958; comptox id: DTXSID2038443|QC_LEVEL:1.0; pubchem id: 446220", 510 | "mode": "name; name; name", 511 | "service": "chebi; comptox; pubchem", 512 | "number_of_crosschecks": 3, 513 | "identifier": "Cocaine", 514 | "found_molecules": [], 515 | "pubchem_cid": 446220, 516 | "formula": "C17H21NO4", 517 | "hill_formula": "C17H21NO4", 518 | "inchi": "InChI=1S/C17H21NO4/c1-18-12-8-9-13(18)15(17(20)21-2)14(10-12)22-16(19)11-6-4-3-5-7-11/h3-7,12-15H,8-10H2,1-2H3/t12-,13+,14-,15+/m0/s1", 519 | "inchikey": "ZPUCINDJVBIVPJ-LJISPDSOSA-N", 520 | "iupac_name": "methyl (1R,2R,3S,5S)-3-benzoyloxy-8-methyl-8-azabicyclo[3.2.1]octane-2-carboxylate" 521 | }, 522 | "DDT": { 523 | "SMILES": "Clc1ccc(C(c2ccc(Cl)cc2)C(Cl)(Cl)Cl)cc1", 524 | "synonyms": [ 525 | "ddt", 526 | "p,p'-DDT", 527 | "1,1,1-trichloro-2,2-bis(p-chlorophenyl)-Ethane", 528 | "\u03b1,\u03b1-Bis(p-chlorophenyl)-\u03b2,\u03b2,\u03b2-trichlorethane", 529 | "Dichlorodiphenyltrichloroethane" 530 | ], 531 | "CAS": [ 532 | "50-29-3" 533 | ], 534 | "additional_information": "cas_registry; chebi id: 16130; chemeo id: 82-476-3; cir; comptox id: DTXSID4020375|QC_LEVEL:1.0; nist id: C50293; pubchem id: 3036; srs id: 1107", 535 | "mode": "name; name; name; name; name; name; name; name", 536 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; pubchem; srs", 537 | "number_of_crosschecks": 8, 538 | "identifier": "DDT", 539 | "found_molecules": [], 540 | "pubchem_cid": 3036, 541 | "formula": "C14H9Cl5", 542 | "hill_formula": "C14H9Cl5", 543 | "inchi": "InChI=1S/C14H9Cl5/c15-11-5-1-9(2-6-11)13(14(17,18)19)10-3-7-12(16)8-4-10/h1-8,13H", 544 | "inchikey": "YVGGHNCTFXOJCH-UHFFFAOYSA-N", 545 | "iupac_name": "1-chloro-4-[2,2,2-trichloro-1-(4-chlorophenyl)ethyl]benzene" 546 | }, 547 | "DEET": { 548 | "SMILES": "CCN(CC)C(=O)c1cccc(C)c1", 549 | "synonyms": [ 550 | "deet", 551 | "N,N-Diethyl-m-toluamide", 552 | "N,N-diethyl-3-methyl-Benzamide", 553 | "N,N-Diethyl-3-methylbenzamide", 554 | "diethyltoluamide" 555 | ], 556 | "CAS": [ 557 | "134-62-3" 558 | ], 559 | "additional_information": "cas_registry; chebi id: 7071; chemeo id: 13-638-9; cir; comptox id: DTXSID2021995|QC_LEVEL:1.0; nist id: C134623; pubchem id: 4284; srs id: 36137", 560 | "mode": "name; name; name; name; name; name; name; name", 561 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; pubchem; srs", 562 | "number_of_crosschecks": 8, 563 | "identifier": "DEET", 564 | "found_molecules": [], 565 | "pubchem_cid": 4284, 566 | "formula": "C12H17NO", 567 | "hill_formula": "C12H17NO", 568 | "inchi": "InChI=1S/C12H17NO/c1-4-13(5-2)12(14)11-8-6-7-10(3)9-11/h6-9H,4-5H2,1-3H3", 569 | "inchikey": "MMOXZBCLCQITDF-UHFFFAOYSA-N", 570 | "iupac_name": "N,N-diethyl-3-methylbenzamide" 571 | }, 572 | "R-12": { 573 | "SMILES": "FC(F)(Cl)Cl", 574 | "synonyms": [ 575 | "Difluorodichloromethane", 576 | "Dwuchlorodwufluorometan", 577 | "dichlorodifluoro-Methane", 578 | "R 12, Refrigerant", 579 | "Dichlorodifluoromethane" 580 | ], 581 | "CAS": [ 582 | "75-71-8" 583 | ], 584 | "additional_information": "chemeo id: 13-346-3; nist id: C75718", 585 | "mode": "name; name", 586 | "service": "chemeo; nist", 587 | "number_of_crosschecks": 2, 588 | "identifier": "R-12", 589 | "found_molecules": [], 590 | "pubchem_cid": null, 591 | "formula": "CCl2F2", 592 | "hill_formula": "CCl2F2", 593 | "inchi": "InChI=1S/CCl2F2/c2-1(3,4)5", 594 | "inchikey": "PXBRQCKWGAHEHS-UHFFFAOYSA-N", 595 | "iupac_name": "dichloro(difluoro)methane" 596 | }, 597 | "Dopamine": { 598 | "SMILES": "NCCc1ccc(O)c(O)c1", 599 | "synonyms": [ 600 | "Dopamine", 601 | "4-(2-aminoethyl)-1,2-Benzenediol", 602 | "4-(2-aminoethyl)-Pyrocatechol", 603 | "4-(2-aminoethyl)benzene-1,2-diol", 604 | "Dopamin" 605 | ], 606 | "CAS": [ 607 | "51-61-6" 608 | ], 609 | "additional_information": "cas_registry; chebi id: 18243; chemeo id: 36-704-0; cir; comptox id: DTXSID6022420|QC_LEVEL:1.0; nist id: C51616; opsin; pubchem id: 681; srs id: 17163551", 610 | "mode": "name; name; name; name; name; name; name; name; name", 611 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs", 612 | "number_of_crosschecks": 9, 613 | "identifier": "Dopamine", 614 | "found_molecules": [], 615 | "pubchem_cid": 681, 616 | "formula": "C8H11NO2", 617 | "hill_formula": "C8H11NO2", 618 | "inchi": "InChI=1S/C8H11NO2/c9-4-3-6-1-2-7(10)8(11)5-6/h1-2,5,10-11H,3-4,9H2", 619 | "inchikey": "VYFYYTLLBUKUHU-UHFFFAOYSA-N", 620 | "iupac_name": "4-(2-aminoethyl)benzene-1,2-diol" 621 | }, 622 | "Adrenaline": { 623 | "SMILES": "CNC[C@H](O)c1ccc(O)c(O)c1", 624 | "synonyms": [ 625 | "Adrenaline", 626 | "Epinephrine", 627 | "4-[(1R)-1-Hydroxy-2-(methylamino)ethyl]benzene-1,2-diol", 628 | "(-)-Epinephrine", 629 | "(R)-4-[1-hydroxy-2-(methylamino)ethyl]-1,2-Benzenediol" 630 | ], 631 | "CAS": [ 632 | "51-43-4" 633 | ], 634 | "additional_information": "cir; comptox id: DTXSID5022986|QC_LEVEL:1.0; nist id: C51434; pubchem id: 5816", 635 | "mode": "name; name; name; name", 636 | "service": "cir; comptox; nist; pubchem", 637 | "number_of_crosschecks": 4, 638 | "identifier": "Adrenaline", 639 | "found_molecules": [], 640 | "pubchem_cid": 5816, 641 | "formula": "C9H13NO3", 642 | "hill_formula": "C9H13NO3", 643 | "inchi": "InChI=1S/C9H13NO3/c1-10-5-9(13)6-2-3-7(11)8(12)4-6/h2-4,9-13H,5H2,1H3/t9-/m0/s1", 644 | "inchikey": "UCTWMZQNUQWSLP-VIFPVBQESA-N", 645 | "iupac_name": "4-[(1R)-1-hydroxy-2-(methylamino)ethyl]benzene-1,2-diol" 646 | }, 647 | "Ethane": { 648 | "SMILES": "CC", 649 | "synonyms": [ 650 | "Ethane", 651 | "Bimethyl", 652 | "Dimethyl", 653 | "Methylmethane", 654 | "Ethyl hydride" 655 | ], 656 | "CAS": [ 657 | "74-84-0" 658 | ], 659 | "additional_information": "cas_registry; chebi id: 42266; chemeo id: 31-101-4; cir; comptox id: DTXSID6026377|QC_LEVEL:1.0; nist id: R633468; opsin; pubchem id: 6324; srs id: 5082", 660 | "mode": "name; name; name; name; name; name; name; name; name", 661 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs", 662 | "number_of_crosschecks": 9, 663 | "identifier": "Ethane", 664 | "found_molecules": [], 665 | "pubchem_cid": 6324, 666 | "formula": "C2H6", 667 | "hill_formula": "C2H6", 668 | "inchi": "InChI=1S/C2H6/c1-2/h1-2H3", 669 | "inchikey": "OTMSDBZUPAUEDD-UHFFFAOYSA-N", 670 | "iupac_name": "ethane" 671 | }, 672 | "Ethylene": { 673 | "SMILES": "C=C", 674 | "synonyms": [ 675 | "Ethylene", 676 | "Ethene", 677 | "Acetene", 678 | "Elayl", 679 | "Bicarburretted hydrogen" 680 | ], 681 | "CAS": [ 682 | "74-85-1" 683 | ], 684 | "additional_information": "cas_registry; chemeo id: 56-863-2; cir; comptox id: DTXSID1026378|QC_LEVEL:1.0; nist id: C2813629; opsin; pubchem id: 6325; srs id: 5090", 685 | "mode": "name; name; name; name; name; name; name; name", 686 | "service": "cas_registry; chemeo; cir; comptox; nist; opsin; pubchem; srs", 687 | "number_of_crosschecks": 8, 688 | "identifier": "Ethylene", 689 | "found_molecules": [], 690 | "pubchem_cid": 6325, 691 | "formula": "C2H4", 692 | "hill_formula": "C2H4", 693 | "inchi": "InChI=1S/C2H4/c1-2/h1-2H2", 694 | "inchikey": "VGGSQFUCUMXWEO-UHFFFAOYSA-N", 695 | "iupac_name": "ethene" 696 | }, 697 | "Ether": { 698 | "SMILES": "CCOCC", 699 | "synonyms": [ 700 | "Ether", 701 | "Ethyl ether", 702 | "Diethyl ether", 703 | "1,1'-oxybis-ethane", 704 | "Anaesthetic ether" 705 | ], 706 | "CAS": [ 707 | "60-29-7" 708 | ], 709 | "additional_information": "cas_registry; chemeo id: 52-430-6; cir; nist id: C60297; pubchem id: 3283; srs id: 3335", 710 | "mode": "name; name; name; name; name; name", 711 | "service": "cas_registry; chemeo; cir; nist; pubchem; srs", 712 | "number_of_crosschecks": 6, 713 | "identifier": "Ether", 714 | "found_molecules": [], 715 | "pubchem_cid": 3283, 716 | "formula": "C4H10O", 717 | "hill_formula": "C4H10O", 718 | "inchi": "InChI=1S/C4H10O/c1-3-5-4-2/h3-4H2,1-2H3", 719 | "inchikey": "RTZKZFJDLAIYFH-UHFFFAOYSA-N", 720 | "iupac_name": "ethoxyethane" 721 | }, 722 | "Ethanol": { 723 | "SMILES": "CCO", 724 | "synonyms": [ 725 | "Ethanol", 726 | "Alcohol", 727 | "Ethyl alcohol", 728 | "Algrain", 729 | "Anhydrol" 730 | ], 731 | "CAS": [ 732 | "64-17-5" 733 | ], 734 | "additional_information": "cas_registry; chebi id: 16236; chemeo id: 35-653-8; cir; comptox id: DTXSID9020584|QC_LEVEL:1.0; nist id: C1516081; opsin; pubchem id: 702; srs id: 3822", 735 | "mode": "name; name; name; name; name; name; name; name; name", 736 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs", 737 | "number_of_crosschecks": 9, 738 | "identifier": "Ethanol", 739 | "found_molecules": [], 740 | "pubchem_cid": 702, 741 | "formula": "C2H6O", 742 | "hill_formula": "C2H6O", 743 | "inchi": "InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3", 744 | "inchikey": "LFQSCWFLJHTTHZ-UHFFFAOYSA-N", 745 | "iupac_name": "ethanol" 746 | }, 747 | "Ethylenediaminetetraacetic Acid": { 748 | "SMILES": "O=C(O)CN(CCN(CC(=O)O)CC(=O)O)CC(=O)O", 749 | "synonyms": [ 750 | "Ethylenediaminetetraacetic Acid", 751 | "(ethylenedinitrilo)tetra-Acetic acid", 752 | "Edetic Acid", 753 | "N,N'-1,2-ethanediylbis[N-(carboxymethyl)-Glycine", 754 | "2,2',2'',2'''-(ethane-1,2-diyldinitrilo)tetraacetic acid" 755 | ], 756 | "CAS": [ 757 | "60-00-4" 758 | ], 759 | "additional_information": "cas_registry; chebi id: 4735; chemeo id: 119-689-6; cir; comptox id: DTXSID6022977|QC_LEVEL:1.0; nist id: C60004; opsin; pubchem id: 6049; srs id: 3228", 760 | "mode": "name; name; name; name; name; name; name; name; name", 761 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs", 762 | "number_of_crosschecks": 9, 763 | "identifier": "Ethylenediaminetetraacetic Acid", 764 | "found_molecules": [], 765 | "pubchem_cid": 6049, 766 | "formula": "C10H16N2O8", 767 | "hill_formula": "C10H16N2O8", 768 | "inchi": "InChI=1S/C10H16N2O8/c13-7(14)3-11(4-8(15)16)1-2-12(5-9(17)18)6-10(19)20/h1-6H2,(H,13,14)(H,15,16)(H,17,18)(H,19,20)", 769 | "inchikey": "KCXVZYZYPLLWCC-UHFFFAOYSA-N", 770 | "iupac_name": "2-[2-[bis(carboxymethyl)amino]ethyl-(carboxymethyl)amino]acetic acid" 771 | }, 772 | "Fluoxetine": { 773 | "SMILES": "CNCCC(Oc1ccc(C(F)(F)F)cc1)c1ccccc1", 774 | "synonyms": [ 775 | "Fluoxetine", 776 | "N-Methyl-3-phenyl-3-[4-(trifluoromethyl)phenoxy]propan-1-amine", 777 | "Prozac", 778 | "N-methyl-\u03b3-[4-(trifluoromethyl)phenoxy]-Benzenepropanamine", 779 | "(\u00b1)-N-methyl-\u03b3-[4-(trifluoromethyl)phenoxy]-Benzenepropanamine" 780 | ], 781 | "CAS": [ 782 | "54910-89-3" 783 | ], 784 | "additional_information": "cas_registry; chemeo id: 106-550-3; cir; comptox id: DTXSID7023067|QC_LEVEL:1.0; nist id: C54910893; pubchem id: 3386; srs id: 1735870", 785 | "mode": "name; name; name; name; name; name; name", 786 | "service": "cas_registry; chemeo; cir; comptox; nist; pubchem; srs", 787 | "number_of_crosschecks": 7, 788 | "identifier": "Fluoxetine", 789 | "found_molecules": [], 790 | "pubchem_cid": 3386, 791 | "formula": "C17H18F3NO", 792 | "hill_formula": "C17H18F3NO", 793 | "inchi": "InChI=1S/C17H18F3NO/c1-21-12-11-16(13-5-3-2-4-6-13)22-15-9-7-14(8-10-15)17(18,19)20/h2-10,16,21H,11-12H2,1H3", 794 | "inchikey": "RTHCYVBBDHJXIQ-UHFFFAOYSA-N", 795 | "iupac_name": "N-methyl-3-phenyl-3-[4-(trifluoromethyl)phenoxy]propan-1-amine" 796 | }, 797 | "Formaldehyde": { 798 | "SMILES": "C=O", 799 | "synonyms": [ 800 | "Formaldehyde", 801 | "Formalin", 802 | "Methanal", 803 | "bfv", 804 | "Fannoform" 805 | ], 806 | "CAS": [ 807 | "50-00-0" 808 | ], 809 | "additional_information": "cas_registry; chebi id: 16842; chemeo id: 44-234-3; cir; comptox id: DTXSID7020637|QC_LEVEL:1.0; nist id: C1664999; opsin; pubchem id: 712; srs id: 1008", 810 | "mode": "name; name; name; name; name; name; name; name; name", 811 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs", 812 | "number_of_crosschecks": 9, 813 | "identifier": "Formaldehyde", 814 | "found_molecules": [], 815 | "pubchem_cid": 712, 816 | "formula": "CH2O", 817 | "hill_formula": "CH2O", 818 | "inchi": "InChI=1S/CH2O/c1-2/h1H2", 819 | "inchikey": "WSFSSNUMVMOOMR-UHFFFAOYSA-N", 820 | "iupac_name": "formaldehyde" 821 | }, 822 | "Formic Acid": { 823 | "SMILES": "O=CO", 824 | "synonyms": [ 825 | "Formic Acid", 826 | "Aminic acid", 827 | "Methanoic acid", 828 | "Formylic acid", 829 | "bilorin" 830 | ], 831 | "CAS": [ 832 | "64-18-6" 833 | ], 834 | "additional_information": "cas_registry; chebi id: 30751; chemeo id: 39-377-1; cir; comptox id: DTXSID2024115|QC_LEVEL:1.0; nist id: C64186; opsin; pubchem id: 284; srs id: 3830", 835 | "mode": "name; name; name; name; name; name; name; name; name", 836 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs", 837 | "number_of_crosschecks": 9, 838 | "identifier": "Formic Acid", 839 | "found_molecules": [], 840 | "pubchem_cid": 284, 841 | "formula": "CH2O2", 842 | "hill_formula": "CH2O2", 843 | "inchi": "InChI=1S/CH2O2/c2-1-3/h1H,(H,2,3)", 844 | "inchikey": "BDAGIHXWWSANSR-UHFFFAOYSA-N", 845 | "iupac_name": "formic acid" 846 | }, 847 | "Glucose": { 848 | "SMILES": "O=C[C@H](O)[C@@H](O)[C@H](O)[C@H](O)CO", 849 | "synonyms": [ 850 | "Glucose", 851 | "D-Glucose", 852 | "DL-Glucose", 853 | "Cartose", 854 | "Cerelose" 855 | ], 856 | "CAS": [ 857 | "58367-01-4" 858 | ], 859 | "additional_information": "comptox id: DTXSID4048729|QC_LEVEL:1.0; nist id: C3458284; opsin", 860 | "mode": "name; name; name", 861 | "service": "comptox; nist; opsin", 862 | "number_of_crosschecks": 3, 863 | "identifier": "Glucose", 864 | "found_molecules": [], 865 | "pubchem_cid": null, 866 | "formula": "C6H12O6", 867 | "hill_formula": "C6H12O6", 868 | "inchi": "InChI=1S/C6H12O6/c7-1-3(9)5(11)6(12)4(10)2-8/h1,3-6,8-12H,2H2/t3-,4+,5+,6+/m0/s1", 869 | "inchikey": "GZCGUPFRVQAUEE-SLPGGIOYSA-N", 870 | "iupac_name": "(2R,3S,4R,5R)-2,3,4,5,6-pentahydroxyhexanal" 871 | }, 872 | "Glycerol": { 873 | "SMILES": "OCC(O)CO", 874 | "synonyms": [ 875 | "Glycerol", 876 | "1,2,3-Propanetriol", 877 | "Glycerin", 878 | "Glycerine", 879 | "Propane-1,2,3-triol" 880 | ], 881 | "CAS": [ 882 | "56-81-5" 883 | ], 884 | "additional_information": "cas_registry; chemeo id: 47-118-9; cir; comptox id: DTXSID9020663|QC_LEVEL:1.0; nist id: C56815; opsin; pubchem id: 753; srs id: 2279", 885 | "mode": "name; name; name; name; name; name; name; name", 886 | "service": "cas_registry; chemeo; cir; comptox; nist; opsin; pubchem; srs", 887 | "number_of_crosschecks": 8, 888 | "identifier": "Glycerol", 889 | "found_molecules": [], 890 | "pubchem_cid": 753, 891 | "formula": "C3H8O3", 892 | "hill_formula": "C3H8O3", 893 | "inchi": "InChI=1S/C3H8O3/c4-1-3(6)2-5/h3-6H,1-2H2", 894 | "inchikey": "PEDCQBHIVMGVHV-UHFFFAOYSA-N", 895 | "iupac_name": "propane-1,2,3-triol" 896 | }, 897 | "Guanine": { 898 | "SMILES": "Nc1nc2nc[nH]c2c(=O)[nH]1", 899 | "synonyms": [ 900 | "Guanine", 901 | "2-amino-1,7-dihydro-6H-Purin-6-one", 902 | "2-amino-1,9-dihydro-6H-purin-6-one", 903 | "2-amino-1,7-dihydropurin-6-one", 904 | "2-amino-Hypoxanthine" 905 | ], 906 | "CAS": [ 907 | "73-40-5" 908 | ], 909 | "additional_information": "opsin; pubchem id: 135398634; srs id: 5009", 910 | "mode": "name; name; name", 911 | "service": "opsin; pubchem; srs", 912 | "number_of_crosschecks": 3, 913 | "identifier": "Guanine", 914 | "found_molecules": [], 915 | "pubchem_cid": 135398634, 916 | "formula": "C5H5N5O", 917 | "hill_formula": "C5H5N5O", 918 | "inchi": "InChI=1S/C5H5N5O/c6-5-9-3-2(4(11)10-5)7-1-8-3/h1H,(H4,6,7,8,9,10,11)", 919 | "inchikey": "UYTPUPDQBNUYGX-UHFFFAOYSA-N", 920 | "iupac_name": "2-amino-1,7-dihydropurin-6-one" 921 | }, 922 | "Hydrochloric Acid": { 923 | "SMILES": "Cl", 924 | "synonyms": [ 925 | "Hydrochloric Acid", 926 | "Hydrogen chloride", 927 | "Chlorohydric acid", 928 | "Muriatic acid", 929 | "Dilute hydrochloric acid" 930 | ], 931 | "CAS": [ 932 | "7647-01-0" 933 | ], 934 | "additional_information": "cas_registry; chemeo id: 57-172-8; comptox id: DTXSID2020711|QC_LEVEL:1.0; nist id: C7698057; opsin; pubchem id: 313; srs id: 152231", 935 | "mode": "name; name; name; name; name; name; name", 936 | "service": "cas_registry; chemeo; comptox; nist; opsin; pubchem; srs", 937 | "number_of_crosschecks": 7, 938 | "identifier": "Hydrochloric Acid", 939 | "found_molecules": [], 940 | "pubchem_cid": 313, 941 | "formula": "HCl", 942 | "hill_formula": "ClH", 943 | "inchi": "InChI=1S/ClH/h1H", 944 | "inchikey": "VEXZGXHMUGYJMC-UHFFFAOYSA-N", 945 | "iupac_name": "Hydrogen chloride" 946 | }, 947 | "Hydrogen Peroxide": { 948 | "SMILES": "OO", 949 | "synonyms": [ 950 | "Hydrogen Peroxide", 951 | "Albone", 952 | "Inhibine", 953 | "Perhydrol", 954 | "dihydrogen dioxide" 955 | ], 956 | "CAS": [ 957 | "7722-84-1" 958 | ], 959 | "additional_information": "cas_registry; chebi id: 16240; chemeo id: 67-803-6; cir; comptox id: DTXSID2020715|QC_LEVEL:1.0; nist id: C7722841; opsin; pubchem id: 784; srs id: 153015", 960 | "mode": "name; name; name; name; name; name; name; name; name", 961 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs", 962 | "number_of_crosschecks": 9, 963 | "identifier": "Hydrogen Peroxide", 964 | "found_molecules": [], 965 | "pubchem_cid": 784, 966 | "formula": "H2O2", 967 | "hill_formula": "H2O2", 968 | "inchi": "InChI=1S/H2O2/c1-2/h1-2H", 969 | "inchikey": "MHAJPDPJQMAIIY-UHFFFAOYSA-N", 970 | "iupac_name": "hydrogen peroxide" 971 | }, 972 | "Hydrogen Sulfide": { 973 | "SMILES": "S", 974 | "synonyms": [ 975 | "Hydrogen Sulfide", 976 | "Hydrosulfuric acid", 977 | "dihydrogen monosulfide", 978 | "Stink damp", 979 | "Sulfur hydride" 980 | ], 981 | "CAS": [ 982 | "7783-06-4" 983 | ], 984 | "additional_information": "cas_registry; chebi id: 16136; chemeo id: 51-105-8; cir; comptox id: DTXSID4024149|QC_LEVEL:1.0; nist id: C7783064; opsin; pubchem id: 402; srs id: 154518", 985 | "mode": "name; name; name; name; name; name; name; name; name", 986 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs", 987 | "number_of_crosschecks": 9, 988 | "identifier": "Hydrogen Sulfide", 989 | "found_molecules": [], 990 | "pubchem_cid": 402, 991 | "formula": "H2S", 992 | "hill_formula": "H2S", 993 | "inchi": "InChI=1S/H2S/h1H2", 994 | "inchikey": "RWSOTUBLDIXVET-UHFFFAOYSA-N", 995 | "iupac_name": "sulfane" 996 | }, 997 | "Ibuprofen": { 998 | "SMILES": "CC(C)Cc1ccc(C(C)C(=O)O)cc1", 999 | "synonyms": [ 1000 | "Ibuprofen", 1001 | "\u03b1-methyl-4-(2-methylpropyl)-Benzeneacetic acid", 1002 | "p-isobutyl-Hydratropic acid", 1003 | "2-[4-(2-Methylpropyl)phenyl]propanoic acid", 1004 | "\u03b1-Methyl-4-(2-methylpropyl)benzeneacetic acid" 1005 | ], 1006 | "CAS": [ 1007 | "15687-27-1" 1008 | ], 1009 | "additional_information": "cas_registry; chemeo id: 20-897-4; cir; comptox id: DTXSID5020732|QC_LEVEL:1.0; nist id: T999947192; opsin; pubchem id: 3672; srs id: 200931", 1010 | "mode": "name; name; name; name; name; name; name; name", 1011 | "service": "cas_registry; chemeo; cir; comptox; nist; opsin; pubchem; srs", 1012 | "number_of_crosschecks": 8, 1013 | "identifier": "Ibuprofen", 1014 | "found_molecules": [], 1015 | "pubchem_cid": 3672, 1016 | "formula": "C13H18O2", 1017 | "hill_formula": "C13H18O2", 1018 | "inchi": "InChI=1S/C13H18O2/c1-9(2)8-11-4-6-12(7-5-11)10(3)13(14)15/h4-7,9-10H,8H2,1-3H3,(H,14,15)", 1019 | "inchikey": "HEFNNWSXXWATRW-UHFFFAOYSA-N", 1020 | "iupac_name": "2-[4-(2-methylpropyl)phenyl]propanoic acid" 1021 | }, 1022 | "Indigo": { 1023 | "SMILES": "O=C1C(=C2Nc3ccccc3C2=O)Nc2ccccc21", 1024 | "synonyms": [ 1025 | "Indigo", 1026 | "2-(1,3-dihydro-3-oxo-2H-indol-2-ylidene)-1,2-dihydro-3H-Indol-3-one", 1027 | "Indigo Blue", 1028 | "[\u03942,2\u2032-Biindoline]-3,3\u2032-dione", 1029 | "Indigo Pure BASF" 1030 | ], 1031 | "CAS": [ 1032 | "482-89-3" 1033 | ], 1034 | "additional_information": "cas_registry; chemeo id: 114-901-4; comptox id: DTXSID3026279|QC_LEVEL:1.0; srs id: 47183", 1035 | "mode": "name; name; name; name", 1036 | "service": "cas_registry; chemeo; comptox; srs", 1037 | "number_of_crosschecks": 4, 1038 | "identifier": "Indigo", 1039 | "found_molecules": [], 1040 | "pubchem_cid": null, 1041 | "formula": "C16H10N2O2", 1042 | "hill_formula": "C16H10N2O2", 1043 | "inchi": "InChI=1S/C16H10N2O2/c19-15-9-5-1-3-7-11(9)17-13(15)14-16(20)10-6-2-4-8-12(10)18-14/h1-8,17-18H", 1044 | "inchikey": "COHYTHOBJLSHDF-UHFFFAOYSA-N", 1045 | "iupac_name": "[2,2'-biindolinylidene]-3,3'-dione" 1046 | }, 1047 | "Isooctane": { 1048 | "SMILES": "CC(C)CC(C)(C)C", 1049 | "synonyms": [ 1050 | "Isooctane", 1051 | "2,2,4-Trimethylpentane", 1052 | "Isobutyltrimethylmethane", 1053 | "2,2,4-trimethyl-Pentane", 1054 | "iso-Octane" 1055 | ], 1056 | "CAS": [ 1057 | "540-84-1" 1058 | ], 1059 | "additional_information": "cas_registry; chebi id: 62805; chemeo id: 55-702-1; cir; nist id: C540841; opsin; pubchem id: 10907", 1060 | "mode": "name; name; name; name; name; name; name", 1061 | "service": "cas_registry; chebi; chemeo; cir; nist; opsin; pubchem", 1062 | "number_of_crosschecks": 7, 1063 | "identifier": "Isooctane", 1064 | "found_molecules": [], 1065 | "pubchem_cid": 10907, 1066 | "formula": "C8H18", 1067 | "hill_formula": "C8H18", 1068 | "inchi": "InChI=1S/C8H18/c1-7(2)6-8(3,4)5/h7H,6H2,1-5H3", 1069 | "inchikey": "NHTMVDHEPJAVLT-UHFFFAOYSA-N", 1070 | "iupac_name": "2,2,4-trimethylpentane" 1071 | }, 1072 | "Isoprene": { 1073 | "SMILES": "C=CC(=C)C", 1074 | "synonyms": [ 1075 | "Isoprene", 1076 | "2-methyl-1,3-Butadiene", 1077 | "2-Methylbutadiene", 1078 | "2-methylbuta-1,3-diene", 1079 | "Isopentadiene" 1080 | ], 1081 | "CAS": [ 1082 | "78-79-5" 1083 | ], 1084 | "additional_information": "cas_registry; chebi id: 35194; chemeo id: 10-713-8; cir; comptox id: DTXSID2020761|QC_LEVEL:1.0; nist id: C78795; opsin; pubchem id: 6557; srs id: 7328", 1085 | "mode": "name; name; name; name; name; name; name; name; name", 1086 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs", 1087 | "number_of_crosschecks": 9, 1088 | "identifier": "Isoprene", 1089 | "found_molecules": [], 1090 | "pubchem_cid": 6557, 1091 | "formula": "C5H8", 1092 | "hill_formula": "C5H8", 1093 | "inchi": "InChI=1S/C5H8/c1-4-5(2)3/h4H,1-2H2,3H3", 1094 | "inchikey": "RRHGJUQNOFWUDK-UHFFFAOYSA-N", 1095 | "iupac_name": "2-methylbuta-1,3-diene" 1096 | }, 1097 | "l-Dopa": { 1098 | "SMILES": "N[C@@H](Cc1ccc(O)c(O)c1)C(=O)O", 1099 | "synonyms": [ 1100 | "l-Dopa", 1101 | "Levodopa", 1102 | "3-Hydroxy-L-tyrosine", 1103 | "(2S)-2-amino-3-(3,4-dihydroxyphenyl)propanoic acid", 1104 | "L-3-(3,4-dihydroxyphenyl)-Alanine" 1105 | ], 1106 | "CAS": [ 1107 | "59-92-7" 1108 | ], 1109 | "additional_information": "chebi id: 15765; comptox id: DTXSID9023209|QC_LEVEL:1.0; nist id: C63843; opsin; pubchem id: 6047", 1110 | "mode": "name; name; name; name; name", 1111 | "service": "chebi; comptox; nist; opsin; pubchem", 1112 | "number_of_crosschecks": 5, 1113 | "identifier": "l-Dopa", 1114 | "found_molecules": [], 1115 | "pubchem_cid": 6047, 1116 | "formula": "C9H11NO4", 1117 | "hill_formula": "C9H11NO4", 1118 | "inchi": "InChI=1S/C9H11NO4/c10-6(9(13)14)3-5-1-2-7(11)8(12)4-5/h1-2,4,6,11-12H,3,10H2,(H,13,14)/t6-/m0/s1", 1119 | "inchikey": "WTDRDQBEARUVNC-LURJTMIESA-N", 1120 | "iupac_name": "(2S)-2-amino-3-(3,4-dihydroxyphenyl)propanoic acid" 1121 | }, 1122 | "Methane": { 1123 | "SMILES": "C", 1124 | "synonyms": [ 1125 | "Methane", 1126 | "Marsh gas", 1127 | "Methyl hydride", 1128 | "Biogas", 1129 | "Fire Damp" 1130 | ], 1131 | "CAS": [ 1132 | "74-82-8" 1133 | ], 1134 | "additional_information": "cas_registry; chebi id: 16183; chemeo id: 27-471-9; cir; comptox id: DTXSID8025545|QC_LEVEL:1.0; nist id: C74828; opsin; pubchem id: 297; srs id: 5066", 1135 | "mode": "name; name; name; name; name; name; name; name; name", 1136 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs", 1137 | "number_of_crosschecks": 9, 1138 | "identifier": "Methane", 1139 | "found_molecules": [], 1140 | "pubchem_cid": 297, 1141 | "formula": "CH4", 1142 | "hill_formula": "CH4", 1143 | "inchi": "InChI=1S/CH4/h1H4", 1144 | "inchikey": "VNWKTOKETHGBQD-UHFFFAOYSA-N", 1145 | "iupac_name": "methane" 1146 | }, 1147 | "Methyl Alcohol": { 1148 | "SMILES": "CO", 1149 | "synonyms": [ 1150 | "Methyl Alcohol", 1151 | "Methanol", 1152 | "Carbinol", 1153 | "Methyl hydroxide", 1154 | "wood alcohol" 1155 | ], 1156 | "CAS": [ 1157 | "67-56-1" 1158 | ], 1159 | "additional_information": "cas_registry; chemeo id: 69-136-5; cir; comptox id: DTXSID2021731|QC_LEVEL:1.0; nist id: C1849292; opsin; pubchem id: 887; srs id: 4283", 1160 | "mode": "name; name; name; name; name; name; name; name", 1161 | "service": "cas_registry; chemeo; cir; comptox; nist; opsin; pubchem; srs", 1162 | "number_of_crosschecks": 8, 1163 | "identifier": "Methyl Alcohol", 1164 | "found_molecules": [], 1165 | "pubchem_cid": 887, 1166 | "formula": "CH4O", 1167 | "hill_formula": "CH4O", 1168 | "inchi": "InChI=1S/CH4O/c1-2/h2H,1H3", 1169 | "inchikey": "OKKJLVBELUTLKV-UHFFFAOYSA-N", 1170 | "iupac_name": "methanol" 1171 | }, 1172 | "Methylphenidate": { 1173 | "SMILES": "COC(=O)C(c1ccccc1)C1CCCCN1", 1174 | "synonyms": [ 1175 | "Methylphenidate", 1176 | "Calocain", 1177 | "2-Piperidineacetic acid, \u03b1-phenyl-, methyl ester", 1178 | "Methylphenidan", 1179 | "Concerta" 1180 | ], 1181 | "CAS": [ 1182 | "113-45-1" 1183 | ], 1184 | "additional_information": "cas_registry; chemeo id: 42-778-2; cir; comptox id: DTXSID5023299|QC_LEVEL:1.0; nist id: C113451; pubchem id: 4158", 1185 | "mode": "name; name; name; name; name; name", 1186 | "service": "cas_registry; chemeo; cir; comptox; nist; pubchem", 1187 | "number_of_crosschecks": 6, 1188 | "identifier": "Methylphenidate", 1189 | "found_molecules": [], 1190 | "pubchem_cid": 4158, 1191 | "formula": "C14H19NO2", 1192 | "hill_formula": "C14H19NO2", 1193 | "inchi": "InChI=1S/C14H19NO2/c1-17-14(16)13(11-7-3-2-4-8-11)12-9-5-6-10-15-12/h2-4,7-8,12-13,15H,5-6,9-10H2,1H3", 1194 | "inchikey": "DUGOZIWVEXMGBE-UHFFFAOYSA-N", 1195 | "iupac_name": "methyl 2-phenyl-2-piperidin-2-ylacetate" 1196 | }, 1197 | "Naphthalene": { 1198 | "SMILES": "c1ccc2ccccc2c1", 1199 | "synonyms": [ 1200 | "Naphthalene", 1201 | "Albocarbon", 1202 | "Tar camphor", 1203 | "White tar", 1204 | "Naphthalin" 1205 | ], 1206 | "CAS": [ 1207 | "91-20-3" 1208 | ], 1209 | "additional_information": "cas_registry; chebi id: 16482; chemeo id: 69-516-3; cir; comptox id: DTXSID8020913|QC_LEVEL:1.0; nist id: C1146652; opsin; pubchem id: 931; srs id: 13326", 1210 | "mode": "name; name; name; name; name; name; name; name; name", 1211 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs", 1212 | "number_of_crosschecks": 9, 1213 | "identifier": "Naphthalene", 1214 | "found_molecules": [], 1215 | "pubchem_cid": 931, 1216 | "formula": "C10H8", 1217 | "hill_formula": "C10H8", 1218 | "inchi": "InChI=1S/C10H8/c1-2-6-10-8-4-3-7-9(10)5-1/h1-8H", 1219 | "inchikey": "UFWIBTONFRDIAS-UHFFFAOYSA-N", 1220 | "iupac_name": "naphthalene" 1221 | }, 1222 | "Nicotine": { 1223 | "SMILES": "CN1CCC[C@H]1c1cccnc1", 1224 | "synonyms": [ 1225 | "Nicotine", 1226 | "3-[(2S)-1-Methylpyrrolidin-2-yl]pyridine", 1227 | "(-)-Nicotine", 1228 | "1-methyl-2-(3-pyridal)-Pyrrolidine", 1229 | "1-methyl-2-(3-pyridal)-pyrrolidene" 1230 | ], 1231 | "CAS": [ 1232 | "54-11-5" 1233 | ], 1234 | "additional_information": "cir; comptox id: DTXSID1020930|QC_LEVEL:1.0; pubchem id: 89594", 1235 | "mode": "name; name; name", 1236 | "service": "cir; comptox; pubchem", 1237 | "number_of_crosschecks": 3, 1238 | "identifier": "Nicotine", 1239 | "found_molecules": [], 1240 | "pubchem_cid": 89594, 1241 | "formula": "C10H14N2", 1242 | "hill_formula": "C10H14N2", 1243 | "inchi": "InChI=1S/C10H14N2/c1-12-7-3-5-10(12)9-4-2-6-11-8-9/h2,4,6,8,10H,3,5,7H2,1H3/t10-/m0/s1", 1244 | "inchikey": "SNICXCGAKADSCV-JTQLQIEISA-N", 1245 | "iupac_name": "3-[(2S)-1-methylpyrrolidin-2-yl]pyridine" 1246 | }, 1247 | "Nitric Acid": { 1248 | "SMILES": "O=[N+]([O-])O", 1249 | "synonyms": [ 1250 | "Nitric Acid", 1251 | "Aqua fortis", 1252 | "Azotic acid", 1253 | "Hydrogen nitrate", 1254 | "Nitryl hydroxide" 1255 | ], 1256 | "CAS": [ 1257 | "7697-37-2" 1258 | ], 1259 | "additional_information": "cas_registry; chebi id: 48107; cir; comptox id: DTXSID5029685|QC_LEVEL:1.0; nist id: C7697372; opsin; pubchem id: 944; srs id: 152686", 1260 | "mode": "name; name; name; name; name; name; name; name", 1261 | "service": "cas_registry; chebi; cir; comptox; nist; opsin; pubchem; srs", 1262 | "number_of_crosschecks": 8, 1263 | "identifier": "Nitric Acid", 1264 | "found_molecules": [], 1265 | "pubchem_cid": 944, 1266 | "formula": "HNO3", 1267 | "hill_formula": "HNO3", 1268 | "inchi": "InChI=1S/HNO3/c2-1(3)4/h(H,2,3,4)", 1269 | "inchikey": "GRYLNZFGIOXLOG-UHFFFAOYSA-N", 1270 | "iupac_name": "nitric acid" 1271 | }, 1272 | "Nitric Oxide": { 1273 | "SMILES": "[N]=O", 1274 | "synonyms": [ 1275 | "Nitric Oxide", 1276 | "Nitrogen monoxide", 1277 | "Nitrosyl radical", 1278 | "nitrogen oxide", 1279 | "oxo-Amidogen" 1280 | ], 1281 | "CAS": [ 1282 | "10102-43-9" 1283 | ], 1284 | "additional_information": "cas_registry; chemeo id: 26-216-3; comptox id: DTXSID1020938|QC_LEVEL:1.0; nist id: C10102439; pubchem id: 145068; srs id: 167916", 1285 | "mode": "name; name; name; name; name; name", 1286 | "service": "cas_registry; chemeo; comptox; nist; pubchem; srs", 1287 | "number_of_crosschecks": 6, 1288 | "identifier": "Nitric Oxide", 1289 | "found_molecules": [], 1290 | "pubchem_cid": 145068, 1291 | "formula": "NO", 1292 | "hill_formula": "NO", 1293 | "inchi": "InChI=1S/NO/c1-2", 1294 | "inchikey": "MWUXSHHQAYIFBG-UHFFFAOYSA-N", 1295 | "iupac_name": "nitrogen(II) oxide" 1296 | }, 1297 | 1298 | "Nitroglycerin": { 1299 | "SMILES": "O=[N+]([O-])OCC(CO[N+](=O)[O-])O[N+](=O)[O-]", 1300 | "synonyms": [ 1301 | "Nitroglycerin", 1302 | "1,2,3-Propanetriol, trinitrate", 1303 | "1,2,3-Propanetriol, 1,2,3-trinitrate", 1304 | "Anginine", 1305 | "1,2,3-propanetriyl nitrate" 1306 | ], 1307 | "CAS": [ 1308 | "55-63-0" 1309 | ], 1310 | "additional_information": "cas_registry; chebi id: 28787; chemeo id: 41-624-3; cir; comptox id: DTXSID1021407|QC_LEVEL:1.0; nist id: C55630; opsin; pubchem id: 4510; srs id: 1941", 1311 | "mode": "name; name; name; name; name; name; name; name; name", 1312 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs", 1313 | "number_of_crosschecks": 9, 1314 | "identifier": "Nitroglycerin", 1315 | "found_molecules": [], 1316 | "pubchem_cid": 4510, 1317 | "formula": "C3H5N3O9", 1318 | "hill_formula": "C3H5N3O9", 1319 | "inchi": "InChI=1S/C3H5N3O9/c7-4(8)13-1-3(15-6(11)12)2-14-5(9)10/h3H,1-2H2", 1320 | "inchikey": "SNIOPGDIGTZGOP-UHFFFAOYSA-N", 1321 | "iupac_name": "1,3-dinitrooxypropan-2-yl nitrate" 1322 | }, 1323 | "Penicillin": { 1324 | "SMILES": "CC1(C)SC2C(NC(=O)Cc3ccccc3)C(=O)N2C1C(=O)O", 1325 | "synonyms": [ 1326 | "Penicillin", 1327 | "Penicillin G", 1328 | "3,3-dimethyl-7-oxo-6-[(2-phenylacetyl)amino]- (2S,5R,6R)-4-Thia-1-azabicyclo[3.2.0]heptane-2-carboxylic acid", 1329 | "3,3-dimethyl-7-oxo-6-(2-phenylacetamido)-4-Thia-1-azabicyclo[3.2.0]heptane-2-carboxylic acid", 1330 | "3,3-dimethyl-7-oxo-6-[(phenylacetyl)amino]- [2S-(2\u03b1,5\u03b1,6\u03b2)]-4-Thia-1-azabicyclo[3.2.0]heptane-2-carboxylic acid" 1331 | ], 1332 | "CAS": [ 1333 | "61-33-6" 1334 | ], 1335 | "additional_information": "cas_registry; pubchem id: 2349", 1336 | "mode": "name; name", 1337 | "service": "cas_registry; pubchem", 1338 | "number_of_crosschecks": 2, 1339 | "identifier": "Penicillin", 1340 | "found_molecules": [], 1341 | "pubchem_cid": 2349, 1342 | "formula": "C16H18N2O4S", 1343 | "hill_formula": "C16H18N2O4S", 1344 | "inchi": "InChI=1S/C16H18N2O4S/c1-16(2)12(15(21)22)18-13(20)11(14(18)23-16)17-10(19)8-9-6-4-3-5-7-9/h3-7,11-12,14H,8H2,1-2H3,(H,17,19)(H,21,22)", 1345 | "inchikey": "JGSARLDLIJGVTE-UHFFFAOYSA-N", 1346 | "iupac_name": "3,3-dimethyl-7-oxo-6-[(2-phenylacetyl)amino]-4-thia-1-azabicyclo[3.2.0]heptane-2-carboxylic acid" 1347 | }, 1348 | "Phenol": { 1349 | "SMILES": "Oc1ccccc1", 1350 | "synonyms": [ 1351 | "Phenol", 1352 | "Carbolic acid", 1353 | "Hydroxybenzene", 1354 | "Phenic acid", 1355 | "acide carbolique" 1356 | ], 1357 | "CAS": [ 1358 | "108-95-2" 1359 | ], 1360 | "additional_information": "cas_registry; chebi id: 15882; chemeo id: 69-137-4; cir; comptox id: DTXSID5021124|QC_LEVEL:1.0; opsin; pubchem id: 996; srs id: 25510", 1361 | "mode": "name; name; name; name; name; name; name; name", 1362 | "service": "cas_registry; chebi; chemeo; cir; comptox; opsin; pubchem; srs", 1363 | "number_of_crosschecks": 8, 1364 | "identifier": "Phenol", 1365 | "found_molecules": [], 1366 | "pubchem_cid": 996, 1367 | "formula": "C6H6O", 1368 | "hill_formula": "C6H6O", 1369 | "inchi": "InChI=1S/C6H6O/c7-6-4-2-1-3-5-6/h1-5,7H", 1370 | "inchikey": "ISWSIDIOOBJBQZ-UHFFFAOYSA-N", 1371 | "iupac_name": "phenol" 1372 | }, 1373 | "Phosphoric Acid": { 1374 | "SMILES": "O=P(O)(O)O", 1375 | "synonyms": [ 1376 | "Phosphoric Acid", 1377 | "Orthophosphoric acid", 1378 | "o-phosphoric acid", 1379 | "WC-Reiniger", 1380 | "acide phosphorique" 1381 | ], 1382 | "CAS": [ 1383 | "7664-38-2" 1384 | ], 1385 | "additional_information": "cas_registry; chebi id: 26078; chemeo id: 125-325-2; cir; comptox id: DTXSID5024263|QC_LEVEL:1.0; nist id: C7664382; opsin; pubchem id: 1004; srs id: 152363", 1386 | "mode": "name; name; name; name; name; name; name; name; name", 1387 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs", 1388 | "number_of_crosschecks": 9, 1389 | "identifier": "Phosphoric Acid", 1390 | "found_molecules": [], 1391 | "pubchem_cid": 1004, 1392 | "formula": "H3O4P", 1393 | "hill_formula": "H3O4P", 1394 | "inchi": "InChI=1S/H3O4P/c1-5(2,3)4/h(H3,1,2,3,4)", 1395 | "inchikey": "NBIIXXVUZAFLBC-UHFFFAOYSA-N", 1396 | "iupac_name": "phosphoric acid" 1397 | }, 1398 | "Propane": { 1399 | "SMILES": "CCC", 1400 | "synonyms": [ 1401 | "Propane", 1402 | "Dimethylmethane", 1403 | "Propyl hydride", 1404 | "n-Propane", 1405 | "Liquefied petroleum gas" 1406 | ], 1407 | "CAS": [ 1408 | "74-98-6" 1409 | ], 1410 | "additional_information": "cas_registry; chebi id: 32879; chemeo id: 13-317-5; cir; comptox id: DTXSID5026386|QC_LEVEL:1.0; nist id: C17251626; opsin; pubchem id: 6334; srs id: 5207", 1411 | "mode": "name; name; name; name; name; name; name; name; name", 1412 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs", 1413 | "number_of_crosschecks": 9, 1414 | "identifier": "Propane", 1415 | "found_molecules": [], 1416 | "pubchem_cid": 6334, 1417 | "formula": "C3H8", 1418 | "hill_formula": "C3H8", 1419 | "inchi": "InChI=1S/C3H8/c1-3-2/h3H2,1-2H3", 1420 | "inchikey": "ATUOYWHBWRKTHZ-UHFFFAOYSA-N", 1421 | "iupac_name": "propane" 1422 | }, 1423 | "Propylene": { 1424 | "SMILES": "C=CC", 1425 | "synonyms": [ 1426 | "Propylene", 1427 | "1-Propene", 1428 | "Propene", 1429 | "Methylethylene", 1430 | "1-Propylene" 1431 | ], 1432 | "CAS": [ 1433 | "115-07-1" 1434 | ], 1435 | "additional_information": "cas_registry; chemeo id: 23-076-2; cir; comptox id: DTXSID5021205|QC_LEVEL:1.0; nist id: C1517528; opsin; pubchem id: 8252; srs id: 29009", 1436 | "mode": "name; name; name; name; name; name; name; name", 1437 | "service": "cas_registry; chemeo; cir; comptox; nist; opsin; pubchem; srs", 1438 | "number_of_crosschecks": 8, 1439 | "identifier": "Propylene", 1440 | "found_molecules": [], 1441 | "pubchem_cid": 8252, 1442 | "formula": "C3H6", 1443 | "hill_formula": "C3H6", 1444 | "inchi": "InChI=1S/C3H6/c1-3-2/h3H,1H2,2H3", 1445 | "inchikey": "QQONPFPTGQHPMA-UHFFFAOYSA-N", 1446 | "iupac_name": "prop-1-ene" 1447 | }, 1448 | "Quinine": { 1449 | "SMILES": "C=CC1CN2CCC1CC2C(O)c1ccnc2ccc(OC)cc12", 1450 | "synonyms": [ 1451 | "Quinine", 1452 | "(-)-Quinine", 1453 | "(8\u03b1,9R)-6\u2032-methoxy-Cinchonan-9-ol", 1454 | "(8\u03b1,9R)-6\u2032-Methoxycinchonan-9-ol", 1455 | "6\u2032-Methoxycinchonidine" 1456 | ], 1457 | "CAS": [ 1458 | "130-95-0" 1459 | ], 1460 | "additional_information": "cas_registry; chemeo id: 68-320-1; srs id: 35261", 1461 | "mode": "name; name; name", 1462 | "service": "cas_registry; chemeo; srs", 1463 | "number_of_crosschecks": 3, 1464 | "identifier": "Quinine", 1465 | "found_molecules": [], 1466 | "pubchem_cid": null, 1467 | "formula": "C20H24N2O2", 1468 | "hill_formula": "C20H24N2O2", 1469 | "inchi": "InChI=1S/C20H24N2O2/c1-3-13-12-22-9-7-14(13)10-19(22)20(23)16-6-8-21-18-5-4-15(24-2)11-17(16)18/h3-6,8,11,13-14,19-20,23H,1,7,9-10,12H2,2H3", 1470 | "inchikey": "LOUPRKONTZGTKE-UHFFFAOYSA-N", 1471 | "iupac_name": "(5-ethenyl-1-azabicyclo[2.2.2]octan-2-yl)-(6-methoxyquinolin-4-yl)methanol" 1472 | }, 1473 | "Saccharin": { 1474 | "SMILES": "O=C1NS(=O)(=O)c2ccccc21", 1475 | "synonyms": [ 1476 | "Saccharin", 1477 | "1,2-Benzisothiazol-3(2H)-one, 1,1-dioxide", 1478 | "1,2-Benzisothiazolin-3-one, 1,1-dioxide", 1479 | "1,2-benzisothiazol-3(2H)-one 1,1-dioxide", 1480 | "1,1-Dioxo-1,2-benzisothiazol-3(2H)-one" 1481 | ], 1482 | "CAS": [ 1483 | "81-07-2" 1484 | ], 1485 | "additional_information": "cas_registry; chebi id: 32111; chemeo id: 57-750-6; cir; comptox id: DTXSID5021251|QC_LEVEL:1.0; opsin; pubchem id: 5143; srs id: 8557", 1486 | "mode": "name; name; name; name; name; name; name; name", 1487 | "service": "cas_registry; chebi; chemeo; cir; comptox; opsin; pubchem; srs", 1488 | "number_of_crosschecks": 8, 1489 | "identifier": "Saccharin", 1490 | "found_molecules": [], 1491 | "pubchem_cid": 5143, 1492 | "formula": "C7H5NO3S", 1493 | "hill_formula": "C7H5NO3S", 1494 | "inchi": "InChI=1S/C7H5NO3S/c9-7-5-3-1-2-4-6(5)12(10,11)8-7/h1-4H,(H,8,9)", 1495 | "inchikey": "CVHZOJJKTDOEJC-UHFFFAOYSA-N", 1496 | "iupac_name": "1,1-dioxo-1,2-benzothiazol-3-one" 1497 | }, 1498 | "Silicon Dioxide": { 1499 | "SMILES": "O=[Si]=O", 1500 | "synonyms": [ 1501 | "Silicon Dioxide", 1502 | "Silica", 1503 | "Dioxosilane", 1504 | "Acticel", 1505 | "Aerosil" 1506 | ], 1507 | "CAS": [ 1508 | "7631-86-9" 1509 | ], 1510 | "additional_information": "cas_registry; chebi id: 30563; chemeo id: 69-388-6; cir; comptox id: DTXSID1029677|QC_LEVEL:1.0; nist id: ['C14808607', 'C14808607']; opsin; pubchem id: 24261; srs id: 151977", 1511 | "mode": "name; name; name; name; name; ['name', 'name']; name; name; name", 1512 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs", 1513 | "number_of_crosschecks": 9, 1514 | "identifier": "Silicon Dioxide", 1515 | "found_molecules": [], 1516 | "pubchem_cid": 24261, 1517 | "formula": "O2Si", 1518 | "hill_formula": "O2Si", 1519 | "inchi": "InChI=1S/O2Si/c1-3-2", 1520 | "inchikey": "VYPSYNLAJGMNEJ-UHFFFAOYSA-N", 1521 | "iupac_name": "dioxosilane" 1522 | }, 1523 | 1524 | 1525 | "Styrene": { 1526 | "SMILES": "C=Cc1ccccc1", 1527 | "synonyms": [ 1528 | "Styrene", 1529 | "ethenyl-Benzene", 1530 | "Ethenylbenzene", 1531 | "Cinnamene", 1532 | "Phenethylene" 1533 | ], 1534 | "CAS": [ 1535 | "100-42-5" 1536 | ], 1537 | "additional_information": "cas_registry; chebi id: 27452; chemeo id: 24-192-2; cir; comptox id: DTXSID2021284|QC_LEVEL:1.0; nist id: C100425; opsin; pubchem id: 7501; srs id: 19414", 1538 | "mode": "name; name; name; name; name; name; name; name; name", 1539 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs", 1540 | "number_of_crosschecks": 9, 1541 | "identifier": "Styrene", 1542 | "found_molecules": [], 1543 | "pubchem_cid": 7501, 1544 | "formula": "C8H8", 1545 | "hill_formula": "C8H8", 1546 | "inchi": "InChI=1S/C8H8/c1-2-8-6-4-3-5-7-8/h2-7H,1H2", 1547 | "inchikey": "PPBRXRYQALVLMV-UHFFFAOYSA-N", 1548 | "iupac_name": "styrene" 1549 | }, 1550 | "Sucrose": { 1551 | "SMILES": "OC[C@H]1O[C@@](CO)(O[C@H]2O[C@H](CO)[C@@H](O)[C@H](O)[C@H]2O)[C@@H](O)[C@@H]1O", 1552 | "synonyms": [ 1553 | "Sucrose", 1554 | "beta-D-fructofuranosyl alpha-D-glucopyranoside", 1555 | "Cane sugar", 1556 | "1-alpha-D-Glucopyranosyl-2-beta-D-fructofuranoside", 1557 | "beta-D-Fruf-(2<->1)-alpha-D-Glcp" 1558 | ], 1559 | "CAS": [ 1560 | "57-50-1" 1561 | ], 1562 | "additional_information": "chebi id: 17992; cir; comptox id: DTXSID2021288|QC_LEVEL:1.0; pubchem id: 5988; srs id: 2568", 1563 | "mode": "name; name; name; name; name", 1564 | "service": "chebi; cir; comptox; pubchem; srs", 1565 | "number_of_crosschecks": 5, 1566 | "identifier": "Sucrose", 1567 | "found_molecules": [], 1568 | "pubchem_cid": 5988, 1569 | "formula": "C12H22O11", 1570 | "hill_formula": "C12H22O11", 1571 | "inchi": "InChI=1S/C12H22O11/c13-1-4-6(16)8(18)9(19)11(21-4)23-12(3-15)10(20)7(17)5(2-14)22-12/h4-11,13-20H,1-3H2/t4-,5-,6-,7-,8+,9-,10+,11-,12+/m1/s1", 1572 | "inchikey": "CZMRCDWAGMRECN-UGDNZRGBSA-N", 1573 | "iupac_name": "(2R,3R,4S,5S,6R)-2-[(2S,3S,4S,5R)-3,4-dihydroxy-2,5-bis(hydroxymethyl)oxolan-2-yl]oxy-6-(hydroxymethyl)oxane-3,4,5-triol" 1574 | }, 1575 | "Sulfuric Acid": { 1576 | "SMILES": "O=S(=O)(O)O", 1577 | "synonyms": [ 1578 | "Sulfuric Acid", 1579 | "Oil of vitriol", 1580 | "Sulphuric acid", 1581 | "bov", 1582 | "Dipping acid" 1583 | ], 1584 | "CAS": [ 1585 | "7664-93-9" 1586 | ], 1587 | "additional_information": "cas_registry; chebi id: 26836; chemeo id: 24-837-6; cir; comptox id: DTXSID5029683|QC_LEVEL:1.0; nist id: C7664939; opsin; pubchem id: 1118; srs id: 152405", 1588 | "mode": "name; name; name; name; name; name; name; name; name", 1589 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs", 1590 | "number_of_crosschecks": 9, 1591 | "identifier": "Sulfuric Acid", 1592 | "found_molecules": [], 1593 | "pubchem_cid": 1118, 1594 | "formula": "H2O4S", 1595 | "hill_formula": "H2O4S", 1596 | "inchi": "InChI=1S/H2O4S/c1-5(2,3)4/h(H2,1,2,3,4)", 1597 | "inchikey": "QAOWNCQODCNURD-UHFFFAOYSA-N", 1598 | "iupac_name": "sulfuric acid" 1599 | }, 1600 | "Tetrafluoroethylene": { 1601 | "SMILES": "FC(F)=C(F)F", 1602 | "synonyms": [ 1603 | "Tetrafluoroethylene", 1604 | "tetrafluoro-Ethylene", 1605 | "1,1,2,2-tetrafluoro-Ethene", 1606 | "tetrafluoro-Ethene", 1607 | "Tetrafluoroethene" 1608 | ], 1609 | "CAS": [ 1610 | "116-14-3" 1611 | ], 1612 | "additional_information": "cas_registry; chemeo id: 10-149-5; cir; comptox id: DTXSID6021325|QC_LEVEL:1.0; nist id: C116143; opsin; pubchem id: 8301; srs id: 29397", 1613 | "mode": "name; name; name; name; name; name; name; name", 1614 | "service": "cas_registry; chemeo; cir; comptox; nist; opsin; pubchem; srs", 1615 | "number_of_crosschecks": 8, 1616 | "identifier": "Tetrafluoroethylene", 1617 | "found_molecules": [], 1618 | "pubchem_cid": 8301, 1619 | "formula": "C2F4", 1620 | "hill_formula": "C2F4", 1621 | "inchi": "InChI=1S/C2F4/c3-1(4)2(5)6", 1622 | "inchikey": "BFKJFAAPBSQJPD-UHFFFAOYSA-N", 1623 | "iupac_name": "1,1,2,2-tetrafluoroethene" 1624 | }, 1625 | "Tetrahydrocannabinol": { 1626 | "SMILES": "CCCCCc1cc(O)c2c(c1)OC(C)(C)[C@@H]1CCC(C)=C[C@@H]21", 1627 | "synonyms": [ 1628 | "Tetrahydrocannabinol", 1629 | "Dronabinol", 1630 | "(6aR,10aR)-6,6,9-trimethyl-3-pentyl-6a,7,8,10a-tetrahydro-6H-benzo[c]chromen-1-ol", 1631 | "(-)-delta9-trans-Tetrahydrocannabinol", 1632 | "1-trans-delta-9-Tetrahydrocannabinol" 1633 | ], 1634 | "CAS": [ 1635 | "1972-08-3" 1636 | ], 1637 | "additional_information": "chebi id: 66964; comptox id: DTXSID6021327|QC_LEVEL:1.0; pubchem id: 16078", 1638 | "mode": "name; name; name", 1639 | "service": "chebi; comptox; pubchem", 1640 | "number_of_crosschecks": 3, 1641 | "identifier": "Tetrahydrocannabinol", 1642 | "found_molecules": [], 1643 | "pubchem_cid": 16078, 1644 | "formula": "C21H30O2", 1645 | "hill_formula": "C21H30O2", 1646 | "inchi": "InChI=1S/C21H30O2/c1-5-6-7-8-15-12-18(22)20-16-11-14(2)9-10-17(16)21(3,4)23-19(20)13-15/h11-13,16-17,22H,5-10H2,1-4H3/t16-,17-/m1/s1", 1647 | "inchikey": "CYQFCXCEBYINGO-IAGOWNOFSA-N", 1648 | "iupac_name": "(6aR,10aR)-6,6,9-trimethyl-3-pentyl-6a,7,8,10a-tetrahydrobenzo[c]chromen-1-ol" 1649 | }, 1650 | "Thymine": { 1651 | "SMILES": "Cc1c[nH]c(=O)[nH]c1=O", 1652 | "synonyms": [ 1653 | "Thymine", 1654 | "5-methyl-2,4(1H,3H)-Pyrimidinedione", 1655 | "2,4-Dihydroxy-5-methylpyrimidine", 1656 | "5-Methyluracil", 1657 | "5-methylpyrimidine-2,4(1H,3H)-dione" 1658 | ], 1659 | "CAS": [ 1660 | "65-71-4" 1661 | ], 1662 | "additional_information": "cas_registry; chebi id: 17821; chemeo id: 18-703-1; cir; comptox id: DTXSID4052342|QC_LEVEL:1.0; opsin; pubchem id: 1135; srs id: 4028", 1663 | "mode": "name; name; name; name; name; name; name; name", 1664 | "service": "cas_registry; chebi; chemeo; cir; comptox; opsin; pubchem; srs", 1665 | "number_of_crosschecks": 8, 1666 | "identifier": "Thymine", 1667 | "found_molecules": [], 1668 | "pubchem_cid": 1135, 1669 | "formula": "C5H6N2O2", 1670 | "hill_formula": "C5H6N2O2", 1671 | "inchi": "InChI=1S/C5H6N2O2/c1-3-2-6-5(9)7-4(3)8/h2H,1H3,(H2,6,7,8,9)", 1672 | "inchikey": "RWQNBRDOKXIBIV-UHFFFAOYSA-N", 1673 | "iupac_name": "5-methyl-1H-pyrimidine-2,4-dione" 1674 | }, 1675 | "Trinitrotoluene": { 1676 | "SMILES": "Cc1c([N+](=O)[O-])cc([N+](=O)[O-])cc1[N+](=O)[O-]", 1677 | "synonyms": [ 1678 | "Trinitrotoluene", 1679 | "2,4,6-Trinitrotoluene", 1680 | "2-Methyl-1,3,5-trinitrobenzene", 1681 | "2-methyl-1,3,5-trinitro-Benzene", 1682 | "2,4,6-trinitro-Toluene" 1683 | ], 1684 | "CAS": [ 1685 | "118-96-7" 1686 | ], 1687 | "additional_information": "cas_registry; chebi id: 46053; chemeo id: 12-617-3; cir; comptox id: DTXSID7024372|QC_LEVEL:1.0; nist id: C118967; opsin; pubchem id: 8376; srs id: 30460", 1688 | "mode": "name; name; name; name; name; name; name; name; name", 1689 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs", 1690 | "number_of_crosschecks": 9, 1691 | "identifier": "Trinitrotoluene", 1692 | "found_molecules": [], 1693 | "pubchem_cid": 8376, 1694 | "formula": "C7H5N3O6", 1695 | "hill_formula": "C7H5N3O6", 1696 | "inchi": "InChI=1S/C7H5N3O6/c1-4-6(9(13)14)2-5(8(11)12)3-7(4)10(15)16/h2-3H,1H3", 1697 | "inchikey": "SPSSULHKWOKEEL-UHFFFAOYSA-N", 1698 | "iupac_name": "2-methyl-1,3,5-trinitrobenzene" 1699 | }, 1700 | "Toluene": { 1701 | "SMILES": "Cc1ccccc1", 1702 | "synonyms": [ 1703 | "Toluene", 1704 | "Methylbenzene", 1705 | "Methacide", 1706 | "methyl-Benzene", 1707 | "Toluol" 1708 | ], 1709 | "CAS": [ 1710 | "108-88-3" 1711 | ], 1712 | "additional_information": "cas_registry; chebi id: 17578; chemeo id: 17-837-4; cir; comptox id: DTXSID7021360|QC_LEVEL:1.0; nist id: C1124181; opsin; pubchem id: 1140; srs id: 25452", 1713 | "mode": "name; name; name; name; name; name; name; name; name", 1714 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs", 1715 | "number_of_crosschecks": 9, 1716 | "identifier": "Toluene", 1717 | "found_molecules": [], 1718 | "pubchem_cid": 1140, 1719 | "formula": "C7H8", 1720 | "hill_formula": "C7H8", 1721 | "inchi": "InChI=1S/C7H8/c1-7-5-3-2-4-6-7/h2-6H,1H3", 1722 | "inchikey": "YXFVVABEGXRONW-UHFFFAOYSA-N", 1723 | "iupac_name": "methyl-benzene" 1724 | }, 1725 | "Uracil": { 1726 | "SMILES": "O=c1cc[nH]c(=O)[nH]1", 1727 | "synonyms": [ 1728 | "Uracil", 1729 | "2,4(1H,3H)-Pyrimidinedione", 1730 | "2,4-Dioxopyrimidine", 1731 | "2,4-Dihydroxypyrimidine", 1732 | "2,4-Pyrimidinediol" 1733 | ], 1734 | "CAS": [ 1735 | "66-22-8" 1736 | ], 1737 | "additional_information": "cas_registry; chebi id: 17568; chemeo id: 31-883-7; cir; comptox id: DTXSID4021424|QC_LEVEL:1.0; opsin; pubchem id: 1174; srs id: 4069", 1738 | "mode": "name; name; name; name; name; name; name; name", 1739 | "service": "cas_registry; chebi; chemeo; cir; comptox; opsin; pubchem; srs", 1740 | "number_of_crosschecks": 8, 1741 | "identifier": "Uracil", 1742 | "found_molecules": [], 1743 | "pubchem_cid": 1174, 1744 | "formula": "C4H4N2O2", 1745 | "hill_formula": "C4H4N2O2", 1746 | "inchi": "InChI=1S/C4H4N2O2/c7-3-1-2-5-4(8)6-3/h1-2H,(H2,5,6,7,8)", 1747 | "inchikey": "ISAKRJDGNUQOIC-UHFFFAOYSA-N", 1748 | "iupac_name": "1H-pyrimidine-2,4-dione" 1749 | }, 1750 | "Urea": { 1751 | "SMILES": "NC(N)=O", 1752 | "synonyms": [ 1753 | "Urea", 1754 | "Carbamide", 1755 | "carbonyldiamide", 1756 | "Isourea", 1757 | "ur" 1758 | ], 1759 | "CAS": [ 1760 | "57-13-6" 1761 | ], 1762 | "additional_information": "cas_registry; chebi id: 16199; chemeo id: 45-964-2; cir; comptox id: DTXSID4021426|QC_LEVEL:1.0; opsin; pubchem id: 1176; srs id: 2451", 1763 | "mode": "name; name; name; name; name; name; name; name", 1764 | "service": "cas_registry; chebi; chemeo; cir; comptox; opsin; pubchem; srs", 1765 | "number_of_crosschecks": 8, 1766 | "identifier": "Urea", 1767 | "found_molecules": [], 1768 | "pubchem_cid": 1176, 1769 | "formula": "CH4N2O", 1770 | "hill_formula": "CH4N2O", 1771 | "inchi": "InChI=1S/CH4N2O/c2-1(3)4/h(H4,2,3,4)", 1772 | "inchikey": "XSQUKJJJFZCRTK-UHFFFAOYSA-N", 1773 | "iupac_name": "urea" 1774 | }, 1775 | "Vanillin": { 1776 | "SMILES": "COc1cc(C=O)ccc1O", 1777 | "synonyms": [ 1778 | "Vanillin", 1779 | "4-Hydroxy-3-methoxybenzaldehyde", 1780 | "4-hydroxy-3-methoxy-Benzaldehyde", 1781 | "3-Methoxy-4-hydroxybenzaldehyde", 1782 | "Vanillic aldehyde" 1783 | ], 1784 | "CAS": [ 1785 | "121-33-5" 1786 | ], 1787 | "additional_information": "cas_registry; chebi id: 18346; cir; comptox id: DTXSID0021969|QC_LEVEL:1.0; opsin; pubchem id: 1183; srs id: 31666", 1788 | "mode": "name; name; name; name; name; name; name", 1789 | "service": "cas_registry; chebi; cir; comptox; opsin; pubchem; srs", 1790 | "number_of_crosschecks": 7, 1791 | "identifier": "Vanillin", 1792 | "found_molecules": [], 1793 | "pubchem_cid": 1183, 1794 | "formula": "C8H8O3", 1795 | "hill_formula": "C8H8O3", 1796 | "inchi": "InChI=1S/C8H8O3/c1-11-8-4-6(5-9)2-3-7(8)10/h2-5,10H,1H3", 1797 | "inchikey": "MWOOGOJBHIARFG-UHFFFAOYSA-N", 1798 | "iupac_name": "4-hydroxy-3-methoxybenzaldehyde" 1799 | }, 1800 | "Vinyl Chloride": { 1801 | "SMILES": "C=CCl", 1802 | "synonyms": [ 1803 | "Vinyl Chloride", 1804 | "Chloroethene", 1805 | "Chloroethylene", 1806 | "chloro-Ethene", 1807 | "chloro-Ethylene" 1808 | ], 1809 | "CAS": [ 1810 | "75-01-4" 1811 | ], 1812 | "additional_information": "cas_registry; chemeo id: 63-522-2; cir; comptox id: DTXSID8021434|QC_LEVEL:1.0; nist id: C75014; opsin; pubchem id: 6338; srs id: 5231", 1813 | "mode": "name; name; name; name; name; name; name; name", 1814 | "service": "cas_registry; chemeo; cir; comptox; nist; opsin; pubchem; srs", 1815 | "number_of_crosschecks": 8, 1816 | "identifier": "Vinyl Chloride", 1817 | "found_molecules": [], 1818 | "pubchem_cid": 6338, 1819 | "formula": "C2H3Cl", 1820 | "hill_formula": "C2H3Cl", 1821 | "inchi": "InChI=1S/C2H3Cl/c1-2-3/h2H,1H2", 1822 | "inchikey": "BZHJMEDXRYGGRV-UHFFFAOYSA-N", 1823 | "iupac_name": "chloroethene" 1824 | }, 1825 | "Water": { 1826 | "SMILES": "O", 1827 | "synonyms": [ 1828 | "Water", 1829 | "Distilled water", 1830 | "h2o", 1831 | "oxidane", 1832 | "dihydrogen oxide" 1833 | ], 1834 | "CAS": [ 1835 | "7732-18-5" 1836 | ], 1837 | "additional_information": "cas_registry; chebi id: 15377; chemeo id: 69-886-3; cir; comptox id: DTXSID6026296|QC_LEVEL:1.0; nist id: B6010117; opsin; pubchem id: 962; srs id: 153171", 1838 | "mode": "name; name; name; name; name; name; name; name; name", 1839 | "service": "cas_registry; chebi; chemeo; cir; comptox; nist; opsin; pubchem; srs", 1840 | "number_of_crosschecks": 9, 1841 | "identifier": "Water", 1842 | "found_molecules": [], 1843 | "pubchem_cid": 962, 1844 | "formula": "H2O", 1845 | "hill_formula": "H2O", 1846 | "inchi": "InChI=1S/H2O/h1H2", 1847 | "inchikey": "XLYOFNOQVPJJNP-UHFFFAOYSA-N", 1848 | "iupac_name": "oxidane" 1849 | }, 1850 | "Xylene": { 1851 | "SMILES": "Cc1ccccc1C", 1852 | "synonyms": [ 1853 | "Xylene", 1854 | "1,2-xylene", 1855 | "o-xylene", 1856 | "1,2-dimethyl-Benzene", 1857 | "1,2-Dimethylbenzene" 1858 | ], 1859 | "CAS": [ 1860 | "95-47-6" 1861 | ], 1862 | "additional_information": "pubchem id: 7237; srs id: 84970", 1863 | "mode": "name; name", 1864 | "service": "pubchem; srs", 1865 | "number_of_crosschecks": 2, 1866 | "identifier": "Xylene", 1867 | "found_molecules": [], 1868 | "pubchem_cid": 7237, 1869 | "formula": "C8H10", 1870 | "hill_formula": "C8H10", 1871 | "inchi": "InChI=1S/C8H10/c1-7-5-3-4-6-8(7)2/h3-6H,1-2H3", 1872 | "inchikey": "CTQNGGLPUBDAKN-UHFFFAOYSA-N", 1873 | "iupac_name": "1,2-xylene" 1874 | } 1875 | } -------------------------------------------------------------------------------- /tests/get_iupac.py: -------------------------------------------------------------------------------- 1 | import json 2 | import requests 3 | from lxml import etree 4 | from copy import deepcopy 5 | from tqdm import tqdm 6 | 7 | def get_iupac(smiles: str): 8 | 9 | headers = { 10 | "user-agent":f"MoleculeResolver/test (+https://github.com/MoleculeResolver/molecule-resolver)" 11 | } 12 | html_doc = requests.get("https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/" + smiles + "/record/XML", headers=headers) 13 | if html_doc.status_code != 200: 14 | return 15 | root = etree.XML(html_doc.text) 16 | 17 | iupac_elements = root.findall(".//{*}PC-Urn_label") 18 | for e in iupac_elements: 19 | if "IUPAC Name" == e.text: 20 | urn = e.getparent() 21 | iupac_name_type = urn.find(".//{*}PC-Urn_name").text 22 | 23 | info_data = urn.getparent().getparent() 24 | iupac_name = info_data.find(".//{*}PC-InfoData_value_sval").text 25 | 26 | if iupac_name_type == "Preferred": 27 | return iupac_name 28 | 29 | def main(): 30 | with open("benchmark_component_molecules.json", "r") as f: 31 | benchmark = json.load(f) 32 | 33 | new_benchmark = {} 34 | for name, data in tqdm(benchmark.items()): 35 | iupac_name = get_iupac(data["SMILES"]) 36 | if iupac_name is not None: 37 | data["iupac_name"] = iupac_name 38 | new_benchmark[name] = data 39 | 40 | with open("benchmark_component_molecules_iupac.json", "w") as f: 41 | json.dump(new_benchmark, f, indent=4) 42 | 43 | if __name__ == "__main__": 44 | main() 45 | 46 | -------------------------------------------------------------------------------- /tests/rdkit_normalization_exceptions.py: -------------------------------------------------------------------------------- 1 | from rdkit import Chem 2 | from rdkit.Chem.MolStandardize import rdMolStandardize 3 | from moleculeresolver import MoleculeResolver 4 | 5 | if __name__ == "__main__": 6 | smiles_list = [ 7 | "CS(C)=O", 8 | "O=S1CCCC1", 9 | "O=S1CCOCC1", 10 | "CCS(=O)CC", 11 | "CCCS(C)=O", 12 | "CCCS(=O)CCC", 13 | "O=S(c1ccccc1)c1ccccc1", 14 | "O=S1CCCC1", 15 | "CCCS(=O)CCC", 16 | "CCS(=O)CC", 17 | "CCCS(C)=O", 18 | "O=S1CCCC1", 19 | "O=S1CCOCC1" 20 | ] 21 | 22 | for smiles in smiles_list: 23 | print('-'*70) 24 | mr = MoleculeResolver() 25 | 26 | # Normalize molecule with rdkit 27 | mol = Chem.MolFromSmiles(smiles) 28 | normalized_mol = rdMolStandardize.Normalize(mol) 29 | normalized_smiles = Chem.MolToSmiles(normalized_mol) 30 | print(f"Original: {smiles} -> RDKit Normalized: {normalized_smiles}") 31 | 32 | # Convert zwitterionic form back to sulfynil group 33 | corrected_mol = mr.convert_zwitterion_to_sulfynil(normalized_mol) 34 | corrected_smiles = Chem.MolToSmiles(corrected_mol) 35 | print(f"Original: {smiles} -> Corrected : {corrected_smiles}") 36 | 37 | # with MolResolver 38 | mr_smiles = mr.standardize_SMILES(smiles) 39 | print(f"Original: {smiles} -> MR smiles : {mr_smiles}") 40 | 41 | assert smiles == mr_smiles == corrected_smiles 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /tests/test_integration.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from moleculeresolver import MoleculeResolver 3 | import json 4 | import os 5 | from pathlib import Path 6 | from typing import Any, Callable, Dict, Optional 7 | 8 | 9 | # IUPAC names 10 | dir_path = Path(os.path.dirname(os.path.realpath(__file__))) 11 | with open(dir_path / "benchmark_component_molecules_iupac.json", "r") as f: 12 | benchmark = json.load(f) 13 | 14 | SMILES = "SMILES" 15 | 16 | # PATCH_STATE = "SAVE" 17 | 18 | 19 | # class PatchResilientRequest: 20 | # def __init__(self, json_data, patch_state): 21 | # self.json_data = json_data 22 | # self.patch_state = patch_state 23 | 24 | # def __call__(self, url: str, **kwargs) -> str: 25 | # if self.patch_state == "SAVE": 26 | # self.json_data[url] = kwargs["json"] 27 | # elif self.patch_state == "LOAD": 28 | # return self.json_data[url] 29 | 30 | 31 | 32 | @pytest.mark.parametrize("data", benchmark.values()) 33 | class TestServices: 34 | 35 | @staticmethod 36 | def _test_service( 37 | call_method: Callable, 38 | input_identifier: str, 39 | output_identifier_type: str, 40 | output_identifier, 41 | kwargs: Optional[Dict] = None, 42 | ): 43 | """ 44 | Test a service by calling it with an input identifier and checking that the output identifier matches the expected value. 45 | 46 | Parameters 47 | ---------- 48 | call_method : Callable 49 | The method to call 50 | input_identifier : str 51 | The input identifier 52 | output_identifier_type : str 53 | The type of the output identifier 54 | output_identifier : str 55 | The expected output identifier 56 | kwargs : Optional[Dict], optional 57 | Additional keyword arguments to pass to the call method, by default None 58 | 59 | 60 | """ 61 | if kwargs is None: 62 | kwargs = {} 63 | res = call_method(input_identifier, **kwargs) 64 | if res is None: 65 | raise ValueError(f"No molecule found for {input_identifier}") 66 | 67 | res_txt = res.__dict__[output_identifier_type] 68 | if res_txt == output_identifier: 69 | return 70 | else: 71 | raise ValueError(f"Expected {output_identifier} but got {res_txt}") 72 | 73 | def test_opsin(self, data): 74 | with MoleculeResolver() as mr: 75 | iupac_name = data["iupac_name"] 76 | self._test_service( 77 | mr.get_molecule_from_OPSIN, 78 | iupac_name, 79 | SMILES, 80 | data["SMILES"], 81 | ) 82 | 83 | 84 | 85 | # def test_opsin_batchmode(): 86 | # names = [d["iupac_name"] for d in benchmark.values()] 87 | # smiles = [d["SMILES"] for d in benchmark.values()] 88 | # with MoleculeResolver() as mr: 89 | # res = mr.get_molecule_from_OPSIN_batchmode(names) 90 | # for i, r in enumerate(res): 91 | # if r[0].SMILES == smiles[i]: 92 | # continue 93 | # else: 94 | # raise ValueError("Expected " + smiles[i] + " but got " + r.SMILES) 95 | 96 | 97 | def generate_data(): 98 | # Run each test with a patch of resilient request that saves response 99 | pass 100 | 101 | 102 | if __name__ == "__main__": 103 | generate_data() 104 | --------------------------------------------------------------------------------