├── .gitignore ├── .readthedocs.yaml ├── CHANGELOG ├── LICENSE ├── README.md ├── docs ├── Makefile ├── README.md ├── make.bat ├── requirements.txt └── source │ ├── conf.py │ ├── index.rst │ └── smallworld_api.rst ├── export_note.md ├── setup.py ├── smallworld_api ├── __init__.py ├── base.py ├── common_db.py ├── defaults.py ├── extras.py ├── nomatcherror.py └── search.py └── test.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: ubuntu-20.04 5 | tools: 6 | python: "3.8" 7 | 8 | sphinx: 9 | configuration: docs/source/conf.py 10 | builder: html 11 | fail_on_warning: false 12 | 13 | python: 14 | install: 15 | - method: pip 16 | path: . 17 | - requirements: docs/requirements.txt -------------------------------------------------------------------------------- /CHANGELOG: -------------------------------------------------------------------------------- 1 | ## [1.1.2] - 2022-12-25 2 | ### Fixed 3 | - `retrieve_database` is now called on initialisation 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Matteo Ferla 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Python_SmallWorld_API 2 | 3 | An unofficial Python3 module to query a SmallWorld chemical space search server. 4 | 5 | [![Documentation Status](https://readthedocs.org/projects/python-smallworld-api/badge/?version=latest)](https://python-smallworld-api.readthedocs.io/en/latest/?badge=latest) 6 | [![https img shields io pypi v smallworld api logo python](https://img.shields.io/pypi/v/smallworld--api?logo=python)](https://pypi.org/project/smallworld--api) [![https img shields io pypi pyversions smallworld api logo python](https://img.shields.io/pypi/pyversions/smallworld--api?logo=python)](https://pypi.org/project/smallworld--api) [![https img shields io pypi wheel smallworld api logo python](https://img.shields.io/pypi/wheel/smallworld--api?logo=python)](https://pypi.org/project/smallworld--api) [![https img shields io pypi format smallworld api logo python](https://img.shields.io/pypi/format/smallworld--api?logo=python)](https://pypi.org/project/smallworld--api) [![https img shields io pypi status smallworld api logo python](https://img.shields.io/pypi/status/smallworld--api?logo=python)](https://pypi.org/project/smallworld--api) [![https img shields io pypi dm smallworld api logo python](https://img.shields.io/pypi/dm/smallworld--api?logo=python)](https://pypi.org/project/smallworld--api) [![https img shields io codeclimate maintainability matteoferla Python_SmallWorld_API logo codeclimate](https://img.shields.io/codeclimate/maintainability/matteoferla/Python_SmallWorld_API?logo=codeclimate)](https://codeclimate.com/github/matteoferla/Python_SmallWorld_API) [![https img shields io codeclimate issues matteoferla Python_SmallWorld_API logo codeclimate](https://img.shields.io/codeclimate/issues/matteoferla/Python_SmallWorld_API?logo=codeclimate)](https://codeclimate.com/github/matteoferla/Python_SmallWorld_API) [![https img shields io codeclimate tech debt matteoferla Python_SmallWorld_API logo codeclimate](https://img.shields.io/codeclimate/tech-debt/matteoferla/Python_SmallWorld_API?logo=codeclimate)](https://codeclimate.com/github/matteoferla/Python_SmallWorld_API) [![https img shields io github forks matteoferla Python_SmallWorld_API label Fork style social logo github](https://img.shields.io/github/forks/matteoferla/Python_SmallWorld_API?label=Fork&style=social&logo=github)](https://github.com/matteoferla/Python_SmallWorld_API) [![https img shields io github stars matteoferla Python_SmallWorld_API style social logo github](https://img.shields.io/github/stars/matteoferla/Python_SmallWorld_API?style=social&logo=github)](https://github.com/matteoferla/Python_SmallWorld_API) [![https img shields io github watchers matteoferla Python_SmallWorld_API label Watch style social logo github](https://img.shields.io/github/watchers/matteoferla/Python_SmallWorld_API?label=Watch&style=social&logo=github)](https://github.com/matteoferla/Python_SmallWorld_API) [![https img shields io github last commit matteoferla Python_SmallWorld_API logo github](https://img.shields.io/github/last-commit/matteoferla/Python_SmallWorld_API?logo=github)](https://github.com/matteoferla/Python_SmallWorld_API) [![https img shields io github license matteoferla Python_SmallWorld_API logo github](https://img.shields.io/github/license/matteoferla/Python_SmallWorld_API?logo=github)](https://github.com/matteoferla/Python_SmallWorld_API/raw/master/LICENCE) [![https img shields io github release date matteoferla Python_SmallWorld_API logo github](https://img.shields.io/github/release-date/matteoferla/Python_SmallWorld_API?logo=github)](https://github.com/matteoferla/Python_SmallWorld_API) [![https img shields io github commit activity m matteoferla Python_SmallWorld_API logo github](https://img.shields.io/github/commit-activity/m/matteoferla/Python_SmallWorld_API?logo=github)](https://github.com/matteoferla/Python_SmallWorld_API) [![https img shields io github issues matteoferla Python_SmallWorld_API logo github](https://img.shields.io/github/issues/matteoferla/Python_SmallWorld_API?logo=github)](https://github.com/matteoferla/Python_SmallWorld_API) [![https img shields io github issues closed matteoferla Python_SmallWorld_API logo github](https://img.shields.io/github/issues-closed/matteoferla/Python_SmallWorld_API?logo=github)](https://github.com/matteoferla/Python_SmallWorld_API) 7 | 8 | 9 | ### Disclaimer 10 | > This is Unofficial So please do not abuse it or use it when you cannot legally use the site! 11 | 12 | SmallWorld is a search engine for chemical space developed by John Mayfield and Roger Sayle at [NextMove Software](https://www.nextmovesoftware.com/). 13 | John Irwin and Brian Shoichet at UCSF (the creators and maintainers of the [ZINC](https://zinc.docking.org/) database), 14 | host a version of it at [sw.docking.org](https://sw.docking.org/search.html) along with another NextMove Software product, 15 | [Arthor](https://arthor.docking.org/). 16 | 17 | ## Overview 18 | 19 | SmallWorld allows one to search for similar compounds 20 | to a give [SMILES](https://en.wikipedia.org/wiki/Simplified_molecular-input_line-entry_system) 21 | in one of many databases —a very complex feat. 22 | 23 | A copy is hosted at [sw.docking.org](https://sw.docking.org/search.html) by John Irwin, Brian Shoichet and co. 24 | This is a free service, but it is not intended for heavy use. 25 | To change the endpoint, one can change the class attribute `SmallWorld.base_url` to a different URL. 26 | The folk at NextMove Software deploy instances of it for paying customers —and with full support. 27 | 28 | The API points of the site are described in 29 | [wiki.docking.org/index.php/How_to_use_SmallWorld_API](https://wiki.docking.org/index.php/How_to_use_SmallWorld_API). 30 | 31 | This Python3 module allows one to search it. 32 | 33 | For searches in Arthor, Zinc and EnamineStore see elsewhere. 34 | 35 | ## Install 36 | 37 | pip install -q smallworld-api 38 | 39 | ## Usage 40 | The following searches for Aspirin in Enamine's make-on-demand space, Enamine REAL, which does not contain it 41 | as the latter is filtered by Lipinski's rule of five (Aspirin is actually [a terrible placeholder drug](https://www.blopig.com/blog/2023/08/placeholder-compounds-distraction-vs-accuracy/)) 42 | 43 | ```python 44 | from rdkit import Chem 45 | from rdkit.Chem import PandasTools 46 | import pandas as pd # for typehinting below 47 | 48 | from smallworld_api import SmallWorld 49 | 50 | print(SmallWorld.base_url) # 'https://sw.docking.org' 51 | aspirin = 'O=C(C)Oc1ccccc1C(=O)O' 52 | sw = SmallWorld() 53 | results : pd.DataFrame = sw.search(aspirin, dist=5, db=sw.REAL_dataset) 54 | 55 | from IPython.display import display 56 | display(results) 57 | ``` 58 | 59 | The first two import lines are optional as the code works without rdkit. If pandas gets imported before PandasTools 60 | and Chem imported not in _main_ then display issues happen, 61 | which can be fixed with a `from rdkit.Chem.Draw import IPythonConsole`. 62 | 63 | So it's up to you to remember to run: 64 | 65 | ```python 66 | PandasTools.AddMoleculeColumnToFrame(results, 'smiles', 'molecule', includeFingerprints=True) 67 | ``` 68 | 69 | The argument `db` for `.search` is a string and is the name of the database. These do seem to change, 70 | so they get updated during initialisation or with the call: 71 | 72 | ```python 73 | dbs: pd.DataFrame = SmallWorld.retrieve_databases() #: pd.DataFrame (.db_choices gets updated too) 74 | ``` 75 | 76 | The dynamic properties `.REAL_dataset` and `.ZINC_dataset` simply return the best value from the presets, which may have 77 | become out of date (unless updated). 78 | 79 | ## Query terms 80 | 81 | The first argument passed to `.search` can be: 82 | 83 | * a `str` (SMILES) 84 | * a `Chem.Mol` (rdkit is an optional requirement though) 85 | * a list-like (sequence) or a dict-like (mapping) of the above, where the index or key becomes the name in the output 86 | table. 87 | 88 | See the class attribute dictionary `SmallWorld.default_submission` for what the defaults are set to, which ought to be: 89 | 90 | {'dist': 8, 91 | 'tdn': 6, 92 | 'rdn': 6, 93 | 'rup': 2, 94 | 'ldn': 2, 95 | 'lup': 2, 96 | 'maj': 6, 97 | 'min': 6, 98 | 'sub': 6, 99 | 'sdist': 12, 100 | 'tup': 6, 101 | 'scores': 'Atom Alignment,ECFP4,Daylight'} 102 | 103 | If one is sure that the correct dataset is being used and any raised `NoMatchError` is due to the SMILES, then once can 104 | add for the last case the argument `tolerate_tolerate_NoMatchError=True`, which makes them ignored bar for a warning. 105 | 106 | ## Debug 107 | 108 | The instantiation is set up so for debugging, namely it has two attributes of interest: 109 | 110 | * `sw.last_reply`, a `requests.Response` instance 111 | * `sw.hit_list_id` an integer representing the search (AKA. `hlid` in the server responses) 112 | 113 | The errors raised are generally either `requests.HTTPError` 114 | or `smallworld.NoMatchError`. The former is raised by a `requests.Response.raise_for_status` call and means there is a 115 | status code that isn't 200, the latter is raised by one of the various checks in `sw.get_results()`. 116 | 117 | For the former errors, i.e. those by a serverside-declared HTML-formatting error (eg. status code 404), if one is in a 118 | Jupyter notebook one can do `sw.show_reply_as_html()`. Generally if you get status code 500, it is best to try again 119 | tomorrow as the server is having a hard time and is probably not okay on the web. 120 | 121 | For the latter, the result in `.last_reply` should be a JSON string, therefore should give something like this: 122 | 123 | ```python3 124 | reply_data: dict = sw.last_reply.json() 125 | ``` 126 | 127 | A common issue is the change in database names, therefore do do and pick a different one 128 | (ATM, the index of the dataframe is the name to use, but in 2021 it was the `name`) 129 | 130 | ```python3 131 | from IPython.display import display 132 | 133 | from smallworld_api import SmallWorld 134 | db_table : pd.DataFrame = SmallWorld.retrieve_databases() 135 | display( db_table ) 136 | ``` 137 | 138 | There will be a "ground control to major Tom" warning in the first query. This weird reply means that the stream has 139 | finished, but not closed or something. Ignore it. 140 | 141 | Also, as a shorthand, `mol: Chem.Mol = SmallWorld.check_smiles(aspirin)` 142 | can be called to check if the molecules is fine. 143 | 144 | For extreme debugging, open Chrome and go to [sw.docking.org](https://sw.docking.org/search.html) 145 | and open the developer tools (F12). Then go to the Network tab and do a search, eg. with `CC2=CC=C(CNC(=O)C1CCC1)C=N2`, 146 | this will be populated by all the figure requests, but `/search/submit` will be the first one to look at 147 | if the issue is with the submit method in the trace, `/search/view` if it's with the `get_results` method. 148 | Then simply copy the url query off the request and use it as parameters or compare them etc. For example: 149 | 150 | ```python3 151 | import urllib.parse 152 | from smallworld_api import SmallWorld 153 | 154 | url_query = '👾=👾&👾=👾' 155 | expected = dict(urllib.parse.parse_qsl(url_query)) 156 | 157 | class Debug(SmallWorld): 158 | 159 | def submit_query(self, params): 160 | # override the method to check the parameters 161 | print('Going to use:', params) 162 | print('Missing:', set(expected).difference(params)) 163 | return super().submit_query(expected) 164 | 165 | d = Debug() 166 | d.search('CCO', db=d.REAL_dataset) 167 | ``` 168 | 169 | If it's a field that change, raise an issue and I'll update the class or do a pull request :pray:. 170 | 171 | ## Choices 172 | 173 | The database choices can seen with the preset list `SmallWorld.db_choices`. But also this can be recached via the 174 | classmethod `SmallWorld.retrieve_databases()`. 175 | 176 | Two databases, `REAL_Space_21Q3_2B(public)` and `REAL_DB_20Q2`, are Enamine REAL databases 177 | (aka. Enamine will make the compound on request). Previously, the 178 | repository, [enamine-real-search](https://github.com/xchem/enamine-real-search) was good for this, but unfortunately 179 | Enamine changed their endpoints. So I wrote this to take its place! 180 | Despite the smaller number of entries, `REAL_DB_20Q2` gives the most hits and is less likely to "Major Tom out". 181 | 182 | Likewise, the attribute `SmallWorld.sf_choices` (type list) and 183 | the classmethod `SmallWorld.retrieve_scorefun_options()` do the same. 184 | The values are less and are: `['Atom Alignment', 'SMARTS Alignment', 'ECFP4', 'Daylight']`, but these 185 | are activated by default and will be visible as columns in the resulting dataframe from a search call. 186 | 187 | Here is the full list of databases: 188 | 189 | ```python 190 | import pandas as pd 191 | 192 | choices: pd.DataFrame = SmallWorld.retrieve_databases() 193 | 194 | display(choices) 195 | ``` 196 | Which will return (as of writing on the 9th Dec 2021): 197 | 198 | | | name | numEntries | numMapped | numUnmapped | numSkipped | status | 199 | |:----------------------------------------------|:---------------------------|-------------:|------------:|--------------:|-------------:|:----------| 200 | | REAL_Space_21Q3_All_2B_public.smi.anon | REAL_Space_21Q3_2B(public) | 1950356098 | 1935062471 | 15293627 | 0 | Available | 201 | | ZINC-All-2020Q2-1.46B.anon | ZINC-All-20Q2-1.46B | 1468554638 | 1467030947 | 1523691 | 231 | Available | 202 | | ZINC-For-Sale-2020Q2-1.46B.anon | ZINC-For-Sale-20Q2-1.46B | 1464949146 | 1463519428 | 1429718 | 22 | Available | 203 | | ZINC20-ForSale-21Q3.smi.anon | ZINC20-ForSale-21Q3-1.4B | 1479284919 | 1440784765 | 38500154 | 29 | Available | 204 | | Enamine_REAL_Public_July_2020_Q1-2_1.36B.anon | REAL_DB_20Q2 | 1361198468 | 1350462346 | 10736122 | 0 | Available | 205 | | Wait-OK-2020Q2-1.2B.anon | Wait-OK-20Q2-1.2B | 1174063221 | 1172785190 | 1278031 | 1 | Available | 206 | | WuXi-20Q4.smi.anon | WuXi-20Q4-600M | 2353582875 | 600762581 | 1752820294 | 284 | Available | 207 | | MculeUltimate-20Q2.smi.anon | MculeUltimate_20Q2_126M | 126471523 | 126471523 | 0 | 0 | Available | 208 | | WuXi-2020Q2-120M.anon | WuXi-20Q2-120M | 339132361 | 120400570 | 218731791 | 0 | Available | 209 | | mcule_ultimate_200407_c8bxI4.anon | Mcule_ultimate_20Q2-126M | 126471523 | 45589462 | 80882061 | 0 | Available | 210 | | BB-All-2020Q2-26.7M.anon | BB-All-20Q2-26.7M | 26787985 | 26707241 | 80744 | 16 | Available | 211 | | In-Stock-2020Q2-13.8M.anon | In-Stock-20Q2-13.8M | 13842485 | 13829086 | 13399 | 1 | Available | 212 | | ZINC20-InStock-21Q3.smi.anon | ZINC20-InStock-21Q3-11M | 11122445 | 11103910 | 18535 | 5 | Available | 213 | | BBall.smi.anon | BB-All-21Q4-3.3M | 3319960 | 3319705 | 255 | 6 | Available | 214 | | BBnow.smi.anon | BB-Now-21Q4-2M | 2076639 | 2076464 | 175 | 6 | Available | 215 | | BB-Now-2020Q2-1.6M.anon | BB-Now-20Q2-1.6M | 1649789 | 1649386 | 403 | 4 | Available | 216 | | BB_50.smi.anon | BB-50-21Q4-1.5M | 1483551 | 1483434 | 117 | 2 | Available | 217 | | BB_10.smi.anon | BB-10-21Q4-1.2M | 1243321 | 1243241 | 80 | 0 | Available | 218 | | BB_40.smi.anon | BB-40-21Q4-590K | 589959 | 589911 | 48 | 4 | Available | 219 | | interesting.smi.anon | ZINC-Interesting-20Q2-320K | 320845 | 320773 | 72 | 1 | Available | 220 | | ZINC-Interesting-2020Q2-300K.anon | ZINC-Interesting-20Q2-300K | 307854 | 300765 | 7089 | 1 | Available | 221 | | TCNMP-2020Q2-31912.anon | TCNMP-20Q2-31912 | 37438 | 31912 | 5526 | 0 | Available | 222 | | BB_30.smi.anon | BB-30-21Q4-3K | 3129 | 3119 | 10 | 0 | Available | 223 | | WorldDrugs-2020Q2-3004.anon | WorldDrugs-20Q2-3004 | 3004 | 3003 | 1 | 0 | Available | 224 | | HMDB-2020Q2-584.anon | HMDB-20Q2-584 | 585 | 584 | 1 | 0 | Available | 225 | 226 | ## Names 227 | 228 | There is a Python module called [smallworld](https://github.com/benmaier/smallworld), 229 | which implements the small world algorithm. 230 | This is not an API to the [sw.docking.org](https://sw.docking.org/search.html) site. 231 | 232 | The blog of the [sw.docking.org](https://sw.docking.org/search.html) site mentions a pysmallworld. 233 | There is no mention of this in Google so I am guessing it is for a future feature? 234 | I however need to use this now as 235 | I need it as a publicly usable example workflow of [Fragmenstein](https://github.com/matteoferla/Fragmenstein). 236 | 237 | Also, there is a great and wacky boardgame called [Small World](https://boardgamegeek.com/boardgame/40692/small-world), 238 | with a curious/agonising dynamic which forces you to not be a collector. 239 | 240 | 241 | 242 | 243 | 244 | 245 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | ## Note 2 | 3 | This folder is where Sphinx generates the documentation for readthedocs 4 | ([read docs](https://python-smallworld-api.readthedocs.io/en/latest/?badge=latest)) 5 | If you are in GitHub or locally, ignore it. -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx-toolbox 2 | m2r2 3 | pandas 4 | rdkit-pypi 5 | ipython 6 | requests -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # using commands from https://gist.github.com/matteoferla/ba72ab12a9e5f690277e2e88551773aa 3 | # modified for readthedocs 4 | # This file only contains a selection of the most common options. For a full 5 | # list see the documentation: 6 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 7 | 8 | # ``.readthedocs.yaml` installs it. 9 | # import os 10 | # import sys 11 | # sys.path.insert(0, os.path.abspath("../../")) 12 | 13 | # -- Project information ----------------------------------------------------- 14 | 15 | project = 'smallworld_api' 16 | copyright = '2022, Matteo Ferla' 17 | author = 'Matteo Ferla' 18 | github_username = 'matteoferla' 19 | github_repository = 'Python_SmallWorld_API' 20 | 21 | 22 | # -- General configuration --------------------------------------------------- 23 | 24 | extensions = [ 25 | 'readthedocs_ext.readthedocs', 26 | 'sphinx.ext.viewcode', 27 | 'sphinx.ext.todo', 28 | 'sphinx_toolbox.more_autodoc', 29 | 'sphinx.ext.autodoc', 30 | #'sphinx.ext.imgconverter', 31 | #'m2r' 32 | ] 33 | 34 | html_static_path = [] 35 | templates_path = ['_templates'] 36 | #source_suffix = ['.rst', '.md'] 37 | always_document_param_types = True 38 | typehints_defaults = 'braces' 39 | 40 | from m2r2 import parse_from_file # noqa 41 | 42 | for markdown_filename, srt_filename in {'../../README.md': 'readme.rst', 43 | '../../export_note.md': 'export_note.rst'}.items(): 44 | with open(srt_filename, 'w') as fh: 45 | fh.write(parse_from_file(markdown_filename)) 46 | 47 | language = 'en' 48 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 49 | html_theme = 'sphinx_rtd_theme' 50 | todo_include_todos = True 51 | 52 | def skip(app, what, name, obj, would_skip, options): 53 | if name in ( '__init__',): 54 | return False 55 | return would_skip 56 | 57 | def setup(app): 58 | app.connect('autodoc-skip-member', skip) -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. smallworld_api documentation master file, created by 2 | sphinx-quickstart on Tue Mar 22 16:54:05 2022. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to smallworld_api's documentation! 7 | ========================================== 8 | 9 | .. toctree:: 10 | :maxdepth: 4 11 | :caption: Contents: 12 | 13 | readme 14 | smallworld_api 15 | export_note 16 | 17 | 18 | Indices and tables 19 | ================== 20 | 21 | * :ref:`genindex` 22 | * :ref:`modindex` 23 | * :ref:`search` 24 | -------------------------------------------------------------------------------- /docs/source/smallworld_api.rst: -------------------------------------------------------------------------------- 1 | smallworld\_api package 2 | ======================= 3 | 4 | .. automodule:: smallworld_api 5 | :members: 6 | :inherited-members: 7 | :undoc-members: 8 | :show-inheritance: 9 | 10 | Private inherited classes 11 | ========================== 12 | 13 | smallworld\_api.base module 14 | --------------------------- 15 | 16 | .. automodule:: smallworld_api.base 17 | :members: 18 | :undoc-members: 19 | :show-inheritance: 20 | 21 | smallworld\_api.common\_db module 22 | --------------------------------- 23 | 24 | .. automodule:: smallworld_api.common_db 25 | :members: 26 | :undoc-members: 27 | :show-inheritance: 28 | 29 | smallworld\_api.defaults module 30 | ------------------------------- 31 | 32 | .. automodule:: smallworld_api.defaults 33 | :members: 34 | :undoc-members: 35 | :show-inheritance: 36 | 37 | smallworld\_api.extras module 38 | ----------------------------- 39 | 40 | .. automodule:: smallworld_api.extras 41 | :members: 42 | :undoc-members: 43 | :show-inheritance: 44 | 45 | smallworld\_api.nomatcherror module 46 | ----------------------------------- 47 | 48 | .. automodule:: smallworld_api.nomatcherror 49 | :members: 50 | :undoc-members: 51 | :show-inheritance: 52 | 53 | smallworld\_api.search module 54 | ----------------------------- 55 | 56 | .. automodule:: smallworld_api.search 57 | :members: 58 | :undoc-members: 59 | :show-inheritance: 60 | 61 | -------------------------------------------------------------------------------- /export_note.md: -------------------------------------------------------------------------------- 1 | ## Export note 2 | 3 | The 'export' requests is the second request and returns the sought data of a given generated 'hlid' (hit list id). 4 | 5 | The params passed are many and are to allow the table formatting. 6 | 7 | In the class `SmallWorld` there's a class attribute called `valid_export_columns` which is a dictionary that goes into 8 | the get request's params. 9 | 10 | It currently is not split up and has keys like `'columns[0][name]'` as opposed to a single `'columns'` 11 | holding a value of type `List[Dict[str, Any]]`. Say: 12 | 13 | ```python 14 | {'columns': [{'search': {'value': '', 'regex': 'false'}, 15 | 'data': '0', 16 | 'name': 'alignment', 17 | 'searchable': 'true', 18 | 'orderable': 'false'}, 19 | {'search': {'value': '0-10', 'regex': 'false'}, 20 | 'data': '1', 21 | 'name': 'dist', 22 | 'searchable': 'true', 23 | 'orderable': 'true'}, 24 | {'search': {'value': '', 'regex': 'false'}, 25 | 'data': '2', 26 | 'name': 'ecfp4', 27 | 'searchable': 'true', 28 | 'orderable': 'true'}, 29 | {'search': {'value': '', 'regex': 'false'}, 30 | 'data': '3', 31 | 'name': 'daylight', 32 | 'searchable': 'true', 33 | 'orderable': 'true'}, 34 | {'search': {'value': '0-4', 'regex': 'false'}, 35 | 'data': '4', 36 | 'name': 'topodist', 37 | 'searchable': 'true', 38 | 'orderable': 'true'}, 39 | {'search': {'value': '', 'regex': 'false'}, 40 | 'data': '5', 41 | 'name': 'mces', 42 | 'searchable': 'true', 43 | 'orderable': 'true'}, 44 | {'search': {'value': '0-4', 'regex': 'false'}, 45 | 'data': '6', 46 | 'name': 'tdn', 47 | 'searchable': 'true', 48 | 'orderable': 'true'}, 49 | {'search': {'value': '0-4', 'regex': 'false'}, 50 | 'data': '7', 51 | 'name': 'tup', 52 | 'searchable': 'true', 53 | 'orderable': 'true'}, 54 | {'search': {'value': '0-4', 'regex': 'false'}, 55 | 'data': '8', 56 | 'name': 'rdn', 57 | 'searchable': 'true', 58 | 'orderable': 'true'}, 59 | {'search': {'value': '0-4', 'regex': 'false'}, 60 | 'data': '9', 61 | 'name': 'rup', 62 | 'searchable': 'true', 63 | 'orderable': 'true'}, 64 | {'search': {'value': '0-4', 'regex': 'false'}, 65 | 'data': '10', 66 | 'name': 'ldn', 67 | 'searchable': 'true', 68 | 'orderable': 'true'}, 69 | {'search': {'value': '0-4', 'regex': 'false'}, 70 | 'data': '11', 71 | 'name': 'lup', 72 | 'searchable': 'true', 73 | 'orderable': 'true'}, 74 | {'search': {'value': '', 'regex': 'false'}, 75 | 'data': '12', 76 | 'name': 'mut', 77 | 'searchable': 'true', 78 | 'orderable': 'true'}, 79 | {'search': {'value': '0-4', 'regex': 'false'}, 80 | 'data': '13', 81 | 'name': 'maj', 82 | 'searchable': 'true', 83 | 'orderable': 'true'}, 84 | {'search': {'value': '0-4', 'regex': 'false'}, 85 | 'data': '14', 86 | 'name': 'min', 87 | 'searchable': 'true', 88 | 'orderable': 'true'}, 89 | {'search': {'value': '0-4', 'regex': 'false'}, 90 | 'data': '15', 91 | 'name': 'hyb', 92 | 'searchable': 'true', 93 | 'orderable': 'true'}, 94 | {'search': {'value': '0-4', 'regex': 'false'}, 95 | 'data': '16', 96 | 'name': 'sub', 97 | 'searchable': 'true', 98 | 'orderable': 'true'}], 99 | 'order': [{'column': 0, 'dir': 'asc'}, ], 100 | 'search': [{'value': '', 'regex': False}] 101 | } 102 | ``` 103 | This is because for some reason it the HTML quoted version is different than the above. 104 | As the janky `'columns[0][name]'` works, I've not bothered looking into it. -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | from warnings import warn 4 | 5 | sw_url = 'https://sw.docking.org/search.html' 6 | warn(f'''DISCLAIMER: To use the SmallWorld API please make sure you can legally query a server with it, 7 | for example {sw_url} by John Irwin. 8 | This tool is not affiliated with the great folk at NextMove Software nor the Irwin lab, 9 | but I am grateful for their smashing work and so should you so don't forget to cite them!''') 10 | 11 | # ----------- python version check 12 | import sys 13 | 14 | if sys.version_info.major != 3 or sys.version_info.minor < 6: 15 | print(sys.version_info) 16 | raise SystemError('Module written for Python 3.6+.') 17 | 18 | # -------------- fill docstring 19 | import os 20 | 21 | this_directory = os.path.abspath(os.path.dirname(__file__)) 22 | __doc__ = 'Smallworld API' 23 | if os.path.exists(os.path.join(this_directory, 'README.md')): 24 | # there is no manifest.in file, so it could be missing 25 | with open(os.path.join(this_directory, 'README.md'), encoding='utf-8') as f: 26 | __doc__ = f.read() 27 | 28 | description = f'An (unofficial) Python3 module to query the SmallWorld chemical space search server ({sw_url})' 29 | 30 | setup( 31 | name='smallworld-api', 32 | version='1.1.4', 33 | python_requires='>=3.7', 34 | packages=find_packages(), 35 | install_requires=['pandas', 'requests', 'ipython'], # rdkit is optional. 36 | url='https://github.com/matteoferla/Python_SmallWorld_API', 37 | license='MIT', 38 | author='Matteo Ferla', 39 | author_email='matteo@well.ox.ac.uk', 40 | classifiers=[ # https://pypi.org/classifiers/ 41 | 'Development Status :: 5 - Production/Stable', # Development Status :: 4 - Beta 42 | 'Intended Audience :: Science/Research', 43 | 'Topic :: Scientific/Engineering :: Chemistry', 44 | 'License :: OSI Approved :: MIT License', 45 | 'Programming Language :: Python :: 3.7', 46 | 'Programming Language :: Python :: 3.8', 47 | 'Programming Language :: Python :: 3.9', 48 | ], 49 | description=description, 50 | long_description=__doc__, 51 | long_description_content_type='text/markdown' 52 | ) 53 | -------------------------------------------------------------------------------- /smallworld_api/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | """ 4 | The primary class in the smallworld_api module is ``SmallWorld``. 5 | All other classes, in other files are inherited by this. 6 | """ 7 | 8 | __all__ = ['SmallWorld', 'NoMatchError'] 9 | 10 | from warnings import warn 11 | import pandas as pd 12 | from .defaults import Defaults # class attributes 13 | from .base import Base # inherits Defaults 14 | from .extras import Extras # extra methods not required by search 15 | from typing import * 16 | from .nomatcherror import NoMatchError 17 | from .search import Searcher 18 | import time, sys 19 | 20 | if TYPE_CHECKING or 'sphinx' in sys.modules: 21 | from rdkit import Chem 22 | 23 | 24 | class SmallWorld(Searcher): 25 | """ 26 | A python3 API based upon https://wiki.docking.org/index.php/How_to_use_SmallWorld_API 27 | 28 | This (``SmallWorld``) is the main class of the module ``smallworld_api`` and is split into separate files 29 | by functionality. The classes it bases are as follows: 30 | Defaults -> Common -> Base -> Extras -> Searcher -> SmallWorld 31 | 32 | For example, class attributes are in Defaults. 33 | """ 34 | 35 | def __init__(self, update_dbs: bool = True): 36 | """ 37 | Initialisation results in the updating of the databases. 38 | """ 39 | super().__init__() 40 | if update_dbs and not self._db_updated: 41 | self.retrieve_databases() 42 | 43 | def search_smiles(self, 44 | smiles: str, 45 | db: str, 46 | **other_parameters) -> pd.DataFrame: 47 | """ 48 | Given a smiles and a database return the table of results! 49 | 50 | The optional arguments are: 51 | 52 | * dist = 10 (atom difference distance threshhold) 53 | * several in `.default_submission`... 54 | 55 | The number of results given are controlled by: 56 | 57 | * length = 10 (number of results) 58 | * draw = 10 (pointless atm) 59 | * start = 0 60 | 61 | Which are passed onto `.get_results`. 62 | 63 | Calls ``submit_query`` and then ``get_results`` (code in ``search.py``) 64 | Returns a pandas dataframe of results. 65 | The dataframe is not rdkit modified yet. 66 | """ 67 | start = int(other_parameters['start']) if 'start' in other_parameters else 0 68 | dist = int(other_parameters['dist']) if 'dist' in other_parameters else 0 69 | length = int(other_parameters['length']) if 'length' in other_parameters else 10 70 | draw = int(other_parameters['draw']) if 'draw' in other_parameters else 10 71 | valids = {k: other_parameters[k] for k in self.valid_submit_keys if k in other_parameters} 72 | if db not in self.db_choices: 73 | warn(f'{db} is not a valid choice ({self.db_choices}).' + 74 | 'Check updated with `.retrieve_scorefun_options()`') 75 | params = {'smi': smiles, 76 | 'db': db, 77 | **self.default_submission, 78 | 'dist': int(dist), 79 | **valids} 80 | self.query_summary: Dict[str, Any] = self.submit_query(params) 81 | self.hit_list_id: int = self.query_summary['hlid'] 82 | try: 83 | results = self.get_results(start, length, draw) 84 | except BaseException as error: 85 | warn(f'{error.__class__.__name__}: {error} was raised. ' + 86 | 'Retrying in 2 seconds. There may be connection issues') 87 | time.sleep(2) 88 | results = self.get_results(start, length, draw) 89 | return results 90 | 91 | def search_mol(self, 92 | mol: Chem.Mol, 93 | db: str, 94 | **other_parameters) -> pd.DataFrame: 95 | smiles = self.mol2smiles(mol) 96 | return self.search_smiles(smiles=smiles, db=db, **other_parameters) 97 | 98 | def search_many(self, 99 | query: Union[Sequence[Any], Mapping[str, Any]], 100 | db: str, 101 | **other_parameters) -> pd.DataFrame: 102 | """ 103 | search for many SMILES or Chem.Mol. 104 | """ 105 | results: List[pd.DataFrame] = [] 106 | if isinstance(query, Sequence): # list or tuple etc. 107 | iterator = enumerate(query) 108 | elif isinstance(query, Mapping): # dict etc. 109 | iterator = query.items() 110 | elif isinstance(query, pd.Series): # a pd.Series is not a sequence or a mapping apparently 111 | iterator = query.to_dict().items() 112 | else: 113 | raise TypeError(f'Unrecognised type: {type(query)} for `.search_many_smiles`') 114 | tick = 0 115 | for name, item in iterator: 116 | # ## what is it? 117 | if isinstance(item, str): 118 | # it's a smiles 119 | smiles = item 120 | elif self.is_this_mol(item): 121 | # rdkit 122 | smiles = self.mol2smiles(item) 123 | elif not item: 124 | # nothing 125 | warn(f'Falsy value {item} in the {type(query)} query') 126 | continue 127 | else: 128 | # mystery 129 | raise TypeError(f'Unrecognised type {type(item)}') 130 | # ## prevent excessive calls 131 | tock = time.time() 132 | if tick > tock - self.speed_threshold: 133 | time.sleep(self.speed_threshold - (tock - tick)) 134 | tick = time.time() 135 | # ## what to capture 136 | if 'tolerate_all_exceptions' in other_parameters: 137 | tolerated_exceptions = (Exception,) 138 | elif 'tolerated_exceptions' in other_parameters: 139 | tolerated_exceptions = other_parameters['tolerated_exceptions'] 140 | elif 'tolerate_NoMatchError' in other_parameters and other_parameters['tolerate_NoMatchError']: 141 | tolerated_exceptions = (NoMatchError,) 142 | else: 143 | tolerated_exceptions = () 144 | # ## run! 145 | try: 146 | self.reset() 147 | result: pd.DataFrame = self.search_smiles(smiles=smiles, db=db, **other_parameters) 148 | if result.empty: 149 | continue 150 | result['query_index'] = name 151 | result['query_smiles'] = smiles 152 | results.append(result) 153 | except tolerated_exceptions as error: 154 | warn(f'{error.__class__.__name__}: {error} for {name}') 155 | # end of loop 156 | if not results: 157 | raise NoMatchError('No results were found in SmallWorld. ' + 158 | 'Considering changing `dist` (distance by N of mismatches) or ' 159 | '`length` (number of results) greater than zero') 160 | return pd.concat(results, axis='index', ignore_index=True) 161 | 162 | def search(self, query: Any, db: str, **other_parameters) -> pd.DataFrame: 163 | """ 164 | The query can be 165 | 166 | * a single SMILES, 167 | * a rdkit.Chem.Mol 168 | * a list of SMILES or rdkit.Chem.Mol 169 | * a dictionary of SMILES or rdkit.Chem.Mol 170 | 171 | These all lead back to ``.search_smiles``, which functions as follows: 172 | """ 173 | if isinstance(query, str): 174 | return self.search_smiles(smiles=query, db=db, **other_parameters) 175 | elif self.is_this_mol(query): # rdkit is optional. 176 | self.search_mol(mol=query, db=db, **other_parameters) 177 | elif isinstance(query, Sized) and len(query) == 0: 178 | raise ValueError('Empty query') 179 | elif isinstance(query, Mapping) or isinstance(query, Sequence): 180 | return self.search_many(query=query, db=db, **other_parameters) 181 | else: 182 | raise TypeError(f'Unknown type {type(query)} for query') 183 | 184 | 185 | SmallWorld.search.__doc__ += SmallWorld.search_smiles.__doc__ 186 | -------------------------------------------------------------------------------- /smallworld_api/base.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from .common_db import Common 3 | from typing import * 4 | 5 | 6 | class Base(Common): # Defaults -> Common -> Base -> Extras -> SmallWorld 7 | 8 | def __init__(self): 9 | """ 10 | The two attributes added during initialisation are for debugging. 11 | """ 12 | self.last_reply: requests.Response() # debugging 13 | self.hit_list_id = -1 14 | self.session = requests.Session() 15 | self.query_summary: Dict[str, Any] = {} 16 | 17 | def reset(self): 18 | self.__init__() 19 | 20 | def _retrieve(self, url: str, params: Dict[str, Any]) -> requests.Response: 21 | """ 22 | This is convoluted for debugging. 23 | If something goes wrong... ``.last_reply`` can be inspected. 24 | """ 25 | self.last_reply: requests.Response = self.session.get(url=self.base_url + url, 26 | params=params, 27 | stream=self.stream_response, 28 | timeout=600, # 10 minutes 29 | ) 30 | self.last_reply.raise_for_status() 31 | return self.last_reply 32 | -------------------------------------------------------------------------------- /smallworld_api/common_db.py: -------------------------------------------------------------------------------- 1 | import re 2 | from .defaults import Defaults 3 | 4 | 5 | class Common(Defaults): # Defaults -> Common -> Base -> Extras -> Searcher -> SmallWorld 6 | 7 | def _get_year_from_name(self, name) -> float: 8 | """ 9 | Given a string containing 20Q3 returns 20.5 10 | 11 | :param name: 12 | :return: 13 | """ 14 | rex = re.search(r'(?P\d\d)Q(?P\d)', name) 15 | if rex: 16 | return int(rex.group('year')) + int(rex.group('quarter')) / 4 - 0.25 17 | rex = re.search(r'20(?P\d\d)', name) # > 2099 --> sorry captain Kirk your dataset is too new 18 | if rex: 19 | return int(rex.group('year')) 20 | rex = re.search(r'(?P\d\d)', name) 21 | if rex: # dodgy? 22 | return int(rex.group('year')) 23 | return 0. 24 | 25 | @property 26 | def REAL_dataset(self) -> str: # noqa 27 | # As per PEP 8 REAL_dataset is better than real_dataset. So ignore PyCharm 28 | # likewise... This is not an enum. 29 | return self._latest_dataset(name='real', error_name='Enamine Real') 30 | 31 | @property 32 | def ZINC_dataset(self) -> str: # noqa 33 | return self._latest_dataset(name='zinc', error_name='Zinc') 34 | 35 | @property 36 | def WuXi_dataset(self) -> str: # noqa 37 | return self._latest_dataset(name='wuxi', error_name='WuXi') 38 | 39 | def _latest_dataset(self, name: str, error_name: str) -> str: 40 | """ 41 | Get the latest dataset named ``name``. 42 | If it disappeared from ``.db_choices`` attribute raise an error ``error_name`` w/ fancy formatting 43 | (Say Enamine REAL is just REAL in the name). 44 | """ 45 | name = name.lower() 46 | options = [db for db in self.db_choices if name in db.lower()] 47 | if len(options) == 0: 48 | raise ValueError(f'There is no {error_name} in the options') 49 | return sorted(options, key=self._get_year_from_name, reverse=True)[0] 50 | -------------------------------------------------------------------------------- /smallworld_api/defaults.py: -------------------------------------------------------------------------------- 1 | class Defaults: # Defaults -> Common -> Base -> Extras -> Searcher -> SmallWorld 2 | 3 | # routes to API endpoints 4 | base_url = 'https://sw.docking.org' 5 | search_route = '/search/submit' 6 | view_route = '/search/view' 7 | 8 | stream_response = True 9 | 10 | # database choice list is updated with ``df = SmallWorld.retrieve_databases()`` 11 | _db_updated = False 12 | db_choices = ['REAL-Database-22Q1.smi.anon', 13 | 'WuXi-20Q4.smi.anon', 14 | 'all-zinc.smi.anon', 15 | 'for-sale.smi.anon', 16 | 'wait-ok.smi.anon', 17 | 'MculeUltimate-20Q2.smi.anon', 18 | 'bb_forsale.smi.anon', 19 | 'mcule_full.smi.anon', 20 | 'mcule-v.smi.anon', 21 | 'old-in-stock.smi.anon', 22 | 'mcule.smi.anon', 23 | 'informer-set.smi.anon', 24 | 'mculebb.smi.anon', 25 | 'bb_instock.smi.anon', 26 | 'Enamine-BB-Stock-Mar2022.smi.anon', 27 | 'Enamine-SC-Stock-Mar2022.smi.anon', 28 | 'interesting.smi.anon'] 29 | 30 | # most of these are enabled by default 31 | sf_choices = ['Atom Alignment', 'SMARTS Alignment', 'ECFP4', 'Daylight'] 32 | 33 | # these are the keys that are valid according to the documentation for a submit request: 34 | valid_submit_keys = ['smi', 'db', 'dist', 'tdn', 'tup', 'rdn', 'rup', 'ldn', 'lup', 'scores'] 35 | default_submission = {'dist': 8, 36 | 'tdn': 6, 37 | 'rdn': 6, 38 | 'rup': 2, 39 | 'ldn': 2, 40 | 'lup': 2, 41 | 'maj': 6, 42 | 'min': 6, 43 | 'sub': 6, 44 | 'sdist': 12, 45 | 'tup': 6, 46 | 'scores': 'Atom Alignment,ECFP4,Daylight' 47 | } 48 | 49 | @property 50 | def speed_threshold(self): 51 | # user cannot change! 52 | # max 5 secs between each query 53 | return 5 54 | 55 | # this is the required information (kind of) 56 | valid_export_columns = {'columns[0][data]': '0', 57 | 'columns[0][name]': 'alignment', 58 | 'columns[0][searchable]': 'true', 59 | 'columns[0][orderable]': 'false', 60 | 'columns[0][search][value]': '', 61 | 'columns[0][search][regex]': 'false', 62 | 'columns[1][data]': '1', 63 | 'columns[1][name]': 'dist', 64 | 'columns[1][searchable]': 'true', 65 | 'columns[1][orderable]': 'true', 66 | 'columns[1][search][value]': '0-12', 67 | 'columns[1][search][regex]': 'false', 68 | 'columns[2][data]': '2', 69 | 'columns[2][name]': 'ecfp4', 70 | 'columns[2][searchable]': 'true', 71 | 'columns[2][orderable]': 'true', 72 | 'columns[2][search][value]': '', 73 | 'columns[2][search][regex]': 'false', 74 | 'columns[3][data]': '3', 75 | 'columns[3][name]': 'daylight', 76 | 'columns[3][searchable]': 'true', 77 | 'columns[3][orderable]': 'true', 78 | 'columns[3][search][value]': '', 79 | 'columns[3][search][regex]': 'false', 80 | 'columns[4][data]': '4', 81 | 'columns[4][name]': 'topodist', 82 | 'columns[4][searchable]': 'true', 83 | 'columns[4][orderable]': 'true', 84 | 'columns[4][search][value]': '0-8', 85 | 'columns[4][search][regex]': 'false', 86 | 'columns[5][data]': '5', 87 | 'columns[5][name]': 'mces', 88 | 'columns[5][searchable]': 'true', 89 | 'columns[5][orderable]': 'true', 90 | 'columns[5][search][value]': '', 91 | 'columns[5][search][regex]': 'false', 92 | 'columns[6][data]': '6', 93 | 'columns[6][name]': 'tdn', 94 | 'columns[6][searchable]': 'true', 95 | 'columns[6][orderable]': 'true', 96 | 'columns[6][search][value]': '0-6', 97 | 'columns[6][search][regex]': 'false', 98 | 'columns[7][data]': '7', 99 | 'columns[7][name]': 'tup', 100 | 'columns[7][searchable]': 'true', 101 | 'columns[7][orderable]': 'true', 102 | 'columns[7][search][value]': '0-6', 103 | 'columns[7][search][regex]': 'false', 104 | 'columns[8][data]': '8', 105 | 'columns[8][name]': 'rdn', 106 | 'columns[8][searchable]': 'true', 107 | 'columns[8][orderable]': 'true', 108 | 'columns[8][search][value]': '0-6', 109 | 'columns[8][search][regex]': 'false', 110 | 'columns[9][data]': '9', 111 | 'columns[9][name]': 'rup', 112 | 'columns[9][searchable]': 'true', 113 | 'columns[9][orderable]': 'true', 114 | 'columns[9][search][value]': '0-2', 115 | 'columns[9][search][regex]': 'false', 116 | 'columns[10][data]': '10', 117 | 'columns[10][name]': 'ldn', 118 | 'columns[10][searchable]': 'true', 119 | 'columns[10][orderable]': 'true', 120 | 'columns[10][search][value]': '0-2', 121 | 'columns[10][search][regex]': 'false', 122 | 'columns[11][data]': '11', 123 | 'columns[11][name]': 'lup', 124 | 'columns[11][searchable]': 'true', 125 | 'columns[11][orderable]': 'true', 126 | 'columns[11][search][value]': '0-2', 127 | 'columns[11][search][regex]': 'false', 128 | 'columns[12][data]': '12', 129 | 'columns[12][name]': 'mut', 130 | 'columns[12][searchable]': 'true', 131 | 'columns[12][orderable]': 'true', 132 | 'columns[12][search][value]': '', 133 | 'columns[12][search][regex]': 'false', 134 | 'columns[13][data]': '13', 135 | 'columns[13][name]': 'maj', 136 | 'columns[13][searchable]': 'true', 137 | 'columns[13][orderable]': 'true', 138 | 'columns[13][search][value]': '0-6', 139 | 'columns[13][search][regex]': 'false', 140 | 'columns[14][data]': '14', 141 | 'columns[14][name]': 'min', 142 | 'columns[14][searchable]': 'true', 143 | 'columns[14][orderable]': 'true', 144 | 'columns[14][search][value]': '0-6', 145 | 'columns[14][search][regex]': 'false', 146 | 'columns[15][data]': '15', 147 | 'columns[15][name]': 'hyb', 148 | 'columns[15][searchable]': 'true', 149 | 'columns[15][orderable]': 'true', 150 | 'columns[15][search][value]': '0-6', 151 | 'columns[15][search][regex]': 'false', 152 | 'columns[16][data]': '16', 153 | 'columns[16][name]': 'sub', 154 | 'columns[16][searchable]': 'true', 155 | 'columns[16][orderable]': 'true', 156 | 'columns[16][search][value]': '0-6', 157 | 'columns[16][search][regex]': 'false', 158 | 'order[0][column]': '0', 159 | 'order[0][dir]': 'asc', 160 | 'search[value]': '', 161 | 'search[regex]': 'false'} 162 | -------------------------------------------------------------------------------- /smallworld_api/extras.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | import operator, re, json 3 | from warnings import warn 4 | import pandas as pd 5 | import requests 6 | from typing import * 7 | from IPython.display import display, HTML 8 | from .base import Base # inherits Defaults 9 | from collections import namedtuple 10 | import sys 11 | 12 | if TYPE_CHECKING or 'sphinx' in sys.modules: 13 | from rdkit import Chem 14 | 15 | 16 | class Extras(Base): # Defaults -> Common -> Base -> Extras -> Searcher -> SmallWorld 17 | 18 | def show_reply_as_html(self, reply: Optional[requests.Response] = None): 19 | """ 20 | The API calls may fail for some reason. 21 | Generally code 500 due to the server timing out and having issues. 22 | This prints the reply. 23 | """ 24 | if reply is None: 25 | reply = self.last_reply 26 | display(HTML(reply.text)) 27 | 28 | @classmethod 29 | def retrieve_scorefun_options(cls) -> pd.DataFrame: 30 | reply: requests.Response = requests.get('https://sw.docking.org/search/config') 31 | reply.raise_for_status() 32 | scores = pd.DataFrame(reply.json()['ScoreFuncs']) 33 | cls.sf_choices = scores.name.to_list() 34 | return scores 35 | 36 | @classmethod 37 | def retrieve_databases(cls) -> pd.DataFrame: 38 | reply = requests.get('https://sw.docking.org/search/maps') 39 | reply.raise_for_status() 40 | dbs = (pd.DataFrame.from_dict(reply.json(), orient='index') 41 | [['name', 'numEntries', 'numMapped', 'numUnmapped', 'numSkipped', 'status']] 42 | .sort_values('numMapped', ascending=False)) 43 | cls.db_choices = dbs.index.to_list() # previously it was: dbs.name.to_list() 44 | cls._db_updated = True 45 | if not hasattr(cls, '__name__'): # ``retrieve_databases`` called on instance 46 | cls.__class__.db_choices = dbs.index.to_list() 47 | cls.__class__._db_updated = True 48 | return dbs 49 | 50 | @staticmethod 51 | def check_smiles(smiles: str): # -> rdkit.Chem.Mol 52 | from rdkit import Chem 53 | mol = Chem.MolFromSmiles(smiles) 54 | assert mol is not None 55 | return mol 56 | 57 | def mol2smiles(self, mol: Chem.Mol) -> str: 58 | from rdkit import Chem 59 | return Chem.MolToSmiles(mol) 60 | 61 | def is_this_mol(self, item: Any) -> bool: 62 | return item.__class__.__name__ == 'Mol' 63 | -------------------------------------------------------------------------------- /smallworld_api/nomatcherror.py: -------------------------------------------------------------------------------- 1 | class NoMatchError(Exception): 2 | """ 3 | No match for the molecule was found. 4 | """ 5 | 6 | def __str__(self): 7 | return 'The API returned no matches. ' + \ 8 | '. '.join(map(str, self.args)) + \ 9 | ' Try a different database (cf. `SmallWorld.retrieve_databases()`' 10 | -------------------------------------------------------------------------------- /smallworld_api/search.py: -------------------------------------------------------------------------------- 1 | import json 2 | import operator 3 | import re 4 | import time 5 | from typing import * 6 | from warnings import warn 7 | import pandas as pd 8 | import requests 9 | from .extras import Extras # extra methods not required by search 10 | from .nomatcherror import NoMatchError 11 | 12 | 13 | class Searcher(Extras): # Defaults -> Common -> Base -> Extras -> Searcher -> SmallWorld 14 | 15 | def submit_query(self, params) -> Dict[str, Any]: 16 | """ 17 | The first step. 18 | """ 19 | try: 20 | reply: requests.Response = self._retrieve(url=self.search_route, params=params) 21 | line_iter = reply.iter_lines(decode_unicode=True) 22 | line_iter = map(str.strip, map(str, line_iter)) 23 | line_iter = filter(lambda line: re.search(r'data:', line), line_iter) 24 | # using iter_lines + in stream mode does not solve the major tom hanging issue... 25 | reply_data: List[Dict[str, Any]] = list() 26 | hlid = -1 27 | for line in line_iter: 28 | datum = json.loads(re.sub(r'^data:\s?', '', line)) 29 | if 'hlid' in datum: 30 | hlid = datum['hlid'] 31 | reply_data.append(datum) 32 | time.sleep(1) 33 | except requests.exceptions.ChunkedEncodingError as error: 34 | print('ChunkedEncodingError: search may be incomplete') 35 | if hlid == -1: 36 | raise ValueError(reply_data[-1]) 37 | if reply_data[-1]['status'] != 'END': 38 | # "Ground Control to Major Tom" means there is no signal. 39 | warn(f"No completed return code returned (generally harmless). See `.query_summary` for actual response") 40 | return reply_data[-1] 41 | 42 | def get_results(self, start: int = 0, length: int = 10, draw: int = 10) -> pd.DataFrame: 43 | params = dict(hlid=self.hit_list_id, 44 | start=start, 45 | length=length, 46 | draw=draw 47 | ) 48 | params = {**params, **self.valid_export_columns} 49 | reply = self._retrieve(url=self.view_route, 50 | params=params) 51 | if not reply.json()["recordsTotal"]: 52 | raise NoMatchError(f'There are {reply.json()["recordsTotal"]} hits in the reply') 53 | reply_data = reply.json()['data'] 54 | if not reply_data: 55 | raise NoMatchError('There is no `data` in the reply!') 56 | # expand the first column 57 | df1 = pd.DataFrame(map(operator.itemgetter(0), reply_data)) 58 | if len(df1) == 0: 59 | raise NoMatchError('Reply generated an empty table') 60 | df2 = pd.DataFrame(reply_data).drop(columns=[0]) 61 | columns = [v for p, v in params.items() if re.match(r'columns\[\d+]\[name]', p)] 62 | df2.columns = columns[1:] 63 | df = pd.concat([df1, df2], axis=1) 64 | df['name'] = df.hitSmiles.str.split(expand=True)[1] 65 | df['smiles'] = df.hitSmiles.str.split(expand=True)[0] 66 | # PandasTools.AddMoleculeColumnToFrame(df,'smiles','molecule',includeFingerprints=True) 67 | return df 68 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from smallworld_api import SmallWorld 3 | import pandas as pd 4 | 5 | class TestAPI(unittest.TestCase): 6 | def test_small_db(self): 7 | from IPython.display import display 8 | # input smiles 9 | aspirin = 'O=C(C)Oc1ccccc1C(=O)O' 10 | SmallWorld.check_smiles(aspirin) # assert raised if gibberish 11 | # run! 12 | sws = SmallWorld() 13 | results: pd.DataFrame = sws.search(aspirin, dist=5, db='WorldDrugs-20Q2-3004', length=10) 14 | self.assertEqual(len(results), 10 ) 15 | 16 | def test_big_db(self): 17 | from IPython.display import display 18 | # input smiles 19 | melatonin = 'COc1ccc2[nH]cc(CCNC(C)=O)c2c1' 20 | SmallWorld.check_smiles(melatonin) # assert raised if gibberish 21 | # run! 22 | sws = SmallWorld() 23 | results: pd.DataFrame = sws.search(melatonin, dist=5, db='REAL_DB_20Q2', length=10) 24 | self.assertEqual(len(results), 10) 25 | 26 | if __name__ == '__main__': 27 | unittest.main() 28 | --------------------------------------------------------------------------------