├── .coveragerc ├── .github ├── ISSUE_TEMPLATE │ ├── ---bug-report.md │ ├── ---feature-request.md │ └── --questions---help.md ├── PULL_REQUEST_TEMPLATE.md └── workflows │ └── python-publish.yml ├── .gitignore ├── .travis.yml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── azure-pipelines.yml ├── ci └── install.sh ├── conda └── meta.yaml ├── examples ├── README.md ├── basic_functions.ipynb ├── cytoscape_vis.ipynb ├── example.sdf └── scaffold_graphs.ipynb ├── img └── scaffoldgraph.jpg ├── requirements.txt ├── scaffoldgraph ├── __init__.py ├── analysis │ ├── __init__.py │ ├── diversity.py │ ├── enrichment.py │ ├── frequency.py │ ├── general.py │ └── representation.py ├── core │ ├── __init__.py │ ├── fragment.py │ ├── graph.py │ └── scaffold.py ├── io │ ├── __init__.py │ ├── dataframe.py │ ├── sdf.py │ ├── smiles.py │ ├── supplier.py │ └── tsv.py ├── network.py ├── prioritization │ ├── __init__.py │ ├── generic_rules.py │ ├── original_rules.py │ ├── prioritization_rules.py │ ├── prioritization_ruleset.py │ └── rule_io.py ├── scripts │ ├── __init__.py │ ├── generate.py │ ├── misc.py │ ├── operations.py │ └── run.py ├── tree.py ├── utils │ ├── __init__.py │ ├── aggregate.py │ ├── bipartite.py │ ├── cache.py │ ├── logging.py │ ├── misc.py │ └── subset.py └── vis │ ├── __init__.py │ ├── base.py │ ├── notebook │ ├── __init__.py │ ├── cytoscape.py │ └── resources │ │ └── cytoscape.json │ └── utils.py ├── setup.cfg ├── setup.py └── tests ├── __init__.py ├── analysis ├── __init__.py ├── test_enrichment.py ├── test_general.py └── test_representation.py ├── core ├── test_fragment.py ├── test_graph.py └── test_scaffold.py ├── data └── test_smiles.smi ├── prioritization ├── __init__.py ├── test_generic_rules.py ├── test_original_rules.py ├── test_prioritization_rules.py └── test_prioritization_ruleset.py ├── scripts └── test_scripts.py ├── test_network.py ├── test_tree.py ├── utils ├── __init__.py ├── test_aggregate.py ├── test_bipartite.py 
├── test_misc.py └── test_subset.py └── vis ├── __init__.py ├── test_notebook.py └── test_vis_utils.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit = 3 | scaffoldgraph/vis/notebook/*, 4 | scaffoldgraph/scripts/misc.py, 5 | 6 | [report] 7 | exclude_lines = 8 | pragma: no cover 9 | logger.* 10 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/---bug-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F41B Bug report" 3 | about: Create a report to help us improve ScaffoldGraph 4 | title: "[BUG]" 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behaviour: 15 | 1. 16 | 2. 17 | 3. 18 | 19 | **Expected behaviour** 20 | A clear and concise description of what you expected to happen. 21 | 22 | **Desktop (please complete the following information):** 23 | - OS: [e.g. iOS] 24 | - Version [e.g. 22] 25 | 26 | **Additional context** 27 | Add any other context about the problem here. 28 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/---feature-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F680 Feature request" 3 | about: Suggest an idea for this project 4 | title: "[F]" 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 
15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/--questions---help.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "❓ Questions & Help" 3 | about: Start a discussion related to ScaffoldGraph 4 | title: "[Q]" 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## ❓ Questions & Help 11 | 12 | 13 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | #### Reference Issue 5 | 6 | 7 | 8 | #### What does this implement/fix? 9 | 10 | 11 | #### Any other comments? 12 | 13 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | name: Upload to PyPi 2 | 3 | on: 4 | release: 5 | types: [created] 6 | 7 | jobs: 8 | deploy: 9 | 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - uses: actions/checkout@v2 14 | - name: Set up Python 15 | uses: actions/setup-python@v2 16 | with: 17 | python-version: '3.x' 18 | - name: Install dependencies 19 | run: | 20 | python -m pip install --upgrade pip 21 | pip install setuptools wheel twine 22 | - name: Build and publish 23 | env: 24 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} 25 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 26 | run: | 27 | python setup.py sdist bdist_wheel 28 | twine upload dist/* 29 | -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # PyCharm project settings 98 | .idea 99 | 100 | # Rope project settings 101 | .ropeproject 102 | 103 | # mkdocs documentation 104 | /site 105 | 106 | # mypy 107 | .mypy_cache/ -------------------------------------------------------------------------------- /.travis.yml: 
-------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.6" 4 | - "3.7" 5 | - "3.8" 6 | 7 | install: 8 | - source ci/install.sh 9 | 10 | script: 11 | - pytest -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | 
behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at oliver.scott.17@ucl.ac.uk. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 
67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## Contributing 2 | 3 | Contributions to ScaffoldGraph will most likely fall into the following categories: 4 | 5 | 1. Implementing a new Feature: 6 | * New Features that fit into the scope of this package will be accepted. If you are unsure about the 7 | idea/design/implementation, feel free to post an issue. 8 | 2. Fixing a Bug: 9 | * Bug fixes are welcomed, please send a Pull Request each time a bug is encountered. When sending a Pull 10 | Request please provide a clear description of the encountered bug. If unsure feel free to post an issue. 11 | 12 | Please send Pull Requests to: 13 | http://github.com/UCLCheminformatics/scaffoldgraph 14 | 15 | ### Testing 16 | 17 | ScaffoldGraph's tests are located under `tests/`. 
Run all tests using: 18 | 19 | ``` 20 | $ python setup.py test 21 | ``` 22 | 23 | or run an individual test: `pytest --no-cov tests/core` 24 | 25 | When contributing new features please include appropriate test files 26 | 27 | ### Continuous Integration 28 | 29 | ScaffoldGraph uses Travis CI for continuous integration 30 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 OliverBScott 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.md 2 | include LICENSE 3 | include setup.py 4 | include requirements.txt 5 | include scaffoldgraph/*.py 6 | include scaffoldgraph/*/*.py 7 | include tests/data/test_smiles.smi 8 | include scaffoldgraph/vis/notebook/resources/* 9 | recursive-include tests *.py 10 | -------------------------------------------------------------------------------- /azure-pipelines.yml: -------------------------------------------------------------------------------- 1 | # Python package 2 | # Create and test a Python package on multiple Python versions. 3 | # Add steps that analyze code, save the dist with the build record, publish to a PyPI-compatible index, and more: 4 | # https://docs.microsoft.com/azure/devops/pipelines/languages/python 5 | 6 | trigger: 7 | - main 8 | 9 | pool: 10 | vmImage: 'ubuntu-latest' 11 | strategy: 12 | matrix: 13 | Python36: 14 | python.version: '3.6' 15 | Python37: 16 | python.version: '3.7' 17 | Python38: 18 | python.version: '3.8' 19 | 20 | steps: 21 | - bash: echo "##vso[task.prependpath]$CONDA/bin" 22 | displayName: Add conda to PATH 23 | 24 | - bash: | 25 | conda config --set always_yes yes --set changeps1 no 26 | conda config --add channels conda-forge 27 | conda install -q mamba 28 | mamba create -q -n $(conda_env) python=$(python.version) 29 | displayName: Create conda environment 30 | 31 | - bash: | 32 | source activate $(conda_env) 33 | mamba install -q --file ./requirements.txt 34 | python setup.py install 35 | displayName: Install package 36 | 37 | - bash: | 38 | source activate $(conda_env) 39 | mamba install -q pytest pytest-cov 40 | pytest --cov=scaffoldgraph -v -s tests/ 41 | displayName: Run tests -------------------------------------------------------------------------------- /ci/install.sh: 
-------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | set -e 4 | 5 | # Retrieve the latest miniconda distribution for linux 6 | wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; 7 | 8 | # Install miniconda 9 | bash miniconda.sh -b -p $HOME/miniconda 10 | export PATH="$HOME/miniconda/bin:$PATH" 11 | 12 | # Configure conda 13 | conda config --set always_yes yes --set changeps1 no 14 | conda update -q conda 15 | conda info -a 16 | conda config --add channels conda-forge 17 | conda create -q -n travis_env python=$TRAVIS_PYTHON_VERSION 18 | source activate travis_env 19 | 20 | # Install 21 | conda install --file $TRAVIS_BUILD_DIR/requirements.txt 22 | python $TRAVIS_BUILD_DIR/setup.py install 23 | -------------------------------------------------------------------------------- /conda/meta.yaml: -------------------------------------------------------------------------------- 1 | {% set name = "ScaffoldGraph" %} 2 | {% set version = "1.1.2" %} 3 | 4 | package: 5 | name: {{ name|lower }} 6 | version: {{ version }} 7 | 8 | source: 9 | url: https://pypi.io/packages/source/{{ name[0] }}/{{ name }}/{{ name }}-{{ version }}.tar.gz 10 | sha256: 53bb45b59302c4f0fca8b6b380d79a8cd82f954727e22ec08015f4c6f3ea7640 11 | 12 | build: 13 | noarch: python 14 | number: 0 15 | skip: True # [py<35] 16 | script: "{{ PYTHON }} -m pip install . 
-vv" 17 | 18 | requirements: 19 | build: 20 | - python 21 | - pip 22 | - pytest-runner 23 | run: 24 | - python 25 | - rdkit 26 | - networkx 27 | - matplotlib 28 | - loguru 29 | - tqdm 30 | - scipy >=1.3.1 31 | 32 | test: 33 | imports: 34 | - scaffoldgraph 35 | - scaffoldgraph.network 36 | - scaffoldgraph.tree 37 | - scaffoldgraph.analysis 38 | - scaffoldgraph.core 39 | - scaffoldgraph.prioritization 40 | - scaffoldgraph.utils 41 | - scaffoldgraph.io 42 | commands: 43 | - scaffoldgraph --help 44 | 45 | about: 46 | home: https://github.com/UCLCheminformatics/ScaffoldGraph 47 | license: MIT 48 | license_family: MIT 49 | license_file: LICENSE 50 | summary: 'Cheminformatics package for building scaffold networks and trees.' 51 | description: | 52 | ScaffoldGraph is an open-source cheminformatics library, built using RDKit and NetworkX, 53 | for the generation and analysis of scaffold networks and scaffold trees. 54 | doc_url: https://github.com/UCLCheminformatics/ScaffoldGraph 55 | dev_url: https://github.com/UCLCheminformatics/ScaffoldGraph 56 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # ScaffoldGraph Examples 2 | 3 | These notebooks display basic functionality of the ScaffoldGraph package. 
4 | 5 | **Notebooks:** 6 | * basic_functions.ipynb (Basic functionality independent of the NetworkX API) 7 | * scaffold_graphs.ipynb (Creating scaffold networks and trees using the NetworkX API) 8 | * cytoscape_vis.ipynb (Visualizing scaffold graphs using ipycytoscape) 9 | 10 | **Data:** 11 | * example.sdf (A collection of molecules from PubChem) 12 | -------------------------------------------------------------------------------- /examples/cytoscape_vis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# ScaffoldGraph: Cytoscape Visualization\n", 8 | "\n", 9 | "ScaffoldGraph contains a utility to visualize scaffoldgraphs interactively in Jupyter using the [ipycytoscape](https://github.com/QuantStack/ipycytoscape) module.\n", 10 | "\n", 11 | "To use the utility ipycytoscape must be installed:\n", 12 | "\n", 13 | "`pip install ipycytoscape` or `conda install -c conda-forge ipycytoscape`" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 1, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "import scaffoldgraph as sg\n", 23 | "import os\n", 24 | "\n", 25 | "# Import the visualization utility.\n", 26 | "from scaffoldgraph.vis.notebook import cytoscape" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "The visualizer can be used for all scaffold graph types. Here we will use the `ScaffoldTree`." 
34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 2, 39 | "metadata": {}, 40 | "outputs": [ 41 | { 42 | "data": { 43 | "application/vnd.jupyter.widget-view+json": { 44 | "model_id": "1b3cafb2bea34d4f8c77ecd284a6be91", 45 | "version_major": 2, 46 | "version_minor": 0 47 | }, 48 | "text/plain": [ 49 | "HBox(children=(HTML(value='ScaffoldTree'), FloatProgress(value=0.0, layout=Layout(flex='2'), max=199.0), HTML(…" 50 | ] 51 | }, 52 | "metadata": {}, 53 | "output_type": "display_data" 54 | }, 55 | { 56 | "name": "stdout", 57 | "output_type": "stream", 58 | "text": [ 59 | "\n", 60 | "Type: ScaffoldTree\n", 61 | "Number of molecule nodes: 199\n", 62 | "Number of scaffold nodes: 450\n", 63 | "Number of edges: 584\n", 64 | "Max hierarchy: 6\n", 65 | "Min hierarchy: 1\n", 66 | "\n" 67 | ] 68 | } 69 | ], 70 | "source": [ 71 | "sdf_file = os.path.dirname(sg.__file__).replace('scaffoldgraph', 'examples/example.sdf')\n", 72 | "\n", 73 | "# Construct a ScaffoldTree.\n", 74 | "tree = sg.ScaffoldTree.from_sdf(sdf_file, progress=True)\n", 75 | "\n", 76 | "# Print a quick summary.\n", 77 | "print(sg.utils.summary(tree))" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "Initialize the visualizer with the scaffold tree. Note that custom styles can also be specified, allowing customisation of the visualisation. e.g. colouring molecule nodes according to activity. For more information on styles see the [ipycytoscape documentation](https://ipycytoscape.readthedocs.io/en/latest/index.html). Here we will just use the default style provided." 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": 3, 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [ 93 | "visualizer = cytoscape.CytoscapeVisualizer(tree) # initialize." 
94 | ] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "metadata": {}, 99 | "source": [ 100 | "The visualizer allows the user to draw the whole graph or just a subset of the graph starting from a molecule or scaffold node. Given that the tree can often be very large, visualizing small subsets is often a better idea.\n", 101 | "\n", 102 | "* visualizer.draw (draw the whole graph)\n", 103 | "* visualizer.draw_for_molecule (draw a molecule and its constituent scaffolds)\n", 104 | "* visualizer.draw_for_scaffold (draw a scaffold and its parent and/or children nodes)\n", 105 | "\n", 106 | "When drawing scaffold trees, edges are annotated with the rule used during prioritization." 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 4, 112 | "metadata": {}, 113 | "outputs": [ 114 | { 115 | "data": { 116 | "application/vnd.jupyter.widget-view+json": { 117 | "model_id": "ed1b4914099d4a00804fd6df811cab1c", 118 | "version_major": 2, 119 | "version_minor": 0 120 | }, 121 | "text/plain": [ 122 | "CytoscapeWidget(cytoscape_layout={'name': 'dagre', 'nodeSpacing': 50, 'edgeLengthVal': 50}, cytoscape_style=[{…" 123 | ] 124 | }, 125 | "metadata": {}, 126 | "output_type": "display_data" 127 | } 128 | ], 129 | "source": [ 130 | "# Draw graph for PubChem ID '1201903'.\n", 131 | "widget = visualizer.draw_for_molecule('1201903')\n", 132 | "\n", 133 | "# Widget properties can be modified after construction. \n", 134 | "widget.layout.height = '600px'\n", 135 | "\n", 136 | "# display the widget.\n", 137 | "widget" 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "metadata": {}, 143 | "source": [ 144 | "Using the hierarchy 1 scaffold from the last query ('1201903'), let's draw the subgraph originating from this scaffold ('c1ccncc1').\n", 145 | "\n", 146 | "When creating visualizations from scaffold there is an additional option `traversal` specifying the direction of subgraph creation {'child', 'parent', 'bidirectional'}. 
Specifying child will draw the subgraph toward the child molecules and parent will draw towards lower hierarchy scaffolds. Specifying bidirectional will construct the subgraph in both directions. The default traversal is 'child'." 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 5, 152 | "metadata": {}, 153 | "outputs": [ 154 | { 155 | "data": { 156 | "application/vnd.jupyter.widget-view+json": { 157 | "model_id": "ac18776655314fb5af4bb64c530e33a0", 158 | "version_major": 2, 159 | "version_minor": 0 160 | }, 161 | "text/plain": [ 162 | "CytoscapeWidget(cytoscape_layout={'name': 'dagre', 'nodeSpacing': 50, 'edgeLengthVal': 50}, cytoscape_style=[{…" 163 | ] 164 | }, 165 | "metadata": {}, 166 | "output_type": "display_data" 167 | } 168 | ], 169 | "source": [ 170 | "# Draw graph for scaffold 'c1ccncc1'\n", 171 | "widget = visualizer.draw_for_scaffold('c1ccncc1', traversal='child')\n", 172 | "widget.layout.height = '600px'\n", 173 | "widget" 174 | ] 175 | }, 176 | { 177 | "cell_type": "markdown", 178 | "metadata": {}, 179 | "source": [ 180 | "The layout can also be customised using the `layout_kwargs` argument passing arguments to the CytoscapeWidget.set_layout function. Here we change the algorithm for constructing the layout to 'breadthfirst'." 
181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": 6, 186 | "metadata": {}, 187 | "outputs": [ 188 | { 189 | "data": { 190 | "application/vnd.jupyter.widget-view+json": { 191 | "model_id": "9b62090602234268a15aa01167f5ae2b", 192 | "version_major": 2, 193 | "version_minor": 0 194 | }, 195 | "text/plain": [ 196 | "CytoscapeWidget(cytoscape_layout={'name': 'breadthfirst', 'nodeSpacing': 50, 'edgeLengthVal': 50}, cytoscape_s…" 197 | ] 198 | }, 199 | "metadata": {}, 200 | "output_type": "display_data" 201 | } 202 | ], 203 | "source": [ 204 | "# Draw graph for scaffold 'c1ccncc1'\n", 205 | "widget = visualizer.draw_for_scaffold('c1ccncc1', layout_kwargs={'name': 'breadthfirst'})\n", 206 | "widget.layout.height = '600px'\n", 207 | "widget" 208 | ] 209 | }, 210 | { 211 | "cell_type": "markdown", 212 | "metadata": {}, 213 | "source": [ 214 | "# Further interactivity using ipywidgets \n", 215 | "\n", 216 | "A user also has the ability to add further interactivity to the visualisations using the ipywidgets interact function, an example below shows an interactive visualisation where the user can select the molecule to display using a dropdown box and also update the layout." 
217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": 7, 222 | "metadata": {}, 223 | "outputs": [ 224 | { 225 | "data": { 226 | "application/vnd.jupyter.widget-view+json": { 227 | "model_id": "bd68bd2e87a74c2a9bf0aa6824f3dad1", 228 | "version_major": 2, 229 | "version_minor": 0 230 | }, 231 | "text/plain": [ 232 | "interactive(children=(Dropdown(description='molecule_id', options=('1085650', '1085710', '1106125', '1113907',…" 233 | ] 234 | }, 235 | "metadata": {}, 236 | "output_type": "display_data" 237 | } 238 | ], 239 | "source": [ 240 | "from ipywidgets import interact, fixed\n", 241 | "\n", 242 | "\n", 243 | "def draw(visualizer, molecule_id, layout):\n", 244 | " layout_kwargs = {'name': layout}\n", 245 | " widget = visualizer.draw_for_molecule(molecule_id, layout_kwargs)\n", 246 | " widget.layout.height = '600px'\n", 247 | " return widget\n", 248 | " \n", 249 | "\n", 250 | "# Get the list of all molecules in the graph and sort.\n", 251 | "mol_ids = sorted(list(tree.get_molecule_nodes()))\n", 252 | "\n", 253 | "# List of a subset of available layouts in cytoscape.\n", 254 | "layouts = ['dagre', 'breadthfirst', 'cose']\n", 255 | "\n", 256 | "# Now create the visualization.\n", 257 | "interact(\n", 258 | " draw,\n", 259 | " visualizer=fixed(visualizer),\n", 260 | " molecule_id=mol_ids,\n", 261 | " layout=layouts\n", 262 | ");" 263 | ] 264 | }, 265 | { 266 | "cell_type": "markdown", 267 | "metadata": {}, 268 | "source": [ 269 | "# Style Customization\n", 270 | "\n", 271 | "The user also has the ability to customize various aspects of the widget's style. For example, the nodes can be coloured based on an attribute. The example shows how to colour scaffold nodes based on the 'hierarchy' attribute." 
272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": 8, 277 | "metadata": {}, 278 | "outputs": [ 279 | { 280 | "data": { 281 | "application/vnd.jupyter.widget-view+json": { 282 | "model_id": "5ff71126406b4fad84a0eaeae7262566", 283 | "version_major": 2, 284 | "version_minor": 0 285 | }, 286 | "text/plain": [ 287 | "CytoscapeWidget(cytoscape_layout={'name': 'dagre', 'nodeSpacing': 50, 'edgeLengthVal': 50}, cytoscape_style=[{…" 288 | ] 289 | }, 290 | "metadata": {}, 291 | "output_type": "display_data" 292 | } 293 | ], 294 | "source": [ 295 | "from scaffoldgraph.vis import color_scaffold_nodes_by_attribute\n", 296 | "from rdkit.Chem.Draw import rdMolDraw2D\n", 297 | "\n", 298 | "# Add a 'background-color' attribute to each scaffold node.\n", 299 | "# The cmap can either be a string or a matplotlib.cm.Colormap object.\n", 300 | "color_scaffold_nodes_by_attribute(tree, 'hierarchy', cmap='BuPu', label='color')\n", 301 | "\n", 302 | "# Create a style element for the visualizer.\n", 303 | "background_style = {\n", 304 | " \"selector\": \"node[color]\", # select nodes with an attribute 'color'\n", 305 | " \"style\": {\n", 306 | " \"background-color\": \"data(color)\", # set background-color from attribute.\n", 307 | " }\n", 308 | "}\n", 309 | "\n", 310 | "# We can also customize rdkit drawing options (need a transparent background).\n", 311 | "drawOpts = rdMolDraw2D.MolDrawOptions()\n", 312 | "drawOpts.clearBackground = False\n", 313 | "drawOpts.bondLineWidth = 4\n", 314 | "\n", 315 | "# Create a new visualizer with new options.\n", 316 | "vis = cytoscape.CytoscapeVisualizer(\n", 317 | " tree,\n", 318 | " refresh_images=True,\n", 319 | " rd_draw_options=drawOpts,\n", 320 | ")\n", 321 | "\n", 322 | "# Append the style element to the visualizer.\n", 323 | "vis.style.append(background_style)\n", 324 | "\n", 325 | "# Create visualization.\n", 326 | "widget = vis.draw_for_scaffold('c1ccncc1')\n", 327 | "widget.layout.height = '600px'\n", 328 | "widget\n" 
329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": null, 334 | "metadata": {}, 335 | "outputs": [], 336 | "source": [] 337 | } 338 | ], 339 | "metadata": { 340 | "kernelspec": { 341 | "display_name": "ScaffoldGraph", 342 | "language": "python", 343 | "name": "scaffoldgraph" 344 | }, 345 | "language_info": { 346 | "codemirror_mode": { 347 | "name": "ipython", 348 | "version": 3 349 | }, 350 | "file_extension": ".py", 351 | "mimetype": "text/x-python", 352 | "name": "python", 353 | "nbconvert_exporter": "python", 354 | "pygments_lexer": "ipython3", 355 | "version": "3.7.6" 356 | } 357 | }, 358 | "nbformat": 4, 359 | "nbformat_minor": 4 360 | } 361 | -------------------------------------------------------------------------------- /img/scaffoldgraph.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UCLCheminformatics/ScaffoldGraph/8168d739ca3783d39775fb1553f721b5a7dc437b/img/scaffoldgraph.jpg -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | rdkit 2 | networkx 3 | tqdm 4 | loguru 5 | numpy 6 | scipy>=1.3.1 7 | matplotlib>=2.2.2 8 | pytest -------------------------------------------------------------------------------- /scaffoldgraph/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | scaffoldgraph 3 | """ 4 | 5 | from loguru import logger 6 | 7 | from . import prioritization 8 | from . import utils 9 | from . 
import vis 10 | 11 | from .core import ( 12 | get_next_murcko_fragments, 13 | get_all_murcko_fragments, 14 | get_murcko_scaffold, 15 | get_ring_toplogy_scaffold, 16 | get_ring_connectivity_scaffold, 17 | ) 18 | 19 | from .network import ScaffoldNetwork, HierS 20 | from .tree import ScaffoldTree, tree_frags_from_mol 21 | 22 | __version__ = '1.1.2' 23 | 24 | 25 | __all__ = [ 26 | '__version__', 27 | 'HierS', 28 | 'ScaffoldNetwork', 29 | 'ScaffoldTree', 30 | 'tree_frags_from_mol', 31 | 'get_next_murcko_fragments', 32 | 'get_all_murcko_fragments', 33 | 'get_murcko_scaffold', 34 | 'get_ring_toplogy_scaffold', 35 | 'get_ring_connectivity_scaffold', 36 | ] 37 | 38 | logger.disable(__name__) 39 | -------------------------------------------------------------------------------- /scaffoldgraph/analysis/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | scaffoldgraph.analysis 3 | 4 | The analysis package contains functions for analyzing ScaffoldGraphs 5 | """ 6 | 7 | from .representation import calc_average_pairwise_similarity, get_over_represented_scaffold_classes 8 | from .enrichment import calc_scaffold_enrichment, compound_set_enrichment 9 | from .general import get_virtual_scaffolds, get_singleton_scaffolds 10 | from .diversity import diversity_pick_for_scaffold_class 11 | from .frequency import cumulative_scaffold_frequency, area_under_curve 12 | 13 | 14 | __all__ = [ 15 | 'calc_average_pairwise_similarity', 16 | 'get_over_represented_scaffold_classes', 17 | 'calc_scaffold_enrichment', 18 | 'compound_set_enrichment', 19 | 'get_virtual_scaffolds', 20 | 'get_singleton_scaffolds', 21 | 'diversity_pick_for_scaffold_class', 22 | 'cumulative_scaffold_frequency', 23 | 'area_under_curve' 24 | ] 25 | -------------------------------------------------------------------------------- /scaffoldgraph/analysis/diversity.py: -------------------------------------------------------------------------------- 1 | """ 2 | 
scaffoldgraph.analysis.diversity 3 | """ 4 | 5 | from rdkit.SimDivFilters.rdSimDivPickers import LeaderPicker 6 | from rdkit.Chem.AllChem import GetMorganFingerprintAsBitVect 7 | from rdkit.Chem import MolFromSmiles 8 | 9 | from functools import partial 10 | 11 | 12 | def _form_dist_func(dist_func, fps): 13 | """function: create a partial dist_func.""" 14 | if dist_func.__code__.co_argcount != 3: 15 | raise ValueError('dist_func must have three arguments: i, j, fps') 16 | if dist_func.__code__.co_varnames[2] != 'fps': 17 | raise ValueError('dist_func third argument name must be: fps') 18 | formed_dist_func = partial(dist_func, fps=fps) 19 | return formed_dist_func 20 | 21 | 22 | def _make_diversity_pick(pool, threshold, pick_size, dist_func=None): 23 | """iterable: make a diversity pick from a pool of fingerprints.""" 24 | picker = LeaderPicker() 25 | pool_size = len(pool) 26 | if pick_size > pool_size: 27 | pick_size = pool_size 28 | if dist_func is None: 29 | pick = picker.LazyBitVectorPick(pool, pool_size, threshold, pick_size) 30 | else: 31 | dist_func = _form_dist_func(dist_func, pool) 32 | pick = picker.LazyPick(dist_func, pool_size, threshold, pick_size) 33 | return pick 34 | 35 | 36 | def _create_pool(scaffold, graph, radius, bits): 37 | """tuple : create molecule pool (ids, mols, fps).""" 38 | mol_ids, smiles = zip(*graph.get_molecules_for_scaffold(scaffold, 'smiles')) 39 | mols = list(map(MolFromSmiles, smiles)) 40 | fps = list(map(lambda x: GetMorganFingerprintAsBitVect(x, radius, nBits=bits), mols)) 41 | if len(fps) == 0: 42 | raise ValueError(f'No molecules for scaffold class: {scaffold}') 43 | return mol_ids, mols, fps 44 | 45 | 46 | def diversity_pick_for_scaffold_class( 47 | scaffold, 48 | graph, 49 | threshold=0.65, 50 | pick_size=0, 51 | fp_radius=2, 52 | fp_bits=1024, 53 | dist_func=None 54 | ): 55 | """ 56 | Pick a diverse set of molecules from a scaffold class using 57 | the RDKit diversity picker (LeaderPicker) and Morgan 58 | fingerprints. 
59 | 60 | Parameters 61 | ---------- 62 | scaffold : str 63 | Scaffold class name i.e. scaffold SMILES. 64 | graph : ScaffoldGraph 65 | ScaffoldGraph for picking. 66 | threshold : float, optional 67 | Stop picking when the distance goes below this value. 68 | The default is 0.65 i.e. similarity = 0.35. 69 | pick_size : int, optional 70 | Number of items to pick from the molecule pool. If 71 | the pick size is greater than the pool size, the 72 | pick size will be equal to the size of the pool. 73 | fp_radius : int, optional 74 | Radius of Morgan fingerprint. The default is 2. 75 | fp_bits : int, optional 76 | Number of bits in the Morgan fingerprint. The 77 | default is 1024. 78 | dist_func : function, optional 79 | A function for calculating distance between a pair 80 | of fingerprints. The function should take two indicies 81 | (i, j) and a list of fingerprints (fps) and return 82 | the distance between these points. 83 | 84 | Examples 85 | -------- 86 | Diversity pick for benzene scaffold. 87 | 88 | >>> ids, mols, fps = diversity_pick_for_scaffold_class('c1ccccc1', graph, pick_size=10) 89 | 90 | Returns 91 | ------- 92 | tuple ((ids), (mols), (fps)) 93 | A tuple of tuples with the first containg the picked molecules ids, 94 | the seconds containing the picked mols RDMols and the third containg 95 | the molecules fingerprints. 96 | 97 | Notes 98 | ----- 99 | If performing diversity picks on a large scale, a custom implementation 100 | should probably be used where fingerprints can be cached. 
def _btp(scaffoldgraph, activity_key, alternative, pd):
    """CSE - binomial test (used in cse functions).

    Parameters
    ----------
    scaffoldgraph : ScaffoldGraph
        Graph whose molecule nodes carry the binary attribute `activity_key`.
    activity_key : str
        Molecule node attribute key; a value ``v`` is counted as active
        when ``int(v) == 1``.
    alternative : str
        Alternative hypothesis forwarded to ``binom_test``.
    pd : float or None
        Hypothesized probability of success. If None it is estimated as
        (number of active molecules / total molecules) over the whole graph.

    Returns
    -------
    dict
        ``{scaffold: {'pval': float, '_active': int, '_total': int}}``.

    Raises
    ------
    ValueError
        If `pd` is None and the graph contains no molecules.

    """
    def _count_active(values):
        # One definition of "active" for background and scaffold classes,
        # so '1', 1, 1.0 and True are all treated the same way.
        return sum(1 for value in values if int(value) == 1)

    background = [a for _, a in scaffoldgraph.get_molecule_nodes(activity_key)]
    total = len(background)
    active = _count_active(background)
    if pd is None:
        if total == 0:
            raise ValueError('Cannot estimate pd: the graph contains no molecules')
        pd = active / total
    logger.debug(f'(BTP) Total: {total}, Active: {active}, pd: {pd}')

    result = {}
    for scaffold in scaffoldgraph.get_scaffold_nodes():
        members = scaffoldgraph.get_molecules_for_scaffold(scaffold, activity_key)
        acts = [a for _, a in members]
        N, K = len(acts), _count_active(acts)
        if N == 0:
            # No member molecules: there is nothing to test, so report no
            # evidence of enrichment instead of failing.
            pval = 1.0
        else:
            pval = binom_test(K, N, pd, alternative=alternative)
        logger.debug(f'(BTP) {scaffold}, {K}, {N}, {pval}')
        result[scaffold] = {'pval': pval, '_active': K, '_total': N}
    return result
def bonferroni_correction(scaffoldgraph, crit):
    """Compute a bonferroni corrected significance level per hierarchy.

    The critical value is divided by the size reported for each hierarchy
    by ``scaffoldgraph.get_hierarchy_sizes()``.

    Parameters
    ----------
    scaffoldgraph : ScaffoldGraph
        A ScaffoldGraph object to query.
    crit : float
        The uncorrected critical significance value.

    Returns
    -------
    dict
        Mapping of the form {hierarchy: corrected crit}.

    """
    corrected = {}
    for level, size in scaffoldgraph.get_hierarchy_sizes().items():
        corrected[level] = crit / size
    return corrected
def compound_set_enrichment(scaffoldgraph, activity, mode='ks', alternative='greater', crit=0.01, p=None):
    """
    Run compound set enrichment (CSE) and return the enriched scaffolds.

    The enrichment statistics from ``calc_scaffold_enrichment`` are stored
    on the graph as node attributes. Scaffolds whose p-value is below the
    bonferroni corrected level of their hierarchy are then returned.

    Parameters
    ----------
    scaffoldgraph : ScaffoldGraph
        A ScaffoldGraph object to query.
    activity : str
        A node attribute key corresponding to an activity value.
        If the test is binomial this value should be a binary attribute
        (0 or 1 / True or False).
    mode : {'ks', 'b'}, optional
        Statistical test to perform. 'ks' specifies a Kolmogorov-Smirnov
        test and 'b' or 'binomial' specifies a binomial test.
        The default is 'ks'.
    alternative : {'two-sided', 'less', 'greater'}, optional
        Defines the alternative hypothesis. The default is 'greater'.
    crit : float, optional
        The critical significance level. The default is 0.01
    p : float, None, optional
        The hypothesized probability of success. 0 <= p <= 1. Used in
        binomial mode. If not specified p is set automatically
        (number of active / total compounds). The default is None.

    Returns
    -------
    tuple
        'Enriched' scaffold classes as (scaffold, {data}) pairs sorted by
        ascending p-value, where data is the node attribute dict.

    Notes
    -----
    P-values are added as node attributes with the key 'pval'.

    References
    ----------
    .. [1] Varin, T., Schuffenhauer, A., Ertl, P., and Renner, S. (2011). Mining for bioactive scaffolds
           with scaffold networks: Improved compound set enrichment from primary screening data.
           Journal of Chemical Information and Modeling, 51(7), 1528–1538.
    .. [2] Varin, T., Gubler, H., Parker, C., Zhang, J., Raman, P., Ertl, P. and Schuffenhauer, A. (2010)
           Compound Set Enrichment: A Novel Approach to Analysis of Primary HTS Data.
           Journal of Chemical Information and Modeling, 50(12), 2067-2078.

    """
    statistics = calc_scaffold_enrichment(scaffoldgraph, activity, mode, alternative, p)
    set_node_attributes(scaffoldgraph, statistics)
    cutoffs = bonferroni_correction(scaffoldgraph, crit)
    enriched = [
        (node, attrs)
        for node, attrs in scaffoldgraph.get_scaffold_nodes(True)
        if attrs['pval'] < cutoffs[attrs['hierarchy']]
    ]
    enriched.sort(key=lambda item: item[1]['pval'])
    return tuple(enriched)
33 | 34 | Examples 35 | -------- 36 | Create a CSF plot (CSFP) for murcko scaffolds 37 | 38 | >>> import matplotlib.pyplot as plt 39 | >>> fig, ax = plt.subplots() 40 | >>> x, y = cumulative_scaffold_frequency(tree, hierarchy=-1) 41 | >>> ax.plot(x, y, label='Murcko CSF') 42 | >>> ax.set_xlabel('Percentage of scaffolds') 43 | >>> ax.set_ylabel('Percentage of molecules') 44 | >>> ax.legend() 45 | >>> fig.show() 46 | 47 | Calculate P_50, the percentage of scaffolds that represent 50% of compounds. 48 | 49 | >>> import numpy as np 50 | >>> p50 = np.interp(0.5, y, x) 51 | 52 | Returns 53 | ------- 54 | tuple 55 | A tuple containing the cumulative percentage of scaffolds and 56 | the cumulative scaffold frequency as a percentage of molecules. 57 | Can be used to plot a CSF plot (x, y). 58 | 59 | Notes 60 | ----- 61 | Cumulative scaffold frequency should be used with scaffold tree 62 | structures. 63 | 64 | """ 65 | if not frequency_key: 66 | scaffoldgraph.add_scaffold_molecule_count() 67 | frequency_key = 'count' 68 | if hierarchy == -1: # murcko scaffolds 69 | h = _get_murcko_frequency(scaffoldgraph) 70 | elif hierarchy in scaffoldgraph.get_hierarchy_sizes(): # hierarchies 71 | h = scaffoldgraph.get_scaffolds_in_hierarchy(hierarchy, frequency_key) 72 | else: # hierarchy does not exist 73 | raise ValueError(f'Invalid hierarchy: {hierarchy}') 74 | sh = sorted(h, key=lambda x: x[1], reverse=True) 75 | sf = 1. 
def area_under_curve(x, y):
    """Calculate area under the curve using the trapezoidal rule.

    Parameters
    ----------
    x : np.ndarray, shape (n, )
        Array of x coordinates, must be monotonic increasing or decreasing.
    y : np.ndarray, shape (n, )
        Array of y coordinates.

    Returns
    -------
    area : float
        Area under the curve (AUC). For monotonic decreasing `x` the sign
        of the integral is flipped so the result matches the value obtained
        for the same points in increasing order.

    Raises
    ------
    ValueError
        If `x` and `y` differ in shape, are not one-dimensional, contain
        fewer than two points, or if `x` is not monotonic.

    """
    x = np.asanyarray(x)
    y = np.asanyarray(y)
    if x.shape != y.shape:
        raise ValueError(
            'Input arrays are expected to contain the same '
            f'number of points, x.shape: {x.shape}, '
            f'y.shape: {y.shape}'
        )
    # Check dimensionality first: a 0-d array has an empty shape tuple, so
    # indexing shape[0] before this check would raise IndexError.
    if x.ndim != 1:
        raise ValueError(
            f'Expected 1d arrays for x and y, got '
            f'shape: {x.shape}'
        )
    if x.shape[0] < 2:
        raise ValueError(
            'At least two points are required to calculate'
            f' area under the curve, got shape: {x.shape}'
        )
    direction = 1
    dx = np.diff(x)
    if np.any(dx < 0):
        # Some step goes backwards: this is only valid when every step is
        # non-increasing, i.e. x is monotonic decreasing.
        if not np.all(dx <= 0):
            raise ValueError(
                'x is neither monotonic increasing nor decreasing'
            )
        direction = -1
    # np.trapz is deprecated in NumPy 2.0 in favour of np.trapezoid.
    integrate = getattr(np, 'trapezoid', None)
    if integrate is None:
        integrate = np.trapz
    area = direction * integrate(y, x)
    return area
def get_virtual_scaffolds(scaffoldgraph, data=False, default=None):
    """Collect the 'virtual' scaffolds of a scaffold graph.

    A scaffold is reported as virtual when none of its direct successors
    is a node of type 'molecule', i.e. it was produced by the pruning
    process rather than obtained directly from a molecule. Such scaffolds
    may provide starting points for the synthesis or acquisition of
    compounds complementing the current collection.

    Parameters
    ----------
    scaffoldgraph : ScaffoldGraph
        A ScaffoldGraph object to query
    data : str, bool, optional
        The scaffold node attribute returned in 2-tuple (n, ddict[data]).
        If True, return entire node attribute dict as (n, ddict).
        If False, return just the nodes n. The default is False.
    default : value, bool, optional
        Value used for nodes that don't have the requested attribute.
        Only relevant if data is not True or False.

    Returns
    -------
    list
        Virtual scaffold node keys, or (key, data) tuples when `data`
        is not False.

    """
    def _has_molecule_child(node):
        # True when at least one direct successor is a molecule node.
        return any(
            scaffoldgraph.nodes[child].get('type') == 'molecule'
            for child in scaffoldgraph.successors(node)
        )

    virtual = []
    for node, attrs in scaffoldgraph.get_scaffold_nodes(True):
        if _has_molecule_child(node):
            continue
        if data is True:
            entry = (node, attrs)
        elif data is False:
            entry = node
        else:
            entry = (node, attrs.get(data, default))
        virtual.append(entry)
    return virtual
55 | 56 | Parameters 57 | ---------- 58 | scaffoldgraph : ScaffoldGraph 59 | A ScaffoldGraph object to query 60 | data : str, bool, optional 61 | The scaffold node attribute returned in 2-tuple (n, ddict[data]). 62 | If True, return entire node attribute dict as (n, ddict). 63 | If False, return just the nodes n. The default is False. 64 | default : value, bool, optional 65 | Value used for nodes that don't have the requested attribute. 66 | Only relevant if data is not True or False. 67 | 68 | Returns 69 | ------- 70 | list 71 | A list of scaffold node keys corresponding to virtual scaffolds. 72 | 73 | """ 74 | singletons = [] 75 | for scaffold, d in scaffoldgraph.get_scaffold_nodes(True): 76 | mol_count = 0 77 | for succ in scaffoldgraph.successors(scaffold): 78 | if scaffoldgraph.nodes[succ].get('type') == 'molecule': 79 | mol_count += 1 80 | if mol_count == 1: 81 | if data is False: 82 | singletons.append(scaffold) 83 | elif data is True: 84 | singletons.append((scaffold, d)) 85 | else: 86 | singletons.append((scaffold, d.get(data, default))) 87 | return singletons 88 | -------------------------------------------------------------------------------- /scaffoldgraph/analysis/representation.py: -------------------------------------------------------------------------------- 1 | """ 2 | scaffoldgraph.analysis.representation 3 | 4 | Module contains an adaption of the Automated Identification of Over-Represented 5 | Scaffold Classes in HTS Data method from the paper: 'HierS: Hierarchical Scaffold Clustering 6 | Using Topological Chemical Graphs' 7 | """ 8 | 9 | from networkx import set_node_attributes 10 | from itertools import combinations 11 | 12 | from rdkit import DataStructs 13 | from rdkit import Chem 14 | 15 | from scaffoldgraph.utils.cache import Cache 16 | 17 | 18 | class MolecularSimilarityCache(object): 19 | """An LRU cache for speeding up repeated molecular similarity computations.""" 20 | 21 | __slots__ = ('_fp_func', '_sim_func', '_fp_cache', 
'_sim_cache') 22 | 23 | def __init__(self, fp_func=None, sim_func=None, fp_cache_maxsize=None, sim_cache_maxsize=None): 24 | """ 25 | Parameters 26 | ---------- 27 | fp_func : callable, optional 28 | A function calculating a molecular fingerprint from an rdkit Mol object. 29 | If None the function is set to ``rdkit.Chem.RDKFingerprint``. 30 | sim_func : callable, optional 31 | A function calculating the similarity between two fingerprints as returned 32 | by `fp_func`. If None the function is set to ``rdkit.Datastructs.TanimotoSimilarity`` 33 | fp_cache_maxsize : int, optional 34 | Set the maximum number of fingerprints cached. If None the cache is unbounded. 35 | sim_cache_maxsize : int, optional 36 | Set the maximum number of similarity values cached. If None the cache is unbounded. 37 | 38 | """ 39 | self._fp_func = fp_func if fp_func else Chem.RDKFingerprint 40 | assert callable(self._fp_func), 'fp_func must be callable or None' 41 | self._sim_func = sim_func if sim_func else DataStructs.TanimotoSimilarity 42 | assert callable(self._sim_func), 'sim_func must be callable or None' 43 | self._fp_cache = Cache(fp_cache_maxsize) 44 | self._sim_cache = Cache(sim_cache_maxsize) 45 | 46 | @property 47 | def fp_func(self): 48 | """callable: The fingerprinting function 49 | 50 | If the fingerprinting function is changed both the similarity and 51 | fingerpint caches are cleared. 52 | """ 53 | return self._fp_func 54 | 55 | @fp_func.setter 56 | def fp_func(self, fp_func): 57 | setattr(self, '_fp_func', fp_func) 58 | self.clear() # clear both caches 59 | 60 | @property 61 | def sim_func(self): 62 | """callable: The molecular similarity function 63 | 64 | If the similarity function is changed the similarity cache is cleared. 
65 | """ 66 | return self._sim_func 67 | 68 | @sim_func.setter 69 | def sim_func(self, sim_func): 70 | setattr(self, '_sim_func', sim_func) 71 | self.clear_sim_cache() # clear only similarity cache 72 | 73 | def get_fingerprint(self, mol_node): 74 | """Retrieve a fingerprint from the cache if it exists else calculate. 75 | 76 | Parameters 77 | ---------- 78 | mol_node : tuple 79 | A molecule node from a ScaffoldGraph where the first entry is the 80 | molecule ID and the second is a dictionary of node attributes. 81 | 82 | Returns 83 | ------- 84 | object 85 | A molecular fingerprint. 86 | 87 | """ 88 | mol_id = mol_node[0] 89 | if mol_id in self._fp_cache: 90 | return self._fp_cache[mol_id] 91 | smi = mol_node[1]['smiles'] 92 | fp = self._fp_cache.setdefault(mol_id, self._calc_fp(smi)) 93 | return fp 94 | 95 | def _calc_fp(self, smiles): 96 | rdmol = Chem.MolFromSmiles(smiles) 97 | return self._fp_func(rdmol) 98 | 99 | def get_similarity(self, mol_node_1, mol_node_2): 100 | """Retrieve a similarity value from the cache if it exists else calculate. 101 | 102 | Parameters 103 | ---------- 104 | mol_node_1 : tuple 105 | A molecule node from a ScaffoldGraph where the first entry is the 106 | molecule ID and the second is a dictionary of node attributes. 107 | mol_node_2 : tuple 108 | A molecule node from a ScaffoldGraph where the first entry is the 109 | molecule ID and the second is a dictionary of node attributes. 110 | 111 | Returns 112 | ------- 113 | float 114 | A molecular similarity score. 
def calc_average_pairwise_similarity(scaffoldgraph, fp_func=None, sim_func=None, skip_levels=None,
                                     fp_cache_maxsize=None, sim_cache_maxsize=None):

    """Calculate average pairwise similarity for each scaffold in a ScaffoldGraph.

    Average Pairwise Similarity (APS) is a simple method for approximating the overall topological
    similarity between compounds in a given scaffold class. The APS coefficient can also be used
    as a metric to gauge scaffold over-representation in a set of compounds as described in the
    HierS paper.

    Notes
    -----
    The metric used in the HierS implementation is called APT (Average Pairwise Tanimoto).
    In this implementation it is known as 'APS', as the function enables the user to specify
    similarity metrics other than Tanimoto using the `sim_func` argument.

    Parameters
    ----------
    scaffoldgraph : ScaffoldGraph
    fp_func : callable, None, optional
        A callable returning a molecular fingerprint from an RDKit Mol object.
        If None the fingerprint is an RDKFingerprint with default parameters.
    sim_func : callable, None, optional
        A callable returning a similarity value (float) for a pair of fingerprint objects
        calculated by `fp_func`. If None the default metric is Tanimoto.
    skip_levels : iterable, None, optional
        Skip any scaffolds in hierarchy levels specified.
        The aps and membership of skipped scaffolds are set to 0.
    fp_cache_maxsize : int, optional
        Set the maximum number of fingerprints cached. If None the cache is unbounded.
    sim_cache_maxsize : int, optional
        Set the maximum number of similarity values cached. If None the cache is unbounded.

    Returns
    -------
    dict
        A dict of dicts in the format {scaffold: {members, aps}} where members is the
        number of molecules in the scaffold cluster and aps is the average pairwise
        similarity of the molecules in the cluster.

    See Also
    --------
    scaffoldgraph.analysis.representation.get_over_represented_scaffold_classes

    """
    aps_dict = {}
    cache_args = (fp_func, sim_func, fp_cache_maxsize, sim_cache_maxsize)

    with MolecularSimilarityCache(*cache_args) as cache:
        for scaffold, data in scaffoldgraph.get_scaffold_nodes(True):
            aps_data = aps_dict.setdefault(scaffold, {})

            if skip_levels and data['hierarchy'] in skip_levels:
                aps_data['members'] = 0
                aps_data['aps'] = 0.0
                # Actually skip the scaffold; without this the zeroed values
                # were overwritten by the full pairwise computation below.
                continue

            m_nodes = scaffoldgraph.get_molecules_for_scaffold(scaffold, data=True)
            n_members = len(m_nodes)
            aps_data['members'] = n_members

            # If only 1 member (or less in case of disconnect) set aps to 0.0
            if n_members <= 1:
                aps_data['aps'] = 0.0
                continue

            pw_sims = [cache.get_similarity(i, j) for i, j in combinations(m_nodes, 2)]
            aps_data['aps'] = sum(pw_sims) / len(pw_sims)

    return aps_dict
Any scaffold in the list of 239 | overrepresented scaffolds that is found to be derived from a higher ranking (i.e., 240 | lower molecular weight) scaffold is removed because all of the compounds that have membership 241 | in such scaffolds are already accounted for by the higher ranking scaffold. 242 | 243 | The HierS paper uses three defined similarity thresholds (APS) in three categories: 244 | 245 | loose = 0.75 246 | medium = 0.80 247 | strict = 0.85 248 | 249 | Parameters 250 | ---------- 251 | scaffoldgraph : ScaffoldGraph 252 | threshold : float, optional 253 | Similarity threshold used to define potential over-represented scaffolds. 254 | The default is 0.80 (i.e. medium) 255 | member_cutoff : int, None, optional 256 | If set, scaffolds for which (member_cutoff <= member molecules) are not considered 257 | to be over-represented (not significant). The default is None. 258 | skip_aps : bool, optional 259 | If True the function assumes that the APS has already been calculated and 'members' and 260 | 'aps' are scaffold node attributes (i.e. use if running the same function more than 261 | once with different thresholds). The default is False. 262 | **kwargs : 263 | Arguments for the calc_average_pairwise_similarity function (calculating the APS metric). 264 | 265 | References 266 | ---------- 267 | .. [1] Wilkens, S., Janes, J., and Su, A. (2005). HierS: Hierarchical Scaffold Clustering 268 | Using Topological Chemical Graphs. Journal of Medicinal Chemistry, 48(9), 3182-3193. 
269 | 270 | """ 271 | if skip_aps is False: 272 | aps = calc_average_pairwise_similarity(scaffoldgraph, **kwargs) 273 | set_node_attributes(scaffoldgraph, aps) 274 | aps.clear() 275 | 276 | or_scaffolds = [] 277 | for scaffold, d in scaffoldgraph.get_scaffold_nodes(data=True): 278 | if d.get('aps', 0) > threshold and not (member_cutoff and not d.get('members') >= member_cutoff): 279 | or_scaffolds.append((scaffold, d)) 280 | or_scaffolds.sort(key=lambda n: n[1].get('hierarchy')) 281 | or_set = set([s for s, _ in or_scaffolds]) 282 | 283 | def _filter(scaffold): 284 | s, data = scaffold 285 | if data.get('hierarchy', 1) == 1: 286 | return True 287 | elif any([p in or_set for p in scaffoldgraph.get_parent_scaffolds(s)]): 288 | return False 289 | return True 290 | 291 | return tuple(filter(_filter, or_scaffolds)) 292 | -------------------------------------------------------------------------------- /scaffoldgraph/core/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | scaffoldgraph.core 3 | 4 | The core package contains core functionality for building ScaffoldGraphs. 
class DataFrameMolSupplier(object):
    """Iterator that builds rdkit Mols from the rows of a pandas DataFrame.

    Every row contributes a structure (a SMILES string, or an object that
    already is an rdkit ``Mol``), a molecule name and, optionally, a set of
    data columns which are copied onto the molecule as string properties.
    """

    def __init__(self, df, smiles_column, name_column, data_cols=None):
        """Prepare the row iterator.

        Parameters
        ----------
        df : pandas.DataFrame
            Dataframe to read molecules from.
        smiles_column : str
            Key of the column holding the structures.
        name_column : str
            Key of the column holding the molecule names.
        data_cols : list, optional
            Keys of columns whose values are stored as molecule
            properties. The default is None (no extra properties).

        """
        self.data_cols = data_cols
        # Column value arrays are zipped so each step yields one row:
        # (structure, name) or (structure, name, data_values).
        columns = [df[smiles_column].values, df[name_column].values]
        if data_cols is not None:
            columns.append(df[data_cols].values)
        self.supplier = zip(*columns)
        self.n = len(df[smiles_column])
        self.cursor = 1  # 1-based row position, used in warning messages

    def __len__(self):
        return self.n

    def __iter__(self):
        return self

    def __next__(self):
        row = next(self.supplier)
        structure = row[0]
        try:
            mol = structure if isinstance(structure, Mol) else MolFromSmiles(structure)
            # When parsing produced None the attribute access below raises
            # AttributeError, which is how a parse failure is detected.
            mol.SetProp('_Name', str(row[1]))
            if self.data_cols is not None:
                for column, entry in zip(self.data_cols, row[2]):
                    mol.SetProp(str(column), str(entry))
        except AttributeError:
            logger.warning('Molecule {} : {} could not be parsed'.format(
                self.cursor, structure
            ))
            mol = None

        self.cursor += 1
        return mol
def write_sdf_file(scaffold_graph, output_file):
    """Write an SDF file from a ScaffoldGraph.

    All scaffolds in the scaffoldgraph are written to the
    SDF, while molecules are ignored. Scaffolds are sorted
    in ascending order according to their hierarchy level.

    The output follows the standard SDF specification with
    the added property fields:

        TITLE field: scaffold ID
        SUBSCAFFOLDS field: list of sub-scaffold IDs
        HIERARCHY field: hierarchy level of scaffold
        SMILES field: scaffold canonical SMILES

    Scaffolds whose SMILES cannot be parsed are skipped.

    Parameters
    ----------
    scaffold_graph : scaffoldgraph.core.ScaffoldGraph
        ScaffoldGraph to be written to an SDF.
    output_file : str
        Filepath to an output file.

    """
    sorted_scaffolds = sorted(
        scaffold_graph.get_scaffold_nodes(data=True),
        key=lambda x: x[1]['hierarchy'],
    )
    # Scaffold SMILES -> integer ID (its position in the sorted order).
    mapping = {scaffold: idx for idx, (scaffold, _) in enumerate(sorted_scaffolds)}
    writer = SDWriter(output_file)
    try:
        for scaffold, data in sorted_scaffolds:
            molecule = MolFromSmiles(scaffold)
            if molecule is None:
                continue
            subscaffolds = scaffold_graph.predecessors(scaffold)
            # SetProp only accepts string values, so the integer ID is converted.
            molecule.SetProp('_Name', str(mapping[scaffold]))
            # Node attributes store the level under the lower-case key 'hierarchy'.
            molecule.SetIntProp('HIERARCHY', int(data['hierarchy']))
            molecule.SetProp('SMILES', scaffold)
            molecule.SetProp('SUBSCAFFOLDS', ', '.join(str(mapping[s]) for s in subscaffolds))
            writer.write(molecule)
    finally:
        # Always flush and release the output file, even if a write fails.
        writer.close()
def smiles_count(smiles_file):
    """int : Return the number of lines in a SMILES file.

    The file is read in binary chunks and newline bytes are counted. A
    non-empty final line that is not terminated by a newline is counted
    as a line too, so the result does not depend on whether the file ends
    with a trailing newline.

    Parameters
    ----------
    smiles_file : str
        File path to a SMILES file.

    """
    lines = 0
    last_byte = b''
    buf_size = 1024 * 1024
    # The context manager closes the file even if a read fails.
    with open(smiles_file, 'rb') as f:
        while True:
            buf = f.read(buf_size)
            if not buf:
                break
            lines += buf.count(b'\n')
            last_byte = buf[-1:]
    # An unterminated last line contains data but no newline byte.
    if last_byte and last_byte != b'\n':
        lines += 1
    return lines
def write_tsv(scaffold_graph, output_file, write_ids=False):
    """Write a ScaffoldGraph to a file in TSV format.

    Used by scaffoldgraphs CLI utility. One row is written per scaffold,
    in ascending order of hierarchy level.

    Parameters
    ----------
    scaffold_graph : scaffoldgraph.core.ScaffoldGraph
        A scaffold graph to write to a file.
    output_file : str
        Path to output file.
    write_ids : bool, optional
        If True, write the fields {'ID', 'HIERARCHY', 'SMILES',
        'SUBSCAFFOLDS'} else write the fields {'HIERARCHY',
        'SMILES', 'SUBSCAFFOLDS', 'MOLECULES', 'ANNOTATIONS'}.
        The aggregate CLI function uses write_ids=True, while
        the generation utilities use write_ids=False. The default
        is False.

    """
    sorted_scaffolds = sorted(
        scaffold_graph.get_scaffold_nodes(data=True),
        key=lambda x: x[1]['hierarchy'],
    )

    if write_ids:
        field_names = ['ID', 'HIERARCHY', 'SMILES', 'SUBSCAFFOLDS']
        # Scaffold SMILES -> integer ID (its position in the sorted order).
        mapping = {node: idx for idx, (node, _) in enumerate(sorted_scaffolds)}
    else:
        field_names = ['HIERARCHY', 'SMILES', 'SUBSCAFFOLDS', 'MOLECULES', 'ANNOTATIONS']
        mapping = None

    # newline='' lets the csv module control line endings itself; without it
    # an extra '\r' is written on platforms that translate '\n'.
    with open(output_file, 'w', newline='') as output:

        writer = csv.DictWriter(output, delimiter='\t', fieldnames=field_names)
        writer.writeheader()

        for node, data in sorted_scaffolds:
            line = dict.fromkeys(field_names)
            line['SMILES'] = node
            line['HIERARCHY'] = data['hierarchy']

            subscaffolds = list(scaffold_graph.predecessors(node))
            if write_ids:
                line['SUBSCAFFOLDS'] = ', '.join(str(mapping[s]) for s in subscaffolds)
                line['ID'] = str(mapping[node])
            else:
                line['SUBSCAFFOLDS'] = ', '.join(subscaffolds)
                molecules, annotations = [], set()
                for successor in scaffold_graph.successors(node):
                    try:
                        if scaffold_graph.nodes[successor]['type'] == 'molecule':
                            molecules.append(successor)
                            edge = scaffold_graph.edges[(node, successor)]
                            annotations.add(edge['annotation'])
                    except KeyError:
                        # Best effort: nodes/edges lacking the attribute are skipped.
                        continue
                line['MOLECULES'] = ', '.join(molecules)
                # Sorted so repeated runs produce identical files (set order is arbitrary).
                line['ANNOTATIONS'] = ', '.join(sorted(annotations))

            writer.writerow(line)
class HierS(ScaffoldGraph):
    """
    Class representing a HierS type scaffold network.

    Scaffold-space is explored by iteratively removing available ring
    systems, producing every possible sub-scaffold while leaving fused
    ring-systems intact.

    Notes
    -----
    The difference to a conventional scaffold network lies in the
    construction: the fragmenter used here (``MurckoRingSystemFragmenter``)
    removes whole ring systems, so fused ring systems are never dissected.

    Examples
    --------
    Create a HierS network from a SMILES file.

    >>> import scaffoldgraph as sg
    >>> network = sg.HierS.from_smiles_file('my_file.smi', progress=True)
    >>> network.num_scaffold_nodes
    92

    Create a HierS network from an SDF.

    >>> network = sg.HierS.from_sdf('my_file.sdf', progress=True)

    If the SDF is zipped:

    >>> network = sg.HierS.from_sdf('my_file.sdf.gz', zipped=True)

    Get scaffold nodes:

    >>> list(network.get_scaffold_nodes())
    ['O=C(OCOC(=O)c1cccc2ncn(Cc3ccccc3)c12)OC1CCCCC1',
     'O=C(OCOC(=O)c1cccc2nc[nH]c12)OC1CCCCC1',
     ...]

    Include node attributes:

    >>> list(network.get_scaffold_nodes(data=True))
    [('O=C(OCOC(=O)c1cccc2ncn(Cc3ccccc3)c12)OC1CCCCC1', {'type': 'scaffold', 'hierarchy': 4}),
     ('O=C(OCOC(=O)c1cccc2nc[nH]c12)OC1CCCCC1', {'type': 'scaffold', 'hierarchy': 3}),
     ...]

    Get molecule nodes (use data=True to get attributes):

    >>> list(network.get_molecule_nodes())
    ['DB00006',
     'DB00007',
     'DB00014',
     ...]

    References
    ----------
    .. [1] Wilkens, S., Janes, J., and Su, A. (2005). HierS: Hierarchical Scaffold Clustering
           Using Topological Chemical Graphs. Journal of Medicinal Chemistry, 48(9), 3182-3193.

    See Also
    --------
    ScaffoldGraph
    ScaffoldNetwork
    ScaffoldTree

    """
    def __init__(self, graph=None, **kwargs):
        """Initialize a HierS network.

        Parameters
        ----------
        graph : input graph, optional
            Data to initialize graph. If None (default) an empty
            graph is created. The data can be any format that is supported
            by the ``to_networkx_graph()`` function, currently including
            edge list, dict of dicts, dict of lists, NetworkX graph,
            NumPy matrix or 2d ndarray, SciPy sparse matrix,
            or PyGraphviz graph. This argument is passed to the networkx
            DiGraph constructor.
        **kwargs :
            Accepted but not forwarded to the base class.

        """
        super().__init__(graph, MurckoRingSystemFragmenter(), 'hiers')

    def _hierarchy_constructor(self, child):
        """Recursively link ``child`` to every parent scaffold the fragmenter yields."""
        for parent in self.fragmenter.fragment(child):
            if not parent:
                # Falsy fragments are ignored.
                continue
            newly_seen = parent not in self.nodes
            if newly_seen:
                self.add_scaffold_node(parent)
            self.add_scaffold_edge(parent, child)
            # Only scaffolds seen for the first time are expanded further,
            # and only while more than one ring system remains.
            if newly_seen and parent.ring_systems.count > 1:
                self._hierarchy_constructor(parent)
class OriginalRule01(ScaffoldFilterRule):
    """Remove heterocycles of size 3 first.

    A parent scaffold satisfies the rule when the ring that was removed from
    the child to create it has three atoms, exactly one of which is neither
    hydrogen nor carbon.
    """

    @property
    def name(self):
        return 'original rule 01'

    def condition(self, child, parent):
        ring = child.rings[parent.removed_ring_idx]
        # Atomic numbers 1 (H) and 6 (C) are the only non-hetero atoms.
        hetero_count = sum(
            1 for atom in ring.atoms if atom.GetAtomicNum() not in (1, 6)
        )
        return ring.size == 3 and hetero_count == 1
class OriginalRule06(ScaffoldFilterRule):
    """Remove rings of size 3, 5 and 6 first.

    A parent scaffold satisfies the rule when the ring removed from the
    child to create it has 3, 5 or 6 atoms.
    """

    @property
    def name(self):
        return 'original rule 06'

    def condition(self, child, parent):
        removed_ring = child.rings[parent.removed_ring_idx]
        return removed_ring.size in (3, 5, 6)
class OriginalRule08(ScaffoldMinFilterRule):
    """Remove rings with the least hetero atoms first.

    The property value is the number of atoms in the removed ring that are
    neither hydrogen nor carbon; parents with the minimum value are kept.
    """

    @property
    def name(self):
        return 'original rule 08'

    def get_property(self, child, parent):
        ring = child.rings[parent.removed_ring_idx]
        # Atomic numbers 1 (H) and 6 (C) are the only non-hetero atoms.
        return sum(
            1 for atom in ring.atoms if atom.GetAtomicNum() not in (1, 6)
        )
class OriginalRule11(ScaffoldFilterRule):
    """Retain non-aromatic rings with preference.

    A parent scaffold satisfies the rule when every bond of the ring that
    was removed to create it is aromatic, i.e. aromatic rings are the ones
    removed first.
    """

    @property
    def name(self):
        return 'original rule 11'

    def condition(self, child, parent):
        ring = child.rings[parent.removed_ring_idx]
        for bond in ring.bonds:
            if not bond.GetIsAromatic():
                return False
        return True
class BaseScaffoldFilterRule(metaclass=ABCMeta):
    """Abstract interface shared by all scaffold prioritization rules.

    A concrete rule must provide two things: the ``filter`` method, which
    narrows down a collection of candidate parent scaffolds, and the
    ``name`` property, which identifies the rule. Calling a rule instance
    is the same as calling its ``filter`` method.

    """

    @property
    @abstractmethod
    def name(self):
        """Return the name of the filter rule.

        Concrete rules must override this and return a name for the rule.

        """
        raise NotImplementedError()

    @abstractmethod
    def filter(self, child, parents):
        """Filter a set of input scaffolds (parents).

        Implementations select a subset of the parent scaffolds according
        to the rule. A rule may look at the parents themselves or at the
        child they were derived from, e.g. at the ring that was removed:

            # Index (within the child) of the ring removed to form a parent
            >>> ring_idx = parents[0].removed_ring_idx

            # The removed ring itself
            >>> removed_ring = child.rings[ring_idx]

            # Derive a property (e.g. number of carbon atoms in the ring)
            >>> prop = [a.GetAtomicNum() for a in removed_ring.atoms].count(6)

        Parameters
        ----------
        child : scaffoldgraph.core.Scaffold
            The child scaffold from which the parent scaffolds were obtained.
        parents : iterable
            An iterable of all parent scaffolds generated by a fragmenter.

        """
        raise NotImplementedError()

    def __call__(self, child, parents):
        # Rules are callable; the call simply delegates to ``filter``.
        return self.filter(child, parents)

    def __repr__(self):
        return '<{_cls} at {address}>'.format(
            _cls=type(self).__name__,
            address=hex(id(self))
        )

    def __str__(self):
        return str(self.name)
class ScaffoldMinFilterRule(BaseScaffoldFilterRule):
    """
    Abstract base class for defining rules for scaffold prioritization
    based on a minimum property value.

    Subclasses should implement the ``get_property`` method, where a property
    value is returned for a particular input scaffold. Scaffolds with a
    property value equal to the minimum property value will be retained.
    Subclasses should also implement the ``name`` property.

    Example
    -------
    >>> class MyRule(ScaffoldMinFilterRule):
    ...
    ...     def get_property(self, child, parent):
    ...         prop = get_some_property(parent)
    ...         return prop
    ...
    ...     @property
    ...     def name(self):
    ...         return 'my min conditional rule'

    """
    def filter(self, child, parents):
        """Filter a set of parent scaffolds using a minimum property value.

        Parameters
        ----------
        child : scaffoldgraph.core.Scaffold
            The child scaffold from which the parent scaffolds were obtained.
        parents : iterable
            An iterable of all parent scaffolds generated by a fragmenter.
            One-shot iterators (e.g. generators) are supported.

        Returns
        -------
        list
            The parents whose property equals the minimum, in input order.
            An empty input yields an empty list.

        """
        # Materialise once: the candidates are traversed twice (property
        # evaluation, then selection), which would exhaust a generator.
        parents = list(parents)
        if not parents:
            return []
        props = [self.get_property(child, s) for s in parents]
        min_val = min(props)
        return [s for s, p in zip(parents, props) if p == min_val]

    @abstractmethod
    def get_property(self, child, parent):
        """Return a property value for a child/parent scaffold.

        Subclasses should implement this method.

        Parameters
        ----------
        child : scaffoldgraph.core.Scaffold
            The child scaffold from which the parent scaffolds were obtained.
        parent : scaffoldgraph.core.Scaffold
            A parent scaffold.

        """
        raise NotImplementedError()
class ScaffoldRuleSet(object):
    """
    Class defining an ordered set of rules used for scaffold prioritization.

    Rules added to the rule set must subclass ``BaseScaffoldFilterRule``.
    The rule set is callable (``ruleset(child, parents)``) and supports
    indexing, item assignment and ``len``.

    """
    _invalid_rule_msg = 'rule must be a subclass of BaseScaffoldFilterRule'

    def __init__(self, rules=None, name=None):
        """
        Initialize a rule set with an iterable of rules and an
        optional name.

        Parameters
        ----------
        rules : iterable, optional
            An iterable of rules. The default is None.
        name : str, optional
            Name of rule set. The default is None, in which case
            the name 'ScaffoldRuleSet' is used.

        Raises
        ------
        TypeError
            Raised if any supplied rule is not a valid filter rule.

        """
        self._rules = []
        if rules is not None:
            for rule in rules:
                self.add_rule(rule)
        self.name = name if name else 'ScaffoldRuleSet'

    def __call__(self, child, parents):
        return self.filter_scaffolds(child, parents)

    @property
    def rules(self):
        """list : Return rules as a list."""
        return self._rules

    def filter_scaffolds(self, child, parents):
        """Filter a set of parent scaffolds using the defined rules.

        Rules are applied in order to the scaffolds still remaining. A rule
        whose result is empty is ignored (the candidates are left as they
        were). The first time exactly one scaffold remains it is returned,
        with its ``prioritization_rule`` attribute set to the name of the
        deciding rule.

        Method is called internally by scaffold graph constructors.
        __call__ is an alias for this function.

        Parameters
        ----------
        child : scaffoldgraph.core.Scaffold
            Child scaffold.
        parents : list
            Parent scaffolds. The supplied container is not modified.

        Returns
        -------
        parent : scaffoldgraph.core.Scaffold
            The scaffold retained after filtering.

        Raises
        ------
        ValueError
            Raised if the ruleset contains no rules.
        ValueError
            Raised if the iterable of parent scaffolds
            is empty.
        ValueError
            Raised if more than one scaffold is left after
            all of the filter rules are evaluated. The RuleSet
            may require a tie-breaker rule.

        """
        if len(self) == 0:
            raise ValueError('No rules defined in rule set')
        if len(parents) == 0:
            raise ValueError('No parent scaffolds supplied to filter')
        if len(parents) == 1:
            # Nothing to prioritize. Read the element without popping so
            # the caller's container is left untouched.
            parent = next(iter(parents))
            parent.prioritization_rule = 'last remaining'
            return parent
        remaining = list(parents)
        for rule in self:
            filtered = rule.filter(child, remaining)
            if filtered:
                remaining = filtered
            if len(remaining) == 1:
                parent = remaining[0]
                parent.prioritization_rule = rule.name
                return parent
        raise ValueError('Filter error, more than one remaining scaffold '
                         'after filter rules applied. Rule set may require '
                         'a tie-breaker rule')

    def add_rule(self, rule):
        """Appends a rule to the ruleset.

        Parameters
        ----------
        rule : BaseScaffoldFilterRule
            Scaffold filter rule with base class ``BaseScaffoldFilterRule``.

        Raises
        ------
        TypeError
            Raised if ``rule`` is not a valid filter rule.

        """
        if not self.check_valid_rule(rule):
            raise TypeError(self._invalid_rule_msg)
        self._rules.append(rule)

    def insert_rule(self, rule, index):
        """Inserts a rule into the ruleset at supplied index.

        Parameters
        ----------
        rule : BaseScaffoldFilterRule
            Scaffold filter rule with base class ``BaseScaffoldFilterRule``.
        index : int
            Position in list to insert rule.

        Raises
        ------
        TypeError
            Raised if ``rule`` is not a valid filter rule.

        """
        if not self.check_valid_rule(rule):
            raise TypeError(self._invalid_rule_msg)
        self._rules.insert(index, rule)

    def delete_rule(self, index):
        """Deletes a rule from the ruleset at supplied index.

        Parameters
        ----------
        index : int
            Position in list to delete rule.

        """
        del self._rules[index]

    @classmethod
    def from_rule_file(cls, filename, name=None):
        """Create a scaffold rule set from a rule set file.

        A rule set file is a text file specifying the names of
        rules to include in the ruleset separated by new lines.
        The rule names must belong to either the original set
        or the generic set. The name of the rule corresponds to
        the class name of the desired rule. i.e. for OriginalRule01
        the file should contain the string OriginalRule01 followed
        by a new line. When including generic rules, min or max
        can be specified by including min or max after the name
        separated by an underscore. i.e. RRPNumHetAtoms_min.
        For rules which contain further arguments, these can be
        appended to the name with underscores. i.e.
        RRPRingSizeX_max_6. In this case the rule will prioritize
        scaffolds where the removed rings size is equal to 6.

        Parameters
        ----------
        filename : str
            File name of the rule set file.
        name : str, optional
            Name to assign rule set.

        See Also
        --------
        scaffoldgraph.prioritization.original_rules
        scaffoldgraph.prioritization.generic_rules

        """
        # Imported here rather than at module level, as in the original.
        from .rule_io import read_rule_file
        rules = read_rule_file(filename)
        return cls(rules, name)

    @staticmethod
    def check_valid_rule(rule):
        """bool : Returns True if rule is a valid scaffold filter rule."""
        return isinstance(rule, BaseScaffoldFilterRule)

    def __getitem__(self, index):
        return self._rules[index]

    def __setitem__(self, index, rule):
        # Validate first; only an invalid rule may raise.
        if not self.check_valid_rule(rule):
            raise TypeError(self._invalid_rule_msg)
        self._rules[index] = rule

    def __len__(self):
        return len(self._rules)

    def __repr__(self):
        return '<{_cls} at {address}>'.format(
            _cls=self.__class__.__name__,
            address=hex(id(self))
        )
def read_rule_file(filename):
    """Read rules from a file.

    Each non-blank line names one rule. A line is split on underscores:
    the first token is the (case-insensitive) rule name looked up in
    ``rule_name_to_class`` and the remaining tokens are passed to the rule
    constructor. When more than one argument follows the name, the first is
    passed as a string and the others are converted to ``int``
    (e.g. ``RRPRingSizeX_max_6``). Blank lines are ignored.

    Parameters
    ----------
    filename : str
        Name of rule file.

    Returns
    -------
    list
        list of rule objects.

    Raises
    ------
    ValueError
        Raised if any of the rules defined in the
        rule file are not implemented, or if a trailing
        argument cannot be converted to an integer.

    """
    rules = []
    with open(filename, 'r') as f:
        for line in f:
            line = line.strip()
            # ''.split('_') yields [''], so an emptiness test on the token
            # list can never skip a blank line; test the text instead.
            if not line:
                continue
            tokens = line.split('_')
            rule_name = tokens[0]
            rule_cls = rule_name_to_class.get(rule_name.lower(), None)
            if rule_cls is None:
                raise ValueError(f'Rule {rule_name} is not defined')
            if len(tokens) > 2:
                rule = rule_cls(tokens[1], *map(int, tokens[2:]))
            else:
                rule = rule_cls(*tokens[1:])
            rules.append(rule)
    return rules
filtered: {filtered} 31 | Linear molecules: {linear} 32 | Time elapsed: {time} 33 | 34 | Output saved @ {output} 35 | """ 36 | 37 | 38 | def _get_graph_cls(name): 39 | """Get scaffoldgraph class from name string.""" 40 | if name == 'network': 41 | return ScaffoldNetwork 42 | elif name == 'tree': 43 | return ScaffoldTree 44 | elif name == 'hiers': 45 | return HierS 46 | else: 47 | msg = f'scaffold graph type: {name} not known' 48 | raise ValueError(msg) 49 | 50 | 51 | def _maybe_ruleset(args): 52 | """Return a ScaffoldRuleset if specified in CLI arguments.""" 53 | ruleset = None 54 | if 'ruleset' in args and args.ruleset is not None: 55 | filename = args.ruleset 56 | ruleset = ScaffoldRuleSet.from_rule_file(filename) 57 | return ruleset 58 | 59 | 60 | def generate_cli(args): 61 | """Run scaffoldgraph generation for CLI utility.""" 62 | graph_cls = _get_graph_cls(args.command) 63 | graph_name = graph_cls.__name__ 64 | ruleset = _maybe_ruleset(args) 65 | 66 | if not args.silent: 67 | print( 68 | start_message.format( 69 | command=graph_name, 70 | input=args.input, 71 | output=args.output, 72 | max_r=args.max_rings, 73 | isotope=args.flatten_isotopes, 74 | fragment=args.keep_largest_fragment, 75 | discharge=args.discharge_and_deradicalize, 76 | ) 77 | ) 78 | 79 | logger.info(f'Generating {graph_name} Graph...') 80 | fmt, zipped = file_format(args.input) 81 | start = time.time() 82 | 83 | if fmt == 'SDF': 84 | sg = graph_cls.from_sdf( 85 | args.input, 86 | ring_cutoff=args.max_rings, 87 | progress=args.silent is False, 88 | zipped=zipped, 89 | flatten_isotopes=args.flatten_isotopes, 90 | keep_largest_fragment=args.keep_largest_fragment, 91 | discharge_and_deradicalize=args.discharge_and_deradicalize, 92 | prioritization_rules=ruleset, 93 | ) 94 | elif fmt == 'SMI': 95 | sg = graph_cls.from_smiles_file( 96 | args.input, 97 | ring_cutoff=args.max_rings, 98 | progress=args.silent is False, 99 | flatten_isotopes=args.flatten_isotopes, 100 | 
def file_format(path):
    """Determine an input file format from a path.

    Returns a ``(format, zipped)`` tuple. ``format`` is 'SDF' for a ``.sdf``
    extension, 'SMI' for ``.smi`` and None for anything else. For ``.gz`` /
    ``.gzip`` files the format is taken from the name with that extension
    removed, and ``zipped`` is True when that inner format is recognised.
    Extension matching is case-sensitive.
    """
    stem, ext = os.path.splitext(path)
    if ext in ('.gz', '.gzip'):
        # Look through the compression suffix at the underlying name.
        inner_format = file_format(stem)[0]
        if inner_format is None:
            return None, False
        return inner_format, True
    known_formats = {'.sdf': 'SDF', '.smi': 'SMI'}
    return known_formats.get(ext), False
-------------------------------------------------------------------------------- /scaffoldgraph/scripts/run.py: -------------------------------------------------------------------------------- 1 | """ 2 | scaffoldgraph.scripts.run 3 | 4 | Module defines the CLI utility for ScaffoldGraph. 5 | """ 6 | 7 | import argparse 8 | import logging 9 | import sys 10 | 11 | from loguru import logger 12 | 13 | from scaffoldgraph import __version__ 14 | from .generate import generate_cli 15 | from .misc import TqdmHandler 16 | from .operations import select_cli, aggregate_cli 17 | 18 | title = f"ScaffoldGraph {__version__}" 19 | desc = "Generate Scaffold Networks and Scaffold Trees." 20 | 21 | tqdm_format = "scaffold-graph: " 22 | tqdm_format += "{time:HH:mm:ss} " 23 | tqdm_format += "{process} " 24 | tqdm_format += "{level}: " 25 | tqdm_format += "{message}" 26 | 27 | tqdm_handler = { 28 | 'sink': TqdmHandler(logging.NOTSET), 29 | 'format': tqdm_format, 30 | 'level': 'INFO' 31 | } 32 | 33 | usage = 'scaffoldgraph []' 34 | 35 | 36 | def configure_logger(verbosity): 37 | """Configure the scaffoldgraph cli logger to use tqdm handler. 38 | 39 | Parameters 40 | ---------- 41 | verbosity : int 42 | Select the output verbosity. 0 is the lowest verbosity 43 | 'CRITICAL' and 4 is the highest verbosity 'DEBUG'. If 44 | < 0 or > 4 the maximum verbosity is selected. 
def generate_parent_parser():
    """Creates a parent parser for generate commands (Network, Tree, HierS).

    The parser is built with ``add_help=False`` so that it can be passed in
    the ``parents`` list of a sub-command parser. It defines the positional
    ``input`` and ``output`` arguments plus the scaffold initialisation
    options.

    Returns
    -------
    argparse.ArgumentParser
        Parser holding the arguments common to all generate commands.

    """
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument('input', help='input file (SDF, SMILES)')
    parser.add_argument('output', help='output file path')
    parser.add_argument('--max-rings', '-m', type=int, default=10, metavar='',
                        help='ignore molecules with # rings > (default: 10)')
    parser.add_argument('--flatten-isotopes', '-i', action='store_true',
                        help='remove specific isotopes when initializing the scaffold')
    # NOTE: this option is spelled with underscores, unlike its siblings;
    # the spelling is kept so existing command lines continue to work.
    parser.add_argument('--keep_largest_fragment', '-f', action='store_true',
                        help='when encountering molecules containing disconnected fragments initialize'
                             ' the scaffold from only the largest disconnected fragment')
    parser.add_argument('--discharge-and-deradicalize', '-d', action='store_true',
                        help='remove charges and radicals when initializing the scaffold')
    return parser
def scaffoldgraph_main():
    """Run the CLI utility for ScaffoldGraph.

    Parses the command line (showing the help text when no arguments are
    given), configures the logger from the requested verbosity and runs
    the function attached to the selected sub-command. Errors raised by the
    command are logged at CRITICAL level rather than propagated, and an
    exit message is always logged.
    """
    parser = scaffoldgraph_args()
    # With no CLI arguments at all, behave as if '-h' had been supplied.
    argv = None if sys.argv[1:] else ['-h']
    args = parser.parse_args(argv)
    configure_logger(args.verbosity)
    try:
        args.func(args)
    except FileNotFoundError as e:
        logger.critical(f'Input file not found: {e.filename}')
    except (ValueError, RuntimeError, MemoryError) as e:
        logger.critical(e)
    except KeyboardInterrupt:
        logger.critical('scaffoldgraph process interrupted from keyboard')
    except Exception as e:
        logger.critical(f'Unknown error: {e}')
    finally:
        logger.info('Exiting scaffoldgraph...')
-------------------------------------------------------------------------------- /scaffoldgraph/tree.py: -------------------------------------------------------------------------------- 1 | """ 2 | scaffoldgraph.tree 3 | """ 4 | 5 | from rdkit.Chem import rdmolops 6 | 7 | from .core import ScaffoldGraph, Scaffold, MurckoRingFragmenter 8 | from .core.fragment import get_murcko_scaffold 9 | from .prioritization import original_ruleset 10 | from .utils import suppress_rdlogger 11 | 12 | 13 | class ScaffoldTree(ScaffoldGraph): 14 | """ 15 | Class representing a scaffold tree. 16 | 17 | Explore scaffold-space through the iterative removal of the least-characteristic 18 | ring from a molecular scaffold. The output is a tree of molecular scaffolds. 19 | 20 | Examples 21 | -------- 22 | Create a ScaffoldTree from a SMILES file. 23 | 24 | >>> import scaffoldgraph as sg 25 | >>> tree = sg.ScaffoldTree.from_smiles_file('my_file.smi', progress=True) 26 | >>> network.num_scaffold_nodes 27 | 75 28 | 29 | Create a ScaffoldTree from an SDF. 30 | 31 | >>> tree = sg.ScaffoldTree.from_sdf('my_file.sdf', progress=True) 32 | 33 | If the SDF is zipped: 34 | 35 | >>> tree = sg.ScaffoldTree.from_sdf('my_file.sdf.gz', zipped=True) 36 | 37 | Get scaffold nodes: 38 | 39 | >>> list(tree.get_scaffold_nodes()) 40 | ['O=C(OCOC(=O)c1cccc2ncn(Cc3ccccc3)c12)OC1CCCCC1', 41 | 'O=C(OCOC(=O)c1cccc2nc[nH]c12)OC1CCCCC1', 42 | ...] 43 | 44 | Include node attributes: 45 | 46 | >>> list(tree.get_scaffold_nodes(data=True)) 47 | [('O=C(OCOC(=O)c1cccc2ncn(Cc3ccccc3)c12)OC1CCCCC1', {'type': 'scaffold', 'hierarchy': 4}), 48 | ('O=C(OCOC(=O)c1cccc2nc[nH]c12)OC1CCCCC1', {'type': 'scaffold', 'hierarchy': 3}), 49 | ...] 50 | 51 | Get molecule nodes (use data=True to get attributes): 52 | 53 | >>> list(tree.get_molecule_nodes()) 54 | ['DB00006', 55 | 'DB00007', 56 | 'DB00014', 57 | ...] 58 | 59 | References 60 | ---------- 61 | .. [1] Schuffenhauer, A., Ertl, P., Roggo, S., Wetzel, S., Koch, M. A., and Waldmann, H. 
@suppress_rdlogger()
def tree_frags_from_mol(mol, prioritization_rules=None):
    """Generate a scaffold tree from a single molecule without using networkx.

    Starting from the molecule's Murcko scaffold (stereochemistry removed),
    the scaffold is fragmented repeatedly. At each step the ruleset picks
    one parent from the fragmenter's non-empty candidates. The walk stops
    when no candidates are produced or when the chosen parent's ring count
    is not greater than one.

    Parameters
    ----------
    mol: rdkit.Chem.rdchem.Mol
        rdkit molecule for processing.
    prioritization_rules : ScaffoldRuleSet, optional
        rules for prioritizing parent scaffolds. If
        not supplied the original rules are used.
        The default is None.

    Returns
    -------
    parents
        An ordered list of rdkit Mols representing a scaffold tree,
        beginning with the molecule's own scaffold.

    Examples
    --------
    Generating scaffold tree fragments:

    >>> from rdkit import Chem
    >>> smiles = 'Cc1[nH]cnc1Cn1cccc(-c2ccccc2O)c1=O'
    >>> molecule = Chem.MolFromSmiles(smiles)
    >>> frags = tree_frags_from_mol(molecule)

    """
    root = Scaffold(get_murcko_scaffold(mol))
    rdmolops.RemoveStereochemistry(root.mol)
    lineage = [root]
    ring_fragmenter = MurckoRingFragmenter(use_scheme_4=True)
    ruleset = prioritization_rules or original_ruleset

    current = root
    while True:
        candidates = [p for p in ring_fragmenter.fragment(current) if p]
        if not candidates:
            break
        current = ruleset(current, candidates)
        lineage.append(current)
        # Continue only while the selected parent has more than one ring.
        if not current.rings.count > 1:
            break

    return [scaffold.mol for scaffold in lineage]
def aggregate(list_of_graphs):
    """Aggregate a list of graphs into one graph object.

    Graphs within the list must be a subclass of a networkx Graph object.
    Every entry, including the first, is validated before any composition
    takes place.

    Parameters
    ----------
    list_of_graphs : list
        A list (or any iterable) of scaffold graphs (ScaffoldGraph)
        for aggregation.

    Returns
    -------
    ScaffoldGraph
        The result of successively composing the graphs with
        ``networkx.compose``, starting from the first entry. When only one
        graph is supplied that graph itself is returned.

    Raises
    ------
    ValueError
        raises if an empty list is provided, instead of a list of graphs.
    ValueError
        raises if any entry in the list is not a subclass of nx.Graph.

    Warns
    -----
    UserWarning
        If a graph's type differs from the type of the first graph.

    Examples
    --------
    >>> g1 = sg.ScaffoldNetwork.from_sdf('g1.sdf')
    >>> print(g1.number_of_nodes())
    100
    >>> g2 = sg.ScaffoldNetwork.from_sdf('g2.sdf')
    >>> print(g2.number_of_nodes())
    50
    >>> g3 = sg.ScaffoldNetwork.from_sdf('g3.sdf')
    >>> print(g3.number_of_nodes())
    200
    >>> list_of_graphs = [g1, g2, g3]
    >>> aggregated_graph = aggregate(list_of_graphs)
    >>> print(aggregated_graph.number_of_nodes())
    325

    Notes
    -----
    The user is not prevented from aggregating multiple graphs of
    differing types, although this may lead to undesired behaviour.
    (i.e. aggregating a tree and a network is possible)

    Based on nx.compose_all:
    .. _Compose-all: https://networkx.github.io/documentation/stable/reference/algorithms/
                     generated/networkx.algorithms.operators.all.compose_all.html

    """
    # Materialise so that one-shot iterators are handled and emptiness is
    # detected reliably (an empty iterator is truthy).
    graphs = list(list_of_graphs) if list_of_graphs is not None else []
    if not graphs:
        raise ValueError('Cannot apply aggregate to an empty list')
    for graph in graphs:
        if not isinstance(graph, Graph):
            raise ValueError('Can only aggregate graph type objects')
    C = graphs[0]
    graph_type = type(C)
    for H in graphs[1:]:
        if graph_type != type(H):
            warnings.warn('Attempting to aggregate graphs of different types '
                          f'({graph_type} & {type(H)}) '
                          'could result in undesired behaviour')
        C = compose(C, H)
    return C
28 | 29 | """ 30 | if not issubclass(type(graph), ScaffoldGraph): 31 | raise ValueError(f'{graph} must be a ScaffoldGraph') 32 | graph_type = type(graph) 33 | B = graph_type(None) 34 | for scf, sdata in graph.get_scaffold_nodes(True): 35 | B.add_node(scf, **sdata) 36 | for mol, mdata in graph.get_molecules_for_scaffold(scf, True): 37 | if not B.molecule_in_graph(mol): 38 | B.add_node(mol, **mdata) 39 | B.add_edge(scf, mol) 40 | return B 41 | -------------------------------------------------------------------------------- /scaffoldgraph/utils/cache.py: -------------------------------------------------------------------------------- 1 | """ 2 | scaffoldgraph.utils.cache 3 | """ 4 | 5 | from collections import OrderedDict 6 | from operator import eq as _eq 7 | 8 | 9 | class Cache(OrderedDict): 10 | """A basic implementation of an LRU cache using OrderedDict. 11 | 12 | Adapted (slightly) from the collections ``OrderedDict`` 13 | documentation. 14 | 15 | .. _collections OrderedDict Documentation: 16 | https://docs.python.org/3/library/collections.html#collections.OrderedDict 17 | 18 | """ 19 | def __init__(self, maxsize=None, *args, **kwargs): 20 | """ 21 | Parameters 22 | ---------- 23 | maxsize : int, None, optional 24 | Set the maximum size of the cache, if None the cache 25 | has no size limitation. The default is None. 26 | *args 27 | Variable length argument list. 28 | Passed to OrderedDict. 29 | **kwargs 30 | Arbitrary keyword arguments. 31 | Passed to OrderedDict. 
32 | 33 | """ 34 | self._maxsize = maxsize 35 | super(Cache, self).__init__(*args, **kwargs) 36 | 37 | @property 38 | def maxsize(self): 39 | """int: The maximum size of the cache.""" 40 | return self._maxsize 41 | 42 | def __getitem__(self, key): 43 | value = super().__getitem__(key) 44 | self.move_to_end(key) 45 | return value 46 | 47 | def __setitem__(self, key, value): 48 | super().__setitem__(key, value) 49 | if self.maxsize and len(self) > self.maxsize: 50 | oldest = next(iter(self)) 51 | del self[oldest] 52 | 53 | def __eq__(self, other): 54 | if isinstance(other, Cache): 55 | return dict.__eq__(self, other) and all(map(_eq, self, other)) 56 | return dict.__eq__(self, other) 57 | 58 | def __repr__(self): 59 | return '{}(maxsize={})'.format( 60 | self.__class__.__name__, 61 | self.maxsize 62 | ) 63 | -------------------------------------------------------------------------------- /scaffoldgraph/utils/logging.py: -------------------------------------------------------------------------------- 1 | """ 2 | scaffoldgraph.utils.logging 3 | 4 | Utilities for dealing with rdkit logging. 
# Log levels enabled in the default state assumed by this module.
DEFAULT_RDLOGGER_STATUS = {
    'rdApp.debug': True,
    'rdApp.info': True,
    'rdApp.warning': True,
    'rdApp.error': True
}

# Everything silenced except errors.
QUIET_RDLOGGER_STATUS = {
    'rdApp.debug': False,
    'rdApp.info': False,
    'rdApp.warning': False,
    'rdApp.error': True
}

# Status reported when the real status cannot be queried.
UNKNOWN_RDLOGGER_STATUS = DEFAULT_RDLOGGER_STATUS


def get_rdlogger_status():
    """Return the status of the rdlogger.

    Returns
    -------
    dict
        Mapping of log level name (e.g. 'rdApp.info') to a bool which is
        True when the level is reported as 'enabled'. If the rdkit version
        string compares lower than '2020.09.01' a warning is issued and a
        copy of ``UNKNOWN_RDLOGGER_STATUS`` is returned instead.

    """
    if rdversion < '2020.09.01':
        warnings.warn('Failed to get status of rdlogger')
        # Return a copy so callers cannot mutate the module-level default.
        return dict(UNKNOWN_RDLOGGER_STATUS)
    status_dict = {}
    for status in rdBase.LogStatus().splitlines():
        level, sep, state = status.partition(':')
        if not sep:  # skip blank / malformed lines rather than crash
            continue
        status_dict[level] = state == 'enabled'
    return status_dict


def set_rdlogger_status(status_dict):
    """Set the state of the rdlogger.

    Parameters
    ----------
    status_dict : dict
        Mapping of log level name to state. A level is enabled only when
        its state is exactly ``True``; any other value disables it.

    """
    for level, state in status_dict.items():
        if state is True:
            rdBase.EnableLog(level)
        else:
            rdBase.DisableLog(level)


def set_rdlogger_quiet():
    """Set the rdlogger to quiet status."""
    set_rdlogger_status(QUIET_RDLOGGER_STATUS)


def reset_rdlogger():
    """Reset the rdlogger status to default."""
    set_rdlogger_status(DEFAULT_RDLOGGER_STATUS)


def set_rdlogger_debug_status(status):
    """Set status of rdlogger: debug."""
    set_rdlogger_status({'rdApp.debug': status})


def set_rdlogger_info_status(status):
    """Set status of rdlogger: info."""
    set_rdlogger_status({'rdApp.info': status})


def set_rdlogger_warning_status(status):
    """Set status of rdlogger: warning."""
    set_rdlogger_status({'rdApp.warning': status})


def set_rdlogger_error_status(status):
    """Set status of rdlogger: error."""
    set_rdlogger_status({'rdApp.error': status})


def suppress_rdlogger(
        suppress_info=True,
        suppress_warning=True,
        suppress_error=True,
        suppress_debug=True
):
    """Decorator for controlling the output level of the rdkit logger.

    Useful for suppressing the output of noisy functions related to
    the rdkit logger. The previous status of the logger is restored
    after the function has been executed, whether it returns normally
    or raises.

    Parameters
    ----------
    suppress_info : bool, optional
        Suppress logs from rdApp.info. The default is True.
    suppress_warning : bool, optional
        Suppress logs from rdApp.warning. The default is True.
    suppress_error : bool, optional
        Suppress logs from rdApp.error. The default is True.
    suppress_debug : bool, optional
        Suppress logs from rdApp.debug. The default is True.

    Returns
    -------
    decorator : function

    Notes
    -----
    The prior state of the logger can only be restored in the newer
    versions of rdkit (>= '2020.09.01'). In previous versions the
    logger status is returned to its default state.

    """
    altered_status = {
        'rdApp.info': not suppress_info,
        'rdApp.warning': not suppress_warning,
        'rdApp.error': not suppress_error,
        'rdApp.debug': not suppress_debug,
    }

    def decorator(func):
        @functools.wraps(func)
        def wrap_suppress(*args, **kwargs):
            # rdkit version compatibility.
            prior_status = DEFAULT_RDLOGGER_STATUS
            if rdversion >= '2020.09.01':
                prior_status = get_rdlogger_status()
            set_rdlogger_status(altered_status)
            try:
                return func(*args, **kwargs)
            finally:
                # Always restore, including on KeyboardInterrupt/SystemExit,
                # and without disturbing the original traceback.
                set_rdlogger_status(prior_status)
        return wrap_suppress
    return decorator


def canonize_smiles(smiles, failsafe=True):
    """Canonize a SMILES string (with failsafe).

    Parameters
    ----------
    smiles : str
        SMILES string to canonize.
    failsafe : bool
        If True, if the SMILES fails to parse
        the input SMILES is returned instead
        of raising an error.

    Returns
    -------
    str
        The canonical SMILES representation.

    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is None and failsafe:
        return smiles
    # NOTE(review): with failsafe=False an unparsable SMILES reaches
    # MolToSmiles as None, which is presumably where the error is raised.
    return Chem.MolToSmiles(mol)


def _graph_info(graph, n=None):
    """Private: describe a plain networkx graph or one of its nodes.

    Delegates to ``nx.info`` when the installed networkx still provides
    it; otherwise an equivalent description is built here.
    """
    info_func = getattr(nx, 'info', None)
    if info_func is not None:
        return info_func(graph, n)
    if n is None:
        return str(graph)
    if n not in graph:
        raise ValueError(f"node {n} not in graph")
    info = f"Node {n} has the following properties:\n"
    info += f"Degree: {graph.degree(n)}\n"
    info += "Neighbors: "
    info += " ".join(str(nbr) for nbr in graph.neighbors(n))
    return info


def summary(graph, n=None):
    """Return a summary of information for the graph or a single node n.

    Parameters
    ----------
    graph : sg.core.ScaffoldGraph or NetworkX graph
        A graph object which can either be a ScaffoldGraph graph or a NetworkX
        graph object.
    n : any hashable, optional
        A node in the graph. The default is None.

    Returns
    -------
    info : str
        A string containing the summary.

    Raises
    ------
    ValueError
        If n is not in the graph.

    """
    # Imported here rather than at module level, presumably to avoid a
    # circular import between scaffoldgraph.core and scaffoldgraph.utils.
    from scaffoldgraph.core import ScaffoldGraph
    if not isinstance(graph, ScaffoldGraph):
        return _graph_info(graph, n)
    info = ""
    if n is None:
        info += f"Type: {type(graph).__name__}\n"
        info += f"Number of molecule nodes: {graph.num_molecule_nodes}\n"
        info += f"Number of scaffold nodes: {graph.num_scaffold_nodes}\n"
        info += f"Number of edges: {graph.number_of_edges()}\n"
        info += f"Max hierarchy: {graph.max_hierarchy()}\n"
        info += f"Min hierarchy: {graph.min_hierarchy()}\n"
    else:
        if graph.molecule_in_graph(n):
            info += f"Node {n} has the following properties:\n"
            info += "Type: molecule\n"
            info += f"SMILES: {graph.nodes[n].get('smiles')}\n"
            info += f"Degree: {graph.degree(n)}\n"
            info += "Parent scaffolds: "
            info += " ".join(str(s) for s in graph.predecessors(n))
        elif graph.scaffold_in_graph(n):
            # Scaffold nodes are looked up by their canonical SMILES.
            key = canonize_smiles(n)
            info += f"Node {key} has the following properties:\n"
            info += "Type: scaffold\n"
            info += f"Hierarchy: {graph.nodes[key].get('hierarchy')}\n"
            info += f"Degree: {graph.degree(key)}\n"
            info += "Parent scaffolds: "
            info += " ".join(str(s) for s in graph.get_parent_scaffolds(key, max_levels=1))
            info += "\n"
            info += "Child scaffolds: "
            info += " ".join(str(s) for s in graph.get_child_scaffolds(key, max_levels=1))
            info += "\n"
            info += "Child molecules: "
            info += " ".join(
                str(s) for s in graph.successors(key) if graph.nodes[s].get('type') == 'molecule'
            )
        else:
            raise ValueError(f"node {n} not in graph")
    return info
def split_graph_by_molecule_attribute(graph, key, default=None):
    """Split a scaffold graph into subgraphs based on unique molecule attributes.

    This function first groups molecule nodes sharing a unique attribute
    value, and then proceeds to build subgraphs from each node subset using
    a breadth-first search.

    The returned subgraphs are graph views: changes to the graph structure
    are ruled out by the view, but changes to node attributes are
    reflected in the original graph. To prevent this behaviour use:
    subgraph.copy()

    Parameters
    ----------
    graph : sg.core.ScaffoldGraph
        A scaffold graph to split.
    key : str
        The key for the molecule node attribute used to split the graph
        into subgraphs.
    default : value, bool, optional
        Value used for nodes that don't have the requested attribute.

    Returns
    -------
    splits : dict
        A dictionary with keys representing unique node attributes and
        values representing the constructed subgraphs.

    Raises
    ------
    ValueError
        If `key` is a boolean.

    """
    # NOTE(review): booleans are rejected presumably because the node
    # accessor treats a boolean as a "return all data" flag rather than
    # an attribute key -- confirm against ScaffoldGraph.get_molecule_nodes.
    if isinstance(key, bool):
        raise ValueError('Attribute key cannot be a boolean type')
    splits = defaultdict(list)
    for node, attr in graph.get_molecule_nodes(key, default):
        splits[attr].append(node)
    splits.default_factory = None  # behave like a plain dict from here on
    # Only values of existing keys are replaced, so mutating the mapping
    # while iterating over its items is safe.
    for attr, nodes in splits.items():
        bfs_subset = set()
        for node in nodes:
            # Reverse BFS walks from the molecule towards its ancestors.
            bfs_subset.update(bfs_tree(graph, node, reverse=True))
        splits[attr] = graph.subgraph(bfs_subset)
    return splits


class Visualizer(ABC):
    """Base class for ScaffoldGraph visualizers.

    A Visualizer contains functions for creating visualizations
    of ScaffoldGraphs.

    See Also
    --------
    scaffoldgraph.vis.notebook.cytoscape.CytoscapeVisualizer

    """
    def __init__(self, graph, requires_tree=False, refresh_images=False):
        """Initialize the visualizer.

        Parameters
        ----------
        graph : ScaffoldGraph
            ScaffoldGraph to visualize
        requires_tree : bool, optional
            Whether the visualizer requires a tree
            structure to create a visualization.
        refresh_images: bool, optional
            If True remove all embedded images from the
            input graph and regenerate when required.
            The default is False.

        """
        # The flags must be set before validation, which reads them.
        self._requires_tree = requires_tree
        self._refresh = refresh_images
        self._graph = self._validate_graph(graph)

    @property
    def graph(self):
        """ScaffoldGraph: return the graph associated with the visualizer."""
        return self._graph

    @graph.setter
    def graph(self, graph):
        self._graph = self._validate_graph(graph)

    def _validate_graph(self, graph):
        """Private: Validate a graph is suitable for visualizer.

        Returns the graph unchanged, apart from the removal of embedded
        images when the visualizer was created with ``refresh_images=True``.

        Raises
        ------
        ValueError
            If `graph` is not a ScaffoldGraph, or if a tree/forest is
            required and `graph` is neither.

        """
        if not isinstance(graph, ScaffoldGraph):
            raise ValueError(
                f'{graph} must be a subclass of ScaffoldGraph'
            )
        if self._requires_tree:
            # Reject only graphs that are neither a tree nor a forest.
            if not (nx.is_tree(graph) or nx.is_forest(graph)):
                msg = '{} requires a tree/forest structured graph'
                raise ValueError(msg.format(self.__class__.__name__))
        if self._refresh is True:
            remove_node_mol_images(graph)
        return graph

    def _subgraph_from_mol(self, molecule):
        """Private: Select a subgraph starting at a molecule node.

        Parameters
        ----------
        molecule : str
            Molecule node identifier.

        Returns
        -------
        subgraph : ScaffoldGraph
            A subgraph starting at `molecule`.

        Raises
        ------
        ValueError
            If `molecule` is not a molecule node of the graph.

        """
        G = self._graph
        if not G.molecule_in_graph(molecule):
            raise ValueError(f'molecule: {molecule} not in graph {G}')
        scaffolds = G.get_scaffolds_for_molecule(molecule)
        subgraph = G.subgraph([molecule] + scaffolds)
        return subgraph

    def _subgraph_from_scf(self, scaffold, traversal):
        """Private: Select a subgraph starting at a scaffold node.

        Parameters
        ----------
        scaffold : str
            Scaffold node identifier.
        traversal : str {'parent', 'child', 'bidirectional'}
            The direction of traversal to create the subgraph.
            If 'bidirectional' both directions are considered.

        Returns
        -------
        subgraph : ScaffoldGraph
            A subgraph starting at `scaffold`.

        Raises
        ------
        ValueError
            If the scaffold is not in the graph or `traversal` is not one
            of the supported options.

        """
        G = self._graph
        # Scaffold nodes are looked up by their canonical SMILES.
        query = canonize_smiles(scaffold)
        if not G.scaffold_in_graph(query):
            raise ValueError(f'scaffold: {query} not in graph {G}')
        if traversal == 'parent':
            nodes = G.get_parent_scaffolds(query)
        elif traversal == 'child':
            nodes = list(nx.descendants(G, query))
        elif traversal == 'bidirectional':
            nodes = G.get_parent_scaffolds(query)
            nodes += list(nx.descendants(G, query))
        else:
            msg = 'traversal must be one of {child, parent, bidirectional}'
            raise ValueError(msg)
        subgraph = G.subgraph([query] + nodes)
        return subgraph

    def __repr__(self):
        return '<{_cls} at {address}>'.format(
            _cls=self.__class__.__name__,
            address=hex(id(self))
        )
try:
    import ipycytoscape as cy
    _cytoscape_available = True
except ImportError:
    _cytoscape_available = False
    warnings.warn(
        'ipycytoscape could not be imported and is required '
        'for generating cytoscape based visualizations.'
    )

# Default style sheet shipped alongside this module.
DEFAULT_STYLE = Path(__file__).parent.resolve() / 'resources' / 'cytoscape.json'

DEFAULT_LAYOUT = {
    'name': 'dagre',
    'nodeSpacing': 50,
    'edgeLengthVal': 50
}


def read_style_file(path):
    """Read a JSON style file (cytoscape).

    Parameters
    ----------
    path : str
        File path to style file.

    Returns
    -------
    style : dict
        Style dictionary.

    """
    # JSON is UTF-8 by specification; do not rely on the platform default.
    with open(path, 'r', encoding='utf-8') as f:
        style = json.load(f)
    return style


class CytoscapeVisualizer(Visualizer):
    """Class for creating visualizations using ipycytoscape.

    This visualizer renders scaffoldgraphs as interactive
    networks using cytoscape. The visualizer is flexible
    allowing users to customize the output defining the
    style and layout options.

    Notes
    -----
    visualizer is intended to be used within a jupyter notebook.

    ipycytoscape must be installed to use this feature.

    The code for this feature was inspired/adapted from:
    .. _Blogpost: https://iwatobipen.wordpress.com/2020/03/30/draw-scaffold-tree
                  -as-network-with-molecular-image-rdkit-cytoscape/

    Examples
    --------
    Create a visualization for a whole graph.

    >>> from scaffoldgraph.vis.notebook import cytoscape
    >>> import scaffoldgraph as sg
    >>> tree = sg.ScaffoldTree.from_sdf('my_sdf.sdf')
    >>> visualizer = cytoscape.CytoscapeVisualizer(tree)
    >>> visualizer.draw()

    Use a different layout.

    >>> visualizer.draw(layout_kwargs={'name': 'breadthfirst'})

    Draw a subgraph starting from a molecule node.

    >>> visualizer.draw_for_molecule('CHEMBL1997663')

    Draw a subgraph starting from a scaffold node.

    >>> visualizer.draw_for_scaffold('c1ccc(CNc2ccccc2)cc1')

    """
    def __init__(
            self,
            graph,
            style=None,
            refresh_images=False,
            rd_draw_options=None,
            mol_img_size=(350, 300),
    ):
        """Initialize the cytoscape visualizer.

        Parameters
        ----------
        graph : ScaffoldGraph
            A ScaffoldGraph object to draw.
        style : list, optional
            A list of dicts specifying the style to pass
            to the cytoscape widget, for more details
            see the ipycytoscape documentation. If None
            a default style is used and can be updated
            after initialization.
        refresh_images: bool, optional
            If True remove all embedded images from the
            input graph and regenerate when required.
            The default is False.
        rd_draw_options: rdkit.Chem.Draw.rdMolDraw2D.MolDrawOptions, optional
            Specify options for molecule drawing. Requires a
            `MolDrawOptions` object or `None`.
            The default is None.
        mol_img_size: tuple, optional
            Specify the size of the node images. Format is
            `(width, height)`. Note that if changed from
            default the style will have to be updated.
            The default is `(350, 300)`.

        """
        super(CytoscapeVisualizer, self).__init__(
            graph,
            requires_tree=False,
            refresh_images=refresh_images,
        )
        self._drawopts = rd_draw_options
        self._img_size = mol_img_size
        # Any falsy style (None, empty list) falls back to the default file.
        self._style = style if style else read_style_file(DEFAULT_STYLE)

    @property
    def style(self):
        """list : returns the cytoscape style associated."""
        return self._style

    @style.setter
    def style(self, style):
        # Raised explicitly (same exception type as before) so that the
        # check is not stripped when Python runs with -O.
        if not isinstance(style, list):
            raise AssertionError('style must be a list object')
        self._style = style

    @staticmethod
    def _cytoscape_validate():
        """Private: raise RuntimeError if ipycytoscape is unavailable."""
        if _cytoscape_available is False:
            raise RuntimeError('ipycytoscape is not available')

    def _draw(self, subgraph, layout_kwargs):
        """Private: create the cytoscape widget from a subgraph.

        Images are embedded into the nodes of `subgraph` in-place before
        the widget is built. `layout_kwargs`, if given, override the
        entries of ``DEFAULT_LAYOUT``.
        """
        if subgraph.number_of_nodes() > 100:
            warnings.warn('graphs with > 100 nodes may be slow to render')
        embed_node_mol_images(
            subgraph,
            size=self._img_size,
            draw_options=self._drawopts,
        )
        layout = {}
        layout.update(DEFAULT_LAYOUT)
        if layout_kwargs:
            layout.update(layout_kwargs)
        widget = cy.CytoscapeWidget()
        widget.set_style(self._style)
        widget.set_layout(**layout)
        widget.graph.add_graph_from_networkx(
            subgraph, directed=True
        )
        return widget

    def draw(self, layout_kwargs=None):
        """Draw the entire scaffoldgraph.

        Parameters
        ----------
        layout_kwargs : dict, optional
            arguments to pass to the CytoscapeWidget.set_layout
            function.

        Returns
        -------
        widget : ipycytoscape.CytoscapeWidget

        Raises
        ------
        RuntimeError
            If ipycytoscape is not available.

        """
        self._cytoscape_validate()
        return self._draw(self._graph, layout_kwargs)

    def draw_for_molecule(self, molecule_id, layout_kwargs=None):
        """Draw subgraph starting from a query molecule.

        Parameters
        ----------
        molecule_id : str
            Molecule node identifier.
        layout_kwargs : dict, optional
            arguments to pass to the CytoscapeWidget.set_layout
            function.

        Returns
        -------
        widget : ipycytoscape.CytoscapeWidget

        Raises
        ------
        RuntimeError
            If ipycytoscape is not available.

        """
        self._cytoscape_validate()
        subgraph = self._subgraph_from_mol(molecule_id)
        return self._draw(subgraph, layout_kwargs)

    def draw_for_scaffold(self, scaffold_id, traversal='child', layout_kwargs=None):
        """Draw subgraph starting from a query scaffold.

        Parameters
        ----------
        scaffold_id : str
            Scaffold node identifier.
        traversal : str {'parent', 'child', 'bidirectional'}
            The direction of traversal to create the subgraph.
            If 'bidirectional' both directions are considered.
            The default is 'child'.
        layout_kwargs : dict, optional
            arguments to pass to the CytoscapeWidget.set_layout
            function.

        Returns
        -------
        widget : ipycytoscape.CytoscapeWidget

        Raises
        ------
        RuntimeError
            If ipycytoscape is not available.

        """
        self._cytoscape_validate()
        subgraph = self._subgraph_from_scf(scaffold_id, traversal)
        return self._draw(subgraph, layout_kwargs)
25 | }, 26 | { 27 | "selector": "edge", 28 | "style": { 29 | "width": 6, 30 | "line-color": "#9dbaea", 31 | "target-arrow-shape": "triangle", 32 | "target-arrow-color": "#9dbaea", 33 | "curve-style": "bezier" 34 | } 35 | }, 36 | { 37 | "selector": "edge[rule]", 38 | "style": { 39 | "content": "data(rule)", 40 | "color": "#2E56AD", 41 | "font-family": "arial", 42 | "font-size": "30px", 43 | "text-outline-width": 2, 44 | "text-outline-color": "white" 45 | } 46 | } 47 | ] -------------------------------------------------------------------------------- /scaffoldgraph/vis/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | scaffoldgraph.vis.utils 3 | """ 4 | 5 | import matplotlib.pyplot as plt 6 | import matplotlib as mpl 7 | 8 | from rdkit.Chem.Draw import rdMolDraw2D 9 | from rdkit import Chem 10 | 11 | from loguru import logger 12 | from urllib import parse 13 | 14 | 15 | def _maybe_kekulize(mol): 16 | """Private: attempt to kekulize a molecule.""" 17 | try: 18 | Chem.Kekulize(mol) 19 | except Chem.KekulizeException: 20 | smi = Chem.MolToSmiles(mol) 21 | logger.warning(f'Failed to kekulize mol: {smi}') 22 | return mol 23 | 24 | 25 | def smiles_to_svg(smiles, size=(350, 300), draw_options=None): 26 | """Create an SVG string from a SMILES string. 27 | 28 | Parameters 29 | ---------- 30 | smiles : str 31 | SMILES to create SVG image. 32 | size : tuple, optional 33 | Size of image, the default is (350, 300). 34 | draw_options : rdMolDraw2D.MolDrawOptions 35 | Options to pass to the drawer. 36 | 37 | Returns 38 | ------- 39 | svg : str 40 | SVG text for molecule. 
41 | 42 | """ 43 | mol = Chem.MolFromSmiles(smiles) 44 | if mol is None: 45 | return '' 46 | mol = _maybe_kekulize(mol) 47 | drawer = rdMolDraw2D.MolDraw2DSVG(*size) 48 | if draw_options: 49 | drawer.SetDrawOptions(draw_options) 50 | rdMolDraw2D.PrepareAndDrawMolecule(drawer, mol) 51 | drawer.FinishDrawing() 52 | return drawer.GetDrawingText() 53 | 54 | 55 | def smiles_to_image(smiles, size=(350, 300), draw_options=None): 56 | """Create an SVG image from a SMILES string (ready for HTML). 57 | 58 | Parameters 59 | ---------- 60 | smiles : str 61 | SMILES to create SVG image. 62 | size : tuple, optional 63 | Size of image, the default is (350, 300). 64 | draw_options : rdMolDraw2D.MolDrawOptions 65 | Options to pass to the drawer. 66 | 67 | Returns 68 | ------- 69 | svg : str 70 | SVG image path. 71 | 72 | """ 73 | svg = smiles_to_svg(smiles, size, draw_options) 74 | img_path = 'data:image/svg+xml;charset=utf-8,' 75 | img_path += parse.quote(svg, safe='') 76 | return img_path 77 | 78 | 79 | def embed_node_mol_images(graph, size=(350, 300), draw_options=None, skip_existing=True): 80 | """Embed molecule images into a graph. 81 | 82 | Images are added as an attribute 'img' to each node with an 83 | available SMILES string ('molecule', 'scaffold'). The graph 84 | is modified in-place. 85 | 86 | Parameters 87 | ---------- 88 | graph : ScaffoldGraph 89 | Input ScaffoldGraph. 90 | size : tuple, optional 91 | Size of image, the default is (350, 300). 92 | draw_options : rdMolDraw2D.MolDrawOptions 93 | Options to pass to the drawer. 94 | skip_existing : bool 95 | Skip node if it contains an 'img' attribute. 96 | The default is True. 
97 | 98 | """ 99 | for node, data in graph.nodes(data=True): 100 | if skip_existing and data.get('img', None): 101 | continue 102 | elif data.get('type', None) == 'scaffold': 103 | data['img'] = smiles_to_image(node, size, draw_options) 104 | elif data.get('type', None) == 'molecule': 105 | data['img'] = smiles_to_image(data['smiles'], size, draw_options) 106 | else: 107 | data['img'] = '' 108 | 109 | 110 | def remove_node_mol_images(graph): 111 | """Remove embeded images from a graph. 112 | 113 | Parameters 114 | ---------- 115 | graph : ScaffoldGraph 116 | Input ScaffoldGraph 117 | 118 | """ 119 | for node, data in graph.nodes(data=True): 120 | _ = data.pop('img', None) 121 | 122 | 123 | def rgba_to_hex(scalar_mappable, value): 124 | """str: rgba to hex.""" 125 | rgba = scalar_mappable.to_rgba(value) 126 | c_hex = mpl.colors.to_hex(rgba, keep_alpha=False) 127 | return c_hex 128 | 129 | 130 | def cmap_to_scalar_mappable(cmap, vmin, vmax): 131 | """Convert matplotlib Colormap to a ScalarMappable. 132 | 133 | Parameters 134 | ---------- 135 | cmap : matplotlib.colors.Colormap 136 | vmin : float 137 | Minimum value for normalization. 138 | vmax : float 139 | Maximum value for normalization. 140 | 141 | Returns 142 | ------- 143 | matplolib.cm.ScalarMappable 144 | 145 | """ 146 | cnorm = mpl.colors.Normalize(vmin, vmax) 147 | scalar = mpl.cm.ScalarMappable(norm=cnorm, cmap=cmap) 148 | return scalar 149 | 150 | 151 | def color_nodes_by_attribute(graph, attribute, cmap, node_type, label='color'): 152 | """ 153 | Add an attribute to nodes in a ScaffoldGraph containing a color hex code, 154 | calculated from a paticular node attribute and a matplotlib cmap. The 155 | operation is perfomred in-place. 156 | 157 | Can be used for adding colors to ScaffoldGraph visualizations. 158 | 159 | Parameters 160 | ---------- 161 | graph : ScaffoldGraph 162 | Input ScaffoldGraph 163 | attribute : str 164 | Key for the attibute from which to calculate a color. 
165 | cmap : str or matplotlib.colors.Colormap 166 | A matplotlib cmap or name of a cmap e.g. 'BuPu' for 167 | calculating a nodes colour. 168 | node_type : str 169 | The type of node to process e.g. 'scaffold' / 'molecule' 170 | label : str, optional 171 | The attribute label to use for storing the color. 172 | The default is 'color'. 173 | 174 | """ 175 | # Cmap may be a string or a Colormap 176 | if isinstance(cmap, str): 177 | cmap = plt.get_cmap(cmap) 178 | else: 179 | if not issubclass(type(cmap), mpl.colors.Colormap): 180 | raise ValueError('cmap must be a string or a matplotlib Colormap') 181 | 182 | # Get attribute range. 183 | _, attr = zip(*graph._get_nodes_with_type(node_type, attribute, None)) 184 | attr = list(filter(lambda x: x is not None, attr)) 185 | attr = list(map(float, attr)) 186 | vmin, vmax = min(attr), max(attr) 187 | 188 | # Assign colors to each node. 189 | scalar_mappable = cmap_to_scalar_mappable(cmap, vmin, vmax) 190 | for node, data in graph._get_nodes_with_type(node_type, True, None): 191 | attr_val = data.get(attribute, None) 192 | if not attr_val: 193 | color = '#EEEEEE' # Set a neutral default. 194 | else: 195 | color = rgba_to_hex(scalar_mappable, attr_val) 196 | data[label] = color 197 | 198 | 199 | def color_scaffold_nodes_by_attribute(graph, attribute, cmap, label='color'): 200 | """ 201 | Add an attribute to scaffold nodes in a ScaffoldGraph containing a color hex code, 202 | calculated from a paticular scaffold node attribute and a matplotlib cmap. The 203 | operation is perfomred in-place. 204 | 205 | Can be used for adding colors to ScaffoldGraph visualizations. 206 | 207 | Parameters 208 | ---------- 209 | graph : ScaffoldGraph 210 | Input ScaffoldGraph 211 | attribute : str 212 | Key for the attibute from which to calculate a color. 213 | cmap : str or matplotlib.colors.Colormap 214 | A matplotlib cmap or name of a cmap e.g. 'BuPu' for 215 | calculating a nodes colour. 
def add_root_node(graph):
    """Attach a single 'root' node above every parentless node, in-place.

    A node named 'root' (type 'root', hierarchy 0) is added to the graph and
    an edge with type 2 is created from it to every other node whose
    in-degree is zero.

    Parameters
    ----------
    graph : ScaffoldGraph
        Graph to add root node.

    """
    root = 'root'
    graph.add_node(root, type='root', hierarchy=0)
    # Collect the edges first so the graph is not modified while its
    # in-degree view is being iterated.
    root_edges = []
    for node, degree in graph.in_degree:
        if degree != 0 or node == root:
            continue
        root_edges.append((root, node))
    graph.add_edges_from(root_edges, type=2)
"""
scaffoldgraph setup.py
"""

from setuptools import setup, find_packages
from pathlib import Path
import re

url = 'https://github.com/UCLCheminformatics/scaffoldgraph'

description = 'ScaffoldGraph is an open-source cheminformatics library, built using RDKit and \
NetworkX for generating scaffold networks and scaffold trees.'

root = Path(__file__).parent.resolve()

# The package version is read from the source text rather than imported, so
# setup.py does not need the package's dependencies to be installed.
init_path = root / 'scaffoldgraph' / '__init__.py'
with init_path.open('r', encoding='utf8') as f:
    __version__ = re.findall("__version__ = '(.*)'", f.read())[0]

# Install requirements come from requirements.txt. Blank lines and comment
# lines are skipped so they never become empty requirement strings, and the
# bare 'rdkit' entry is filtered out here (it is offered through the 'rdkit'
# extra in setup.cfg instead). Filtering, unlike list.remove, does not fail
# when the entry is absent.
requires_path = root / 'requirements.txt'
with requires_path.open('r', encoding='utf8') as f:
    install_requires = [
        requirement
        for requirement in (line.strip() for line in f)
        if requirement
        and not requirement.startswith('#')
        and requirement != 'rdkit'
    ]

readme_path = root / 'README.md'
with readme_path.open('r', encoding='utf-8') as f:
    long_description = f.read()

setup_requires = ['pytest-runner']
tests_require = ['pytest', 'pytest-cov']

entry_points = {
    'console_scripts': [
        'scaffoldgraph = scaffoldgraph.scripts.run:scaffoldgraph_main',
    ]
}

setup(
    name='ScaffoldGraph',
    version=__version__,
    description=description,
    long_description=long_description,
    long_description_content_type='text/markdown',
    author='Oliver Scott',
    author_email='oliver.scott.17@ucl.ac.uk',
    url=url,
    download_url='{}/archive/{}.tar.gz'.format(url, __version__),
    license='MIT',
    keywords=[
        'rdkit',
        'networkx',
        'cheminformatics',
        'scaffolds',
        'scaffold tree',
        'scaffold network'
    ],
    classifiers=[
        'License :: OSI Approved :: MIT License',
        'Programming Language :: Python :: 3',
        'Operating System :: OS Independent',
        'Topic :: Scientific/Engineering',
        'Topic :: Scientific/Engineering :: Bio-Informatics',
        'Topic :: Scientific/Engineering :: Chemistry',
    ],
    python_requires='>=3.6',
    install_requires=install_requires,
    setup_requires=setup_requires,
    tests_require=tests_require,
    entry_points=entry_points,
    include_package_data=True,
    packages=find_packages(
        exclude=['tests.*', 'tests']
    ),
)
@pytest.fixture(name='sdf_file')
def mock_sdf(tmp_path):
    """Write a two-molecule SDF file under ``tmp_path`` and return its path.

    The file is created as ``test_data/test.sdf``. The directory is created
    with ``exist_ok=True`` so this fixture can be combined in one test with
    the other fixtures that use the same ``test_data`` directory.

    Returns
    -------
    str
        Path to the written SDF file.

    """
    d = tmp_path / "test_data"
    d.mkdir(exist_ok=True)
    p = d / "test.sdf"
    writer = Chem.SDWriter(str(p))
    writer.write(Chem.MolFromSmiles('CN1C(=O)CN=C(C2=C1C=CC(=C2)Cl)C3=CC=CC=C3'))
    writer.write(Chem.MolFromSmiles('CCC1=CC2=C(S1)N(C(=O)CN=C2C3=CC=CC=C3Cl)C'))
    writer.close()
    return str(p)
def test_enrichment(network):
    """Check the result layout of scaffold enrichment in both modes.

    Molecule nodes are given an 'activity' attribute, first continuous values
    for the default mode, then 0/1 labels for mode 'b'. A locally seeded
    generator is used so every run sees the same activities.
    """
    rng = random.Random(0)  # Fixed seed: reproducible activities per run.

    # Continuous activities (default mode; 'ks' for compound_set_enrichment).
    ks_data = {}
    for molecule in network.get_molecule_nodes():
        ks_data[molecule] = {'activity': rng.random()}
    nx.set_node_attributes(network, ks_data)
    enrichment = calc_scaffold_enrichment(network, 'activity')
    entry = list(enrichment.items())[0]
    assert entry[0] in network
    assert 'pval' in entry[1]
    assert 'dmax' in entry[1]
    assert '_total' in entry[1]
    assert isinstance(entry[1]['dmax'], float)
    assert isinstance(entry[1]['_total'], int)
    compound_set_enrichment(network, 'activity', mode='ks')

    # Binary activities (mode 'b').
    binom_data = {}
    for molecule in network.get_molecule_nodes():
        binom_data[molecule] = {'activity': rng.choice([0, 1])}
    nx.set_node_attributes(network, binom_data)
    enrichment = calc_scaffold_enrichment(network, 'activity', mode='b')
    entry = list(enrichment.items())[0]
    assert entry[0] in network
    assert 'pval' in entry[1]
    assert '_active' in entry[1]
    assert '_total' in entry[1]
    assert isinstance(entry[1]['_active'], int)
    assert isinstance(entry[1]['_total'], int)
    compound_set_enrichment(network, 'activity', mode='b')

    # An unknown mode must be rejected.
    with pytest.raises(ValueError):
        compound_set_enrichment(network, 'activity', mode='not a mode')
def test_annotation(mol):
    """The annotated Murcko scaffold SMILES contains three '*' characters."""
    annotation = Chem.MolToSmiles(get_annotated_murcko_scaffold(mol))
    # The '*' characters are counted directly: rewriting '1*', '2*' or '3*'
    # to '*' beforehand leaves the number of '*' characters unchanged.
    assert annotation.count('*') == 3
def test_ring_toplogy():
    """Ring topology scaffold for the figure 1 example molecule."""
    # Replicate figure 1 from paper: Scaffold Topologies II: Analysis of Chemical Databases
    # The module's topology helper runs the same parse / scaffold / compare steps.
    _test_topology_helper(
        'CC(C)c1ccc(C)cc1OC(=O)C2(CCC3C2)C(=C)C3(C)C',
        'C1CC1C12CC1C2',
    )
def _test_connectivity_helper(smiles, expected, single=False):
    """Assert that the ring connectivity scaffold of ``smiles`` is ``expected``.

    ``single`` is passed through as the second argument of
    ``get_ring_connectivity_scaffold``; ``expected`` is canonicalised with
    ``canon`` before the comparison.
    """
    parsed = Chem.MolFromSmiles(smiles)
    observed = Chem.MolToSmiles(get_ring_connectivity_scaffold(parsed, single))
    assert observed == canon(expected)
def test_init_molecule_name():
    """init_molecule_name sets a non-empty '_Name' property on an unnamed mol."""
    name_key = '_Name'
    molecule = Chem.MolFromSmiles('CCC')
    # A freshly parsed molecule carries no name.
    assert not molecule.HasProp(name_key)
    init_molecule_name(molecule)
    assert molecule.HasProp(name_key)
    assigned = molecule.GetProp(name_key)
    assert assigned is not None
    assert assigned != ''
def test_atoms(scaffold):
    """Scaffold.atoms has one Chem.Atom per atom of the wrapped molecule."""
    atom_list = scaffold.atoms
    expected_count = scaffold.mol.GetNumAtoms()
    assert len(atom_list) == expected_count
    for atom in atom_list:
        assert isinstance(atom, Chem.Atom)
def test_ring_systems(scaffold):
    """Exercise the ring system stack of the scaffold and one of its members."""
    systems = scaffold.ring_systems

    # Stack-level interface.
    assert isinstance(systems, RingSystemStack)
    for attr_name in ('owner', 'ring_indexes', 'atom_rings', 'bond_rings'):
        assert hasattr(systems, attr_name)
    assert systems.count == 2 and len(systems) == 2
    # NOTE(review): the expected repr template is an empty string in this
    # copy of the source; confirm against the class's __repr__.
    assert repr(systems) == ''.format(hex(id(systems)))
    assert isinstance(systems[0], RingSystem)
    assert len([member for member in systems]) == 2
    assert len(systems.atom_rings) == 2 and len(systems.bond_rings) == 2

    # Interface of a single ring system.
    system = systems[1]
    for attr_name in ('owner', 'aix', 'bix', 'rix'):
        assert hasattr(system, attr_name)
    for bond in system.bonds:
        assert isinstance(bond, Chem.Bond)
    for atom in system.atoms:
        assert isinstance(atom, Chem.Atom)
    assert isinstance(system.size, int)
    assert len(system) == len(system.atoms)
    # NOTE(review): same empty repr template as above; confirm.
    assert repr(system) == ''.format(hex(id(system)))
    assert isinstance(system[0], Ring)
    assert len(list(system.get_rings())) == 2
    assert len(system.get_attachment_points()) == 1
    assert system.is_exocyclic_attachment(system.atoms[0]) is False

    # Slicing the stack and a ring system.
    tail = systems[1:]
    assert len(tail) == 1
    assert isinstance(tail[0][0], Ring)
    assert len(tail[0][0:2]) == 2
def fragment_and_filter(mol, rule):
    """Return the SMILES of the parent scaffolds that ``rule`` keeps.

    The parents are the next Murcko fragments of ``mol``, each wrapped in a
    ``Scaffold`` before being passed to ``rule.filter`` together with the
    scaffold of ``mol`` itself.
    """
    child = Scaffold(mol)
    candidates = [Scaffold(fragment) for fragment in get_next_murcko_fragments(mol)]
    return [kept.smiles for kept in rule.filter(child, candidates)]
def test_scp_num_linker_bonds():
    """SCPNumLinkerBonds keeps the expected parent for 'min' and for 'max'."""
    molecule = Chem.MolFromSmiles('O=C(NCCCCN1CCN(c2ccccc2)CC1)c1ccc2c(c1)Cc1ccccc1-2')
    expected_for_min = canonize_smiles('O=C(NCCCCN1CCNCC1)c1ccc2c(c1)Cc1ccccc1-2')
    expected_for_max = canonize_smiles('O=C(NCCCCN1CCN(c2ccccc2)CC1)c1ccc2c(c1)CC=C2')
    _test_rule_min_max(
        molecule,
        SCPNumLinkerBonds,
        expected_min=expected_for_min,
        expected_max=expected_for_max,
    )
def test_scp_num_oxygen_atoms():
    """SCPNumOAtoms keeps the expected parent for 'min' and for 'max'."""
    molecule = Chem.MolFromSmiles('C2Oc3ccccc3C(=O)C2=O')
    expected_for_min = canonize_smiles('c1ccccc1')
    expected_for_max = canonize_smiles('O=C1C=COCC1=O')
    _test_rule_min_max(
        molecule,
        SCPNumOAtoms,
        expected_min=expected_for_min,
        expected_max=expected_for_max,
    )
def test_rrp_num_oxygen_atoms():
    """RRPNumOAtoms keeps the expected parent for 'min' and for 'max'."""
    molecule = Chem.MolFromSmiles('C1OC(=O)C2=C1CCC=C2')
    expected_for_min = canonize_smiles('O=C1C=CCO1')
    expected_for_max = canonize_smiles('C1=CCCC=C1')
    _test_rule_min_max(
        molecule,
        RRPNumOAtoms,
        expected_min=expected_for_min,
        expected_max=expected_for_max,
    )
def test_rsp_delta():
    """RSPDelta keeps the expected parent for 'min' and for 'max'."""
    molecule = Chem.MolFromSmiles('O=C1N(CCCC3CCNCC3)CCC12CCN1CCCC12')
    expected_for_min = canonize_smiles('O=C1N(CCCC2CCNCC2)CCC12CCNC2')
    expected_for_max = canonize_smiles('O=C1NCCC12CCN1CCCC12')
    _test_rule_min_max(
        molecule,
        RSPDelta,
        expected_min=expected_for_min,
        expected_max=expected_for_max,
    )
class MockScaffoldFilterRule(BaseScaffoldFilterRule):
    """Smallest concrete rule needed to instantiate the abstract base class."""

    @property
    def name(self):
        """Fixed rule name used by the assertions in this module."""
        return 'mock'

    def filter(self, child, parents):
        """Return every parent except the first; ``child`` is not consulted."""
        remaining = parents[1:]
        return remaining
def test_name(null_set):
    """Check the ruleset's default name and that ``name`` can be reassigned.

    Parameters
    ----------
    null_set
        The ``null_set`` fixture (a ``ScaffoldRuleSet()`` built with no
        arguments).
    """
    # Compare by value. Identity (`is`) against a str literal only passes when
    # the two strings happen to be the same interned object, and CPython 3.8+
    # emits a SyntaxWarning for it.
    assert null_set.name == 'ScaffoldRuleSet'
    null_set.name = 'some_name'
    assert null_set.name == 'some_name'
def test_errors(null_set):
    """Check that a non-rule object is rejected by every mutating entry point.

    Each operation has its own ``pytest.raises`` block. With a shared block
    the first statement raises and the remaining ones never execute, so
    ``insert_rule`` and item assignment were previously untested.
    """
    with pytest.raises(TypeError):
        null_set.add_rule('')
    with pytest.raises(TypeError):
        null_set.insert_rule('', 0)
    with pytest.raises(TypeError):
        null_set[0] = ''
# Exercises every CLI utility in one test.
# Deselect with: pytest -m "not slow"
@pytest.mark.slow
def test_cli():
    """Run generate -> aggregate -> select through the CLI for each graph type.

    For each of the ``tree``, ``network`` and ``hiers`` sub-commands the test
    invokes the ``scaffoldgraph`` executable three times and validates each
    output file with the matching ``check_*_structure`` helper.
    """
    smiles_file = str(TEST_DATA_DIR / 'test_smiles.smi')
    for func in ('tree', 'network', 'hiers'):
        # Use a fresh directory per graph type so a file left behind by an
        # earlier iteration cannot satisfy this iteration's existence checks.
        with tempfile.TemporaryDirectory() as tmp:

            # Test graph generation
            out = os.path.join(tmp, 'output.tmp')
            args = ['scaffoldgraph', func, smiles_file, out]
            proc = Popen(args, stdout=PIPE, stderr=PIPE)
            stdout, _ = proc.communicate()
            assert stdout is not None
            assert os.path.exists(out)
            check_generate_structure(out)

            # Test graph aggregation
            out2 = os.path.join(tmp, 'output.txt')
            args = ['scaffoldgraph', 'aggregate', out, out2]
            proc = Popen(args, stdout=PIPE, stderr=PIPE)
            stdout, _ = proc.communicate()
            assert stdout is not None
            # Check the file this step produces (out2), not the input (out),
            # which is already known to exist from the generation step.
            assert os.path.exists(out2)
            smiles = check_aggregate_structure(out2)

            # Test graph selection
            test_smi = os.path.join(tmp, 'test.smi')
            out3 = os.path.join(tmp, 'select.txt')
            with open(test_smi, 'w') as smi:
                smi.write(f'{smiles} fake_scaffold_id')
            args = ['scaffoldgraph', 'select', out2, test_smi, out3]
            proc = Popen(args, stdout=PIPE, stderr=PIPE)
            stdout, _ = proc.communicate()
            assert stdout is not None
            assert os.path.exists(out3)
            check_select_structure(out3)
def test_separate_disconnected(network):
    """Check the number and the type of the separated components.

    The ``network`` fixture is expected to split into two components, and
    each component must be an instance of the same class as the source graph.
    """
    assert len(network.separate_disconnected_components(sort=True)) == 2
    # Use isinstance rather than comparing type objects with ==, and check
    # every returned component instead of only the first one.
    components = network.separate_disconnected_components()
    assert all(isinstance(part, type(network)) for part in components)
def test_repr(test_tree):
    """Check that ``repr()`` of a tree embeds the object's address."""
    # NOTE(review): in this copy of the file the expected template was the
    # empty string (''.format(...) is always ''), which can never equal a
    # non-empty repr. Presumably an angle-bracketed literal was stripped;
    # '<ScaffoldTree at {}>' is assumed here -- confirm against the graph
    # class's __repr__ before merging.
    assert repr(test_tree) == '<ScaffoldTree at {}>'.format(hex(id(test_tree)))
def test_bipartite(sdf_file):
    """Build a network from the SDF fixture and check its bipartite projection."""
    scaffold_network = sg.ScaffoldNetwork.from_sdf(sdf_file)
    bipartite_graph = make_bipartite_graph(scaffold_network)
    assert nx.is_bipartite(bipartite_graph)
def test_split_by_attribute(network):
    """Check splitting a graph into subgraphs keyed by a molecule attribute.

    Boolean keys must be rejected with ``ValueError``. After a random label
    is assigned to every molecule node, the split must yield one subgraph per
    label actually used, each holding exactly the molecules with that label.
    """
    key, attrs = 'ATTR', ['ATTR_1', 'ATTR_2', 'ATTR_3']

    # One raises-block per call: inside a shared block the first call raises
    # and the second is never executed.
    # NOTE(review): the original listed the False case but never ran it; if
    # False is not actually rejected, this will now surface it.
    for bad_key in (True, False):
        with pytest.raises(ValueError):
            split_graph_by_molecule_attribute(network, bad_key, None)

    # Label every molecule node and count how often each label is used.
    assigned = defaultdict(int)
    for _, mol_data in network.get_molecule_nodes(True):
        attr = random.choice(attrs)
        mol_data[key] = attr
        assigned[attr] += 1

    subgraphs = split_graph_by_molecule_attribute(network, key, None)
    assert len(subgraphs) == len(assigned)
    for u_attr, expected_count in assigned.items():
        subgraph = subgraphs[u_attr]
        assert subgraph.num_molecule_nodes == expected_count
        assert subgraph.num_scaffold_nodes > 0
        assert all(d == u_attr for _, d in subgraph.get_molecule_nodes(key))
def test_embed_node_mol_images(network):
    """Check that node images can be embedded and then removed again."""
    # After embedding, every node carries a non-None 'img' attribute.
    vis_utils.embed_node_mol_images(network)
    for _, attributes in network.nodes(data=True):
        assert attributes.get('img') is not None
    # After removal, no node reports an 'img' value.
    vis_utils.remove_node_mol_images(network)
    for _, attributes in network.nodes(data=True):
        assert attributes.get('img') is None