├── CHANGELOG.md ├── CITATION.cff ├── LICENSE ├── MANIFEST.in ├── Moment_Comparison.ipynb ├── README.md ├── docs ├── .buildinfo ├── .nojekyll ├── Makefile ├── _images │ ├── getting_started_10_0.png │ ├── getting_started_11_1.png │ ├── getting_started_14_0.png │ ├── getting_started_15_0.png │ ├── getting_started_16_0.png │ ├── getting_started_17_0.png │ ├── getting_started_18_0.png │ ├── getting_started_8_0.png │ └── getting_started_9_0.png ├── _modules │ ├── geometricus │ │ ├── geometricus.html │ │ ├── moment_utility.html │ │ ├── protein_utility.html │ │ └── utility.html │ ├── index.html │ └── typing.html ├── _sources │ ├── api │ │ ├── geometricus.rst.txt │ │ └── modules.rst.txt │ ├── getting_started.ipynb.txt │ ├── getting_started.rst.txt │ ├── index.rst.txt │ └── install.rst.txt ├── _static │ ├── alabaster.css │ ├── basic.css │ ├── custom.css │ ├── doctools.js │ ├── documentation_options.js │ ├── file.png │ ├── geometricus_logo.png │ ├── jquery-3.4.1.js │ ├── jquery-3.5.1.js │ ├── jquery.js │ ├── language_data.js │ ├── minus.png │ ├── plus.png │ ├── pygments.css │ ├── searchtools.js │ ├── sphinx_highlight.js │ ├── underscore-1.13.1.js │ ├── underscore-1.3.1.js │ └── underscore.js ├── api │ ├── geometricus.html │ └── modules.html ├── build │ ├── doctrees │ │ ├── api │ │ │ ├── geometricus.doctree │ │ │ └── modules.doctree │ │ ├── environment.pickle │ │ ├── getting_started.doctree │ │ ├── index.doctree │ │ ├── install.doctree │ │ └── nbsphinx │ │ │ ├── getting_started.ipynb │ │ │ ├── getting_started_11_1.png │ │ │ ├── getting_started_17_0.png │ │ │ └── getting_started_18_0.png │ └── html │ │ ├── .buildinfo │ │ ├── .nojekyll │ │ ├── _images │ │ ├── getting_started_11_1.png │ │ ├── getting_started_17_0.png │ │ └── getting_started_18_0.png │ │ ├── _modules │ │ ├── geometricus │ │ │ ├── geometricus.html │ │ │ ├── moment_utility.html │ │ │ └── protein_utility.html │ │ ├── index.html │ │ └── typing.html │ │ ├── _sources │ │ ├── api │ │ │ ├── geometricus.rst.txt │ │ │ └── modules.rst.txt │ │ ├── getting_started.ipynb.txt │ │ ├── getting_started.rst.txt │ │ ├── index.rst.txt │ │ └── install.rst.txt │ │ ├── _static │ │ ├── alabaster.css │ │ ├── basic.css │ │ ├── custom.css │ │ ├── doctools.js │ │ ├── documentation_options.js │ │ ├── file.png │ │ ├── jquery-3.4.1.js │ │ ├── jquery-3.5.1.js │ │ ├── jquery.js │ │ ├── language_data.js │ │ ├── minus.png │ │ ├── plus.png │ │ ├── pygments.css │ │ ├── searchtools.js │ │ ├── underscore-1.3.1.js │ │ └── underscore.js │ │ ├── api │ │ ├── geometricus.html │ │ └── modules.html │ │ ├── genindex.html │ │ ├── getting_started.html │ │ ├── getting_started.ipynb │ │ ├── index.html │ │ ├── install.html │ │ ├── objects.inv │ │ ├── py-modindex.html │ │ ├── search.html │ │ └── searchindex.js ├── genindex.html ├── geometricus.geometricus.rst ├── geometricus.model_utility.rst ├── geometricus.moment_invariants.rst ├── geometricus.moment_utility.rst ├── geometricus.protein_utility.rst ├── geometricus.rst ├── getting_started.html ├── getting_started.ipynb ├── index.html ├── install.html ├── make.bat ├── modules.rst ├── objects.inv ├── py-modindex.html ├── search.html ├── searchindex.js └── source │ ├── api │ ├── geometricus.rst │ └── modules.rst │ ├── conf.py │ ├── getting_started.ipynb │ ├── index.rst │ └── install.rst ├── example_data └── MAPK_KLIFS.tsv ├── geometricus ├── __init__.py ├── geometricus.py ├── istarmap.py ├── model_utility.py ├── models │ └── ShapemerLearn_RADIUS-5_RADIUS-10_KMER-8_KMER-16_68_32_10.pt ├── moment_invariants.py ├── moment_utility.py ├── protein_utility.py └── sampling.py ├── geometricus_logo.png ├── notebooks └── getting_started.ipynb ├── pyproject.toml ├── setup.cfg ├── setup.py └── training ├── 1-generate-training-data.ipynb └── 2-train-shapemerization.ipynb /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres 6 | to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | ## [0.3.0] - 2022-04-19 9 | 10 | - Added chiral invariant moment from https://royalsocietypublishing.org/doi/10.1098/rsif.2010.0297 11 | - Made MomentInvariants object pickle-able 12 | - Froze numpy and numba versions 13 | 14 | ## [0.2.0] - 2020-10-16 15 | 16 | Added more third order moment invariants (phi_{2-13} from [1]). These can be chosen via the `moment_types` argument 17 | in `MomentInvariants`' constructors 18 | 19 | [1] Flusser, Jan, Tomas Suk, and Barbara Zitová. 2D and 3D image analysis by moments. John Wiley & Sons, 2016. 20 | 21 | ## [0.1.2] - 2020-09-06 22 | 23 | Fixed PyPy readme, added badge 24 | 25 | ## [0.1.1] - 2020-09-06 26 | 27 | Linked readme to PyPy. Updated `pip install` instructions 28 | 29 | ## [0.1.0] - 2020-09-06 30 | 31 | First pip package release 32 | 33 | 34 | [Unreleased]: https://github.com/TurtleTools/geometricus/compare/v0.3.0...HEAD 35 | 36 | [0.3.0]: https://github.com/TurtleTools/geometricus/compare/v0.2.0...v0.3.0 37 | 38 | [0.2.0]: https://github.com/TurtleTools/geometricus/compare/v0.1.2...v0.2.0 39 | 40 | [0.1.2]: https://github.com/TurtleTools/geometricus/compare/v0.1.1...v0.1.2 41 | 42 | [0.1.1]: https://github.com/TurtleTools/geometricus/compare/v0.1.0...v0.1.1 43 | 44 | [0.1.0]: https://github.com/TurtleTools/geometricus/releases/tag/v0.1.0 45 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you use this software, please cite it as below." 3 | authors: 4 | - family-names: "Durairaj" 5 | given-names: "Janani" 6 | - family-names: "Akdel" 7 | given-names: "Mehmet" 8 | - family-names: "Ridder" 9 | given-names: "Dick" 10 | name-particle: "de" 11 | - family-names: "Dijk" 12 | given-names: "Aalt D J" 13 | name-particle: "van" 14 | title: "Geometricus represents protein structures as shape-mers derived from moment invariants" 15 | doi: 10.1093/BIOINFORMATICS/BTAA839 16 | version: 0.2.0 17 | date-released: 2020-12-29 18 | identifiers: 19 | - type: doi 20 | value: 10.1093/BIOINFORMATICS/BTAA839 21 | - type: other 22 | value: urn:issn:1367-4803 23 | - type: other 24 | value: pmid:33381814 25 | url: "https://github.com/TurtleTools/geometricus" 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 TurtleTools 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include geometricus/models * -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 | [![PyPI version](https://badge.fury.io/py/geometricus.svg)](https://badge.fury.io/py/geometricus) 4 | [![DOI](https://zenodo.org/badge/doi/10.1093/bioinformatics/btaa839.svg)](http://dx.doi.org/10.1093/bioinformatics/btaa839) 5 | 6 | # Geometricus Represents Protein Structures as Shape-mers derived from Moment Invariants 7 | 8 | A structure-based, alignment-free embedding approach for proteins. Can be used as input to machine learning algorithms. 9 | 10 | See the [documentation](https://turtletools.github.io/geometricus/). 11 | 12 | ## Installation 13 | 14 | Geometricus is a Python (3.9+) package with NumPy, SciPy, Numba, PyTorch and ProDy as dependencies. 15 | 16 | Install with `pip install git+https://github.com/TurtleTools/geometricus.git` 17 | 18 | ## Usage 19 | 20 | See the [Getting Started](https://turtletools.github.io/geometricus/getting_started) page for example usage. 21 | 22 | ## Publications 23 | 24 | Janani Durairaj, Mehmet Akdel, Dick de Ridder, Aalt D J van Dijk, Geometricus represents protein structures as 25 | shape-mers derived from moment invariants, 26 | Bioinformatics, Volume 36, Issue Supplement_2, December 2020, Pages 27 | i718–i725, https://doi.org/10.1093/bioinformatics/btaa839 28 | 29 | Janani Durairaj, Mehmet Akdel, Dick de Ridder, Aalt D.J. van Dijk, Fast and adaptive protein structure representations 30 | for machine learning, 31 | bioRxiv 2021.04.07.438777; doi: https://doi.org/10.1101/2021.04.07.438777 32 | 33 | Mehmet Akdel, Douglas E V Pires, Eduard Porta Pardo, Jürgen Jänes, Arthur O Zalevsky, Bálint Mészáros, Patrick Bryant, 34 | Lydia L. Good, Roman A Laskowski, Gabriele Pozzati, Aditi Shenoy, Wensi Zhu, Petras Kundrotas, Victoria Ruiz Serra, 35 | Carlos H M Rodrigues, Alistair S Dunham, David Burke, Neera Borkakoti, Sameer Velankar, Adam Frost, Kresten 36 | Lindorff-Larsen, Alfonso Valencia, Sergey Ovchinnikov, Janani Durairaj, David B Ascher, Janet M Thornton, Norman E 37 | Davey, Amelie Stein, Arne Elofsson, Tristan I Croll, Pedro Beltrao, A structural biology community assessment of 38 | AlphaFold 2 applications, 39 | bioRxiv 2021.09.26.461876; doi: https://doi.org/10.1101/2021.09.26.461876 40 | 41 | Janani Durairaj, Joana Pereira, Mehmet Akdel, Torsten Schwede, What is hidden in the darkness? Characterization of 42 | AlphaFold structural space, bioRxiv 2022.10.11.511548; doi: https://doi.org/10.1101/2022.10.11.511548 43 | -------------------------------------------------------------------------------- /docs/.buildinfo: -------------------------------------------------------------------------------- 1 | # Sphinx build info version 1 2 | # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. 3 | config: 4aa8244b1f5cd7131ed562cbfa1f91f9 4 | tags: 645f666f9bcd5a90fca523b33c5a78b7 5 | -------------------------------------------------------------------------------- /docs/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurtleTools/geometricus/33b29e7e6074c867640455585d339f6319ec2e8d/docs/.nojekyll -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | 22 | github: 23 | @make html 24 | @cp -a build/html/. ../docs 25 | -------------------------------------------------------------------------------- /docs/_images/getting_started_10_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurtleTools/geometricus/33b29e7e6074c867640455585d339f6319ec2e8d/docs/_images/getting_started_10_0.png -------------------------------------------------------------------------------- /docs/_images/getting_started_11_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurtleTools/geometricus/33b29e7e6074c867640455585d339f6319ec2e8d/docs/_images/getting_started_11_1.png -------------------------------------------------------------------------------- /docs/_images/getting_started_14_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurtleTools/geometricus/33b29e7e6074c867640455585d339f6319ec2e8d/docs/_images/getting_started_14_0.png -------------------------------------------------------------------------------- /docs/_images/getting_started_15_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurtleTools/geometricus/33b29e7e6074c867640455585d339f6319ec2e8d/docs/_images/getting_started_15_0.png -------------------------------------------------------------------------------- /docs/_images/getting_started_16_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurtleTools/geometricus/33b29e7e6074c867640455585d339f6319ec2e8d/docs/_images/getting_started_16_0.png -------------------------------------------------------------------------------- /docs/_images/getting_started_17_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurtleTools/geometricus/33b29e7e6074c867640455585d339f6319ec2e8d/docs/_images/getting_started_17_0.png -------------------------------------------------------------------------------- /docs/_images/getting_started_18_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurtleTools/geometricus/33b29e7e6074c867640455585d339f6319ec2e8d/docs/_images/getting_started_18_0.png -------------------------------------------------------------------------------- /docs/_images/getting_started_8_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurtleTools/geometricus/33b29e7e6074c867640455585d339f6319ec2e8d/docs/_images/getting_started_8_0.png -------------------------------------------------------------------------------- /docs/_images/getting_started_9_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurtleTools/geometricus/33b29e7e6074c867640455585d339f6319ec2e8d/docs/_images/getting_started_9_0.png -------------------------------------------------------------------------------- /docs/_modules/geometricus/utility.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | geometricus.utility — Geometricus 0.0.1-dev documentation 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 |
26 | 27 | 84 |
85 |
86 | 87 | 88 |
89 | 90 |

Source code for geometricus.utility

 91 | import typing
 92 | from pathlib import Path
 93 | 
 94 | import numba as nb
 95 | import numpy as np
 96 | 
 97 | 
 98 | 
[docs]@nb.njit 99 | def nan_normalize(numbers): 100 | minv, maxv = np.nanmin(numbers), np.nanmax(numbers) 101 | return (numbers - minv) / (maxv - minv)
102 | 103 | 104 |
[docs]@nb.njit 105 | def normalize(numbers): 106 | minv, maxv = np.min(numbers), np.max(numbers) 107 | return (numbers - minv) / (maxv - minv)
108 | 109 | 110 |
[docs]@nb.njit 111 | def nb_mean_axis_0(array: np.ndarray) -> np.ndarray: 112 | """ 113 | Same as np.mean(array, axis=0) but njitted 114 | """ 115 | mean_array = np.zeros(array.shape[1]) 116 | for i in range(array.shape[1]): 117 | mean_array[i] = np.mean(array[:, i]) 118 | return mean_array
119 | 120 | 121 |
[docs]def get_file_parts(input_filename: typing.Union[str, Path]) -> tuple: 122 | """ 123 | Gets directory path, name, and extension from a filename 124 | Parameters 125 | ---------- 126 | input_filename 127 | 128 | Returns 129 | ------- 130 | (path, name, extension) 131 | """ 132 | input_filename = Path(input_filename) 133 | path = str(input_filename.parent) 134 | extension = input_filename.suffix 135 | name = input_filename.stem 136 | return path, name, extension
137 | 138 | 139 |
[docs]def group_indices(input_list: list) -> list: 140 | """ 141 | [1, 1, 1, 2, 2, 3, 3, 3, 4] -> [[0, 1, 2], [3, 4], [5, 6, 7], [8]] 142 | Parameters 143 | ---------- 144 | input_list 145 | 146 | Returns 147 | ------- 148 | list of lists 149 | """ 150 | output_list = [] 151 | current_list = [] 152 | current_index = None 153 | for i in range(len(input_list)): 154 | if current_index is None: 155 | current_index = input_list[i] 156 | if input_list[i] == current_index: 157 | current_list.append(i) 158 | else: 159 | output_list.append(current_list) 160 | current_list = [i] 161 | current_index = input_list[i] 162 | output_list.append(current_list) 163 | return output_list
164 |
165 | 166 |
167 | 168 |
169 |
170 |
171 |
172 | 180 | 181 | 182 | 183 | Fork me on GitHub 184 | 185 | 186 | 187 | 188 | 189 | -------------------------------------------------------------------------------- /docs/_modules/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Overview: module code — Geometricus 0.5.0 documentation 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 |
25 | 26 | 86 |
87 |
88 | 89 | 90 |
91 | 92 |

All modules for which code is available

93 | 97 | 98 |
99 | 100 |
101 |
102 |
103 |
104 | 112 | 113 | 114 | 115 | Fork me on GitHub 116 | 117 | 118 | 119 | 120 | 121 | -------------------------------------------------------------------------------- /docs/_sources/api/geometricus.rst.txt: -------------------------------------------------------------------------------- 1 | geometricus package 2 | =================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | geometricus.geometricus module 8 | ------------------------------ 9 | 10 | .. automodule:: geometricus.geometricus 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | geometricus.moment\_utility module 16 | ---------------------------------- 17 | 18 | .. automodule:: geometricus.moment_utility 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | geometricus.protein\_utility module 24 | ----------------------------------- 25 | 26 | .. automodule:: geometricus.protein_utility 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /docs/_sources/api/modules.rst.txt: -------------------------------------------------------------------------------- 1 | API Reference 2 | ============= 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | geometricus 8 | -------------------------------------------------------------------------------- /docs/_sources/getting_started.rst.txt: -------------------------------------------------------------------------------- 1 | .. code:: ipython3 2 | 3 | import warnings 4 | warnings.filterwarnings('ignore') 5 | 6 | .. code:: ipython3 7 | 8 | %load_ext autoreload 9 | %autoreload 2 10 | 11 | Getting Started 12 | =============== 13 | 14 | Let’s walk through an example application of Geometricus in machine 15 | learning using a dataset of human MAP kinases, divided into three 16 | classes - JNK, Erk, and p38 kinases. This dataset was generated from the 17 | `Kinase-Ligand Interaction Fingerprints and Structures database 18 | (KLIFS) `__ (Raw table 19 | `here <../../example_data/MAPK_KLIFS.tsv>`__). 20 | 21 | .. code:: ipython3 22 | 23 | import pandas as pnd 24 | from pathlib import Path 25 | from time import time 26 | 27 | url = "https://raw.githubusercontent.com/TurtleTools/geometricus/master/example_data/MAPK_KLIFS.tsv" 28 | mapk_df = pnd.read_csv(url, sep="\t") 29 | 30 | mapk_pdb_id_to_class = {} 31 | for pdb_id, chain, class_name in list(zip(mapk_df["PDB"], mapk_df["CHAIN"], mapk_df["CLASS"])): 32 | mapk_pdb_id_to_class[(pdb_id, chain)] = class_name 33 | len(mapk_pdb_id_to_class) 34 | 35 | 36 | 37 | 38 | .. parsed-literal:: 39 | 40 | 527 41 | 42 | 43 | 44 | So now we have a list of (PDB ID, chain) pairs, each associated with a 45 | class 46 | 47 | .. code:: ipython3 48 | 49 | X_names = list(mapk_pdb_id_to_class.keys()) 50 | class_mapping = {"JNK": 0, "Erk": 1, "p38": 2} 51 | y = [class_mapping[mapk_pdb_id_to_class[k]] for k in X_names] 52 | 53 | Structural fragmentation 54 | ------------------------ 55 | 56 | We consider two different ways of dividing a protein with :math:`l` 57 | residues into structural fragments, a :math:`k`-mer-based approach and a 58 | radius-based approach. For each structural fragment, multiple rotation 59 | and translation-invariant moments can be calculated. While the 60 | :math:`k`-mer based approach is effective in describing structural 61 | fragments that are sequential in nature, such as :math:`\alpha`-helices 62 | and loops, the radius-based approach can capture long-range structural 63 | contacts as seen in :math:`\beta`-sheets, as well as distinct 64 | interaction patterns in space, as found in enzyme active sites. 65 | 66 | Below we fetch and parse each protein as a ProDy AtomGroup object. This 67 | takes a while (around 15 mins) since it downloads via FTP from RCSB. 68 | 69 | .. code:: ipython3 70 | 71 | import prody as pd 72 | 73 | start_time = time() 74 | pdbs = [] 75 | for i, (pdb_id, chain) in enumerate(X_names): 76 | if i > 0 and i % 50 == 0: 77 | print(f"{i} proteins fetched in {(time() - start_time):.2f} seconds") 78 | pdbs.append(pd.parsePDB(pdb_id, chain=chain)) 79 | 80 | 81 | .. parsed-literal:: 82 | 83 | 50 proteins fetched in 96.46 seconds 84 | 100 proteins fetched in 194.62 seconds 85 | 150 proteins fetched in 281.02 seconds 86 | 200 proteins fetched in 365.41 seconds 87 | 250 proteins fetched in 448.01 seconds 88 | 300 proteins fetched in 534.71 seconds 89 | 350 proteins fetched in 613.16 seconds 90 | 400 proteins fetched in 692.71 seconds 91 | 450 proteins fetched in 777.41 seconds 92 | 500 proteins fetched in 861.82 seconds 93 | 94 | 95 | Moment invariants can be calculated using the 96 | ``geometricus.MomentInvariants`` class. This class has many 97 | constructors, such as the ``from_prody_atomgroup`` below. This can be 98 | replaced by ``from_pdb_file`` to load a protein from a file or even 99 | ``from_pdb_id`` to download from RCSB via FTP. 100 | 101 | This is the point where you decide the type and size of structural 102 | fragmentation to use. The options are: 103 | 104 | - KMER - each residue is taken as the center of a kmer of length 105 | ``split_size``, ends are included but shorter 106 | - RADIUS - overlapping spheres of radius ``split_size`` 107 | - RADIUS_UPSAMPLE - upsamples backbone atoms before taking overlapping 108 | spheres. 109 | - KMER_CUT - same as kmer but ends are not included, only fragments of 110 | length ``split_size`` are kept 111 | - ALLMER - adds kmers of different lengths (``split_size - 5`` to 112 | ``split_size + 5``) to take into account deletions/insertions that 113 | don’t change the shape 114 | 115 | Below we use ``SplitType.KMER`` with a ``split_size`` (i.e. kmer size) 116 | of 16 and ``SplitType.RADIUS`` with a ``split_size`` (i.e. radius) of 117 | 10. 118 | 119 | ``geometricus.MomentType`` lists the available moment invariants. By 120 | default :math:`O_3`, :math:`O_4`, :math:`O_5`, and :math:`F` are 121 | calculated. This can be changed using the ``moment_types`` argument in 122 | any of the ``MomentInvariants`` constructors. 123 | 124 | .. code:: ipython3 125 | 126 | from geometricus import MomentInvariants, SplitType 127 | 128 | invariants_kmer = [] 129 | invariants_radius = [] 130 | 131 | start_time = time() 132 | for i, key in enumerate(X_names): 133 | if i > 0 and i % 50 == 0: 134 | print(f"{i} proteins in {(time() - start_time):.2f} seconds") 135 | invariants_kmer.append(MomentInvariants.from_prody_atomgroup(key, pdbs[i], split_type=SplitType.KMER, split_size=16)) 136 | invariants_radius.append(MomentInvariants.from_prody_atomgroup(key, pdbs[i], split_type=SplitType.RADIUS, split_size=10)) 137 | 138 | 139 | .. parsed-literal:: 140 | 141 | 50 proteins in 3.42 seconds 142 | 100 proteins in 8.67 seconds 143 | 150 proteins in 15.67 seconds 144 | 200 proteins in 20.08 seconds 145 | 250 proteins in 25.12 seconds 146 | 300 proteins in 29.88 seconds 147 | 350 proteins in 33.77 seconds 148 | 400 proteins in 38.55 seconds 149 | 450 proteins in 43.29 seconds 150 | 500 proteins in 45.82 seconds 151 | 152 | 153 | Generating an Embedding from Structural Fragments 154 | ------------------------------------------------- 155 | 156 | Moment invariants are discretized into shape-mers, using a 157 | **resolution** parameter which controls how coarse or fine-grained this 158 | discretization is. A count vector of shape-mers is calculated for each 159 | protein with each element recording the number of times the 160 | corresponding shape-mer appears in that protein. The resolution 161 | parameter can be optimized to the task at hand. Generally, more 162 | divergent proteins would require a lower resolution while highly similar 163 | proteins would need higher resolutions to differentiate them. For the 164 | MAP kinases, we use a relatively high resolution of 2. 165 | 166 | Depending on the use-case you may want to embed all proteins at once, as 167 | demonstrated below, or separate train and test proteins as demonstrated 168 | in the Supervised Learning section. 169 | 170 | Embedding for Dimensionality Reduction 171 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 172 | 173 | Embeddings across all proteins can be used for dimensionality reduction 174 | and visualization. 175 | 176 | .. code:: ipython3 177 | 178 | import umap 179 | import numpy as np 180 | import matplotlib.pyplot as plt 181 | from geometricus import GeometricusEmbedding 182 | 183 | start_time = time() 184 | kmer_embedder = GeometricusEmbedding.from_invariants(invariants_kmer, resolution=2.) 185 | radius_embedder = GeometricusEmbedding.from_invariants(invariants_radius, resolution=2.) 186 | print(f"Generated embeddings in {(time() - start_time):.2f} seconds") 187 | 188 | reducer = umap.UMAP(metric="cosine", n_components=2) 189 | reduced = reducer.fit_transform(np.hstack((kmer_embedder.embedding, radius_embedder.embedding))) 190 | 191 | class_names = ["JNK", "Erk", "p38"] 192 | colors = ["red", "green", "blue"] 193 | plt.figure(figsize=(10,10)) 194 | for i in range(3): 195 | indices = np.where(np.array(y) == i)[0] 196 | plt.scatter(reduced[indices, 0], 197 | reduced[indices, 1], 198 | label=class_names[i], edgecolor="black", linewidth=0.1, alpha=0.8) 199 | plt.axis("off") 200 | plt.legend(); 201 | 202 | 203 | .. parsed-literal:: 204 | 205 | Generated embeddings in 1.19 seconds 206 | 207 | 208 | 209 | .. image:: getting_started_files/getting_started_11_1.png 210 | 211 | 212 | Embedding for Supervised Learning 213 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 214 | 215 | Supervised learning, be it classification or regression, requires some 216 | form of training and test set separation. Below, the shape-mers used for 217 | embedding are calculated from the training set (using the kmer approach) 218 | and only those shape-mers are counted in the test proteins. 219 | 220 | .. code:: ipython3 221 | 222 | from sklearn.model_selection import train_test_split 223 | 224 | X_train_names, X_test_names, y_train, y_test = train_test_split(X_names, y, test_size=0.3) 225 | 226 | train_embedder = GeometricusEmbedding.from_invariants(invariants_kmer, resolution=2., protein_keys=X_train_names) 227 | test_embedder = train_embedder.embed(invariants_kmer, X_test_names) 228 | 229 | X_train, X_test = train_embedder.embedding, test_embedder.embedding 230 | 231 | For this simple problem, a decision tree classifier is more than enough 232 | to obtain good accuracy. 233 | 234 | .. code:: ipython3 235 | 236 | from sklearn.tree import DecisionTreeClassifier 237 | from sklearn.metrics import classification_report 238 | 239 | clf = DecisionTreeClassifier(random_state=42, max_depth=3) 240 | clf.fit(X_train, y_train) 241 | y_pred = clf.predict(X_test) 242 | print(classification_report(y_test, y_pred, [0, 1, 2], class_names)) 243 | 244 | 245 | .. parsed-literal:: 246 | 247 | precision recall f1-score support 248 | 249 | JNK 0.84 0.93 0.89 29 250 | Erk 0.93 0.89 0.91 46 251 | p38 0.95 0.94 0.95 84 252 | 253 | accuracy 0.92 159 254 | macro avg 0.91 0.92 0.91 159 255 | weighted avg 0.93 0.92 0.92 159 256 | 257 | 258 | 259 | Finding predictive residues and structural fragments 260 | ---------------------------------------------------- 261 | 262 | Since each shape-mer can be mapped back to the residues it describes, 263 | it’s easy to use Geometricus to determine predictive or functionally 264 | relevant residues and structural regions from a trained predictor (or 265 | from loadings generated from a method such as PCA). Such insights can be 266 | directly be applied to select candidate residues for mutational studies 267 | or used in directed evolution techniques to engineer proteins and 268 | enzymes with desired properties. 269 | 270 | By inspecting the decision tree created above, it becomes clear that 271 | some shape-mers are present multiple times across a protein and the 272 | number of times differs across classes. 273 | 274 | .. code:: ipython3 275 | 276 | from sklearn.tree import plot_tree 277 | fig, ax = plt.subplots(1, figsize=(15,15)) 278 | plot_tree(clf, filled=True, ax=ax, feature_names=train_embedder.shapemer_keys); 279 | 280 | 281 | 282 | .. image:: getting_started_files/getting_started_17_0.png 283 | 284 | 285 | .. code:: ipython3 286 | 287 | plt.plot(clf.feature_importances_) 288 | predictive_feature_indices = np.argsort(clf.feature_importances_)[::-1][:6] 289 | 290 | 291 | 292 | .. image:: getting_started_files/getting_started_18_0.png 293 | 294 | 295 | We can map back to the residues described by a shape-mer using the 296 | ``map_shapemer_to_residues`` function which returns a dictionary mapping 297 | each protein to the corresponding residue indices of the shape-mer 298 | within that protein, if it exists. 299 | 300 | .. code:: ipython3 301 | 302 | shapemer = train_embedder.shapemer_keys[predictive_feature_indices[1]] 303 | residue_indices_train = train_embedder.map_shapemer_to_residues(shapemer) 304 | print("Shape-mer:", shapemer, "Number of proteins with shape-mer:", len(residue_indices_train)) 305 | print() 306 | print("Residue indices per protein (for 10 proteins):") 307 | for i, key in enumerate(residue_indices_train): 308 | if i > 10: 309 | break 310 | print(key, residue_indices_train[key]) 311 | 312 | 313 | .. parsed-literal:: 314 | 315 | Shape-mer: (13, 24, 32, 34) Number of proteins with shape-mer: 507 316 | 317 | Residue indices per protein (for 10 proteins): 318 | ('3o2m', 'A') {234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249} 319 | ('3o2m', 'B') {234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249} 320 | ('4eh8', 'A') {231, 232, 233, 234, 235, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119} 321 | ('4eh2', 'A') {230, 231, 232, 233, 234, 235, 236, 221, 222, 223, 224, 225, 226, 227, 228, 229, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119} 322 | ('4eh7', 'A') {224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 221, 222, 223} 323 | ('4eh5', 'A') {224, 225, 226, 227, 228, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223} 324 | ('4dli', 'A') {224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 218, 219, 220, 221, 222, 223} 325 | ('4eh3', 'A') {231, 232, 233, 234, 235, 236, 237, 238, 223, 224, 225, 226, 227, 228, 229, 230, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119} 326 | ('4eh6', 'A') {231, 232, 233, 234, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119} 327 | ('4eh4', 'A') {224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 221, 222, 223} 328 | ('4eh9', 'A') {224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 218, 219, 220, 221, 222, 223} 329 | 330 | 331 | These residue indices can then be visualized on a protein structure 332 | using molecule visualization software such as PyMol. Comparing the same 333 | location in proteins which don’t have this shape-mer can also be 334 | informative. 335 | -------------------------------------------------------------------------------- /docs/_sources/index.rst.txt: -------------------------------------------------------------------------------- 1 | .. Geometricus documentation master file, created by 2 | sphinx-quickstart on Thu Apr 23 20:38:06 2020. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to Geometricus's documentation! 7 | ======================================= 8 | 9 | .. toctree:: 10 | :maxdepth: 3 11 | :caption: Contents: 12 | 13 | install 14 | getting_started.ipynb 15 | api/modules 16 | 17 | 18 | Indices and tables 19 | ================== 20 | 21 | * :ref:`genindex` 22 | * :ref:`modindex` 23 | * :ref:`search` 24 | -------------------------------------------------------------------------------- /docs/_sources/install.rst.txt: -------------------------------------------------------------------------------- 1 | Installing Geometricus 2 | ====================== 3 | 4 | Geometricus is a Python (3.9+) package with NumPy, SciPy, Numba, PyTorch and ProDy as dependencies. 5 | It can be installed using pip:: 6 | 7 | pip install git+https://github.com/TurtleTools/geometricus.git 8 | -------------------------------------------------------------------------------- /docs/_static/alabaster.css: -------------------------------------------------------------------------------- 1 | @import url("basic.css"); 2 | 3 | /* -- page layout ----------------------------------------------------------- */ 4 | 5 | body { 6 | font-family: Georgia, serif; 7 | font-size: 17px; 8 | background-color: #fff; 9 | color: #000; 10 | margin: 0; 11 | padding: 0; 12 | } 13 | 14 | 15 | div.document { 16 | width: 940px; 17 | margin: 30px auto 0 auto; 18 | } 19 | 20 | div.documentwrapper { 21 | float: left; 22 | width: 100%; 23 | } 24 | 25 | div.bodywrapper { 26 | margin: 0 0 0 220px; 27 | } 28 | 29 | div.sphinxsidebar { 30 | width: 220px; 31 | font-size: 14px; 32 | line-height: 1.5; 33 | } 34 | 35 | hr { 36 | border: 1px solid #B1B4B6; 37 | } 38 | 39 | div.body { 40 | background-color: #fff; 41 | color: #3E4349; 42 | padding: 0 30px 0 30px; 43 | } 44 | 45 | div.body > .section { 46 | text-align: left; 47 | } 48 | 49 | div.footer { 50 | width: 940px; 51 | margin: 20px auto 30px auto; 52 | font-size: 14px; 53 | color: #888; 54 | text-align: right; 55 | } 56 | 57 | div.footer a { 58 | color: #888; 59 | } 60 | 61 | p.caption { 62 | font-family: inherit; 63 | font-size: inherit; 64 | } 65 | 66 | 67 | div.relations { 68 | display: none; 69 | } 70 | 71 | 72 | div.sphinxsidebar a { 73 | color: #444; 74 | text-decoration: none; 75 | border-bottom: 1px dotted #999; 76 | } 77 | 78 | div.sphinxsidebar a:hover { 79 | border-bottom: 1px solid #999; 80 | } 81 | 82 | div.sphinxsidebarwrapper { 83 | padding: 18px 10px; 84 | } 85 | 86 | div.sphinxsidebarwrapper p.logo { 87 | padding: 0; 88 | margin: -10px 0 0 0px; 89 | text-align: center; 90 | } 91 | 92 | div.sphinxsidebarwrapper h1.logo { 93 | margin-top: -10px; 94 | text-align: center; 95 | margin-bottom: 5px; 96 | text-align: left; 97 | } 98 | 99 | div.sphinxsidebarwrapper h1.logo-name { 100 | margin-top: 0px; 101 | } 102 | 103 | div.sphinxsidebarwrapper p.blurb { 104 | margin-top: 0; 105 | font-style: normal; 106 | } 107 | 108 | div.sphinxsidebar h3, 109 | div.sphinxsidebar h4 { 110 | font-family: Georgia, serif; 111 | color: #444; 112 | font-size: 24px; 113 | font-weight: normal; 114 | margin: 0 0 5px 0; 115 | padding: 0; 116 | } 117 | 118 | div.sphinxsidebar h4 { 119 | font-size: 20px; 120 | } 121 | 122 | div.sphinxsidebar h3 a { 123 | color: #444; 124 | } 125 | 126 | div.sphinxsidebar p.logo a, 127 | div.sphinxsidebar h3 a, 128 | div.sphinxsidebar p.logo a:hover, 129 | div.sphinxsidebar h3 a:hover { 130 | border: none; 131 | } 132 | 133 | div.sphinxsidebar p { 134 | color: #555; 135 | margin: 10px 0; 136 | } 137 | 138 | div.sphinxsidebar ul { 139 | margin: 10px 0; 140 | padding: 0; 141 | color: #000; 142 | } 143 | 144 | div.sphinxsidebar ul li.toctree-l1 > a { 145 | font-size: 120%; 146 | } 147 | 148 | div.sphinxsidebar ul li.toctree-l2 > a { 149 | font-size: 110%; 150 | } 151 | 152 | div.sphinxsidebar input { 153 | border: 1px solid #CCC; 154 | font-family: Georgia, serif; 155 | font-size: 1em; 156 | } 157 | 158 | div.sphinxsidebar hr { 159 | border: none; 160 | height: 1px; 161 | color: #AAA; 162 | background: #AAA; 163 | 164 | text-align: left; 165 | margin-left: 0; 166 | width: 50%; 167 | } 168 | 169 | div.sphinxsidebar .badge { 170 | border-bottom: none; 171 | } 172 | 173 | div.sphinxsidebar .badge:hover { 174 | border-bottom: none; 175 | } 176 | 177 | /* To address an issue with donation coming after search */ 178 | div.sphinxsidebar h3.donation { 179 | margin-top: 10px; 180 | } 181 | 182 | /* -- body styles ----------------------------------------------------------- */ 183 | 184 | a { 185 | color: #004B6B; 186 | text-decoration: underline; 187 | } 188 | 189 | a:hover { 190 | color: #6D4100; 191 | text-decoration: underline; 192 | } 193 | 194 | div.body h1, 195 | div.body h2, 196 | div.body h3, 197 | div.body h4, 198 | div.body h5, 199 | div.body h6 { 200 | font-family: Georgia, serif; 201 | font-weight: normal; 202 | margin: 30px 0px 10px 0px; 203 | padding: 0; 204 | } 205 | 206 | div.body h1 { margin-top: 0; padding-top: 0; font-size: 240%; } 207 | div.body h2 { font-size: 180%; } 208 | div.body h3 { font-size: 150%; } 209 | div.body h4 { font-size: 130%; } 210 | div.body h5 { font-size: 100%; } 211 | div.body h6 { font-size: 100%; } 212 | 213 | a.headerlink { 214 | color: #DDD; 215 | padding: 0 4px; 216 | text-decoration: none; 217 | } 218 | 219 | a.headerlink:hover { 220 | color: #444; 221 | background: #EAEAEA; 222 | } 223 | 224 | div.body p, div.body dd, div.body li { 225 | line-height: 1.4em; 226 | } 227 | 228 | div.admonition { 229 | margin: 20px 0px; 230 | padding: 10px 30px; 231 | background-color: #EEE; 232 | border: 1px solid #CCC; 233 | } 234 | 235 | div.admonition tt.xref, div.admonition code.xref, div.admonition a tt { 236 | background-color: #FBFBFB; 237 | border-bottom: 1px solid #fafafa; 238 | } 239 | 240 | div.admonition p.admonition-title { 241 | font-family: Georgia, serif; 242 | font-weight: normal; 243 | font-size: 24px; 244 | margin: 0 0 10px 0; 245 | padding: 0; 246 | line-height: 1; 247 | } 248 | 249 | div.admonition p.last { 250 | margin-bottom: 0; 251 | } 252 | 253 | div.highlight { 254 | background-color: #fff; 255 | } 256 | 257 | dt:target, .highlight { 258 | background: #FAF3E8; 259 | } 260 | 261 | div.warning { 262 | background-color: #FCC; 263 | border: 1px solid #FAA; 264 | } 265 | 266 | div.danger { 267 | background-color: #FCC; 268 | border: 1px solid #FAA; 269 | -moz-box-shadow: 2px 2px 4px #D52C2C; 270 | -webkit-box-shadow: 2px 2px 4px #D52C2C; 271 | box-shadow: 2px 2px 4px #D52C2C; 272 | } 273 | 274 | div.error { 275 | background-color: #FCC; 276 | border: 1px solid #FAA; 277 | -moz-box-shadow: 2px 2px 4px #D52C2C; 278 | -webkit-box-shadow: 2px 2px 4px #D52C2C; 279 | box-shadow: 2px 2px 4px #D52C2C; 280 | } 281 | 282 | div.caution { 283 | background-color: #FCC; 284 | border: 1px solid #FAA; 285 | } 286 | 287 | div.attention { 288 | background-color: #FCC; 289 | border: 1px solid #FAA; 290 | } 291 | 292 | div.important { 293 | background-color: #EEE; 294 | border: 1px solid #CCC; 295 | } 296 | 297 | div.note { 298 | background-color: #EEE; 299 | border: 1px solid #CCC; 300 | } 301 | 302 | div.tip { 303 | background-color: #EEE; 304 | border: 1px solid #CCC; 305 | } 306 | 307 | div.hint { 308 | background-color: #EEE; 309 | border: 1px solid #CCC; 310 | } 311 | 312 | div.seealso { 313 | background-color: #EEE; 314 | border: 1px solid #CCC; 315 | } 316 | 317 | div.topic { 318 | background-color: #EEE; 319 | } 320 | 321 | p.admonition-title { 322 | display: inline; 323 | } 324 | 325 | p.admonition-title:after { 326 | content: ":"; 327 | } 328 | 329 | pre, tt, code { 330 | font-family: 'Consolas', 'Menlo', 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', monospace; 331 | font-size: 0.9em; 332 | } 333 | 334 | .hll { 335 | background-color: #FFC; 336 | margin: 0 -12px; 337 | padding: 0 12px; 338 | display: block; 339 | } 340 | 341 | img.screenshot { 342 | } 343 | 344 | tt.descname, tt.descclassname, code.descname, code.descclassname { 345 | font-size: 0.95em; 346 | } 347 | 348 | tt.descname, code.descname { 349 | padding-right: 0.08em; 350 | } 351 | 352 | img.screenshot { 353 | -moz-box-shadow: 2px 2px 4px #EEE; 354 | -webkit-box-shadow: 2px 2px 4px #EEE; 355 | box-shadow: 2px 2px 4px #EEE; 356 | } 357 | 358 | table.docutils { 359 | border: 1px solid #888; 360 | -moz-box-shadow: 2px 2px 4px #EEE; 361 | -webkit-box-shadow: 2px 2px 4px #EEE; 362 | box-shadow: 2px 2px 4px #EEE; 363 | } 364 | 365 | table.docutils td, table.docutils th { 366 | border: 1px solid #888; 367 | padding: 0.25em 0.7em; 368 | } 369 | 370 | table.field-list, table.footnote { 371 | border: none; 372 | -moz-box-shadow: none; 373 | -webkit-box-shadow: none; 374 | box-shadow: none; 375 | } 376 | 377 | table.footnote { 378 | margin: 15px 0; 379 | width: 100%; 380 | border: 1px solid #EEE; 381 | background: #FDFDFD; 382 | font-size: 0.9em; 383 | } 384 | 385 | table.footnote + table.footnote { 386 | margin-top: -15px; 387 | border-top: none; 388 | } 389 | 390 | table.field-list th { 391 | padding: 0 0.8em 0 0; 392 | } 393 | 394 | table.field-list td { 395 | padding: 0; 396 | } 397 | 398 | table.field-list p { 399 | margin-bottom: 0.8em; 400 | } 401 | 402 | /* Cloned from 403 | * https://github.com/sphinx-doc/sphinx/commit/ef60dbfce09286b20b7385333d63a60321784e68 404 | */ 405 | .field-name { 406 | -moz-hyphens: manual; 407 | -ms-hyphens: manual; 408 | -webkit-hyphens: manual; 409 | hyphens: manual; 410 | } 411 | 412 | table.footnote td.label { 413 | width: .1px; 414 | padding: 0.3em 0 0.3em 0.5em; 415 | } 416 | 417 | table.footnote td { 418 | padding: 0.3em 0.5em; 419 | } 420 | 421 | dl { 422 | margin: 0; 423 | padding: 0; 424 | } 425 | 426 | dl dd { 427 | margin-left: 30px; 428 | } 429 | 430 | blockquote { 431 | margin: 0 0 0 30px; 432 | padding: 0; 433 | } 434 | 435 | ul, ol { 436 | /* Matches the 30px from the narrow-screen "li > ul" selector below */ 437 | margin: 10px 0 10px 30px; 438 | padding: 0; 439 | } 440 | 441 | pre { 442 | background: #EEE; 443 | padding: 7px 30px; 444 | margin: 15px 0px; 445 | line-height: 1.3em; 446 | } 447 | 448 | div.viewcode-block:target { 449 | background: #ffd; 450 | } 451 | 452 | dl pre, blockquote pre, li pre { 453 | margin-left: 0; 454 | padding-left: 30px; 455 | } 456 | 457 | tt, code { 458 | background-color: #ecf0f3; 459 | color: #222; 460 | /* padding: 1px 2px; */ 461 | } 462 | 463 | tt.xref, code.xref, a tt { 464 | background-color: #FBFBFB; 465 | border-bottom: 1px solid #fff; 466 | } 467 | 468 | a.reference { 469 | text-decoration: none; 470 | border-bottom: 1px dotted #004B6B; 471 | } 472 | 473 | /* Don't put an underline on images */ 474 | a.image-reference, a.image-reference:hover { 475 | border-bottom: none; 476 | } 477 | 478 | a.reference:hover { 479 | border-bottom: 1px solid #6D4100; 480 | } 481 | 482 | a.footnote-reference { 483 | text-decoration: none; 484 | font-size: 0.7em; 485 | vertical-align: top; 486 | border-bottom: 1px dotted #004B6B; 487 | } 488 | 489 | a.footnote-reference:hover { 490 | border-bottom: 1px solid #6D4100; 491 | } 492 | 493 | a:hover tt, a:hover code { 494 | background: #EEE; 495 | } 496 | 497 | 498 | @media screen and (max-width: 870px) { 499 | 500 | div.sphinxsidebar { 501 | display: none; 502 | } 503 | 504 | div.document { 505 | width: 100%; 506 | 507 | } 508 | 509 | div.documentwrapper { 510 | margin-left: 0; 511 | margin-top: 0; 512 | margin-right: 0; 513 | margin-bottom: 0; 514 | } 515 | 516 | div.bodywrapper { 517 | margin-top: 0; 518 | margin-right: 0; 519 | margin-bottom: 0; 520 | margin-left: 0; 521 | } 522 | 523 | ul { 524 | margin-left: 0; 525 | } 526 | 527 | li > ul { 528 | /* Matches the 30px from the "ul, ol" selector above */ 529 | margin-left: 30px; 530 | } 531 | 532 | .document { 533 | width: auto; 534 | } 535 | 536 | .footer { 537 | width: auto; 538 | } 539 | 540 | .bodywrapper { 541 | margin: 0; 542 | } 543 | 544 | .footer { 545 | width: auto; 546 | } 547 | 548 | .github { 549 | display: none; 550 | } 551 | 552 | 553 | 554 | } 555 | 556 | 557 | 558 | @media screen and (max-width: 875px) { 559 | 560 | body { 561 | margin: 0; 562 | padding: 20px 30px; 563 | } 564 | 565 | div.documentwrapper { 566 | float: none; 567 | background: #fff; 568 | } 569 | 570 | div.sphinxsidebar { 571 | display: block; 572 | float: none; 573 | width: 102.5%; 574 | margin: -20px -30px 20px -30px; 575 | padding: 10px 20px; 576 | background: #333; 577 | color: #FFF; 578 | } 579 | 580 | div.sphinxsidebar h3, div.sphinxsidebar h4, div.sphinxsidebar p, 581 | div.sphinxsidebar h3 a { 582 | color: #fff; 583 | } 584 | 585 | div.sphinxsidebar a { 586 | color: #AAA; 587 | } 588 | 589 | div.sphinxsidebar p.logo { 590 | display: none; 591 | } 592 | 593 | div.document { 594 | width: 100%; 595 | margin: 0; 596 | } 597 | 598 | div.footer { 599 | display: none; 600 | } 601 | 602 | div.bodywrapper { 603 | margin: 0; 604 | } 605 | 606 | div.body { 607 | min-height: 0; 608 | padding: 0; 609 | } 610 | 611 | .rtd_doc_footer { 612 | display: none; 613 | } 614 | 615 | .document { 616 | width: auto; 617 | } 618 | 619 | .footer { 620 | width: auto; 621 | } 622 | 623 | .footer { 624 | width: auto; 625 | } 626 | 627 | .github { 628 | display: none; 629 | } 630 | } 631 | @media screen and (min-width: 876px) { 632 | div.sphinxsidebar { 633 | position: fixed; 634 | margin-left: 0; 635 | } 636 | } 637 | 638 | 639 | /* misc. */ 640 | 641 | .revsys-inline { 642 | display: none!important; 643 | } 644 | 645 | /* Make nested-list/multi-paragraph items look better in Releases changelog 646 | * pages. Without this, docutils' magical list fuckery causes inconsistent 647 | * formatting between different release sub-lists. 648 | */ 649 | div#changelog > div.section > ul > li > p:only-child { 650 | margin-bottom: 0; 651 | } 652 | 653 | /* Hide fugly table cell borders in ..bibliography:: directive output */ 654 | table.docutils.citation, table.docutils.citation td, table.docutils.citation th { 655 | border: none; 656 | /* Below needed in some edge cases; if not applied, bottom shadows appear */ 657 | -moz-box-shadow: none; 658 | -webkit-box-shadow: none; 659 | box-shadow: none; 660 | } 661 | 662 | 663 | /* relbar */ 664 | 665 | .related { 666 | line-height: 30px; 667 | width: 100%; 668 | font-size: 0.9rem; 669 | } 670 | 671 | .related.top { 672 | border-bottom: 1px solid #EEE; 673 | margin-bottom: 20px; 674 | } 675 | 676 | .related.bottom { 677 | border-top: 1px solid #EEE; 678 | } 679 | 680 | .related ul { 681 | padding: 0; 682 | margin: 0; 683 | list-style: none; 684 | } 685 | 686 | .related li { 687 | display: inline; 688 | } 689 | 690 | nav#rellinks { 691 | float: right; 692 | } 693 | 694 | nav#rellinks li+li:before { 695 | content: "|"; 696 | } 697 | 698 | nav#breadcrumbs li+li:before { 699 | content: "\00BB"; 700 | } 701 | 702 | /* Hide certain items when printing */ 703 | @media print { 704 | div.related { 705 | display: none; 706 | } 707 | } -------------------------------------------------------------------------------- /docs/_static/custom.css: -------------------------------------------------------------------------------- 1 | /* This file intentionally left blank. */ 2 | -------------------------------------------------------------------------------- /docs/_static/doctools.js: -------------------------------------------------------------------------------- 1 | /* 2 | * doctools.js 3 | * ~~~~~~~~~~~ 4 | * 5 | * Base JavaScript utilities for all Sphinx HTML documentation. 6 | * 7 | * :copyright: Copyright 2007-2022 by the Sphinx team, see AUTHORS. 8 | * :license: BSD, see LICENSE for details. 9 | * 10 | */ 11 | "use strict"; 12 | 13 | const BLACKLISTED_KEY_CONTROL_ELEMENTS = new Set([ 14 | "TEXTAREA", 15 | "INPUT", 16 | "SELECT", 17 | "BUTTON", 18 | ]); 19 | 20 | const _ready = (callback) => { 21 | if (document.readyState !== "loading") { 22 | callback(); 23 | } else { 24 | document.addEventListener("DOMContentLoaded", callback); 25 | } 26 | }; 27 | 28 | /** 29 | * Small JavaScript module for the documentation. 30 | */ 31 | const Documentation = { 32 | init: () => { 33 | Documentation.initDomainIndexTable(); 34 | Documentation.initOnKeyListeners(); 35 | }, 36 | 37 | /** 38 | * i18n support 39 | */ 40 | TRANSLATIONS: {}, 41 | PLURAL_EXPR: (n) => (n === 1 ? 0 : 1), 42 | LOCALE: "unknown", 43 | 44 | // gettext and ngettext don't access this so that the functions 45 | // can safely bound to a different name (_ = Documentation.gettext) 46 | gettext: (string) => { 47 | const translated = Documentation.TRANSLATIONS[string]; 48 | switch (typeof translated) { 49 | case "undefined": 50 | return string; // no translation 51 | case "string": 52 | return translated; // translation exists 53 | default: 54 | return translated[0]; // (singular, plural) translation tuple exists 55 | } 56 | }, 57 | 58 | ngettext: (singular, plural, n) => { 59 | const translated = Documentation.TRANSLATIONS[singular]; 60 | if (typeof translated !== "undefined") 61 | return translated[Documentation.PLURAL_EXPR(n)]; 62 | return n === 1 ? singular : plural; 63 | }, 64 | 65 | addTranslations: (catalog) => { 66 | Object.assign(Documentation.TRANSLATIONS, catalog.messages); 67 | Documentation.PLURAL_EXPR = new Function( 68 | "n", 69 | `return (${catalog.plural_expr})` 70 | ); 71 | Documentation.LOCALE = catalog.locale; 72 | }, 73 | 74 | /** 75 | * helper function to focus on search bar 76 | */ 77 | focusSearchBar: () => { 78 | document.querySelectorAll("input[name=q]")[0]?.focus(); 79 | }, 80 | 81 | /** 82 | * Initialise the domain index toggle buttons 83 | */ 84 | initDomainIndexTable: () => { 85 | const toggler = (el) => { 86 | const idNumber = el.id.substr(7); 87 | const toggledRows = document.querySelectorAll(`tr.cg-${idNumber}`); 88 | if (el.src.substr(-9) === "minus.png") { 89 | el.src = `${el.src.substr(0, el.src.length - 9)}plus.png`; 90 | toggledRows.forEach((el) => (el.style.display = "none")); 91 | } else { 92 | el.src = `${el.src.substr(0, el.src.length - 8)}minus.png`; 93 | toggledRows.forEach((el) => (el.style.display = "")); 94 | } 95 | }; 96 | 97 | const togglerElements = document.querySelectorAll("img.toggler"); 98 | togglerElements.forEach((el) => 99 | el.addEventListener("click", (event) => toggler(event.currentTarget)) 100 | ); 101 | togglerElements.forEach((el) => (el.style.display = "")); 102 | if (DOCUMENTATION_OPTIONS.COLLAPSE_INDEX) togglerElements.forEach(toggler); 103 | }, 104 | 105 | initOnKeyListeners: () => { 106 | // only install a listener if it is really needed 107 | if ( 108 | !DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS && 109 | !DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS 110 | ) 111 | return; 112 | 113 | document.addEventListener("keydown", (event) => { 114 | // bail for input elements 115 | if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return; 116 | // bail with special keys 117 | if (event.altKey || event.ctrlKey || event.metaKey) return; 118 | 119 | if (!event.shiftKey) { 120 | switch (event.key) { 121 | case "ArrowLeft": 122 | if (!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break; 123 | 124 | const prevLink = document.querySelector('link[rel="prev"]'); 125 | if (prevLink && prevLink.href) { 126 | window.location.href = prevLink.href; 127 | event.preventDefault(); 128 | } 129 | break; 130 | case "ArrowRight": 131 | if (!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break; 132 | 133 | const nextLink = document.querySelector('link[rel="next"]'); 134 | if (nextLink && nextLink.href) { 135 | window.location.href = nextLink.href; 136 | event.preventDefault(); 137 | } 138 | break; 139 | } 140 | } 141 | 142 | // some keyboard layouts may need Shift to get / 143 | switch (event.key) { 144 | case "/": 145 | if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) break; 146 | Documentation.focusSearchBar(); 147 | event.preventDefault(); 148 | } 149 | }); 150 | }, 151 | }; 152 | 153 | // quick alias for translations 154 | const _ = Documentation.gettext; 155 | 156 | _ready(Documentation.init); 157 | -------------------------------------------------------------------------------- /docs/_static/documentation_options.js: -------------------------------------------------------------------------------- 1 | var DOCUMENTATION_OPTIONS = { 2 | URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'), 3 | VERSION: '0.5.0', 4 | LANGUAGE: 'en', 5 | COLLAPSE_INDEX: false, 6 | BUILDER: 'html', 7 | FILE_SUFFIX: '.html', 8 | LINK_SUFFIX: '.html', 9 | HAS_SOURCE: true, 10 | SOURCELINK_SUFFIX: '.txt', 11 | NAVIGATION_WITH_KEYS: false, 12 | SHOW_SEARCH_SUMMARY: true, 13 | ENABLE_SEARCH_SHORTCUTS: true, 14 | }; -------------------------------------------------------------------------------- /docs/_static/file.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurtleTools/geometricus/33b29e7e6074c867640455585d339f6319ec2e8d/docs/_static/file.png -------------------------------------------------------------------------------- /docs/_static/geometricus_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurtleTools/geometricus/33b29e7e6074c867640455585d339f6319ec2e8d/docs/_static/geometricus_logo.png -------------------------------------------------------------------------------- /docs/_static/language_data.js: -------------------------------------------------------------------------------- 1 | /* 2 | * language_data.js 3 | * ~~~~~~~~~~~~~~~~ 4 | * 5 | * This script contains the language-specific data used by searchtools.js, 6 | * namely the list of stopwords, stemmer, scorer and splitter. 7 | * 8 | * :copyright: Copyright 2007-2022 by the Sphinx team, see AUTHORS. 9 | * :license: BSD, see LICENSE for details. 10 | * 11 | */ 12 | 13 | var stopwords = ["a", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "near", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"]; 14 | 15 | 16 | /* Non-minified version is copied as a separate JS file, is available */ 17 | 18 | /** 19 | * Porter Stemmer 20 | */ 21 | var Stemmer = function() { 22 | 23 | var step2list = { 24 | ational: 'ate', 25 | tional: 'tion', 26 | enci: 'ence', 27 | anci: 'ance', 28 | izer: 'ize', 29 | bli: 'ble', 30 | alli: 'al', 31 | entli: 'ent', 32 | eli: 'e', 33 | ousli: 'ous', 34 | ization: 'ize', 35 | ation: 'ate', 36 | ator: 'ate', 37 | alism: 'al', 38 | iveness: 'ive', 39 | fulness: 'ful', 40 | ousness: 'ous', 41 | aliti: 'al', 42 | iviti: 'ive', 43 | biliti: 'ble', 44 | logi: 'log' 45 | }; 46 | 47 | var step3list = { 48 | icate: 'ic', 49 | ative: '', 50 | alize: 'al', 51 | iciti: 'ic', 52 | ical: 'ic', 53 | ful: '', 54 | ness: '' 55 | }; 56 | 57 | var c = "[^aeiou]"; // consonant 58 | var v = "[aeiouy]"; // vowel 59 | var C = c + "[^aeiouy]*"; // consonant sequence 60 | var V = v + "[aeiou]*"; // vowel sequence 61 | 62 | var mgr0 = "^(" + C + ")?" + V + C; // [C]VC... is m>0 63 | var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$"; // [C]VC[V] is m=1 64 | var mgr1 = "^(" + C + ")?" + V + C + V + C; // [C]VCVC... is m>1 65 | var s_v = "^(" + C + ")?" + v; // vowel in stem 66 | 67 | this.stemWord = function (w) { 68 | var stem; 69 | var suffix; 70 | var firstch; 71 | var origword = w; 72 | 73 | if (w.length < 3) 74 | return w; 75 | 76 | var re; 77 | var re2; 78 | var re3; 79 | var re4; 80 | 81 | firstch = w.substr(0,1); 82 | if (firstch == "y") 83 | w = firstch.toUpperCase() + w.substr(1); 84 | 85 | // Step 1a 86 | re = /^(.+?)(ss|i)es$/; 87 | re2 = /^(.+?)([^s])s$/; 88 | 89 | if (re.test(w)) 90 | w = w.replace(re,"$1$2"); 91 | else if (re2.test(w)) 92 | w = w.replace(re2,"$1$2"); 93 | 94 | // Step 1b 95 | re = /^(.+?)eed$/; 96 | re2 = /^(.+?)(ed|ing)$/; 97 | if (re.test(w)) { 98 | var fp = re.exec(w); 99 | re = new RegExp(mgr0); 100 | if (re.test(fp[1])) { 101 | re = /.$/; 102 | w = w.replace(re,""); 103 | } 104 | } 105 | else if (re2.test(w)) { 106 | var fp = re2.exec(w); 107 | stem = fp[1]; 108 | re2 = new RegExp(s_v); 109 | if (re2.test(stem)) { 110 | w = stem; 111 | re2 = /(at|bl|iz)$/; 112 | re3 = new RegExp("([^aeiouylsz])\\1$"); 113 | re4 = new RegExp("^" + C + v + "[^aeiouwxy]$"); 114 | if (re2.test(w)) 115 | w = w + "e"; 116 | else if (re3.test(w)) { 117 | re = /.$/; 118 | w = w.replace(re,""); 119 | } 120 | else if (re4.test(w)) 121 | w = w + "e"; 122 | } 123 | } 124 | 125 | // Step 1c 126 | re = /^(.+?)y$/; 127 | if (re.test(w)) { 128 | var fp = re.exec(w); 129 | stem = fp[1]; 130 | re = new RegExp(s_v); 131 | if (re.test(stem)) 132 | w = stem + "i"; 133 | } 134 | 135 | // Step 2 136 | re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/; 137 | if (re.test(w)) { 138 | var fp = re.exec(w); 139 | stem = fp[1]; 140 | suffix = fp[2]; 141 | re = new RegExp(mgr0); 142 | if (re.test(stem)) 143 | w = stem + step2list[suffix]; 144 | } 145 | 146 | // Step 3 147 | re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/; 148 | if (re.test(w)) { 149 | var fp = re.exec(w); 150 | stem = fp[1]; 151 | suffix = fp[2]; 152 | re = new RegExp(mgr0); 153 | if (re.test(stem)) 154 | w = stem + step3list[suffix]; 155 | } 156 | 157 | // Step 4 158 | re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/; 159 | re2 = /^(.+?)(s|t)(ion)$/; 160 | if (re.test(w)) { 161 | var fp = re.exec(w); 162 | stem = fp[1]; 163 | re = new RegExp(mgr1); 164 | if (re.test(stem)) 165 | w = stem; 166 | } 167 | else if (re2.test(w)) { 168 | var fp = re2.exec(w); 169 | stem = fp[1] + fp[2]; 170 | re2 = new RegExp(mgr1); 171 | if (re2.test(stem)) 172 | w = stem; 173 | } 174 | 175 | // Step 5 176 | re = /^(.+?)e$/; 177 | if (re.test(w)) { 178 | var fp = re.exec(w); 179 | stem = fp[1]; 180 | re = new RegExp(mgr1); 181 | re2 = new RegExp(meq1); 182 | re3 = new RegExp("^" + C + v + "[^aeiouwxy]$"); 183 | if (re.test(stem) || (re2.test(stem) && !(re3.test(stem)))) 184 | w = stem; 185 | } 186 | re = /ll$/; 187 | re2 = new RegExp(mgr1); 188 | if (re.test(w) && re2.test(w)) { 189 | re = /.$/; 190 | w = w.replace(re,""); 191 | } 192 | 193 | // and turn initial Y back to y 194 | if (firstch == "y") 195 | w = firstch.toLowerCase() + w.substr(1); 196 | return w; 197 | } 198 | } 199 | 200 | -------------------------------------------------------------------------------- /docs/_static/minus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurtleTools/geometricus/33b29e7e6074c867640455585d339f6319ec2e8d/docs/_static/minus.png -------------------------------------------------------------------------------- /docs/_static/plus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurtleTools/geometricus/33b29e7e6074c867640455585d339f6319ec2e8d/docs/_static/plus.png -------------------------------------------------------------------------------- /docs/_static/pygments.css: -------------------------------------------------------------------------------- 1 | pre { line-height: 125%; } 2 | td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } 3 | span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } 4 | td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } 5 | span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } 6 | .highlight .hll { background-color: #ffffcc } 7 | .highlight { background: #f8f8f8; } 8 | .highlight .c { color: #8f5902; font-style: italic } /* Comment */ 9 | .highlight .err { color: #a40000; border: 1px solid #ef2929 } /* Error */ 10 | .highlight .g { color: #000000 } /* Generic */ 11 | .highlight .k { color: #004461; font-weight: bold } /* Keyword */ 12 | .highlight .l { color: #000000 } /* Literal */ 13 | .highlight .n { color: #000000 } /* Name */ 14 | .highlight .o { color: #582800 } /* Operator */ 15 | .highlight .x { color: #000000 } /* Other */ 16 | .highlight .p { color: #000000; font-weight: bold } /* Punctuation */ 17 | .highlight .ch { color: #8f5902; font-style: italic } /* Comment.Hashbang */ 18 | .highlight .cm { color: #8f5902; font-style: italic } /* Comment.Multiline */ 19 | .highlight .cp { color: #8f5902 } /* Comment.Preproc */ 20 | .highlight .cpf { color: #8f5902; font-style: italic } /* Comment.PreprocFile */ 21 | .highlight .c1 { color: #8f5902; font-style: italic } /* Comment.Single */ 22 | .highlight .cs { color: #8f5902; font-style: italic } /* Comment.Special */ 23 | .highlight .gd { color: #a40000 } /* Generic.Deleted */ 24 | .highlight .ge { color: #000000; font-style: italic } /* Generic.Emph */ 25 | .highlight .gr { color: #ef2929 } /* Generic.Error */ 26 | .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ 27 | .highlight .gi { color: #00A000 } /* Generic.Inserted */ 28 | .highlight .go { color: #888888 } /* Generic.Output */ 29 | .highlight .gp { color: #745334 } /* Generic.Prompt */ 30 | .highlight .gs { color: #000000; font-weight: bold } /* Generic.Strong */ 31 | .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ 32 | .highlight .gt { color: #a40000; font-weight: bold } /* Generic.Traceback */ 33 | .highlight .kc { color: #004461; font-weight: bold } /* Keyword.Constant */ 34 | .highlight .kd { color: #004461; font-weight: bold } /* Keyword.Declaration */ 35 | .highlight .kn { color: #004461; font-weight: bold } /* Keyword.Namespace */ 36 | .highlight .kp { color: #004461; font-weight: bold } /* Keyword.Pseudo */ 37 | .highlight .kr { color: #004461; font-weight: bold } /* Keyword.Reserved */ 38 | .highlight .kt { color: #004461; font-weight: bold } /* Keyword.Type */ 39 | .highlight .ld { color: #000000 } /* Literal.Date */ 40 | .highlight .m { color: #990000 } /* Literal.Number */ 41 | .highlight .s { color: #4e9a06 } /* Literal.String */ 42 | .highlight .na { color: #c4a000 } /* Name.Attribute */ 43 | .highlight .nb { color: #004461 } /* Name.Builtin */ 44 | .highlight .nc { color: #000000 } /* Name.Class */ 45 | .highlight .no { color: #000000 } /* Name.Constant */ 46 | .highlight .nd { color: #888888 } /* Name.Decorator */ 47 | .highlight .ni { color: #ce5c00 } /* Name.Entity */ 48 | .highlight .ne { color: #cc0000; font-weight: bold } /* Name.Exception */ 49 | .highlight .nf { color: #000000 } /* Name.Function */ 50 | .highlight .nl { color: #f57900 } /* Name.Label */ 51 | .highlight .nn { color: #000000 } /* Name.Namespace */ 52 | .highlight .nx { color: #000000 } /* Name.Other */ 53 | .highlight .py { color: #000000 } /* Name.Property */ 54 | .highlight .nt { color: #004461; font-weight: bold } /* Name.Tag */ 55 | .highlight .nv { color: #000000 } /* Name.Variable */ 56 | .highlight .ow { color: #004461; font-weight: bold } /* Operator.Word */ 57 | .highlight .pm { color: #000000; font-weight: bold } /* Punctuation.Marker */ 58 | .highlight .w { color: #f8f8f8; text-decoration: underline } /* Text.Whitespace */ 59 | .highlight .mb { color: #990000 } /* Literal.Number.Bin */ 60 | .highlight .mf { color: #990000 } /* Literal.Number.Float */ 61 | .highlight .mh { color: #990000 } /* Literal.Number.Hex */ 62 | .highlight .mi { color: #990000 } /* Literal.Number.Integer */ 63 | .highlight .mo { color: #990000 } /* Literal.Number.Oct */ 64 | .highlight .sa { color: #4e9a06 } /* Literal.String.Affix */ 65 | .highlight .sb { color: #4e9a06 } /* Literal.String.Backtick */ 66 | .highlight .sc { color: #4e9a06 } /* Literal.String.Char */ 67 | .highlight .dl { color: #4e9a06 } /* Literal.String.Delimiter */ 68 | .highlight .sd { color: #8f5902; font-style: italic } /* Literal.String.Doc */ 69 | .highlight .s2 { color: #4e9a06 } /* Literal.String.Double */ 70 | .highlight .se { color: #4e9a06 } /* Literal.String.Escape */ 71 | .highlight .sh { color: #4e9a06 } /* Literal.String.Heredoc */ 72 | .highlight .si { color: #4e9a06 } /* Literal.String.Interpol */ 73 | .highlight .sx { color: #4e9a06 } /* Literal.String.Other */ 74 | .highlight .sr { color: #4e9a06 } /* Literal.String.Regex */ 75 | .highlight .s1 { color: #4e9a06 } /* Literal.String.Single */ 76 | .highlight .ss { color: #4e9a06 } /* Literal.String.Symbol */ 77 | .highlight .bp { color: #3465a4 } /* Name.Builtin.Pseudo */ 78 | .highlight .fm { color: #000000 } /* Name.Function.Magic */ 79 | .highlight .vc { color: #000000 } /* Name.Variable.Class */ 80 | .highlight .vg { color: #000000 } /* Name.Variable.Global */ 81 | .highlight .vi { color: #000000 } /* Name.Variable.Instance */ 82 | .highlight .vm { color: #000000 } /* Name.Variable.Magic */ 83 | .highlight .il { color: #990000 } /* Literal.Number.Integer.Long */ -------------------------------------------------------------------------------- /docs/_static/sphinx_highlight.js: -------------------------------------------------------------------------------- 1 | /* Highlighting utilities for Sphinx HTML documentation. */ 2 | "use strict"; 3 | 4 | const SPHINX_HIGHLIGHT_ENABLED = true 5 | 6 | /** 7 | * highlight a given string on a node by wrapping it in 8 | * span elements with the given class name. 9 | */ 10 | const _highlight = (node, addItems, text, className) => { 11 | if (node.nodeType === Node.TEXT_NODE) { 12 | const val = node.nodeValue; 13 | const parent = node.parentNode; 14 | const pos = val.toLowerCase().indexOf(text); 15 | if ( 16 | pos >= 0 && 17 | !parent.classList.contains(className) && 18 | !parent.classList.contains("nohighlight") 19 | ) { 20 | let span; 21 | 22 | const closestNode = parent.closest("body, svg, foreignObject"); 23 | const isInSVG = closestNode && closestNode.matches("svg"); 24 | if (isInSVG) { 25 | span = document.createElementNS("http://www.w3.org/2000/svg", "tspan"); 26 | } else { 27 | span = document.createElement("span"); 28 | span.classList.add(className); 29 | } 30 | 31 | span.appendChild(document.createTextNode(val.substr(pos, text.length))); 32 | parent.insertBefore( 33 | span, 34 | parent.insertBefore( 35 | document.createTextNode(val.substr(pos + text.length)), 36 | node.nextSibling 37 | ) 38 | ); 39 | node.nodeValue = val.substr(0, pos); 40 | 41 | if (isInSVG) { 42 | const rect = document.createElementNS( 43 | "http://www.w3.org/2000/svg", 44 | "rect" 45 | ); 46 | const bbox = parent.getBBox(); 47 | rect.x.baseVal.value = bbox.x; 48 | rect.y.baseVal.value = bbox.y; 49 | rect.width.baseVal.value = bbox.width; 50 | rect.height.baseVal.value = bbox.height; 51 | rect.setAttribute("class", className); 52 | addItems.push({ parent: parent, target: rect }); 53 | } 54 | } 55 | } else if (node.matches && !node.matches("button, select, textarea")) { 56 | node.childNodes.forEach((el) => _highlight(el, addItems, text, className)); 57 | } 58 | }; 59 | const _highlightText = (thisNode, text, className) => { 60 | let addItems = []; 61 | _highlight(thisNode, addItems, text, className); 62 | addItems.forEach((obj) => 63 | obj.parent.insertAdjacentElement("beforebegin", obj.target) 64 | ); 65 | }; 66 | 67 | /** 68 | * Small JavaScript module for the documentation. 69 | */ 70 | const SphinxHighlight = { 71 | 72 | /** 73 | * highlight the search words provided in localstorage in the text 74 | */ 75 | highlightSearchWords: () => { 76 | if (!SPHINX_HIGHLIGHT_ENABLED) return; // bail if no highlight 77 | 78 | // get and clear terms from localstorage 79 | const url = new URL(window.location); 80 | const highlight = 81 | localStorage.getItem("sphinx_highlight_terms") 82 | || url.searchParams.get("highlight") 83 | || ""; 84 | localStorage.removeItem("sphinx_highlight_terms") 85 | url.searchParams.delete("highlight"); 86 | window.history.replaceState({}, "", url); 87 | 88 | // get individual terms from highlight string 89 | const terms = highlight.toLowerCase().split(/\s+/).filter(x => x); 90 | if (terms.length === 0) return; // nothing to do 91 | 92 | // There should never be more than one element matching "div.body" 93 | const divBody = document.querySelectorAll("div.body"); 94 | const body = divBody.length ? divBody[0] : document.querySelector("body"); 95 | window.setTimeout(() => { 96 | terms.forEach((term) => _highlightText(body, term, "highlighted")); 97 | }, 10); 98 | 99 | const searchBox = document.getElementById("searchbox"); 100 | if (searchBox === null) return; 101 | searchBox.appendChild( 102 | document 103 | .createRange() 104 | .createContextualFragment( 105 | '" 109 | ) 110 | ); 111 | }, 112 | 113 | /** 114 | * helper function to hide the search marks again 115 | */ 116 | hideSearchWords: () => { 117 | document 118 | .querySelectorAll("#searchbox .highlight-link") 119 | .forEach((el) => el.remove()); 120 | document 121 | .querySelectorAll("span.highlighted") 122 | .forEach((el) => el.classList.remove("highlighted")); 123 | localStorage.removeItem("sphinx_highlight_terms") 124 | }, 125 | 126 | initEscapeListener: () => { 127 | // only install a listener if it is really needed 128 | if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) return; 129 | 130 | document.addEventListener("keydown", (event) => { 131 | // bail for input elements 132 | if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return; 133 | // bail with special keys 134 | if (event.shiftKey || event.altKey || event.ctrlKey || event.metaKey) return; 135 | if (DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS && (event.key === "Escape")) { 136 | SphinxHighlight.hideSearchWords(); 137 | event.preventDefault(); 138 | } 139 | }); 140 | }, 141 | }; 142 | 143 | _ready(SphinxHighlight.highlightSearchWords); 144 | _ready(SphinxHighlight.initEscapeListener); 145 | -------------------------------------------------------------------------------- /docs/build/doctrees/api/geometricus.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurtleTools/geometricus/33b29e7e6074c867640455585d339f6319ec2e8d/docs/build/doctrees/api/geometricus.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/api/modules.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurtleTools/geometricus/33b29e7e6074c867640455585d339f6319ec2e8d/docs/build/doctrees/api/modules.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/environment.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurtleTools/geometricus/33b29e7e6074c867640455585d339f6319ec2e8d/docs/build/doctrees/environment.pickle -------------------------------------------------------------------------------- /docs/build/doctrees/getting_started.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurtleTools/geometricus/33b29e7e6074c867640455585d339f6319ec2e8d/docs/build/doctrees/getting_started.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/index.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurtleTools/geometricus/33b29e7e6074c867640455585d339f6319ec2e8d/docs/build/doctrees/index.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/install.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurtleTools/geometricus/33b29e7e6074c867640455585d339f6319ec2e8d/docs/build/doctrees/install.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/nbsphinx/getting_started_11_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurtleTools/geometricus/33b29e7e6074c867640455585d339f6319ec2e8d/docs/build/doctrees/nbsphinx/getting_started_11_1.png -------------------------------------------------------------------------------- /docs/build/doctrees/nbsphinx/getting_started_17_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurtleTools/geometricus/33b29e7e6074c867640455585d339f6319ec2e8d/docs/build/doctrees/nbsphinx/getting_started_17_0.png -------------------------------------------------------------------------------- /docs/build/doctrees/nbsphinx/getting_started_18_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurtleTools/geometricus/33b29e7e6074c867640455585d339f6319ec2e8d/docs/build/doctrees/nbsphinx/getting_started_18_0.png -------------------------------------------------------------------------------- /docs/build/html/.buildinfo: -------------------------------------------------------------------------------- 1 | # Sphinx build info version 1 2 | # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. 3 | config: 4aa8244b1f5cd7131ed562cbfa1f91f9 4 | tags: 645f666f9bcd5a90fca523b33c5a78b7 5 | -------------------------------------------------------------------------------- /docs/build/html/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurtleTools/geometricus/33b29e7e6074c867640455585d339f6319ec2e8d/docs/build/html/.nojekyll -------------------------------------------------------------------------------- /docs/build/html/_images/getting_started_11_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurtleTools/geometricus/33b29e7e6074c867640455585d339f6319ec2e8d/docs/build/html/_images/getting_started_11_1.png -------------------------------------------------------------------------------- /docs/build/html/_images/getting_started_17_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurtleTools/geometricus/33b29e7e6074c867640455585d339f6319ec2e8d/docs/build/html/_images/getting_started_17_0.png -------------------------------------------------------------------------------- /docs/build/html/_images/getting_started_18_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurtleTools/geometricus/33b29e7e6074c867640455585d339f6319ec2e8d/docs/build/html/_images/getting_started_18_0.png -------------------------------------------------------------------------------- /docs/build/html/_modules/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Overview: module code — Geometricus 0.5.0 documentation 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 |
25 | 26 | 86 |
87 |
88 | 89 | 90 |
91 | 92 |

All modules for which code is available

93 | 97 | 98 |
99 | 100 |
101 |
102 |
103 |
104 | 112 | 113 | 114 | 115 | Fork me on GitHub 116 | 117 | 118 | 119 | 120 | 121 | -------------------------------------------------------------------------------- /docs/build/html/_sources/api/geometricus.rst.txt: -------------------------------------------------------------------------------- 1 | geometricus package 2 | =================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | geometricus.geometricus module 8 | ------------------------------ 9 | 10 | .. automodule:: geometricus.geometricus 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | geometricus.moment\_utility module 16 | ---------------------------------- 17 | 18 | .. automodule:: geometricus.moment_utility 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | geometricus.protein\_utility module 24 | ----------------------------------- 25 | 26 | .. automodule:: geometricus.protein_utility 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /docs/build/html/_sources/api/modules.rst.txt: -------------------------------------------------------------------------------- 1 | API Reference 2 | ============= 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | geometricus 8 | -------------------------------------------------------------------------------- /docs/build/html/_sources/getting_started.rst.txt: -------------------------------------------------------------------------------- 1 | .. code:: ipython3 2 | 3 | import warnings 4 | warnings.filterwarnings('ignore') 5 | 6 | .. code:: ipython3 7 | 8 | %load_ext autoreload 9 | %autoreload 2 10 | 11 | Getting Started 12 | =============== 13 | 14 | Let’s walk through an example application of Geometricus in machine 15 | learning using a dataset of human MAP kinases, divided into three 16 | classes - JNK, Erk, and p38 kinases. This dataset was generated from the 17 | `Kinase-Ligand Interaction Fingerprints and Structures database 18 | (KLIFS) `__ (Raw table 19 | `here <../../example_data/MAPK_KLIFS.tsv>`__). 20 | 21 | .. code:: ipython3 22 | 23 | import pandas as pnd 24 | from pathlib import Path 25 | from time import time 26 | 27 | url = "https://raw.githubusercontent.com/TurtleTools/geometricus/master/example_data/MAPK_KLIFS.tsv" 28 | mapk_df = pnd.read_csv(url, sep="\t") 29 | 30 | mapk_pdb_id_to_class = {} 31 | for pdb_id, chain, class_name in list(zip(mapk_df["PDB"], mapk_df["CHAIN"], mapk_df["CLASS"])): 32 | mapk_pdb_id_to_class[(pdb_id, chain)] = class_name 33 | len(mapk_pdb_id_to_class) 34 | 35 | 36 | 37 | 38 | .. parsed-literal:: 39 | 40 | 527 41 | 42 | 43 | 44 | So now we have a list of (PDB ID, chain) pairs, each associated with a 45 | class 46 | 47 | .. code:: ipython3 48 | 49 | X_names = list(mapk_pdb_id_to_class.keys()) 50 | class_mapping = {"JNK": 0, "Erk": 1, "p38": 2} 51 | y = [class_mapping[mapk_pdb_id_to_class[k]] for k in X_names] 52 | 53 | Structural fragmentation 54 | ------------------------ 55 | 56 | We consider two different ways of dividing a protein with :math:`l` 57 | residues into structural fragments, a :math:`k`-mer-based approach and a 58 | radius-based approach. For each structural fragment, multiple rotation 59 | and translation-invariant moments can be calculated. While the 60 | :math:`k`-mer based approach is effective in describing structural 61 | fragments that are sequential in nature, such as :math:`\alpha`-helices 62 | and loops, the radius-based approach can capture long-range structural 63 | contacts as seen in :math:`\beta`-sheets, as well as distinct 64 | interaction patterns in space, as found in enzyme active sites. 65 | 66 | Below we fetch and parse each protein as a ProDy AtomGroup object. This 67 | takes a while (around 15 mins) since it downloads via FTP from RCSB. 68 | 69 | .. code:: ipython3 70 | 71 | import prody as pd 72 | 73 | start_time = time() 74 | pdbs = [] 75 | for i, (pdb_id, chain) in enumerate(X_names): 76 | if i > 0 and i % 50 == 0: 77 | print(f"{i} proteins fetched in {(time() - start_time):.2f} seconds") 78 | pdbs.append(pd.parsePDB(pdb_id, chain=chain)) 79 | 80 | 81 | .. parsed-literal:: 82 | 83 | 50 proteins fetched in 96.46 seconds 84 | 100 proteins fetched in 194.62 seconds 85 | 150 proteins fetched in 281.02 seconds 86 | 200 proteins fetched in 365.41 seconds 87 | 250 proteins fetched in 448.01 seconds 88 | 300 proteins fetched in 534.71 seconds 89 | 350 proteins fetched in 613.16 seconds 90 | 400 proteins fetched in 692.71 seconds 91 | 450 proteins fetched in 777.41 seconds 92 | 500 proteins fetched in 861.82 seconds 93 | 94 | 95 | Moment invariants can be calculated using the 96 | ``geometricus.MomentInvariants`` class. This class has many 97 | constructors, such as the ``from_prody_atomgroup`` below. This can be 98 | replaced by ``from_pdb_file`` to load a protein from a file or even 99 | ``from_pdb_id`` to download from RCSB via FTP. 100 | 101 | This is the point where you decide the type and size of structural 102 | fragmentation to use. The options are: 103 | 104 | - KMER - each residue is taken as the center of a kmer of length 105 | ``split_size``, ends are included but shorter 106 | - RADIUS - overlapping spheres of radius ``split_size`` 107 | - RADIUS_UPSAMPLE - upsamples backbone atoms before taking overlapping 108 | spheres. 109 | - KMER_CUT - same as kmer but ends are not included, only fragments of 110 | length ``split_size`` are kept 111 | - ALLMER - adds kmers of different lengths (``split_size - 5`` to 112 | ``split_size + 5``) to take into account deletions/insertions that 113 | don’t change the shape 114 | 115 | Below we use ``SplitType.KMER`` with a ``split_size`` (i.e. kmer size) 116 | of 16 and ``SplitType.RADIUS`` with a ``split_size`` (i.e. radius) of 117 | 10. 118 | 119 | ``geometricus.MomentType`` lists the available moment invariants. By 120 | default :math:`O_3`, :math:`O_4`, :math:`O_5`, and :math:`F` are 121 | calculated. This can be changed using the ``moment_types`` argument in 122 | any of the ``MomentInvariants`` constructors. 123 | 124 | .. code:: ipython3 125 | 126 | from geometricus import MomentInvariants, SplitType 127 | 128 | invariants_kmer = [] 129 | invariants_radius = [] 130 | 131 | start_time = time() 132 | for i, key in enumerate(X_names): 133 | if i > 0 and i % 50 == 0: 134 | print(f"{i} proteins in {(time() - start_time):.2f} seconds") 135 | invariants_kmer.append(MomentInvariants.from_prody_atomgroup(key, pdbs[i], split_type=SplitType.KMER, split_size=16)) 136 | invariants_radius.append(MomentInvariants.from_prody_atomgroup(key, pdbs[i], split_type=SplitType.RADIUS, split_size=10)) 137 | 138 | 139 | .. parsed-literal:: 140 | 141 | 50 proteins in 3.42 seconds 142 | 100 proteins in 8.67 seconds 143 | 150 proteins in 15.67 seconds 144 | 200 proteins in 20.08 seconds 145 | 250 proteins in 25.12 seconds 146 | 300 proteins in 29.88 seconds 147 | 350 proteins in 33.77 seconds 148 | 400 proteins in 38.55 seconds 149 | 450 proteins in 43.29 seconds 150 | 500 proteins in 45.82 seconds 151 | 152 | 153 | Generating an Embedding from Structural Fragments 154 | ------------------------------------------------- 155 | 156 | Moment invariants are discretized into shape-mers, using a 157 | **resolution** parameter which controls how coarse or fine-grained this 158 | discretization is. A count vector of shape-mers is calculated for each 159 | protein with each element recording the number of times the 160 | corresponding shape-mer appears in that protein. The resolution 161 | parameter can be optimized to the task at hand. Generally, more 162 | divergent proteins would require a lower resolution while highly similar 163 | proteins would need higher resolutions to differentiate them. For the 164 | MAP kinases, we use a relatively high resolution of 2. 165 | 166 | Depending on the use-case you may want to embed all proteins at once, as 167 | demonstrated below, or separate train and test proteins as demonstrated 168 | in the Supervised Learning section. 169 | 170 | Embedding for Dimensionality Reduction 171 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 172 | 173 | Embeddings across all proteins can be used for dimensionality reduction 174 | and visualization. 175 | 176 | .. code:: ipython3 177 | 178 | import umap 179 | import numpy as np 180 | import matplotlib.pyplot as plt 181 | from geometricus import GeometricusEmbedding 182 | 183 | start_time = time() 184 | kmer_embedder = GeometricusEmbedding.from_invariants(invariants_kmer, resolution=2.) 185 | radius_embedder = GeometricusEmbedding.from_invariants(invariants_radius, resolution=2.) 186 | print(f"Generated embeddings in {(time() - start_time):.2f} seconds") 187 | 188 | reducer = umap.UMAP(metric="cosine", n_components=2) 189 | reduced = reducer.fit_transform(np.hstack((kmer_embedder.embedding, radius_embedder.embedding))) 190 | 191 | class_names = ["JNK", "Erk", "p38"] 192 | colors = ["red", "green", "blue"] 193 | plt.figure(figsize=(10,10)) 194 | for i in range(3): 195 | indices = np.where(np.array(y) == i)[0] 196 | plt.scatter(reduced[indices, 0], 197 | reduced[indices, 1], 198 | label=class_names[i], edgecolor="black", linewidth=0.1, alpha=0.8) 199 | plt.axis("off") 200 | plt.legend(); 201 | 202 | 203 | .. parsed-literal:: 204 | 205 | Generated embeddings in 1.19 seconds 206 | 207 | 208 | 209 | .. image:: getting_started_files/getting_started_11_1.png 210 | 211 | 212 | Embedding for Supervised Learning 213 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 214 | 215 | Supervised learning, be it classification or regression, requires some 216 | form of training and test set separation. Below, the shape-mers used for 217 | embedding are calculated from the training set (using the kmer approach) 218 | and only those shape-mers are counted in the test proteins. 219 | 220 | .. code:: ipython3 221 | 222 | from sklearn.model_selection import train_test_split 223 | 224 | X_train_names, X_test_names, y_train, y_test = train_test_split(X_names, y, test_size=0.3) 225 | 226 | train_embedder = GeometricusEmbedding.from_invariants(invariants_kmer, resolution=2., protein_keys=X_train_names) 227 | test_embedder = train_embedder.embed(invariants_kmer, X_test_names) 228 | 229 | X_train, X_test = train_embedder.embedding, test_embedder.embedding 230 | 231 | For this simple problem, a decision tree classifier is more than enough 232 | to obtain good accuracy. 233 | 234 | .. code:: ipython3 235 | 236 | from sklearn.tree import DecisionTreeClassifier 237 | from sklearn.metrics import classification_report 238 | 239 | clf = DecisionTreeClassifier(random_state=42, max_depth=3) 240 | clf.fit(X_train, y_train) 241 | y_pred = clf.predict(X_test) 242 | print(classification_report(y_test, y_pred, [0, 1, 2], class_names)) 243 | 244 | 245 | .. parsed-literal:: 246 | 247 | precision recall f1-score support 248 | 249 | JNK 0.84 0.93 0.89 29 250 | Erk 0.93 0.89 0.91 46 251 | p38 0.95 0.94 0.95 84 252 | 253 | accuracy 0.92 159 254 | macro avg 0.91 0.92 0.91 159 255 | weighted avg 0.93 0.92 0.92 159 256 | 257 | 258 | 259 | Finding predictive residues and structural fragments 260 | ---------------------------------------------------- 261 | 262 | Since each shape-mer can be mapped back to the residues it describes, 263 | it’s easy to use Geometricus to determine predictive or functionally 264 | relevant residues and structural regions from a trained predictor (or 265 | from loadings generated from a method such as PCA). Such insights can be 266 | directly be applied to select candidate residues for mutational studies 267 | or used in directed evolution techniques to engineer proteins and 268 | enzymes with desired properties. 269 | 270 | By inspecting the decision tree created above, it becomes clear that 271 | some shape-mers are present multiple times across a protein and the 272 | number of times differs across classes. 273 | 274 | .. code:: ipython3 275 | 276 | from sklearn.tree import plot_tree 277 | fig, ax = plt.subplots(1, figsize=(15,15)) 278 | plot_tree(clf, filled=True, ax=ax, feature_names=train_embedder.shapemer_keys); 279 | 280 | 281 | 282 | .. image:: getting_started_files/getting_started_17_0.png 283 | 284 | 285 | .. code:: ipython3 286 | 287 | plt.plot(clf.feature_importances_) 288 | predictive_feature_indices = np.argsort(clf.feature_importances_)[::-1][:6] 289 | 290 | 291 | 292 | .. image:: getting_started_files/getting_started_18_0.png 293 | 294 | 295 | We can map back to the residues described by a shape-mer using the 296 | ``map_shapemer_to_residues`` function which returns a dictionary mapping 297 | each protein to the corresponding residue indices of the shape-mer 298 | within that protein, if it exists. 299 | 300 | .. code:: ipython3 301 | 302 | shapemer = train_embedder.shapemer_keys[predictive_feature_indices[1]] 303 | residue_indices_train = train_embedder.map_shapemer_to_residues(shapemer) 304 | print("Shape-mer:", shapemer, "Number of proteins with shape-mer:", len(residue_indices_train)) 305 | print() 306 | print("Residue indices per protein (for 10 proteins):") 307 | for i, key in enumerate(residue_indices_train): 308 | if i > 10: 309 | break 310 | print(key, residue_indices_train[key]) 311 | 312 | 313 | .. parsed-literal:: 314 | 315 | Shape-mer: (13, 24, 32, 34) Number of proteins with shape-mer: 507 316 | 317 | Residue indices per protein (for 10 proteins): 318 | ('3o2m', 'A') {234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249} 319 | ('3o2m', 'B') {234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249} 320 | ('4eh8', 'A') {231, 232, 233, 234, 235, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119} 321 | ('4eh2', 'A') {230, 231, 232, 233, 234, 235, 236, 221, 222, 223, 224, 225, 226, 227, 228, 229, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119} 322 | ('4eh7', 'A') {224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 221, 222, 223} 323 | ('4eh5', 'A') {224, 225, 226, 227, 228, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223} 324 | ('4dli', 'A') {224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 218, 219, 220, 221, 222, 223} 325 | ('4eh3', 'A') {231, 232, 233, 234, 235, 236, 237, 238, 223, 224, 225, 226, 227, 228, 229, 230, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119} 326 | ('4eh6', 'A') {231, 232, 233, 234, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119} 327 | ('4eh4', 'A') {224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 221, 222, 223} 328 | ('4eh9', 'A') {224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 218, 219, 220, 221, 222, 223} 329 | 330 | 331 | These residue indices can then be visualized on a protein structure 332 | using molecule visualization software such as PyMol. Comparing the same 333 | location in proteins which don’t have this shape-mer can also be 334 | informative. 335 | -------------------------------------------------------------------------------- /docs/build/html/_sources/index.rst.txt: -------------------------------------------------------------------------------- 1 | .. Geometricus documentation master file, created by 2 | sphinx-quickstart on Thu Apr 23 20:38:06 2020. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to Geometricus's documentation! 7 | ======================================= 8 | 9 | .. toctree:: 10 | :maxdepth: 3 11 | :caption: Contents: 12 | 13 | install 14 | getting_started.ipynb 15 | api/modules 16 | 17 | 18 | Indices and tables 19 | ================== 20 | 21 | * :ref:`genindex` 22 | * :ref:`modindex` 23 | * :ref:`search` 24 | -------------------------------------------------------------------------------- /docs/build/html/_sources/install.rst.txt: -------------------------------------------------------------------------------- 1 | Installing Geometricus 2 | ====================== 3 | 4 | Geometricus is a Python (3.9+) package with NumPy, SciPy, Numba, PyTorch and ProDy as dependencies. 5 | It can be installed using pip:: 6 | 7 | pip install git+https://github.com/TurtleTools/geometricus.git 8 | -------------------------------------------------------------------------------- /docs/build/html/_static/custom.css: -------------------------------------------------------------------------------- 1 | /* This file intentionally left blank. */ 2 | -------------------------------------------------------------------------------- /docs/build/html/_static/doctools.js: -------------------------------------------------------------------------------- 1 | /* 2 | * doctools.js 3 | * ~~~~~~~~~~~ 4 | * 5 | * Base JavaScript utilities for all Sphinx HTML documentation. 6 | * 7 | * :copyright: Copyright 2007-2022 by the Sphinx team, see AUTHORS. 8 | * :license: BSD, see LICENSE for details. 9 | * 10 | */ 11 | "use strict"; 12 | 13 | const BLACKLISTED_KEY_CONTROL_ELEMENTS = new Set([ 14 | "TEXTAREA", 15 | "INPUT", 16 | "SELECT", 17 | "BUTTON", 18 | ]); 19 | 20 | const _ready = (callback) => { 21 | if (document.readyState !== "loading") { 22 | callback(); 23 | } else { 24 | document.addEventListener("DOMContentLoaded", callback); 25 | } 26 | }; 27 | 28 | /** 29 | * Small JavaScript module for the documentation. 30 | */ 31 | const Documentation = { 32 | init: () => { 33 | Documentation.initDomainIndexTable(); 34 | Documentation.initOnKeyListeners(); 35 | }, 36 | 37 | /** 38 | * i18n support 39 | */ 40 | TRANSLATIONS: {}, 41 | PLURAL_EXPR: (n) => (n === 1 ? 0 : 1), 42 | LOCALE: "unknown", 43 | 44 | // gettext and ngettext don't access this so that the functions 45 | // can safely bound to a different name (_ = Documentation.gettext) 46 | gettext: (string) => { 47 | const translated = Documentation.TRANSLATIONS[string]; 48 | switch (typeof translated) { 49 | case "undefined": 50 | return string; // no translation 51 | case "string": 52 | return translated; // translation exists 53 | default: 54 | return translated[0]; // (singular, plural) translation tuple exists 55 | } 56 | }, 57 | 58 | ngettext: (singular, plural, n) => { 59 | const translated = Documentation.TRANSLATIONS[singular]; 60 | if (typeof translated !== "undefined") 61 | return translated[Documentation.PLURAL_EXPR(n)]; 62 | return n === 1 ? singular : plural; 63 | }, 64 | 65 | addTranslations: (catalog) => { 66 | Object.assign(Documentation.TRANSLATIONS, catalog.messages); 67 | Documentation.PLURAL_EXPR = new Function( 68 | "n", 69 | `return (${catalog.plural_expr})` 70 | ); 71 | Documentation.LOCALE = catalog.locale; 72 | }, 73 | 74 | /** 75 | * helper function to focus on search bar 76 | */ 77 | focusSearchBar: () => { 78 | document.querySelectorAll("input[name=q]")[0]?.focus(); 79 | }, 80 | 81 | /** 82 | * Initialise the domain index toggle buttons 83 | */ 84 | initDomainIndexTable: () => { 85 | const toggler = (el) => { 86 | const idNumber = el.id.substr(7); 87 | const toggledRows = document.querySelectorAll(`tr.cg-${idNumber}`); 88 | if (el.src.substr(-9) === "minus.png") { 89 | el.src = `${el.src.substr(0, el.src.length - 9)}plus.png`; 90 | toggledRows.forEach((el) => (el.style.display = "none")); 91 | } else { 92 | el.src = `${el.src.substr(0, el.src.length - 8)}minus.png`; 93 | toggledRows.forEach((el) => (el.style.display = "")); 94 | } 95 | }; 96 | 97 | const togglerElements = document.querySelectorAll("img.toggler"); 98 | togglerElements.forEach((el) => 99 | el.addEventListener("click", (event) => toggler(event.currentTarget)) 100 | ); 101 | togglerElements.forEach((el) => (el.style.display = "")); 102 | if (DOCUMENTATION_OPTIONS.COLLAPSE_INDEX) togglerElements.forEach(toggler); 103 | }, 104 | 105 | initOnKeyListeners: () => { 106 | // only install a listener if it is really needed 107 | if ( 108 | !DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS && 109 | !DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS 110 | ) 111 | return; 112 | 113 | document.addEventListener("keydown", (event) => { 114 | // bail for input elements 115 | if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return; 116 | // bail with special keys 117 | if (event.altKey || event.ctrlKey || event.metaKey) return; 118 | 119 | if (!event.shiftKey) { 120 | switch (event.key) { 121 | case "ArrowLeft": 122 | if (!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break; 123 | 124 | const prevLink = document.querySelector('link[rel="prev"]'); 125 | if (prevLink && prevLink.href) { 126 | window.location.href = prevLink.href; 127 | event.preventDefault(); 128 | } 129 | break; 130 | case "ArrowRight": 131 | if (!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break; 132 | 133 | const nextLink = document.querySelector('link[rel="next"]'); 134 | if (nextLink && nextLink.href) { 135 | window.location.href = nextLink.href; 136 | event.preventDefault(); 137 | } 138 | break; 139 | } 140 | } 141 | 142 | // some keyboard layouts may need Shift to get / 143 | switch (event.key) { 144 | case "/": 145 | if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) break; 146 | Documentation.focusSearchBar(); 147 | event.preventDefault(); 148 | } 149 | }); 150 | }, 151 | }; 152 | 153 | // quick alias for translations 154 | const _ = Documentation.gettext; 155 | 156 | _ready(Documentation.init); 157 | -------------------------------------------------------------------------------- /docs/build/html/_static/documentation_options.js: -------------------------------------------------------------------------------- 1 | var DOCUMENTATION_OPTIONS = { 2 | URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'), 3 | VERSION: '0.5.0', 4 | LANGUAGE: 'en', 5 | COLLAPSE_INDEX: false, 6 | BUILDER: 'html', 7 | FILE_SUFFIX: '.html', 8 | LINK_SUFFIX: '.html', 9 | HAS_SOURCE: true, 10 | SOURCELINK_SUFFIX: '.txt', 11 | NAVIGATION_WITH_KEYS: false, 12 | SHOW_SEARCH_SUMMARY: true, 13 | ENABLE_SEARCH_SHORTCUTS: true, 14 | }; -------------------------------------------------------------------------------- /docs/build/html/_static/file.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurtleTools/geometricus/33b29e7e6074c867640455585d339f6319ec2e8d/docs/build/html/_static/file.png -------------------------------------------------------------------------------- /docs/build/html/_static/language_data.js: -------------------------------------------------------------------------------- 1 | /* 2 | * language_data.js 3 | * ~~~~~~~~~~~~~~~~ 4 | * 5 | * This script contains the language-specific data used by searchtools.js, 6 | * namely the list of stopwords, stemmer, scorer and splitter. 7 | * 8 | * :copyright: Copyright 2007-2022 by the Sphinx team, see AUTHORS. 9 | * :license: BSD, see LICENSE for details. 10 | * 11 | */ 12 | 13 | var stopwords = ["a", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "near", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"]; 14 | 15 | 16 | /* Non-minified version is copied as a separate JS file, is available */ 17 | 18 | /** 19 | * Porter Stemmer 20 | */ 21 | var Stemmer = function() { 22 | 23 | var step2list = { 24 | ational: 'ate', 25 | tional: 'tion', 26 | enci: 'ence', 27 | anci: 'ance', 28 | izer: 'ize', 29 | bli: 'ble', 30 | alli: 'al', 31 | entli: 'ent', 32 | eli: 'e', 33 | ousli: 'ous', 34 | ization: 'ize', 35 | ation: 'ate', 36 | ator: 'ate', 37 | alism: 'al', 38 | iveness: 'ive', 39 | fulness: 'ful', 40 | ousness: 'ous', 41 | aliti: 'al', 42 | iviti: 'ive', 43 | biliti: 'ble', 44 | logi: 'log' 45 | }; 46 | 47 | var step3list = { 48 | icate: 'ic', 49 | ative: '', 50 | alize: 'al', 51 | iciti: 'ic', 52 | ical: 'ic', 53 | ful: '', 54 | ness: '' 55 | }; 56 | 57 | var c = "[^aeiou]"; // consonant 58 | var v = "[aeiouy]"; // vowel 59 | var C = c + "[^aeiouy]*"; // consonant sequence 60 | var V = v + "[aeiou]*"; // vowel sequence 61 | 62 | var mgr0 = "^(" + C + ")?" + V + C; // [C]VC... is m>0 63 | var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$"; // [C]VC[V] is m=1 64 | var mgr1 = "^(" + C + ")?" + V + C + V + C; // [C]VCVC... is m>1 65 | var s_v = "^(" + C + ")?" + v; // vowel in stem 66 | 67 | this.stemWord = function (w) { 68 | var stem; 69 | var suffix; 70 | var firstch; 71 | var origword = w; 72 | 73 | if (w.length < 3) 74 | return w; 75 | 76 | var re; 77 | var re2; 78 | var re3; 79 | var re4; 80 | 81 | firstch = w.substr(0,1); 82 | if (firstch == "y") 83 | w = firstch.toUpperCase() + w.substr(1); 84 | 85 | // Step 1a 86 | re = /^(.+?)(ss|i)es$/; 87 | re2 = /^(.+?)([^s])s$/; 88 | 89 | if (re.test(w)) 90 | w = w.replace(re,"$1$2"); 91 | else if (re2.test(w)) 92 | w = w.replace(re2,"$1$2"); 93 | 94 | // Step 1b 95 | re = /^(.+?)eed$/; 96 | re2 = /^(.+?)(ed|ing)$/; 97 | if (re.test(w)) { 98 | var fp = re.exec(w); 99 | re = new RegExp(mgr0); 100 | if (re.test(fp[1])) { 101 | re = /.$/; 102 | w = w.replace(re,""); 103 | } 104 | } 105 | else if (re2.test(w)) { 106 | var fp = re2.exec(w); 107 | stem = fp[1]; 108 | re2 = new RegExp(s_v); 109 | if (re2.test(stem)) { 110 | w = stem; 111 | re2 = /(at|bl|iz)$/; 112 | re3 = new RegExp("([^aeiouylsz])\\1$"); 113 | re4 = new RegExp("^" + C + v + "[^aeiouwxy]$"); 114 | if (re2.test(w)) 115 | w = w + "e"; 116 | else if (re3.test(w)) { 117 | re = /.$/; 118 | w = w.replace(re,""); 119 | } 120 | else if (re4.test(w)) 121 | w = w + "e"; 122 | } 123 | } 124 | 125 | // Step 1c 126 | re = /^(.+?)y$/; 127 | if (re.test(w)) { 128 | var fp = re.exec(w); 129 | stem = fp[1]; 130 | re = new RegExp(s_v); 131 | if (re.test(stem)) 132 | w = stem + "i"; 133 | } 134 | 135 | // Step 2 136 | re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/; 137 | if (re.test(w)) { 138 | var fp = re.exec(w); 139 | stem = fp[1]; 140 | suffix = fp[2]; 141 | re = new RegExp(mgr0); 142 | if (re.test(stem)) 143 | w = stem + step2list[suffix]; 144 | } 145 | 146 | // Step 3 147 | re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/; 148 | if (re.test(w)) { 149 | var fp = re.exec(w); 150 | stem = fp[1]; 151 | suffix = fp[2]; 152 | re = new RegExp(mgr0); 153 | if (re.test(stem)) 154 | w = stem + step3list[suffix]; 155 | } 156 | 157 | // Step 4 158 | re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/; 159 | re2 = /^(.+?)(s|t)(ion)$/; 160 | if (re.test(w)) { 161 | var fp = re.exec(w); 162 | stem = fp[1]; 163 | re = new RegExp(mgr1); 164 | if (re.test(stem)) 165 | w = stem; 166 | } 167 | else if (re2.test(w)) { 168 | var fp = re2.exec(w); 169 | stem = fp[1] + fp[2]; 170 | re2 = new RegExp(mgr1); 171 | if (re2.test(stem)) 172 | w = stem; 173 | } 174 | 175 | // Step 5 176 | re = /^(.+?)e$/; 177 | if (re.test(w)) { 178 | var fp = re.exec(w); 179 | stem = fp[1]; 180 | re = new RegExp(mgr1); 181 | re2 = new RegExp(meq1); 182 | re3 = new RegExp("^" + C + v + "[^aeiouwxy]$"); 183 | if (re.test(stem) || (re2.test(stem) && !(re3.test(stem)))) 184 | w = stem; 185 | } 186 | re = /ll$/; 187 | re2 = new RegExp(mgr1); 188 | if (re.test(w) && re2.test(w)) { 189 | re = /.$/; 190 | w = w.replace(re,""); 191 | } 192 | 193 | // and turn initial Y back to y 194 | if (firstch == "y") 195 | w = firstch.toLowerCase() + w.substr(1); 196 | return w; 197 | } 198 | } 199 | 200 | -------------------------------------------------------------------------------- /docs/build/html/_static/minus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurtleTools/geometricus/33b29e7e6074c867640455585d339f6319ec2e8d/docs/build/html/_static/minus.png -------------------------------------------------------------------------------- /docs/build/html/_static/plus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurtleTools/geometricus/33b29e7e6074c867640455585d339f6319ec2e8d/docs/build/html/_static/plus.png -------------------------------------------------------------------------------- /docs/build/html/_static/pygments.css: -------------------------------------------------------------------------------- 1 | pre { line-height: 125%; } 2 | td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } 3 | span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } 4 | td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } 5 | span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } 6 | .highlight .hll { background-color: #ffffcc } 7 | .highlight { background: #f8f8f8; } 8 | .highlight .c { color: #8f5902; font-style: italic } /* Comment */ 9 | .highlight .err { color: #a40000; border: 1px solid #ef2929 } /* Error */ 10 | .highlight .g { color: #000000 } /* Generic */ 11 | .highlight .k { color: #004461; font-weight: bold } /* Keyword */ 12 | .highlight .l { color: #000000 } /* Literal */ 13 | .highlight .n { color: #000000 } /* Name */ 14 | .highlight .o { color: #582800 } /* Operator */ 15 | .highlight .x { color: #000000 } /* Other */ 16 | .highlight .p { color: #000000; font-weight: bold } /* Punctuation */ 17 | .highlight .ch { color: #8f5902; font-style: italic } /* Comment.Hashbang */ 18 | .highlight .cm { color: #8f5902; font-style: italic } /* Comment.Multiline */ 19 | .highlight .cp { color: #8f5902 } /* Comment.Preproc */ 20 | .highlight .cpf { color: #8f5902; font-style: italic } /* Comment.PreprocFile */ 21 | .highlight .c1 { color: #8f5902; font-style: italic } /* Comment.Single */ 22 | .highlight .cs { color: #8f5902; font-style: italic } /* Comment.Special */ 23 | .highlight .gd { color: #a40000 } /* Generic.Deleted */ 24 | .highlight .ge { color: #000000; font-style: italic } /* Generic.Emph */ 25 | .highlight .gr { color: #ef2929 } /* Generic.Error */ 26 | .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ 27 | .highlight .gi { color: #00A000 } /* Generic.Inserted */ 28 | .highlight .go { color: #888888 } /* Generic.Output */ 29 | .highlight .gp { color: #745334 } /* Generic.Prompt */ 30 | .highlight .gs { color: #000000; font-weight: bold } /* Generic.Strong */ 31 | .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ 32 | .highlight .gt { color: #a40000; font-weight: bold } /* Generic.Traceback */ 33 | .highlight .kc { color: #004461; font-weight: bold } /* Keyword.Constant */ 34 | .highlight .kd { color: #004461; font-weight: bold } /* Keyword.Declaration */ 35 | .highlight .kn { color: #004461; font-weight: bold } /* Keyword.Namespace */ 36 | .highlight .kp { color: #004461; font-weight: bold } /* Keyword.Pseudo */ 37 | .highlight .kr { color: #004461; font-weight: bold } /* Keyword.Reserved */ 38 | .highlight .kt { color: #004461; font-weight: bold } /* Keyword.Type */ 39 | .highlight .ld { color: #000000 } /* Literal.Date */ 40 | .highlight .m { color: #990000 } /* Literal.Number */ 41 | .highlight .s { color: #4e9a06 } /* Literal.String */ 42 | .highlight .na { color: #c4a000 } /* Name.Attribute */ 43 | .highlight .nb { color: #004461 } /* Name.Builtin */ 44 | .highlight .nc { color: #000000 } /* Name.Class */ 45 | .highlight .no { color: #000000 } /* Name.Constant */ 46 | .highlight .nd { color: #888888 } /* Name.Decorator */ 47 | .highlight .ni { color: #ce5c00 } /* Name.Entity */ 48 | .highlight .ne { color: #cc0000; font-weight: bold } /* Name.Exception */ 49 | .highlight .nf { color: #000000 } /* Name.Function */ 50 | .highlight .nl { color: #f57900 } /* Name.Label */ 51 | .highlight .nn { color: #000000 } /* Name.Namespace */ 52 | .highlight .nx { color: #000000 } /* Name.Other */ 53 | .highlight .py { color: #000000 } /* Name.Property */ 54 | .highlight .nt { color: #004461; font-weight: bold } /* Name.Tag */ 55 | .highlight .nv { color: #000000 } /* Name.Variable */ 56 | .highlight .ow { color: #004461; font-weight: bold } /* Operator.Word */ 57 | .highlight .pm { color: #000000; font-weight: bold } /* Punctuation.Marker */ 58 | .highlight .w { color: #f8f8f8; text-decoration: underline } /* Text.Whitespace */ 59 | .highlight .mb { color: #990000 } /* Literal.Number.Bin */ 60 | .highlight .mf { color: #990000 } /* Literal.Number.Float */ 61 | .highlight .mh { color: #990000 } /* Literal.Number.Hex */ 62 | .highlight .mi { color: #990000 } /* Literal.Number.Integer */ 63 | .highlight .mo { color: #990000 } /* Literal.Number.Oct */ 64 | .highlight .sa { color: #4e9a06 } /* Literal.String.Affix */ 65 | .highlight .sb { color: #4e9a06 } /* Literal.String.Backtick */ 66 | .highlight .sc { color: #4e9a06 } /* Literal.String.Char */ 67 | .highlight .dl { color: #4e9a06 } /* Literal.String.Delimiter */ 68 | .highlight .sd { color: #8f5902; font-style: italic } /* Literal.String.Doc */ 69 | .highlight .s2 { color: #4e9a06 } /* Literal.String.Double */ 70 | .highlight .se { color: #4e9a06 } /* Literal.String.Escape */ 71 | .highlight .sh { color: #4e9a06 } /* Literal.String.Heredoc */ 72 | .highlight .si { color: #4e9a06 } /* Literal.String.Interpol */ 73 | .highlight .sx { color: #4e9a06 } /* Literal.String.Other */ 74 | .highlight .sr { color: #4e9a06 } /* Literal.String.Regex */ 75 | .highlight .s1 { color: #4e9a06 } /* Literal.String.Single */ 76 | .highlight .ss { color: #4e9a06 } /* Literal.String.Symbol */ 77 | .highlight .bp { color: #3465a4 } /* Name.Builtin.Pseudo */ 78 | .highlight .fm { color: #000000 } /* Name.Function.Magic */ 79 | .highlight .vc { color: #000000 } /* Name.Variable.Class */ 80 | .highlight .vg { color: #000000 } /* Name.Variable.Global */ 81 | .highlight .vi { color: #000000 } /* Name.Variable.Instance */ 82 | .highlight .vm { color: #000000 } /* Name.Variable.Magic */ 83 | .highlight .il { color: #990000 } /* Literal.Number.Integer.Long */ -------------------------------------------------------------------------------- /docs/build/html/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Welcome to Geometricus’s documentation! — Geometricus 0.5.0 documentation 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 |
27 | 28 | 89 |
90 |
91 | 92 | 93 |
94 | 95 |
96 |

Welcome to Geometricus’s documentation!

97 | 121 |
122 |
123 |

Indices and tables

124 | 129 |
130 | 131 | 132 |
133 | 134 |
135 |
136 |
137 |
138 | 149 | 150 | 151 | 152 | Fork me on GitHub 153 | 154 | 155 | 156 | 157 | 158 | -------------------------------------------------------------------------------- /docs/build/html/install.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Installing Geometricus — Geometricus 0.5.0 documentation 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 |
28 | 29 | 91 |
92 |
93 | 94 | 95 |
96 | 97 |
98 |

Installing Geometricus

99 |

Geometricus is a Python (3.9+) package with NumPy, SciPy, Numba, PyTorch and ProDy as dependencies. 100 | It can be installed using pip:

101 |
pip install git+https://github.com/TurtleTools/geometricus.git
102 | 
103 |
104 |
105 | 106 | 107 |
108 | 109 |
110 |
111 |
112 |
113 | 124 | 125 | 126 | 127 | Fork me on GitHub 128 | 129 | 130 | 131 | 132 | 133 | -------------------------------------------------------------------------------- /docs/build/html/objects.inv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurtleTools/geometricus/33b29e7e6074c867640455585d339f6319ec2e8d/docs/build/html/objects.inv -------------------------------------------------------------------------------- /docs/build/html/py-modindex.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Python Module Index — Geometricus 0.5.0 documentation 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 |
28 | 29 | 89 |
90 |
91 | 92 | 93 |
94 | 95 | 96 |

Python Module Index

97 | 98 |
99 | g 100 |
101 | 102 | 103 | 104 | 106 | 107 | 109 | 112 | 113 | 114 | 117 | 118 | 119 | 122 | 123 | 124 | 127 |
 
105 | g
110 | geometricus 111 |
    115 | geometricus.geometricus 116 |
    120 | geometricus.moment_utility 121 |
    125 | geometricus.protein_utility 126 |
128 | 129 | 130 |
131 | 132 |
133 |
134 |
135 |
136 | 144 | 145 | 146 | 147 | Fork me on GitHub 148 | 149 | 150 | 151 | 152 | 153 | -------------------------------------------------------------------------------- /docs/build/html/search.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Search — Geometricus 0.5.0 documentation 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 |
31 | 32 | 82 |
83 |
84 | 85 | 86 |
87 | 88 |

Search

89 | 90 | 98 | 99 | 100 |

101 | Searching for multiple words only shows matches that contain 102 | all words. 103 |

104 | 105 | 106 |
107 | 108 | 109 | 110 |
111 | 112 | 113 | 114 |
115 | 116 |
117 | 118 | 119 |
120 | 121 |
122 |
123 |
124 |
125 | 133 | 134 | 135 | 136 | Fork me on GitHub 137 | 138 | 139 | 140 | 141 | 142 | -------------------------------------------------------------------------------- /docs/geometricus.geometricus.rst: -------------------------------------------------------------------------------- 1 | geometricus.geometricus module 2 | ============================== 3 | 4 | .. automodule:: geometricus.geometricus 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/geometricus.model_utility.rst: -------------------------------------------------------------------------------- 1 | geometricus.model\_utility module 2 | ================================= 3 | 4 | .. automodule:: geometricus.model_utility 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/geometricus.moment_invariants.rst: -------------------------------------------------------------------------------- 1 | geometricus.moment\_invariants module 2 | ===================================== 3 | 4 | .. automodule:: geometricus.moment_invariants 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/geometricus.moment_utility.rst: -------------------------------------------------------------------------------- 1 | geometricus.moment\_utility module 2 | ================================== 3 | 4 | .. automodule:: geometricus.moment_utility 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/geometricus.protein_utility.rst: -------------------------------------------------------------------------------- 1 | geometricus.protein\_utility module 2 | =================================== 3 | 4 | .. automodule:: geometricus.protein_utility 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/geometricus.rst: -------------------------------------------------------------------------------- 1 | geometricus package 2 | =================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | geometricus.geometricus 11 | geometricus.model_utility 12 | geometricus.moment_invariants 13 | geometricus.moment_utility 14 | geometricus.protein_utility 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. automodule:: geometricus 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Welcome to Geometricus’s documentation! — Geometricus 0.5.0 documentation 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 |
27 | 28 | 89 |
90 |
91 | 92 | 93 |
94 | 95 |
96 |

Welcome to Geometricus’s documentation!

97 | 121 |
122 |
123 |

Indices and tables

124 | 129 |
130 | 131 | 132 |
133 | 134 |
135 |
136 |
137 |
138 | 149 | 150 | 151 | 152 | Fork me on GitHub 153 | 154 | 155 | 156 | 157 | 158 | -------------------------------------------------------------------------------- /docs/install.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Installing Geometricus — Geometricus 0.5.0 documentation 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 |
28 | 29 | 91 |
92 |
93 | 94 | 95 |
96 | 97 |
98 |

Installing Geometricus

99 |

Geometricus is a Python (3.9+) package with NumPy, SciPy, Numba, PyTorch and ProDy as dependencies. 100 | It can be installed using pip:

101 |
pip install git+https://github.com/TurtleTools/geometricus.git
102 | 
103 |
104 |
105 | 106 | 107 |
108 | 109 |
110 |
111 |
112 |
113 | 124 | 125 | 126 | 127 | Fork me on GitHub 128 | 129 | 130 | 131 | 132 | 133 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/modules.rst: -------------------------------------------------------------------------------- 1 | geometricus 2 | =========== 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | geometricus 8 | -------------------------------------------------------------------------------- /docs/objects.inv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurtleTools/geometricus/33b29e7e6074c867640455585d339f6319ec2e8d/docs/objects.inv -------------------------------------------------------------------------------- /docs/py-modindex.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Python Module Index — Geometricus 0.5.0 documentation 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 |
28 | 29 | 89 |
90 |
91 | 92 | 93 |
94 | 95 | 96 |

Python Module Index

97 | 98 |
99 | g 100 |
101 | 102 | 103 | 104 | 106 | 107 | 109 | 112 | 113 | 114 | 117 | 118 | 119 | 122 | 123 | 124 | 127 |
 
105 | g
110 | geometricus 111 |
    115 | geometricus.geometricus 116 |
    120 | geometricus.moment_utility 121 |
    125 | geometricus.protein_utility 126 |
128 | 129 | 130 |
131 | 132 |
133 |
134 |
135 |
136 | 144 | 145 | 146 | 147 | Fork me on GitHub 148 | 149 | 150 | 151 | 152 | 153 | -------------------------------------------------------------------------------- /docs/search.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Search — Geometricus 0.5.0 documentation 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 |
31 | 32 | 82 |
83 |
84 | 85 | 86 |
87 | 88 |

Search

89 | 90 | 98 | 99 | 100 |

101 | Searching for multiple words only shows matches that contain 102 | all words. 103 |

104 | 105 | 106 |
107 | 108 | 109 | 110 |
111 | 112 | 113 | 114 |
115 | 116 |
117 | 118 | 119 |
120 | 121 |
122 |
123 |
124 |
125 | 133 | 134 | 135 | 136 | Fork me on GitHub 137 | 138 | 139 | 140 | 141 | 142 | -------------------------------------------------------------------------------- /docs/source/api/geometricus.rst: -------------------------------------------------------------------------------- 1 | geometricus package 2 | =================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | geometricus.geometricus module 8 | ------------------------------ 9 | 10 | .. automodule:: geometricus.geometricus 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | geometricus.moment\_utility module 16 | ---------------------------------- 17 | 18 | .. automodule:: geometricus.moment_utility 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | geometricus.protein\_utility module 24 | ----------------------------------- 25 | 26 | .. automodule:: geometricus.protein_utility 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /docs/source/api/modules.rst: -------------------------------------------------------------------------------- 1 | API Reference 2 | ============= 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | geometricus 8 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | import os 14 | import sys 15 | 16 | rundir = os.path.dirname(__file__) 17 | sys.path.insert(0, os.path.abspath(rundir + '/../..')) 18 | 19 | # -- Project information ----------------------------------------------------- 20 | 21 | project = 'Geometricus' 22 | copyright = '2020, Janani Durairaj, Mehmet Akdel' 23 | author = 'Janani Durairaj, Mehmet Akdel' 24 | 25 | 26 | import geometricus 27 | version = geometricus.__version__ 28 | # The full version, including alpha/beta/rc tags. 29 | release = geometricus.__version__ 30 | 31 | # -- General configuration --------------------------------------------------- 32 | 33 | # Add any Sphinx extension module names here, as strings. They can be 34 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 35 | # ones. 36 | extensions = ["sphinx.ext.autodoc", 37 | "sphinx.ext.doctest", 38 | 'sphinx.ext.mathjax', 39 | 'sphinx.ext.viewcode', 40 | 'sphinx.ext.githubpages', 41 | "sphinx.ext.napoleon", 42 | "nbsphinx", 43 | ] 44 | 45 | # Add any paths that contain templates here, relative to this directory. 46 | templates_path = ['_templates'] 47 | source_suffix = '.rst' 48 | master_doc = 'index' 49 | autodoc_member_order = 'bysource' 50 | 51 | # List of patterns, relative to source directory, that match files and 52 | # directories to ignore when looking for source files. 53 | # This pattern also affects html_static_path and html_extra_path. 54 | exclude_patterns = ['_build', '.DS_Store'] 55 | 56 | # -- Options for HTML output ------------------------------------------------- 57 | 58 | # The theme to use for HTML and HTML Help pages. See the documentation for 59 | # a list of builtin themes. 60 | # 61 | 62 | on_rtd = os.environ.get('READTHEDOCS', None) == 'True' 63 | 64 | html_theme = 'alabaster' 65 | # pygments_style = 'sphinx' 66 | # Add any paths that contain custom static files (such as style sheets) here, 67 | # relative to this directory. They are copied after the builtin static files, 68 | # so a file named "default.css" will overwrite the builtin "default.css". 69 | html_static_path = ['_static'] 70 | html_sidebars = { 71 | '**': [ 72 | 'about.html', 73 | 'navigation.html', 74 | 'relations.html', # needs 'show_related': True theme option to display 75 | 'searchbox.html', 76 | 'donate.html', 77 | ] 78 | } 79 | 80 | html_theme_options = { 81 | 'logo': 'geometricus_logo.png', 82 | 'description': 'Fast, structure-based, alignment-free protein embedding', 83 | 'github_user': 'TurtleTools', 84 | 'github_repo': 'geometricus', 85 | 'fixed_sidebar': True, 86 | 'github_banner': True, 87 | 'github_button': True 88 | 89 | } 90 | 91 | # -- Options for HTMLHelp output ------------------------------------------ 92 | 93 | # Output file base name for HTML help builder. 94 | htmlhelp_basename = 'Geometricusdoc' 95 | # -- Options for LaTeX output --------------------------------------------- 96 | 97 | latex_elements = { 98 | # The paper size ('letterpaper' or 'a4paper'). 99 | # 100 | # 'papersize': 'letterpaper', 101 | 102 | # The font size ('10pt', '11pt' or '12pt'). 103 | # 104 | # 'pointsize': '10pt', 105 | 106 | # Additional stuff for the LaTeX preamble. 107 | # 108 | # 'preamble': '', 109 | 110 | # Latex figure (float) alignment 111 | # 112 | # 'figure_align': 'htbp', 113 | } 114 | 115 | # Grouping the document tree into LaTeX files. List of tuples 116 | # (source start file, target name, title, 117 | # author, documentclass [howto, manual, or own class]). 118 | latex_documents = [ 119 | (master_doc, 'Geometricus.tex', 'Geometricus Documentation', 120 | 'Janani Durairaj, Mehmet Akdel', 'manual'), 121 | ] 122 | 123 | 124 | # -- Options for manual page output --------------------------------------- 125 | 126 | # One entry per manual page. List of tuples 127 | # (source start file, name, description, authors, manual section). 128 | man_pages = [ 129 | (master_doc, 'geometricus', 'Geometricus Documentation', 130 | [author], 1) 131 | ] 132 | 133 | 134 | # -- Options for Texinfo output ------------------------------------------- 135 | 136 | # Grouping the document tree into Texinfo files. List of tuples 137 | # (source start file, target name, title, author, 138 | # dir menu entry, description, category) 139 | texinfo_documents = [ 140 | (master_doc, 'Geometricus', 'Geometricus Documentation', 141 | author, 'Geometricus', 'Fast, structure-based, alignment-free protein embedding', 142 | 'Miscellaneous'), 143 | ] 144 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. Geometricus documentation master file, created by 2 | sphinx-quickstart on Thu Apr 23 20:38:06 2020. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to Geometricus's documentation! 7 | ======================================= 8 | 9 | .. toctree:: 10 | :maxdepth: 3 11 | :caption: Contents: 12 | 13 | install 14 | getting_started.ipynb 15 | api/modules 16 | 17 | 18 | Indices and tables 19 | ================== 20 | 21 | * :ref:`genindex` 22 | * :ref:`modindex` 23 | * :ref:`search` 24 | -------------------------------------------------------------------------------- /docs/source/install.rst: -------------------------------------------------------------------------------- 1 | Installing Geometricus 2 | ====================== 3 | 4 | Geometricus is a Python (3.9+) package with NumPy, SciPy, Numba, PyTorch and ProDy as dependencies. 5 | It can be installed using pip:: 6 | 7 | pip install git+https://github.com/TurtleTools/geometricus.git 8 | -------------------------------------------------------------------------------- /geometricus/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.5.0" 2 | 3 | from .geometricus import Geometricus 4 | from .model_utility import ShapemerLearn 5 | from .moment_invariants import MultipleMomentInvariants, MomentInvariants, SplitType, SplitInfo, \ 6 | get_invariants_for_structures 7 | from .moment_utility import MomentType 8 | from .protein_utility import Structure 9 | 10 | __all__ = ["Geometricus", "ShapemerLearn", "MultipleMomentInvariants", "MomentInvariants", "SplitType", "SplitInfo", 11 | "Structure", "MomentType", "get_invariants_for_structures"] 12 | -------------------------------------------------------------------------------- /geometricus/geometricus.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | from pathlib import Path 3 | from tqdm import tqdm 4 | from typing import List, Tuple, Dict, Set, Union, Generator, Optional 5 | from collections import defaultdict 6 | from dataclasses import dataclass 7 | 8 | import numpy as np 9 | import numba as nb 10 | from geometricus.model_utility import ShapemerLearn 11 | from geometricus.moment_invariants import MultipleMomentInvariants, SplitInfo, get_invariants_for_structures 12 | from geometricus.protein_utility import ProteinKey 13 | 14 | Shapemer = Union[bytes, tuple] 15 | """ 16 | An integer (in the case of model) or a list of integers for each moment (the old way) 17 | """ 18 | Shapemers = List[Shapemer] 19 | """ 20 | A list of Shapemer types 21 | """ 22 | 23 | 24 | @dataclass 25 | class Geometricus: 26 | """ 27 | Class for storing embedding information 28 | """ 29 | protein_keys: List[ProteinKey] 30 | """ 31 | List of protein names = rows of the output embedding 32 | """ 33 | shapemer_to_protein_indices: Dict[Shapemer, List[Tuple[ProteinKey, int]]] 34 | """ 35 | Maps each shapemer to the proteins which have it and to the corresponding residue indices within these proteins 36 | """ 37 | proteins_to_shapemers: Dict[ProteinKey, Shapemers] 38 | """ 39 | Maps each protein to a list of shapemers in order of its residues\n\n 40 | """ 41 | shapemer_keys: Shapemers 42 | """ 43 | List of shapemers found 44 | """ 45 | proteins_to_shapemer_residue_indices: Dict[ProteinKey, Shapemers] 46 | """ 47 | Maps each protein to a set of residue indices covered by the current residue's shapemer in order of its residues\n\n 48 | """ 49 | resolution: Union[float, np.ndarray] = None 50 | """ 51 | Multiplier that determines how coarse/fine-grained each shape is. 52 | This can be a single number, multiplied to all four moment invariants 53 | or a numpy array of four numbers, one for each invariant 54 | (This is for the old way of binning shapemers) 55 | """ 56 | 57 | @classmethod 58 | def from_protein_files(cls, 59 | input_files: Union[Path, str, List[str]], 60 | model: ShapemerLearn = None, 61 | split_infos: List[SplitInfo] = None, 62 | moment_types: List[str] = None, 63 | resolution: Union[float, np.ndarray] = None, 64 | n_threads: int = 1, 65 | verbose: bool = True): 66 | """ 67 | Creates a Geometricus object from protein structure files 68 | 69 | Parameters 70 | ---------- 71 | input_files 72 | Can be \n 73 | A list of structure files (.pdb, .pdb.gz, .cif, .cif.gz), 74 | A list of (structure_file, chain) 75 | A list of PDBIDs or PDBID_chain or (PDB ID, chain) 76 | A folder with input structure files, 77 | A file which lists structure filenames or "structure_filename, chain" on each line, 78 | A file which lists PDBIDs or PDBID_chain or PDBID, chain on each line 79 | model 80 | trained ShapemerLearn model 81 | if this is not None, shapemers are generated using the trained model 82 | and split_infos, moment_types, and resolution is ignored 83 | split_infos 84 | List of SplitInfo objects 85 | moment_types 86 | List of moment types to use 87 | resolution 88 | Multiplier that determines how coarse/fine-grained each shape is. 89 | This can be a single number, multiplied to all four moment invariants 90 | or a numpy array of four numbers, one for each invariant 91 | (This is for the old way of binning shapemers) 92 | n_threads 93 | Number of threads to use 94 | verbose 95 | Whether to print progress 96 | 97 | Returns 98 | ------- 99 | Geometricus object 100 | """ 101 | invariants, errors = get_invariants_for_structures(input_files, 102 | split_infos=split_infos, 103 | moment_types=moment_types, 104 | n_threads=n_threads, 105 | verbose=verbose) 106 | return cls.from_invariants( 107 | invariants, 108 | model=model, resolution=resolution) 109 | 110 | @classmethod 111 | def from_invariants( 112 | cls, 113 | invariants: Union[Generator[MultipleMomentInvariants], List[MultipleMomentInvariants]], 114 | protein_keys: Optional[List[ProteinKey]] = None, 115 | model: Optional[ShapemerLearn] = None, 116 | resolution: Optional[Union[float, np.ndarray]] = None, 117 | ): 118 | """ 119 | Make a GeometricusEmbedding object from a list of MultipleMomentInvariant objects 120 | 121 | Parameters 122 | ---------- 123 | invariants 124 | List of MultipleMomentInvariant objects 125 | protein_keys 126 | list of protein names = rows of the output embedding. 127 | if None, takes all keys in `invariants` 128 | model 129 | if given, uses this model to make the shapemers 130 | resolution 131 | multiplier that determines how coarse/fine-grained each shape is 132 | this can be a single number, multiplied to all four moment invariants 133 | or a numpy array of four numbers, one for each invariant 134 | (This is for the old way of binning shapemers) 135 | """ 136 | assert model is not None or resolution is not None, "Must provide either a model or resolution" 137 | if isinstance(resolution, np.ndarray): 138 | assert resolution.shape[0] == invariants[0].invariants[0].moments.shape[1] 139 | invariants: Dict[ProteinKey, MultipleMomentInvariants] = { 140 | x.name: x for x in invariants 141 | } 142 | if protein_keys is None: 143 | protein_keys: List[ProteinKey] = list(invariants.keys()) 144 | assert all(k in invariants for k in protein_keys) 145 | if model is None: 146 | proteins_to_shapemers = {k: invariants[k].get_shapemers_binned(resolution) for k in 147 | tqdm(protein_keys, total=len(protein_keys))} 148 | else: 149 | proteins_to_shapemers = {k: invariants[k].get_shapemers_model(model) for k in 150 | tqdm(protein_keys, total=len(protein_keys))} 151 | 152 | proteins_to_shapemer_residue_indices = {k: invariants[k].get_neighbors() for k in protein_keys} 153 | geometricus_class = cls( 154 | proteins_to_shapemers=proteins_to_shapemers, 155 | protein_keys=protein_keys, 156 | resolution=resolution, 157 | proteins_to_shapemer_residue_indices=proteins_to_shapemer_residue_indices, 158 | shapemer_keys=[], 159 | shapemer_to_protein_indices={}, 160 | ) 161 | geometricus_class.shapemer_to_protein_indices = geometricus_class.map_shapemers_to_indices() 162 | geometricus_class.shapemer_keys = sorted(list(geometricus_class.shapemer_to_protein_indices.keys())) 163 | return geometricus_class 164 | 165 | def map_shapemers_to_indices(self, protein_keys=None): 166 | """ 167 | Maps each shapemer to the proteins which have it and to the corresponding residue indices within these proteins 168 | Maps shapemer to (protein_key, residue_index) 169 | """ 170 | if protein_keys is None: 171 | protein_keys = self.protein_keys 172 | shapemer_to_protein_indices: Dict[ 173 | Shapemer, List[Tuple[ProteinKey, int]] 174 | ] = defaultdict(list) 175 | for key in protein_keys: 176 | for j, shapemer in enumerate(self.proteins_to_shapemers[key]): 177 | shapemer_to_protein_indices[shapemer].append((key, j)) 178 | return shapemer_to_protein_indices 179 | 180 | def map_protein_to_shapemer_indices(self, protein_keys=None, shapemer_keys=None): 181 | """ 182 | Maps each protein to a list of shapemer indices where the index corresponds to the shapemer in shapemer_keys 183 | in order of its residues\n\n 184 | """ 185 | if protein_keys is not None and shapemer_keys is None: 186 | shapemer_keys = sorted(list(self.map_shapemers_to_indices(protein_keys).keys())) 187 | elif protein_keys is None: 188 | protein_keys = self.protein_keys 189 | if shapemer_keys is None: 190 | shapemer_keys = self.shapemer_keys 191 | shapemer_index = {k: i for i, k in enumerate(shapemer_keys)} 192 | return { 193 | k: np.array([shapemer_index[x] for x in self.proteins_to_shapemers[k] if x in shapemer_index], 194 | dtype=int) 195 | for 196 | k in 197 | protein_keys}, shapemer_keys 198 | 199 | def map_shapemer_to_residues( 200 | self, shapemer: Shapemer 201 | ) -> Dict[ProteinKey, Set[int]]: 202 | """ 203 | Gets residue indices within a particular shapemer across all proteins. 204 | """ 205 | protein_to_shapemer_residues: Dict[ProteinKey, Set[int]] = defaultdict(set) 206 | for protein_key, residue_index in self.shapemer_to_protein_indices[shapemer]: 207 | shapemer_residues = self.proteins_to_shapemer_residue_indices[protein_key][residue_index] 208 | for residue in shapemer_residues: 209 | protein_to_shapemer_residues[protein_key].add(residue) 210 | 211 | return protein_to_shapemer_residues 212 | 213 | def get_count_matrix(self, protein_keys=None, shapemer_keys=None): 214 | if protein_keys is None: 215 | protein_keys = self.protein_keys 216 | proteins_to_shapemer_indices, shapemer_keys = self.map_protein_to_shapemer_indices(protein_keys, shapemer_keys) 217 | return make_count_matrix([proteins_to_shapemer_indices[k] for k in protein_keys], 218 | len(shapemer_keys)) 219 | 220 | 221 | @nb.njit(parallel=True) 222 | def make_count_matrix(residues_list, alphabet_size: int): 223 | out = np.zeros((len(residues_list), alphabet_size)) 224 | for i in nb.prange(len(residues_list)): 225 | for j in range(len(residues_list[i])): 226 | out[i, residues_list[i][j]] += 1 227 | return out 228 | -------------------------------------------------------------------------------- /geometricus/istarmap.py: -------------------------------------------------------------------------------- 1 | import multiprocessing.pool as mpp 2 | 3 | 4 | def istarmap(self, func, iterable, chunksize=1): 5 | """starmap-version of imap 6 | """ 7 | self._check_running() 8 | if chunksize < 1: 9 | raise ValueError( 10 | "Chunksize must be 1+, not {0:n}".format( 11 | chunksize)) 12 | 13 | task_batches = mpp.Pool._get_tasks(func, iterable, chunksize) 14 | result = mpp.IMapIterator(self) 15 | self._taskqueue.put( 16 | ( 17 | self._guarded_task_generation(result._job, 18 | mpp.starmapstar, 19 | task_batches), 20 | result._set_length 21 | )) 22 | return (item for chunk in result for item in chunk) 23 | 24 | 25 | mpp.Pool.istarmap = istarmap 26 | -------------------------------------------------------------------------------- /geometricus/model_utility.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from geometricus.moment_invariants import SPLIT_INFOS, MomentType 5 | import importlib.resources as importlib_resources 6 | 7 | 8 | class ShapemerLearn(torch.nn.Module): 9 | def __init__(self, hidden_layer_dimension=32, output_dimension=10, split_infos=SPLIT_INFOS): 10 | super(ShapemerLearn, self).__init__() 11 | self.split_infos = split_infos 12 | self.number_of_moments = len(split_infos) * len(MomentType) 13 | self.hidden_layer_dimension = hidden_layer_dimension 14 | self.output_dimension = output_dimension 15 | self.linear_segment = torch.nn.Sequential( 16 | torch.nn.Linear(self.number_of_moments, hidden_layer_dimension), 17 | torch.nn.BatchNorm1d(hidden_layer_dimension), 18 | torch.nn.ReLU(), 19 | torch.nn.Linear(hidden_layer_dimension, hidden_layer_dimension), 20 | torch.nn.BatchNorm1d(hidden_layer_dimension), 21 | torch.nn.ReLU(), 22 | torch.nn.Linear(hidden_layer_dimension, hidden_layer_dimension), 23 | torch.nn.BatchNorm1d(hidden_layer_dimension), 24 | torch.nn.ReLU(), 25 | torch.nn.Linear(hidden_layer_dimension, output_dimension), 26 | torch.nn.BatchNorm1d(output_dimension), 27 | torch.nn.Sigmoid(), 28 | ) 29 | 30 | def forward(self, x, y, z): 31 | return self.linear_segment(x), self.linear_segment(y), z 32 | 33 | def forward_single_segment(self, x): 34 | return self.linear_segment(x) 35 | 36 | def save(self, folder): 37 | self.eval() 38 | torch.save(self.state_dict(), 39 | folder / self.filename) 40 | 41 | @property 42 | def filename(self): 43 | split_info_string = "_".join( 44 | [f"{split_info.split_type.name}-{split_info.split_size}" for split_info in self.split_infos]) 45 | 46 | return f"ShapemerLearn_{split_info_string}_{self.number_of_moments}_{self.hidden_layer_dimension}_{self.output_dimension}.pt" 47 | 48 | @classmethod 49 | def load(cls, hidden_layer_dimension=32, output_dimension=10, split_infos=SPLIT_INFOS): 50 | model = ShapemerLearn(hidden_layer_dimension, output_dimension, split_infos=split_infos) 51 | if torch.cuda.is_available(): 52 | m = torch.load(importlib_resources.files("geometricus") / "models" / model.filename, 53 | map_location=torch.device("cuda")) 54 | else: 55 | m = torch.load(importlib_resources.files("geometricus") / "models" / model.filename, 56 | map_location=torch.device("cpu")) 57 | model.load_state_dict(m) 58 | model.eval() 59 | if torch.cuda.is_available(): 60 | model.cuda() 61 | return model 62 | 63 | 64 | def loss_func(out, distant, y): 65 | # Calculate the squared Euclidean distance between out and distant 66 | dist_sq = torch.sum(torch.pow(out - distant, 2), 1) 67 | # Calculate the contrastive loss 68 | loss = y * dist_sq + (1 - y) * torch.pow(torch.clamp(1 - torch.sqrt(dist_sq + 1e-10), min=0.0), 2) 69 | # Return the mean loss over the batch 70 | return torch.mean(loss) 71 | 72 | 73 | def moment_tensors_to_bits(list_of_moment_tensors): 74 | bits = [] 75 | for i, segment in enumerate(list_of_moment_tensors): 76 | bits.append(tuple(list((segment > 0.5).astype(np.uint8)))) 77 | return bits 78 | 79 | 80 | def moments_to_tensors(segments, model): 81 | if torch.cuda.is_available(): 82 | return model.forward_single_segment(torch.tensor(segments).cuda()).cpu().detach().numpy() 83 | return model.forward_single_segment(torch.tensor(segments)).cpu().detach().numpy() 84 | 85 | 86 | def moments_to_shapemers(list_of_moments, model): 87 | if torch.cuda.is_available(): 88 | moment_tensors = ( 89 | model.forward_single_segment(torch.tensor(list_of_moments).cuda()) 90 | .cpu() 91 | .detach() 92 | .numpy() 93 | ) 94 | else: 95 | moment_tensors = ( 96 | model.forward_single_segment(torch.tensor(list_of_moments)) 97 | .cpu() 98 | .detach() 99 | .numpy() 100 | ) 101 | return moment_tensors_to_bits(moment_tensors) 102 | -------------------------------------------------------------------------------- /geometricus/models/ShapemerLearn_RADIUS-5_RADIUS-10_KMER-8_KMER-16_68_32_10.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurtleTools/geometricus/33b29e7e6074c867640455585d339f6319ec2e8d/geometricus/models/ShapemerLearn_RADIUS-5_RADIUS-10_KMER-8_KMER-16_68_32_10.pt -------------------------------------------------------------------------------- /geometricus/protein_utility.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from pathlib import PosixPath, Path 4 | 5 | import gzip 6 | import io 7 | from dataclasses import dataclass, field 8 | from typing import Union, Tuple, List 9 | 10 | import numpy as np 11 | import numba as nb 12 | import warnings 13 | from Bio import BiopythonDeprecationWarning 14 | 15 | warnings.filterwarnings("ignore", category=BiopythonDeprecationWarning) 16 | import prody as pd 17 | 18 | ProteinKey = Union[str, Tuple[str, str]] 19 | """ 20 | A protein key is either its PDB ID (str) or a tuple of (PDB ID, chain) 21 | """ 22 | 23 | 24 | @dataclass(eq=False) 25 | class Structure: 26 | """ 27 | Class to store basic protein structure information 28 | """ 29 | 30 | name: ProteinKey 31 | """PDB ID or (PDB ID, chain)""" 32 | length: int 33 | """Number of residues""" 34 | coordinates: np.ndarray = field(repr=False) 35 | """Coordinates""" 36 | 37 | 38 | def parse_structure_file(input_value: Union[Path, (Path, str), str]): 39 | """ 40 | Parse a protein structure file (.pdb, .pdb.gz, .cif, .cif.gz) or PDBID or PDBID_Chain 41 | and returns a prody AtomGroup object 42 | 43 | Parameters 44 | ---------- 45 | input_value: filename or (filename, chain) or PDBID or PDBID_Chain or (PDBID, chain) 46 | 47 | Returns 48 | ------- 49 | prody AtomGroup object 50 | """ 51 | chain = None 52 | if type(input_value) == tuple: 53 | input_value, chain = input_value 54 | if not Path(input_value).is_file(): 55 | if "_" in input_value: 56 | pdb_id, chain = input_value.split("_") 57 | else: 58 | pdb_id = input_value 59 | protein = pd.parsePDB(pdb_id, compressed=False, chain=chain) 60 | if chain is not None: 61 | protein.setTitle(f"{pdb_id}_{chain}") 62 | else: 63 | protein.setTitle(pdb_id) 64 | else: 65 | filename = str(input_value) 66 | if filename.endswith('.pdb') or filename.endswith('.pdb.gz'): 67 | protein = pd.parsePDB(filename) 68 | if protein is None: 69 | with open(filename) as f: 70 | protein = pd.parsePDBStream(f) 71 | elif filename.endswith('.cif'): 72 | protein = pd.parseMMCIF(filename) 73 | if protein is None: 74 | with open(filename) as f: 75 | protein = pd.parseMMCIFStream(f) 76 | elif filename.endswith(".cif.gz"): 77 | with gzip.open(filename, 'r') as mmcif: 78 | with io.TextIOWrapper(mmcif, encoding='utf-8') as decoder: 79 | protein = pd.parseMMCIFStream(decoder) 80 | else: 81 | with open(filename) as f: 82 | protein = pd.parsePDBStream(f) 83 | input_value = Path(input_value).name 84 | 85 | if protein is None: 86 | raise ValueError(f"Could not parse {input_value}") 87 | if chain is not None: 88 | protein = protein[chain].toAtomGroup() 89 | if protein is None: 90 | raise ValueError(f"Could not parse {input_value} chain {chain}") 91 | protein.setTitle(f"{input_value}_{chain}") 92 | else: 93 | protein.setTitle(input_value) 94 | return protein 95 | 96 | 97 | def get_structure_files(input_value: Union[Path, str, List[str]]) -> List[Union[str, (str, str)]]: 98 | """ 99 | Get a list of structure files or PDB IDs from a string representing: 100 | A list of structure files (.pdb, .pdb.gz, .cif, .cif.gz), 101 | A list of (structure_file, chain) 102 | A list of PDBIDs or PDBID_chain or (PDB ID, chain) 103 | A folder with input structure files, 104 | A file which lists structure filenames or "structure_filename, chain" on each line, 105 | A file which lists PDBIDs or PDBID_chain or PDBID, chain on each line 106 | Parameters 107 | ---------- 108 | input_value 109 | 110 | Returns 111 | ------- 112 | List of structure files or (structure_file, chain) or PDBIDs or (PDB ID, chain) 113 | """ 114 | if type(input_value) == str or type(input_value) == PosixPath: 115 | input_value = Path(input_value) 116 | if input_value.is_dir(): 117 | protein_files = list(input_value.glob("*")) 118 | elif input_value.is_file(): 119 | with open(input_value) as f: 120 | protein_files = f.read().strip().split("\n") 121 | else: 122 | raise ValueError(f"Could not parse {input_value}") 123 | else: 124 | assert type(input_value) == list or type(input_value) == tuple, "Input must be a path or a list" 125 | protein_files = input_value 126 | final_protein_files = [] 127 | for protein_file in protein_files: 128 | if (type(protein_file) == str or type(protein_file) == PosixPath) and Path(protein_file).is_file(): 129 | final_protein_files.append(protein_file) 130 | elif type(protein_file) == tuple: 131 | protein_file, chain = protein_file 132 | final_protein_files.append((protein_file, chain)) 133 | else: 134 | assert type(protein_file) == str, f"Could not understand input {protein_file}" 135 | if ", " in protein_file: 136 | protein_file, chain = protein_file.split(", ") 137 | final_protein_files.append((protein_file, chain)) 138 | elif "_" in protein_file: 139 | pdb_id, chain = protein_file.split("_") 140 | final_protein_files.append((pdb_id, chain)) 141 | else: 142 | final_protein_files.append(protein_file) 143 | return list(set(final_protein_files)) 144 | 145 | 146 | def group_indices(input_list: List[int]) -> List[List[int]]: 147 | """ 148 | e.g [1, 1, 1, 2, 2, 3, 3, 3, 4] -> [[0, 1, 2], [3, 4], [5, 6, 7], [8]] 149 | """ 150 | output_list = [] 151 | current_list = [] 152 | current_index = None 153 | for i in range(len(input_list)): 154 | if current_index is None: 155 | current_index = input_list[i] 156 | if input_list[i] == current_index: 157 | current_list.append(i) 158 | else: 159 | output_list.append(current_list) 160 | current_list = [i] 161 | current_index = input_list[i] 162 | output_list.append(current_list) 163 | return output_list 164 | 165 | 166 | def get_alpha_indices(protein: pd.AtomGroup) -> List[int]: 167 | """ 168 | Get indices of alpha carbons of pd AtomGroup object 169 | """ 170 | return [i for i, a in enumerate(protein.iterAtoms()) if a.getName() == "CA"] 171 | 172 | 173 | def get_beta_indices(protein: pd.AtomGroup) -> List[int]: 174 | """ 175 | Get indices of beta carbons of pd AtomGroup object 176 | (If beta carbon doesn't exist, alpha carbon index is returned) 177 | """ 178 | residue_splits = group_indices(protein.getResindices()) 179 | i = 0 180 | indices = [] 181 | for split in residue_splits: 182 | ca = None 183 | cb = None 184 | for _ in split: 185 | if protein[i].getName() == "CB": 186 | cb = protein[i].getIndex() 187 | if protein[i].getName() == "CA": 188 | ca = protein[i].getIndex() 189 | i += 1 190 | if cb is not None: 191 | indices.append(cb) 192 | else: 193 | assert ca is not None 194 | indices.append(ca) 195 | return indices 196 | 197 | 198 | def get_sequences_from_fasta_yield(fasta_file: Union[str, Path], comments=("#")) -> tuple: 199 | """ 200 | Returns (accession, sequence) iterator 201 | Parameters 202 | ---------- 203 | fasta_file 204 | comments 205 | ignore lines containing any of these strings 206 | Returns 207 | ------- 208 | (accession, sequence) 209 | """ 210 | with open(fasta_file) as f: 211 | current_sequence = "" 212 | current_key = None 213 | for line in f: 214 | if not len(line.strip()) or any(comment in line for comment in comments): 215 | continue 216 | if ">" in line: 217 | if current_key is None: 218 | current_key = line.split(">")[1].strip() 219 | else: 220 | if current_sequence[-1] == "*": 221 | current_sequence = current_sequence[:-1] 222 | yield current_key, current_sequence 223 | current_sequence = "" 224 | current_key = line.split(">")[1].strip() 225 | else: 226 | current_sequence += line.strip() 227 | if current_sequence[-1] == "*": 228 | current_sequence = current_sequence[:-1] 229 | yield current_key, current_sequence 230 | 231 | 232 | def get_sequences_from_fasta(fasta_file: Union[str, Path], comments=("#")) -> dict: 233 | """ 234 | Returns dict of accession to sequence from fasta file 235 | Parameters 236 | ---------- 237 | fasta_file 238 | comments 239 | ignore lines containing any of these strings 240 | Returns 241 | ------- 242 | {accession:sequence} 243 | """ 244 | return { 245 | key: sequence for (key, sequence) in get_sequences_from_fasta_yield(fasta_file, comments=comments) 246 | } 247 | 248 | 249 | @nb.njit 250 | def get_rmsd(coords_1: np.ndarray, coords_2: np.ndarray) -> float: 251 | """ 252 | RMSD of paired coordinates = normalized square-root of sum of squares of euclidean distances 253 | """ 254 | return np.sqrt(np.sum((coords_1 - coords_2) ** 2) / coords_1.shape[0]) 255 | 256 | 257 | @nb.njit 258 | def get_rotation_matrix(coords_1: np.ndarray, coords_2: np.ndarray): 259 | """ 260 | Superpose paired coordinates on each other using Kabsch superposition (SVD) 261 | Assumes centered coordinates 262 | 263 | Parameters 264 | ---------- 265 | coords_1 266 | numpy array of coordinate data for the first protein; shape = (n, 3) 267 | coords_2 268 | numpy array of corresponding coordinate data for the second protein; shape = (n, 3) 269 | 270 | Returns 271 | ------- 272 | rotation matrix for optimal superposition 273 | """ 274 | correlation_matrix = np.dot(coords_2.T, coords_1) 275 | u, s, v = np.linalg.svd(correlation_matrix) 276 | reflect = np.linalg.det(u) * np.linalg.det(v) < 0 277 | if reflect: 278 | s[-1] = -s[-1] 279 | u[:, -1] = -u[:, -1] 280 | rotation_matrix = np.dot(u, v) 281 | return rotation_matrix.astype(np.float64) 282 | 283 | 284 | def alignment_to_numpy(alignment): 285 | aln_np = {} 286 | for n in alignment: 287 | aln_seq = [] 288 | index = 0 289 | for a in alignment[n]: 290 | if a == "-": 291 | aln_seq.append(-1) 292 | else: 293 | aln_seq.append(index) 294 | index += 1 295 | aln_np[n] = np.array(aln_seq) 296 | return aln_np 297 | -------------------------------------------------------------------------------- /geometricus_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurtleTools/geometricus/33b29e7e6074c867640455585d339f6319ec2e8d/geometricus_logo.png -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "wheel"] 3 | build-backend = "setuptools.build_meta" -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = geometricus 3 | version = 0.5.0 4 | description = Fast, structure-based, alignment-free protein embedding 5 | long_description = file: README.md 6 | long_description_content_type = text/markdown; charset=UTF-8 7 | url = https://github.com/TurtleTools/geometricus 8 | author = Janani Durairaj, Mehmet Akdel 9 | author_email = janani.durairaj@unibas.ch 10 | license = MIT License 11 | license_files = LICENSE 12 | classifiers = 13 | Intended Audience :: Science/Research 14 | Operating System :: Microsoft :: Windows 15 | Operating System :: POSIX 16 | Operating System :: Unix 17 | Operating System :: MacOS 18 | License :: OSI Approved :: MIT License 19 | Programming Language :: Python 20 | Programming Language :: Python :: 3 21 | Programming Language :: Python :: 3.9 22 | Topic :: Scientific/Engineering :: Bio-Informatics 23 | project_urls = 24 | Documentation = https://turtletools.github.io/geometricus/ 25 | Source = https://github.com/TurtleTools/geometricus 26 | Tracker = https://github.com/TurtleTools/geometricus/issues 27 | 28 | [options] 29 | zip_safe = False 30 | packages = 31 | geometricus 32 | platforms = any 33 | include_package_data = True 34 | install_requires = 35 | numpy==1.23.4 36 | numba==0.56.3 37 | scipy 38 | prody 39 | typer 40 | torch 41 | python_requires = >=3.9 42 | 43 | [bdist_wheel] 44 | universal = 1 45 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | if __name__ == "__main__": 4 | setuptools.setup() 5 | -------------------------------------------------------------------------------- /training/2-train-shapemerization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "18dfa935-c962-4813-a3ab-bc933c8ddbb5", 7 | "metadata": { 8 | "pycharm": { 9 | "name": "#%%\n" 10 | } 11 | }, 12 | "outputs": [], 13 | "source": [ 14 | "% load_ext autoreload\n", 15 | "% autoreload 2" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "id": "1b738ce1-a94b-4d72-a3c9-09ffdfaeb08b", 22 | "metadata": { 23 | "pycharm": { 24 | "name": "#%%\n" 25 | } 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "from collections import defaultdict\n", 30 | "from pathlib import Path\n", 31 | "\n", 32 | "import matplotlib.pyplot as plt\n", 33 | "import numpy as np\n", 34 | "import prody as pd\n", 35 | "import torch\n", 36 | "from sklearn import metrics\n", 37 | "from tqdm.notebook import tqdm\n", 38 | "\n", 39 | "from geometricus import sampling, moment_invariants, model_utility" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "id": "80c950e6-be9e-4377-ac4b-be1b605d8144", 45 | "metadata": { 46 | "pycharm": { 47 | "name": "#%% md\n" 48 | } 49 | }, 50 | "source": [ 51 | "# Making data" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "id": "96ae02a7", 58 | "metadata": { 59 | "collapsed": false, 60 | "jupyter": { 61 | "outputs_hidden": false 62 | }, 63 | "pycharm": { 64 | "name": "#%%\n" 65 | } 66 | }, 67 | "outputs": [], 68 | "source": [ 69 | "data_folder = Path(\"data\")\n", 70 | "pdb_folder = data_folder / \"cath_data\" / \"dompdb\"\n", 71 | "matrices_folder = data_folder / \"cath_data\" / \"rotation_matrices\"\n", 72 | "training_data_folder = data_folder / \"training_data\"\n", 73 | "training_data_folder.mkdir(exist_ok=True)" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "id": "0ccf1ce4-3348-4a4e-ae18-dbc573e29a35", 80 | "metadata": { 81 | "pycharm": { 82 | "name": "#%%\n" 83 | } 84 | }, 85 | "outputs": [], 86 | "source": [ 87 | "funfam_clusters = {}\n", 88 | "id_to_funfam_cluster = {}\n", 89 | "superfamily_clusters = defaultdict(list)\n", 90 | "id_to_superfamily_cluster = {}\n", 91 | "with open(data_folder / \"cath_data\" / \"clusters.txt\") as f:\n", 92 | " for line in tqdm(f):\n", 93 | " match_id, query_ids = line.strip().split(\": \")\n", 94 | " query_ids = query_ids.split(\", \")\n", 95 | " funfam_clusters[match_id] = query_ids\n", 96 | " superfamily_id = match_id.split(\"/FF\")[0]\n", 97 | " superfamily_clusters[superfamily_id] += query_ids\n", 98 | " for qid in query_ids:\n", 99 | " id_to_funfam_cluster[qid] = match_id\n", 100 | " id_to_superfamily_cluster[qid] = superfamily_id" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "id": "67518495-7ffc-409e-9e26-628312486276", 107 | "metadata": { 108 | "pycharm": { 109 | "name": "#%%\n" 110 | } 111 | }, 112 | "outputs": [], 113 | "source": [ 114 | "from geometricus import SplitInfo, SplitType\n", 115 | "SPLIT_INFOS = (SplitInfo(SplitType.RADIUS, 5),\n", 116 | " SplitInfo(SplitType.RADIUS, 10),\n", 117 | " SplitInfo(SplitType.KMER, 8),\n", 118 | " SplitInfo(SplitType.KMER, 16))" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "id": "a0919585-7730-408c-a9c0-6347778b8d74", 125 | "metadata": { 126 | "pycharm": { 127 | "name": "#%%\n" 128 | } 129 | }, 130 | "outputs": [], 131 | "source": [ 132 | "protein_moments, errors = moment_invariants.get_invariants_for_files(pdb_folder, \n", 133 | " split_infos=SPLIT_INFOS,\n", 134 | " n_threads=10)" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "id": "1e206789-db78-4b30-91a5-8f7a910a32a4", 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "sampling.make_training_data_pair(training_data_folder, \n", 145 | " protein_moments,\n", 146 | " id_to_funfam_cluster,\n", 147 | " matrices_folder, pdb_folder, num_moments=num_moments)\n", 148 | "sampling.make_training_data_self(training_data_folder, \n", 149 | " protein_moments, num_moments=num_moments)" 150 | ] 151 | }, 152 | { 153 | "cell_type": "markdown", 154 | "id": "d64a912a-7902-469e-8fb0-8be723b7876c", 155 | "metadata": { 156 | "pycharm": { 157 | "name": "#%% md\n" 158 | } 159 | }, 160 | "source": [ 161 | "# Training" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": null, 167 | "id": "131d7731-98b5-451d-81eb-ac9ce1d06154", 168 | "metadata": { 169 | "pycharm": { 170 | "is_executing": true, 171 | "name": "#%%\n" 172 | } 173 | }, 174 | "outputs": [], 175 | "source": [ 176 | "model_folder = data_folder / \"models\"\n", 177 | "model_folder.mkdir(exist_ok=True)" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "id": "acf9be42-4efc-4b11-a46c-ab4408149ea8", 184 | "metadata": { 185 | "pycharm": { 186 | "name": "#%%\n" 187 | } 188 | }, 189 | "outputs": [], 190 | "source": [ 191 | "data = sampling.Data.from_files(training_data_folder, [\"_self\", \"_pair\"], \"moments\", representation_length=68)" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": null, 197 | "id": "6d74c80d-27a1-49d6-95a0-e9efafe08abb", 198 | "metadata": { 199 | "pycharm": { 200 | "name": "#%%\n" 201 | } 202 | }, 203 | "outputs": [], 204 | "source": [ 205 | "train_ids, test_ids = data.train_test_split(test_size=0.02, \n", 206 | " rmsd_threshold=8, \n", 207 | " ignore_first_last=True, \n", 208 | " protein_lengths=protein_lengths)" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "id": "a23ee1e5-aead-46a2-bcc0-361320271e5e", 215 | "metadata": { 216 | "pycharm": { 217 | "name": "#%%\n" 218 | } 219 | }, 220 | "outputs": [], 221 | "source": [ 222 | "test_pairs_a, test_pairs_b, test_labels, test_rmsds = data.make_test(test_ids)" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": null, 228 | "id": "14f99398-5f7b-45aa-8f1f-2916133a6063", 229 | "metadata": {}, 230 | "outputs": [], 231 | "source": [ 232 | "from scipy.spatial.distance import hamming\n", 233 | "from scipy import stats\n", 234 | "def get_hamming_distances(pairs_a, pairs_b):\n", 235 | " return np.array([hamming(pa, pb) for pa, pb in zip(pairs_a, pairs_b)])" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": null, 241 | "id": "c38da2f5-a6ec-434d-90e3-e7e9ed8af07b", 242 | "metadata": { 243 | "pycharm": { 244 | "name": "#%%\n" 245 | } 246 | }, 247 | "outputs": [], 248 | "source": [ 249 | "def plot_test_results(train_loss, discrete=True):\n", 250 | " model.eval()\n", 251 | " test_pairs_a_i, test_pairs_b_i, test_labels_i = model(test_pairs_a, test_pairs_b, test_labels)\n", 252 | " loss = model_utility.loss_func(test_pairs_a_i, test_pairs_b_i, test_labels_i)\n", 253 | " t1, t2 = test_pairs_a_i.cpu().detach().numpy(), test_pairs_b_i.cpu().detach().numpy()\n", 254 | " if discrete:\n", 255 | " t1b = np.array(model_utility.moment_tensors_to_bits(t1)) \n", 256 | " t2b = np.array(model_utility.moment_tensors_to_bits(t2))\n", 257 | " distances = NUM_BITS * get_hamming_distances(t1b, t2b)\n", 258 | " else:\n", 259 | " distances = np.abs(t1 - t2).mean(1)\n", 260 | " test_labels_i = test_labels_i.cpu().detach().numpy().astype(int)\n", 261 | " fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(12, 4))\n", 262 | " ax1.hexbin(test_rmsds, distances, cmap=\"RdBu\")\n", 263 | " ax1.set_xlabel(\"RMSD\")\n", 264 | " ax1.set_ylabel(\"Distance\")\n", 265 | " metrics.PrecisionRecallDisplay.from_predictions(test_labels_i, -distances, ax=ax2)\n", 266 | " metrics.RocCurveDisplay.from_predictions(test_labels_i, -distances, ax=ax3)\n", 267 | " fig.suptitle(f\"Train loss: {train_loss:.3f} Test loss {loss.item():.3f}\\nSpearman correlation: {spearmanr(test_rmsds, distances)[0]:.3f}\")\n", 268 | " plt.show()\n", 269 | " model.train()" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": null, 275 | "id": "7aac69a3-ae06-423d-8f12-2334c9075f0d", 276 | "metadata": { 277 | "pycharm": { 278 | "name": "#%%\n" 279 | } 280 | }, 281 | "outputs": [], 282 | "source": [ 283 | "epoch = 5\n", 284 | "NUM_HIDDEN = 32\n", 285 | "NUM_BITS = 10\n", 286 | "model = model_utility.ShapemerLearn(NUM_HIDDEN, NUM_BITS, split_infos=SPLIT_INFOS).cuda()\n", 287 | "optimizer = torch.optim.Adam(model.parameters(), lr=0.001)" 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": null, 293 | "id": "77a90060-2e38-4802-af15-52b671f293fd", 294 | "metadata": { 295 | "pycharm": { 296 | "name": "#%%\n" 297 | } 298 | }, 299 | "outputs": [], 300 | "source": [ 301 | "current_losses = []\n", 302 | "for e in range(epoch):\n", 303 | " for x, (pair_a, pair_b, label) in enumerate(data.make_train_batches(train_ids)):\n", 304 | " pair_a, pair_b, label = model(pair_a, pair_b, label)\n", 305 | " loss = model_utility.loss_func(pair_a, pair_b, label)\n", 306 | " optimizer.zero_grad()\n", 307 | " loss.backward()\n", 308 | " current_losses.append(loss.item())\n", 309 | " optimizer.step()\n", 310 | " plot_test_results(np.mean(current_losses))\n", 311 | " plot_test_results(np.mean(current_losses), discrete=False)\n", 312 | " current_losses = []\n", 313 | "plot_test_results(np.mean(current_losses))\n", 314 | "plot_test_results(np.mean(current_losses), discrete=False)" 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "execution_count": null, 320 | "id": "90fca61f-d5ce-4b4f-8549-2c52453a38bb", 321 | "metadata": { 322 | "pycharm": { 323 | "name": "#%%\n" 324 | } 325 | }, 326 | "outputs": [], 327 | "source": [ 328 | "model.save(model_folder)" 329 | ] 330 | } 331 | ], 332 | "metadata": { 333 | "kernelspec": { 334 | "display_name": "Python 3 (ipykernel)", 335 | "language": "python", 336 | "name": "python3" 337 | }, 338 | "language_info": { 339 | "codemirror_mode": { 340 | "name": "ipython", 341 | "version": 3 342 | }, 343 | "file_extension": ".py", 344 | "mimetype": "text/x-python", 345 | "name": "python", 346 | "nbconvert_exporter": "python", 347 | "pygments_lexer": "ipython3", 348 | "version": "3.11.0" 349 | }, 350 | "vscode": { 351 | "interpreter": { 352 | "hash": "8ab6a161ba03c21d4642db29cdaabcfb98eddbe4c95ae95609ca0691105e488b" 353 | } 354 | } 355 | }, 356 | "nbformat": 4, 357 | "nbformat_minor": 5 358 | } 359 | --------------------------------------------------------------------------------