├── .gitignore ├── .travis.yml ├── .travis └── install-maven.sh ├── LICENSE ├── README.md ├── codestyle.xml ├── docker_example ├── deb │ ├── install_debs.sh │ ├── libfreetype6_2.9.1-3+deb10u2_amd64.deb │ └── libpng16-16_1.6.36-6_amd64.deb ├── plugins │ └── README.txt └── run_docker.sh ├── lib ├── org.RDKit.jar └── org.RDKitDoc.jar ├── native ├── linux.x86_64 │ └── libGraphMolWrap.so ├── macosx.x86_64 │ └── libGraphMolWrap.jnilib └── win32.x86_64 │ └── GraphMolWrap.dll ├── pom.xml ├── src ├── main │ ├── java │ │ └── org │ │ │ └── rdkit │ │ │ ├── fingerprint │ │ │ ├── DefaultFingerprintFactory.java │ │ │ ├── DefaultFingerprintSettings.java │ │ │ ├── FingerprintFactory.java │ │ │ ├── FingerprintSettings.java │ │ │ ├── FingerprintType.java │ │ │ ├── InvalidFingerprintSettingsException.java │ │ │ └── Utils.java │ │ │ └── neo4j │ │ │ ├── bin │ │ │ ├── LibraryLoaderExtensionFactory.java │ │ │ └── LibraryLoaderLifecycle.java │ │ │ ├── config │ │ │ └── RDKitSettings.java │ │ │ ├── handlers │ │ │ ├── RDKitEventHandler.java │ │ │ └── RDKitEventHandlerExtensionFactory.java │ │ │ ├── models │ │ │ ├── Constants.java │ │ │ ├── LuceneQuery.java │ │ │ ├── NodeFields.java │ │ │ └── NodeParameters.java │ │ │ ├── procedures │ │ │ ├── BaseProcedure.java │ │ │ ├── ExactSearch.java │ │ │ ├── FingerprintProcedures.java │ │ │ ├── SubstructureSearch.java │ │ │ └── UtilProcedures.java │ │ │ └── utils │ │ │ ├── CachingIterator.java │ │ │ ├── Converter.java │ │ │ ├── PagingIterator.java │ │ │ └── RWMolCloseable.java │ ├── license │ │ ├── evgerher_license │ │ │ ├── header.txt │ │ │ └── license.txt │ │ └── licenses.properties │ └── resources │ │ ├── META-INF │ │ └── services │ │ │ ├── org.neo4j.configuration.SettingsDeclaration │ │ │ └── org.neo4j.kernel.extension.ExtensionFactory │ │ └── logging.properties └── test │ ├── java │ └── org │ │ └── rdkit │ │ └── neo4j │ │ ├── bin │ │ └── LibraryLoaderLifecycleTest.java │ │ ├── config │ │ └── RDKitSettingsTest.java │ │ ├── handlers │ │ └── 
SmilesEventHandlerTest.java │ │ ├── index │ │ ├── BitSetIndexQueryingTest.java │ │ ├── EmbeddedTest.java │ │ ├── model │ │ │ └── ChemblRow.java │ │ └── utils │ │ │ ├── BaseTest.java │ │ │ ├── ChemicalStructureParser.java │ │ │ └── TestUtils.java │ │ ├── procedures │ │ ├── ExactSearchTest.java │ │ ├── FingerprintProcedureTest.java │ │ ├── SubstructureSearchTest.java │ │ └── UtilProceduresTest.java │ │ └── utils │ │ ├── ConverterTest.java │ │ └── VersionTest.java │ └── resources │ └── chembl_test_data.txt └── update_rdkit.sh /.gitignore: -------------------------------------------------------------------------------- 1 | *.iml 2 | .idea/ 3 | LINKS.md 4 | target/ 5 | CHANGELOG.md 6 | *.sdf 7 | out/ 8 | neo4j-temp/ 9 | docker-compose.yml 10 | dependency-reduced-pom.xml 11 | temp/ 12 | docker_example/plugins/*jar 13 | 14 | settings.xml 15 | .vscode 16 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | cache: 2 | directories: 3 | - $HOME/.m2 4 | 5 | matrix: 6 | include: 7 | # Travis poorly supports windows with jdk 8 | - os: linux 9 | language: java 10 | dist: trusty 11 | jdk: openjdk8 12 | - os: linux 13 | language: java 14 | dist: trusty 15 | jdk: oraclejdk8 16 | - os: linux 17 | language: java 18 | dist: trusty 19 | jdk: openjdk11 20 | - os: linux 21 | language: java 22 | dist: trusty 23 | jdk: oraclejdk11 24 | - os: osx 25 | language: java 26 | osx_image: xcode9.3 # Further versions use java 13+ 27 | 28 | install: 29 | - mvn org.apache.maven.plugins:maven-install-plugin:2.3.1:install-file -Dfile=${TRAVIS_BUILD_DIR}/lib/org.RDKit.jar -DgroupId=org.rdkit -DartifactId=rdkit -Dversion=1.0.0 -Dpackaging=jar 30 | - mvn org.apache.maven.plugins:maven-install-plugin:2.3.1:install-file -Dfile=${TRAVIS_BUILD_DIR}/lib/org.RDKitDoc.jar -DgroupId=org.rdkit -DartifactId=rdkit-doc -Dversion=1.0.0 -Dpackaging=jar 31 | 32 | script: mvn -e test 33 | 34 | 
notifications: 35 | email: false 36 | -------------------------------------------------------------------------------- /.travis/install-maven.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | # script to install maven 5 | TRAVIS_BUILD_DIR=$1 6 | maven_fname="apache-maven-3.6.1-bin" 7 | url="https://archive.apache.org/dist/maven/maven-3/3.6.1/binaries/apache-maven-3.6.1-bin.zip" 8 | install_dir="${TRAVIS_BUILD_DIR}/../${maven_fname}" 9 | 10 | 11 | mkdir ${install_dir} 12 | curl -fsSL ${url} -o "${install_dir}.zip" 13 | unzip "${install_dir}.zip" 14 | rm "${install_dir}.zip" 15 | 16 | mkdir tmp 17 | 18 | cat << EOF > ${install_dir}/temp.sh 19 | #!/bin/sh 20 | export MAVEN_HOME=${install_dir} 21 | export M2_HOME=${install_dir} 22 | export M2=${install_dir}/bin 23 | export PATH=${install_dir}/bin:"$PATH" 24 | EOF 25 | 26 | source ${install_dir}/temp.sh 27 | 28 | echo Maven installed to ${install_dir} 29 | mvn --version 30 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2019, RDKit 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 
15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RDKit-Neo4j project 2 | [![Build Status](https://travis-ci.com/rdkit/neo4j-rdkit.svg?branch=master)](https://travis-ci.com/rdkit/neo4j-rdkit) `Open Chemistry`, `RDKit & Neo4j` GSoC 2019 project 3 | 4 | *** 5 | ## Abstract 6 | > Chemical and pharmaceutical R&D produce large amounts of data of completely different nature, such as chemical structures, recipe and process data, formulation data, and data from various application tests. Altogether these data rarely follow a schema. Consequently, relational data models and databases frequently have disadvantages mapping these data appropriately. Here, chemical data frequently leads to rather abstract data models, which are difficult to develop, align, and maintain with the domain experts. Upon retrieval, computationally expensive joins of not predetermined depth may cause issues. 
7 | 8 | > Graph data models promise advantages here: 9 | > - they can easily be understood by non IT experts from the research domains 10 | > - due to their plasticity, they can easily be extended and refactored 11 | > - graph databases such as neo4j are made for coping with arbitrary path lengths 12 | 13 | > Chemical data models usually require a database to be able to deal with chemical structures to be utilized for structure based queries to either identify records or as filtering criteria. 14 | 15 | > The project will be focused on development of extension for neo4j graph database for querying knowledge graphs storing molecular and chemical information. 16 | > Task is to enable identification of entry points into the graph via exact/substructure/similarity searches (UC1). UC2 is closely related to UC1, but here the intention is to use chemical structures as limiting conditions in graph traversals originating from different entry points. Both use cases rely on the same integration of RDKit and Neo4j and will only differ in their CYPHER statements. 17 | 18 | __Mentors:__ 19 | * Greg Landrum 20 | * Christian Pilger 21 | * Stefan Armbruster 22 | 23 | ## Build & run 24 | 25 | 1) Install `lib/org.RDKit.jar` and `lib/org.RDKitDoc.jar` into your local maven repository 26 | ``` 27 | mvn org.apache.maven.plugins:maven-install-plugin:2.3.1:install-file \ 28 | -Dfile=lib/org.RDKit.jar -DgroupId=org.rdkit \ 29 | -DartifactId=rdkit -Dversion=1.0.0 \ 30 | -Dpackaging=jar 31 | 32 | mvn org.apache.maven.plugins:maven-install-plugin:2.3.1:install-file \ 33 | -Dfile=lib/org.RDKitDoc.jar -DgroupId=org.rdkit \ 34 | -DartifactId=rdkit-doc -Dversion=1.0.0 \ 35 | -Dpackaging=jar 36 | ``` 37 | 2) Generate .jar file with all dependencies with `mvn package` 38 | 3) Put generated .jar file into `plugins/` folder of your neo4j instance and start the server 39 | 4) add `server.rdkit.index.sanitize=false` to `neo4j.conf` if you want to switch off sanitizing for indexing. 
If not provided, `true` is assumed as default. 40 | 5) By executing `CALL dbms.procedures()`, you are expected to see `org.rdkit.*` procedures 41 | 42 | ### usage within Docker 43 | 44 | The native libraries of rdkit do have a dependency on [libFreetype](https://www.freetype.org/) and [libPng](http://www.libpng.org/pub/png/libpng.html). On desktop Linux systems those are typically installed by default. The [Neo4j docker image](https://hub.docker.com/_/neo4j) is based on `openjdk:11-jdk-slim` which itself is based on a minimal Debian linux image. This does not contain these two libraries. To solve that you need to make sure these packages get installed. 45 | 46 | In [docker_example](docker_example) there's a script [run_docker.sh](docker_example/run_docker.sh) mounting a volume with these debian packages and using an extension script to install these packages upon startup of the docker container. Before using that make sure to populate the [plugins](docker_example/plugins) folder with the plugin's jar file. 47 | 48 | ## Extension functionality 49 | 50 | ### User scenario: 51 | 52 | #### Feeding the data into database 53 | 54 | ##### way A: 55 | 1) Plugin not present 56 | 2) Feed Neo4j DB 57 | 3) then `CALL org.rdkit.update(['Chemical', 'Structure'])` & `CALL org.rdkit.search.createIndex(['Structure', 'Chemical'])` 58 | 59 | > That triggers computation of additional properties (fp, etc.) and fp index creation 60 | > Automated computation of properties enabled only after `update` procedure 61 | 62 | ##### way B: 63 | 1) Plugin present 64 | 2) Feed Neo4j DB 65 | 3) then `CALL org.rdkit.search.createIndex(['Structure', 'Chemical'])` 66 | 67 | > Automated computation of additional properties (fp, etc.) 
and triggered index 68 | > Fp index automatically updated when new :Structure:Chemical records arrive 69 | 70 | ##### way C (the most suitable) 71 | 1) Plugin present 72 | 2) `CALL org.rdkit.search.createIndex(['Structure', 'Chemical'])` 73 | 3) Then feed Knime 74 | 75 | > Automated computation of additional properties (fp, etc.) and index 76 | > Empty Neo4j instance is prepared in advance 77 | > Whenever a new :Structure:Chemical entries comes, property calculation and fp index update are automatically conducted 78 | 79 | #### Execution of exact search 80 | _It is possible to check index existence with `CALL db.indexes`_ 81 | 82 | 0) It would strongly affect performance of exact search if `createIndex` procedure was called earlier (it creates a property index). 83 | 1) `CALL org.rdkit.search.exact.smiles(['Chemical', 'Structure'], 'CC(=O)Nc1nnc(S(N)(=O)=O)s1')` 84 | 2) `CALL org.rdkit.search.exact.mol(['Chemical', 'Structure'], '')` (refer to tests for examples) 85 | 86 | #### Execution of substructure search 87 | 88 | 1) Make sure the fulltext index exists with `CALL db.indexes`, `fp_index` must exist. 
(It should be created with `createIndex` procedure) 89 | 2) `CALL org.rdkit.search.substructure.smiles(['Chemical', 'Structure'], 'CC(=O)Nc1nnc(S(N)(=O)=O)s1', (true/false))` 90 | 3) `CALL org.rdkit.search.substructure.mol(['Chemical', 'Structure'], '', (true/false))` 91 | 92 | #### Execution of similarity search (currently slow) 93 | 94 | 1) `CALL org.rdkit.fingerprint.create(['Chemical', 'Structure'], 'torsion_fp', 'torsion', (true/false))` - new property `torsion_fp` is created 95 | 2) `CALL org.rdkit.fingerprint.search.smiles(['Chemical', 'Structure'], 'CC(=O)Nc1nnc(S(N)(=O)=O)s1', 'torsion', 'torsion_fp', 0.4, (true/false))` 96 | 3) `CALL org.rdkit.fingerprint.search.smiles(['Chemical', 'Structure'], 'CC(=O)Nc1nnc(S(N)(=O)=O)s1', 'pattern', 'fp', 0.7, (true/false))` 97 | 98 | #### Usage of `org.rdkit.search.substructure.is.smiles` function in complex queries 99 | 100 | ```$cypher 101 | CALL org.rdkit.search.exact.smiles(['Chemical', 'Structure'], 'CC(C)(C)OC(=O)N1CCC(COc2ccc(OCc3ccccc3)cc2)CC1') YIELD luri 102 | MATCH (finalProduct:Entity{luri:luri}) 103 | CALL apoc.path.expand(finalProduct, "HAS_INGREDIENT", ">Reaction", 0, 4) yield path 104 | WITH nodes(path)[-1] as reaction, path, (length(path)+1)/2 as depths 105 | MATCH (reaction)-[:HAS_INGREDIENT]->(c:Compound) where org.rdkit.search.substructure.is.smiles(c, 'CC(C)C(O)=O') 106 | RETURN path 107 | ``` 108 | 109 | ```$cypher 110 | CALL org.rdkit.search.exact.smiles(['Chemical', 'Structure'], 'CC(C)(C)OC(=O)N1CCC(COc2ccc(OCc3ccccc3)cc2)CC1') YIELD luri 111 | MATCH (finalProduct:Entity{luri:luri}) 112 | CALL apoc.path.expand(finalProduct, "HAS_INGREDIENT", ">Reaction", 0, 4) yield path 113 | WITH nodes(path)[-1] AS reaction, path, (length(path)+1)/2 AS depths 114 | MATCH (reaction)-[:HAS_INGREDIENT]->(c:Compound) 115 | WITH path, COLLECT(c) as compounds 116 | WHERE ANY( x IN compounds where org.rdkit.search.substructure.is.mol(x, ' 117 | Ketcher 9 71921 82D 1 1.00000 0.00000 0 118 | 6 5 0 0 0 999 V2000 119 | 8.9170 
-12.3000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 120 | 9.7830 -11.8000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 121 | 10.6490 -12.3000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 122 | 9.7830 -10.8000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 123 | 10.6490 -10.3000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 124 | 8.9170 -10.3000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 125 | 1 2 1 0 0 0 126 | 2 3 1 0 0 0 127 | 2 4 1 0 0 0 128 | 4 5 1 0 0 0 129 | 4 6 2 0 0 0 130 | M END')) 131 | RETURN path 132 | ``` 133 | 134 | #### Usage of `org.rdkit.utils.svg` function 135 | 136 | ```$cypher 137 | CALL org.rdkit.search.exact.smiles(['Chemical', 'Structure'], 'CCCC(C(=O)Nc1ccc(S(N)(=O)=O)cc1)C(C)(C)C') 138 | YIELD canonical_smiles 139 | RETURN org.rdkit.utils.svg(canonical_smiles) as svg 140 | ``` 141 | 142 | 143 | --- 144 | ### Node labels: [`Chemical`, `Structure`] - strict rule (!) 145 | 146 | * __Whenever a new node added with labels__, an `rdkit` event handler is applied and new node properties are constructed from `mdlmol` property. 147 | Those are also reserved property names 148 | 149 | 1) `canonical_smiles` 150 | 2) `inchi` 151 | 3) `formula` 152 | 4) `molecular_weight` 153 | 5) `fp` - bit-vector fingerprint in form of indexes of positive bits (`"1 4 19 23"`) 154 | 6) `fp_ones` - count of positive bits 155 | 7) `mdlmol` 156 | 157 | Additional reserved property names: 158 | 159 | - `smiles` 160 | 161 | * If the graph was fulfilled with nodes before the extension was loaded, it is possible to apply a procedure: 162 | `CALL org.rdkit.update(['Chemical', 'Structure'])` - which iterates through nodes with specified labels and creates properties described before. 
163 | 164 | * In order to speed up an exact search, create an index on top of `canonical_smiles` property 165 | 166 | ### User-defined procedures & functions 167 | 168 | 1) `CALL org.rdkit.search.exact.smiles(['Chemical', 'Structure'], 'CC(=O)Nc1nnc(S(N)(=O)=O)s1')` 169 | 2) `CALL org.rdkit.search.exact.mol(['Chemical', 'Structure'], '')` 170 | * RDKit provides functionality to use `exact search` on top of `smiles` and `mdlmol blocks`, returns a node which satisfies `canonical smiles` 171 | 3) `CALL org.rdkit.update(['Chemical', 'Structure'])` 172 | * Update procedure (manual properties initialization from `mdlmol` property) 173 | * _Current implementation uses single thread and on a huge database may take a lot of time (>3 minutes)_ 174 | 4) `CALL org.rdkit.search.createIndex(['Chemical', 'Structure'])` 175 | * Create fulltext index (called `rdkitIndex`) on property `fp`, which is required for substructure search 176 | * Create index for `:Chemical(canonical_smiles)` property 177 | 5) `CALL org.rdkit.search.deleteIndex()` 178 | * Delete fulltext index (called `rdkitIndex`) on property `fp`, which is required for substructure search 179 | * Delete index for `:Chemical(canonical_smiles)` property 180 | 6) `CALL org.rdkit.search.substructure.smiles(['Chemical', 'Structure'], 'CC(=O)Nc1nnc(S(N)(=O)=O)s1')` 181 | * SSS based on smiles substructure 182 | 7) `CALL org.rdkit.search.substructure.mol(['Chemical', 'Structure'], '')` 183 | * SSS based on mdlmol block substructure 184 | 8) `CALL org.rdkit.fingerprint.create(['Chemical', 'Structure'], 'morgan_fp', 'morgan')` 185 | * Create a new property called `morgan_fp` with fingerprint type `morgan` on all nodes 186 | * Supporting properties `morgan_fp_type` and `morgan_fp_ones` are also added 187 | * Creates fulltext index on this property 188 | * Node is skipped if it's not possible to convert its smiles with this fingerprint type 189 | * It is __not allowed__ to use a property name equal to one of the predefined (reserved) property names 190 | 9) `CALL 
org.rdkit.fingerprint.search.smiles(['Chemical', 'Structure'], 'CC(=O)Nc1nnc(S(N)(=O)=O)s1', 'pattern', 'fp', 0.7)` 191 | * Call similarity search with the following parameters: 192 | - Node labels: `['Chemical', 'Structure']` 193 | - Smiles: `'CC(=O)Nc1nnc(S(N)(=O)=O)s1'` 194 | - Fingerprint type: `'pattern'` 195 | - Property name: `'fp'` 196 | - Threshold: `0.7` 197 | * Smiles value is converted into specified _fingerprint type_ (if possible) and compared with nodes which have _property_ (`'fp'` in this case) 198 | * Threshold is a lower bound for the score value 199 | * _Current implementation uses single thread and on a huge database may take a lot of time (>3 minutes)_ 200 | 10) User-defined functions 201 | * `org.rdkit.search.substructure.is.smiles(<node>, '<smiles_string>')` 202 | * `org.rdkit.search.substructure.is.mol(<node>, '<mol_string>')` 203 | * Return boolean answer: does specified `node` object have substructure match provided by `smiles_string` or `mol_string`. 204 | 11) User-defined function `org.rdkit.utils.svg('<smiles_string>')` 205 | * Return svg image in text format from smiles 206 | 207 | --- 208 | 209 | # Results overview 210 | 211 | ## What was achieved 212 | 213 | 1) Implementation of exact search (100%) 214 | 2) Implementation of substructure search (90%, several minor bugs) 215 | 3) Implementation of condition based graph traversal - usage of function calls in complex queries (100%) 216 | 4) Implementation of similarity search (70%, major performance issues) 217 | 5) Coverage with unit tests (80%, not all invalid arguments for procedures are tested) 218 | 219 | ## What remains to be done 220 | 221 | 222 | 1) Speed up batch tasks by utilizing several threads (currently waiting for resolving issue on native level) 223 | 2) Speed up the `similarity search` procedures 224 | 3) Solve minor bugs (todos) like unclosed `query` object during SSS 225 | 226 | ## What problems were encountered 227 | 228 | 1) Compatibility of native libraries for win64 (beginning of the development) 229 | 2) Lazy streams 
evaluation and not resolved issue with `query` object during SSS 230 | 3) Parallelization of stream evaluations 231 | 232 | ## Java requirements 233 | 234 | Plugin supports openjdk and oraclejdk java versions (< 12). 235 | Further versions upgraded _security sensitive fields_ [policy](https://bugs.openjdk.java.net/browse/JDK-8210496), those are currently not supported. 236 | -------------------------------------------------------------------------------- /docker_example/deb/install_debs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | dpkg -i /deb/*.deb 4 | 5 | 6 | -------------------------------------------------------------------------------- /docker_example/deb/libfreetype6_2.9.1-3+deb10u2_amd64.deb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rdkit/neo4j-rdkit/ed1489926d1e1e5ea25322a643873015e906b3ee/docker_example/deb/libfreetype6_2.9.1-3+deb10u2_amd64.deb -------------------------------------------------------------------------------- /docker_example/deb/libpng16-16_1.6.36-6_amd64.deb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rdkit/neo4j-rdkit/ed1489926d1e1e5ea25322a643873015e906b3ee/docker_example/deb/libpng16-16_1.6.36-6_amd64.deb -------------------------------------------------------------------------------- /docker_example/plugins/README.txt: -------------------------------------------------------------------------------- 1 | # copy the `rdkit-index-.jar` file here 2 | -------------------------------------------------------------------------------- /docker_example/run_docker.sh: -------------------------------------------------------------------------------- 1 | docker run --rm \ 2 | --name neo4j \ 3 | -e NEO4J_AUTH=neo4j/123 \ 4 | -e NEO4J_ACCEPT_LICENSE_AGREEMENT=yes \ 5 | -e NEO4J_dbms_security_procedures_unrestricted=\\\* \ 6 | -e 
EXTENSION_SCRIPT=/deb/install_debs.sh \ 7 | -v $PWD/plugins:/plugins \ 8 | -v $PWD/deb:/deb \ 9 | -p 7474:7474 \ 10 | -p 7687:7687 \ 11 | neo4j:4.1.3-enterprise 12 | #--user=$(id -u):$(id -g) \ 13 | # enable for remote debugging: 14 | #-e NEO4J_dbms_jvm_additional=-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=\\\*:5005 \ 15 | #-p 5005:5005 \ 16 | -------------------------------------------------------------------------------- /lib/org.RDKit.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rdkit/neo4j-rdkit/ed1489926d1e1e5ea25322a643873015e906b3ee/lib/org.RDKit.jar -------------------------------------------------------------------------------- /lib/org.RDKitDoc.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rdkit/neo4j-rdkit/ed1489926d1e1e5ea25322a643873015e906b3ee/lib/org.RDKitDoc.jar -------------------------------------------------------------------------------- /native/linux.x86_64/libGraphMolWrap.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rdkit/neo4j-rdkit/ed1489926d1e1e5ea25322a643873015e906b3ee/native/linux.x86_64/libGraphMolWrap.so -------------------------------------------------------------------------------- /native/macosx.x86_64/libGraphMolWrap.jnilib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rdkit/neo4j-rdkit/ed1489926d1e1e5ea25322a643873015e906b3ee/native/macosx.x86_64/libGraphMolWrap.jnilib -------------------------------------------------------------------------------- /native/win32.x86_64/GraphMolWrap.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rdkit/neo4j-rdkit/ed1489926d1e1e5ea25322a643873015e906b3ee/native/win32.x86_64/GraphMolWrap.dll 
-------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | org.neo4j.rdkit 8 | rdkit-index 9 | 1.4.0 10 | RDKit-Neo4j plugin 11 | jar 12 | 13 | 14 | 2.27.0 15 | 5.5.0 16 | 1.7.25 17 | 5.2.0 18 | UTF-8 19 | 17 20 | 17 21 | evgerher_license 22 | ${project.baseUri}src/main/license 23 | 24 | 25 | 26 | 27 | 28 | org.neo4j 29 | neo4j 30 | ${neo4j.version} 31 | provided 32 | 33 | 34 | org.neo4j.community 35 | it-test-support 36 | ${neo4j.version} 37 | test 38 | 39 | 40 | org.neo4j.test 41 | neo4j-harness 42 | ${neo4j.version} 43 | test 44 | 45 | 46 | 47 | 48 | 49 | 50 | org.rdkit 51 | rdkit 52 | 1.0.0 53 | 54 | 55 | org.rdkit 56 | rdkit-doc 57 | 1.0.0 58 | 59 | 60 | 61 | 62 | org.neo4j 63 | neo4j 64 | 65 | 66 | 67 | org.neo4j.test 68 | neo4j-harness 69 | 70 | 71 | 72 | org.neo4j.community 73 | it-test-support 74 | 75 | 76 | 77 | 78 | org.slf4j 79 | slf4j-simple 80 | ${slf4j.version} 81 | provided 82 | 83 | 84 | 85 | 91 | 92 | junit 93 | junit 94 | 4.12 95 | test 96 | 97 | 98 | org.junit.vintage 99 | junit-vintage-engine 100 | 5.5.2 101 | test 102 | 103 | 104 | org.hamcrest 105 | hamcrest-library 106 | 2.2 107 | test 108 | 109 | 110 | 111 | 112 | 113 | 114 | native 115 | 116 | **/* 117 | 118 | native 119 | 120 | 121 | src/main/resources 122 | 123 | 124 | 125 | 126 | 127 | org.apache.maven.plugins 128 | maven-surefire-plugin 129 | 3.0.0-M4 130 | 131 | -Xmx1024m 132 | 133 | 134 | 135 | org.apache.maven.plugins 136 | maven-surefire-report-plugin 137 | 3.0.0-M4 138 | 139 | 140 | maven-jar-plugin 141 | 3.2.0 142 | 143 | 144 | org.apache.maven.plugins 145 | maven-shade-plugin 146 | 3.2.4 147 | 148 | 149 | package 150 | 151 | shade 152 | 153 | 154 | true 155 | 4.4 156 | 157 | 158 | 159 | 160 | 161 | org.codehaus.mojo 162 | license-maven-plugin 163 | 1.20 164 | 165 | RDKit 166 | 2019 167 | 168 | 169 | 170 | 171 | *.txt 172 | *.properties 173 | 
174 | 175 | 176 | 177 | first 178 | 179 | update-file-header 180 | 181 | process-sources 182 | 183 | 184 | 185 | 186 | 187 | 188 | -------------------------------------------------------------------------------- /src/main/java/org/rdkit/fingerprint/DefaultFingerprintFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C)2014, Novartis Institutes for BioMedical Research Inc. 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are 7 | * met: 8 | * 9 | * - Redistributions of source code must retain the above copyright 10 | * notice, this list of conditions and the following disclaimer. 11 | * 12 | * - Redistributions in binary form must reproduce the above 13 | * copyright notice, this list of conditions and the following 14 | * disclaimer in the documentation and/or other materials provided 15 | * with the distribution. 16 | * 17 | * - Neither the name of Novartis Institutes for BioMedical Research Inc. 18 | * nor the names of its contributors may be used to endorse or promote 19 | * products derived from this software without specific prior written permission. 20 | * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 25 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | package org.rdkit.fingerprint; 34 | 35 | /*- 36 | * #%L 37 | * RDKit-Neo4j 38 | * %% 39 | * Copyright (C) 2019 RDKit 40 | * %% 41 | * Copyright (C) 2019 Evgeny Sorokin 42 | * @@ All Rights Reserved @@ 43 | * This file is part of the RDKit Neo4J integration. 44 | * The contents are covered by the terms of the BSD license 45 | * which is included in the file LICENSE, found at the root 46 | * of the neo4j-rdkit source tree. 47 | * #L% 48 | */ 49 | 50 | import org.RDKit.ExplicitBitVect; 51 | import org.RDKit.ROMol; 52 | import org.RDKit.RWMol; 53 | import org.rdkit.neo4j.utils.RWMolCloseable; 54 | import org.slf4j.Logger; 55 | import org.slf4j.LoggerFactory; 56 | 57 | import java.util.BitSet; 58 | 59 | /** 60 | * A fingerprint factory is an object that knows how to produce fingerprints for SMILES. It is used to calculate fingerprints for the search index as 61 | * well as for query structures when the index is searched. As some fingerprints, e.g. Avalon, support different optimizations we have two different 62 | * methods for the two different purposes. 63 | * 64 | * @author Manuel Schwarze 65 | */ 66 | 67 | public class DefaultFingerprintFactory implements FingerprintFactory { 68 | 69 | // 70 | // Constants 71 | // 72 | 73 | /** 74 | * The logger instance. 
75 | */ 76 | private static final Logger logger = LoggerFactory.getLogger(DefaultFingerprintFactory.class); 77 | 78 | // 79 | // Members 80 | // 81 | 82 | /** 83 | * The settings to be used for calculating structure fingerprints with this factory. 84 | */ 85 | private final FingerprintSettings settingsStructure; 86 | 87 | /** 88 | * The settings to be used for calculating query fingerprints with this factory. 89 | */ 90 | private final FingerprintSettings settingsQuery; 91 | 92 | // 93 | // Constructors 94 | // 95 | 96 | /** 97 | * Creates a new fingerprint factory based on the past in settings. Structure and query fingerprints are handled the same way. There is distinction 98 | * between them. To handle them differently, use the other constructor. 99 | * 100 | * @param settings Fingerprint settings. Must not be null. 101 | */ 102 | public DefaultFingerprintFactory(final FingerprintSettings settings) { 103 | settingsStructure = settingsQuery = settings; 104 | } 105 | 106 | // 107 | // Public Methods 108 | // 109 | 110 | /** 111 | * Creates a fingerprint based on the passed in SMILES. 112 | * 113 | * @param strSmiles SMILES structure, preferably canonicalized by RDKit before. Must not be null. 114 | * @param sanitize 115 | * @return Fingerprint as BitSet. 116 | */ 117 | @Override 118 | public BitSet createStructureFingerprint(final String strSmiles, boolean sanitize) { 119 | return createFingerprint(strSmiles, settingsStructure, sanitize); 120 | } 121 | 122 | /** 123 | * Creates a fingerprint based on the passed in SMILES. 124 | * 125 | * @param strSmiles SMILES structure, preferably canonicalized by RDKit before. Must not be null. 126 | * @param sanitize 127 | * @return Fingerprint as BitSet. 
128 | */ 129 | @Override 130 | public BitSet createQueryFingerprint(final String strSmiles, boolean sanitize) { 131 | return createFingerprint(strSmiles, settingsQuery, sanitize); 132 | } 133 | 134 | /** 135 | * Method for already opened RWMol to build fingerprint from Query settings. 136 | * 137 | * @param mol already opened RWMol object 138 | * @param sanitize 139 | * @return Fingerprint as BitSet. 140 | */ 141 | public BitSet createQueryFingerprint(final ROMol mol, boolean sanitize) { 142 | return createFingerprint(mol, settingsQuery, sanitize); 143 | } 144 | 145 | /** 146 | * Method for already opened RWMol to build fingerprint from Structure settings. 147 | * 148 | * @param mol already opened RWMol object 149 | * @param sanitize 150 | * @return Fingerprint as BitSet. 151 | */ 152 | public BitSet createStructureFingerprint(final ROMol mol, boolean sanitize) { 153 | return createFingerprint(mol, settingsStructure, sanitize); 154 | } 155 | 156 | // 157 | // Private Methods 158 | // 159 | 160 | /** 161 | * Creates a fingerprint based on the passed in SMILES. 162 | * 163 | * @param strSmiles SMILES structure, preferably canonicalized by RDKit before. Must not be null. ! EXPECTED CANONICALIZED SMILES ! 164 | * @param settings Fingerprint settings to be used. 165 | * @param sanitize 166 | * @return Fingerprint as BitSet. 
167 | */ 168 | private BitSet createFingerprint(final String strSmiles, final FingerprintSettings settings, boolean sanitize) { 169 | 170 | // todo: update code if other types are used 171 | 172 | // Normally: ROMol objects are needed to calculate fingerprints 173 | // Create an ROMol object 174 | 175 | // Performance trick, if SMILES is already canonicalized 176 | try ( RWMolCloseable mol = RWMolCloseable.from(RWMol.MolFromSmiles(strSmiles, 0, sanitize))) { 177 | return createFingerprint(mol, settings, sanitize); 178 | } 179 | } 180 | 181 | /** 182 | * Method for already opened RWMol 183 | * @param mol - canonicalized 184 | * @param settings to build fingerprint from 185 | * @param sanitize 186 | * @return BitSet from rwmol (fingerprint of `settings` type) 187 | */ 188 | private BitSet createFingerprint(final ROMol mol, final FingerprintSettings settings, boolean sanitize) { 189 | mol.updatePropertyCache(sanitize); 190 | 191 | // Calculate fingerprint 192 | return convert(settings.getRdkitFingerprintType().calculate(mol, settings)); 193 | } 194 | 195 | /** 196 | * Converts an RDKit bit vector into a Java BitSet object. 197 | * 198 | * @param rdkitBitVector RDKit (C++ based) bit vector. Can be null. 199 | * @return BitSet or null, if null was passed in. 
/**
 * Converts an RDKit bit vector into a Java BitSet object by copying every set bit.
 *
 * @param rdkitBitVector RDKit (C++ based) bit vector. Can be null.
 * @return BitSet or null, if null was passed in.
 */
private BitSet convert(final ExplicitBitVect rdkitBitVector) {
  if (rdkitBitVector == null) {
    return null;
  }

  final int bitCount = (int) rdkitBitVector.getNumBits();
  final BitSet result = new BitSet(bitCount);
  int index = 0;
  while (index < bitCount) {
    if (rdkitBitVector.getBit(index)) {
      result.set(index);
    }
    index++;
  }
  return result;
}
} // end of class DefaultFingerprintFactory
IN NO EVENT SHALL THE COPYRIGHT 25 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | package org.rdkit.fingerprint; 34 | 35 | /*- 36 | * #%L 37 | * RDKit-Neo4j 38 | * %% 39 | * Copyright (C) 2019 RDKit 40 | * %% 41 | * Copyright (C) 2019 Evgeny Sorokin 42 | * @@ All Rights Reserved @@ 43 | * This file is part of the RDKit Neo4J integration. 44 | * The contents are covered by the terms of the BSD license 45 | * which is included in the file LICENSE, found at the root 46 | * of the neo4j-rdkit source tree. 47 | * #L% 48 | */ 49 | 50 | import java.util.BitSet; 51 | import org.RDKit.ROMol; 52 | 53 | /** 54 | * A fingerprint factory is an object that knows how to produce fingerprints for SMILES. 55 | * It is used to calculate fingerprints for the search index as well as for query structures 56 | * when the index is searched. As some fingerprints, e.g. Avalon, support different 57 | * optimizations we have two different methods for the two different purposes. 58 | * 59 | * @author Manuel Schwarze 60 | */ 61 | public interface FingerprintFactory { 62 | 63 | /** 64 | * Creates a query fingerprint based on the passed in SMILES. 65 | * 66 | * @param strSmiles SMILES structure, preferably canonicalized by RDKit before. Must not be null. 67 | * 68 | * @param sanitize 69 | * @return Fingerprint as BitSet. 70 | */ 71 | public BitSet createStructureFingerprint(final String strSmiles, boolean sanitize); 72 | 73 | /** 74 | * Creates a structure fingerprint based on the passed in SMILES. 
75 | * 76 | * @param strSmiles SMILES structure, preferably canonicalized by RDKit before. Must not be null. 77 | * 78 | * @param sanitize 79 | * @return Fingerprint as BitSet. 80 | */ 81 | public BitSet createQueryFingerprint(final String strSmiles, boolean sanitize); 82 | 83 | /** 84 | * Method for already opened ROMol to build fingerprint from Structure settings. 85 | * 86 | * @param mol already opened ROMol object 87 | * @param sanitize 88 | * @return Fingerprint as BitSet. 89 | */ 90 | public BitSet createStructureFingerprint(final ROMol mol, boolean sanitize); 91 | 92 | /** 93 | * Method for already opened ROMol to build fingerprint from Query settings. 94 | * 95 | * @param mol already opened ROMol object 96 | * @param sanitize 97 | * @return Fingerprint as BitSet. 98 | */ 99 | public BitSet createQueryFingerprint(final ROMol mol, boolean sanitize); 100 | } 101 | -------------------------------------------------------------------------------- /src/main/java/org/rdkit/fingerprint/FingerprintSettings.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C)2014, Novartis Institutes for BioMedical Research Inc. 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are 7 | * met: 8 | * 9 | * - Redistributions of source code must retain the above copyright 10 | * notice, this list of conditions and the following disclaimer. 11 | * 12 | * - Redistributions in binary form must reproduce the above 13 | * copyright notice, this list of conditions and the following 14 | * disclaimer in the documentation and/or other materials provided 15 | * with the distribution. 16 | * 17 | * - Neither the name of Novartis Institutes for BioMedical Research Inc. 
18 | * nor the names of its contributors may be used to endorse or promote 19 | * products derived from this software without specific prior written permission. 20 | * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | package org.rdkit.fingerprint; 34 | 35 | /*- 36 | * #%L 37 | * RDKit-Neo4j 38 | * %% 39 | * Copyright (C) 2019 RDKit 40 | * %% 41 | * Copyright (C) 2019 Evgeny Sorokin 42 | * @@ All Rights Reserved @@ 43 | * This file is part of the RDKit Neo4J integration. 44 | * The contents are covered by the terms of the BSD license 45 | * which is included in the file LICENSE, found at the root 46 | * of the neo4j-rdkit source tree. 47 | * #L% 48 | */ 49 | 50 | 51 | /** 52 | * Defines fingerprint settings used to calculate fingerprints. Not all settings 53 | * are used for all types of fingerprints. If a string setting is unavailable 54 | * null should be returned. For numeric settings this is not possible, therefore 55 | * the constant {@link #UNAVAILABLE} has been introduced, which shall be used 56 | * to express that a setting is not defined. 
The value of {@link #UNAVAILABLE} 57 | * may change over time, if the current value is needed in the future for a new 58 | * type of settings. 59 | * 60 | * @author Manuel Schwarze 61 | */ 62 | public interface FingerprintSettings { 63 | 64 | /** The integer value to be used if a property is not set. (like null) */ 65 | public static final int UNAVAILABLE = -1; 66 | 67 | /** 68 | * Returns the Fingerprint type that is part of this object as string. 69 | * 70 | * @return Fingerprint type or null, if not set. 71 | */ 72 | String getFingerprintType(); 73 | 74 | /** 75 | * Returns the Fingerprint type that is part of this object as FingerprintType 76 | * object known in RDKit. 77 | * 78 | * @return Fingerprint type or null, if not set or not available. 79 | */ 80 | FingerprintType getRdkitFingerprintType(); 81 | 82 | /** 83 | * Returns the Torsion path length setting if set or {@link #UNAVAILABLE} if not set. 84 | * 85 | * @return the Torsion path length value or {@link #UNAVAILABLE}. 86 | */ 87 | int getTorsionPathLength(); 88 | 89 | /** 90 | * Returns the minimum path setting if set or {@link #UNAVAILABLE} if not set. 91 | * 92 | * @return the MinPath value or {@link #UNAVAILABLE}. 93 | */ 94 | int getMinPath(); 95 | 96 | /** 97 | * Returns the maximum path setting if set or {@link #UNAVAILABLE} if not set. 98 | * 99 | * @return the MaxPath value or {@link #UNAVAILABLE}. 100 | */ 101 | int getMaxPath(); 102 | 103 | /** 104 | * Returns the AtomPair minimum path setting if set or {@link #UNAVAILABLE} if not set. 105 | * 106 | * @return the AtomPair MinPath value or {@link #UNAVAILABLE}. 107 | */ 108 | int getAtomPairMinPath(); 109 | 110 | /** 111 | * Returns the AtomPair maximum path setting if set or {@link #UNAVAILABLE} if not set. 112 | * 113 | * @return the AtomPair MaxPath value or {@link #UNAVAILABLE}. 114 | */ 115 | int getAtomPairMaxPath(); 116 | 117 | /** 118 | * Returns the number of bits (fingerprint length) if set or {@link #UNAVAILABLE} if not set. 
119 | * 120 | * @return the NumBits (length) value or {@link #UNAVAILABLE}. 121 | */ 122 | int getNumBits(); 123 | 124 | /** 125 | * Returns the radius setting if set or {@link #UNAVAILABLE} if not set. 126 | * 127 | * @return the Radius value or {@link #UNAVAILABLE}. 128 | */ 129 | int getRadius(); 130 | 131 | /** 132 | * Returns the layer flags setting if set or {@link #UNAVAILABLE} if not set. 133 | * 134 | * @return the Layer Flags value or {@link #UNAVAILABLE}. 135 | */ 136 | int getLayerFlags(); 137 | 138 | /** 139 | * Returns the Avalon query flag setting if set or {@link #UNAVAILABLE} if not set. 140 | * 141 | * @return the Avalon query flag value (1) or (0) or {@link #UNAVAILABLE}. 142 | */ 143 | int getAvalonQueryFlag(); 144 | 145 | /** 146 | * Returns the Avalon bits flags setting if set or {@link #UNAVAILABLE} if not set. 147 | * 148 | * @return the Avalon bit flags value or {@link #UNAVAILABLE}. 149 | */ 150 | int getAvalonBitFlags(); 151 | 152 | /** 153 | * Sets the fingerprint type and also the RDKit Fingerprint Type based on it, 154 | * if known. 155 | * 156 | * @param strType Fingerprint type. 157 | * 158 | * @return A reference to this object. Makes it easy to concatenate settings calls. 159 | */ 160 | FingerprintSettings setFingerprintType(final String strType); 161 | 162 | /** 163 | * Sets the RDKit Fingerprint type and also the normal string type based on it. 164 | * 165 | * @param type 166 | * 167 | * @return A reference to this object. Makes it easy to concatenate settings calls. 168 | */ 169 | FingerprintSettings setRDKitFingerprintType(final FingerprintType type); 170 | 171 | /** 172 | * Sets the Torsion path length setting or {@link #UNAVAILABLE}. 173 | * 174 | * @param iTorsionPathLength the TorsionPathLength value or {@link #UNAVAILABLE}. 175 | * 176 | * @return A reference to this object. Makes it easy to concatenate settings calls. 
177 | */ 178 | FingerprintSettings setTorsionPathLength(final int iTorsionPathLength); 179 | 180 | /** 181 | * Sets the minimum path setting or {@link #UNAVAILABLE}. 182 | * 183 | * @param iMinPath the MinPath value or {@link #UNAVAILABLE}. 184 | * 185 | * @return A reference to this object. Makes it easy to concatenate settings calls. 186 | */ 187 | FingerprintSettings setMinPath(final int iMinPath); 188 | 189 | /** 190 | * Sets the maximum path setting or {@link #UNAVAILABLE}. 191 | * 192 | * @param iMaxPath the MaxPath value or {@link #UNAVAILABLE}. 193 | * 194 | * @return A reference to this object. Makes it easy to concatenate settings calls. 195 | */ 196 | FingerprintSettings setMaxPath(final int iMaxPath); 197 | 198 | /** 199 | * Sets the Atom Pairs minimum path setting or {@link #UNAVAILABLE}. 200 | * 201 | * @param iMinPath the MinPath value or {@link #UNAVAILABLE}. 202 | * 203 | * @return A reference to this object. Makes it easy to concatenate settings calls. 204 | */ 205 | FingerprintSettings setAtomPairMinPath(final int iMinPath); 206 | 207 | /** 208 | * Sets the Atom Pairs maximum path setting or {@link #UNAVAILABLE}. 209 | * 210 | * @param iMaxPath the MaxPath value or {@link #UNAVAILABLE}. 211 | * 212 | * @return A reference to this object. Makes it easy to concatenate settings calls. 213 | */ 214 | FingerprintSettings setAtomPairMaxPath(final int iMaxPath); 215 | 216 | /** 217 | * Sets the number of bits (fingerprint length) or {@link #UNAVAILABLE}. 218 | * 219 | * @param iNumBits the NumBits (length) value or {@link #UNAVAILABLE}. 220 | * 221 | * @return A reference to this object. Makes it easy to concatenate settings calls. 222 | */ 223 | FingerprintSettings setNumBits(final int iNumBits); 224 | 225 | /** 226 | * Sets the radius setting or {@link #UNAVAILABLE}. 227 | * 228 | * @param iRadius the Radius value or {@link #UNAVAILABLE}. 229 | * 230 | * @return A reference to this object. Makes it easy to concatenate settings calls. 
231 | */ 232 | FingerprintSettings setRadius(final int iRadius); 233 | 234 | /** 235 | * Sets the layer flags setting or {@link #UNAVAILABLE}. 236 | * 237 | * @param iLayerFlags the Layer Flags value or {@link #UNAVAILABLE}. 238 | * 239 | * @return A reference to this object. Makes it easy to concatenate settings calls. 240 | */ 241 | FingerprintSettings setLayerFlags(final int iLayerFlags); 242 | 243 | /** 244 | * Sets the Avalon query flag setting or {@link #UNAVAILABLE}. 245 | * 246 | * @param iAvalonQueryFlag the Avalon query flag value or {@link #UNAVAILABLE}. 247 | * 248 | * @return A reference to this object. Makes it easy to concatenate settings calls. 249 | */ 250 | FingerprintSettings setAvalonQueryFlag(final int iAvalonQueryFlag); 251 | 252 | /** 253 | * Sets the Avalon bit flags setting or {@link #UNAVAILABLE}. 254 | * 255 | * @param iAvalonBitFlags the Avalon bit flags or {@link #UNAVAILABLE}. 256 | * 257 | * @return A reference to this object. Makes it easy to concatenate settings calls. 258 | */ 259 | FingerprintSettings setAvalonBitFlags(final int iAvalonBitFlags); 260 | 261 | /** 262 | * Returns true, if the specified number is a value that is not equal 263 | * to the value the represents an unavailable value. 264 | * 265 | * @param iNumber A number to check. 266 | * 267 | * @return True, if this value does represent a valid number. False, 268 | * if it represents the reserved UNAVAILABLE value. 269 | */ 270 | boolean isAvailable(final int iNumber); 271 | 272 | } 273 | -------------------------------------------------------------------------------- /src/main/java/org/rdkit/fingerprint/FingerprintType.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C)2014, Novartis Institutes for BioMedical Research Inc. 3 | * All rights reserved. 
4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are 7 | * met: 8 | * 9 | * - Redistributions of source code must retain the above copyright 10 | * notice, this list of conditions and the following disclaimer. 11 | * 12 | * - Redistributions in binary form must reproduce the above 13 | * copyright notice, this list of conditions and the following 14 | * disclaimer in the documentation and/or other materials provided 15 | * with the distribution. 16 | * 17 | * - Neither the name of Novartis Institutes for BioMedical Research Inc. 18 | * nor the names of its contributors may be used to endorse or promote 19 | * products derived from this software without specific prior written permission. 20 | * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | package org.rdkit.fingerprint; 34 | 35 | /*- 36 | * #%L 37 | * RDKit-Neo4j 38 | * %% 39 | * Copyright (C) 2019 RDKit 40 | * %% 41 | * Copyright (C) 2019 Evgeny Sorokin 42 | * @@ All Rights Reserved @@ 43 | * This file is part of the RDKit Neo4J integration. 
44 | * The contents are covered by the terms of the BSD license 45 | * which is included in the file LICENSE, found at the root 46 | * of the neo4j-rdkit source tree. 47 | * #L% 48 | */ 49 | 50 | import org.RDKit.ExplicitBitVect; 51 | import org.RDKit.RDKFuncs; 52 | import org.RDKit.ROMol; 53 | 54 | /** Defines supported fingerprint types. */ 55 | public enum FingerprintType { 56 | 57 | pattern("Pattern") { 58 | @Override 59 | public FingerprintSettings getSpecification(final int iTorsionPathLength, final int iMinPath, 60 | final int iMaxPath, final int iAtomPairMinPath, final int iAtomPairMaxPath, 61 | final int iNumBits, final int iRadius, final int iLayerFlags, 62 | final int iAvalonQueryFlag, final int iAvalonBitFlags) { 63 | return new DefaultFingerprintSettings(toString(), 64 | FingerprintSettings.UNAVAILABLE, 65 | FingerprintSettings.UNAVAILABLE, 66 | FingerprintSettings.UNAVAILABLE, 67 | FingerprintSettings.UNAVAILABLE, 68 | FingerprintSettings.UNAVAILABLE, 69 | iNumBits, 70 | FingerprintSettings.UNAVAILABLE, 71 | FingerprintSettings.UNAVAILABLE, 72 | FingerprintSettings.UNAVAILABLE, 73 | FingerprintSettings.UNAVAILABLE); 74 | } 75 | 76 | @Override 77 | public void validateSpecification(final FingerprintSettings settings) 78 | throws InvalidFingerprintSettingsException { 79 | super.validateSpecification(settings); 80 | if (settings.getNumBits() <= 0) { 81 | throw new InvalidFingerprintSettingsException("Number of bits must be a positive number > 0."); 82 | } 83 | } 84 | 85 | @Override 86 | public ExplicitBitVect calculate(final ROMol mol, final FingerprintSettings settings) { 87 | synchronized (PATTERN_FP_LOCK) { 88 | return RDKFuncs.PatternFingerprintMol(mol, settings.getNumBits()); 89 | } 90 | } 91 | }, 92 | 93 | morgan("Morgan") { 94 | @Override 95 | public FingerprintSettings getSpecification(final int iTorsionPathLength, final int iMinPath, 96 | final int iMaxPath, final int iAtomPairMinPath, final int iAtomPairMaxPath, 97 | final int iNumBits, final 
int iRadius, final int iLayerFlags, 98 | final int iAvalonQueryFlag, final int iAvalonBitFlags) { 99 | return new DefaultFingerprintSettings(toString(), 100 | FingerprintSettings.UNAVAILABLE, 101 | FingerprintSettings.UNAVAILABLE, 102 | FingerprintSettings.UNAVAILABLE, 103 | FingerprintSettings.UNAVAILABLE, 104 | FingerprintSettings.UNAVAILABLE, 105 | iNumBits, 106 | iRadius, 107 | FingerprintSettings.UNAVAILABLE, 108 | FingerprintSettings.UNAVAILABLE, 109 | FingerprintSettings.UNAVAILABLE); 110 | } 111 | 112 | @Override 113 | public void validateSpecification(final FingerprintSettings settings) 114 | throws InvalidFingerprintSettingsException { 115 | super.validateSpecification(settings); 116 | if (settings.getNumBits() <= 0) { 117 | throw new InvalidFingerprintSettingsException("Number of bits must be a positive number > 0."); 118 | } 119 | if (settings.getRadius() <= 0) { 120 | throw new InvalidFingerprintSettingsException("Radius must be a positive number > 0."); 121 | } 122 | } 123 | 124 | @Override 125 | public ExplicitBitVect calculate(final ROMol mol, final FingerprintSettings settings) { 126 | return RDKFuncs.getMorganFingerprintAsBitVect(mol, settings.getRadius(), settings.getNumBits()); 127 | } 128 | }, 129 | 130 | torsion("Torsion") { 131 | @Override 132 | public FingerprintSettings getSpecification(final int iTorsionPathLength, final int iMinPath, 133 | final int iMaxPath, final int iAtomPairMinPath, final int iAtomPairMaxPath, 134 | final int iNumBits, final int iRadius, final int iLayerFlags, 135 | final int iAvalonQueryFlag, final int iAvalonBitFlags) { 136 | return new DefaultFingerprintSettings(toString(), 137 | iTorsionPathLength, 138 | FingerprintSettings.UNAVAILABLE, 139 | FingerprintSettings.UNAVAILABLE, 140 | FingerprintSettings.UNAVAILABLE, 141 | FingerprintSettings.UNAVAILABLE, 142 | iNumBits, 143 | FingerprintSettings.UNAVAILABLE, 144 | FingerprintSettings.UNAVAILABLE, 145 | FingerprintSettings.UNAVAILABLE, 146 | 
FingerprintSettings.UNAVAILABLE); 147 | } 148 | 149 | @Override 150 | public void validateSpecification(final FingerprintSettings settings) 151 | throws InvalidFingerprintSettingsException { 152 | super.validateSpecification(settings); 153 | if (settings.getNumBits() <= 0) { 154 | throw new InvalidFingerprintSettingsException("Number of bits must be a positive number > 0."); 155 | } 156 | if (settings.getTorsionPathLength() != FingerprintSettings.UNAVAILABLE && settings.getTorsionPathLength() <= 0) { 157 | throw new InvalidFingerprintSettingsException("Torsion path length must be a positive number > 0."); 158 | } 159 | } 160 | 161 | @Override 162 | public ExplicitBitVect calculate(final ROMol mol, final FingerprintSettings settings) { 163 | int iTorsionPathLength = settings.getTorsionPathLength(); 164 | 165 | // Use old default value, if the value is undefined 166 | if (!settings.isAvailable(iTorsionPathLength)) { 167 | iTorsionPathLength = 4; 168 | } 169 | 170 | return RDKFuncs.getHashedTopologicalTorsionFingerprintAsBitVect(mol, settings.getNumBits(), iTorsionPathLength); 171 | } 172 | }; 173 | 174 | // 175 | // Constants 176 | // 177 | 178 | /** 179 | * This lock prevents two calls at the same time into the Pattern Fingerprint functionality, 180 | * which is currently not thread-safe. 181 | * Once there is a fix implemented in the RDKit (or somewhere else?) we can 182 | * remove this lock again. 183 | */ 184 | public static final Object PATTERN_FP_LOCK = new Object(); 185 | 186 | /** 187 | * This lock prevents two calls at the same time into the Avalon Fingerprint functionality, 188 | * which has caused crashes under Windows 7 before. 189 | * Once there is a fix implemented in the RDKit (or somewhere else?) we can 190 | * remove this lock again. 
191 | */ 192 | public static final Object AVALON_FP_LOCK = new Object(); 193 | 194 | // 195 | // Members 196 | // 197 | 198 | private final String m_strName; 199 | 200 | // 201 | // Constructors 202 | // 203 | 204 | /** 205 | * Creates a new fingerprint type enumeration value. 206 | * 207 | * @param strName Name to be shown as string representation. 208 | */ 209 | private FingerprintType(final String strName) { 210 | m_strName = strName; 211 | } 212 | 213 | /** 214 | * Creates a new fingerprint settings object for a fingerprint type. 215 | * Not all parameters are used for all fingerprints. This method 216 | * takes are that only those parameters are included in the 217 | * fingerprint specification, if they are are really used. 218 | * 219 | * @param iTorsionPathLength Torsion Path Length value. Can be -1 ({@link FingerprintSettings#UNAVAILABLE}. 220 | * @param iMinPath Min Path value. Can be -1 ({@link FingerprintSettings#UNAVAILABLE}. 221 | * @param iMaxPath Min Path value. Can be -1 ({@link FingerprintSettings#UNAVAILABLE}. 222 | * @param iAtomPairMinPath Min Path value. Can be -1 ({@link FingerprintSettings#UNAVAILABLE}. 223 | * @param iAtomPairMaxPath Min Path value. Can be -1 ({@link FingerprintSettings#UNAVAILABLE}. 224 | * @param iNumBits Num Bits (Length) value. Can be -1 ({@link FingerprintSettings#UNAVAILABLE}. 225 | * @param iRadius Radius value. Can be -1 ({@link FingerprintSettings#UNAVAILABLE}. 226 | * @param iLayerFlags Layer Flags value. Can be -1 ({@link FingerprintSettings#UNAVAILABLE}. 227 | * @param iAvalonQueryFlag Avalon query flag. Can be -1 ({@link FingerprintSettings#UNAVAILABLE}. 228 | * @param iAvalonBitFlags Avalon bit flags. Can be -1 ({@link FingerprintSettings#UNAVAILABLE}. 229 | * 230 | * @return Specification of the fingerprint based on the passed in 231 | * values. Never null. 
232 | */ 233 | public abstract FingerprintSettings getSpecification(final int iTorsionPathLength, final int iMinPath, 234 | final int iMaxPath, final int iAtomPairMinPath, final int iAtomPairMaxPath, 235 | final int iNumBits, final int iRadius, final int iLayerFlags, 236 | final int iAvalonQueryFlag, final int iAvalonBitFlags); 237 | 238 | /** 239 | * Validates the passed in settings for a fingerprint type. This basis method checks two things: 240 | * 1. That the setting object is not null, 2. If the fingerprint type can calculate rooted 241 | * fingerprints and a rooted fingerprint is desired, it checks that the atom list reference 242 | * is set. 243 | * 244 | * @param settings Fingerprint settings to be validated. 245 | * 246 | * @throws InvalidFingerprintSettingsException Thrown, if settings are invalid and cannot be used. 247 | */ 248 | public void validateSpecification(final FingerprintSettings settings) throws InvalidFingerprintSettingsException { 249 | if (settings == null) { 250 | throw new InvalidFingerprintSettingsException("No fingerprint settings available."); 251 | } 252 | } 253 | 254 | /** 255 | * Calculates the fingerprint based on the specified settings. Important: 256 | * It is the responsibility of the caller of the function to free memory 257 | * for the returned fingerprint when it is not needed anymore. Call 258 | * the {@link ExplicitBitVect#delete()} for this purpose. 259 | * 260 | * @param settings Fingerprint settings. Must not be null. 261 | * 262 | * @return Fingerprint or null. 263 | */ 264 | public abstract ExplicitBitVect calculate(final ROMol mol, final FingerprintSettings settings); 265 | 266 | /** 267 | * {@inheritDoc} 268 | */ 269 | @Override 270 | public String toString() { 271 | return m_strName; 272 | } 273 | 274 | /** 275 | * Tries to determine the fingerprint type based on the passed in string. 
First it 276 | * will try to determine it by assuming that the passed in string is the 277 | * name of the fingerprint type ({@link #name()}. If this fails, it will compare the 278 | * string representation trying to find a match there ({@link #toString()}. 279 | * If none is found it will return null. 280 | */ 281 | public static FingerprintType parseString(String str) { 282 | FingerprintType type = null; 283 | 284 | if (str != null) { 285 | try { 286 | type = FingerprintType.valueOf(str); 287 | } 288 | catch (final IllegalArgumentException exc) { 289 | // Ignored here 290 | } 291 | 292 | if (type == null) { 293 | str = str.trim().toUpperCase(); 294 | for (final FingerprintType typeExisting : FingerprintType.values()) { 295 | if (str.equals(typeExisting.toString().toUpperCase())) { 296 | type = typeExisting; 297 | break; 298 | } 299 | } 300 | } 301 | } 302 | 303 | return type; 304 | } 305 | 306 | /** 307 | * Determines, if the two specified fingerprint setting objects are compatible. 308 | * They are compatible if they are both not null and if the settings are the same 309 | * except for the detail information for rooted fingerprints (atom list column). 310 | * 311 | * @param fps1 Fingerprint settings 1. Can be null. 312 | * @param fps2 Fingerprint settings 2. Can be null. 313 | * 314 | * @return True, if both settings are compatible. False otherwise. 315 | */ 316 | public static boolean isCompatible(final FingerprintSettings fps1, final FingerprintSettings fps2) { 317 | boolean bRet = false; 318 | 319 | if (fps1 != null && fps2 != null) { 320 | bRet = Utils.equals(fps1, fps2); 321 | } 322 | 323 | return bRet; 324 | } 325 | } 326 | -------------------------------------------------------------------------------- /src/main/java/org/rdkit/fingerprint/InvalidFingerprintSettingsException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C)2014, Novartis Institutes for BioMedical Research Inc. 
3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are 7 | * met: 8 | * 9 | * - Redistributions of source code must retain the above copyright 10 | * notice, this list of conditions and the following disclaimer. 11 | * 12 | * - Redistributions in binary form must reproduce the above 13 | * copyright notice, this list of conditions and the following 14 | * disclaimer in the documentation and/or other materials provided 15 | * with the distribution. 16 | * 17 | * - Neither the name of Novartis Institutes for BioMedical Research Inc. 18 | * nor the names of its contributors may be used to endorse or promote 19 | * products derived from this software without specific prior written permission. 20 | * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | package org.rdkit.fingerprint; 34 | 35 | /*- 36 | * #%L 37 | * RDKit-Neo4j 38 | * %% 39 | * Copyright (C) 2019 RDKit 40 | * %% 41 | * Copyright (C) 2019 Evgeny Sorokin 42 | * @@ All Rights Reserved @@ 43 | * This file is part of the RDKit Neo4J integration. 
/**
 * Checked exception signalling that fingerprint settings are incorrect in a certain context.
 *
 * @author Manuel Schwarze
 */
public class InvalidFingerprintSettingsException extends Exception {

  /** Serial version used for serialization of this exception. */
  private static final long serialVersionUID = 1L;

  /**
   * Constructs an InvalidFingerprintSettingsException that carries both a
   * detail message and the cause that led to it.
   *
   * @param msg the detail message
   * @param cause the root cause
   */
  public InvalidFingerprintSettingsException(String msg, Throwable cause) {
    super(msg, cause);
  }

  /**
   * Constructs an InvalidFingerprintSettingsException that only wraps a cause.
   *
   * @param cause the original cause of the exception
   */
  public InvalidFingerprintSettingsException(Throwable cause) {
    super(cause);
  }

  /**
   * Constructs an InvalidFingerprintSettingsException with the specified
   * detail message and no cause.
   *
   * @param s the detail message.
   */
  public InvalidFingerprintSettingsException(String s) {
    super(s);
  }
}
/**
 * Package-private static helpers for the fingerprint classes.
 */
class Utils {

    /** Static helpers only; never instantiated. */
    private Utils() {
    }

    /**
     * Null-safe, deep equality check.
     *
     * <ul>
     *   <li>Identical references (including two {@code null}s) are equal.</li>
     *   <li>Two arrays with the same component type and length are compared
     *       element by element, recursively.</li>
     *   <li>Two collections of the same size are compared pairwise in iteration
     *       order, recursively. Two empty collections are equal.</li>
     *   <li>Everything else is delegated to {@code o1.equals(o2)}.</li>
     * </ul>
     *
     * @param o1 first object, may be {@code null}
     * @param o2 second object, may be {@code null}
     * @return {@code true} if both objects are considered equal
     */
    static boolean equals(final Object o1, final Object o2) {
        if (o1 == o2) {
            return true;
        }
        if (o1 == null || o2 == null) {
            // Exactly one side is null (both-null was handled above).
            return false;
        }

        final Class<?> c1 = o1.getClass();
        final Class<?> c2 = o2.getClass();
        if (c1.isArray() && c2.isArray()
                && c1.getComponentType().equals(c2.getComponentType())
                && Array.getLength(o1) == Array.getLength(o2)) {
            final int length = Array.getLength(o1);
            for (int i = 0; i < length; i++) {
                if (!equals(Array.get(o1, i), Array.get(o2, i))) {
                    return false;
                }
            }
            return true;
        }

        if (o1 instanceof Collection && o2 instanceof Collection
                && ((Collection<?>) o1).size() == ((Collection<?>) o2).size()) {
            final Iterator<?> i1 = ((Collection<?>) o1).iterator();
            final Iterator<?> i2 = ((Collection<?>) o2).iterator();

            // Start from "equal" so that two empty collections compare equal,
            // consistent with the handling of two empty arrays above.
            while (i1.hasNext() && i2.hasNext()) {
                if (!equals(i1.next(), i2.next())) {
                    return false;
                }
            }
            return true;
        }

        // Mismatching arrays, differently sized collections and all other objects.
        return o1.equals(o2);
    }

}
/**
 * Extension factory that creates the {@link LibraryLoaderLifecycle}.
 *
 * NOTE(review): the superclass is shown here without a type argument; it looks
 * like a generic parameter (presumably {@code Dependencies}) was lost - confirm
 * against the original file.
 */
@ServiceProvider
public class LibraryLoaderExtensionFactory extends ExtensionFactory {

    /**
     * Registers the factory with extension type {@code GLOBAL} under the key
     * {@code "rdkitlibraryloader"}.
     */
    public LibraryLoaderExtensionFactory() {
        super(ExtensionType.GLOBAL, "rdkitlibraryloader");
    }

    /**
     * Creates a new {@link LibraryLoaderLifecycle} that logs to the user log
     * obtained from the supplied dependencies.
     *
     * @param context the extension context (not used by this factory)
     * @param dependencies provides the {@link LogService} used to obtain the user log
     * @return the lifecycle instance
     */
    @Override
    public Lifecycle newInstance(ExtensionContext context, Dependencies dependencies) {
        return new LibraryLoaderLifecycle(dependencies.log().getUserLog(LibraryLoaderLifecycle.class));
    }

    /** Dependencies this factory needs: only the log service. */
    interface Dependencies {
        LogService log();
    }
}
15 | * #L% 16 | */ 17 | 18 | import org.apache.commons.io.FileUtils; 19 | import org.neo4j.internal.helpers.collection.Iterables; 20 | import org.neo4j.kernel.lifecycle.LifecycleAdapter; 21 | import org.neo4j.logging.Log; 22 | import org.neo4j.logging.NullLog; 23 | 24 | import java.io.File; 25 | import java.io.IOException; 26 | import java.nio.file.DirectoryStream; 27 | import java.nio.file.FileSystem; 28 | import java.nio.file.FileSystems; 29 | import java.nio.file.Files; 30 | import java.nio.file.Path; 31 | import java.nio.file.Paths; 32 | import java.util.List; 33 | import java.util.stream.Collectors; 34 | 35 | /** 36 | * kernel extension to load native libs 37 | */ 38 | public class LibraryLoaderLifecycle extends LifecycleAdapter { 39 | 40 | private static final String OS_LINUX = "linux"; 41 | private static final String OS_MACOSX = "macosx"; 42 | private static final String OS_WIN32 = "win32"; 43 | private static final String ARCH_X86_64 = "x86_64"; 44 | private static final String ARCH_X86 = "x86"; 45 | 46 | private Log log; 47 | private Path tempDirectory; 48 | 49 | public LibraryLoaderLifecycle(Log userLog) { 50 | this.log = userLog; 51 | } 52 | 53 | @Override 54 | public void init() throws Exception { 55 | loadNativeLibraries(); 56 | } 57 | 58 | @Override 59 | public void shutdown() throws Exception { 60 | if (tempDirectory!= null) { 61 | FileUtils.deleteDirectory(tempDirectory.toFile()); 62 | } 63 | } 64 | 65 | private void loadNativeLibraries() throws IOException { 66 | final String platform = getPlatform(); 67 | 68 | final File jarFileOrDirectory = new File(LibraryLoaderLifecycle.class.getProtectionDomain().getCodeSource().getLocation().getPath()); 69 | 70 | List librariesToLoad = jarFileOrDirectory.isFile() ? 
71 | copyNativeLibsFromJar(platform, jarFileOrDirectory) : 72 | getNativeLibsFromLocalDirectory(platform); 73 | librariesToLoad.forEach(filename -> { 74 | log.info("trying to load native library " + filename); 75 | System.load(filename); 76 | }); 77 | } 78 | 79 | private List getNativeLibsFromLocalDirectory(String platform) throws IOException { 80 | log.info("Loading libraries from local directories"); 81 | try (DirectoryStream paths = Files.newDirectoryStream(Paths.get("native", platform))) { 82 | return Iterables.stream(paths).map(Path::toFile).map(File::getAbsolutePath).collect(Collectors.toList()); 83 | } 84 | } 85 | 86 | private List copyNativeLibsFromJar(String platform, File jarFileOrDirectory) throws IOException { 87 | log.info("Loading libraries from JAR"); 88 | tempDirectory = Files.createTempDirectory("rdkit-"); 89 | try (FileSystem fs = FileSystems.newFileSystem(jarFileOrDirectory.toPath(), LibraryLoaderLifecycle.class.getClassLoader())) { 90 | Path folder = fs.getPath("/native", platform); 91 | try (DirectoryStream paths = Files.newDirectoryStream(folder)) { 92 | 93 | return Iterables.stream(paths) 94 | .map(path -> { 95 | try { 96 | String fileName = path.getFileName().toString(); 97 | File target = new File(tempDirectory.toFile(), fileName); 98 | Files.copy(path, target.toPath()); 99 | return target.getAbsolutePath(); 100 | } catch (IOException e) { 101 | throw new RuntimeException(e); 102 | } 103 | }) 104 | .collect(Collectors.toList()); 105 | } 106 | } 107 | } 108 | 109 | private String getPlatform() { 110 | String osname = System.getProperty("os.name"); 111 | osname = osname.toLowerCase(); 112 | 113 | String formattedOs; 114 | if (osname.contains("linux")) { 115 | formattedOs = OS_LINUX; 116 | } else if (osname.contains("mac")) { 117 | formattedOs = OS_MACOSX; 118 | } else if (osname.contains("windows")) { 119 | formattedOs = OS_WIN32; 120 | } else { 121 | throw new IllegalArgumentException("Could not determine the system parameters properly " + 
osname); 122 | } 123 | 124 | String arch = System.getProperty("os.arch"); 125 | arch = arch.toLowerCase().endsWith("64") ? ARCH_X86_64 : ARCH_X86; 126 | 127 | return String.format("%s.%s", formattedOs, arch); 128 | } 129 | 130 | public static void loadForTests() { 131 | try { 132 | new LibraryLoaderLifecycle(NullLog.getInstance()).init(); 133 | } catch (Exception e) { 134 | throw new RuntimeException(e); 135 | } 136 | } 137 | } 138 | -------------------------------------------------------------------------------- /src/main/java/org/rdkit/neo4j/config/RDKitSettings.java: -------------------------------------------------------------------------------- 1 | package org.rdkit.neo4j.config; 2 | 3 | /*- 4 | * #%L 5 | * RDKit-Neo4j plugin 6 | * %% 7 | * Copyright (C) 2019 - 2020 RDKit 8 | * %% 9 | * Copyright (C) 2019 Evgeny Sorokin 10 | * @@ All Rights Reserved @@ 11 | * This file is part of the RDKit Neo4J integration. 12 | * The contents are covered by the terms of the BSD license 13 | * which is included in the file LICENSE, found at the root 14 | * of the neo4j-rdkit source tree. 
/**
 * Configuration settings declared by the RDKit plugin.
 */
public class RDKitSettings implements SettingsDeclaration {
    /**
     * Setting {@code server.rdkit.index.sanitize}, parsed with {@code SettingValueParsers.BOOL}
     * and defaulting to {@code true}.
     *
     * NOTE(review): presumably controls whether molecules are sanitized when they are indexed -
     * confirm against the code that reads this setting. The declared type is shown here without
     * a type argument; it was likely {@code Setting<Boolean>} - confirm against the original file.
     */
    public static final Setting indexSanitize = SettingImpl.newBuilder("server.rdkit.index.sanitize", SettingValueParsers.BOOL, true).build();
}
15 | * #L% 16 | */ 17 | 18 | import org.RDKit.MolSanitizeException; 19 | import org.neo4j.graphdb.GraphDatabaseService; 20 | import org.neo4j.graphdb.Label; 21 | import org.neo4j.graphdb.Node; 22 | import org.neo4j.graphdb.Transaction; 23 | import org.neo4j.graphdb.event.LabelEntry; 24 | import org.neo4j.graphdb.event.TransactionData; 25 | import org.neo4j.graphdb.event.TransactionEventListenerAdapter; 26 | import org.rdkit.neo4j.models.Constants; 27 | import org.rdkit.neo4j.models.NodeFields; 28 | import org.rdkit.neo4j.models.NodeParameters; 29 | import org.rdkit.neo4j.utils.Converter; 30 | import org.slf4j.Logger; 31 | import org.slf4j.LoggerFactory; 32 | 33 | import java.util.Arrays; 34 | import java.util.List; 35 | import java.util.Set; 36 | import java.util.stream.Collectors; 37 | import java.util.stream.StreamSupport; 38 | 39 | /** 40 | * RDKit event handler 41 | * Handler tracks new nodes with property `smiles` or `mdlmol` 42 | * In case of new node - creates additional properties, the list of properties may be found in {@link org.rdkit.neo4j.models.NodeFields} 43 | * 44 | * todo: add functionality to track new properties during runtime (similarity search can create new properties) 45 | */ 46 | public class RDKitEventHandler extends TransactionEventListenerAdapter { 47 | 48 | private static final Logger logger = LoggerFactory.getLogger(RDKitEventHandler.class); 49 | 50 | private final List