├── .gitignore ├── cdk ├── README.md ├── src │ └── test │ │ ├── resources │ │ ├── uk │ │ │ └── ac │ │ │ │ └── ebi │ │ │ │ └── centres │ │ │ │ └── cdk │ │ │ │ ├── ChEBI_33517.mol │ │ │ │ ├── mixed_h_representation.mol │ │ │ │ ├── bad_h_labels.mol │ │ │ │ ├── r_sarin.mol │ │ │ │ ├── s_sarin.mol │ │ │ │ ├── 2-iminoethen-1-ol.xml │ │ │ │ ├── ChEBI_10642.mol │ │ │ │ ├── implicitPseudoCentre.xml │ │ │ │ ├── hydroxy-cyclobutane.cml │ │ │ │ ├── explicitPseudoCentre.xml │ │ │ │ ├── ChEBI_2639.mol │ │ │ │ ├── handbook_example_8.mol │ │ │ │ ├── handbook_example_9.mol │ │ │ │ ├── ChEBI_82965.mol │ │ │ │ ├── ChEBI_61677.mol │ │ │ │ ├── ChEBI_53643.mol │ │ │ │ ├── alliin.xml │ │ │ │ ├── ChEBI_4991.mol │ │ │ │ ├── ChEBI_3049.mol │ │ │ │ ├── myo-inositol.xml │ │ │ │ ├── ChEBI_66261.mol │ │ │ │ ├── (E)-2-(2-Furyl)-3-(5-nitro-2-furyl)acrylamide.xml │ │ │ │ ├── demo.xml │ │ │ │ ├── intradependants.xml │ │ │ │ ├── (Z)-2-(2-Furyl)-3-(5-nitro-2-furyl)acrylamide.xml │ │ │ │ ├── CHEBI_17268.xml │ │ │ │ ├── (6R)-vomifoliol.xml │ │ │ │ ├── Daniel_Macude_1.mol │ │ │ │ ├── Daniel_Macude_2.mol │ │ │ │ ├── Daniel_Macude_3.mol │ │ │ │ ├── Daniel_Macude_4.mol │ │ │ │ └── ChEBI_2955.mol │ │ └── com │ │ │ └── simolecule │ │ │ └── centres │ │ │ ├── issue15-1.mol │ │ │ ├── issue15-2.mol │ │ │ ├── issue14.mol │ │ │ ├── M_BiNAP.mol │ │ │ └── P_BiNAP.mol │ │ └── java │ │ └── com │ │ └── simolecule │ │ └── centres │ │ ├── PolymerTest.java │ │ ├── ShuffleTest.java │ │ └── ValidateCdkTest.java └── pom.xml ├── core ├── src │ ├── test │ │ ├── resources │ │ │ ├── com │ │ │ │ └── simolecule │ │ │ │ │ └── centres │ │ │ │ │ └── bob_test.smi │ │ │ └── uk │ │ │ │ └── ac │ │ │ │ └── ebi │ │ │ │ └── centres │ │ │ │ └── cdk │ │ │ │ ├── but-2-ene.xml │ │ │ │ ├── (3R)-5-amino(5-13C)pentane-1,3-diol.xml │ │ │ │ ├── (2R,3r,4S)-pentane-2,3,4-triol.xml │ │ │ │ ├── bidentate.xml │ │ │ │ ├── (1R,2E,3S)-1-amino-2-(butan-2-ylidene)butane-1,3-diol.xml │ │ │ │ ├── l-chiro-inostiol.xml │ │ │ │ ├── myo-inositol.xml │ │ │ │ ├── 
(Z)-2-(2-Furyl)-3-(5-nitro-2-furyl)acrylamide.xml │ │ │ │ ├── (2Z,5R,7E)-4,6-bis[(1E)-prop-1-en-1-yl]nona-2,7-dien-5-ol.xml │ │ │ │ └── sulochrin.xml │ │ └── java │ │ │ └── uk │ │ │ └── ac │ │ │ └── ebi │ │ │ └── centres │ │ │ └── test │ │ │ └── TestAtom.java │ └── main │ │ └── java │ │ └── com │ │ └── simolecule │ │ └── centres │ │ ├── TooManyNodesException.java │ │ ├── rules │ │ ├── Rule1a.java │ │ ├── Rule3.java │ │ ├── Rule6.java │ │ ├── Rule4c.java │ │ ├── Rule5.java │ │ ├── Rule4a.java │ │ ├── Rule2.java │ │ ├── Rule1b.java │ │ ├── Priority.java │ │ ├── Rules.java │ │ ├── Rule5New.java │ │ └── Sort.java │ │ ├── Edge.java │ │ ├── Descriptor.java │ │ └── BaseMol.java └── pom.xml ├── CITATION.cff ├── LICENSE ├── opsin ├── pom.xml └── src │ ├── test │ └── java │ │ └── uk │ │ └── ac │ │ └── cam │ │ └── ch │ │ └── wwmm │ │ └── opsin │ │ └── ValidateOpsinTest.java │ └── main │ └── java │ └── uk │ └── ac │ └── cam │ └── ch │ └── wwmm │ └── opsin │ └── OpsinLabeller.java ├── jchem ├── pom.xml └── src │ └── main │ └── java │ └── com │ └── simolecule │ └── centres │ └── jchem │ └── JChemLabeller.java └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | target -------------------------------------------------------------------------------- /cdk/README.md: -------------------------------------------------------------------------------- 1 | __centres-cdk__ 2 | 3 | This module provides an implementation of centres for the Chemistry Development Kit (CDK). 
-------------------------------------------------------------------------------- /cdk/src/test/resources/uk/ac/ebi/centres/cdk/ChEBI_33517.mol: -------------------------------------------------------------------------------- 1 | 2 | Marvin 12090511072D 3 | 4 | 1 0 0 0 0 0 999 V2000 5 | 0.0000 0.0000 0.0000 Uub 0 0 0 0 0 0 0 0 0 0 0 0 6 | M END 7 | -------------------------------------------------------------------------------- /core/src/test/resources/com/simolecule/centres/bob_test.smi: -------------------------------------------------------------------------------- 1 | C/N=C\[C@@H](O)C1=CC=CC=N1.O[C@H](C1=CC=CC=C1O)C2=C(O)C=CC=C2.C[C@H](O)C1=C(C=CC=C1)[C@H](O)C2=CC=CC=C2[C@@H](C)O.C[C@H]1C2=C(C=CC=C2)[C@@H](C)C3=CC=CC=C13 2 | [H][C@@]12C[C@]1([H])C=CC2 R,S P-92.2.2 Example 2 3 | [H][C@]12C[C@]1(CC=C2C)C(C)C -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you use this software, please cite it as below." 
3 | authors: 4 | - family-names: "John" 5 | given-names: "Mayfield" 6 | orcid: "https://orcid.org/0000-0001-7730-2646" 7 | title: "centres" 8 | version: 1.0 9 | doi: 10.1021/acs.jcim.8b00324 10 | date-released: 2018-06-28 11 | url: "https://github.com/SiMolecule/centres" 12 | -------------------------------------------------------------------------------- /cdk/src/test/resources/uk/ac/ebi/centres/cdk/mixed_h_representation.mol: -------------------------------------------------------------------------------- 1 | 2 | Mrv0541 01071521152D 3 | 4 | 5 4 0 0 0 0 999 V2000 5 | -2.4100 -0.4326 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 6 | -1.6955 -0.0201 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0 7 | -0.9810 -0.4326 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 8 | -1.6955 0.8049 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 9 | -0.2666 -0.0201 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 10 | 1 2 1 0 0 0 0 11 | 2 3 1 0 0 0 0 12 | 2 4 1 1 0 0 0 13 | 3 5 1 0 0 0 0 14 | M END 15 | -------------------------------------------------------------------------------- /cdk/src/test/resources/uk/ac/ebi/centres/cdk/bad_h_labels.mol: -------------------------------------------------------------------------------- 1 | 2 | Mrv0541 01071521182D 3 | 4 | 6 5 0 0 0 0 999 V2000 5 | -2.4100 -0.4326 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 6 | -1.6955 -0.0201 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0 7 | -0.9810 -0.4326 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 8 | -1.2475 0.7895 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 9 | -0.2666 -0.0201 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 10 | -2.1080 0.6944 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 11 | 1 2 1 0 0 0 0 12 | 2 3 1 0 0 0 0 13 | 2 4 1 1 0 0 0 14 | 3 5 1 0 0 0 0 15 | 2 6 1 6 0 0 0 16 | M END 17 | -------------------------------------------------------------------------------- /cdk/src/test/resources/uk/ac/ebi/centres/cdk/r_sarin.mol: -------------------------------------------------------------------------------- 1 | 2 | Mrv0541 01071521222D 3 | 4 | 8 7 0 0 0 0 999 V2000 5 | 0.1105 -2.0625 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 6 | 0.1105 -1.2375 0.0000 
C 0 0 0 0 0 0 0 0 0 0 0 0 7 | -0.6039 -0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 8 | 0.8250 -0.8250 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 9 | 0.8250 0.0000 0.0000 P 0 0 2 0 0 0 0 0 0 0 0 0 10 | 0.8250 0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 11 | 1.6500 -0.0000 0.0000 F 0 0 0 0 0 0 0 0 0 0 0 0 12 | 0.0000 0.0000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 13 | 1 2 1 0 0 0 0 14 | 2 3 1 0 0 0 0 15 | 2 4 1 0 0 0 0 16 | 4 5 1 0 0 0 0 17 | 5 6 1 6 0 0 0 18 | 5 7 1 1 0 0 0 19 | 5 8 2 0 0 0 0 20 | M END 21 | -------------------------------------------------------------------------------- /cdk/src/test/resources/uk/ac/ebi/centres/cdk/s_sarin.mol: -------------------------------------------------------------------------------- 1 | 2 | Mrv0541 01071521222D 3 | 4 | 8 7 0 0 0 0 999 V2000 5 | 0.1105 -2.0625 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 6 | 0.1105 -1.2375 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 7 | -0.6039 -0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 8 | 0.8250 -0.8250 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 9 | 0.8250 0.0000 0.0000 P 0 0 2 0 0 0 0 0 0 0 0 0 10 | 0.8250 0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 11 | 1.6500 -0.0000 0.0000 F 0 0 0 0 0 0 0 0 0 0 0 0 12 | 0.0000 0.0000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 13 | 1 2 1 0 0 0 0 14 | 2 3 1 0 0 0 0 15 | 2 4 1 0 0 0 0 16 | 4 5 1 0 0 0 0 17 | 5 6 1 1 0 0 0 18 | 5 7 1 6 0 0 0 19 | 5 8 2 0 0 0 0 20 | M END 21 | -------------------------------------------------------------------------------- /cdk/src/test/resources/uk/ac/ebi/centres/cdk/2-iminoethen-1-ol.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 9 | 11 | 13 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2020, John Mayfield 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the 
following conditions are met: 5 | 6 | 1. Redistributions of source code must retain the above copyright notice, this 7 | list of conditions and the following disclaimer. 8 | 2. Redistributions in binary form must reproduce the above copyright notice, 9 | this list of conditions and the following disclaimer in the documentation 10 | and/or other materials provided with the distribution. 11 | 12 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 13 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 14 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 15 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 16 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 17 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 18 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 19 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 20 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 21 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-------------------------------------------------------------------------------- /opsin/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | centres 7 | com.simolecule.centres 8 | 1.3-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | centres-opsin 13 | 14 | 15 | 16 | ${project.groupId} 17 | centres-core 18 | ${project.parent.version} 19 | 20 | 21 | uk.ac.cam.ch.opsin 22 | opsin-core 23 | 2.7.0 24 | 25 | 26 | ${project.groupId} 27 | centres-core 28 | ${project.parent.version} 29 | test 30 | test-jar 31 | 32 | 33 | junit 34 | junit 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /cdk/src/test/resources/uk/ac/ebi/centres/cdk/ChEBI_10642.mol: -------------------------------------------------------------------------------- 1 | 2 | Marvin 06070613572D 3 | 4 | 12 12 0 0 1 0 999 V2000 5 | 0.7145 -0.4125 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0 6 | 0.0000 -0.8250 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0 7 | 0.7145 0.4125 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0 8 | -0.7145 -0.4125 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0 9 | 0.0000 0.8250 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0 10 | -0.7145 0.4125 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0 11 | 1.4289 0.8250 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 12 | 1.4289 -0.8250 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 13 | -0.0000 -1.6500 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 14 | -1.4289 -0.8250 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 15 | -1.4289 0.8250 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 16 | 0.0000 1.6500 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 17 | 2 1 1 0 0 0 0 18 | 3 1 1 0 0 0 0 19 | 4 2 1 0 0 0 0 20 | 5 3 1 0 0 0 0 21 | 6 4 1 0 0 0 0 22 | 6 5 1 0 0 0 0 23 | 3 7 1 6 0 0 0 24 | 1 8 1 1 0 0 0 25 | 2 9 1 6 0 0 0 26 | 4 10 1 1 0 0 0 27 | 6 11 1 6 0 0 0 28 | 5 12 1 1 0 0 0 29 | M END 30 | -------------------------------------------------------------------------------- /cdk/src/test/resources/uk/ac/ebi/centres/cdk/implicitPseudoCentre.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 
| 4 | 5 | 6 | 7 | 8 | 10 | 12 | 14 | 17 | 18 | 19 | 20 | W 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /core/src/test/resources/uk/ac/ebi/centres/cdk/but-2-ene.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 9 | 11 | 13 | 15 | 17 | 18 | 19 | 20 | 21 | T 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /cdk/src/test/java/com/simolecule/centres/PolymerTest.java: -------------------------------------------------------------------------------- 1 | package com.simolecule.centres; 2 | 3 | import org.junit.Assert; 4 | import org.junit.Test; 5 | import org.openscience.cdk.exception.InvalidSmilesException; 6 | import org.openscience.cdk.interfaces.IAtom; 7 | import org.openscience.cdk.interfaces.IAtomContainer; 8 | import org.openscience.cdk.interfaces.IBond; 9 | import org.openscience.cdk.silent.SilentChemObjectBuilder; 10 | import org.openscience.cdk.smiles.SmilesParser; 11 | import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; 12 | 13 | import java.util.Arrays; 14 | import java.util.Collections; 15 | import java.util.List; 16 | 17 | public class PolymerTest { 18 | 19 | @Test public void test_basicPolymer() throws InvalidSmilesException { 20 | SmilesParser smilesParser = new SmilesParser(SilentChemObjectBuilder.getInstance()); 21 | IAtomContainer mol = smilesParser.parseSmiles("*CC[C@H](O)CO*"); 22 | CdkLabeller.label(mol); 23 | Assert.assertEquals(Descriptor.S, mol.getAtom(3).getProperty("cip.label")); 24 | } 25 | 26 | @Test public void test_NeedRepeat() throws InvalidSmilesException { 27 | // not fixable: *C[C@H](O)CO* 28 | // but this is: *C[C@H](O)COC* 29 | SmilesParser smilesParser = new SmilesParser(SilentChemObjectBuilder.getInstance()); 30 | IAtomContainer mol = smilesParser.parseSmiles("*C[C@H](O)COC* 
|Sg:n:1,2,3,4,5,6::ht|"); 31 | CdkLabeller.label(mol); 32 | Assert.assertEquals(Descriptor.S, mol.getAtom(2).getProperty("cip.label")); 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /core/pom.xml: -------------------------------------------------------------------------------- 1 | 4 | 5 | 4.0.0 6 | 7 | 8 | com.simolecule.centres 9 | centres 10 | 1.3-SNAPSHOT 11 | 12 | 13 | centres-core 14 | jar 15 | 16 | 17 | 18 | junit 19 | junit 20 | test 21 | 22 | 23 | org.hamcrest 24 | hamcrest-all 25 | test 26 | 27 | 28 | 29 | 30 | 31 | 32 | org.apache.maven.plugins 33 | maven-jar-plugin 34 | 2.2 35 | 36 | 37 | 38 | test-jar 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /cdk/src/test/resources/uk/ac/ebi/centres/cdk/hydroxy-cyclobutane.cml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | W 20 | 21 | 22 | H 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /cdk/src/test/resources/uk/ac/ebi/centres/cdk/explicitPseudoCentre.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 10 | 12 | 14 | 17 | 19 | 20 | 21 | 22 | W 23 | 24 | 25 | 26 | 27 | H 28 | 29 | 30 | 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /cdk/src/test/resources/uk/ac/ebi/centres/cdk/ChEBI_2639.mol: -------------------------------------------------------------------------------- 1 | 2 | Marvin 09160818122D 3 | 4 | 15 15 0 0 0 0 999 V2000 5 | -0.3125 1.0125 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 6 | -0.3125 -0.6375 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 7 | -1.0270 -0.2250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 8 | -1.7414 -0.6375 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 9 | 0.4020 -0.2250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 
10 | 1.1164 -0.6375 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 11 | -1.0270 0.6000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 12 | -1.7414 1.0125 0.0000 Cl 0 0 0 0 0 0 0 0 0 0 0 0 13 | 0.4020 0.6000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 14 | 1.8309 0.6000 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 15 | 3.2598 0.6000 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 16 | 2.5454 1.0125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 17 | 2.5454 1.8375 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 18 | 1.1164 1.0125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 19 | 1.1164 1.8375 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 20 | 1 7 1 0 0 0 0 21 | 1 9 2 0 0 0 0 22 | 7 3 2 0 0 0 0 23 | 3 2 1 0 0 0 0 24 | 2 5 2 0 0 0 0 25 | 5 9 1 0 0 0 0 26 | 3 4 1 0 0 0 0 27 | 5 6 1 0 0 0 0 28 | 7 8 1 0 0 0 0 29 | 9 14 1 0 0 0 0 30 | 14 10 1 0 0 0 0 31 | 10 12 1 0 0 0 0 32 | 12 11 1 0 0 0 0 33 | 12 13 2 0 0 0 0 34 | 14 15 2 0 0 0 0 35 | M END 36 | -------------------------------------------------------------------------------- /cdk/src/test/resources/com/simolecule/centres/issue15-1.mol: -------------------------------------------------------------------------------- 1 | Molecule Name 2 | ChemDodl01222418042D 0 0.00000 0.00000 0 3 | [Insert Comment Here] 4 | 15 16 0 0 0 0 0 0 0 0 1 V2000 5 | 0.0000 1.6667 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 6 | 0.5773 1.3333 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 7 | -0.5773 1.3333 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 8 | 0.5773 0.6667 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 9 | -0.5773 0.6667 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 10 | 1.1547 0.3333 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 11 | 0.0000 0.3333 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 12 | -1.1547 0.3333 0.0000 F 0 0 0 0 0 0 0 0 0 0 0 0 13 | 0.0000 -0.3333 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 14 | -0.5773 -0.6667 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 15 | 0.5773 -0.6667 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 16 | -0.5773 -1.3333 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 17 | 1.1547 -0.3333 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 18 | 0.5773 -1.3333 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 19 | 0.0000 -1.6667 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 20 | 1 3 2 0 0 0 0 21 | 3 5 1 
0 0 0 0 22 | 5 7 2 0 0 0 0 23 | 7 4 1 0 0 0 0 24 | 4 2 2 0 0 0 0 25 | 2 1 1 0 0 0 0 26 | 7 9 1 0 0 0 0 27 | 9 10 1 1 0 0 0 28 | 10 12 2 0 0 0 0 29 | 12 15 1 0 0 0 0 30 | 15 14 2 0 0 0 0 31 | 14 11 1 0 0 0 0 32 | 11 9 2 0 0 0 0 33 | 4 6 1 0 0 0 0 34 | 11 13 1 0 0 0 0 35 | 5 8 1 0 0 0 0 36 | M END -------------------------------------------------------------------------------- /cdk/src/test/resources/com/simolecule/centres/issue15-2.mol: -------------------------------------------------------------------------------- 1 | Molecule Name 2 | ChemDodl01222418042D 0 0.00000 0.00000 0 3 | [Insert Comment Here] 4 | 15 16 0 0 0 0 0 0 0 0 1 V2000 5 | 0.0000 -0.3333 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 6 | -0.5774 -0.6667 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 7 | 0.5773 -0.6667 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 8 | 0.0000 0.3333 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 9 | -0.5774 -1.3333 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 10 | 1.1547 -0.3333 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 11 | 0.5773 -1.3333 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 12 | 0.5773 0.6667 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 13 | -0.5774 0.6667 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 14 | 0.0000 -1.6667 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 15 | 0.5773 1.3333 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 16 | 1.1547 0.3333 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 17 | -1.1547 0.3333 0.0000 F 0 0 0 0 0 0 0 0 0 0 0 0 18 | -0.5774 1.3333 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 19 | 0.0000 1.6667 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 20 | 1 2 1 1 0 0 0 21 | 2 5 2 0 0 0 0 22 | 5 10 1 0 0 0 0 23 | 10 7 2 0 0 0 0 24 | 7 3 1 0 0 0 0 25 | 3 1 2 0 0 0 0 26 | 1 4 1 0 0 0 0 27 | 4 8 1 0 0 0 0 28 | 8 11 2 0 0 0 0 29 | 11 15 1 0 0 0 0 30 | 15 14 2 0 0 0 0 31 | 14 9 1 0 0 0 0 32 | 9 4 2 0 0 0 0 33 | 8 12 1 0 0 0 0 34 | 3 6 1 0 0 0 0 35 | 9 13 1 0 0 0 0 36 | M END -------------------------------------------------------------------------------- /cdk/src/test/resources/uk/ac/ebi/centres/cdk/handbook_example_8.mol: -------------------------------------------------------------------------------- 1 | 2 | 
Mrv0541 01081515062D 3 | 4 | 16 16 0 0 0 0 999 V2000 5 | 1.0134 1.5960 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0 6 | 1.5903 1.0063 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0 7 | 1.5814 0.1814 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0 8 | 0.9917 -0.3956 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0 9 | 0.1667 -0.3866 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0 10 | -0.4102 0.2030 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0 11 | -0.4013 1.0280 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0 12 | 0.1884 1.6050 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0 13 | -1.1600 1.3520 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 14 | -0.1190 2.3706 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 15 | 1.3374 2.3547 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 16 | 2.3559 1.3137 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 17 | 2.3401 -0.1426 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 18 | 1.2991 -1.1612 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 19 | -0.1573 -1.1454 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 20 | -1.1758 -0.1044 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 21 | 1 2 1 0 0 0 0 22 | 2 3 1 0 0 0 0 23 | 3 4 1 0 0 0 0 24 | 4 5 1 0 0 0 0 25 | 5 6 1 0 0 0 0 26 | 6 7 1 0 0 0 0 27 | 7 8 1 0 0 0 0 28 | 1 8 1 0 0 0 0 29 | 7 9 1 1 0 0 0 30 | 8 10 1 1 0 0 0 31 | 1 11 1 1 0 0 0 32 | 2 12 1 1 0 0 0 33 | 3 13 1 6 0 0 0 34 | 4 14 1 6 0 0 0 35 | 5 15 1 6 0 0 0 36 | 6 16 1 6 0 0 0 37 | M END 38 | -------------------------------------------------------------------------------- /cdk/src/test/resources/uk/ac/ebi/centres/cdk/handbook_example_9.mol: -------------------------------------------------------------------------------- 1 | 2 | Mrv0541 01081515082D 3 | 4 | 16 16 0 0 0 0 999 V2000 5 | 6.9939 1.5364 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0 6 | 7.5708 0.9467 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0 7 | 7.5619 0.1218 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0 8 | 6.9722 -0.4552 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0 9 | 6.1472 -0.4462 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0 10 | 5.5703 0.1434 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0 11 | 5.5792 0.9684 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0 12 | 6.1689 1.5454 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0 13 | 4.8205 1.2924 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 14 | 5.8615 2.3110 
0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 15 | 7.3179 2.2951 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 16 | 8.3364 1.2541 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 17 | 8.3206 -0.2022 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 18 | 7.2796 -1.2208 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 19 | 5.8232 -1.2050 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 20 | 4.8047 -0.1640 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 21 | 1 2 1 0 0 0 0 22 | 1 8 1 0 0 0 0 23 | 1 11 1 1 0 0 0 24 | 2 3 1 0 0 0 0 25 | 2 12 1 6 0 0 0 26 | 3 4 1 0 0 0 0 27 | 3 13 1 1 0 0 0 28 | 4 5 1 0 0 0 0 29 | 4 14 1 6 0 0 0 30 | 5 6 1 0 0 0 0 31 | 5 15 1 6 0 0 0 32 | 6 7 1 0 0 0 0 33 | 6 16 1 6 0 0 0 34 | 7 8 1 0 0 0 0 35 | 7 9 1 1 0 0 0 36 | 8 10 1 1 0 0 0 37 | M END 38 | -------------------------------------------------------------------------------- /cdk/src/test/resources/com/simolecule/centres/issue14.mol: -------------------------------------------------------------------------------- 1 | Molecule Name 2 | ChemDodl01222418032D 0 0.00000 0.00000 0 3 | [Insert Comment Here] 4 | 16 17 0 0 0 0 0 0 0 0 1 V2000 5 | 0.0000 -0.3333 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 6 | 0.5773 -0.6667 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 7 | -0.5774 -0.6667 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 8 | 0.0000 0.3333 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 9 | 0.5773 -1.3333 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 10 | 1.1547 -0.3333 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 11 | -0.5774 -1.3333 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 12 | -0.5774 0.6667 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 13 | 0.5773 0.6667 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 14 | 0.0000 -1.6667 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 15 | -1.1547 0.3333 0.0000 F 0 0 0 0 0 0 0 0 0 0 0 0 16 | -0.5774 1.3333 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 17 | 1.1547 0.3333 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 18 | 0.5773 1.3333 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 19 | 0.0000 1.6667 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 20 | -1.1545 -0.3335 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 21 | 1 3 1 1 0 0 0 22 | 3 7 2 0 0 0 0 23 | 7 10 1 0 0 0 0 24 | 10 5 2 0 0 0 0 25 | 5 2 1 0 0 0 0 26 | 2 1 2 0 0 0 0 27 | 1 4 1 0 0 0 0 28 
| 4 9 1 0 0 0 0 29 | 9 14 2 0 0 0 0 30 | 14 15 1 0 0 0 0 31 | 15 12 2 0 0 0 0 32 | 12 8 1 0 0 0 0 33 | 8 4 2 0 0 0 0 34 | 9 13 1 0 0 0 0 35 | 2 6 1 0 0 0 0 36 | 8 11 1 0 0 0 0 37 | 3 16 1 0 0 0 0 38 | M END -------------------------------------------------------------------------------- /core/src/main/java/com/simolecule/centres/TooManyNodesException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020 John Mayfield 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions are met: 6 | * 7 | * 1. Redistributions of source code must retain the above copyright notice, 8 | * this list of conditions and the following disclaimer. 9 | * 10 | * 2. Redistributions in binary form must reproduce the above copyright notice, 11 | * this list of conditions and the following disclaimer in the documentation 12 | * and/or other materials provided with the distribution. 13 | * 14 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 18 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 20 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 22 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 | * POSSIBILITY OF SUCH DAMAGE. 
/**
 * Unchecked exception whose message reports that a node limit was exceeded
 * and that CIP assignment for the atom is probably intractable.
 *
 * @author John May
 */
public class TooManyNodesException extends RuntimeException {

  /**
   * Creates the exception for the given node limit.
   *
   * @param limit the node count that was exceeded; embedded in the message
   */
  public TooManyNodesException(int limit) {
    super(describe(limit));
  }

  /** Builds the exception message for the supplied limit. */
  private static String describe(int limit) {
    StringBuilder text = new StringBuilder("More than ");
    text.append(limit);
    text.append(" nodes were created. CIP assignment for this atom is probably intractable.");
    return text.toString();
  }
}
-------------------------------------------------------------------------------- /cdk/src/test/resources/uk/ac/ebi/centres/cdk/ChEBI_61677.mol: -------------------------------------------------------------------------------- 1 | 2 | Marvin 03161113082D 3 | 4 | 18 20 0 0 1 0 999 V2000 5 | 17.1681 -4.9305 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 6 | 17.5994 -5.7726 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0 7 | 16.3306 -4.7405 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0 8 | 16.7684 -5.2434 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0 9 | 15.4781 -5.1146 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0 10 | 15.6682 -5.7731 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0 11 | 16.4992 -3.5975 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 12 | 15.1753 -6.2159 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 13 | 18.3068 -6.1352 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 14 | 17.7653 -5.1511 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 15 | 15.7605 -4.1727 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 16 | 15.0665 -4.3996 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 17 | 14.7916 -5.5157 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 18 | 14.1009 -5.1128 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 19 | 13.4103 -5.5157 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 20 | 12.7197 -5.1128 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 21 | 12.0033 -5.5220 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 22 | 12.7233 -4.2879 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 23 | 2 1 1 0 0 0 0 24 | 3 1 1 0 0 0 0 25 | 4 2 1 0 0 0 0 26 | 5 3 1 0 0 0 0 27 | 6 4 1 0 0 0 0 28 | 6 5 1 0 0 0 0 29 | 4 7 1 0 0 0 0 30 | 7 3 1 0 0 0 0 31 | 6 2 1 0 0 0 0 32 | 6 8 1 1 0 0 0 33 | 2 9 1 1 0 0 0 34 | 4 10 1 1 0 0 0 35 | 3 11 1 6 0 0 0 36 | 5 12 1 1 0 0 0 37 | 5 13 1 6 0 0 0 38 | 13 14 1 0 0 0 0 39 | 14 15 1 0 0 0 0 40 | 15 16 2 0 0 0 0 41 | 16 17 1 0 0 0 0 42 | 16 18 1 0 0 0 0 43 | M END 44 | -------------------------------------------------------------------------------- /core/src/test/resources/uk/ac/ebi/centres/cdk/(3R)-5-amino(5-13C)pentane-1,3-diol.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 9 | 11 | 13 | 15 | 17 | 19 | 21 | 23 | 24 | 25 | 26 | 27 | 
28 | 29 | 30 | W 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /core/src/test/resources/uk/ac/ebi/centres/cdk/(2R,3r,4S)-pentane-2,3,4-triol.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 9 | 11 | 13 | 15 | 17 | 19 | 21 | 23 | 24 | 25 | 26 | 27 | 28 | W 29 | 30 | 31 | 32 | 33 | W 34 | 35 | 36 | W 37 | 38 | 39 | 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /cdk/src/test/resources/com/simolecule/centres/M_BiNAP.mol: -------------------------------------------------------------------------------- 1 | (M)-BiNAP 2 | CDK 2.0 06081720152D 3 | 4 | 22 25 0 0 0 0 999 V2000 5 | -0.8264 -0.0100 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 6 | -1.5434 -0.4182 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 7 | -1.5434 -1.2446 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 8 | -0.8264 -1.6529 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 9 | -0.1195 -1.2446 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 10 | -0.1195 -0.4182 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 11 | -0.8264 0.8165 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 12 | -0.8264 2.4594 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 13 | -1.5434 2.0412 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 14 | -1.5434 1.2247 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 15 | -0.1195 1.2247 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 16 | -0.1195 2.0412 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 17 | -2.2503 0.8165 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 18 | -2.2503 -0.0100 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 19 | 0.5974 2.4594 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 20 | 0.5974 0.8165 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 21 | 1.3044 1.2247 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 22 | 1.3044 2.0412 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 23 | 0.5974 -0.0100 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 24 | 0.5974 -1.6529 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 25 | 1.3044 -1.2446 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 26 | 1.3044 -0.4182 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 27 | 1 2 1 0 0 0 0 28 | 2 3 2 0 0 0 0 29 | 3 4 1 0 0 0 0 30 | 
4 5 2 0 0 0 0 31 | 5 6 1 0 0 0 0 32 | 1 6 2 0 0 0 0 33 | 1 7 1 0 0 0 0 34 | 8 9 2 0 0 0 0 35 | 9 10 1 0 0 0 0 36 | 11 12 2 0 0 0 0 37 | 8 12 1 0 0 0 0 38 | 10 7 2 0 0 0 0 39 | 7 11 1 1 0 0 0 40 | 10 13 1 0 0 0 0 41 | 2 14 1 0 0 0 0 42 | 16 17 2 0 0 0 0 43 | 17 18 1 0 0 0 0 44 | 15 18 2 0 0 0 0 45 | 11 16 1 0 0 0 0 46 | 15 12 1 0 0 0 0 47 | 20 21 2 0 0 0 0 48 | 21 22 1 0 0 0 0 49 | 19 22 2 0 0 0 0 50 | 5 20 1 0 0 0 0 51 | 19 6 1 0 0 0 0 52 | M END 53 | -------------------------------------------------------------------------------- /cdk/src/test/resources/com/simolecule/centres/P_BiNAP.mol: -------------------------------------------------------------------------------- 1 | (P)-BiNAP 2 | CDK 2.0 06081720152D 3 | 4 | 22 25 0 0 0 0 999 V2000 5 | -0.8264 -0.0100 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 6 | -1.5434 -0.4182 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 7 | -1.5434 -1.2446 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 8 | -0.8264 -1.6529 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 9 | -0.1195 -1.2446 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 10 | -0.1195 -0.4182 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 11 | -0.8264 0.8165 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 12 | -0.8264 2.4594 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 13 | -1.5434 2.0412 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 14 | -1.5434 1.2247 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 15 | -0.1195 1.2247 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 16 | -0.1195 2.0412 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 17 | -2.2503 0.8165 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 18 | -2.2503 -0.0100 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 19 | 0.5974 2.4594 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 20 | 0.5974 0.8165 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 21 | 1.3044 1.2247 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 22 | 1.3044 2.0412 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 23 | 0.5974 -0.0100 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 24 | 0.5974 -1.6529 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 25 | 1.3044 -1.2446 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 26 | 1.3044 -0.4182 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 27 | 1 2 1 0 0 0 0 28 | 2 3 2 0 0 0 0 29 | 3 4 1 0 0 0 0 30 | 4 5 2 0 0 0 0 31 | 5 6 1 
0 0 0 0 32 | 1 6 2 0 0 0 0 33 | 1 7 1 0 0 0 0 34 | 8 9 2 0 0 0 0 35 | 9 10 1 0 0 0 0 36 | 11 12 2 0 0 0 0 37 | 8 12 1 0 0 0 0 38 | 10 7 2 0 0 0 0 39 | 7 11 1 6 0 0 0 40 | 10 13 1 0 0 0 0 41 | 2 14 1 0 0 0 0 42 | 16 17 2 0 0 0 0 43 | 17 18 1 0 0 0 0 44 | 15 18 2 0 0 0 0 45 | 11 16 1 0 0 0 0 46 | 15 12 1 0 0 0 0 47 | 20 21 2 0 0 0 0 48 | 21 22 1 0 0 0 0 49 | 19 22 2 0 0 0 0 50 | 5 20 1 0 0 0 0 51 | 19 6 1 0 0 0 0 52 | M END 53 | -------------------------------------------------------------------------------- /core/src/main/java/com/simolecule/centres/rules/Rule1a.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020 John Mayfield 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions are met: 6 | * 7 | * 1. Redistributions of source code must retain the above copyright notice, 8 | * this list of conditions and the following disclaimer. 9 | * 10 | * 2. Redistributions in binary form must reproduce the above copyright notice, 11 | * this list of conditions and the following disclaimer in the documentation 12 | * and/or other materials provided with the distribution. 13 | * 14 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 | * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 18 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 20 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 22 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 | * POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | package com.simolecule.centres.rules; 28 | 29 | import com.simolecule.centres.BaseMol; 30 | import com.simolecule.centres.Edge; 31 | import com.simolecule.centres.Mancude; 32 | 33 | public final class Rule1a extends SequenceRule { 34 | 35 | private final BaseMol mol; 36 | 37 | public Rule1a(BaseMol mol) 38 | { 39 | super(mol); 40 | this.mol = mol; 41 | } 42 | 43 | @Override 44 | public int compare(Edge a, Edge b) 45 | { 46 | final int anum = a.getEnd().getAtomicNumNumerator(); 47 | final int aden = a.getEnd().getAtomicNumDenominator(); 48 | final int bnum = b.getEnd().getAtomicNumNumerator(); 49 | final int bden = b.getEnd().getAtomicNumDenominator(); 50 | if (anum == 0 || bnum == 0) 51 | return SequenceRule.COMP_TO_WILDCARD; 52 | if (aden == 1 && bden == 1) 53 | return Integer.compare(anum, bnum); 54 | return Mancude.Fraction.compare(anum, aden, bnum, bden); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /core/src/test/resources/uk/ac/ebi/centres/cdk/bidentate.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 9 | 11 | 13 | 15 | 17 | 19 | 21 | 23 | 25 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | W 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- 
/core/src/main/java/com/simolecule/centres/rules/Rule3.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020 John Mayfield 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions are met: 6 | * 7 | * 1. Redistributions of source code must retain the above copyright notice, 8 | * this list of conditions and the following disclaimer. 9 | * 10 | * 2. Redistributions in binary form must reproduce the above copyright notice, 11 | * this list of conditions and the following disclaimer in the documentation 12 | * and/or other materials provided with the distribution. 13 | * 14 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 18 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 20 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 22 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 | * POSSIBILITY OF SUCH DAMAGE. 
25 | */ 26 | 27 | package com.simolecule.centres.rules; 28 | 29 | import com.simolecule.centres.BaseMol; 30 | import com.simolecule.centres.Descriptor; 31 | import com.simolecule.centres.Edge; 32 | 33 | /** 34 | * Sequence Rule 3 35 | * "‘seqcis’ (‘Z’) precedes ‘seqtrans’ (‘E’) and this order precedes 36 | * nonstereogenic double bonds" 37 | * 38 | * @param generic atom class 39 | */ 40 | public final class Rule3 extends SequenceRule { 41 | 42 | public Rule3(BaseMol mol) 43 | { 44 | super(mol); 45 | } 46 | 47 | private static int ord(Descriptor lab) 48 | { 49 | if (lab == null) 50 | return 0; 51 | switch (lab) { 52 | case E: 53 | return 1; 54 | case Z: 55 | return 2; 56 | default: 57 | return 0; 58 | } 59 | } 60 | 61 | @Override 62 | public int compare(Edge a, Edge b) 63 | { 64 | return Integer.compare(ord(a.getEnd().getAux()), 65 | ord(b.getEnd().getAux())); 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /cdk/src/test/resources/uk/ac/ebi/centres/cdk/ChEBI_53643.mol: -------------------------------------------------------------------------------- 1 | 2 | Marvin 11190915002D 3 | 4 | 23 26 0 0 0 0 999 V2000 5 | -1.4289 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 6 | -0.7145 -0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 7 | 0.0000 -0.4125 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0 8 | -0.7145 0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 9 | 1.4289 0.4125 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0 10 | 0.7145 -0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 11 | -1.4289 0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 12 | 1.4289 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 13 | 0.7145 0.8250 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0 14 | 0.0000 0.4125 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0 15 | 0.7145 1.6500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 16 | 0.0000 -1.2375 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 17 | 2.1434 0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 18 | 1.4289 2.0625 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 19 | 0.7145 0.0000 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 20 | 0.0000 1.2375 0.0000 C 0 0 0 0 0 
0 0 0 0 0 0 0 21 | 2.1434 1.6500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 22 | -0.7145 -1.6500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 23 | -1.4289 -1.2375 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 24 | 2.1434 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 25 | 2.8579 1.2375 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0 26 | 3.3762 1.8009 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 27 | 3.5318 0.9202 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 28 | 2 1 1 0 0 0 0 29 | 2 18 1 0 0 0 0 30 | 2 19 1 0 0 0 0 31 | 3 2 1 0 0 0 0 32 | 3 12 1 6 0 0 0 33 | 5 13 1 1 0 0 0 34 | 3 6 1 0 0 0 0 35 | 7 1 1 0 0 0 0 36 | 4 7 1 0 0 0 0 37 | 5 8 1 0 0 0 0 38 | 6 8 1 0 0 0 0 39 | 9 5 1 0 0 0 0 40 | 9 11 1 0 0 0 0 41 | 9 15 1 6 0 0 0 42 | 10 3 1 0 0 0 0 43 | 10 4 1 0 0 0 0 44 | 10 9 1 0 0 0 0 45 | 10 16 1 1 0 0 0 46 | 11 14 1 0 0 0 0 47 | 17 13 1 1 0 0 0 48 | 17 14 1 0 0 0 0 49 | 5 20 1 0 0 0 0 50 | 17 21 1 0 0 0 0 51 | 21 20 1 0 0 0 0 52 | 21 22 1 1 0 0 0 53 | 21 23 1 6 0 0 0 54 | M END 55 | -------------------------------------------------------------------------------- /jchem/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | centres 7 | com.simolecule.centres 8 | 1.0 9 | 10 | 4.0.0 11 | 12 | centres-jchem 13 | jar 14 | 15 | 14.11.10.0 16 | provided 17 | 18 | 19 | 20 | 21 | com.simolecule.centres 22 | centres-core 23 | ${project.parent.version} 24 | 25 | 26 | com.chemaxon 27 | chemaxon-concurrent 28 | ${jchem.version} 29 | ${jchem.scope} 30 | 31 | 32 | com.chemaxon 33 | chemaxon-concurrent-utils 34 | ${jchem.version} 35 | ${jchem.scope} 36 | 37 | 38 | com.chemaxon 39 | chemaxon-core 40 | ${jchem.version} 41 | ${jchem.scope} 42 | 43 | 44 | com.chemaxon 45 | chemaxon-io 46 | ${jchem.version} 47 | ${jchem.scope} 48 | 49 | 50 | com.chemaxon 51 | chemaxon-io-smiles 52 | ${jchem.version} 53 | ${jchem.scope} 54 | 55 | 56 | com.chemaxon 57 | chemaxon-license 58 | ${jchem.version} 59 | ${jchem.scope} 60 | 61 | 62 | com.chemaxon 63 | chemaxon-common 64 | ${jchem.version} 65 | ${jchem.scope} 66 | 67 | 68 | 
69 | -------------------------------------------------------------------------------- /core/src/main/java/com/simolecule/centres/rules/Rule6.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020 John Mayfield 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions are met: 6 | * 7 | * 1. Redistributions of source code must retain the above copyright notice, 8 | * this list of conditions and the following disclaimer. 9 | * 10 | * 2. Redistributions in binary form must reproduce the above copyright notice, 11 | * this list of conditions and the following disclaimer in the documentation 12 | * and/or other materials provided with the distribution. 13 | * 14 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 18 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 20 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 22 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 | * POSSIBILITY OF SUCH DAMAGE. 
25 | */ 26 | 27 | package com.simolecule.centres.rules; 28 | 29 | import com.simolecule.centres.BaseMol; 30 | import com.simolecule.centres.Digraph; 31 | import com.simolecule.centres.Edge; 32 | 33 | /** 34 | * Sequence Rule 6 (proposed) 35 | * @param generic atom class 36 | */ 37 | public final class Rule6 extends SequenceRule { 38 | 39 | public Rule6(BaseMol mol) 40 | { 41 | super(mol); 42 | } 43 | 44 | @Override 45 | public boolean isPseudoAsymmetric() 46 | { 47 | return true; // comes after Rule 5 so must be true 48 | } 49 | 50 | @Override 51 | public int compare(Edge a, Edge b) 52 | { 53 | Digraph digraph = a.getBeg().getDigraph(); 54 | A ref = digraph.getRule6Ref(); 55 | if (ref == null) 56 | return 0; 57 | A aAtom = a.getEnd().getAtom(); 58 | A bAtom = b.getEnd().getAtom(); 59 | // JWM: note we had to go through rule 5 (pseudoasymmetric) to get here 60 | // so the return type is -2/+2 61 | if (ref.equals(aAtom) && !ref.equals(bAtom)) 62 | return +2; // a is ref (has priority) 63 | else if (!ref.equals(aAtom) && ref.equals(bAtom)) 64 | return -2; // b is ref (has priority) 65 | return 0; 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /core/src/main/java/com/simolecule/centres/rules/Rule4c.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020 John Mayfield 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions are met: 6 | * 7 | * 1. Redistributions of source code must retain the above copyright notice, 8 | * this list of conditions and the following disclaimer. 9 | * 10 | * 2. Redistributions in binary form must reproduce the above copyright notice, 11 | * this list of conditions and the following disclaimer in the documentation 12 | * and/or other materials provided with the distribution. 
13 | * 14 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 18 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 20 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 22 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 | * POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | package com.simolecule.centres.rules; 28 | 29 | import com.simolecule.centres.BaseMol; 30 | import com.simolecule.centres.Descriptor; 31 | import com.simolecule.centres.Edge; 32 | 33 | /** 34 | * Sequence Rule 4c 35 | * ‘r’ precedes ‘s’ and ‘m’ precedes ‘p’ 36 | * 37 | * @param generic atom class 38 | */ 39 | public final class Rule4c extends SequenceRule { 40 | 41 | public Rule4c(BaseMol mol) 42 | { 43 | super(mol); 44 | } 45 | 46 | private static int ord(Descriptor lab) { 47 | if (lab == null) 48 | return 0; 49 | switch (lab) { 50 | case m: 51 | case r: 52 | return 2; 53 | case p: 54 | case s: 55 | return 1; 56 | default: 57 | return 0; 58 | } 59 | } 60 | 61 | @Override 62 | public int compare(Edge a, Edge b) 63 | { 64 | // m vs p 65 | int aOrdinal = ord(getBondLabel(a)); 66 | int bOrdinal = ord(getBondLabel(b)); 67 | int cmp = Integer.compare(aOrdinal, bOrdinal); 68 | if (cmp != 0) return cmp; 69 | // r vs s 70 | aOrdinal = ord(a.getEnd().getAux()); 71 | bOrdinal = ord(b.getEnd().getAux()); 72 | return Integer.compare(aOrdinal, bOrdinal); 73 | } 74 | } 75 | 
-------------------------------------------------------------------------------- /core/src/test/resources/uk/ac/ebi/centres/cdk/(1R,2E,3S)-1-amino-2-(butan-2-ylidene)butane-1,3-diol.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 9 | 11 | 13 | 15 | 17 | 19 | 21 | 23 | 25 | 27 | 29 | 30 | 31 | 32 | 33 | T 34 | 35 | 36 | 37 | 38 | 39 | 40 | H 41 | 42 | 43 | 44 | W 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /cdk/src/test/resources/uk/ac/ebi/centres/cdk/alliin.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 9 | 11 | 13 | 15 | 17 | 19 | 21 | 23 | 25 | 27 | 29 | 31 | 32 | 33 | 34 | 35 | 36 | W 37 | 38 | 39 | 40 | 41 | 42 | W 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /cdk/src/test/resources/uk/ac/ebi/centres/cdk/ChEBI_4991.mol: -------------------------------------------------------------------------------- 1 | 2 | Marvin 10140916132D 3 | 4 | 27 30 0 0 1 0 999 V2000 5 | 11.7818 -8.7555 0.0000 Fe 0 0 0 0 0 0 0 0 0 0 0 0 6 | 11.7856 -9.6398 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 7 | 11.7856 -7.9263 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 8 | 10.4770 -9.8756 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 9 | 13.3420 -9.9543 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 10 | 13.0315 -7.1677 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 11 | 10.1586 -7.2425 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 12 | 11.4752 -10.2333 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 13 | 12.0019 -7.1284 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 14 | 10.4691 -10.7402 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 15 | 13.3223 -10.7796 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 16 | 13.0236 -6.4171 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 17 | 10.1625 -6.4171 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 18 | 11.8957 -10.7600 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0 19 | 10.9934 -10.3589 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 20 | 11.5931 
-6.4171 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0 21 | 12.4954 -6.7865 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 22 | 11.1766 -11.1608 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 23 | 9.6673 -11.0783 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 24 | 12.6031 -11.1844 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 25 | 13.9511 -11.0783 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 26 | 12.3084 -6.0006 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 27 | 13.7193 -5.9770 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 28 | 10.8817 -6.0006 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 29 | 9.4944 -5.9770 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 30 | 12.2887 -10.1232 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 31 | 11.3730 -7.2149 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 32 | 1 2 1 0 0 0 0 33 | 1 3 1 0 0 0 0 34 | 1 4 1 1 0 0 0 35 | 1 5 1 1 0 0 0 36 | 1 6 1 6 0 0 0 37 | 1 7 1 6 0 0 0 38 | 2 8 1 6 0 0 0 39 | 3 9 1 1 0 0 0 40 | 4 10 1 0 0 0 0 41 | 5 11 1 0 0 0 0 42 | 6 12 1 0 0 0 0 43 | 7 13 1 0 0 0 0 44 | 14 8 1 6 0 0 0 45 | 8 15 1 0 0 0 0 46 | 16 9 1 1 0 0 0 47 | 9 17 1 0 0 0 0 48 | 10 18 1 0 0 0 0 49 | 10 19 1 0 0 0 0 50 | 11 20 1 0 0 0 0 51 | 11 21 1 0 0 0 0 52 | 12 22 1 0 0 0 0 53 | 12 23 1 0 0 0 0 54 | 13 24 1 0 0 0 0 55 | 13 25 1 0 0 0 0 56 | 14 26 1 1 0 0 0 57 | 16 27 1 6 0 0 0 58 | 14 18 1 0 0 0 0 59 | 14 20 1 0 0 0 0 60 | 16 22 1 0 0 0 0 61 | 16 24 1 0 0 0 0 62 | M END 63 | -------------------------------------------------------------------------------- /core/src/main/java/com/simolecule/centres/rules/Rule5.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020 John Mayfield 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions are met: 6 | * 7 | * 1. Redistributions of source code must retain the above copyright notice, 8 | * this list of conditions and the following disclaimer. 9 | * 10 | * 2. 
Redistributions in binary form must reproduce the above copyright notice, 11 | * this list of conditions and the following disclaimer in the documentation 12 | * and/or other materials provided with the distribution. 13 | * 14 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 18 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 20 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 22 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 | * POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | package com.simolecule.centres.rules; 28 | 29 | import com.simolecule.centres.BaseMol; 30 | import com.simolecule.centres.Descriptor; 31 | import com.simolecule.centres.Edge; 32 | 33 | /** 34 | * Sequence Rule 5 35 | * An atom or group with descriptor 'R','M' and 'seqCis' 36 | * has priority over its enantiomorph 'S', 'P' or 'seqTrans'. 
37 | * 38 | * @param generic atom class 39 | */ 40 | public final class Rule5 extends SequenceRule { 41 | 42 | public Rule5(BaseMol mol) 43 | { 44 | super(mol); 45 | } 46 | 47 | private static int ord(Descriptor lab) { 48 | if (lab == null) 49 | return 0; 50 | switch (lab) { 51 | case R: 52 | case M: 53 | case seqCis: 54 | return 2; 55 | case S: 56 | case P: 57 | case seqTrans: 58 | return 1; 59 | default: 60 | return 0; 61 | } 62 | } 63 | 64 | @Override 65 | public boolean isPseudoAsymmetric() 66 | { 67 | return true; 68 | } 69 | 70 | @Override 71 | public int compare(Edge a, Edge b) 72 | { 73 | int aOrdinal = ord(getBondLabel(a)); 74 | int bOrdinal = ord(getBondLabel(b)); 75 | int cmp = Integer.compare(aOrdinal, bOrdinal); 76 | if (cmp != 0) return cmp; 77 | aOrdinal = ord(a.getEnd().getAux()); 78 | bOrdinal = ord(b.getEnd().getAux()); 79 | return 2*Integer.compare(aOrdinal, bOrdinal); 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /core/src/test/java/uk/ac/ebi/centres/test/TestAtom.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020 John Mayfield 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions are met: 6 | * 7 | * 1. Redistributions of source code must retain the above copyright notice, 8 | * this list of conditions and the following disclaimer. 9 | * 10 | * 2. Redistributions in binary form must reproduce the above copyright notice, 11 | * this list of conditions and the following disclaimer in the documentation 12 | * and/or other materials provided with the distribution. 
/**
 * Minimal mutable atom used by the tests: an element symbol, an atomic
 * number and a mass number (0 when none was given).
 *
 * @author John May
 */
public class TestAtom {

    private String symbol;
    private int atomicNumber;
    private int massNumber;


    /**
     * Creates an atom with mass number 0.
     */
    public TestAtom(String symbol, int atomicNumber) {
        this(symbol, atomicNumber, 0);
    }


    public TestAtom(String symbol, int atomicNumber, int massNumber) {
        this.symbol = symbol;
        this.atomicNumber = atomicNumber;
        this.massNumber = massNumber;
    }


    public String getSymbol() {
        return this.symbol;
    }


    public void setSymbol(String symbol) {
        this.symbol = symbol;
    }


    public int getAtomicNumber() {
        return this.atomicNumber;
    }


    public void setAtomicNumber(int atomicNumber) {
        this.atomicNumber = atomicNumber;
    }


    public int getMassNumber() {
        return this.massNumber;
    }


    public void setMassNumber(int massNumber) {
        this.massNumber = massNumber;
    }


    /**
     * @return the symbol, followed by the mass number when it is non-zero
     */
    @Override
    public String toString() {
        if (massNumber == 0) {
            return String.valueOf(symbol);
        }
        return symbol + Integer.toString(massNumber);
    }
}
25 | */ 26 | 27 | package uk.ac.cam.ch.wwmm.opsin; 28 | 29 | import centres.AbstractValidationSuite; 30 | import com.simolecule.centres.BaseMol; 31 | import org.junit.Assert; 32 | import org.junit.Test; 33 | 34 | public class ValidateOpsinTest extends AbstractValidationSuite { 35 | @Test 36 | public void testAssignment() { 37 | try { 38 | IDManager idManager = new IDManager(); 39 | SMILESFragmentBuilder fragbuilder = new SMILESFragmentBuilder(idManager); 40 | FragmentManager manager = new FragmentManager(fragbuilder, idManager); 41 | Fragment fragment = manager.buildSMILES(expected.getSmiles()); 42 | manager.makeHydrogensExplicit(); 43 | 44 | BaseMol mol = new OpsinMol(fragment); 45 | 46 | new OpsinLabeller().label(mol, OpsinLabeller.createCfgs(fragment)); 47 | 48 | check(mol, 49 | new GenSmiles() { 50 | @Override 51 | public String generate(BaseMol mol) 52 | { 53 | try { 54 | return SMILESWriter.generateSmiles(((OpsinMol) mol).getBaseImpl()); 55 | } catch (Exception ex) { 56 | return "ERROR: " + ex.getMessage(); 57 | } 58 | } 59 | }); 60 | //System.out.println(expected.getSmiles()); 61 | } catch (StructureBuildingException ex) { 62 | Assert.fail("Could not parse SMILES: " + expected.getSmiles()); 63 | } 64 | } 65 | 66 | 67 | 68 | } 69 | -------------------------------------------------------------------------------- /core/src/main/java/com/simolecule/centres/rules/Rule4a.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020 John Mayfield 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions are met: 6 | * 7 | * 1. Redistributions of source code must retain the above copyright notice, 8 | * this list of conditions and the following disclaimer. 9 | * 10 | * 2. 
Redistributions in binary form must reproduce the above copyright notice, 11 | * this list of conditions and the following disclaimer in the documentation 12 | * and/or other materials provided with the distribution. 13 | * 14 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 18 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 20 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 22 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 | * POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | package com.simolecule.centres.rules; 28 | 29 | import com.simolecule.centres.BaseMol; 30 | import com.simolecule.centres.Descriptor; 31 | import com.simolecule.centres.Edge; 32 | 33 | /** 34 | * Sequence Rule 4a 35 | * Chiral stereogenic units precede pseudoasymmetric stereogenic 36 | * units and these precede nonstereogenic units. 
37 | * 38 | * @param generic atom class 39 | */ 40 | public final class Rule4a extends SequenceRule { 41 | 42 | public Rule4a(BaseMol mol) 43 | { 44 | super(mol); 45 | } 46 | 47 | private static int ord(Descriptor lab) { 48 | if (lab == null) 49 | return 0; 50 | switch (lab) { 51 | case Unknown: 52 | case ns: 53 | return 0; 54 | case r: 55 | case s: 56 | case m: 57 | case p: 58 | case E: 59 | case Z: 60 | return 1; 61 | case R: 62 | case S: 63 | case M: 64 | case P: 65 | case seqTrans: 66 | case seqCis: 67 | return 2; 68 | default: 69 | throw new IllegalArgumentException("New label?"); 70 | } 71 | } 72 | 73 | @Override 74 | public int compare(Edge a, Edge b) 75 | { 76 | int aOrdinal = ord(getBondLabel(a)); 77 | int bOrdinal = ord(getBondLabel(b)); 78 | int cmp = Integer.compare(aOrdinal, bOrdinal); 79 | if (cmp != 0) return cmp; 80 | aOrdinal = ord(a.getEnd().getAux()); 81 | bOrdinal = ord(b.getEnd().getAux()); 82 | return Integer.compare(aOrdinal, bOrdinal); 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /core/src/main/java/com/simolecule/centres/Edge.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020 John Mayfield 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions are met: 6 | * 7 | * 1. Redistributions of source code must retain the above copyright notice, 8 | * this list of conditions and the following disclaimer. 9 | * 10 | * 2. Redistributions in binary form must reproduce the above copyright notice, 11 | * this list of conditions and the following disclaimer in the documentation 12 | * and/or other materials provided with the distribution. 
13 | * 14 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 18 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 20 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 22 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 | * POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | package com.simolecule.centres; 28 | /** Link from a begin node to an end node that also holds the associated bond object and an optional auxiliary descriptor (aux), which is null until setAux is called. */ 29 | public class Edge { 30 | 31 | private Node beg, end; 32 | private final B bond; 33 | private Descriptor aux; 34 | 35 | public Edge(Node beg, Node end, B bond) 36 | { 37 | this.beg = beg; 38 | this.end = end; 39 | this.bond = bond; 40 | } 41 | /* Returns the opposite end-point of the given node (compared with equals); throws IllegalArgumentException if the node is neither the begin nor the end of this edge. */ 42 | public Node getOther(Node node) 43 | { 44 | if (node.equals(getBeg())) 45 | return getEnd(); 46 | else if (node.equals(getEnd())) 47 | return getBeg(); 48 | else 49 | throw new IllegalArgumentException("Not an end-point of this edge!"); 50 | } 51 | 52 | public Node getBeg() 53 | { 54 | return beg; 55 | } 56 | 57 | public Node getEnd() 58 | { 59 | return end; 60 | } 61 | 62 | public Descriptor getAux() 63 | { 64 | return aux; 65 | } 66 | 67 | public void setAux(Descriptor aux) 68 | { 69 | this.aux = aux; 70 | } 71 | /* Swaps the begin and end nodes in place; the bond and aux descriptor are left unchanged. */ 72 | public void flip() 73 | { 74 | Node tmp = end; 75 | end = beg; 76 | beg = tmp; 77 | } 78 | 79 | public B getBond() 80 | { 81 | return bond; 82 | } 83 | 84 | public boolean isBeg(Node node) 85 | { 86 | return node.equals(beg); 87 | } 88 | 89 | public boolean isEnd(Node node) 90 | { 91 | return node.equals(end); 92 | } 93 | 
94 | @Override 95 | public String toString() 96 | { 97 | return beg.toString() + "->" + end.toString(); 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /core/src/main/java/com/simolecule/centres/rules/Rule2.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020 John Mayfield 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions are met: 6 | * 7 | * 1. Redistributions of source code must retain the above copyright notice, 8 | * this list of conditions and the following disclaimer. 9 | * 10 | * 2. Redistributions in binary form must reproduce the above copyright notice, 11 | * this list of conditions and the following disclaimer in the documentation 12 | * and/or other materials provided with the distribution. 13 | * 14 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 18 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 20 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 22 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 | * POSSIBILITY OF SUCH DAMAGE. 
25 | */ 26 | 27 | package com.simolecule.centres.rules; 28 | 29 | import com.simolecule.centres.BaseMol; 30 | import com.simolecule.centres.Edge; 31 | import com.simolecule.centres.Isotope; 32 | 33 | /** 34 | * Sequence Rule 2 35 | * "Higher atomic mass number precedes lower." 36 | * Compares the end atoms of two edges by isotope weight. Duplicate 37 | * nodes count as mass number 0; if both mass numbers are 0 the edges tie. 38 | * When Isotope.find has no entry, the mass number itself is the weight. 39 | * 40 | * @param generic atom class 41 | */ 42 | public class Rule2 extends SequenceRule { 43 | 44 | private final BaseMol mol; 45 | 46 | public Rule2(BaseMol mol) 47 | { 48 | super(mol); 49 | this.mol = mol; 50 | } 51 | 52 | @Override 53 | public int compare(Edge a, Edge b) 54 | { 55 | int aAtomNum = mol.getAtomicNum(a.getEnd().getAtom()); 56 | int bAtomNum = mol.getAtomicNum(b.getEnd().getAtom()); 57 | int aMassNum = a.getEnd().isDuplicate() ? 0 : mol.getMassNum(a.getEnd().getAtom()); 58 | int bMassNum = b.getEnd().isDuplicate() ? 0 : mol.getMassNum(b.getEnd().getAtom()); 59 | if (aMassNum == 0 && bMassNum == 0) 60 | return 0; 61 | Isotope aiso = Isotope.find(aAtomNum, aMassNum); 62 | Isotope biso = Isotope.find(bAtomNum, bMassNum); 63 | 64 | double aweight, bweight; 65 | if (aiso == null) 66 | aweight = aMassNum; 67 | else 68 | aweight = aiso.getWeight(); 69 | if (biso == null) 70 | bweight = bMassNum; 71 | else 72 | bweight = biso.getWeight(); 73 | 74 | return Double.compare(aweight, bweight); 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /core/src/main/java/com/simolecule/centres/rules/Rule1b.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020 John Mayfield 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions are met: 6 | * 7 | * 1. 
Redistributions of source code must retain the above copyright notice, 8 | * this list of conditions and the following disclaimer. 9 | * 10 | * 2. Redistributions in binary form must reproduce the above copyright notice, 11 | * this list of conditions and the following disclaimer in the documentation 12 | * and/or other materials provided with the distribution. 13 | * 14 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 18 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 20 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 22 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 | * POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | package com.simolecule.centres.rules; 28 | 29 | import com.simolecule.centres.BaseMol; 30 | import com.simolecule.centres.Edge; 31 | import com.simolecule.centres.Node; 32 | 33 | /** 34 | * Sequence Rule 1b 35 | * "A duplicate atom node whose corresponding nonduplicated atom 36 | * node is the root or is closer to the root ranks higher than 37 | * a duplicate atom node whose corresponding nonduplicated atom 38 | * node is farther from the root." 39 | * 40 | * @param generic atom class 41 | */ 42 | public class Rule1b extends SequenceRule { 43 | 44 | /** 45 | * Flag indicates whether to match the problematic 46 | * IUPAC 2013 recommendations for Rule 1B. 
47 | */ 48 | private static final boolean IUPAC_2013 = false; 49 | 50 | public Rule1b(BaseMol mol) { 51 | super(mol); 52 | } 53 | /* With IUPAC_2013 set, the result is always the negated comparison of the end nodes' distances. Otherwise that negated comparison is used only when both end nodes have RING_DUPLICATE set; if exactly one has it, that edge compares greater (+1 when it is a, -1 when it is b); if neither has it the edges tie (0). */ 54 | @Override 55 | public int compare(Edge a, Edge b) { 56 | if (IUPAC_2013) { 57 | return -Integer.compare(a.getEnd().getDistance(), 58 | b.getEnd().getDistance()); 59 | } else { 60 | if (a.getEnd().isSet(Node.RING_DUPLICATE) && 61 | b.getEnd().isSet(Node.RING_DUPLICATE)) 62 | return -Integer.compare(a.getEnd().getDistance(), 63 | b.getEnd().getDistance()); 64 | else { 65 | if (a.getEnd().isSet(Node.RING_DUPLICATE) && !b.getEnd() 66 | .isSet(Node.RING_DUPLICATE)) 67 | return +1; 68 | if (!a.getEnd().isSet(Node.RING_DUPLICATE) && b.getEnd() 69 | .isSet(Node.RING_DUPLICATE)) 70 | return -1; 71 | return 0; 72 | } 73 | } 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /cdk/src/test/resources/uk/ac/ebi/centres/cdk/ChEBI_3049.mol: -------------------------------------------------------------------------------- 1 | 2 | Marvin 04211012442D 3 | 4 | 31 33 0 0 0 0 999 V2000 5 | 5.0933 -5.5515 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0 6 | 6.1822 -6.3701 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0 7 | 5.6597 -5.9352 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 8 | 4.6662 -5.9352 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0 9 | 4.5215 -5.1055 0.0000 N 0 3 0 0 0 0 0 0 0 0 0 0 10 | 5.9596 -5.5696 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 11 | 4.1353 -6.6591 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 12 | 4.7670 -6.2764 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 13 | 6.8697 -7.5363 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 14 | 6.1602 -7.1609 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 15 | 3.8600 -5.7119 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 16 | 5.5023 -4.8311 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 17 | 7.5685 -7.0977 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 18 | 6.9002 -8.3608 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 19 | 8.2970 -7.4867 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 20 | 8.9953 -7.0487 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 21 | 8.9654 -6.2233 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 22 | 8.2310 -5.8378 0.0000 C 0 0 0 0 0 0 0
0 0 0 0 0 23 | 7.5357 -6.2780 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 24 | 6.2018 -8.7962 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 25 | 6.2319 -9.6200 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 26 | 6.9618 -10.0067 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 27 | 7.6628 -9.5635 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 28 | 7.6293 -8.7413 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 29 | 4.0968 -4.3980 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 30 | 6.8967 -5.9576 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 31 | 3.7797 -8.4642 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 32 | 4.4942 -8.0517 0.0000 S 0 0 0 0 0 0 0 0 0 0 0 0 33 | 5.2087 -8.4642 0.0000 O 0 5 0 0 0 0 0 0 0 0 0 0 34 | 4.0817 -7.3372 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 35 | 4.9067 -7.3372 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 36 | 1 12 1 6 0 0 0 37 | 9 13 1 0 0 0 0 38 | 4 5 1 0 0 0 0 39 | 9 14 1 0 0 0 0 40 | 5 1 1 0 0 0 0 41 | 13 15 2 0 0 0 0 42 | 1 6 1 0 0 0 0 43 | 15 16 1 0 0 0 0 44 | 6 2 1 0 0 0 0 45 | 16 17 2 0 0 0 0 46 | 4 7 1 0 0 0 0 47 | 17 18 1 0 0 0 0 48 | 1 8 1 0 0 0 0 49 | 18 19 2 0 0 0 0 50 | 19 13 1 0 0 0 0 51 | 7 8 1 0 0 0 0 52 | 14 20 2 0 0 0 0 53 | 10 9 1 0 0 0 0 54 | 20 21 1 0 0 0 0 55 | 2 3 1 0 0 0 0 56 | 21 22 2 0 0 0 0 57 | 2 10 1 0 0 0 0 58 | 22 23 1 0 0 0 0 59 | 3 4 1 0 0 0 0 60 | 23 24 2 0 0 0 0 61 | 24 14 1 0 0 0 0 62 | 4 11 1 1 0 0 0 63 | 5 25 1 0 0 0 0 64 | 2 26 1 1 0 0 0 65 | 27 28 1 0 0 0 0 66 | 28 29 1 0 0 0 0 67 | 28 30 2 0 0 0 0 68 | 28 31 2 0 0 0 0 69 | M CHG 2 5 1 29 -1 70 | M END 71 | -------------------------------------------------------------------------------- /core/src/test/resources/uk/ac/ebi/centres/cdk/l-chiro-inostiol.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 9 | 11 | 13 | 15 | 17 | 19 | 21 | 23 | 25 | 27 | 29 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | H 41 | 42 | 43 | H 44 | 45 | 46 | H 47 | 48 | 49 | W 50 | 51 | 52 | W 53 | 54 | 55 | W 56 | 57 | 58 | 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- 
/cdk/src/test/resources/uk/ac/ebi/centres/cdk/myo-inositol.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 9 | 11 | 13 | 15 | 17 | 19 | 21 | 23 | 25 | 27 | 29 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | W 41 | 42 | 43 | H 44 | 45 | 46 | H 47 | 48 | 49 | W 50 | 51 | 52 | W 53 | 54 | 55 | W 56 | 57 | 58 | 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /core/src/test/resources/uk/ac/ebi/centres/cdk/myo-inositol.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 9 | 11 | 13 | 15 | 17 | 19 | 21 | 23 | 25 | 27 | 29 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | W 41 | 42 | 43 | H 44 | 45 | 46 | W 47 | 48 | 49 | W 50 | 51 | 52 | W 53 | 54 | 55 | H 56 | 57 | 58 | 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /cdk/src/test/java/com/simolecule/centres/ShuffleTest.java: -------------------------------------------------------------------------------- 1 | package com.simolecule.centres; 2 | 3 | import org.junit.Assert; 4 | import org.junit.Test; 5 | import org.openscience.cdk.exception.InvalidSmilesException; 6 | import org.openscience.cdk.interfaces.IAtomContainer; 7 | import org.openscience.cdk.interfaces.IBond; 8 | import org.openscience.cdk.silent.SilentChemObjectBuilder; 9 | import org.openscience.cdk.smiles.SmilesParser; 10 | import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; 11 | 12 | import java.util.Arrays; 13 | import java.util.Collections; 14 | import java.util.List; 15 | /** Checks that the "cip.label" assigned by CdkLabeller does not change with bond order: each test relabels the same molecule NUM_TRIALS times, shuffling its bonds before every run. */ 16 | public class ShuffleTest { 17 | 18 | public static final int NUM_TRIALS = 100; 19 | /* Puts the molecule's bonds into a random order: the bond array is shuffled through a list view and then set back on the molecule. */ 20 | private static void shuffleBonds(IAtomContainer mol) { 21 | List bondList = Arrays.asList(AtomContainerManipulator.getBondArray(mol)); 22 | Collections.shuffle(bondList); 23 | mol.setBonds(bondList.toArray(new IBond[0])); 24 | } 25 | 26 | // 
we do not have enough information to assign a label here, see 27 | // GitHub Issue #9 28 | @Test public void test_alwaysUndefined() throws InvalidSmilesException { 29 | SmilesParser smilesParser = new SmilesParser(SilentChemObjectBuilder.getInstance()); 30 | IAtomContainer mol = smilesParser.parseSmiles("O[C@H](C(*)C)C(C)O"); 31 | for (int i = 0; i < NUM_TRIALS; i++) { 32 | shuffleBonds(mol); 33 | CdkLabeller.label(mol); 34 | Assert.assertNull(mol.getAtom(1).getProperty("cip.label")); 35 | } 36 | } 37 | 38 | @Test public void test_alwaysUndefined_OH() throws InvalidSmilesException { 39 | SmilesParser smilesParser = new SmilesParser(SilentChemObjectBuilder.getInstance()); 40 | IAtomContainer mol = smilesParser.parseSmiles("O[Co@OH1](C)(C)(C)(*)O"); 41 | for (int i = 0; i < NUM_TRIALS; i++) { 42 | shuffleBonds(mol); 43 | CdkLabeller.label(mol); 44 | Assert.assertNull(mol.getAtom(1).getProperty("cip.label")); 45 | } 46 | } 47 | 48 | // similar to the above but there is enough info since we split ties before 49 | // reaching the * 50 | @Test public void test_alwaysDefined() throws InvalidSmilesException { 51 | SmilesParser smilesParser = new SmilesParser(SilentChemObjectBuilder.getInstance()); 52 | IAtomContainer mol = smilesParser.parseSmiles("O[C@H](C(C*)C)C(C)O"); 53 | for (int i = 0; i < NUM_TRIALS; i++) { 54 | shuffleBonds(mol); 55 | CdkLabeller.label(mol); 56 | Assert.assertEquals(Descriptor.R, mol.getAtom(1).getProperty("cip.label")); 57 | } 58 | } 59 | 60 | // See GitHub Issue #6 61 | @Test public void test_consistent() throws InvalidSmilesException { 62 | SmilesParser smilesParser = new SmilesParser(SilentChemObjectBuilder.getInstance()); 63 | IAtomContainer mol = smilesParser.parseSmiles("O[C@H]1O[C@@]2(O[C@H]1O)O[C@H]([C@H](O2)O)O"); 64 | for (int i = 0; i < NUM_TRIALS; i++) { 65 | shuffleBonds(mol); 66 | CdkLabeller.label(mol); 67 | Assert.assertEquals(Descriptor.S, mol.getAtom(1).getProperty("cip.label")); 68 | Assert.assertEquals(Descriptor.R, 
mol.getAtom(3).getProperty("cip.label")); 69 | Assert.assertEquals(Descriptor.R, mol.getAtom(5).getProperty("cip.label")); 70 | Assert.assertEquals(Descriptor.R, mol.getAtom(8).getProperty("cip.label")); 71 | Assert.assertEquals(Descriptor.S, mol.getAtom(9).getProperty("cip.label")); 72 | } 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /core/src/main/java/com/simolecule/centres/rules/Priority.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020 John Mayfield 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions are met: 6 | * 7 | * 1. Redistributions of source code must retain the above copyright notice, 8 | * this list of conditions and the following disclaimer. 9 | * 10 | * 2. Redistributions in binary form must reproduce the above copyright notice, 11 | * this list of conditions and the following disclaimer in the documentation 12 | * and/or other materials provided with the distribution. 13 | * 14 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 18 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 20 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 22 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 | * POSSIBILITY OF SUCH DAMAGE. 
25 | */ 26 | 27 | package com.simolecule.centres.rules; 28 | 29 | import java.util.Set; 30 | 31 | /** 32 | * Holds some properties that are determined when sorting/prioritising ligands. 33 | * 34 | * @author John May 35 | */ 36 | public class Priority { 37 | 38 | private final Boolean unique; 39 | private final boolean foundWildcard; 40 | private final boolean pseudoAsym; 41 | private Set> duplicates; 42 | private int ruleIdx; 43 | 44 | 45 | public Priority(boolean unique, 46 | boolean foundWildcard, 47 | int ruleIdx, 48 | boolean pseudoAsym) { 49 | this.unique = unique; 50 | this.foundWildcard = foundWildcard; 51 | this.pseudoAsym = pseudoAsym; 52 | this.ruleIdx = ruleIdx; 53 | } 54 | 55 | public Priority(boolean unique, 56 | boolean foundWildcard, 57 | boolean pseudoAsym, 58 | Set> duplicates) { 59 | this.unique = unique; 60 | this.foundWildcard = foundWildcard; 61 | this.pseudoAsym = pseudoAsym; 62 | this.duplicates = duplicates; 63 | } 64 | 65 | 66 | /** 67 | * Indicates whether the ligands were unique (i.e. could be ordered) 68 | * 69 | * @return whether the ligands were unique 70 | */ 71 | public boolean isUnique() { 72 | return unique; 73 | } 74 | /* Returns the rule index given to the constructor that takes ruleIdx; it stays 0 when the constructor taking duplicates was used, since that one never assigns it. */ 75 | public int getRuleIdx() { 76 | return ruleIdx; 77 | } 78 | 79 | /** 80 | * Indicates whether the centre was flagged as pseudo-asymmetric, i.e. 81 | * returns the pseudoAsym value passed to the constructor. Note that 82 | * the method name is misspelled ("Pseduo", "Asymettric"). 
83 | * 84 | * @return the pseudo-asymmetric flag 85 | */ 86 | public boolean isPseduoAsymettric() { 87 | return pseudoAsym; 88 | } 89 | 90 | /** 91 | * Indicates whether a wildcard was found (the foundWildcard constructor flag). 92 | * @return the foundWildcard flag 93 | */ 94 | public boolean wasWildcardFound() { 95 | return foundWildcard; 96 | } 97 | 98 | } 99 | -------------------------------------------------------------------------------- /core/src/main/java/com/simolecule/centres/Descriptor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020 John Mayfield 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions are met: 6 | * 7 | * 1. Redistributions of source code must retain the above copyright notice, 8 | * this list of conditions and the following disclaimer. 9 | * 10 | * 2. Redistributions in binary form must reproduce the above copyright notice, 11 | * this list of conditions and the following disclaimer in the documentation 12 | * and/or other materials provided with the distribution. 13 | * 14 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 18 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 20 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 22 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 | * POSSIBILITY OF SUCH DAMAGE. 
25 | */ 26 | 27 | package com.simolecule.centres; 28 | 29 | /** 30 | * Defines a descriptor which can be assigned to an atom to indicate the type of 31 | * chirality (if there is any). Each descriptor defines its general {@link 32 | * Type} which can be useful when comparing centres of different geometry. NOTE(review): no Type is declared in this enum - confirm the reference. 33 | * 34 | * @author John May 35 | */ 36 | public enum Descriptor { 37 | /** 38 | * Unknown/Unspecified 39 | */ 40 | Unknown, 41 | /** 42 | * Other 43 | */ 44 | ns, 45 | /** 46 | * Tetrahedral 47 | */ 48 | R, 49 | S, 50 | r, 51 | s, 52 | /** 53 | * Cis/Trans 54 | */ 55 | seqTrans, 56 | seqCis, 57 | E, 58 | Z, 59 | /* Axial */ 60 | M, 61 | P, 62 | m, 63 | p, 64 | 65 | SP_4, 66 | TBPY_5, 67 | OC_6; 68 | /* True only for r, s, seqCis, seqTrans, m and p; false for every other constant. */ 69 | boolean isPseudoAsymmetric() 70 | { 71 | switch (this) { 72 | case r: 73 | case s: 74 | case seqCis: 75 | case seqTrans: 76 | case m: 77 | case p: 78 | return true; 79 | default: 80 | return false; 81 | } 82 | } 83 | /* Parses a label string: R, S, r, s, M, P, m, p, E, Z, seqTrans and seqCis map to the constant of the same name and "U" maps to Unknown. Any other string (including "ns", "SP_4", "TBPY_5" and "OC_6", which have no case here) throws IllegalArgumentException. */ 84 | public static Descriptor parse(String str) 85 | { 86 | switch (str) { 87 | case "R": 88 | return R; 89 | case "S": 90 | return S; 91 | case "r": 92 | return r; 93 | case "s": 94 | return s; 95 | case "M": 96 | return M; 97 | case "P": 98 | return P; 99 | case "m": 100 | return m; 101 | case "p": 102 | return p; 103 | case "E": 104 | return E; 105 | case "Z": 106 | return Z; 107 | case "seqTrans": 108 | return seqTrans; 109 | case "seqCis": 110 | return seqCis; 111 | case "U": 112 | return Unknown; 113 | default: 114 | throw new IllegalArgumentException("Unknown descriptor label: " + str); 115 | } 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Stereochemistry Labelling Done Good 2 | 3 | Centres is an Open Source Java library for that allows perception and labelling of stereogenic centres in chemical structures using the [Cahn-Ingold-Prelog priority 
rules](https://en.wikipedia.org/wiki/Cahn%E2%80%93Ingold%E2%80%93Prelog_priority_rules). 4 | 5 | ### How to Use 6 | 7 | You can use Centres interactively at http://www.simolecule.com/cdkdepict by selecting the annotation "CIP Stereo Label". Alternatively you can download the `centres.jar` (built on CDK) from the releases page. This JAR can be used to label and test an SD or SMILES file. 8 | 9 | ``` 10 | java -jar centres.jar input.sdf 11 | ``` 12 | 13 | To run the benchmark tests presented by [Hanson *et al*](https://chemrxiv.org/articles/Algorithmic_Analysis_of_Cahn-Ingold-Prelog_Rules_of_Stereochemistry_Proposals_for_Revised_Rules_and_a_Guide_for_Machine_Implementation/6342881) (note limitations below) run the following commands. The files can be downloaded from https://cipvalidationsuite.github.io/ValidationSuite/. 14 | 15 | ``` 16 | java -jar centres.jar compounds_2d.sdf -e CIP_LABELS 17 | java -jar centres.jar compounds_3d.sdf -e CIP_LABELS 18 | java -jar centres.jar compounds.smi -e 1 19 | ``` 20 | 21 | ### Key Features 22 | * Generic library allowing [dependency injection](http://en.wikipedia.org/wiki/Dependency_injection) of any molecule/atom object representation. Currently 23 | supported 'endpoints': 24 | * [Chemistry Development Kit](https://github.com/cdk/cdk) 25 | * [OPSIN](https://bitbucket.org/dan2097/opsin/) 26 | * [JChem](https://chemaxon.com/products/jchem-engines) 27 | * Perception and labelling of tetrahedral (__R__/__S__/__r__/__s__) and geometric double bonds (__Z__/__E__). 28 | * Implementation of the Cahn-Ingold-Prelog (CIP) priority rules as they appear in Prelog and Helmchen, 1982 29 | * Implementation of the Cahn-Ingold-Prelog (CIP) priority rules as they appear in Nomenclature of Organic Chemistry, IUPAC Recommendations and Preferred Names 2013 30 | 31 | ### Install 32 | 33 | `jchem` and `opsin` backends do not currently pass the internal validation tests and should be skipped when running tests. 
34 | 35 | ``` 36 | mvn install -pl '!jchem,!opsin' 37 | ``` 38 | 39 | alternatively skip the tests: 40 | 41 | ``` 42 | mvn install -DskipTests 43 | ``` 44 | 45 | ### License 46 | [BSD 2-Clause](https://opensource.org/licenses/BSD-2-Clause) 47 | 48 | ### Authors 49 | John Mayfield (né May) 50 | 51 | ### Limitations 52 | - Helicene and chirality planes are not supported 53 | - SMILES does not capture axial atropisomerism but this is supported in 2D/3D 54 | - When using CDK the 3D SDfile must find stereocentres with little information available; the current algorithm does not find all centres in the test sets by default. To prevent this, a modified version of CDK is used in Centres that captures all possible tetrahedral/double bond stereochemistries. 55 | 56 | ### References 57 | * Robert M. Hanson, John Mayfield, Mikko J. Vainio, Andrey Yerin, Dmitry Redkin, Sophia Musacchio. Algorithmic Analysis of Cahn-Ingold-Prelog Rules of Stereochemistry: Proposals for Revised Rules and a Guide for Machine Implementation. __Submitted__ [preprint](https://chemrxiv.org/articles/Algorithmic_Analysis_of_Cahn-Ingold-Prelog_Rules_of_Stereochemistry_Proposals_for_Revised_Rules_and_a_Guide_for_Machine_Implementation/6342881). 58 | * Prelog and Helmchen. Basic Principles of the CIP-System and Proposals for a Revision. __Angewandte Chemie International Edition__ 21 (1982) 567-583 59 | * Perdih and Ražinger. Stereochemistry and Sequence Rules: A Proposal for Modification of Cahn-Ingold-Prelog System. __Tetrahedron: Asymmetry__ Vol 5 (1994) 835-861 60 | -------------------------------------------------------------------------------- /cdk/src/test/java/com/simolecule/centres/ValidateCdkTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020 John Mayfield 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions are met: 6 | * 7 | * 1. 
Redistributions of source code must retain the above copyright notice, 8 | * this list of conditions and the following disclaimer. 9 | * 10 | * 2. Redistributions in binary form must reproduce the above copyright notice, 11 | * this list of conditions and the following disclaimer in the documentation 12 | * and/or other materials provided with the distribution. 13 | * 14 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 18 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 20 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 22 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 | * POSSIBILITY OF SUCH DAMAGE. 
25 | */ 26 | 27 | package com.simolecule.centres; 28 | 29 | import centres.AbstractValidationSuite; 30 | import org.junit.Test; 31 | import org.openscience.cdk.CDKConstants; 32 | import org.openscience.cdk.exception.CDKException; 33 | import org.openscience.cdk.interfaces.IAtom; 34 | import org.openscience.cdk.interfaces.IAtomContainer; 35 | import org.openscience.cdk.interfaces.IBond; 36 | import org.openscience.cdk.silent.SilentChemObjectBuilder; 37 | import org.openscience.cdk.smiles.SmiFlavor; 38 | import org.openscience.cdk.smiles.SmilesGenerator; 39 | import org.openscience.cdk.smiles.SmilesParser; 40 | /** Validation test for the CDK backend, built on AbstractValidationSuite: parses the expected SMILES, labels the molecule with CdkLabeller and passes it to check() together with a SMILES generator used for reporting. */ 41 | public class ValidateCdkTest extends AbstractValidationSuite { 42 | /* Despite its name this is a SmilesParser; it is used to read the expected SMILES. */ 43 | private static final SmilesParser smigen = new SmilesParser(SilentChemObjectBuilder.getInstance()); 44 | 45 | @Test 46 | public void testAssignment() throws Exception 47 | { 48 | IAtomContainer base = smigen.parseSmiles(expected.getSmiles()); 49 | CdkMol mol = new CdkMol(base); 50 | new CdkLabeller().label(mol, CdkLabeller.createConfigs(base)); 51 | check(mol, new GenSmiles() { 52 | @Override 53 | public String generate(BaseMol mol) 54 | { 55 | try { 56 | return toSmiles((IAtomContainer) mol.getBaseImpl()); 57 | } catch (CDKException e) { 58 | return "ERROR: " + e.getMessage(); 59 | } 60 | } 61 | }); 62 | } 63 | /* Writes labels into the COMMENT property and returns a SMILES created with SmiFlavor.CxAtomValue | SmiFlavor.Isomeric. For atoms the configuration index wins over the CIP label; a label is only copied when it is non-null and not Unknown. A bond's label is stored on the bond's begin atom, overwriting any comment set there by the atom loop. */ 64 | private String toSmiles(IAtomContainer mol) throws CDKException 65 | { 66 | for (IAtom atom : mol.atoms()) { 67 | Descriptor descriptor = atom.getProperty(BaseMol.CIP_LABEL_KEY); 68 | String configIdx = atom.getProperty(BaseMol.CONF_INDEX); 69 | if (configIdx != null) 70 | atom.setProperty(CDKConstants.COMMENT, configIdx); 71 | else if (descriptor != null && descriptor != Descriptor.Unknown) 72 | atom.setProperty(CDKConstants.COMMENT, descriptor); 73 | } 74 | for (IBond bond : mol.bonds()) { 75 | Descriptor descriptor = bond.getProperty(BaseMol.CIP_LABEL_KEY); 76 | if (descriptor != null && descriptor != Descriptor.Unknown) 77 | 
bond.getBegin().setProperty(CDKConstants.COMMENT, descriptor); 78 | } 79 | return new SmilesGenerator(SmiFlavor.CxAtomValue | SmiFlavor.Isomeric).create(mol); 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /opsin/src/main/java/uk/ac/cam/ch/wwmm/opsin/OpsinLabeller.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020 John Mayfield 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions are met: 6 | * 7 | * 1. Redistributions of source code must retain the above copyright notice, 8 | * this list of conditions and the following disclaimer. 9 | * 10 | * 2. Redistributions in binary form must reproduce the above copyright notice, 11 | * this list of conditions and the following disclaimer in the documentation 12 | * and/or other materials provided with the distribution. 13 | * 14 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 18 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 20 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 22 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 | * POSSIBILITY OF SUCH DAMAGE. 
25 | */ 26 | 27 | package uk.ac.cam.ch.wwmm.opsin; 28 | 29 | import com.simolecule.centres.Labeller; 30 | import com.simolecule.centres.config.Configuration; 31 | import com.simolecule.centres.config.Sp2Bond; 32 | import com.simolecule.centres.config.Tetrahedral; 33 | 34 | import java.util.ArrayList; 35 | import java.util.Arrays; 36 | import java.util.List; 37 | 38 | public class OpsinLabeller extends Labeller { 39 | 40 | OpsinLabeller() 41 | { 42 | } 43 | /* Builds the stereo configurations of a fragment. Atoms: every atom with an AtomParity yields a Tetrahedral whose carriers are a copy of the parity's four atom refs, with any AtomParity.hydrogen placeholder replaced by the centre atom; parity -1 maps to LEFT and 1 to RIGHT, and other parity values are skipped. Bonds: every bond with a BondStereo yields an Sp2Bond built from {ref[1], ref[2]} and {ref[0], ref[3]}, with CIS mapped to TOGETHER and TRANS to OPPOSITE. NOTE(review): unlike the tetrahedral branch there is no cfg != 0 guard here, so a bond stereo value other than CIS/TRANS would be added with cfg 0 - confirm no such value exists. */ 44 | static List> createCfgs(Fragment fragment) 45 | { 46 | List> configs = new ArrayList<>(); 47 | 48 | for (Atom atom : fragment.getAtomList()) { 49 | 50 | AtomParity parity = atom.getAtomParity(); 51 | if (parity == null) 52 | continue; 53 | Atom[] carriers = Arrays.copyOf(parity.getAtomRefs4(), 54 | 4); 55 | for (int i = 0; i < carriers.length; i++) { 56 | // deoxyhydrogen too? 57 | if (carriers[i].equals(AtomParity.hydrogen)) { 58 | carriers[i] = atom; 59 | } 60 | } 61 | 62 | int cfg = 0; 63 | switch (parity.getParity()) { 64 | case -1: 65 | cfg = Tetrahedral.LEFT; 66 | break; 67 | case 1: 68 | cfg = Tetrahedral.RIGHT; 69 | break; 70 | } 71 | 72 | if (cfg != 0) 73 | configs.add(new Tetrahedral(atom, carriers, cfg)); 74 | } 75 | 76 | for (Bond bond : fragment.getBondSet()) { 77 | BondStereo bstereo = bond.getBondStereo(); 78 | if (bstereo == null) 79 | continue; 80 | Atom[] ref = bstereo.getAtomRefs4(); 81 | int cfg = 0; 82 | switch (bstereo.getBondStereoValue()) { 83 | case CIS: 84 | cfg = Sp2Bond.TOGETHER; 85 | break; 86 | case TRANS: 87 | cfg = Sp2Bond.OPPOSITE; 88 | break; 89 | } 90 | configs.add(new Sp2Bond<>(bond, 91 | new Atom[]{ref[1], ref[2]}, 92 | new Atom[]{ref[0], ref[3]}, 93 | cfg)); 94 | } 95 | 96 | return configs; 97 | } 98 | /* Convenience entry point: wraps the fragment in an OpsinMol and labels it using the configurations from createCfgs. */ 99 | public static void label(Fragment fragment) 100 | { 101 | new OpsinLabeller().label(new OpsinMol(fragment), 102 | createCfgs(fragment)); 103 | } 104 | } 105 | -------------------------------------------------------------------------------- 
/cdk/src/test/resources/uk/ac/ebi/centres/cdk/ChEBI_66261.mol: -------------------------------------------------------------------------------- 1 | 2 | Mrv0541 11061309422D 3 | 4 | 38 42 0 0 0 0 999 V2000 5 | 2.2252 -3.6222 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0 6 | 1.7428 -4.2883 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 7 | 2.2249 -4.9639 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0 8 | 3.0123 -4.7057 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0 9 | 3.0079 -3.8850 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0 10 | 3.5920 -3.3023 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 11 | 3.5927 -5.2825 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 12 | 4.4080 -5.2854 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 13 | 4.4075 -3.3001 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 14 | 4.9875 -3.8844 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0 15 | 4.9849 -4.7062 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0 16 | 5.7641 -4.9694 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0 17 | 6.2530 -4.3007 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 18 | 5.7707 -3.6273 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0 19 | 3.2742 -2.5387 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 20 | 4.8111 -2.5804 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 21 | 4.3956 -1.8705 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 22 | 5.6370 -2.5748 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 23 | 3.5699 -1.8714 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 24 | 3.1525 -1.1612 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 25 | 2.3268 -1.1669 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 26 | 1.9132 -0.4498 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 27 | 3.1580 -5.9885 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 28 | 4.7190 -6.0496 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 29 | 2.3313 -5.9666 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 30 | 3.5502 -6.7157 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 31 | 3.1204 -7.4209 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 32 | 3.5078 -8.1433 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 33 | 3.0780 -8.8485 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 34 | 3.4703 -9.5758 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 35 | 1.4195 -3.4317 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 36 | 2.5906 -3.1624 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 37 | 1.4233 -5.1514 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 38 | 2.7935 -5.5000 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 
0 39 | 6.5634 -5.1664 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 40 | 6.5719 -3.4375 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 41 | 5.1973 -5.5000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 42 | 5.1973 -3.0810 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 43 | 5 1 1 0 0 0 0 44 | 1 2 1 0 0 0 0 45 | 2 3 1 0 0 0 0 46 | 3 4 1 0 0 0 0 47 | 4 5 1 0 0 0 0 48 | 5 6 1 0 0 0 0 49 | 4 7 1 0 0 0 0 50 | 7 8 2 0 0 0 0 51 | 8 11 1 0 0 0 0 52 | 10 9 1 0 0 0 0 53 | 9 6 2 0 0 0 0 54 | 10 11 1 0 0 0 0 55 | 11 12 1 0 0 0 0 56 | 12 13 1 0 0 0 0 57 | 13 14 1 0 0 0 0 58 | 14 10 1 0 0 0 0 59 | 1 14 1 0 0 0 0 60 | 3 12 1 0 0 0 0 61 | 6 15 1 0 0 0 0 62 | 9 16 1 0 0 0 0 63 | 16 17 1 0 0 0 0 64 | 16 18 2 0 0 0 0 65 | 17 19 2 0 0 0 0 66 | 19 20 1 0 0 0 0 67 | 20 21 2 0 0 0 0 68 | 21 22 1 0 0 0 0 69 | 7 23 1 0 0 0 0 70 | 8 24 1 0 0 0 0 71 | 23 25 2 0 0 0 0 72 | 23 26 1 0 0 0 0 73 | 26 27 2 0 0 0 0 74 | 27 28 1 0 0 0 0 75 | 28 29 2 0 0 0 0 76 | 29 30 1 0 0 0 0 77 | 1 31 1 6 0 0 0 78 | 5 32 1 6 0 0 0 79 | 3 33 1 6 0 0 0 80 | 4 34 1 6 0 0 0 81 | 12 35 1 6 0 0 0 82 | 14 36 1 6 0 0 0 83 | 11 37 1 6 0 0 0 84 | 10 38 1 6 0 0 0 85 | M END 86 | -------------------------------------------------------------------------------- /core/src/test/resources/uk/ac/ebi/centres/cdk/(Z)-2-(2-Furyl)-3-(5-nitro-2-furyl)acrylamide.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 9 | 11 | 13 | 15 | 17 | 19 | 21 | 23 | 25 | 27 | 29 | 31 | 33 | 35 | 37 | 39 | 41 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | -------------------------------------------------------------------------------- /cdk/src/test/resources/uk/ac/ebi/centres/cdk/(E)-2-(2-Furyl)-3-(5-nitro-2-furyl)acrylamide.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 9 | 11 | 13 | 15 | 17 | 19 | 21 | 23 | 25 | 27 | 29 | 31 | 33 | 35 | 37 | 39 | 41 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 
51 | 52 | C 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /cdk/src/test/resources/uk/ac/ebi/centres/cdk/demo.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | W 4 | -------------------------------------------------------------------------------- /cdk/src/test/resources/uk/ac/ebi/centres/cdk/intradependants.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | W 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | W 36 | 37 | 38 | W 39 | 40 | 41 | W 42 | 43 | 44 | H 45 | 46 | 47 | W 48 | 49 | 50 | H 51 | 52 | 53 | H 54 | 55 | 56 | H 57 | 58 | 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /core/src/main/java/com/simolecule/centres/BaseMol.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020 John Mayfield 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions are met: 6 | * 7 | * 1. Redistributions of source code must retain the above copyright notice, 8 | * this list of conditions and the following disclaimer. 9 | * 10 | * 2. Redistributions in binary form must reproduce the above copyright notice, 11 | * this list of conditions and the following disclaimer in the documentation 12 | * and/or other materials provided with the distribution. 13 | * 14 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 | * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 18 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 20 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 22 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 | * POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | package com.simolecule.centres; 28 | 29 | import com.simolecule.centres.Mancude.Fraction; 30 | 31 | import java.util.Iterator; 32 |
/**
 * Defines how we can access the properties and connections
 * of a molecule. Toolkit bindings subclass this and implement the
 * abstract accessors for their own atom and bond classes.
 *
 * @param <A> atom type
 * @param <B> bond type
 */
public abstract class BaseMol<A, B> {

  /**
   * Cache filled on the first call to {@link #getFractionalAtomicNum(Object)}
   * and indexed by {@link #getAtomIdx(Object)}.
   */
  private Fraction[] atomnums;

  /** Property key constant for a CIP label. */
  public static final String CIP_LABEL_KEY = "cip.label";
  /** Property key constant for a configuration index. */
  public static final String CONF_INDEX = "conf.index";

  /**
   * @return the underlying toolkit object wrapped by this molecule
   */
  public abstract Object getBaseImpl();

  public abstract int getNumAtoms();

  public abstract int getNumBonds();

  public abstract A getAtom(int idx);

  public abstract int getAtomIdx(A atom);

  /**
   * Iterate over the atoms by index, from {@code 0} up to (but excluding)
   * {@link #getNumAtoms()}. The atom count is re-read on every
   * {@code hasNext()} call. Removal is not supported.
   *
   * @return an iterable view of the atoms
   */
  public Iterable<A> atoms() {
    return new Iterable<A>() {
      @Override
      public Iterator<A> iterator() {
        return new Iterator<A>() {
          private int pos = 0;

          @Override
          public boolean hasNext() {
            return pos < getNumAtoms();
          }

          @Override
          public A next() {
            // Iterator contract: signal exhaustion explicitly instead of
            // asking the implementation for an out-of-range atom index.
            if (!hasNext())
              throw new java.util.NoSuchElementException();
            return getAtom(pos++);
          }

          @Override
          public void remove() {
            throw new UnsupportedOperationException();
          }
        };
      }
    };
  }

  public abstract B getBond(int idx);

  public abstract int getBondIdx(B bond);

  public abstract Iterable<B> getBonds(A atom);

  public abstract A getOther(B bond, A atom);

  public abstract A getBeg(B bond);

  public abstract A getEnd(B bond);

  public abstract boolean isInRing(B bond);

  public abstract int getAtomicNum(A atom);

  /**
   * Fractional atomic number of an atom. The whole table is computed once,
   * on first use, by {@code Mancude.CalcFracAtomNums(this)} and then looked
   * up by atom index.
   *
   * @param atom the atom to look up
   * @return the cached fraction stored at the atom's index
   */
  public Fraction getFractionalAtomicNum(A atom) {
    if (atomnums == null)
      atomnums = Mancude.CalcFracAtomNums(this);
    return atomnums[getAtomIdx(atom)];
  }

  public abstract int getNumHydrogens(A atom);

  public abstract int getMassNum(A atom);

  public abstract int getCharge(A atom);

  public abstract int getBondOrder(B bond);

  public abstract void setAtomProp(A atom, String key, Object val);

  public abstract <V> V getAtomProp(A atom, String key);

  public abstract void setBondProp(B bond, String key, Object val);

  public abstract <V> V getBondProp(B bond, String key);

  public abstract String dumpDigraph(Digraph<A, B> digraph);

}
129 | -------------------------------------------------------------------------------- /cdk/src/test/resources/uk/ac/ebi/centres/cdk/(Z)-2-(2-Furyl)-3-(5-nitro-2-furyl)acrylamide.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 9 | 11 | 13 | 15 | 17 | 19 | 21 | 23 | 25 | 27 | 29 | 31 | 33 | 35 | 37 | 39 | 41 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | -------------------------------------------------------------------------------- /cdk/src/test/resources/uk/ac/ebi/centres/cdk/CHEBI_17268.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | WHWWWH 5 | -------------------------------------------------------------------------------- /jchem/src/main/java/com/simolecule/centres/jchem/JChemLabeller.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020
John Mayfield 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions are met: 6 | * 7 | * 1. Redistributions of source code must retain the above copyright notice, 8 | * this list of conditions and the following disclaimer. 9 | * 10 | * 2. Redistributions in binary form must reproduce the above copyright notice, 11 | * this list of conditions and the following disclaimer in the documentation 12 | * and/or other materials provided with the distribution. 13 | * 14 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 18 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 20 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 22 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 | * POSSIBILITY OF SUCH DAMAGE. 
25 | */ 26 | 27 | package com.simolecule.centres.jchem; 28 | 29 | import chemaxon.struc.MolAtom; 30 | import chemaxon.struc.MolBond; 31 | import chemaxon.struc.Molecule; 32 | import chemaxon.struc.StereoConstants; 33 | import com.simolecule.centres.BaseMol; 34 | import com.simolecule.centres.Labeller; 35 | import com.simolecule.centres.config.Configuration; 36 | import com.simolecule.centres.config.Sp2Bond; 37 | import com.simolecule.centres.config.Tetrahedral; 38 | 39 | import java.util.ArrayList; 40 | import java.util.List; 41 |
/**
 * Labeller binding for JChem molecules: collects the stereo configurations
 * JChem reports on a {@code Molecule} and hands them to the generic
 * {@code Labeller}.
 */
public class JChemLabeller extends Labeller<MolAtom, MolBond> {

  /**
   * Gather the tetrahedral centres and cis/trans double bonds of a molecule.
   *
   * @param mol the JChem molecule to inspect
   * @return the configurations found, atoms first and then bonds
   */
  private static List<Configuration<MolAtom, MolBond>> findConfigs(Molecule mol) {
    List<Configuration<MolAtom, MolBond>> found = new ArrayList<>();

    // atom-centred (tetrahedral) configurations
    for (int atomIdx = 0; atomIdx < mol.getAtomCount(); atomIdx++) {
      if (mol.getParityType(atomIdx) != StereoConstants.PARITY_TETRAHEDRAL)
        continue;

      MolAtom centre = mol.getAtom(atomIdx);
      MolAtom[] neighbours = centre.getLigands();
      if (neighbours.length != 4) {
        // Centres without four explicit ligands are skipped; the original
        // author noted that padding the ligand array with the focus atom
        // and deriving a parity sign is not correct.
        System.err.println("Please provided hydrogen expanded graph for JChem");
        continue;
      }

      found.add(new Tetrahedral<>(centre, neighbours, mol.getParity(atomIdx)));
    }

    // bond-centred (cis/trans) configurations
    for (int bondIdx = 0; bondIdx < mol.getBondCount(); bondIdx++) {
      MolBond bond = mol.getBond(bondIdx);
      MolAtom first = bond.getCTAtom1();
      MolAtom last = bond.getCTAtom4();
      if (first == null || last == null)
        continue;

      // CIS is stored as 2 and TRANS as 1; any other flag value is ignored
      int ctFlags = bond.getFlags() & StereoConstants.CTUMASK;
      int cfg;
      if (ctFlags == StereoConstants.CIS)
        cfg = 2;
      else if (ctFlags == StereoConstants.TRANS)
        cfg = 1;
      else
        continue;

      MolAtom[] foci = new MolAtom[]{bond.getAtom1(), bond.getAtom2()};
      MolAtom[] carriers = new MolAtom[]{first, last};
      found.add(new Sp2Bond<>(bond, foci, carriers, cfg));
    }

    return found;
  }

  /**
   * Label the stereo configurations of the given molecule.
   *
   * @param mol wrapper whose {@code getBaseImpl()} is cast to a JChem
   *            {@code Molecule}
   */
  public static void label(BaseMol<MolAtom, MolBond> mol) {
    JChemLabeller labeller = new JChemLabeller();
    labeller.label(mol, findConfigs((Molecule) mol.getBaseImpl()));
  }
}
102 | -------------------------------------------------------------------------------- /core/src/main/java/com/simolecule/centres/rules/Rules.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020 John Mayfield 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions are met: 6 | * 7 | * 1. Redistributions of source code must retain the above copyright notice, 8 | * this list of conditions and the following disclaimer. 9 | * 10 | * 2. Redistributions in binary form must reproduce the above copyright notice, 11 | * this list of conditions and the following disclaimer in the documentation 12 | * and/or other materials provided with the distribution. 13 | * 14 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 18 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 20 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 22 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 | * POSSIBILITY OF SUCH DAMAGE.
25 | */ 26 | 27 | package com.simolecule.centres.rules; 28 | 29 | import com.simolecule.centres.BaseMol; 30 | import com.simolecule.centres.Edge; 31 | 32 | import java.util.ArrayList; 33 | import java.util.List; 34 | 35 | /** 36 | * A priority rules made up of other rules. Each sub-rules is used exhaustively on 37 | * the digraph before the next one is applied. 38 | * 39 | * @author John May 40 | */ 41 | public class Rules extends SequenceRule { 42 | 43 | private static final boolean SORT_BRANCHES_WITH_RULE5 = false; 44 | 45 | /** 46 | * Rule storage 47 | */ 48 | private final List> rules = new ArrayList<>(); 49 | 50 | 51 | public Rules(SequenceRule... rules) { 52 | super(null); 53 | for (SequenceRule rule : rules) 54 | add(rule); 55 | } 56 | 57 | public void add(SequenceRule rule) { 58 | if (rule == null) 59 | throw new NullPointerException("No sequence rule provided"); 60 | rules.add(rule); 61 | rule.setSorter(createSorter(rules)); 62 | } 63 | 64 | 65 | public Sort createSorter(List> rules) { 66 | List> subrules = new ArrayList<>(rules.size()); 67 | for (SequenceRule rule : rules) { 68 | if (!SORT_BRANCHES_WITH_RULE5 && rule instanceof Rule5) 69 | continue; 70 | subrules.add(rule); 71 | } 72 | return new Sort<>(subrules); 73 | } 74 | 75 | @Override 76 | public int getNumSubRules() { 77 | return rules.size(); 78 | } 79 | 80 | public Sort getSorter() { 81 | return new Sort<>(rules); 82 | } 83 | 84 | @Override 85 | public BaseMol getMol() { 86 | BaseMol res = null; 87 | for (SequenceRule rule : rules) { 88 | res = rule.getMol(); 89 | if (res != null) 90 | break; 91 | } 92 | return res; 93 | } 94 | 95 | @Override 96 | public int compare(Edge o1, Edge o2) { 97 | // Try using each rules. 
The rules will expand the search exhaustively 98 | // to all child ligands 99 | for (SequenceRule rule : rules) { 100 | // compare expands exhaustively across the whole graph 101 | int value = rule.recursiveCompare(o1, o2); 102 | if (value != 0) return value; 103 | } 104 | return 0; 105 | } 106 | 107 | @Override 108 | public int getComparision(Edge a, Edge b, boolean deep) { 109 | // Try using each rules. The rules will expand the search exhaustively 110 | // to all child ligands 111 | for (SequenceRule rule : rules) { 112 | 113 | // compare expands exhaustively across the whole graph 114 | int value = rule.recursiveCompare(a, b); 115 | 116 | if (value != 0) 117 | return value; 118 | 119 | } 120 | 121 | return 0; 122 | } 123 | 124 | @Override 125 | public String toString() { 126 | StringBuilder builder = new StringBuilder("Combined rules:"); 127 | for (SequenceRule rule : rules) 128 | builder.append(rule.toString()).append(", "); 129 | return builder.toString(); 130 | } 131 | } 132 | -------------------------------------------------------------------------------- /core/src/main/java/com/simolecule/centres/rules/Rule5New.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020 John Mayfield 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions are met: 6 | * 7 | * 1. Redistributions of source code must retain the above copyright notice, 8 | * this list of conditions and the following disclaimer. 9 | * 10 | * 2. Redistributions in binary form must reproduce the above copyright notice, 11 | * this list of conditions and the following disclaimer in the documentation 12 | * and/or other materials provided with the distribution. 
13 | * 14 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 18 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 20 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 22 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 | * POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | package com.simolecule.centres.rules; 28 | 29 | import com.simolecule.centres.*; 30 | 31 | import java.util.*; 32 |
/**
 * A descriptor pair rule. This rule defines that like descriptor pairs have
 * priority over unlike descriptor pairs.
 *
 * @author John May
 */
public class Rule5New<A, B> extends SequenceRule<A, B> {

  /** Reference descriptor used below the root; null for the top-level rule. */
  private final Descriptor ref;

  /**
   * Create the rule with no reference descriptor.
   *
   * @param mol the molecule the rule works on
   */
  public Rule5New(BaseMol<A, B> mol) {
    this(mol, null);
  }

  /**
   * Create the rule with a reference descriptor.
   *
   * @param mol the molecule the rule works on
   * @param ref the reference descriptor, may be null
   */
  public Rule5New(BaseMol<A, B> mol, Descriptor ref) {
    super(mol);
    this.ref = ref;
  }

  @Override
  public boolean isPseudoAsymmetric() {
    return true;
  }

  /**
   * Breadth-first walk from {@code beg}: record each visited node's auxiliary
   * descriptor in {@code plist}, ordering every node's edges with a sorter
   * built for the list's reference descriptor. Note that the node's own edge
   * list is re-ordered in place by the sorter.
   */
  private void fillPairs(Node<A, B> beg, PairList plist) {
    Sort<A, B> refSorter = getRefSorter(plist.getRefDescriptor());
    Deque<Node<A, B>> pending = new ArrayDeque<>();
    pending.add(beg);
    // ArrayDeque holds no nulls, so poll() returns null only when empty
    for (Node<A, B> current = pending.poll(); current != null; current = pending.poll()) {
      plist.add(current.getAux());
      List<Edge<A, B>> outgoing = current.getEdges();
      refSorter.prioritise(current, outgoing);
      for (Edge<A, B> edge : outgoing) {
        if (!edge.isBeg(current))
          continue;
        Node<A, B> child = edge.getEnd();
        if (!child.isTerminal())
          pending.add(child);
      }
    }
  }

  /**
   * Copy this rule's sorter rules, replacing this rule with a fresh
   * {@code Rule5New} bound to the given reference descriptor.
   */
  private Sort<A, B> getRefSorter(Descriptor refA) {
    List<SequenceRule<A, B>> adjusted = new ArrayList<>(getSorter().getRules());
    adjusted.remove(this);
    adjusted.add(new Rule5New<>(getMol(), refA));
    return new Sort<>(adjusted);
  }

  /**
   * Compare two edges. When both begin at the digraph's current root the
   * pair lists of the two branches are compared; otherwise the end-node
   * descriptors are compared against the reference descriptor.
   *
   * @return 0 if undecided, -1/+1 for an ordinary difference, -2/+2 when the
   *         R-referenced and S-referenced comparisons do not agree in sign
   *         (pseudo-asymmetric)
   */
  @Override
  public int compare(Edge<A, B> a, Edge<A, B> b) {
    if (startsAtCurrentRoot(a) && startsAtCurrentRoot(b))
      return compareFromRoot(a, b);
    return compareByReference(a, b);
  }

  /** True if the edge begins at the current root of its digraph. */
  private boolean startsAtCurrentRoot(Edge<A, B> edge) {
    return edge.getBeg().getDigraph().getCurrRoot().equals(edge.getBeg());
  }

  /** Below the root: the edge whose descriptor is 'like' the reference wins. */
  private int compareByReference(Edge<A, B> a, Edge<A, B> b) {
    if (ref == null)
      return 0;
    Descriptor aDesc = a.getEnd().getAux();
    Descriptor bDesc = b.getEnd().getAux();
    if (aDesc == null || bDesc == null || aDesc == Descriptor.ns || bDesc == Descriptor.ns)
      return 0;
    boolean aLike = PairList.ref(ref) == PairList.ref(aDesc);
    boolean bLike = PairList.ref(ref) == PairList.ref(bDesc);
    if (aLike == bLike)
      return 0;
    return aLike ? +1 : -1;
  }

  /** At the root: build R- and S-referenced pair lists for both branches. */
  private int compareFromRoot(Edge<A, B> a, Edge<A, B> b) {
    PairList listRA = new PairList(Descriptor.R);
    PairList listRB = new PairList(Descriptor.R);
    PairList listSA = new PairList(Descriptor.S);
    PairList listSB = new PairList(Descriptor.S);
    // fill order is kept: fillPairs re-sorts node edges as a side effect
    fillPairs(a.getEnd(), listRA);
    fillPairs(a.getEnd(), listSA);
    fillPairs(b.getEnd(), listRB);
    fillPairs(b.getEnd(), listSB);
    int cmpR = listRA.compareTo(listRB);
    int cmpS = listSA.compareTo(listSB);
    // -2/+2 for pseudo-asymmetric
    // -1/+1 if not (e.g. the R > R and S > S lists)
    if (cmpR == 0)
      return 0;
    if (cmpR < 0)
      return cmpS < 0 ? -1 : -2;
    return cmpS > 0 ? +1 : +2;
  }
}
128 | -------------------------------------------------------------------------------- /core/src/test/resources/uk/ac/ebi/centres/cdk/(2Z,5R,7E)-4,6-bis[(1E)-prop-1-en-1-yl]nona-2,7-dien-5-ol.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 9 | 11 | 13 | 15 | 17 | 19 | 21 | 23 | 25 | 27 | 29 | 31 | 33 | 35 | 37 | 39 | 41 | 43 | 45 | 47 | 48 | 49 | 50 | W 51 | 52 | 53 | 54 | 55 | 56 | 57 | T 58 | 59 | 60 | T 61 | 62 | 63 | 64 | 65 | 66 | 67 | T 68 | 69 | 70 | C 71 | 72 | 73 | T 74 | 75 | 76 | T 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | -------------------------------------------------------------------------------- /cdk/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 19 | 20 | 23 | 24 | centres 25 | com.simolecule.centres 26 | 1.3-SNAPSHOT 27 | 28 | 4.0.0 29 | 30 | centres-cdk 31 | 32 | 33 | 2.9 34 | 35 | 36 | 37 | 38 | ${project.groupId} 39 | centres-core 40 | ${project.parent.version} 41 | 42 | 43 | ${project.groupId} 44 | centres-core 45 | ${project.parent.version} 46 | test-jar 47 | test 48 | 49 | 50 | junit 51 | junit 52 | 53 | 54 | 55 | org.openscience.cdk 56 | cdk-interfaces 57 | ${cdk.version} 58 | 59 | 60 | org.openscience.cdk 61 | cdk-io 62 | ${cdk.version} 63 | 64 | 65 | org.openscience.cdk 66 | cdk-ctab 67 | ${cdk.version} 68 | 69 | 70 | org.openscience.cdk 71 | cdk-standard 72 | ${cdk.version} 73 | 74 |
75 | org.openscience.cdk 76 | cdk-smiles 77 | ${cdk.version} 78 | 79 | 80 | org.openscience.cdk 81 | cdk-silent 82 | ${cdk.version} 83 | 84 | 85 | 86 | 87 | 88 | org.apache.maven.plugins 89 | maven-shade-plugin 90 | 3.1.1 91 | 92 | 93 | package 94 | 95 | shade 96 | 97 | 98 | centres 99 | 100 | 101 | com.simolecule.centres.LabelCip 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | org.apache.maven.plugins 111 | maven-compiler-plugin 112 | 113 | 8 114 | 8 115 | 116 | 117 | 118 | 119 | 120 | -------------------------------------------------------------------------------- /core/src/test/resources/uk/ac/ebi/centres/cdk/sulochrin.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 9 | 11 | 13 | 15 | 17 | 19 | 21 | 23 | 25 | 27 | 29 | 31 | 33 | 35 | 37 | 39 | 41 | 43 | 45 | 47 | 49 | 51 | 53 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | -------------------------------------------------------------------------------- /cdk/src/test/resources/uk/ac/ebi/centres/cdk/(6R)-vomifoliol.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | ChEBI 9 | 10 | 11 | 12 | 13 | 14 | D-arabinono-1,4-lactone 15 | 16 | 17 | 3 18 | 19 | 20 | 21 | 23 | 25 | 27 | 29 | 31 | 33 | 35 | 37 | 39 | 41 | 43 | 45 | 47 | 49 | 51 | 53 | 55 | 57 | 59 | 60 | 61 | 62 | T 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | H 75 | 76 | 77 | 78 | 79 | 80 | 81 | W 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | -------------------------------------------------------------------------------- /core/src/main/java/com/simolecule/centres/rules/Sort.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020 John Mayfield 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * 
modification, are permitted provided that the following conditions are met: 6 | * 7 | * 1. Redistributions of source code must retain the above copyright notice, 8 | * this list of conditions and the following disclaimer. 9 | * 10 | * 2. Redistributions in binary form must reproduce the above copyright notice, 11 | * this list of conditions and the following disclaimer in the documentation 12 | * and/or other materials provided with the distribution. 13 | * 14 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 18 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 20 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 22 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 | * POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | package com.simolecule.centres.rules; 28 | 29 | import com.simolecule.centres.Edge; 30 | import com.simolecule.centres.Node; 31 | 32 | import java.util.ArrayList; 33 | import java.util.Collections; 34 | import java.util.List; 35 |
/**
 * A simple insertion sort for ligands. The number of ligands is not likely
 * to be very large, so a merge sort would bring little benefit.
 *
 * @author John May
 */
public class Sort<A, B> {

  /** Highest index of a rule that has produced a non-zero comparison so far. */
  private int ruleMax = 0;

  private final List<SequenceRule<A, B>> rules = new ArrayList<>(5);

  /**
   * Create a sorter that uses a single rule.
   *
   * @param comparator the rule
   */
  public Sort(SequenceRule<A, B> comparator) {
    this.rules.add(comparator);
  }

  /**
   * Create a sorter over a copy of the given rules, tried in list order.
   *
   * @param comparators the rules
   */
  public Sort(List<SequenceRule<A, B>> comparators) {
    rules.addAll(comparators);
  }

  /**
   * @return an unmodifiable view of the rules
   */
  public List<SequenceRule<A, B>> getRules() {
    return Collections.unmodifiableList(rules);
  }

  /**
   * Sort the edges with {@code deep} set to true.
   *
   * @see #prioritise(Node, List, boolean)
   */
  public Priority prioritise(Node<A, B> node, List<Edge<A, B>> edges) {
    return prioritise(node, edges, true);
  }

  /**
   * Insertion-sort the edges in place so that higher-ranked edges come first.
   * Sorting stops at once if a comparison yields
   * {@code SequenceRule.COMP_TO_WILDCARD}.
   *
   * @param node  the node the edges belong to
   * @param edges the edges to order (modified)
   * @param deep  passed through to each rule's comparison
   * @return the outcome: whether all edges were distinct, whether a wildcard
   *         was met, the rule index reached, and whether exactly one
   *         comparison returned a magnitude above 1
   */
  public Priority prioritise(Node<A, B> node, List<Edge<A, B>> edges, boolean deep) {
    boolean allDistinct = true;
    int mirrorComparisons = 0;

    for (int i = 0; i < edges.size(); i++) {
      int j = i;
      while (j > 0) {
        int cmp = compareLigands(node, edges.get(j - 1), edges.get(j), deep);

        if (cmp == SequenceRule.COMP_TO_WILDCARD)
          return new Priority(false, true, ruleMax, mirrorComparisons == 1);

        // -2/+2 means we used Rule 5 (or more) and the ligands are mirror
        // images
        if (cmp < -1 || cmp > 1)
          mirrorComparisons++;

        if (cmp >= 0) {
          if (cmp == 0)
            allDistinct = false;
          break;
        }

        swap(edges, j, j - 1);
        j--;
      }
    }

    return new Priority(allDistinct, false, ruleMax, mirrorComparisons == 1);
  }

  /**
   * Compare two edges of a node. Edges that do not begin at the node ('up'
   * edges) rank ahead of those that do; otherwise the first rule giving a
   * non-zero result decides and its index is recorded.
   *
   * @return the comparison value, 0 if no rule separates the edges
   */
  public final int compareLigands(Node<A, B> node, Edge<A, B> a, Edge<A, B> b, boolean deep) {
    // ensure 'up' edges are moved to the front
    boolean aDown = a.isBeg(node);
    boolean bDown = b.isBeg(node);
    if (aDown != bDown)
      return aDown ? -1 : +1;

    for (int idx = 0; idx < rules.size(); idx++) {
      int cmp = rules.get(idx).getComparision(a, b, deep);
      if (cmp != 0) {
        if (idx > ruleMax)
          ruleMax = idx;
        return cmp;
      }
    }
    return 0;
  }


  /**
   * Exchange the elements at positions {@code i} and {@code j}.
   */
  public void swap(List<Edge<A, B>> list, int i, int j) {
    Collections.swap(list, i, j);
  }


  /**
   * Split an already sorted list into runs of edges that compare as equal
   * to their predecessor.
   *
   * @param sorted edges in priority order
   * @return the groups, in the same order
   */
  public List<List<Edge<A, B>>> getGroups(List<Edge<A, B>> sorted) {
    // would be nice to have this integrated whilst sorting - may provide a
    // small speed increase, but as most of our lists are small we use the
    // sort-then-group approach
    List<List<Edge<A, B>>> groups = new ArrayList<>();
    List<Edge<A, B>> current = null;

    Edge<A, B> prev = null;
    for (Edge<A, B> edge : sorted) {
      boolean startsGroup = prev == null
                            || compareLigands(prev.getBeg(), prev, edge, true) != 0;
      if (startsGroup) {
        current = new ArrayList<Edge<A, B>>();
        groups.add(current);
      }
      current.add(edge);
      prev = edge;
    }

    return groups;
  }

}
153 | -------------------------------------------------------------------------------- /cdk/src/test/resources/uk/ac/ebi/centres/cdk/Daniel_Macude_1.mol: -------------------------------------------------------------------------------- 1 | 2 | Mrv1641808301611122D 3 | 4 | 49 55 0 0 1 0 999 V2000 5 | 2.9684 2.0625 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 6 | 2.1434 2.0625 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 7 | 2.1434 2.8875 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 8 | 2.8579 3.3000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 9 | 1.4289 3.3000 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 10 | 1.4289 4.1250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 11 | 0.7615 4.6099 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 12 | 1.0164 5.3945 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 13 | 1.8414 5.3945 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 14 | 2.0964 4.6099 0.0000 S 0 0 0 0 0 0 0 0 0 0 0 0 15 | 2.3264 6.0620 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 16 | 3.1468 5.9757 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 17 | 1.9908 6.8157 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 18 | 1.1703 6.9019 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 19 | 0.6854 6.2345 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 20 | -0.1351 6.3207 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 21 | -0.4706 7.0744 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 22 | 0.0143 7.7418 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 23 | 0.8348 7.6556 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 24 |
1.3184 2.0625 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 25 | 2.1434 1.2375 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0 26 | 2.8579 0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 27 | 2.8579 -0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 28 | 3.5724 -0.4125 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 29 | 4.2868 -0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 30 | 4.2868 0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 31 | 3.5724 1.2375 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 32 | 5.0013 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 33 | 5.7158 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 34 | 6.4302 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 35 | 6.4302 -1.2375 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 36 | 5.7158 -1.6500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 37 | 5.0013 -1.2375 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 38 | 7.1447 -1.6500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 39 | 7.8592 -1.2375 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 40 | 7.1447 -2.4750 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 41 | 7.8592 -2.8875 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 42 | 7.8592 -3.7125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 43 | 7.1447 -4.1250 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 44 | 6.4302 -3.7125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 45 | 6.4302 -2.8875 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 46 | 2.1434 -0.4125 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 47 | 1.4289 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 48 | 0.7145 -0.4125 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 49 | 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 50 | 0.0000 0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 51 | 0.7145 1.2375 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 52 | 1.4289 0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 53 | -0.7145 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 54 | 1 2 1 0 0 0 0 55 | 2 3 1 0 0 0 0 56 | 3 4 2 0 0 0 0 57 | 3 5 1 0 0 0 0 58 | 5 6 1 0 0 0 0 59 | 6 7 2 0 0 0 0 60 | 7 8 1 0 0 0 0 61 | 8 9 2 0 0 0 0 62 | 9 10 1 0 0 0 0 63 | 6 10 1 0 0 0 0 64 | 9 11 1 0 0 0 0 65 | 11 12 2 0 0 0 0 66 | 11 13 1 0 0 0 0 67 | 13 14 1 0 0 0 0 68 | 14 15 1 0 0 0 0 69 | 15 16 1 0 0 0 0 70 | 16 17 1 0 0 0 0 71 | 17 18 1 0 0 0 0 72 | 18 19 1 0 0 0 0 73 | 14 19 1 0 0 0 0 74 | 2 20 1 0 0 0 0 75 
| 21 2 1 6 0 0 0 76 | 21 22 1 0 0 0 0 77 | 22 23 1 0 0 0 0 78 | 23 24 2 0 0 0 0 79 | 24 25 1 0 0 0 0 80 | 25 26 2 0 0 0 0 81 | 26 27 1 0 0 0 0 82 | 22 27 2 0 0 0 0 83 | 25 28 1 0 0 0 0 84 | 28 29 2 0 0 0 0 85 | 29 30 1 0 0 0 0 86 | 30 31 2 0 0 0 0 87 | 31 32 1 0 0 0 0 88 | 32 33 2 0 0 0 0 89 | 28 33 1 0 0 0 0 90 | 31 34 1 0 0 0 0 91 | 34 35 2 0 0 0 0 92 | 34 36 1 0 0 0 0 93 | 36 37 1 0 0 0 0 94 | 37 38 1 0 0 0 0 95 | 38 39 1 0 0 0 0 96 | 39 40 1 0 0 0 0 97 | 40 41 1 0 0 0 0 98 | 36 41 1 0 0 0 0 99 | 23 42 1 0 0 0 0 100 | 42 43 1 0 0 0 0 101 | 43 44 2 0 0 0 0 102 | 44 45 1 0 0 0 0 103 | 45 46 2 0 0 0 0 104 | 46 47 1 0 0 0 0 105 | 47 48 2 0 0 0 0 106 | 43 48 1 0 0 0 0 107 | 21 48 1 0 0 0 0 108 | 45 49 1 0 0 0 0 109 | M END 110 | -------------------------------------------------------------------------------- /cdk/src/test/resources/uk/ac/ebi/centres/cdk/Daniel_Macude_2.mol: -------------------------------------------------------------------------------- 1 | 2 | Mrv1641808301611132D 3 | 4 | 49 55 0 0 1 0 999 V2000 5 | 2.9684 2.0625 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 6 | 2.1434 2.0625 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 7 | 2.1434 2.8875 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 8 | 2.8579 3.3000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 9 | 1.4289 3.3000 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 10 | 1.4289 4.1250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 11 | 0.7615 4.6099 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 12 | 1.0164 5.3945 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 13 | 1.8414 5.3945 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 14 | 2.0964 4.6099 0.0000 S 0 0 0 0 0 0 0 0 0 0 0 0 15 | 2.3264 6.0620 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 16 | 3.1468 5.9757 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 17 | 1.9908 6.8157 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 18 | 1.1703 6.9019 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 19 | 0.6854 6.2345 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 20 | -0.1351 6.3207 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 21 | -0.4706 7.0744 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 22 | 0.0143 7.7418 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 23 | 0.8348 7.6556 0.0000 C 0 0 0 0 0 0 
0 0 0 0 0 0 24 | 1.3184 2.0625 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 25 | 2.1434 1.2375 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0 26 | 2.8579 0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 27 | 2.8579 -0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 28 | 3.5724 -0.4125 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 29 | 4.2868 -0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 30 | 4.2868 0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 31 | 3.5724 1.2375 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 32 | 5.0013 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 33 | 5.7158 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 34 | 6.4302 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 35 | 6.4302 -1.2375 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 36 | 5.7158 -1.6500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 37 | 5.0013 -1.2375 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 38 | 7.1447 -1.6500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 39 | 7.8592 -1.2375 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 40 | 7.1447 -2.4750 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 41 | 7.8592 -2.8875 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 42 | 7.8592 -3.7125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 43 | 7.1447 -4.1250 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 44 | 6.4302 -3.7125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 45 | 6.4302 -2.8875 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 46 | 2.1434 -0.4125 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 47 | 1.4289 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 48 | 0.7145 -0.4125 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 49 | 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 50 | 0.0000 0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 51 | 0.7145 1.2375 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 52 | 1.4289 0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 53 | -0.7145 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 54 | 1 2 1 0 0 0 0 55 | 2 3 1 0 0 0 0 56 | 3 4 2 0 0 0 0 57 | 3 5 1 0 0 0 0 58 | 5 6 1 0 0 0 0 59 | 6 7 2 0 0 0 0 60 | 7 8 1 0 0 0 0 61 | 8 9 2 0 0 0 0 62 | 9 10 1 0 0 0 0 63 | 6 10 1 0 0 0 0 64 | 9 11 1 0 0 0 0 65 | 11 12 2 0 0 0 0 66 | 11 13 1 0 0 0 0 67 | 13 14 1 0 0 0 0 68 | 14 15 1 0 0 0 0 69 | 15 16 1 0 0 0 0 70 | 16 17 1 0 0 0 0 71 | 17 18 1 0 0 0 0 72 | 18 19 1 0 0 0 0 73 | 14 19 1 0 0 0 0 74 | 2 
20 1 0 0 0 0 75 | 21 2 1 6 0 0 0 76 | 21 22 1 0 0 0 0 77 | 25 28 1 0 0 0 0 78 | 28 29 2 0 0 0 0 79 | 29 30 1 0 0 0 0 80 | 30 31 2 0 0 0 0 81 | 31 32 1 0 0 0 0 82 | 32 33 2 0 0 0 0 83 | 28 33 1 0 0 0 0 84 | 31 34 1 0 0 0 0 85 | 34 35 2 0 0 0 0 86 | 34 36 1 0 0 0 0 87 | 36 37 1 0 0 0 0 88 | 37 38 1 0 0 0 0 89 | 38 39 1 0 0 0 0 90 | 39 40 1 0 0 0 0 91 | 40 41 1 0 0 0 0 92 | 36 41 1 0 0 0 0 93 | 23 42 1 0 0 0 0 94 | 42 43 1 0 0 0 0 95 | 43 44 2 0 0 0 0 96 | 44 45 1 0 0 0 0 97 | 45 46 2 0 0 0 0 98 | 46 47 1 0 0 0 0 99 | 47 48 2 0 0 0 0 100 | 43 48 1 0 0 0 0 101 | 21 48 1 0 0 0 0 102 | 45 49 1 0 0 0 0 103 | 22 27 1 0 0 0 0 104 | 26 27 2 0 0 0 0 105 | 25 26 1 0 0 0 0 106 | 24 25 2 0 0 0 0 107 | 23 24 1 0 0 0 0 108 | 22 23 2 0 0 0 0 109 | M END 110 | -------------------------------------------------------------------------------- /cdk/src/test/resources/uk/ac/ebi/centres/cdk/Daniel_Macude_3.mol: -------------------------------------------------------------------------------- 1 | 2 | Mrv1641808301611132D 3 | 4 | 49 55 0 0 1 0 999 V2000 5 | 2.9684 2.0625 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 6 | 2.1434 2.0625 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 7 | 2.1434 2.8875 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 8 | 2.8579 3.3000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 9 | 1.4289 3.3000 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 10 | 1.4289 4.1250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 11 | 0.7615 4.6099 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 12 | 1.0164 5.3945 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 13 | 1.8414 5.3945 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 14 | 2.0964 4.6099 0.0000 S 0 0 0 0 0 0 0 0 0 0 0 0 15 | 2.3264 6.0620 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 16 | 3.1468 5.9757 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 17 | 1.9908 6.8157 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 18 | 1.1703 6.9019 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 19 | 0.6854 6.2345 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 20 | -0.1351 6.3207 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 21 | -0.4706 7.0744 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 22 | 0.0143 7.7418 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 23 | 0.8348 7.6556 
0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 24 | 1.3184 2.0625 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 25 | 2.1434 1.2375 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0 26 | 2.8579 0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 27 | 2.8579 -0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 28 | 3.5724 -0.4125 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 29 | 4.2868 -0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 30 | 4.2868 0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 31 | 3.5724 1.2375 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 32 | 5.0013 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 33 | 5.7158 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 34 | 6.4302 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 35 | 6.4302 -1.2375 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 36 | 5.7158 -1.6500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 37 | 5.0013 -1.2375 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 38 | 7.1447 -1.6500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 39 | 7.8592 -1.2375 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 40 | 7.1447 -2.4750 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 41 | 7.8592 -2.8875 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 42 | 7.8592 -3.7125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 43 | 7.1447 -4.1250 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 44 | 6.4302 -3.7125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 45 | 6.4302 -2.8875 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 46 | 2.1434 -0.4125 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 47 | 1.4289 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 48 | 0.7145 -0.4125 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 49 | 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 50 | 0.0000 0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 51 | 0.7145 1.2375 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 52 | 1.4289 0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 53 | -0.7145 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 54 | 1 2 1 0 0 0 0 55 | 2 3 1 0 0 0 0 56 | 3 4 2 0 0 0 0 57 | 3 5 1 0 0 0 0 58 | 5 6 1 0 0 0 0 59 | 6 7 2 0 0 0 0 60 | 7 8 1 0 0 0 0 61 | 8 9 2 0 0 0 0 62 | 9 10 1 0 0 0 0 63 | 6 10 1 0 0 0 0 64 | 9 11 1 0 0 0 0 65 | 11 12 2 0 0 0 0 66 | 11 13 1 0 0 0 0 67 | 13 14 1 0 0 0 0 68 | 14 15 1 0 0 0 0 69 | 15 16 1 0 0 0 0 70 | 16 17 1 0 0 0 0 71 | 17 18 1 0 0 0 0 72 | 18 19 1 0 0 0 0 73 | 
14 19 1 0 0 0 0 74 | 2 20 1 0 0 0 0 75 | 21 2 1 6 0 0 0 76 | 21 22 1 0 0 0 0 77 | 25 28 1 0 0 0 0 78 | 28 29 2 0 0 0 0 79 | 29 30 1 0 0 0 0 80 | 30 31 2 0 0 0 0 81 | 31 32 1 0 0 0 0 82 | 32 33 2 0 0 0 0 83 | 28 33 1 0 0 0 0 84 | 31 34 1 0 0 0 0 85 | 34 35 2 0 0 0 0 86 | 34 36 1 0 0 0 0 87 | 36 37 1 0 0 0 0 88 | 37 38 1 0 0 0 0 89 | 38 39 1 0 0 0 0 90 | 39 40 1 0 0 0 0 91 | 40 41 1 0 0 0 0 92 | 36 41 1 0 0 0 0 93 | 23 42 1 0 0 0 0 94 | 42 43 1 0 0 0 0 95 | 21 48 1 0 0 0 0 96 | 45 49 1 0 0 0 0 97 | 22 27 1 0 0 0 0 98 | 26 27 2 0 0 0 0 99 | 25 26 1 0 0 0 0 100 | 24 25 2 0 0 0 0 101 | 23 24 1 0 0 0 0 102 | 22 23 2 0 0 0 0 103 | 47 48 1 0 0 0 0 104 | 46 47 2 0 0 0 0 105 | 45 46 1 0 0 0 0 106 | 44 45 2 0 0 0 0 107 | 43 44 1 0 0 0 0 108 | 43 48 2 0 0 0 0 109 | M END 110 | -------------------------------------------------------------------------------- /cdk/src/test/resources/uk/ac/ebi/centres/cdk/Daniel_Macude_4.mol: -------------------------------------------------------------------------------- 1 | 2 | Mrv1641808301611132D 3 | 4 | 49 55 0 0 1 0 999 V2000 5 | 2.9684 2.0625 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 6 | 2.1434 2.0625 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 7 | 2.1434 2.8875 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 8 | 2.8579 3.3000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 9 | 1.4289 3.3000 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 10 | 1.4289 4.1250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 11 | 0.7615 4.6099 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 12 | 1.0164 5.3945 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 13 | 1.8414 5.3945 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 14 | 2.0964 4.6099 0.0000 S 0 0 0 0 0 0 0 0 0 0 0 0 15 | 2.3264 6.0620 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 16 | 3.1468 5.9757 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 17 | 1.9908 6.8157 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 18 | 1.1703 6.9019 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 19 | 0.6854 6.2345 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 20 | -0.1351 6.3207 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 21 | -0.4706 7.0744 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 22 | 0.0143 7.7418 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 
23 | 0.8348 7.6556 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 24 | 1.3184 2.0625 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 25 | 2.1434 1.2375 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0 26 | 2.8579 0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 27 | 2.8579 -0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 28 | 3.5724 -0.4125 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 29 | 4.2868 -0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 30 | 4.2868 0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 31 | 3.5724 1.2375 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 32 | 5.0013 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 33 | 5.7158 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 34 | 6.4302 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 35 | 6.4302 -1.2375 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 36 | 5.7158 -1.6500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 37 | 5.0013 -1.2375 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 38 | 7.1447 -1.6500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 39 | 7.8592 -1.2375 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 40 | 7.1447 -2.4750 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 41 | 7.8592 -2.8875 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 42 | 7.8592 -3.7125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 43 | 7.1447 -4.1250 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 44 | 6.4302 -3.7125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 45 | 6.4302 -2.8875 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 46 | 2.1434 -0.4125 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 47 | 1.4289 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 48 | 0.7145 -0.4125 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 49 | 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 50 | 0.0000 0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 51 | 0.7145 1.2375 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 52 | 1.4289 0.8250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 53 | -0.7145 -0.4125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 54 | 1 2 1 0 0 0 0 55 | 2 3 1 0 0 0 0 56 | 3 4 2 0 0 0 0 57 | 3 5 1 0 0 0 0 58 | 5 6 1 0 0 0 0 59 | 6 7 2 0 0 0 0 60 | 7 8 1 0 0 0 0 61 | 8 9 2 0 0 0 0 62 | 9 10 1 0 0 0 0 63 | 6 10 1 0 0 0 0 64 | 9 11 1 0 0 0 0 65 | 11 12 2 0 0 0 0 66 | 11 13 1 0 0 0 0 67 | 13 14 1 0 0 0 0 68 | 14 15 1 0 0 0 0 69 | 15 16 1 0 0 0 0 70 | 16 17 1 0 0 0 0 71 | 17 18 1 0 0 0 0 72 | 18 
19 1 0 0 0 0 73 | 14 19 1 0 0 0 0 74 | 2 20 1 0 0 0 0 75 | 21 2 1 6 0 0 0 76 | 21 22 1 0 0 0 0 77 | 25 28 1 0 0 0 0 78 | 28 29 2 0 0 0 0 79 | 29 30 1 0 0 0 0 80 | 30 31 2 0 0 0 0 81 | 31 32 1 0 0 0 0 82 | 32 33 2 0 0 0 0 83 | 28 33 1 0 0 0 0 84 | 31 34 1 0 0 0 0 85 | 34 35 2 0 0 0 0 86 | 34 36 1 0 0 0 0 87 | 36 37 1 0 0 0 0 88 | 37 38 1 0 0 0 0 89 | 38 39 1 0 0 0 0 90 | 39 40 1 0 0 0 0 91 | 40 41 1 0 0 0 0 92 | 36 41 1 0 0 0 0 93 | 23 42 1 0 0 0 0 94 | 42 43 1 0 0 0 0 95 | 21 48 1 0 0 0 0 96 | 45 49 1 0 0 0 0 97 | 47 48 1 0 0 0 0 98 | 46 47 2 0 0 0 0 99 | 45 46 1 0 0 0 0 100 | 44 45 2 0 0 0 0 101 | 43 44 1 0 0 0 0 102 | 43 48 2 0 0 0 0 103 | 22 23 1 0 0 0 0 104 | 22 27 2 0 0 0 0 105 | 26 27 1 0 0 0 0 106 | 25 26 2 0 0 0 0 107 | 24 25 1 0 0 0 0 108 | 23 24 2 0 0 0 0 109 | M END 110 | -------------------------------------------------------------------------------- /cdk/src/test/resources/uk/ac/ebi/centres/cdk/ChEBI_2955.mol: -------------------------------------------------------------------------------- 1 | 2 | Marvin 09240816422D 3 | 4 | 52 54 0 0 0 0 999 V2000 5 | -3.2657 2.7280 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 6 | -1.7679 2.7107 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 7 | -3.6782 2.0135 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0 8 | -1.3554 1.9962 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0 9 | -1.7679 1.2818 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 10 | -1.1845 0.6984 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0 11 | -1.1845 -0.1266 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0 12 | -1.7679 -0.7100 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0 13 | -1.7679 -1.5350 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0 14 | -2.4823 -1.9475 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0 15 | -3.1968 -1.5350 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 16 | -3.1968 -0.7100 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 17 | -3.9113 -0.2975 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0 18 | -3.9113 0.5275 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0 19 | -3.3279 1.1109 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0 20 | -3.2657 3.5530 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 21 | -4.4751 2.2270 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 22 | -2.5310 
0.8974 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 23 | -4.6258 0.9400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 24 | -2.4823 -2.7725 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 25 | -0.3876 0.9119 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 26 | -0.5585 2.2097 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 27 | -0.4700 0.2859 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 28 | -4.6258 0.1150 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 29 | -4.6258 -0.7100 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 30 | -0.6394 -3.2068 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 31 | 0.5612 -0.6851 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 32 | -3.9113 -1.9475 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 33 | -5.4508 -0.7100 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 34 | -0.6394 -4.0318 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0 35 | 1.7987 -1.3996 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0 36 | 0.9737 -1.3996 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0 37 | 0.5612 -2.1141 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 38 | 0.9736 -2.8286 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0 39 | 1.7986 -2.8286 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 40 | 2.2112 -2.1141 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0 41 | 0.1856 -4.0318 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 42 | -1.0519 -4.7463 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 43 | -0.6395 -5.4608 0.0000 C 0 0 2 0 0 0 0 0 0 0 0 0 44 | 0.1855 -5.4608 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0 45 | 0.5981 -4.7463 0.0000 C 0 0 1 0 0 0 0 0 0 0 0 0 46 | 2.3820 -0.8162 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 47 | -1.2228 -6.0441 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 48 | 1.1814 -5.3296 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 49 | 0.5611 -3.5430 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 50 | 3.0362 -2.1141 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 51 | 1.1814 -4.1629 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 52 | 0.5980 -6.1752 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 53 | 2.0064 -4.1629 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 54 | 3.6195 -1.5307 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 55 | 3.4487 -2.8286 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 56 | -2.4824 -0.2975 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 57 | 1 2 1 0 0 0 0 58 | 1 3 1 0 0 0 0 59 | 2 4 1 0 0 0 0 60 | 4 5 1 0 0 0 0 61 | 5 6 1 0 0 0 0 62 | 6 7 1 0 0 0 0 63 | 7 8 1 0 0 0 0 64 | 8 9 1 0 0 
0 0 65 | 9 10 1 0 0 0 0 66 | 10 11 1 0 0 0 0 67 | 11 12 1 0 0 0 0 68 | 12 13 1 0 0 0 0 69 | 13 14 1 0 0 0 0 70 | 14 15 1 0 0 0 0 71 | 15 3 1 0 0 0 0 72 | 1 16 1 0 0 0 0 73 | 3 17 1 1 0 0 0 74 | 15 18 1 1 0 0 0 75 | 14 19 1 1 0 0 0 76 | 10 20 1 1 0 0 0 77 | 6 21 1 1 0 0 0 78 | 4 22 1 6 0 0 0 79 | 6 23 1 6 0 0 0 80 | 14 24 1 6 0 0 0 81 | 13 25 1 6 0 0 0 82 | 9 26 1 6 0 0 0 83 | 7 27 1 6 0 0 0 84 | 11 28 2 0 0 0 0 85 | 25 29 1 0 0 0 0 86 | 32 27 1 1 0 0 0 87 | 30 26 1 1 0 0 0 88 | 31 32 1 0 0 0 0 89 | 32 33 1 0 0 0 0 90 | 33 34 1 0 0 0 0 91 | 34 35 1 0 0 0 0 92 | 35 36 1 0 0 0 0 93 | 31 36 1 0 0 0 0 94 | 37 30 1 0 0 0 0 95 | 30 38 1 0 0 0 0 96 | 38 39 1 0 0 0 0 97 | 39 40 1 0 0 0 0 98 | 40 41 1 0 0 0 0 99 | 37 41 1 0 0 0 0 100 | 31 42 1 6 0 0 0 101 | 39 43 1 6 0 0 0 102 | 41 44 1 6 0 0 0 103 | 34 45 1 1 0 0 0 104 | 36 46 1 1 0 0 0 105 | 41 47 1 1 0 0 0 106 | 40 48 1 1 0 0 0 107 | 47 49 1 0 0 0 0 108 | 46 50 1 0 0 0 0 109 | 46 51 1 0 0 0 0 110 | 8 52 1 6 0 0 0 111 | M END 112 | --------------------------------------------------------------------------------