├── .github └── workflows │ └── cibuildwheel.yaml ├── .gitignore ├── Code └── Osmordred │ ├── CMakeLists.txt │ ├── Osmordred.cpp │ ├── Osmordred.h │ └── Wrap │ ├── CMakeLists.txt │ └── rdOsmordred.cpp ├── README.md ├── build_patched_rdkit-pypi ├── .gitignore ├── README.md ├── build_rdkit-pypi.sh └── rdkit-pypi_2023_09_3_osmordred.diff ├── license.txt ├── osmordred_rdkit_2023_09_3_patches ├── AllChem.py.diff ├── CMakeLists.txt.diff ├── Code_CMakeLists.txt.diff └── font_sha_patch.diff ├── rdkit └── Chem │ └── Osmordred │ └── __init__.py └── skbuild ├── CMakeLists.txt ├── MANIFEST.in ├── __init__.py ├── build.sh ├── osmordred ├── __init__.py └── osmordred.cpp ├── pyproject.toml ├── setup.py ├── setup_env.sh └── test ├── tAll.py ├── tBEState.py └── test.py /.github/workflows/cibuildwheel.yaml: -------------------------------------------------------------------------------- 1 | name: rdkit-osmordred CIBuildWheel 2 | on: 3 | workflow_dispatch: 4 | jobs: 5 | build_wheels: 6 | name: Build wheels on mac 7 | runs-on: macos-13 # NOTE(review): macos-13 hosted runners are x86_64 while CIBW_BUILD targets arm64 - confirm the cross-build is configured 8 | steps: 9 | - name: Checkout 10 | uses: actions/checkout@v4 11 | 12 | - uses: actions/setup-python@v4 13 | name: Install Python 14 | with: 15 | python-version: '3.11' 16 | 17 | - name: Build # the build script lives in build_patched_rdkit-pypi/, not in the repository root 18 | env: 19 | CIBW_PLATFORM: macos 20 | CIBW_BUILD: cp311-macosx_arm64 21 | shell: bash 22 | run: | 23 | NO_CONDA=1 ./build_patched_rdkit-pypi/build_rdkit-pypi.sh 24 | 25 | - name: Save Wheels # cibuildwheel is launched from the rdkit-pypi checkout, so wheelhouse/ is created there 26 | uses: actions/upload-artifact@v4 27 | with: 28 | name: wheels 29 | path: ./build_patched_rdkit-pypi/rdkit-pypi/wheelhouse/*.whl 30 | 31 | 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | __pycache__ 3 | _skbuild 4 | dist 5 | 6 | MANIFEST.in 7 | .DS_Store 8 | *.csv 9 | *.egg-info 10 | -------------------------------------------------------------------------------- /Code/Osmordred/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 
rdkit_library(Osmordred Osmordred.cpp LINK_LIBRARIES DataStructs Descriptors Fingerprints GraphMol PartialCharges RDGeneral SmilesParse Subgraphs SubstructMatch) 2 | target_compile_definitions(Osmordred PRIVATE RDKIT_OSMORDRED_BUILD) 3 | 4 | rdkit_headers(Osmordred.h DEST Osmordred) 5 | 6 | find_path(EIGEN3_INCLUDE_DIR Eigen/Dense 7 | PATH_SUFFIXES /eigen 8 | REQUIRED 9 | ) 10 | include_directories(Osmordred ${EIGEN3_INCLUDE_DIR}) 11 | 12 | find_package(LAPACK REQUIRED) 13 | include_directories(Osmordred ${LAPACK_INCLUDE_DIRS}) 14 | if (CMAKE_SYSTEM_NAME STREQUAL "Linux") 15 | # ${LAPACK_INCLUDE_DIRS} isn't working yet on linux so hack it: 16 | include_directories(Osmordred /usr/include/lapacke) 17 | endif () 18 | 19 | link_libraries(Osmordred ${LAPACK_LIBRARIES}) 20 | 21 | # rdkit_test(testOsmordred testOsmordred.cpp LINK_LIBRARIES Osmordred ) 22 | 23 | if(RDK_BUILD_PYTHON_WRAPPERS) 24 | add_subdirectory(Wrap) 25 | endif() -------------------------------------------------------------------------------- /Code/Osmordred/Osmordred.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifndef _OSMORDRED_H 4 | #define _OSMORDRED_H 5 | 6 | #include 7 | #include 8 | 9 | namespace RDKit { 10 | 11 | RDKIT_OSMORDRED_EXPORT std::vector calcABCIndex(const ROMol& mol); 12 | RDKIT_OSMORDRED_EXPORT std::vector calcAcidBase(const ROMol& mol); 13 | RDKIT_OSMORDRED_EXPORT std::vector calcAromatic(const ROMol& mol); 14 | RDKIT_OSMORDRED_EXPORT std::vector calcAtomCounts(const ROMol& mol, int version = 1); 15 | RDKIT_OSMORDRED_EXPORT std::vector calcBalabanJ(const ROMol& mol); 16 | RDKIT_OSMORDRED_EXPORT std::vector calcBertzCT(const RDKit::ROMol& mol); 17 | RDKIT_OSMORDRED_EXPORT std::vector calcBondCounts(const RDKit::ROMol& mol); 18 | RDKIT_OSMORDRED_EXPORT std::vector calcVertexAdjacencyInformation(const ROMol& mol); 19 | RDKIT_OSMORDRED_EXPORT std::vector calcWeight(const ROMol& mol); 20 | RDKIT_OSMORDRED_EXPORT std::vector 
calcWienerIndex(const ROMol& mol); 21 | RDKIT_OSMORDRED_EXPORT std::vector calcVdwVolumeABC(const ROMol& mol); 22 | RDKIT_OSMORDRED_EXPORT std::vector calcTopoPSA(const ROMol& mol); 23 | RDKIT_OSMORDRED_EXPORT std::vector calcSLogP(const ROMol& mol); 24 | RDKIT_OSMORDRED_EXPORT std::vector calcHydrogenBond(const ROMol& mol); 25 | RDKIT_OSMORDRED_EXPORT std::vector calcLogS(const RDKit::ROMol& mol); 26 | RDKIT_OSMORDRED_EXPORT std::vector calcLipinskiGhose(const RDKit::ROMol& mol); 27 | RDKIT_OSMORDRED_EXPORT std::vector calcMcGowanVolume(const RDKit::ROMol& mol); 28 | RDKIT_OSMORDRED_EXPORT std::vector calcPolarizability(const RDKit::ROMol& mol); 29 | RDKIT_OSMORDRED_EXPORT std::vector calcRotatableBond(const ROMol& mol); 30 | RDKIT_OSMORDRED_EXPORT std::vector calcFragmentComplexity(const ROMol& mol); 31 | RDKIT_OSMORDRED_EXPORT std::vector calcConstitutional(const ROMol& mol); 32 | RDKIT_OSMORDRED_EXPORT std::vector calcTopologicalIndex(const RDKit::ROMol& mol); 33 | RDKIT_OSMORDRED_EXPORT std::vector calcDetourMatrixDescs(const RDKit::ROMol& mol); 34 | RDKIT_OSMORDRED_EXPORT std::vector calcDetourMatrixDescsL(const RDKit::ROMol& mol); 35 | RDKIT_OSMORDRED_EXPORT std::vector calcDistMatrixDescs(const RDKit::ROMol& mol, int version=1); 36 | RDKIT_OSMORDRED_EXPORT std::vector calcDistMatrixDescsL(const RDKit::ROMol& mol, int version=1); 37 | RDKIT_OSMORDRED_EXPORT std::vector calcAdjMatrixDescs(const RDKit::ROMol& mol, int version =1); 38 | RDKIT_OSMORDRED_EXPORT std::vector calcAdjMatrixDescsL(const RDKit::ROMol& mol, int version=1); 39 | RDKIT_OSMORDRED_EXPORT std::vector calcCarbonTypes(const RDKit::ROMol& mol, int version =1); 40 | RDKIT_OSMORDRED_EXPORT std::vector calcEccentricConnectivityIndex(const ROMol& mol); 41 | RDKIT_OSMORDRED_EXPORT std::vector calcBaryszMatrixDescsL(const RDKit::ROMol& mol); 42 | RDKIT_OSMORDRED_EXPORT std::vector calcBaryszMatrixDescs(const RDKit::ROMol& mol); 43 | RDKIT_OSMORDRED_EXPORT std::vector calcZagrebIndex(const 
RDKit::ROMol& mol); 44 | RDKIT_OSMORDRED_EXPORT std::vector calcMoeType(const ROMol& mol); 45 | RDKIT_OSMORDRED_EXPORT std::vector calcMolecularDistanceEdgeDescs(const RDKit::ROMol& mol); 46 | RDKIT_OSMORDRED_EXPORT std::vector calcEStateDescs(const RDKit::ROMol& mol, bool extended = false); 47 | RDKIT_OSMORDRED_EXPORT std::vector calcWalkCounts(const RDKit::ROMol& mol); 48 | RDKIT_OSMORDRED_EXPORT std::vector calcTopologicalChargeDescs(const RDKit::ROMol& mol); 49 | RDKIT_OSMORDRED_EXPORT std::vector calcAllChiDescriptors(const RDKit::ROMol& mol); 50 | RDKIT_OSMORDRED_EXPORT std::vector calcPathCount(const RDKit::ROMol& mol); 51 | RDKIT_OSMORDRED_EXPORT std::vector calcKappaShapeIndex(const RDKit::ROMol& mol); // closer "missing" k3 path count not correct on few cases 52 | RDKIT_OSMORDRED_EXPORT std::vector calcRingCount(const ROMol& mol); 53 | RDKIT_OSMORDRED_EXPORT std::vector calcMolecularId(const RDKit::ROMol& mol); 54 | RDKIT_OSMORDRED_EXPORT std::vector calcBCUTs(const RDKit::ROMol& mol); // 10x faster the 55 | RDKIT_OSMORDRED_EXPORT std::vector calcAutoCorrelation(const RDKit::ROMol& mol); 56 | RDKIT_OSMORDRED_EXPORT std::vector calcFramework(const ROMol& mol); 57 | RDKIT_OSMORDRED_EXPORT std::vector calcExtendedTopochemicalAtom(const RDKit::ROMol& mol); 58 | RDKIT_OSMORDRED_EXPORT std::vector calculateETADescriptors(const RDKit::ROMol& mol); 59 | RDKIT_OSMORDRED_EXPORT std::vector calcChipath(const RDKit::ROMol& mol); 60 | RDKIT_OSMORDRED_EXPORT std::vector calcChichain(const RDKit::ROMol& mol); 61 | RDKIT_OSMORDRED_EXPORT std::vector calcChicluster(const RDKit::ROMol& mol); 62 | RDKIT_OSMORDRED_EXPORT std::vector calcChipathcluster(const RDKit::ROMol& mol); 63 | RDKIT_OSMORDRED_EXPORT int calcAcidicGroupCount(const ROMol& mol); 64 | RDKIT_OSMORDRED_EXPORT int calcBasicGroupCount(const ROMol& mol); 65 | RDKIT_OSMORDRED_EXPORT int countAromaticAtoms(const ROMol& mol); 66 | RDKIT_OSMORDRED_EXPORT int countAromaticBonds(const ROMol& mol); 67 | 
RDKIT_OSMORDRED_EXPORT std::vector calcBEStateDescs(const RDKit::ROMol& mol); 68 | RDKIT_OSMORDRED_EXPORT std::vector calcHEStateDescs(const RDKit::ROMol& mol); 69 | RDKIT_OSMORDRED_EXPORT std::vector calcAlphaKappaShapeIndex(const RDKit::ROMol& mol); // closer "missing" k3 path count not correct on few cases 70 | RDKIT_OSMORDRED_EXPORT std::vector calcAbrahams(const RDKit::ROMol& mol); // Platts, Butina, Abraham, Hersey paper J Chem Inf Comput Sci. 1999 30/8/01;39(5):835-45 71 | RDKIT_OSMORDRED_EXPORT std::vector calcPol(const RDKit::ROMol& mol); 72 | RDKIT_OSMORDRED_EXPORT std::vector calcMR(const RDKit::ROMol& mol); 73 | RDKIT_OSMORDRED_EXPORT std::vector calcFlexibility(const RDKit::ROMol& mol); 74 | RDKIT_OSMORDRED_EXPORT std::vector calcODT(const RDKit::ROMol& mol); 75 | RDKIT_OSMORDRED_EXPORT std::vector calcSchultz(const RDKit::ROMol& mol); 76 | RDKIT_OSMORDRED_EXPORT std::vector calcRNCG_RPCG(const RDKit::ROMol& mol); 77 | RDKIT_OSMORDRED_EXPORT std::vector calcAZV(const RDKit::ROMol& mol); 78 | RDKIT_OSMORDRED_EXPORT std::vector calcASV(const RDKit::ROMol& mol); 79 | RDKIT_OSMORDRED_EXPORT std::vector calcDSV(const RDKit::ROMol& mol); 80 | RDKIT_OSMORDRED_EXPORT std::vector calcAZS(const RDKit::ROMol& mol); 81 | RDKIT_OSMORDRED_EXPORT std::vector calcASZ(const RDKit::ROMol& mol); 82 | RDKIT_OSMORDRED_EXPORT std::vector calcDN2S(const RDKit::ROMol& mol); 83 | RDKIT_OSMORDRED_EXPORT std::vector calcDN2I(const RDKit::ROMol& mol); 84 | RDKIT_OSMORDRED_EXPORT std::vector calcASI(const RDKit::ROMol& mol); 85 | RDKIT_OSMORDRED_EXPORT std::vector calcDSI(const RDKit::ROMol& mol); 86 | RDKIT_OSMORDRED_EXPORT std::vector calcASN(const RDKit::ROMol& mol); 87 | RDKIT_OSMORDRED_EXPORT std::vector calcDSN(const RDKit::ROMol& mol); 88 | RDKIT_OSMORDRED_EXPORT std::vector calcDN2N(const RDKit::ROMol& mol); 89 | RDKIT_OSMORDRED_EXPORT std::vector calcANS(const RDKit::ROMol& mol); 90 | RDKIT_OSMORDRED_EXPORT std::vector calcANV(const RDKit::ROMol& mol); 91 | 
RDKIT_OSMORDRED_EXPORT std::vector calcAZN(const RDKit::ROMol& mol); 92 | RDKIT_OSMORDRED_EXPORT std::vector calcANZ(const RDKit::ROMol& mol); 93 | RDKIT_OSMORDRED_EXPORT std::vector calcANI(const RDKit::ROMol& mol); 94 | RDKIT_OSMORDRED_EXPORT std::vector calcDSZ(const RDKit::ROMol& mol); 95 | RDKIT_OSMORDRED_EXPORT std::vector calcANN(const RDKit::ROMol& mol); 96 | RDKIT_OSMORDRED_EXPORT std::vector calcDN2Z(const RDKit::ROMol& mol); 97 | RDKIT_OSMORDRED_EXPORT std::vector calcANMat(const RDKit::ROMol& mol); 98 | RDKIT_OSMORDRED_EXPORT std::vector calcAZMat(const RDKit::ROMol& mol); 99 | RDKIT_OSMORDRED_EXPORT std::vector calcASMat(const RDKit::ROMol& mol); 100 | RDKIT_OSMORDRED_EXPORT std::vector calcDSMat(const RDKit::ROMol& mol); 101 | RDKIT_OSMORDRED_EXPORT std::vector calcDN2Mat(const RDKit::ROMol& mol); 102 | RDKIT_OSMORDRED_EXPORT std::vector calcFrags(const RDKit::ROMol& mol); 103 | RDKIT_OSMORDRED_EXPORT std::vector calcAddFeatures(const RDKit::ROMol& mol); 104 | RDKIT_OSMORDRED_EXPORT std::vector calcInformationContent(const RDKit::ROMol& mol, int maxradius=5); // Inspired by 1984 Basak paper 105 | 106 | } // namespace RDKit 107 | 108 | #endif //_OSMORDRED_H -------------------------------------------------------------------------------- /Code/Osmordred/Wrap/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | remove_definitions(-DRDKIT_OSMORDRED_BUILD) 2 | rdkit_python_extension(rdOsmordred 3 | rdOsmordred.cpp 4 | DEST Chem 5 | LINK_LIBRARIES Osmordred) 6 | 7 | # add_pytest(pyOsmordred ${CMAKE_CURRENT_SOURCE_DIR}/testOsmordred.py) 8 | 9 | -------------------------------------------------------------------------------- /Code/Osmordred/Wrap/rdOsmordred.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | namespace python = boost::python; 5 | 6 | using namespace RDKit; 7 | // Boost.Python module "rdOsmordred": registers each Osmordred descriptor calculator under its Python-facing name. 8 | BOOST_PYTHON_MODULE(rdOsmordred) { 9 | 
python::scope().attr("__doc__") = "Functions for Mordred calculations"; 10 | // Calculators whose C++ declaration has a defaulted trailing parameter are registered with explicit python::arg keywords so the same default applies from Python; positional calls keep working. 11 | python::def("CalcABCIndex", calcABCIndex, 12 | "CalcABCIndex function\n"); 13 | python::def("CalcAcidBase", calcAcidBase, 14 | "CalcAcidBase function\n"); 15 | python::def("CalcAromatic", calcAromatic, 16 | "CalcAromatic function\n"); 17 | python::def("CalcAtomCount", calcAtomCounts, 18 | (python::arg("mol"), python::arg("version") = 1), "CalcAtomCounts function\n"); 19 | python::def("CalcBalabanJ", calcBalabanJ, 20 | "CalcBalabanJ function\n"); 21 | python::def("CalcBertzCT", calcBertzCT, 22 | "CalcBertzCT function\n"); 23 | python::def("CalcBondCount", calcBondCounts, 24 | "CalcBondCounts function\n"); 25 | python::def("CalcVertexAdjacencyInformation", calcVertexAdjacencyInformation, 26 | "CalcVertexAdjacencyInformation function\n"); 27 | python::def("CalcWeight", calcWeight, 28 | "CalcWeight function\n"); 29 | python::def("CalcWienerIndex", calcWienerIndex, 30 | "CalcWienerIndex function\n"); 31 | python::def("CalcVdwVolumeABC", calcVdwVolumeABC, 32 | "CalcVdwVolumeABC function\n"); 33 | python::def("CalcTopoPSA", calcTopoPSA, 34 | "CalcTopoPSA function\n"); 35 | python::def("CalcSLogP", calcSLogP, 36 | "CalcSLogP function\n"); 37 | python::def("CalcHydrogenBond", calcHydrogenBond, 38 | "CalcHydrogenBond function\n"); 39 | python::def("CalcLogS", calcLogS, 40 | "CalcLogS function\n"); 41 | python::def("CalcLipinski", calcLipinskiGhose, 42 | "CalcLipinskiGhose function\n"); 43 | python::def("CalcMcGowanVolume", calcMcGowanVolume, 44 | "CalcMcGowanVolume function\n"); 45 | python::def("CalcPolarizability", calcPolarizability, 46 | "CalcPolarizability function\n"); 47 | python::def("CalcRotatableBond", calcRotatableBond, 48 | "CalcRotatableBond function\n"); 49 | python::def("CalcFragmentComplexity", calcFragmentComplexity, 50 | "CalcFragmentComplexity function\n"); 51 | python::def("CalcConstitutional", calcConstitutional, 52 | "CalcConstitutional function\n"); 53 | python::def("CalcTopologicalIndex", calcTopologicalIndex, 54 | 
"CalcTopologicalIndex function\n"); 55 | python::def("CalcDetourMatrixEigen", calcDetourMatrixDescs, 56 | "CalcDetourMatrixDescs function\n"); 57 | python::def("CalcDetourMatrix", calcDetourMatrixDescsL, 58 | "CalcDetourMatrixDescsL function\n"); 59 | python::def("CalcDistanceMatrixEigen", calcDistMatrixDescs, 60 | (python::arg("mol"), python::arg("version") = 1), "CalcDistMatrixDescs function\n"); 61 | python::def("CalcDistanceMatrix", calcDistMatrixDescsL, 62 | (python::arg("mol"), python::arg("version") = 1), "CalcDistMatrixDescsL function\n"); 63 | python::def("CalcAdjacencyMatrixEigen", calcAdjMatrixDescs, 64 | (python::arg("mol"), python::arg("version") = 1), "CalcAdjMatrixDescs function\n"); 65 | python::def("CalcAdjacencyMatrix", calcAdjMatrixDescsL, 66 | (python::arg("mol"), python::arg("version") = 1), "CalcAdjMatrixDescsL function\n"); 67 | python::def("CalcCarbonTypes", calcCarbonTypes, 68 | (python::arg("mol"), python::arg("version") = 1), "CalcCarbonTypes function\n"); 69 | python::def("CalcEccentricConnectivityIndex", calcEccentricConnectivityIndex, 70 | "CalcEccentricConnectivityIndex function\n"); 71 | python::def("CalcBaryszMatrix", calcBaryszMatrixDescsL, 72 | "CalcBaryszMatrixDescsL function\n"); 73 | python::def("CalcBaryszMatrixEigen", calcBaryszMatrixDescs, 74 | "CalcBaryszMatrixDescs function\n"); 75 | python::def("CalcZagrebIndex", calcZagrebIndex, 76 | "CalcZagrebIndex function\n"); 77 | python::def("CalcMoeType", calcMoeType, 78 | "CalcMoeType function\n"); 79 | python::def("CalcMolecularDistanceEdge", calcMolecularDistanceEdgeDescs, 80 | "CalcMolecularDistanceEdgeDescs function\n"); 81 | python::def("CalcEState", calcEStateDescs, 82 | (python::arg("mol"), python::arg("extended") = false), "CalcEStateDescs function\n"); 83 | python::def("CalcWalkCount", calcWalkCounts, 84 | "CalcWalkCounts function\n"); 85 | python::def("CalcTopologicalCharge", calcTopologicalChargeDescs, 86 | "CalcTopologicalChargeDescs function\n"); 87 | python::def("CalcChi", calcAllChiDescriptors, 88 | "CalcAllChiDescriptors function\n"); 89 | python::def("CalcPathCount", calcPathCount, 90 | "CalcPathCount function\n"); 91 | python::def("CalcKappaShapeIndex", calcKappaShapeIndex, 92 | "CalcKappaShapeIndex function\n"); 93 | python::def("CalcRingCount", calcRingCount, 
94 | "CalcRingCount function\n"); 95 | python::def("CalcMolecularId", calcMolecularId, 96 | "CalcMolecularId function\n"); 97 | python::def("CalcBCUT", calcBCUTs, 98 | "CalcBCUTs function\n"); 99 | python::def("CalcAutocorrelation", calcAutoCorrelation, 100 | "CalcAutoCorrelation function\n"); 101 | python::def("CalcFramework", calcFramework, 102 | "CalcFramework function\n"); 103 | python::def("CalcExtendedTopochemicalAtom", calcExtendedTopochemicalAtom, 104 | "CalcExtendedTopochemicalAtom function\n"); 105 | python::def("CalcExtendedTopochemicalAtom2", calculateETADescriptors, 106 | "CalculateETADescriptors function\n"); 107 | python::def("CalcChipath", calcChipath, 108 | "CalcChipath function\n"); 109 | python::def("CalcChichain", calcChichain, 110 | "CalcChichain function\n"); 111 | python::def("CalcChicluster", calcChicluster, 112 | "CalcChicluster function\n"); 113 | python::def("CalcChipathcluster", calcChipathcluster, 114 | "CalcChipathcluster function\n"); 115 | python::def("CalcAcidicGroupCount", calcAcidicGroupCount, 116 | "CalcAcidicGroupCount function\n"); 117 | python::def("CalcBasicGroupCount", calcBasicGroupCount, 118 | "CalcBasicGroupCount function\n"); 119 | python::def("CalcCountAromaticAtoms", countAromaticAtoms, 120 | "CalcCountAromaticAtoms function"); 121 | python::def("CalcCountAromaticBonds", countAromaticBonds, 122 | "CalcCountAromaticBonds function"); 123 | python::def("CalcBEState", calcBEStateDescs, 124 | "CalcBEStateDescs function\n"); 125 | python::def("CalcHEState", calcHEStateDescs, 126 | "CalcHEStateDescs function\n"); 127 | python::def("CalcAlphaKappaShapeIndex", calcAlphaKappaShapeIndex, 128 | "CalcAlphaKappaShapeIndex function\n"); 129 | python::def("CalcAbrahams", calcAbrahams, 130 | "CalcAbrahams function\n"); 131 | python::def("CalcPol", calcPol, 132 | "CalcPol function\n"); 133 | python::def("CalcMR", calcMR, 134 | "CalcMR function\n"); 135 | python::def("CalcFlexibility", calcFlexibility, 136 | "CalcFlexibility 
function\n"); 137 | python::def("CalcODT", calcODT, 138 | "CalcODT function\n"); 139 | python::def("CalcSchultz", calcSchultz, 140 | "CalcSchultz function\n"); 141 | python::def("CalcRNCGRPCG", calcRNCG_RPCG, 142 | "CalcRNCG_RPCG function\n"); 143 | python::def("CalcAZV", calcAZV, 144 | "CalcAZV function\n"); 145 | python::def("CalcASV", calcASV, 146 | "CalcASV function\n"); 147 | python::def("CalcDSV", calcDSV, 148 | "CalcDSV function\n"); 149 | python::def("CalcAZS", calcAZS, 150 | "CalcAZS function\n"); 151 | python::def("CalcASZ", calcASZ, 152 | "CalcASZ function\n"); 153 | python::def("CalcDN2S", calcDN2S, 154 | "CalcDN2S function\n"); 155 | python::def("CalcDN2I", calcDN2I, 156 | "CalcDN2I function\n"); 157 | python::def("CalcASI", calcASI, 158 | "CalcASI function\n"); 159 | python::def("CalcDSI", calcDSI, 160 | "CalcDSI function\n"); 161 | python::def("CalcASN", calcASN, 162 | "CalcASN function\n"); 163 | python::def("CalcDSN", calcDSN, 164 | "CalcDSN function\n"); 165 | python::def("CalcDN2N", calcDN2N, 166 | "CalcDN2N function\n"); 167 | python::def("CalcANS", calcANS, 168 | "CalcANS function\n"); 169 | python::def("CalcANV", calcANV, 170 | "CalcANV function\n"); 171 | python::def("CalcAZN", calcAZN, 172 | "CalcAZN function\n"); 173 | python::def("CalcANZ", calcANZ, 174 | "CalcANZ function\n"); 175 | python::def("CalcANI", calcANI, 176 | "CalcANI function\n"); 177 | python::def("CalcDSZ", calcDSZ, 178 | "CalcDSZ function\n"); 179 | python::def("CalcANN", calcANN, 180 | "CalcANN function\n"); 181 | python::def("CalcDN2Z", calcDN2Z, 182 | "CalcDN2Z function\n"); 183 | python::def("CalcANMat", calcANMat, 184 | "CalcANMat function\n"); 185 | python::def("CalcAZMat", calcAZMat, 186 | "CalcAZMat function\n"); 187 | python::def("CalcASMat", calcASMat, 188 | "CalcASMat function\n"); 189 | python::def("CalcDSMat", calcDSMat, 190 | "CalcDSMat function\n"); 191 | python::def("CalcDN2Mat", calcDN2Mat, 192 | "CalcDN2Mat function\n"); 193 | python::def("CalcFrags", 
calcFrags, 194 | "CalcFrags function\n"); 195 | python::def("CalcAddFeatures", calcAddFeatures, 196 | "CalcAddFeatures function\n"); 197 | python::def("CalcInformationContent", calcInformationContent, 198 | (python::arg("mol"), python::arg("maxradius") = 5), "CalcInformationContent function\n"); 199 | 200 | }; -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Osmordred : Unified RDkit new descriptors in c++ 2 | 3 | Osmordred was inspired by the Dragon, Padel, Mordred and other toolkits to generate empirical molecular features 4 | Our goal focus only on 0D,1D and 2D molecular descriptors fused with rdkit backend at cpp level to get very fast computation in parallel if needed. 5 | 6 | ## Remark on reproductibility: 7 | 8 | I spent quite time to implement a descent Information Content descriptor version based on the first paper from 1984 where Basak describes in detail his method https://doi.org/10.1016/B978-0-08-030156-3.50138-7. 9 | So our implementation of Information Content is not 100% identical to Basak, Padel and Mordred but it follows the core Basak logic within RDkit where aromaticity is "specific". 10 | This was indeed during this period that I also implement the Triplet features from Basak team. 11 | 12 | ## Future: 13 | Current version is around 10k lines of codes in only one file. 14 | It will be great to better integrate and refactor python bindings. 15 | Additionally a list of other descriptors were added to produce now 3586 individual features. 16 | 17 | ## Speed 18 | This is fully parallelized. Lapack was selected of it speed specially on the SVD decompostion of symmetrical squared matrix instead of Eigen3 solvers. 19 | The Lapack can produce very small fluctuation for almost zeros Eigen values and affect very slighlty few descriptors. 
20 | 21 | ## Installation: 22 | 23 | ### Method 1: create a new Python 3.11 environment from scratch 24 | ``` 25 | ./setup_env.sh 26 | 27 | ./build.sh 28 | 29 | conda activate osmordred 30 | 31 | pip install dist/osmordred-0.2.0-cp311-cp311-macosx_15_0_arm64.whl --force-reinstall 32 | ``` 33 | 34 | ### Method 2: install into your current environment (for the moment, Python 3.11 with RDKit 2023.9.3) 35 | 36 | --- 37 | Mac Arm64 38 | ``` 39 | rm -rf _skbuild src/osmordred.egg-info dist 40 | conda install boost==1.82.0 eigen lapack ninja python-build rdkit==2023.9.3 blas='*=*openblas' -c conda-forge 41 | conda run python -m build 42 | pip install dist/osmordred-0.2.0-cp311-cp311-macosx_15_0_arm64.whl --force-reinstall 43 | pip show osmordred # normally you can see the installation in your environment 44 | ``` 45 | 46 | 47 | --- 48 | Linux 49 | ``` 50 | rm -rf _skbuild src/osmordred.egg-info dist 51 | conda install boost==1.82.0 eigen lapack ninja python-build rdkit==2023.9.3 blas='*=*mkl' -c conda-forge 52 | conda run python -m build 53 | pip install dist/osmordred-0.2.0-cp311-cp311-linux_x86_64.whl --force-reinstall 54 | pip show osmordred # normally you can see the installation in your environment 55 | ``` 56 | 57 | #### Example for testing: 58 | ``` 59 | cd test 60 | pip install tqdm 61 | python test.py 62 | ``` 63 | 64 | #### Note that attempting to `import osmordred` from Python running in the skbuild directory does not work, as it imports the `osmordred` subdirectory. 
65 | 66 | ### Known issue: 67 | Such a complex molecule causes lag due to intensive computations : 68 | ``` 69 | c12c3c4c5c1c1c6c7c2c2c8c3c3c9c4c4c%10c5c5c1c1c6c6c%11c7c2c2c7c8c3c3c8c9c4c4c9c%10c5c5c1c1c6c6c%11c2c2c7c3c3c8c4c4c9c5c1c1c6c2c3c41 70 | ``` 71 | ## License: 72 | 73 | BSD-3-Clause 74 | -------------------------------------------------------------------------------- /build_patched_rdkit-pypi/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore the checkout of the repo to build 2 | rdkit-pypi 3 | -------------------------------------------------------------------------------- /build_patched_rdkit-pypi/README.md: -------------------------------------------------------------------------------- 1 | # Building patched rdkit wheel 2 | Run the script with the necessary CI Buildwheel env vars, e.g.: 3 | ``` 4 | CIBW_PLATFORM=linux CIBW_BUILD=cp311-manylinux_x86_64 ./build_rdkit-pypi.sh 5 | ``` -------------------------------------------------------------------------------- /build_patched_rdkit-pypi/build_rdkit-pypi.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | # work in the script directory (quoted so a checkout path containing spaces still works) 5 | cd "$(dirname "$0")" 6 | 7 | if [[ -z "$CIBW_BUILD" || -z "$CIBW_PLATFORM" ]]; then 8 | echo "You must set CIBW_BUILD and CIBW_PLATFORM to set a build target environment." 
9 | exit 1 10 | fi 11 | # RUN is a command prefix: "conda run -n <env>" by default, empty when NO_CONDA is set. It is expanded unquoted on purpose so it splits into words. 12 | if [ -z "$NO_CONDA" ]; then 13 | CONDA_ENV=build-rdkit-pypi 14 | RUN="conda run -n $CONDA_ENV" 15 | echo "Will create a conda env ($CONDA_ENV) to launch cibuildwheel from" 16 | conda create -y -n "$CONDA_ENV" python=3.11 17 | else 18 | RUN= 19 | fi 20 | 21 | $RUN pip install cibuildwheel==2.16.2 22 | 23 | # there's no tag in rdkit-pypi for the 2023.9.3 release 24 | # (I think it moved from a different repo) 25 | RDKIT_PYPI_2023_9_3_SHA=311157810e3d018bd2333f0f3c75bcc8538bf486 26 | git clone https://github.com/kuelumbus/rdkit-pypi.git 27 | cd rdkit-pypi 28 | git checkout -b 2023.9.3-osmordred "$RDKIT_PYPI_2023_9_3_SHA" 29 | # Patch the setup.py and pyproject.toml 30 | git apply ../rdkit-pypi_2023_09_3_osmordred.diff 31 | # Stage the RDKit patches and the Osmordred sources inside the checkout; the patched setup.py looks them up in its working directory. 32 | cp -a ../../osmordred_rdkit_2023_09_3_patches . 33 | mkdir osmordred_source 34 | cp -a ../../Code osmordred_source 35 | cp -a ../../rdkit osmordred_source 36 | 37 | echo "Kicking off cibuildwheel" 38 | $RUN python3 -m cibuildwheel --output-dir wheelhouse --config-file pyproject.toml -------------------------------------------------------------------------------- /build_patched_rdkit-pypi/rdkit-pypi_2023_09_3_osmordred.diff: -------------------------------------------------------------------------------- 1 | diff --git a/pyproject.toml b/pyproject.toml 2 | index b4dedf4..160e982 100644 3 | --- a/pyproject.toml 4 | +++ b/pyproject.toml 5 | @@ -22,8 +22,9 @@ test-command = "pytest --exitfirst --verbose --failed-first {package}/tests" 6 | before-all = [ 7 | # Has eigen3-devel.aarch64 8 | "yum install -y epel-release", 9 | - # "yum install -y wget freetype-devel libpng12-devel pixman-devel zlib-devel eigen3-devel", 10 | - "yum install -y wget freetype-devel libpng12-devel pixman-devel zlib-devel", 11 | + "yum install -y wget freetype-devel libpng12-devel pixman-devel zlib-devel eigen3-devel", 12 | + "yum install -y lapack-devel openblas-devel", 13 | + # "yum install -y wget freetype-devel libpng12-devel pixman-devel 
zlib-devel", 14 | # cairo-devel from the centos repo does not work, build here 15 | "wget https://www.cairographics.org/snapshots/cairo-1.15.14.tar.xz --no-check-certificate", 16 | "tar xvf cairo-*", 17 | diff --git a/setup.py b/setup.py 18 | index e915938..d8e05e8 100644 19 | --- a/setup.py 20 | +++ b/setup.py 21 | @@ -168,6 +168,18 @@ class BuildRDKit(build_ext_orig): 22 | ["git", "clone", "-b", f"{ext.rdkit_tag}", "https://github.com/rdkit/rdkit"] 23 | ) 24 | 25 | + # patch and copy files to add Osmordred (also backport font sha fix) 26 | + osmordred_patches_path = Path.resolve(Path.joinpath(cwd, "osmordred_rdkit_2023_09_3_patches")) 27 | + osmordred_source_path = Path.resolve(Path.joinpath(cwd, "osmordred_source")) 28 | + check_call( 29 | + f"echo PATCHING Osmordred from {osmordred_patches_path}; cd rdkit; git apply {osmordred_patches_path}/*.diff", 30 | + shell=True, 31 | + ) 32 | + check_call( 33 | + f"echo COPYING Osmordred sources {osmordred_source_path}; cp -a {osmordred_source_path}/* rdkit/", 34 | + shell=True, 35 | + ) 36 | + 37 | # Location of license file 38 | license_file = build_path / "rdkit" / "license.txt" 39 | 40 | @@ -201,7 +213,8 @@ class BuildRDKit(build_ext_orig): 41 | "-DRDK_BUILD_CPP_TESTS=OFF", 42 | # Fix InChi download 43 | "-DINCHI_URL=https://rdkit.org/downloads/INCHI-1-SRC.zip", 44 | - 45 | + # Build Osmordred code 46 | + "-DRDK_BUILD_OSMORDRED=ON", 47 | ] 48 | 49 | # Modifications for Windows 50 | @@ -343,7 +356,7 @@ class BuildRDKit(build_ext_orig): 51 | 52 | setup( 53 | name="rdkit", 54 | - version=rdkit_tag.replace("Release_", "").replace("_", "."), 55 | + version=rdkit_tag.replace("Release_", "").replace("_", ".") + "+osmordred", 56 | description="A collection of chemoinformatics and machine-learning software written in C++ and Python", 57 | author="Christopher Kuenneth", 58 | author_email="chris@kuenneth.dev", 59 | -------------------------------------------------------------------------------- /license.txt: 
-------------------------------------------------------------------------------- 1 | Copyright <2025> 2 | 3 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 4 | 5 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 6 | 7 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 8 | 9 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 10 | 11 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
12 | -------------------------------------------------------------------------------- /osmordred_rdkit_2023_09_3_patches/AllChem.py.diff: -------------------------------------------------------------------------------- 1 | diff --git a/rdkit/Chem/AllChem.py b/rdkit/Chem/AllChem.py 2 | index f4439c32a..b30f0510f 100644 3 | --- a/rdkit/Chem/AllChem.py 4 | +++ b/rdkit/Chem/AllChem.py 5 | @@ -42,6 +42,11 @@ try: 6 | except ImportError: 7 | pass 8 | 9 | +try: 10 | + from rdkit.Chem.rdOsmordred import * 11 | +except ImportError: 12 | + pass 13 | + 14 | Mol.Compute2DCoords = Compute2DCoords 15 | Mol.ComputeGasteigerCharges = ComputeGasteigerCharges 16 | logger = logger() 17 | -------------------------------------------------------------------------------- /osmordred_rdkit_2023_09_3_patches/CMakeLists.txt.diff: -------------------------------------------------------------------------------- 1 | diff --git a/CMakeLists.txt b/CMakeLists.txt 2 | index cb79b16e8..5c9f44083 100644 3 | --- a/CMakeLists.txt 4 | +++ b/CMakeLists.txt 5 | @@ -70,6 +70,7 @@ option(RDK_BUILD_FUZZ_TARGETS "build the fuzz targets" OFF) 6 | option(RDK_BUILD_MINIMAL_LIB_RXN "build support for reactions into MinimalLib" ON ) 7 | option(RDK_BUILD_MINIMAL_LIB_SUBSTRUCTLIBRARY "build support for SubstructLibrary into MinimalLib" ON ) 8 | option(RDK_BUILD_MINIMAL_LIB_MCS "build support for MCS into MinimalLib" OFF ) 9 | +option(RDK_BUILD_OSMORDRED "build Osmordred functions" OFF) 10 | 11 | set(RDK_BOOST_VERSION "1.58.0") 12 | 13 | @@ -625,6 +626,11 @@ if(RDK_BUILD_CONTRIB) 14 | add_subdirectory(Contrib) 15 | endif(RDK_BUILD_CONTRIB) 16 | 17 | +if(RDK_BUILD_OSMORDRED) 18 | + set(BLA_VENDOR OpenBLAS) 19 | + find_package(LAPACK REQUIRED) 20 | +endif(RDK_BUILD_OSMORDRED) 21 | + 22 | # export the project targets (to be included in the cmake package configuration) 23 | install( 24 | EXPORT ${RDKit_EXPORTED_TARGETS} 25 | -------------------------------------------------------------------------------- 
/osmordred_rdkit_2023_09_3_patches/Code_CMakeLists.txt.diff: -------------------------------------------------------------------------------- 1 | diff --git a/Code/CMakeLists.txt b/Code/CMakeLists.txt 2 | index cda2c25ff..eb8c9d8c8 100644 3 | --- a/Code/CMakeLists.txt 4 | +++ b/Code/CMakeLists.txt 5 | @@ -40,3 +40,8 @@ add_subdirectory(MinimalLib) 6 | if(RDK_BUILD_FUZZ_TARGETS) 7 | add_subdirectory(Fuzz) 8 | endif(RDK_BUILD_FUZZ_TARGETS) 9 | + 10 | + 11 | +if(RDK_BUILD_OSMORDRED) 12 | + add_subdirectory(Osmordred) 13 | +endif(RDK_BUILD_OSMORDRED) 14 | -------------------------------------------------------------------------------- /osmordred_rdkit_2023_09_3_patches/font_sha_patch.diff: -------------------------------------------------------------------------------- 1 | diff --git a/Code/GraphMol/MolDraw2D/CMakeLists.txt b/Code/GraphMol/MolDraw2D/CMakeLists.txt 2 | index 6cd0833a2..58a8a3ebc 100644 3 | --- a/Code/GraphMol/MolDraw2D/CMakeLists.txt 4 | +++ b/Code/GraphMol/MolDraw2D/CMakeLists.txt 5 | @@ -17,14 +17,16 @@ if (RDK_BUILD_FREETYPE_SUPPORT AND RDK_INSTALL_COMIC_FONTS) 6 | set(needDownload "FALSE") 7 | endif () 8 | if (needDownload) 9 | - set(MD5Sum "850b0df852f1cda4970887b540f8f333") 10 | - downloadAndCheckMD5("https://fonts.google.com/download?family=Comic%20Neue" 11 | - "${CMAKE_CURRENT_SOURCE_DIR}/Comic_Neue.zip" 12 | - ${MD5Sum}) 13 | - execute_process(COMMAND ${CMAKE_COMMAND} -E tar x 14 | - ${CMAKE_CURRENT_SOURCE_DIR}/Comic_Neue.zip --format=zip 15 | - WORKING_DIRECTORY ${RDKit_DataDir}/Fonts) 16 | + # we started having problems with constantly changing MD5s on the zip file, 17 | + # so now we just check the MD5 of the target file 18 | + downloadAndCheckMD5("https://github.com/google/fonts/raw/main/ofl/comicneue/ComicNeue-Regular.ttf" 19 | + "${RDKit_DataDir}/Fonts/ComicNeue-Regular.ttf" 20 | + "fc1eac54b325542d4c133732658f823b") 21 | + downloadAndCheckMD5("https://github.com/google/fonts/raw/main/ofl/comicneue/OFL.txt" 22 | + 
"${RDKit_DataDir}/Fonts/OFL.txt" 23 | + "") 24 | endif (needDownload) 25 | + 26 | endif () 27 | 28 | rdkit_headers(MolDraw2D.h 29 | -------------------------------------------------------------------------------- /rdkit/Chem/Osmordred/__init__.py: -------------------------------------------------------------------------------- 1 | from rdkit.Chem.rdOsmordred import * 2 | -------------------------------------------------------------------------------- /skbuild/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.28) 2 | 3 | project(osmordred) 4 | 5 | # Set policies 6 | cmake_policy(SET CMP0144 NEW) 7 | 8 | set(CMAKE_CXX_STANDARD 17) 9 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 10 | 11 | # define the C++ library "_cppmordred" 12 | add_library(_osmordred MODULE osmordred/osmordred.cpp) 13 | 14 | # Find and link python 15 | find_package(Python3 REQUIRED COMPONENTS Interpreter Development) 16 | include_directories(${PYTHON_INCLUDE_DIR}) 17 | 18 | # Find and link RDKit 19 | find_package(RDKit REQUIRED) 20 | include_directories(${RDKit_INCLUDE_DIRS}) 21 | 22 | # Find and link Eigen 23 | find_path(EIGEN3_INCLUDE_DIR Eigen/Dense 24 | PATH_SUFFIXES /eigen 25 | REQUIRED 26 | ) 27 | include_directories(${EIGEN3_INCLUDE_DIR}) 28 | 29 | # Find and link Boost 30 | find_package(Boost REQUIRED python) 31 | include_directories(${Boost_INCLUDE_DIRS}) 32 | 33 | # Find and link LAPACK 34 | find_package(LAPACK REQUIRED) 35 | include_directories(${LAPACK_INCLUDE_DIRS}) 36 | 37 | # need explicit paths to RDKit libraries. 
# Locate every RDKit component library that _osmordred links against.
# Each result is cached as libRDKit<Component>. REQUIRED makes a missing
# library fail right here with a clear message rather than later with a
# generic "set to NOTFOUND" error at generate time.
set(_osmordred_rdkit_components
    DataStructs
    Descriptors
    Fingerprints
    GraphMol
    PartialCharges
    RDGeneral
    SmilesParse
    Subgraphs
    SubstructMatch
)
foreach(_component IN LISTS _osmordred_rdkit_components)
    find_library(libRDKit${_component} NAMES "RDKit${_component}" REQUIRED)
endforeach()

# Find and link RDKit libraries
target_link_libraries(_osmordred PRIVATE
    ${libRDKitDataStructs}
    ${libRDKitDescriptors}
    ${libRDKitFingerprints}
    ${libRDKitGraphMol}
    ${libRDKitPartialCharges}
    ${libRDKitRDGeneral}
    ${libRDKitSmilesParse}
    ${libRDKitSubgraphs}
    ${libRDKitSubstructMatch}
    ${LAPACK_LIBRARIES}
    ${Boost_LIBRARIES}
    ${Boost_PYTHON_LIBRARY}  # Ensure Boost.Python is linked
    ${Python3_LIBRARIES}
)

# Module file name (no "lib" prefix, ".so" suffix) and runtime search path.
# The RPATH settings are applied as target properties: the CMAKE_*RPATH*
# variables only initialise these properties when a target is created, and
# _osmordred already exists at this point, so setting the variables here
# would have no effect on it.
set_target_properties(_osmordred PROPERTIES
    PREFIX ""
    SUFFIX ".so"
    BUILD_WITH_INSTALL_RPATH TRUE
    INSTALL_RPATH_USE_LINK_PATH TRUE
    INSTALL_RPATH "${Python3_ROOT_DIR}/lib;/opt/homebrew/opt/lapack/lib"
)

install(TARGETS _osmordred LIBRARY DESTINATION osmordred)

--------------------------------------------------------------------------------
/skbuild/MANIFEST.in:
--------------------------------------------------------------------------------
include CMakeLists.txt
include README.md
include osmordred/osmordred.cpp

--------------------------------------------------------------------------------
/skbuild/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osmoai/osmordred/07b8d22f570712c6ab3527dde195aad42fef4679/skbuild/__init__.py
-------------------------------------------------------------------------------- /skbuild/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | # clean up generated dirs 5 | rm -rf _skbuild src/osmordred.egg-info dist 6 | 7 | conda run -n osmordred python -m build 8 | 9 | echo Wheel build complete 10 | -------------------------------------------------------------------------------- /skbuild/osmordred/__init__.py: -------------------------------------------------------------------------------- 1 | from ._osmordred import ( 2 | CalcABCIndex, 3 | CalcAcidBase, 4 | CalcAromatic, 5 | CalcAtomCount, 6 | CalcBalabanJ, 7 | CalcBertzCT, 8 | CalcBondCount, 9 | CalcVertexAdjacencyInformation, 10 | CalcWeight, 11 | CalcWienerIndex, 12 | CalcVdwVolumeABC, 13 | CalcTopoPSA, 14 | CalcSLogP, 15 | CalcHydrogenBond, 16 | CalcLogS, 17 | CalcLipinski, 18 | CalcMcGowanVolume, 19 | CalcPolarizability, 20 | CalcRotatableBond, 21 | CalcFragmentComplexity, 22 | CalcConstitutional, 23 | CalcTopologicalIndex, 24 | CalcDetourMatrixEigen, 25 | CalcDetourMatrix, 26 | CalcDistanceMatrixEigen, 27 | CalcDistanceMatrix, 28 | CalcAdjacencyMatrixEigen, 29 | CalcAdjacencyMatrix, 30 | CalcCarbonTypes, 31 | CalcEccentricConnectivityIndex, 32 | CalcBaryszMatrix, 33 | CalcBaryszMatrixEigen, 34 | CalcZagrebIndex, 35 | CalcMoeType, 36 | CalcMolecularDistanceEdge, 37 | CalcEState, 38 | CalcWalkCount, 39 | CalcTopologicalCharge, 40 | CalcChi, 41 | CalcPathCount, 42 | CalcKappaShapeIndex, 43 | CalcRingCount, 44 | CalcMolecularId, 45 | CalcBCUT, 46 | CalcAutocorrelation, 47 | CalcFramework, 48 | CalcExtendedTopochemicalAtom, 49 | CalcExtendedTopochemicalAtom2, 50 | CalcChipath, 51 | CalcChiPath, 52 | CalcChichain, 53 | CalcChicluster, 54 | CalcChipathcluster, 55 | CalcAcidicGroupCount, 56 | CalcBasicGroupCount, 57 | CalcCountAromaticAtoms, 58 | CalcCountAromaticBonds, 59 | CalcBEState, 60 | CalcHEState, 61 | CalcAlphaKappaShapeIndex, 62 | 
CalcAbrahams, 63 | CalcPol, 64 | CalcMR, 65 | CalcFlexibility, 66 | CalcODT, 67 | CalcSchultz, 68 | CalcRNCGRPCG, 69 | CalcAZV, 70 | CalcASV, 71 | CalcDSV, 72 | CalcAZS, 73 | CalcASZ, 74 | CalcDN2S, 75 | CalcDN2I, 76 | CalcASI, 77 | CalcDSI, 78 | CalcASN, 79 | CalcDSN, 80 | CalcDN2N, 81 | CalcANS, 82 | CalcANV, 83 | CalcAZN, 84 | CalcANZ, 85 | CalcANI, 86 | CalcDSZ, 87 | CalcANN, 88 | CalcDN2Z, 89 | CalcANMat, 90 | CalcAZMat, 91 | CalcASMat, 92 | CalcDSMat, 93 | CalcDN2Mat, 94 | CalcInformationContent, 95 | CalcFrags, 96 | CalcAddFeatures, 97 | 98 | ) 99 | -------------------------------------------------------------------------------- /skbuild/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "wheel", "scikit-build", "numpy==1.26.4"] 3 | build-backend = "setuptools.build_meta" -------------------------------------------------------------------------------- /skbuild/setup.py: -------------------------------------------------------------------------------- 1 | from skbuild import setup 2 | from setuptools import find_packages 3 | import sysconfig 4 | import os 5 | 6 | 7 | 8 | setup( 9 | name="osmordred", 10 | version="0.2.0", 11 | description="A Python package to generate osmordred features using RDKit 2023.09.3, Lapack.", 12 | 13 | long_description=open("README.md").read(), 14 | long_description_content_type="text/markdown", 15 | author="Guillaume Godin", 16 | author_email="guillaume.godin@gmail.com", 17 | 18 | license="BSD-3-Clause", 19 | packages=["osmordred"], 20 | cmake_args=[ 21 | f"-DCMAKE_PREFIX_PATH={os.environ.get('CONDA_PREFIX')}", 22 | ], 23 | python_requires=">=3.11.8", 24 | install_requires=["scikit-build", "numpy==1.26.4"], 25 | classifiers=[ 26 | "Programming Language :: Python :: 3", 27 | "License :: OSI Approved :: BSD-3-Clause License", 28 | "Operating System :: OS Independent", 29 | ], 30 | ) 31 | 
-------------------------------------------------------------------------------- /skbuild/setup_env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | function print_error { 5 | echo 6 | echo "Failed to create conda environment!!" 7 | exit 1 8 | } 9 | trap print_error ERR 10 | 11 | echo "Removing existing environment (if present)" 12 | conda env remove -y -n osmordred &>/dev/null || true 13 | 14 | conda_packages="boost==1.82.0 eigen lapack ninja python-build rdkit==2023.9.3" 15 | if [[ "$OSTYPE" =~ ^darwin.* ]]; then 16 | echo "Creating conda env with MacOS packages" 17 | conda_packages="$conda_packages blas=*=*openblas" 18 | elif [[ "$OSTYPE" =~ ^linux.* ]]; then 19 | echo "Creating conda env with Linux packages" 20 | conda_packages="$conda_packages blas=*=*mkl" 21 | else 22 | echo "Don't recogize os: $OSTYPE" 23 | exit 1 24 | fi 25 | 26 | conda create -y -n osmordred $conda_packages python=3.11 -c conda-forge 27 | -------------------------------------------------------------------------------- /skbuild/test/tAll.py: -------------------------------------------------------------------------------- 1 | 2 | from rdkit import Chem 3 | import numpy as np 4 | import pandas as pd 5 | from concurrent.futures import ProcessPoolExecutor, as_completed, TimeoutError 6 | 7 | from tqdm import tqdm 8 | import osmordred as rd 9 | 10 | 11 | 12 | 13 | # Define descriptor computation function 14 | def CalcOsmordred(smiles, version=2, names = False, mynames=[]): 15 | 16 | 17 | if version == 1: 18 | " original version from Mordred" 19 | v = 1 20 | doExEstate = False 21 | else: 22 | " expended descriptors with more features and fixed InformationContent in cpp" 23 | v = 2 24 | doExEstate = True 25 | 26 | mol = Chem.MolFromSmiles(smiles) 27 | if mol is None: 28 | return None # Return an empty array instead of None 29 | 30 | results = [] 31 | descriptor_names = [] 32 | 33 | # Define descriptors with names 34 | 
descriptors = [ 35 | ("ABCIndex", rd.CalcABCIndex), 36 | ("AcidBase", rd.CalcAcidBase), 37 | ("AdjacencyMatrix", lambda mol: rd.CalcAdjacencyMatrix(mol, v)), 38 | ("Aromatic", rd.CalcAromatic), 39 | ("AtomCount", lambda mol: rd.CalcAtomCount(mol, v)), 40 | ("Autocorrelation", rd.CalcAutocorrelation), 41 | ("BCUT", rd.CalcBCUT), 42 | ("BalabanJ", rd.CalcBalabanJ), 43 | ("BaryszMatrix", rd.CalcBaryszMatrix), 44 | ("BertzCT", rd.CalcBertzCT), 45 | ("BondCount", rd.CalcBondCount), 46 | ("RNCGRPCG", rd.CalcRNCGRPCG), 47 | ("CarbonTypes", lambda mol: rd.CalcCarbonTypes(mol, v)), 48 | ("Chi", rd.CalcChi), 49 | ("Constitutional", rd.CalcConstitutional), 50 | ("DetourMatrix", rd.CalcDetourMatrix), 51 | ("DistanceMatrix", lambda mol: rd.CalcDistanceMatrix(mol, v)), 52 | ("EState", lambda mol: rd.CalcEState(mol, doExEstate)), 53 | ("EccentricConnectivityIndex", rd.CalcEccentricConnectivityIndex), 54 | ("ExtendedTopochemicalAtom", rd.CalcExtendedTopochemicalAtom), 55 | ("FragmentComplexity", rd.CalcFragmentComplexity), 56 | ("Framework", rd.CalcFramework), 57 | ("HydrogenBond", rd.CalcHydrogenBond), 58 | ] 59 | 60 | if version == 1: 61 | descriptors.append(("InformationContentv1", CalcIC)) 62 | else: 63 | descriptors.append(("LogS", rd.CalcLogS)) 64 | descriptors.append(("InformationContentv2", lambda mol: rd.CalcInformationContent(mol, 5))) 65 | 66 | additional_descriptors = [ 67 | ("KappaShapeIndex", rd.CalcKappaShapeIndex), 68 | ("Lipinski", rd.CalcLipinski), 69 | ("McGowanVolume", rd.CalcMcGowanVolume), 70 | ("MoeType", rd.CalcMoeType), 71 | ("MolecularDistanceEdge", rd.CalcMolecularDistanceEdge), 72 | ("MolecularId", rd.CalcMolecularId), 73 | ("PathCount", rd.CalcPathCount), 74 | ("Polarizability", rd.CalcPolarizability), 75 | ("RingCount", rd.CalcRingCount), 76 | ("RotatableBond", rd.CalcRotatableBond), 77 | ("SLogP", rd.CalcSLogP), 78 | ("TopoPSA", rd.CalcTopoPSA), 79 | ("TopologicalCharge", rd.CalcTopologicalCharge), 80 | ("TopologicalIndex", rd.CalcTopologicalIndex), 
81 | ("VdwVolumeABC", rd.CalcVdwVolumeABC), 82 | ("VertexAdjacencyInformation", rd.CalcVertexAdjacencyInformation), 83 | ("WalkCount", rd.CalcWalkCount), 84 | ("Weight", rd.CalcWeight), 85 | ("WienerIndex", rd.CalcWienerIndex), 86 | ("ZagrebIndex", rd.CalcZagrebIndex), 87 | ] 88 | 89 | descriptors.extend(additional_descriptors) 90 | # not yet implemented ODT 91 | if version > 1: 92 | extended_descriptors = [ 93 | ("Pol", rd.CalcPol), 94 | ("MR", rd.CalcMR), 95 | ("Flexibility", rd.CalcFlexibility), 96 | ("Schultz", rd.CalcSchultz), 97 | ("AlphaKappaShapeIndex", rd.CalcAlphaKappaShapeIndex), 98 | ("HEState", rd.CalcHEState), 99 | ("BEState", rd.CalcBEState), 100 | ("Abrahams", rd.CalcAbrahams), 101 | ("ANMat", rd.CalcANMat), 102 | ("ASMat", rd.CalcASMat), 103 | ("AZMat", rd.CalcAZMat), 104 | ("DSMat", rd.CalcDSMat), 105 | ("DN2Mat", rd.CalcDN2Mat), 106 | ("Frags", rd.CalcFrags), 107 | ("AddFeatures", rd.CalcAddFeatures), 108 | ] 109 | descriptors.extend(extended_descriptors) 110 | 111 | #try: 112 | for name, func in descriptors: 113 | try: 114 | value = func(mol) 115 | 116 | value = np.atleast_1d(np.array(value)) 117 | results.append(value) 118 | except: 119 | arraylength =np.sum([1 for c in mynames if c.startswith(name)]) 120 | print(name,' error for smiles ', smiles) 121 | 122 | results.append(np.full((arraylength,), np.nan)) 123 | if names: 124 | descriptor_names.extend([f"{name}_{i+1}" for i in range(len(value))]) 125 | if names: 126 | return np.concatenate(results), descriptor_names 127 | return np.concatenate(results) 128 | 129 | 130 | 131 | if __name__ == "__main__": 132 | _, mynames = CalcOsmordred('CCCO',version=2, names=True) 133 | print(len(mynames)) 134 | smiles = ['CCCO','CCCN','c1ccccc1'] 135 | for s in smiles: 136 | mol = Chem.MolFromSmiles(s) 137 | results = CalcOsmordred(s, version=2,names=False,mynames= mynames) 138 | print(list(results)) 139 | 140 | -------------------------------------------------------------------------------- 
/skbuild/test/tBEState.py: -------------------------------------------------------------------------------- 1 | from rdkit import Chem 2 | import numpy as np 3 | import pandas as pd 4 | from concurrent.futures import ProcessPoolExecutor, as_completed, TimeoutError 5 | 6 | from tqdm import tqdm 7 | import osmordred as rd 8 | 9 | 10 | 11 | if __name__ == "__main__": 12 | smiles = ['CCCO','CCCN','c1ccccc1'] 13 | for s in smiles: 14 | mol = Chem.MolFromSmiles(s) 15 | results = rd.CalcBEState(mol) 16 | print(list(results)) 17 | 18 | -------------------------------------------------------------------------------- /skbuild/test/test.py: -------------------------------------------------------------------------------- 1 | from rdkit import Chem 2 | import numpy as np 3 | import pandas as pd 4 | from concurrent.futures import ProcessPoolExecutor, as_completed 5 | from tqdm import tqdm 6 | 7 | import osmordred as rd 8 | 9 | 10 | # Define descriptor computation function 11 | def CalcOsmordred(smiles, version=2): 12 | 13 | 14 | if version == 1: 15 | " original version from Mordred" 16 | v = 1 17 | doExEstate = False 18 | else: 19 | " expended descriptors with more features and fixed InformationContent in cpp" 20 | v = 2 21 | doExEstate = True 22 | 23 | mol = Chem.MolFromSmiles(smiles) 24 | if mol is None: 25 | return None # Return an empty array instead of None 26 | results = [] 27 | try: 28 | results.append(np.array(rd.CalcABCIndex(mol))) 29 | results.append(np.array(rd.CalcAcidBase(mol))) 30 | results.append(np.array(rd.CalcAdjacencyMatrix(mol, v))) # add sm1 removed in v1.1 31 | results.append(np.array(rd.CalcAromatic(mol))) 32 | results.append(np.array(rd.CalcAtomCount(mol, v))) # add nHetero in v1.1 33 | results.append(np.array(rd.CalcAutocorrelation(mol))) 34 | results.append(np.array(rd.CalcBCUT(mol))) 35 | results.append(np.array(rd.CalcBalabanJ(mol))) 36 | results.append(np.array(rd.CalcBaryszMatrix(mol))) 37 | results.append(np.array(rd.CalcBertzCT(mol))) 38 | 
results.append(np.array(rd.CalcBondCount(mol))) 39 | results.append(np.array(rd.CalcRNCGRPCG(mol))) # CPSA 2D descriptors on charges 40 | results.append(np.array(rd.CalcCarbonTypes(mol, v))) # add calcFractionCSP3 in v1.1 41 | results.append(np.array(rd.CalcChi(mol))) 42 | results.append(np.array(rd.CalcConstitutional(mol))) 43 | results.append(np.array(rd.CalcDetourMatrix(mol))) # add sm1 since v1.1 44 | results.append(np.array(rd.CalcDistanceMatrix(mol,v))) 45 | results.append(np.array(rd.CalcEState(mol, doExEstate))) # no impact True/False 46 | results.append(np.array(rd.CalcEccentricConnectivityIndex(mol))) 47 | results.append(np.array(rd.CalcExtendedTopochemicalAtom(mol))) 48 | results.append(np.array(rd.CalcFragmentComplexity(mol))) 49 | results.append(np.array(rd.CalcFramework(mol))) 50 | results.append(np.array(rd.CalcHydrogenBond(mol))) 51 | if version==1: 52 | results.append(CalcIC(mol)) 53 | else: 54 | results.append(np.array(rd.CalcLogS(mol))) # added if version > 1! 55 | results.append(np.array(rd.CalcInformationContent(mol,5))) 56 | 57 | results.append(np.array(rd.CalcKappaShapeIndex(mol))) 58 | results.append(np.array(rd.CalcLipinski(mol))) 59 | results.append(np.array(rd.CalcMcGowanVolume(mol))) 60 | results.append(np.array(rd.CalcMoeType(mol))) 61 | results.append(np.array(rd.CalcMolecularDistanceEdge(mol))) 62 | results.append(np.array(rd.CalcMolecularId(mol))) 63 | results.append(np.array(rd.CalcPathCount(mol))) 64 | results.append(np.array(rd.CalcPolarizability(mol))) 65 | results.append(np.array(rd.CalcRingCount(mol))) 66 | results.append(np.array(rd.CalcRotatableBond(mol))) 67 | results.append(np.array(rd.CalcSLogP(mol))) 68 | results.append(np.array(rd.CalcTopoPSA(mol))) 69 | results.append(np.array(rd.CalcTopologicalCharge(mol))) 70 | results.append(np.array(rd.CalcTopologicalIndex(mol))) 71 | results.append(np.array(rd.CalcVdwVolumeABC(mol))) 72 | results.append(np.array(rd.CalcVertexAdjacencyInformation(mol))) 73 | 
results.append(np.array(rd.CalcWalkCount(mol))) 74 | results.append(np.array(rd.CalcWeight(mol))) 75 | results.append(np.array(rd.CalcWienerIndex(mol))) 76 | results.append(np.array(rd.CalcZagrebIndex(mol))) 77 | if version>1: 78 | # new descriptors added 79 | results.append(np.array(rd.CalcPol(mol))) 80 | results.append(np.array(rd.CalcMR(mol))) 81 | results.append(np.array(rd.CalcODT(mol))) # not yet implemented return 1! 82 | results.append(np.array(rd.CalcFlexibility(mol))) 83 | results.append(np.array(rd.CalcSchultz(mol))) 84 | results.append(np.array(rd.CalcAlphaKappaShapeIndex(mol))) 85 | results.append(np.array(rd.CalcHEState(mol))) # very slightly slower 86 | results.append(np.array(rd.CalcBEState(mol))) # as a true impact 87 | results.append(np.array(rd.CalcAbrahams(mol))) # as a true impact : vf2 smartparser 88 | # new triplet features x5 faster using combined Linear Equation resolution instead of per vector targets... 89 | results.append(np.array(rd.CalcANMat(mol))) 90 | results.append(np.array(rd.CalcASMat(mol))) 91 | results.append(np.array(rd.CalcAZMat(mol))) 92 | results.append(np.array(rd.CalcDSMat(mol))) 93 | results.append(np.array(rd.CalcDN2Mat(mol))) 94 | results.append(np.array(rd.CalcFrags(mol))) 95 | results.append(np.array(rd.CalcAddFeatures(mol))) 96 | 97 | results_to_concat = [np.atleast_1d(r) for r in results] 98 | return np.concatenate(results_to_concat) 99 | except Exception as e: 100 | print(f"Error processing molecule {smiles}: {e}") 101 | return None 102 | 103 | 104 | 105 | 106 | def Calculate(smiles_list, n_jobs=4, version=1): 107 | results = [] 108 | with ProcessPoolExecutor(max_workers=n_jobs) as executor: 109 | # Submit tasks with their indices 110 | futures = {executor.submit(CalcOsmordred, smi, version): idx for idx, smi in enumerate(smiles_list)} 111 | for future in tqdm(as_completed(futures), total=len(futures), desc="Processing molecules"): 112 | idx = futures[future] # Retrieve the index of the SMILES string 113 | try: 114 
| result = future.result() 115 | if result is not None: 116 | results.append((idx, result)) # Store the index and the result 117 | except Exception as e: 118 | print(f"Error processing molecule at index {idx}: {e}") 119 | 120 | # Sort results by the original index to maintain order 121 | results.sort(key=lambda x: x[0]) 122 | ordered_results = [res[1] for res in results] 123 | return ordered_results 124 | 125 | 126 | 127 | 128 | 129 | if __name__ == "__main__": 130 | print("Osmordred library contents:") 131 | print(dir(rd)) 132 | version = 2 133 | smiles = ['CCCO','CCCN','c1ccccc1'] 134 | smiles_list = smiles 135 | n_jobs = 1 # Number of cores to use 136 | 137 | print(f"Processing {len(smiles_list)} molecules with {n_jobs} cores...") 138 | results = Calculate(smiles_list, n_jobs=n_jobs, version=version) 139 | print(results) 140 | # Convert to DataFrame and save to file 141 | df_results = pd.DataFrame(results) 142 | 143 | print(f"Finished processing. Results shape: {df_results.shape}") 144 | df_results.to_csv('Myfeatures.csv', index=False) 145 | 146 | # additional wrappers from double/int into std::vector of double 147 | print(list(rd.CalcSchultz(Chem.MolFromSmiles(smiles[-1])))) 148 | print(list(rd.CalcPol(Chem.MolFromSmiles(smiles[-1])))) 149 | print(list(rd.CalcMR(Chem.MolFromSmiles(smiles[-1])))) 150 | print(list(rd.CalcODT(Chem.MolFromSmiles(smiles[-1])))) 151 | print(list(rd.CalcFlexibility(Chem.MolFromSmiles(smiles[-1])))) 152 | print(list(rd.CalcLogS(Chem.MolFromSmiles(smiles[-1])))) 153 | print(list(rd.CalcHydrogenBond(Chem.MolFromSmiles(smiles[-1])))) 154 | print(list(rd.CalcFramework(Chem.MolFromSmiles(smiles[-1])))) 155 | print(list(rd.CalcBertzCT(Chem.MolFromSmiles(smiles[-1])))) 156 | print(list(rd.CalcBalabanJ(Chem.MolFromSmiles(smiles[-1])))) 157 | 158 | print(list(rd.CalcInformationContent(Chem.MolFromSmiles(smiles[0]),5))) 159 | 160 | --------------------------------------------------------------------------------