├── Cheminformatics ├── RDKit-pandas-integration.ipynb ├── markdown-usage.ipynb ├── rendering-of-images-in-IPython.ipynb ├── scaffold-analysis │ ├── Scaffold analysis & Schnellkurs in chemoinformatics.ipynb │ ├── biomedx.png │ ├── merck.jpeg │ ├── pi.gif │ └── rdkit.png └── somemols.smi ├── Kinase-Inhibitors └── approved-or-in-clinical-trials │ ├── Kinase inhibitors - approved or in clinical trials.ipynb │ ├── chembl_drugs.txt │ ├── kin_inh.png │ ├── kin_inh.smi │ ├── kin_inh_approved.smi │ ├── kin_inh_approved_2.smi │ ├── kin_inh_approved_3.smi │ └── kin_inh_phase4.png ├── LICENSE.md ├── Machine Learning ├── Sklearn to ete3 trees.ipynb ├── tree.dot └── tree.png ├── Presentations and Tutorials ├── Molecular Modelling Workshop 2014 │ ├── Scaffold analysis in Python with RDKit and pandas.ipynb │ ├── approved.sdf │ ├── biomedx.png │ ├── merck.jpeg │ └── rdkit.png ├── Protvec demo 2017 │ ├── ProtVec.ipynb │ ├── README.md │ ├── __init__.py │ ├── biovec │ │ ├── .gitignore │ │ ├── README.md │ │ ├── biovec │ │ │ ├── __init__.py │ │ │ ├── binary_amino.py │ │ │ └── models.py │ │ ├── setup.py │ │ └── trained_models │ │ │ └── swissprot_reviewed_protvec │ ├── corpus.txt │ ├── data │ │ ├── family_classification_and_sequence_small.tab │ │ └── uniprot_sprot_small.fasta │ ├── figures │ │ ├── Protein_sentences.png │ │ ├── Protein_sentences2.png │ │ ├── Skip_gram_cbow.png │ │ ├── linear-relationships.png │ │ ├── protein_vectors_wlabel.png │ │ ├── protein_words.png │ │ ├── proteinsequence.png │ │ ├── relationships_plus_vectors2.png │ │ └── table_overview_vocab2.png │ ├── helpers.py │ └── trained_models │ │ ├── model_SwissProt_small │ │ └── swissprot_reviewed_protvec └── RDKit UGM 2014 │ ├── Scaffold analysis of ChEMBL data with pandas and RDKit.ipynb │ ├── biomedx.png │ ├── hackaton │ ├── XLSX export.ipynb │ └── drugs.smi │ ├── merck.jpeg │ └── rdkit.png ├── README.md └── Virtual-Screening ├── filtering └── filter_pains.py └── ligand-3D-conformations ├── prepare_for_docking.py ├── 
test.sdf └── test.smi /Cheminformatics/markdown-usage.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# BioMed X web page\n", 8 | "## Internal presentation on Markdown usage on our web page\n", 9 | "###[http://bio.mx/](http://bio.mx/)\n", 10 | "\n", 11 | "### Team pages\n", 12 | "Thank you all for helping. Great thanks to **Marco Tidona** for coding!\n", 13 | "\n", 14 | "### Notes and missing stuff\n", 15 | "- Use Twitter!\n", 16 | "- Each group should be able to update their own page\n", 17 | "\n", 18 | "##### More info on http://192.168.96.60/dokuwiki/doku.php?id=admin:web-page\n", 19 | "
\n", 20 | "
\n", 21 | "
\n", 22 | "
\n", 23 | "
\n", 24 | "
\n", 25 | "
\n", 26 | "
\n", 27 | "
\n", 28 | "
\n", 29 | " \n", 30 | " " 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "# Markdown\n", 38 | "- Plain text formatting syntax\n", 39 | "- You can format text without html knowledge\n", 40 | "- http://daringfireball.net/projects/markdown/\n", 41 | "- http://192.168.96.60/dokuwiki/doku.php?id=admin:web-page\n", 42 | "
\n", 43 | "
\n", 44 | "
\n", 45 | "
\n", 46 | "
\n", 47 | "
\n", 48 | "
\n", 49 | "
" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "# Headings\n", 57 | "\n", 58 | "# Header 1\n", 59 | "## Header 2\n", 60 | "### Header 3 (in our case names)\n", 61 | "#### Header 4 (persons position)\n", 62 | "##### Header 5 (previous work & contact)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "# Lists\n", 70 | "- list item 1\n", 71 | "- list item 2" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "# Links\n", 79 | "[LINK](www.google.com) \n", 80 | "E-mail: " 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "# Images\n", 88 | "![My Image](http://bio.mx/img/logo-main.png)" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "# Normal text is easy. You just write\n", 96 | "**important** *italic* \n", 97 | "\n", 98 | "Two spaces and enter introduce new lines. 
\n", 99 | "**See new line.** \n", 100 | "If you miss spaces and have just enter there will be no new line.\n", 101 | "**See no new line.**" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 0, 107 | "metadata": { 108 | "collapsed": false 109 | }, 110 | "outputs": [], 111 | "source": [] 112 | } 113 | ], 114 | "metadata": { 115 | "kernelspec": { 116 | "display_name": "Python 2", 117 | "language": "python", 118 | "name": "python2" 119 | }, 120 | "language_info": { 121 | "codemirror_mode": { 122 | "name": "ipython", 123 | "version": 2 124 | }, 125 | "file_extension": ".py", 126 | "mimetype": "text/x-python", 127 | "name": "python", 128 | "nbconvert_exporter": "python", 129 | "pygments_lexer": "ipython2", 130 | "version": "2.7.10" 131 | } 132 | }, 133 | "nbformat": 4, 134 | "nbformat_minor": 0 135 | } 136 | -------------------------------------------------------------------------------- /Cheminformatics/rendering-of-images-in-IPython.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Custom objects and their rendering in IPython\n", 8 | "### Example of how to use object representations" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "metadata": { 15 | "collapsed": false 16 | }, 17 | "outputs": [], 18 | "source": [ 19 | "import rdkit.Chem as Chem\n", 20 | "from rdkit.Chem import PandasTools\n", 21 | "from rdkit.Chem import Draw\n", 22 | "from rdkit.Chem import Descriptors\n", 23 | "from rdkit.Chem.Draw import IPythonConsole # Enables RDKit IPython integration" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "### Lets say you need to create a custom object and you'd like to control the default representation" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "We'll create an object that can 
store multiple molecules" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 2, 43 | "metadata": { 44 | "collapsed": false 45 | }, 46 | "outputs": [], 47 | "source": [ 48 | "class primitiveMolsObject():\n", 49 | " def __init__(self, mols=None):\n", 50 | " self.mols = mols\n", 51 | " self.num = len(mols) # Return number of mols" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "Get some mols and put them in a list" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 3, 64 | "metadata": { 65 | "collapsed": false 66 | }, 67 | "outputs": [], 68 | "source": [ 69 | "mol1 = Chem.MolFromSmiles('NC(=O)CS(=O)C(c1ccccc1)c1ccccc1')" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 4, 75 | "metadata": { 76 | "collapsed": false 77 | }, 78 | "outputs": [], 79 | "source": [ 80 | "mol2 = Chem.MolFromSmiles('CCC(OC(C)=O)C(CC(C)N(C)C)(c1ccccc1)c1ccccc1')" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 5, 86 | "metadata": { 87 | "collapsed": false 88 | }, 89 | "outputs": [], 90 | "source": [ 91 | "mol3 = Chem.MolFromSmiles(' Cc1ccccc1C(OCCN(C)C)c1ccccc1')" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 6, 97 | "metadata": { 98 | "collapsed": false 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "mols = [mol1, mol2, mol3]" 103 | ] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "metadata": {}, 108 | "source": [ 109 | "Create model instance with your mols" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 7, 115 | "metadata": { 116 | "collapsed": false 117 | }, 118 | "outputs": [], 119 | "source": [ 120 | "MyMols = primitiveMolsObject(mols)" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 8, 126 | "metadata": { 127 | "collapsed": false 128 | }, 129 | "outputs": [ 130 | { 131 | "data": { 132 | "text/plain": [ 133 | "<__main__.primitiveMolsObject instance at 
0x7f6cd9567908>" 134 | ] 135 | }, 136 | "execution_count": 8, 137 | "metadata": {}, 138 | "output_type": "execute_result" 139 | } 140 | ], 141 | "source": [ 142 | "MyMols" 143 | ] 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "metadata": {}, 148 | "source": [ 149 | "Default rendering gives very little info about contents" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 9, 155 | "metadata": { 156 | "collapsed": false 157 | }, 158 | "outputs": [ 159 | { 160 | "data": { 161 | "text/plain": [ 162 | "[,\n", 163 | " ,\n", 164 | " ]" 165 | ] 166 | }, 167 | "execution_count": 9, 168 | "metadata": {}, 169 | "output_type": "execute_result" 170 | } 171 | ], 172 | "source": [ 173 | "MyMols.mols" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": 10, 179 | "metadata": { 180 | "collapsed": false 181 | }, 182 | "outputs": [ 183 | { 184 | "data": { 185 | "text/plain": [ 186 | "3" 187 | ] 188 | }, 189 | "execution_count": 10, 190 | "metadata": {}, 191 | "output_type": "execute_result" 192 | } 193 | ], 194 | "source": [ 195 | "MyMols.num" 196 | ] 197 | }, 198 | { 199 | "cell_type": "markdown", 200 | "metadata": {}, 201 | "source": [ 202 | "### Luckily default representations of objects in ipython can be easily controlled with \\_repr\\_html\\_" 203 | ] 204 | }, 205 | { 206 | "cell_type": "markdown", 207 | "metadata": {}, 208 | "source": [ 209 | "Let's define the object again, this time with functions that control representation" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": 11, 215 | "metadata": { 216 | "collapsed": false 217 | }, 218 | "outputs": [], 219 | "source": [ 220 | "class primitiveMolsObject2():\n", 221 | " def __init__(self, mols=None):\n", 222 | " self.mols = mols\n", 223 | " self.num = len(mols) # Return number of mols\n", 224 | " \n", 225 | " def _repr_html_(self):\n", 226 | " # Default representation in IPython\n", 227 | " smilesString = ''\n", 228 | " for mol in mols:\n", 229 
| " smilesString += Chem.MolToSmiles(mol) + \", \" \n", 230 | " return smilesString #'\"Mol\"/' %s" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": 12, 236 | "metadata": { 237 | "collapsed": false 238 | }, 239 | "outputs": [], 240 | "source": [ 241 | "MyMols2 = primitiveMolsObject2(mols)" 242 | ] 243 | }, 244 | { 245 | "cell_type": "markdown", 246 | "metadata": {}, 247 | "source": [ 248 | "Much better! We can actually see what hides behind an object" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": 13, 254 | "metadata": { 255 | "collapsed": false 256 | }, 257 | "outputs": [ 258 | { 259 | "data": { 260 | "text/html": [ 261 | "NC(=O)CS(=O)C(c1ccccc1)c1ccccc1, CCC(OC(C)=O)C(CC(C)N(C)C)(c1ccccc1)c1ccccc1, Cc1ccccc1C(OCCN(C)C)c1ccccc1, " 262 | ], 263 | "text/plain": [ 264 | "<__main__.primitiveMolsObject2 instance at 0x7f6cd958c050>" 265 | ] 266 | }, 267 | "execution_count": 13, 268 | "metadata": {}, 269 | "output_type": "execute_result" 270 | } 271 | ], 272 | "source": [ 273 | "MyMols2" 274 | ] 275 | }, 276 | { 277 | "cell_type": "markdown", 278 | "metadata": {}, 279 | "source": [ 280 | "#### Or we can add graphic representation" 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": 14, 286 | "metadata": { 287 | "collapsed": false 288 | }, 289 | "outputs": [], 290 | "source": [ 291 | "from base64 import b64encode\n", 292 | "from StringIO import StringIO\n", 293 | "\n", 294 | "class primitiveMolsObject3():\n", 295 | " def __init__(self, mols=None):\n", 296 | " self.mols = mols\n", 297 | " self.num = len(mols) # Return number of mols\n", 298 | " \n", 299 | " def _repr_html_(self):\n", 300 | " # Default representation in IPython\n", 301 | " sio = StringIO()\n", 302 | " Draw.MolsToGridImage(self.mols).save(sio,format='PNG')\n", 303 | " s = b64encode(sio.getvalue()) # Encode in base64\n", 304 | " return '\"Mol\"/' %s" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": 
15, 310 | "metadata": { 311 | "collapsed": false 312 | }, 313 | "outputs": [], 314 | "source": [ 315 | "MyOtherMols = primitiveMolsObject3(mols)" 316 | ] 317 | }, 318 | { 319 | "cell_type": "markdown", 320 | "metadata": {}, 321 | "source": [ 322 | "Much better! We can actually see what hides behind an object" 323 | ] 324 | }, 325 | { 326 | "cell_type": "code", 327 | "execution_count": 16, 328 | "metadata": { 329 | "collapsed": false 330 | }, 331 | "outputs": [ 332 | { 333 | "data": { 334 | "text/html": [ 335 | "\"Mol\"/" 336 | ], 337 | "text/plain": [ 338 | "<__main__.primitiveMolsObject3 instance at 0x7f6cd958cbd8>" 339 | ] 340 | }, 341 | "execution_count": 16, 342 | "metadata": {}, 343 | "output_type": "execute_result" 344 | } 345 | ], 346 | "source": [ 347 | "MyOtherMols" 348 | ] 349 | }, 350 | { 351 | "cell_type": "markdown", 352 | "metadata": {}, 353 | "source": [ 354 | "Copyright (C) 2014 by Samo Turk, BioMed X GmbH\n", 355 | "\n", 356 | "This work is licensed under the Creative Commons Attribution-ShareAlike 3.0 License. 
To view a copy of this license, visit http://creativecommons.org/licenses/by-sa/3.0/ or send a letter to Creative Commons, 543 Howard Street, 5th Floor, San Francisco, California, 94105, USA.\n" 357 | ] 358 | } 359 | ], 360 | "metadata": { 361 | "kernelspec": { 362 | "display_name": "Python 2", 363 | "language": "python", 364 | "name": "python2" 365 | }, 366 | "language_info": { 367 | "codemirror_mode": { 368 | "name": "ipython", 369 | "version": 2 370 | }, 371 | "file_extension": ".py", 372 | "mimetype": "text/x-python", 373 | "name": "python", 374 | "nbconvert_exporter": "python", 375 | "pygments_lexer": "ipython2", 376 | "version": "2.7.10" 377 | } 378 | }, 379 | "nbformat": 4, 380 | "nbformat_minor": 0 381 | } 382 | -------------------------------------------------------------------------------- /Cheminformatics/scaffold-analysis/biomedx.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Cheminformatics/scaffold-analysis/biomedx.png -------------------------------------------------------------------------------- /Cheminformatics/scaffold-analysis/merck.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Cheminformatics/scaffold-analysis/merck.jpeg -------------------------------------------------------------------------------- /Cheminformatics/scaffold-analysis/pi.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Cheminformatics/scaffold-analysis/pi.gif -------------------------------------------------------------------------------- /Cheminformatics/scaffold-analysis/rdkit.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Cheminformatics/scaffold-analysis/rdkit.png -------------------------------------------------------------------------------- /Cheminformatics/somemols.smi: -------------------------------------------------------------------------------- 1 | SMILES Name 2 | C[S+](CCC(N)C(=O)O)CC1OC(n2cnc3c2ncnc3N)C(O)C1O DB00118 3 | Nc1ncnc2c1ncn2C1OC(COP(=O)(O)O)C(O)C1O DB00131 4 | Nc1ncnc2c1ncn2C1OC(CO)C(O)C1O DB00194 5 | Nc1nc(Cl)nc2c1ncn2C1CC(O)C(CO)O1 DB00242 6 | Nc1nc(Cl)nc2c1ncn2C1OC(CO)C(O)C1F DB00631 7 | Nc1nc(F)nc2c1ncn2C1OC(COP(=O)(O)O)C(O)C1O DB01073 8 | COc1nc(N)nc2c1ncn2C1OC(CO)C(O)C1O DB01280 9 | -------------------------------------------------------------------------------- /Kinase-Inhibitors/approved-or-in-clinical-trials/kin_inh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Kinase-Inhibitors/approved-or-in-clinical-trials/kin_inh.png -------------------------------------------------------------------------------- /Kinase-Inhibitors/approved-or-in-clinical-trials/kin_inh.smi: -------------------------------------------------------------------------------- 1 | SMILES Name 2 | Cc1cc2c(F)c(Oc3ncnn4cc(OCC(C)OC(=O)C(C)N)c(C)c34)ccc2[nH]1 Brivanib 3 | CCOc1cc2ncc(C#N)c(Nc3ccc(F)c(Cl)c3)c2cc1NC(=O)C=CCN(C)C Pelitinib 4 | COc1cc2c(Oc3ccc(NC(=O)C4(C(=O)Nc5ccc(F)cc5)CC4)cc3F)ccnc2cc1OCCCN1CCOCC1 Foretinib 5 | COc1cc2nccc(Oc3ccc(NC(=O)Nc4cc(C)on4)c(Cl)c3)c2cc1OC Tivozanib 6 | COc1cc(Nc2c(C#N)cnc3cc(OCCCN4CCN(C)CC4)c(OC)cc23)c(Cl)cc1Cl Bosutinib 7 | CC1CCN(C(=O)CC#N)CC1N(C)c1ncnc2[nH]ccc12 Tofacitinib 8 | Clc1ccc(Nc2nnc(Cc3ccncc3)c3ccccc23)cc1 Vatalanib 9 | CCc1cc2c(cc1N1CCC(N3CCOCC3)CC1)C(C)(C)c1[nH]c3cc(C#N)ccc3c1C2=O Alectinib 10 | Cn1ncc(Cl)c1-c1cc(C(=O)NC(CN)Cc2ccc(F)c(F)c2)oc1Cl Uprosertib 11 | 
Cc1ccc(C(=O)Nc2ccc(CN3CCN(C)CC3)c(C(F)(F)F)c2)cc1C#Cc1cnc2cccnn12 Ponatinib 12 | N#CCC(C1CCCC1)n1cc(-c2ncnc3[nH]ccc23)cn1 Ruxolitinib 13 | C=CC(=O)N1CCCC(n2nc(-c3ccc(Oc4ccccc4)cc3)c3c(N)ncnc32)C1 Ibrutinib 14 | Cn1ncc(Cl)c1-c1cc(C(=O)NC(CN)Cc2cccc(F)c2)sc1Cl Afuresertib 15 | CCC(CO)Nc1nc(NCc2ccccc2)c2ncn(C(C)C)c2n1 Seliciclib 16 | O=C(Nc1nc2cccc(-c3ccc(CN4CCS(=O)(=O)CC4)cc3)n2n1)C1CC1 Filgotinib 17 | CCc1nc(C(N)=O)c(Nc2ccc(N3CCC(N4CCN(C)CC4)CC3)c(OC)c2)nc1NC1CCOCC1 Gilteritinib 18 | CN(C)C1CCN(C(=O)c2ccc(NC(=O)Nc3ccc(-c4nc(N5CCOCC5)nc(N5CCOCC5)n4)cc3)cc2)CC1 Gedatolisib 19 | CNC(=O)c1cc(Oc2ccc(NC(=O)Nc3ccc(Cl)c(C(F)(F)F)c3)cc2)ccn1 Sorafenib 20 | Cc1nc(Nc2ncc(C(=O)Nc3c(C)cccc3Cl)s2)cc(N2CCN(CCO)CC2)n1 Dasatinib 21 | COc1cc2c(Oc3ccc4[nH]c(C)cc4c3F)ncnc2cc1OCCCN1CCCC1 Cediranib 22 | COC(=O)c1ccc2c(c1)NC(=O)C2=C(Nc1ccc(N(C)C(=O)CN2CCN(C)CC2)cc1)c1ccccc1 Nintedanib 23 | Cn1cnc2c(F)c(Nc3ccc(Br)cc3Cl)c(C(=O)NOCCO)cc21 Selumetinib 24 | CCS(=O)(=O)N1CC(CC#N)(n2cc(-c3ncnc4[nH]ccc34)cn2)C1 Baricitinib 25 | FC(F)(F)c1ccc(C=Cc2nc(COc3ccc(CCCCn4ccnn4)cc3)co2)cc1 Mubritinib 26 | Cc1cc2c(F)c(Oc3ncnn4cc(OCC(C)O)c(C)c34)ccc2[nH]1 Brivanib 27 | COc1cc(OC)c(C=CS(=O)(=O)Cc2ccc(OC)c(OP(=O)(O)O)c2)c(OC)c1 Briciclib 28 | NC(=O)c1cnc2[nH]ccc2c1NC1C2CC3CC1CC(O)(C3)C2 Peficitinib 29 | COC1CC2CCC(C)C(O)(O2)C(=O)C(=O)N2CCCCC2C(=O)OC(C(C)CC2CCC(OP(C)(C)=O)C(OC)C2)CC(=O)C(C)C=C(C)C(O)C(OC)C(=O)C(C)CC(C)C=CC=CC=C1C Ridaforolimus 30 | COc1cc2nccc(Oc3ccc(NC(=O)NC4CC4)c(Cl)c3)c2cc1C(N)=O Lenvatinib 31 | CCC1C(=O)N(C)c2cnc(Nc3ccc(C(=O)NC4CCC(N5CCN(CC6CC6)CC5)CC4)cc3OC)nc2N1C(C)C Volasertib 32 | COc1cc2c(N3CCN(C(=O)Nc4ccc(OC(C)C)cc4)CC3)ncnc2cc1OCCCN1CCCCC1 Tandutinib 33 | Cc1nc(-c2cn3c(n2)-c2ccc(-c4cnn(C(C)(C)C(N)=O)c4)cc2OCC3)n(C(C)C)n1 Taselisib 34 | Cc1cccc(-c2nn3c(c2-c2ccnc4ccc(C(N)=O)cc24)CCC3)n1 Galunisertib 35 | COc1ncc(-c2ccc3nccc(-c4ccnnc4)c3c2)cc1NS(=O)(=O)c1ccc(F)cc1F Omipalisib 36 | OC1CCC(Nc2ncc3nc(Nc4c(F)cc(F)cc4F)n(C4CCOC4)c3n2)CC1 Tanzisertib 37 | 
CN1CCN(c2ccc3nc(-c4c(N)c5c(F)cccc5[nH]c4=O)[nH]c3c2)CC1 Dovitinib 38 | CC(C)(C)c1nc(-c2cccc(NS(=O)(=O)c3c(F)cccc3F)c2F)c(-c2ccnc(N)n2)s1 Dabrafenib 39 | Cc1cnc(Nc2ccc(OCCN3CCCC3)cc2)nc1Nc1cccc(S(=O)(=O)NC(C)(C)C)c1 Fedratinib 40 | C=CC(=O)Nc1cc2c(Nc3ccc(F)c(Cl)c3)ncnc2cc1OCCCN1CCOCC1 Canertinib 41 | COC1CC2CCC(C)C(O)(O2)C(=O)C(=O)N2CCCCC2C(=O)OC(C(C)CC2CCC(OCCO)C(OC)C2)CC(=O)C(C)C=C(C)C(O)C(OC)C(=O)C(C)CC(C)C=CC=CC=C1C Everolimus 42 | CCN(CC)CCNC(=O)c1c(C)[nH]c(C=C2C(=O)Nc3ccc(F)cc32)c1C Sunitinib 43 | Cc1c(F)cc(C(=O)NC2CC2)cc1-c1ccc(C(=O)NCC(C)(C)C)cn1 Losmapimod 44 | COC1CC2CCC(C)C(O)(O2)C(=O)C(=O)N2CCCCC2C(=O)OC(C(C)CC2CCC(O)C(OC)C2)CC(=O)C(C)C=C(C)C(O)C(OC)C(=O)C(C)CC(C)C=CC=CC=C1C Sirolimus 45 | CC12OC(CC1(O)CO)n1c3ccccc3c3c4c(c5c6ccccc6n2c5c31)CNC4=O Lestaurtinib 46 | COc1cc2nccc(Oc3ccc(NC(=O)C4(C(=O)Nc5ccc(F)cc5)CC4)cc3)c2cc1OC Cabozantinib 47 | COc1cc2c(Nc3ccc(Sc4nccn4C)c(Cl)c3)c(C#N)cnc2cc1N1CCC(N2CCCC2)CC1 Balamapimod 48 | CC1(C)CN(C(=O)c2ccc(-c3cccc4nc(NC(=O)C5CC5)nn34)cc2)C1 Solcitinib 49 | CCN1CCN(Cc2ccc(Nc3ncc(F)c(-c4cc(F)c5nc(C)n(C(C)C)c5c4)n3)nc2)CC1 Abemaciclib 50 | CNC(=O)c1cc(Oc2ccc(NC(=O)Nc3ccc(Cl)c(C(F)(F)F)c3)c(F)c2)ccn1 Regorafenib 51 | Cc1ccc(-n2nc(C(C)(C)C)cc2NC(=O)Nc2ccc(OCCN3CCOCC3)c3ccccc23)cc1 Doramapimod 52 | COc1cc(OC)c(C=CS(=O)(=O)Cc2ccc(OC)c(NCC(=O)O)c2)c(OC)c1 Rigosertib 53 | COc1cc(Nc2ncc(F)c(Nc3ccc4c(n3)N(COP(=O)(O)O)C(=O)C(C)(C)O4)n2)cc(OC)c1OC Fostamatinib 54 | Cc1cc(Nc2cc(N3CCN(C)CC3)nc(Sc3ccc(NC(=O)C4CC4)cc3)n2)n[nH]1 Tozasertib 55 | CC1(O)CC(c2nc(-c3ccc4ccc(-c5ccccc5)nc4c3)c3c(N)nccn23)C1 Linsitinib 56 | COC1CC2CCC(C)C(O)(O2)C(=O)C(=O)N2CCCCC2C(=O)OC(C(C)CC2CCC(O)C(OC)C2)CCC(C)C=C(C)C(O)C(OC)C(=O)C(C)CC(C)C=CC=CC=C1C Olcorolimus 57 | Cc1cc(C)c(C=C2C(=O)Nc3ccccc32)[nH]1 Semaxanib 58 | CC1COC(Nc2ccc3ncnc(Nc4ccc(OCc5nccs5)c(Cl)c4)c3c2)=N1 Varlitinib 59 | CC(C)NCC(C(=O)N1CCN(c2ncnc3c2C(C)CC3O)CC1)c1ccc(Cl)cc1 Ipatasertib 60 | Cc1ccc(F)c(NC(=O)Nc2ccc(-c3cccc4[nH]nc(N)c34)cc2)c1 Linifanib 61 | 
CCOCCOC1CCC(CC(C)C2CC(=O)C(C)C=C(C)C(O)C(OC)C(=O)C(C)CC(C)C=CC=CC=C(C)C(OC)CC3CCC(C)C(O)(O3)C(=O)C(=O)N3CCCCC3C(=O)O2)CC1OC Umirolimus 62 | CCC1C=C(C)CC(C)CC(OC)C2OC(O)(C(=O)C(=O)N3CCCCC3C(=O)OC(C(C)=CC3CCC(Cl)C(OC)C3)C(C)C(O)CC1=O)C(C)CC2OC Pimecrolimus 63 | CNC(=O)c1ccc(Nc2ncc(C(F)(F)F)c(NCc3nccnc3N(C)S(C)(=O)=O)n2)cc1 Defactinib 64 | Cc1ccc(NC(=O)c2ccc(CN3CCC(N(C)C)C3)c(C(F)(F)F)c2)cc1Nc1nccc(-c2cncnc2)n1 Bafetinib 65 | CC(Oc1cc(-c2cnn(C3CCNCC3)c2)cnc1N)c1c(Cl)ccc(F)c1Cl Crizotinib 66 | Cc1cn(-c2cc(NC(=O)c3ccc(C)c(Nc4nccc(-c5cccnc5)n4)c3)cc(C(F)(F)F)c2)cn1 Nilotinib 67 | CCC(Nc1ncnc2[nH]cnc12)c1nc2cccc(F)c2c(=O)n1-c1ccccc1 Idelalisib 68 | COc1cc2c(Nc3ccc(Br)cc3F)ncnc2cc1OCC1CCN(C)CC1 Vandetanib 69 | Cc1c(CN2CCN(C(=O)C(C)O)CC2)sc2c(N3CCOCC3)nc(-c3cnc(N)nc3)nc12 Apitolisib 70 | O=C1NC(=O)C(c2cn3c4c(cccc24)CCC3)C1c1c[nH]c2ccccc12 Tivantinib 71 | Nc1ncc(-c2cnn(CCO)c2)c2scc(-c3ccc(NC(=O)Nc4cccc(F)c4)cc3)c12 Ilorasertib 72 | CC1(COc2ccc3c(c2)ncn3-c2ccc3cccc(N4CCC(N)CC4)c3n2)COC1 Crenolanib 73 | CC(=NNC(=N)N)c1cc(NC(=O)CCCCCCCCC(=O)Nc2cc(C(C)=NNC(=N)N)cc(C(C)=NNC(=N)N)c2)cc(C(C)=NNC(=N)N)c1 Semapimod 74 | CC(C)(C)Cn1c(N)nc2ccc(-c3[nH]c(C(C)(C)C)nc3-c3ccc(F)cc3)nc21 Ralimetinib 75 | C=CC(=O)Nc1cccc(Nc2nc(Nc3ccc(OCCOC)cc3)ncc2F)c1 Spebrutinib 76 | CC1(C)CNc2cc(NC(=O)c3cccnc3NCc3ccncc3)ccc21 Motesanib 77 | CC1CN(C(=O)c2cc3c(C(=O)C(=O)N(C)C)cn(C)c3cc2Cl)C(C)CN1Cc1ccc(F)cc1 Talmapimod 78 | C1=CCOCc2cc(ccc2OCCN2CCCC2)Nc2nccc(n2)-c2cccc(c2)COC1 Pacritinib 79 | COC1CC2CCC(C)C(O)(O2)C(=O)C(=O)N2CCCCC2C(=O)OC(C(C)CC2CCC(n3cnnn3)C(OC)C2)CC(=O)C(C)C=C(C)C(O)C(OC)C(=O)C(C)CC(C)C=CC=CC=C1C Zotarolimus 80 | Cc1cnc(NC(=O)Nc2cc(Br)c(C)cc2OCC2CNCCO2)cn1 Rabusertib 81 | Cc1cc(Nc2ncc(Cl)c(Nc3ccccc3S(=O)(=O)C(C)C)n2)c(OC(C)C)cc1C1CCNCC1 Ceritinib 82 | CN1CCN(C2CCN(C(=O)Nc3cc(Oc4ccc(NC(=O)C5(C(=O)Nc6ccc(F)cc6)CC5)c(F)c4)ccn3)CC2)CC1 Golvatinib 83 | CCC(C)(Nc1ccnc(-c2c[nH]c3ncccc23)n1)C(=O)NCC(F)(F)F Decernotinib 84 | Cc1[nH]c(C=C2C(=O)Nc3ccc(F)cc32)c(C)c1C(=O)NCCN1CCCC1 Toceranib 85 | 
CC(C)(C)c1cc(NC(=O)Nc2ccc(-c3cn4c(n3)sc3cc(OCCN5CCOCC5)ccc34)cc2)no1 Quizartinib 86 | CNC(=O)c1cc(Oc2ccc(NC(=O)Nc3cc(C(C)(C)C)nn3-c3ccc4ncccc4c3)c(F)c2)ccn1 Rebastinib 87 | CC(=O)c1c(C)c2cnc(Nc3ccc(N4CCNCC4)cn3)nc2n(C2CCCC2)c1=O Palbociclib 88 | COc1cc2ncnc(Nc3ccc(F)c(Cl)c3)c2cc1OCCCN1CCOCC1 Gefitinib 89 | CNC(=O)c1ccccc1Sc1ccc2c(C=Cc3ccccn3)n[nH]c2c1 Axitinib 90 | CN1CCN(CCOc2cc(OC3CCOCC3)c3c(Nc4c(Cl)ccc5c4OCO5)ncnc3c2)CC1 Saracatinib 91 | CC(Nc1ncnc2nc[nH]c12)c1cc2cccc(Cl)c2c(=O)n1-c1ccccc1 Duvelisib 92 | Cn1cnc2c(F)c(Nc3ccc(Br)cc3F)c(C(=O)NOCCO)cc21 Binimetinib 93 | Cn1c(=O)n(-c2ccc(C(C)(C)C#N)cc2)c2c3cc(-c4cnc5ccccc5c4)ccc3ncc21 Dactolisib 94 | CN(C)CC=CC(=O)Nc1cc2c(Nc3ccc(F)c(Cl)c3)ncnc2cc1OC1CCOC1 Afatinib 95 | Cc1cc(Nc2cc(CN3CCOCC3)c3nc(C)c(Cc4ccc(Cl)cc4F)n3n2)n[nH]1 Gandotinib 96 | CS(=O)(=O)CCNCc1ccc(-c2ccc3ncnc(Nc4ccc(OCc5cccc(F)c5)c(Cl)c4)c3c2)o1 Lapatinib 97 | COC1CC2CCC(C)C(O)(O2)C(=O)C(=O)N2CCCCC2C(=O)OC(C(C)CC2CCC(OC(=O)C(C)(CO)CO)C(OC)C2)CC(=O)C(C)C=C(C)C(O)C(OC)C(=O)C(C)CC(C)C=CC=CC=C1C Temsirolimus 98 | CNS(=O)(=O)CC1CCC(N(C)c2[nH]cnc3nccc2-3)CC1 Oclacitinib 99 | COc1cc(Nc2ncc3c(n2)-c2ccc(Cl)cc2C(c2c(F)cccc2OC)=NC3)ccc1C(=O)O Alisertib 100 | CCc1cnn2c(NCc3ccc[n+]([O-])c3)cc(N3CCCCC3CCO)nc12 Dinaciclib 101 | COC(=O)NC(C)CNc1nccc(-c2cn(C(C)C)nc2-c2cc(Cl)cc(NS(C)(=O)=O)c2F)n1 Encorafenib 102 | Cc1cc(F)ccc1-c1nc(NC(CO)CO)nc2c1ccc(=O)n2-c1c(F)cccc1F Dilmapimod 103 | O=C(c1ccc(F)c(F)c1Nc1ccc(I)cc1F)N1CC(O)(C2CCCCN2)C1 Cobimetinib 104 | COc1cc2ncnc(Nc3ccc(F)c(Cl)c3)c2cc1NC(=O)C=CCN1CCCCC1 Dacomitinib 105 | Cc1ccc(Nc2nccc(N(C)c3ccc4c(C)n(C)nc4c3)n2)cc1S(N)(=O)=O Pazopanib 106 | CCOc1cc2ncc(C#N)c(Nc3ccc(OCc4ccccn4)c(Cl)c3)c2cc1NC(=O)C=CCN(C)C Neratinib 107 | Cn1c(=O)c(Oc2ccc(F)cc2F)cc2cnc(NC(CCO)CCO)nc21 Pamapimod 108 | CCCS(=O)(=O)Nc1ccc(F)c(C(=O)c2c[nH]c3ncc(-c4ccc(Cl)cc4)cc23)c1F Vemurafenib 109 | C#Cc1cccc(Nc2ncnc3cc(OCCOC)c(OCCOC)cc23)c1 Erlotinib 110 | S=C(NCc1ccc2c(c1)OCO2)N1CCN(c2ncnc3c2oc2ccccc23)CC1 Amuvatinib 111 | 
O=C(NCC(O)CO)c1ccncc1Nc1ccc(I)cc1F Pimasertib 112 | CC(=O)Nc1cccc(-n2c(=O)n(C3CC3)c(=O)c3c(Nc4ccc(I)cc4F)n(C)c(=O)c(C)c32)c1 Trametinib 113 | C=CCC1C=C(C)CC(C)CC(OC)C2OC(O)(C(=O)C(=O)N3CCCCC3C(=O)OC(C(C)=CC3CCC(O)C(OC)C3)C(C)C(O)CC1=O)C(C)CC2OC Tacrolimus 114 | N#CCNC(=O)c1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)cc3)n2)cc1 Momelotinib 115 | Cc1ccc(NC(=O)c2ccc(CN3CCN(C)CC3)cc2)cc1Nc1nccc(-c2cccnc2)n1 Imatinib 116 | -------------------------------------------------------------------------------- /Kinase-Inhibitors/approved-or-in-clinical-trials/kin_inh_approved.smi: -------------------------------------------------------------------------------- 1 | SMILES Name 2 | COc1cc(Nc2c(C#N)cnc3cc(OCCCN4CCN(C)CC4)c(OC)cc23)c(Cl)cc1Cl Bosutinib 3 | CC1CCN(C(=O)CC#N)CC1N(C)c1ncnc2[nH]ccc12 Tofacitinib 4 | Cc1ccc(C(=O)Nc2ccc(CN3CCN(C)CC3)c(C(F)(F)F)c2)cc1C#Cc1cnc2cccnn12 Ponatinib 5 | N#CCC(C1CCCC1)n1cc(-c2ncnc3[nH]ccc23)cn1 Ruxolitinib 6 | C=CC(=O)N1CCCC(n2nc(-c3ccc(Oc4ccccc4)cc3)c3c(N)ncnc32)C1 Ibrutinib 7 | CNC(=O)c1cc(Oc2ccc(NC(=O)Nc3ccc(Cl)c(C(F)(F)F)c3)cc2)ccn1 Sorafenib 8 | Cc1nc(Nc2ncc(C(=O)Nc3c(C)cccc3Cl)s2)cc(N2CCN(CCO)CC2)n1 Dasatinib 9 | COC(=O)c1ccc2c(c1)NC(=O)C2=C(Nc1ccc(N(C)C(=O)CN2CCN(C)CC2)cc1)c1ccccc1 Nintedanib 10 | CC(C)(C)c1nc(-c2cccc(NS(=O)(=O)c3c(F)cccc3F)c2F)c(-c2ccnc(N)n2)s1 Dabrafenib 11 | COC1CC2CCC(C)C(O)(O2)C(=O)C(=O)N2CCCCC2C(=O)OC(C(C)CC2CCC(OCCO)C(OC)C2)CC(=O)C(C)C=C(C)C(O)C(OC)C(=O)C(C)CC(C)C=CC=CC=C1C Everolimus 12 | CCN(CC)CCNC(=O)c1c(C)[nH]c(C=C2C(=O)Nc3ccc(F)cc32)c1C Sunitinib 13 | COC1CC2CCC(C)C(O)(O2)C(=O)C(=O)N2CCCCC2C(=O)OC(C(C)CC2CCC(O)C(OC)C2)CC(=O)C(C)C=C(C)C(O)C(OC)C(=O)C(C)CC(C)C=CC=CC=C1C Sirolimus 14 | COc1cc2nccc(Oc3ccc(NC(=O)C4(C(=O)Nc5ccc(F)cc5)CC4)cc3)c2cc1OC Cabozantinib 15 | CNC(=O)c1cc(Oc2ccc(NC(=O)Nc3ccc(Cl)c(C(F)(F)F)c3)c(F)c2)ccn1 Regorafenib 16 | CCC1C=C(C)CC(C)CC(OC)C2OC(O)(C(=O)C(=O)N3CCCCC3C(=O)OC(C(C)=CC3CCC(Cl)C(OC)C3)C(C)C(O)CC1=O)C(C)CC2OC Pimecrolimus 17 | CC(Oc1cc(-c2cnn(C3CCNCC3)c2)cnc1N)c1c(Cl)ccc(F)c1Cl Crizotinib 18 | 
Cc1cn(-c2cc(NC(=O)c3ccc(C)c(Nc4nccc(-c5cccnc5)n4)c3)cc(C(F)(F)F)c2)cn1 Nilotinib 19 | CCC(Nc1ncnc2[nH]cnc12)c1nc2cccc(F)c2c(=O)n1-c1ccccc1 Idelalisib 20 | COc1cc2c(Nc3ccc(Br)cc3F)ncnc2cc1OCC1CCN(C)CC1 Vandetanib 21 | Cc1cc(Nc2ncc(Cl)c(Nc3ccccc3S(=O)(=O)C(C)C)n2)c(OC(C)C)cc1C1CCNCC1 Ceritinib 22 | COc1cc2ncnc(Nc3ccc(F)c(Cl)c3)c2cc1OCCCN1CCOCC1 Gefitinib 23 | CNC(=O)c1ccccc1Sc1ccc2c(C=Cc3ccccn3)n[nH]c2c1 Axitinib 24 | CN(C)CC=CC(=O)Nc1cc2c(Nc3ccc(F)c(Cl)c3)ncnc2cc1OC1CCOC1 Afatinib 25 | CS(=O)(=O)CCNCc1ccc(-c2ccc3ncnc(Nc4ccc(OCc5cccc(F)c5)c(Cl)c4)c3c2)o1 Lapatinib 26 | COC1CC2CCC(C)C(O)(O2)C(=O)C(=O)N2CCCCC2C(=O)OC(C(C)CC2CCC(OC(=O)C(C)(CO)CO)C(OC)C2)CC(=O)C(C)C=C(C)C(O)C(OC)C(=O)C(C)CC(C)C=CC=CC=C1C Temsirolimus 27 | Cc1ccc(Nc2nccc(N(C)c3ccc4c(C)n(C)nc4c3)n2)cc1S(N)(=O)=O Pazopanib 28 | CCCS(=O)(=O)Nc1ccc(F)c(C(=O)c2c[nH]c3ncc(-c4ccc(Cl)cc4)cc23)c1F Vemurafenib 29 | C#Cc1cccc(Nc2ncnc3cc(OCCOC)c(OCCOC)cc23)c1 Erlotinib 30 | CC(=O)Nc1cccc(-n2c(=O)n(C3CC3)c(=O)c3c(Nc4ccc(I)cc4F)n(C)c(=O)c(C)c32)c1 Trametinib 31 | C=CCC1C=C(C)CC(C)CC(OC)C2OC(O)(C(=O)C(=O)N3CCCCC3C(=O)OC(C(C)=CC3CCC(O)C(OC)C3)C(C)C(O)CC1=O)C(C)CC2OC Tacrolimus 32 | Cc1ccc(NC(=O)c2ccc(CN3CCN(C)CC3)cc2)cc1Nc1nccc(-c2cccnc2)n1 Imatinib 33 | -------------------------------------------------------------------------------- /Kinase-Inhibitors/approved-or-in-clinical-trials/kin_inh_approved_2.smi: -------------------------------------------------------------------------------- 1 | COc1cc(Nc2c(C#N)cnc3cc(OCCCN4CCN(C)CC4)c(OC)cc23)c(Cl)cc1Cl Bosutinib 2 | CC1CCN(C(=O)CC#N)CC1N(C)c1ncnc2[nH]ccc12 Tofacitinib 3 | Cc1ccc(C(=O)Nc2ccc(CN3CCN(C)CC3)c(C(F)(F)F)c2)cc1C#Cc1cnc2cccnn12 Ponatinib 4 | N#CCC(C1CCCC1)n1cc(-c2ncnc3[nH]ccc23)cn1 Ruxolitinib 5 | C=CC(=O)N1CCCC(n2nc(-c3ccc(Oc4ccccc4)cc3)c3c(N)ncnc32)C1 Ibrutinib 6 | CNC(=O)c1cc(Oc2ccc(NC(=O)Nc3ccc(Cl)c(C(F)(F)F)c3)cc2)ccn1 Sorafenib 7 | Cc1nc(Nc2ncc(C(=O)Nc3c(C)cccc3Cl)s2)cc(N2CCN(CCO)CC2)n1 Dasatinib 8 | 
COC(=O)c1ccc2c(c1)NC(=O)C2=C(Nc1ccc(N(C)C(=O)CN2CCN(C)CC2)cc1)c1ccccc1 Nintedanib 9 | CC(C)(C)c1nc(-c2cccc(NS(=O)(=O)c3c(F)cccc3F)c2F)c(-c2ccnc(N)n2)s1 Dabrafenib 10 | COC1CC2CCC(C)C(O)(O2)C(=O)C(=O)N2CCCCC2C(=O)OC(C(C)CC2CCC(OCCO)C(OC)C2)CC(=O)C(C)C=C(C)C(O)C(OC)C(=O)C(C)CC(C)C=CC=CC=C1C Everolimus 11 | CCN(CC)CCNC(=O)c1c(C)[nH]c(C=C2C(=O)Nc3ccc(F)cc32)c1C Sunitinib 12 | COC1CC2CCC(C)C(O)(O2)C(=O)C(=O)N2CCCCC2C(=O)OC(C(C)CC2CCC(O)C(OC)C2)CC(=O)C(C)C=C(C)C(O)C(OC)C(=O)C(C)CC(C)C=CC=CC=C1C Sirolimus 13 | COc1cc2nccc(Oc3ccc(NC(=O)C4(C(=O)Nc5ccc(F)cc5)CC4)cc3)c2cc1OC Cabozantinib 14 | CNC(=O)c1cc(Oc2ccc(NC(=O)Nc3ccc(Cl)c(C(F)(F)F)c3)c(F)c2)ccn1 Regorafenib 15 | CCC1C=C(C)CC(C)CC(OC)C2OC(O)(C(=O)C(=O)N3CCCCC3C(=O)OC(C(C)=CC3CCC(Cl)C(OC)C3)C(C)C(O)CC1=O)C(C)CC2OC Pimecrolimus 16 | CC(Oc1cc(-c2cnn(C3CCNCC3)c2)cnc1N)c1c(Cl)ccc(F)c1Cl Crizotinib 17 | Cc1cn(-c2cc(NC(=O)c3ccc(C)c(Nc4nccc(-c5cccnc5)n4)c3)cc(C(F)(F)F)c2)cn1 Nilotinib 18 | CCC(Nc1ncnc2[nH]cnc12)c1nc2cccc(F)c2c(=O)n1-c1ccccc1 Idelalisib 19 | COc1cc2c(Nc3ccc(Br)cc3F)ncnc2cc1OCC1CCN(C)CC1 Vandetanib 20 | Cc1cc(Nc2ncc(Cl)c(Nc3ccccc3S(=O)(=O)C(C)C)n2)c(OC(C)C)cc1C1CCNCC1 Ceritinib 21 | COc1cc2ncnc(Nc3ccc(F)c(Cl)c3)c2cc1OCCCN1CCOCC1 Gefitinib 22 | CNC(=O)c1ccccc1Sc1ccc2c(C=Cc3ccccn3)n[nH]c2c1 Axitinib 23 | CN(C)CC=CC(=O)Nc1cc2c(Nc3ccc(F)c(Cl)c3)ncnc2cc1OC1CCOC1 Afatinib 24 | CS(=O)(=O)CCNCc1ccc(-c2ccc3ncnc(Nc4ccc(OCc5cccc(F)c5)c(Cl)c4)c3c2)o1 Lapatinib 25 | COC1CC2CCC(C)C(O)(O2)C(=O)C(=O)N2CCCCC2C(=O)OC(C(C)CC2CCC(OC(=O)C(C)(CO)CO)C(OC)C2)CC(=O)C(C)C=C(C)C(O)C(OC)C(=O)C(C)CC(C)C=CC=CC=C1C Temsirolimus 26 | Cc1ccc(Nc2nccc(N(C)c3ccc4c(C)n(C)nc4c3)n2)cc1S(N)(=O)=O Pazopanib 27 | CCCS(=O)(=O)Nc1ccc(F)c(C(=O)c2c[nH]c3ncc(-c4ccc(Cl)cc4)cc23)c1F Vemurafenib 28 | C#Cc1cccc(Nc2ncnc3cc(OCCOC)c(OCCOC)cc23)c1 Erlotinib 29 | CC(=O)Nc1cccc(-n2c(=O)n(C3CC3)c(=O)c3c(Nc4ccc(I)cc4F)n(C)c(=O)c(C)c32)c1 Trametinib 30 | C=CCC1C=C(C)CC(C)CC(OC)C2OC(O)(C(=O)C(=O)N3CCCCC3C(=O)OC(C(C)=CC3CCC(O)C(OC)C3)C(C)C(O)CC1=O)C(C)CC2OC Tacrolimus 31 | 
Cc1ccc(NC(=O)c2ccc(CN3CCN(C)CC3)cc2)cc1Nc1nccc(-c2cccnc2)n1 Imatinib 32 | -------------------------------------------------------------------------------- /Kinase-Inhibitors/approved-or-in-clinical-trials/kin_inh_approved_3.smi: -------------------------------------------------------------------------------- 1 | COc1cc(Nc2c(C#N)cnc3cc(OCCCN4CCN(C)CC4)c(OC)cc23)c(Cl)cc1Cl Bosutinib 2 | CC1CCN(C(=O)CC#N)CC1N(C)c1ncnc2[nH]ccc12 Tofacitinib 3 | Cc1ccc(C(=O)Nc2ccc(CN3CCN(C)CC3)c(C(F)(F)F)c2)cc1C#Cc1cnc2cccnn12 Ponatinib 4 | N#CCC(C1CCCC1)n1cc(c2ncnc3[nH]ccc23)cn1 Ruxolitinib 5 | C=CC(=O)N1CCCC(n2nc(c3ccc(Oc4ccccc4)cc3)c3c(N)ncnc23)C1 Ibrutinib 6 | CNC(=O)c1cc(Oc2ccc(NC(=O)Nc3ccc(Cl)c(C(F)(F)F)c3)cc2)ccn1 Sorafenib 7 | Cc1nc(Nc2ncc(C(=O)Nc3c(C)cccc3Cl)s2)cc(N2CCN(CCO)CC2)n1 Dasatinib 8 | COC(=O)c1ccc2c(c1)NC(=O)C2=C(Nc1ccc(N(C)C(=O)CN2CCN(C)CC2)cc1)c1ccccc1 Nintedanib 9 | CC(C)(C)c1nc(c2cccc(NS(=O)(=O)c3c(F)cccc3F)c2F)c(c2ccnc(N)n2)s1 Dabrafenib 10 | COC1CC2CCC(C)C(O)(O2)C(=O)C(=O)N2CCCCC2C(=O)OC(C(C)CC2CCC(OCCO)C(OC)C2)CC(=O)C(C)C=C(C)C(O)C(OC)C(=O)C(C)CC(C)C=CC=CC=C1C Everolimus 11 | CCN(CC)CCNC(=O)c1c(C)[nH]c(C=C2C(=O)Nc3ccc(F)cc23)c1C Sunitinib 12 | COC1CC2CCC(C)C(O)(O2)C(=O)C(=O)N2CCCCC2C(=O)OC(C(C)CC2CCC(O)C(OC)C2)CC(=O)C(C)C=C(C)C(O)C(OC)C(=O)C(C)CC(C)C=CC=CC=C1C Sirolimus 13 | COc1cc2nccc(Oc3ccc(NC(=O)C4(C(=O)Nc5ccc(F)cc5)CC4)cc3)c2cc1OC Cabozantinib 14 | CNC(=O)c1cc(Oc2ccc(NC(=O)Nc3ccc(Cl)c(C(F)(F)F)c3)c(F)c2)ccn1 Regorafenib 15 | CCC1C=C(C)CC(C)CC(OC)C2OC(O)(C(=O)C(=O)N3CCCCC3C(=O)OC(C(=CC3CCC(Cl)C(OC)C3)C)C(C)C(O)CC1=O)C(C)CC2OC Pimecrolimus 16 | CC(Oc1cc(c2cnn(C3CCNCC3)c2)cnc1N)c1c(Cl)ccc(F)c1Cl Crizotinib 17 | Cc1cn(c2cc(NC(=O)c3ccc(C)c(Nc4nccc(c5cccnc5)n4)c3)cc(C(F)(F)F)c2)cn1 Nilotinib 18 | CCC(Nc1ncnc2[nH]cnc12)c1nc2cccc(F)c2c(=O)n1c1ccccc1 Idelalisib 19 | COc1cc2c(Nc3ccc(Br)cc3F)ncnc2cc1OCC1CCN(C)CC1 Vandetanib 20 | Cc1cc(Nc2ncc(Cl)c(Nc3ccccc3S(=O)(=O)C(C)C)n2)c(OC(C)C)cc1C1CCNCC1 Ceritinib 21 | COc1cc2ncnc(Nc3ccc(F)c(Cl)c3)c2cc1OCCCN1CCOCC1 Gefitinib 22 
| CNC(=O)c1ccccc1Sc1ccc2c(C=Cc3ccccn3)n[nH]c2c1 Axitinib 23 | CN(C)CC=CC(=O)Nc1cc2c(Nc3ccc(F)c(Cl)c3)ncnc2cc1OC1CCOC1 Afatinib 24 | CS(=O)(=O)CCNCc1ccc(c2ccc3ncnc(Nc4ccc(OCc5cccc(F)c5)c(Cl)c4)c3c2)o1 Lapatinib 25 | COC1CC2CCC(C)C(O)(O2)C(=O)C(=O)N2CCCCC2C(=O)OC(C(C)CC2CCC(OC(=O)C(C)(CO)CO)C(OC)C2)CC(=O)C(C)C=C(C)C(O)C(OC)C(=O)C(C)CC(C)C=CC=CC=C1C Temsirolimus 26 | Cc1ccc(Nc2nccc(N(C)c3ccc4c(C)n(C)nc4c3)n2)cc1S(=O)(=O)N Pazopanib 27 | CCCS(=O)(=O)Nc1ccc(F)c(C(=O)c2c[nH]c3ncc(c4ccc(Cl)cc4)cc23)c1F Vemurafenib 28 | C#Cc1cccc(Nc2ncnc3cc(OCCOC)c(OCCOC)cc23)c1 Erlotinib 29 | CC(=O)Nc1cccc(n2c(=O)n(C3CC3)c(=O)c3c(Nc4ccc(I)cc4F)n(C)c(=O)c(C)c23)c1 Trametinib 30 | C=CCC1C=C(C)CC(C)CC(OC)C2OC(O)(C(=O)C(=O)N3CCCCC3C(=O)OC(C(=CC3CCC(O)C(OC)C3)C)C(C)C(O)CC1=O)C(C)CC2OC Tacrolimus 31 | Cc1ccc(NC(=O)c2ccc(CN3CCN(C)CC3)cc2)cc1Nc1nccc(c2cccnc2)n1 Imatinib 32 | -------------------------------------------------------------------------------- /Kinase-Inhibitors/approved-or-in-clinical-trials/kin_inh_phase4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Kinase-Inhibitors/approved-or-in-clinical-trials/kin_inh_phase4.png -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Code in this repository is copyright (C) 2013-2016 by Team SKI @ BioMed X GmbH 2 | 3 | This work is licensed under the Creative Commons Attribution-ShareAlike 3.0 4 | License. To view a copy of this license, visit 5 | http://creativecommons.org/licenses/by-sa/3.0/ or send a letter to Creative 6 | Commons, 543 Howard Street, 5th Floor, San Francisco, California, 94105, USA. 
7 | -------------------------------------------------------------------------------- /Machine Learning/tree.dot: -------------------------------------------------------------------------------- 1 | digraph Tree { 2 | node [shape=box] ; 3 | 0 [label="X[3] <= 0.8\ngini = 0.6667\nsamples = 150\nvalue = [50, 50, 50]"] ; 4 | 1 [label="gini = 0.0\nsamples = 50\nvalue = [50, 0, 0]"] ; 5 | 0 -> 1 [labeldistance=2.5, labelangle=45, headlabel="True"] ; 6 | 2 [label="X[3] <= 1.75\ngini = 0.5\nsamples = 100\nvalue = [0, 50, 50]"] ; 7 | 0 -> 2 [labeldistance=2.5, labelangle=-45, headlabel="False"] ; 8 | 3 [label="X[2] <= 4.95\ngini = 0.168\nsamples = 54\nvalue = [0, 49, 5]"] ; 9 | 2 -> 3 ; 10 | 4 [label="X[3] <= 1.65\ngini = 0.0408\nsamples = 48\nvalue = [0, 47, 1]"] ; 11 | 3 -> 4 ; 12 | 5 [label="gini = 0.0\nsamples = 47\nvalue = [0, 47, 0]"] ; 13 | 4 -> 5 ; 14 | 6 [label="gini = 0.0\nsamples = 1\nvalue = [0, 0, 1]"] ; 15 | 4 -> 6 ; 16 | 7 [label="X[3] <= 1.55\ngini = 0.4444\nsamples = 6\nvalue = [0, 2, 4]"] ; 17 | 3 -> 7 ; 18 | 8 [label="gini = 0.0\nsamples = 3\nvalue = [0, 0, 3]"] ; 19 | 7 -> 8 ; 20 | 9 [label="X[0] <= 6.95\ngini = 0.4444\nsamples = 3\nvalue = [0, 2, 1]"] ; 21 | 7 -> 9 ; 22 | 10 [label="gini = 0.0\nsamples = 2\nvalue = [0, 2, 0]"] ; 23 | 9 -> 10 ; 24 | 11 [label="gini = 0.0\nsamples = 1\nvalue = [0, 0, 1]"] ; 25 | 9 -> 11 ; 26 | 12 [label="X[2] <= 4.85\ngini = 0.0425\nsamples = 46\nvalue = [0, 1, 45]"] ; 27 | 2 -> 12 ; 28 | 13 [label="X[0] <= 5.95\ngini = 0.4444\nsamples = 3\nvalue = [0, 1, 2]"] ; 29 | 12 -> 13 ; 30 | 14 [label="gini = 0.0\nsamples = 1\nvalue = [0, 1, 0]"] ; 31 | 13 -> 14 ; 32 | 15 [label="gini = 0.0\nsamples = 2\nvalue = [0, 0, 2]"] ; 33 | 13 -> 15 ; 34 | 16 [label="gini = 0.0\nsamples = 43\nvalue = [0, 0, 43]"] ; 35 | 12 -> 16 ; 36 | } -------------------------------------------------------------------------------- /Machine Learning/tree.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Machine Learning/tree.png -------------------------------------------------------------------------------- /Presentations and Tutorials/Molecular Modelling Workshop 2014/biomedx.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Presentations and Tutorials/Molecular Modelling Workshop 2014/biomedx.png -------------------------------------------------------------------------------- /Presentations and Tutorials/Molecular Modelling Workshop 2014/merck.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Presentations and Tutorials/Molecular Modelling Workshop 2014/merck.jpeg -------------------------------------------------------------------------------- /Presentations and Tutorials/Molecular Modelling Workshop 2014/rdkit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Presentations and Tutorials/Molecular Modelling Workshop 2014/rdkit.png -------------------------------------------------------------------------------- /Presentations and Tutorials/Protvec demo 2017/README.md: -------------------------------------------------------------------------------- 1 | ## Demonstration of biovec (protein sequence embeddings) 2 | * Described by Asgari and Mofrad in [A Continuous Distributed Representation of Biological Sequences](http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0141287).
3 | 4 | This folder contains a notebook that shows how to use the [biovec](https://github.com/kyu999/biovec) module to generate vectors from protein sequences and how to use those for clustering and protein family classification (with deep learning). 5 | 6 | ### Usage: 7 | * Clone the repository with `git clone https://github.com/Team-SKI/snippets.git` 8 | * Change directory to `Presentations and Tutorials/Protvec demo 2017` 9 | * Download the biovec submodule with `git submodule init` followed by `git submodule update` 10 | * Run `jupyter notebook` 11 | 12 | ### Dependencies: 13 | ``` 14 | pandas 15 | numpy 16 | matplotlib 17 | scikit-learn 18 | gensim 19 | keras (with TensorFlow or Theano) 20 | seaborn 21 | ``` 22 | 23 | ##### License and copyright: 24 | 25 | Copyright (C) 2017 by Sabrina Jaeger and Samo Turk, BioMed X GmbH 26 | 27 | This work is licensed under the Creative Commons Attribution-ShareAlike 4.0 License. To view a copy of this license, visit https://creativecommons.org/licenses/by-sa/4.0/ or send a letter to Creative Commons, 543 Howard Street, 5th Floor, San Francisco, California, 94105, USA.
28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /Presentations and Tutorials/Protvec demo 2017/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Presentations and Tutorials/Protvec demo 2017/__init__.py -------------------------------------------------------------------------------- /Presentations and Tutorials/Protvec demo 2017/biovec/.gitignore: -------------------------------------------------------------------------------- 1 | data/ 2 | dist/ 3 | build/ 4 | biovec.egg-info 5 | *__pycache__ 6 | *pyc 7 | -------------------------------------------------------------------------------- /Presentations and Tutorials/Protvec demo 2017/biovec/README.md: -------------------------------------------------------------------------------- 1 | BioVec 2 | -------------------------------------- 3 | 4 | ### Example 5 | 6 | ``` 7 | import biovec 8 | 9 | pv = biovec.ProtVec("some_fasta_file.fasta", out="output_corpusfile_path.txt") 10 | pv["QAT"] 11 | pv.to_vecs("ATATQSQSMTEEL") 12 | pv.save('model_file_path') 13 | 14 | pv2 = biovec.models.load_protvec('model_file_path') 15 | ``` 16 | 17 | ### Trained Model 18 | 19 | This package includes already trained model in '/trained_models'. 20 | swissprot_reviewed_protvec is a protvec model fed all Swiss-Prot reviewed proteins(551,754 proteins as of 14/07/2016) as the training data. 21 | 22 | # Source 23 | Paper: [ProtVec: A Continuous Distributed Representation of 24 | Biological Sequences](http://arxiv.org/pdf/1503.05140v1.pdf) 25 | 26 | ### Abstract 27 | 通常生物情報は文字の配列で表現されるが、それをベクトルとして表現することによってより分析しやすく情報を収納することができるのではないかと提案されている。具体的な適用範囲としては、 28 | 29 | 1. family classification 30 | 2. protein visualization 31 | 3. structure prediction 32 | 4. disordered protein identification 33 | 5. protein-protein interaction prediction. 
34 | 35 | など。 36 | classificationやpredictionはわかりやすい使い方だが、個人的にはprotein visualizationが最も効用が大きいのではないかと感じた。短い配列や、構造が既知の配列でない限り、現状簡単にタンパク質の全容を掴む方法が一般的に普及していないように感じるので、このような表現方法は一定の有用性があると考える。 37 | この考えは一見奇妙に映るが自然言語ではある程度認知されており、word2vecなどは記憶に新しい。 38 | 39 | ### ProtVec実装 40 | 41 | [前処理] 42 | * uniprotのswis-protの全データを収集する 43 | * 各配列を3つのn-gramのリストに変換する 44 | 45 | ``` 46 | 'AGAMQSASM' => [['AGA', 'MQS', 'ASM'], ['GAM','QSA'], ['AMQ', 'SAS']] 47 | ``` 48 | 49 | * word2vecに読み込ませるために、変換した配列をテキストファイル形式に書き出す 50 | 51 | [モデル構築] 52 | 53 | word2vecのライブラリを用いれば基本大丈夫そう。 54 | いろいろあるけど、gensimをここでは使う。ただしSkip-gramを論文では採用しているので注意。 55 | gensimではsgパラメータを1に設定するとskip-gramになる。 56 | > sg defines the training algorithm. By default (sg=0), CBOW is used. Otherwise (sg=1), skip-gram is employed. 57 | 58 | 前処理では上記のように単語をn-gramのリストへ変換するが、sequenceをqueryとしてモデルに投げるときは逆に再変換する必要がある。訓練済みモデルにn-gramを指定することで、対応するベクトルを得ることができる。論文ではそのベクトルの和をその配列に対応するベクトルとして扱っている。 59 | 60 | ``` 61 | seq = 'AGAMQSASM' 62 | n_grams = split_to_grams(seq) 63 | gram_vecs = [to_gram_vec(n_gram) for n_gram in n_grams] 64 | seq_vec = sum(gram_vecs) 65 | ``` 66 | 67 | 論文での、ベクトルの次元数は100。 68 | 元の配列に対応するベクトルはn-gramベクトルの和なので次元数は変わらず、100次元。 69 | negative samplingも行っているので忘れずに。 70 | 71 | ### Visualization of ProtVec 72 | [sklearn.manifold.TSNE](http://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html) 73 | t-SNEによって2次元もしくは3次元に次元圧縮を行った後、可視化している。しかしこの圧縮方法が適しているのか、なぜこの方法を取ったのかは言及されていない。scikit-learnではt-SNEは非推奨されており、密なデータであればPCA、疎なデータであればTruncatedSVDが勧められている。圧縮後の次元数がとても低いためだろうか。 74 | 75 | > It is highly recommended to use another dimensionality reduction method (e.g. PCA for dense data or TruncatedSVD for sparse data) to reduce the number of dimensions to a reasonable amount (e.g. 50) if the number of features is very high. This will suppress some noise and speed up the computation of pairwise distances between samples. For more tips see Laurens van der Maaten’s FAQ. 
76 | 77 | [Lipschitz continuity](http://izumi-math.jp/F_Wada/fixpoint_theorem.pdf) 78 | 79 | ### Abstract of the paper 80 | 81 | > We propose a new approach for representing biological sequences. This method, named protein-vectors or ProtVec for short, can be utilized in bioinformatics applications such as family classification, protein visualization, structure prediction, disordered protein identification, and protein-protein interaction prediction. Using the Skip-gram neural networks, protein sequences are represented with a single dense n-dimensional vector. This method was evaluated by classifying protein sequences obtained from Swiss-Prot belonging to 7,027 protein families where an average family classification accuracy of 94%±0.03% was obtained, outperforming existing family classification methods. In addition, our model was used to predict disordered proteins from structured proteins. Two databases of disordered sequences were used: the DisProt database as well as a database featuring the disordered regions of nucleoporins rich with phenylalanine-glycine repeats (FG-Nups). Using support vector machine classifiers, FG-Nup sequences were distinguished from structured Protein Data Bank (PDB) sequences with 99.81\% accuracy, and unstructured DisProt sequences from structured DisProt sequences with 100.0\% accuracy. These results indicate that by only providing sequence data for various proteins into this model, information about protein structure can be determined with high accuracy. This so-called embedding model needs to be trained only once and can then be used to ascertain a diverse set of information regarding the proteins of interest. In addition, this representation can be considered as pre-training for various applications of deep learning in bioinformatics. 82 | 83 | ### References 84 | 1. [Disordered Proteins](https://en.wikipedia.org/wiki/Intrinsically_disordered_proteins) 85 | 2. [DisProt](http://www.disprot.org/) 86 | 3. 
[gensim word2vec](https://radimrehurek.com/gensim/models/word2vec.html) 87 | 4. [NIPS2013読み会: Distributed Representations of Words and Phrases and their Compositionality](http://www.slideshare.net/unnonouno/nips2013-distributed-representations-of-words-and-phrases-and-their-compositionality) 88 | 5. [Skip gram shirakawa_20141121 89 | ](http://www.slideshare.net/nttdata-msi/skip-gram-shirakawa20141121-41833306) 90 | 6. [論文紹介「Distributed Representations of Words and Phrases and their Compositionality」](http://qiita.com/nishio/items/3860fe198d65d173af6b) 91 | 7. [sklearn.manifold.TSNE](http://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html) 92 | 8. [Lipschitz continuity](http://izumi-math.jp/F_Wada/fixpoint_theorem.pdf) 93 | -------------------------------------------------------------------------------- /Presentations and Tutorials/Protvec demo 2017/biovec/biovec/__init__.py: -------------------------------------------------------------------------------- 1 | from .models import ProtVec 2 | -------------------------------------------------------------------------------- /Presentations and Tutorials/Protvec demo 2017/biovec/biovec/binary_amino.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Binary representation of amino acid residue and amino acid sequence 3 | e.g.
# ---- biovec/biovec/binary_amino.py ----

# Fixed 5-bit code for each one-letter amino acid symbol handled by this module.
# NOTE(review): 'R' is coded 01111 and the pattern 01110 is unused -- confirm
# that this gap is intended before relying on the codes being consecutive.
AMINO_ACID_BINARY_TABLE = {
    'A': [0, 0, 0, 0, 0],
    'C': [0, 0, 0, 0, 1],
    'D': [0, 0, 0, 1, 0],
    'E': [0, 0, 0, 1, 1],
    'F': [0, 0, 1, 0, 0],
    'G': [0, 0, 1, 0, 1],
    'H': [0, 0, 1, 1, 0],
    'I': [0, 0, 1, 1, 1],
    'K': [0, 1, 0, 0, 0],
    'L': [0, 1, 0, 0, 1],
    'M': [0, 1, 0, 1, 0],
    'N': [0, 1, 0, 1, 1],
    'P': [0, 1, 1, 0, 0],
    'Q': [0, 1, 1, 0, 1],
    'R': [0, 1, 1, 1, 1],
    'S': [1, 0, 0, 0, 0],
    'T': [1, 0, 0, 0, 1],
    'V': [1, 0, 0, 1, 0],
    'W': [1, 0, 0, 1, 1],
    'Y': [1, 0, 1, 0, 0]
}


def convert_amino_to_binary(amino):
    '''
    Convert one amino acid symbol to its 1-dimensional, length-5 binary array.

    "A" => [0, 0, 0, 0, 0]

    Returns None when `amino` is not a key of AMINO_ACID_BINARY_TABLE.
    The returned list is the table's own entry, not a copy.
    '''
    # dict.has_key() was removed in Python 3. dict.get() behaves identically on
    # Python 2 and 3 and already yields None for unknown symbols.
    return AMINO_ACID_BINARY_TABLE.get(amino)


def convert_amino_acid_sequence_to_vector(sequence):
    '''
    Convert a sequence of amino acid symbols to a list of 5-bit codes.

    "AGGP" => [[0, 0, 0, 0, 0], [0, 0, 1, 0, 1], [0, 0, 1, 0, 1], [0, 1, 1, 0, 0]]

    Returns None if any symbol in `sequence` is missing from
    AMINO_ACID_BINARY_TABLE; an empty sequence gives an empty list.
    '''
    binary_vector = [convert_amino_to_binary(amino) for amino in sequence]
    # A single unknown symbol invalidates the whole sequence.
    if None in binary_vector:
        return None
    return binary_vector


# ---- biovec/biovec/models.py ----

def split_ngrams(seq, n):
    """
    Split `seq` into non-overlapping n-grams, once per reading frame.

    'AGAMQSASM' => [['AGA', 'MQS', 'ASM'], ['GAM','QSA'], ['AMQ', 'SAS']]

    Args:
        seq: sliceable, iterable sequence of one-character strings
        n: n-gram length; also the number of reading frames (offsets 0..n-1)
    Returns:
        A list with one entry per frame; each entry is the list of complete
        n-grams starting at that offset. Trailing characters that do not fill
        a whole n-gram are dropped.
    Raises:
        ValueError: if n is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be a positive integer")

    str_ngrams = []
    # The previous version always used the offsets 0, 1 and 2, which is only
    # right for n == 3; an n-gram split has exactly n distinct frames.
    for offset in range(n):
        # zip over n references to one iterator yields consecutive,
        # non-overlapping tuples of n characters.
        chunks = zip(*[iter(seq[offset:])] * n)
        str_ngrams.append(["".join(chunk) for chunk in chunks])
    return str_ngrams
# ---- biovec/biovec/models.py: module imports (duplicate word2vec import merged) ----
from gensim.models import word2vec
from Bio import SeqIO
import sys


def generate_corpusfile(corpus_fname, n, out):
    '''
    Write the n-gram corpus file that word2vec is trained on.

    Args:
        corpus_fname: corpus file name (parsed as FASTA)
        n: the number of chunks to split. In other words, "n" for "n-gram"
        out: output corpus file path
    Description:
        Protvec uses word2vec inside, and it requires to load corpus file
        to generate corpus. Every record contributes one output line per
        reading frame returned by split_ngrams, with the n-grams separated
        by single spaces. A "." is written to stdout as progress output.
    '''
    # `with` closes the output file even if parsing or writing raises.
    with open(out, "w") as f:
        for r in SeqIO.parse(corpus_fname, "fasta"):
            for ngram_pattern in split_ngrams(r.seq, n):
                f.write(" ".join(ngram_pattern) + "\n")
            # NOTE(review): indentation is lost in this dump; the progress dot
            # is assumed to be emitted once per record -- confirm.
            sys.stdout.write(".")


def load_protvec(model_fname):
    '''Load a saved model from `model_fname` via word2vec.Word2Vec.load.'''
    return word2vec.Word2Vec.load(model_fname)


class ProtVec(word2vec.Word2Vec):
    '''Word2Vec model trained on n-grams of protein sequences.'''

    def __init__(self, corpus_fname=None, corpus=None, n=3, size=100, out="corpus.txt", sg=1, window=25, min_count=2, workers=3):
        """
        Either corpus_fname or corpus is required.

        corpus_fname: fasta file for corpus
        corpus: corpus object implemented by gensim
        n: n of n-gram
        size: passed to Word2Vec as `size`
        out: corpus output file path
        sg, window, workers: passed through to Word2Vec unchanged
        min_count: least appearance count in corpus. if the n-gram appear k times which is below min_count, the model does not remember the n-gram

        Raises Exception when neither corpus_fname nor corpus is given.
        When corpus_fname is given, the corpus file is (re)generated at `out`
        and replaces any `corpus` argument.
        """

        self.n = n
        self.size = size
        self.corpus_fname = corpus_fname

        if corpus is None and corpus_fname is None:
            raise Exception("Either corpus_fname or corpus is needed!")

        if corpus_fname is not None:
            # Call form instead of the Python 2 print statement, which is a
            # SyntaxError on Python 3; the output is the same on both.
            print('Generate Corpus file from fasta file...')
            generate_corpusfile(corpus_fname, n, out)
            corpus = word2vec.Text8Corpus(out)

        # NOTE(review): newer gensim releases appear to have renamed `size` to
        # `vector_size` -- confirm against the installed gensim version.
        word2vec.Word2Vec.__init__(self, corpus, size=size, sg=sg, window=window, min_count=min_count, workers=workers)

    def to_vecs(self, seq):
        """
        Convert a sequence to one summed vector per reading frame.

        e.g. 'AGAMQSASM' => [ array([ ... * 100 ]), array([ ... * 100 ]), array([ ... * 100 ]) ]

        Each frame returned by split_ngrams(seq, self.n) is mapped to the sum
        of the model vectors of its n-grams. A frame without any complete
        n-gram sums to the integer 0.

        Raises Exception when an n-gram is not in the trained vocabulary.
        """
        ngram_patterns = split_ngrams(seq, self.n)

        protvecs = []
        for ngrams in ngram_patterns:
            ngram_vecs = []
            for ngram in ngrams:
                try:
                    ngram_vecs.append(self[ngram])
                # Only a failed vocabulary lookup means "never trained"; a
                # bare except also swallowed KeyboardInterrupt and real bugs.
                except KeyError:
                    raise Exception("Model has never trained this n-gram: " + ngram)
            protvecs.append(sum(ngram_vecs))
        return protvecs
* 100 ] ] 76 | """ 77 | ngram_patterns = split_ngrams(seq, self.n) 78 | 79 | protvecs = [] 80 | for ngrams in ngram_patterns: 81 | ngram_vecs = [] 82 | for ngram in ngrams: 83 | try: 84 | ngram_vecs.append(self[ngram]) 85 | except: 86 | raise Exception("Model has never trained this n-gram: " + ngram) 87 | protvecs.append(sum(ngram_vecs)) 88 | return protvecs 89 | -------------------------------------------------------------------------------- /Presentations and Tutorials/Protvec demo 2017/biovec/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup(name='biovec', 4 | version='0.1', 5 | description='The implementation of biovec', 6 | url='https://github.com/kyu999/biovec', 7 | author='Takashi Kyue', 8 | author_email='kyukokkyou999@gmail.com', 9 | license='MIT', 10 | packages=['biovec'], 11 | zip_safe=False) 12 | -------------------------------------------------------------------------------- /Presentations and Tutorials/Protvec demo 2017/biovec/trained_models/swissprot_reviewed_protvec: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Presentations and Tutorials/Protvec demo 2017/biovec/trained_models/swissprot_reviewed_protvec -------------------------------------------------------------------------------- /Presentations and Tutorials/Protvec demo 2017/data/uniprot_sprot_small.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q6GZX4|001R_FRG3G Putative transcription factor 001R OS=Frog virus 3 (isolate Goorha) GN=FV3-001R PE=4 SV=1 2 | MAFSAEDVLKEYDRRRRMEALLLSLYYPNDRKLLDYKEWSPPRVQVECPKAPVEWNNPPS 3 | EKGLIVGHFSGIKYKGEKAQASEVDVNKMCCWVSKFKDAMRRYQGIQTCKIPGKVLSDLD 4 | AKIKAYNLTVEGVEGFVRYSRVTKQHVAAFLKELRHSKQYENVNLIHYILTDKRVDIQHL 5 | EKDLVKDFKALVESAHRMRQGHMINVKYILYQLLKKHGHGPDGPDILTVKTGSKGVLYDD 6 | SFRKIYTDLGWKFTPL 7 | 
>sp|Q6GZX3|002L_FRG3G Uncharacterized protein 002L OS=Frog virus 3 (isolate Goorha) GN=FV3-002L PE=4 SV=1 8 | MSIIGATRLQNDKSDTYSAGPCYAGGCSAFTPRGTCGKDWDLGEQTCASGFCTSQPLCAR 9 | IKKTQVCGLRYSSKGKDPLVSAEWDSRGAPYVRCTYDADLIDTQAQVDQFVSMFGESPSL 10 | AERYCMRGVKNTAGELVSRVSSDADPAGGWCRKWYSAHRGPDQDAALGSFCIKNPGAADC 11 | KCINRASDPVYQKVKTLHAYPDQCWYVPCAADVGELKMGTQRDTPTNCPTQVCQIVFNML 12 | DDGSVTMDDVKNTINCDFSKYVPPPPPPKPTPPTPPTPPTPPTPPTPPTPPTPRPVHNRK 13 | VMFFVAGAVLVAILISTVRW 14 | >sp|Q197F8|002R_IIV3 Uncharacterized protein 002R OS=Invertebrate iridescent virus 3 GN=IIV3-002R PE=4 SV=1 15 | MASNTVSAQGGSNRPVRDFSNIQDVAQFLLFDPIWNEQPGSIVPWKMNREQALAERYPEL 16 | QTSEPSEDYSGPVESLELLPLEIKLDIMQYLSWEQISWCKHPWLWTRWYKDNVVRVSAIT 17 | FEDFQREYAFPEKIQEIHFTDTRAEEIKAILETTPNVTRLVIRRIDDMNYNTHGDLGLDD 18 | LEFLTHLMVEDACGFTDFWAPSLTHLTIKNLDMHPRWFGPVMDGIKSMQSTLKYLYIFET 19 | YGVNKPFVQWCTDNIETFYCTNSYRYENVPRPIYVWVLFQEDEWHGYRVEDNKFHRRYMY 20 | STILHKRDTDWVENNPLKTPAQVEMYKFLLRISQLNRDGTGYESDSDPENEHFDDESFSS 21 | GEEDSSDEDDPTWAPDSDDSDWETETEEEPSVAARILEKGKLTITNLMKSLGFKPKPKKI 22 | QSIDRYFCSLDSNYNSEDEDFEYDSDSEDDDSDSEDDC 23 | >sp|Q197F7|003L_IIV3 Uncharacterized protein 003L OS=Invertebrate iridescent virus 3 GN=IIV3-003L PE=4 SV=1 24 | MYQAINPCPQSWYGSPQLEREIVCKMSGAPHYPNYYPVHPNALGGAWFDTSLNARSLTTT 25 | PSLTTCTPPSLAACTPPTSLGMVDSPPHINPPRRIGTLCFDFGSAKSPQRCECVASDRPS 26 | TTSNTAPDTYRLLITNSKTRKNNYGTCRLEPLTYGI 27 | >sp|Q6GZX2|003R_FRG3G Uncharacterized protein 3R OS=Frog virus 3 (isolate Goorha) GN=FV3-003R PE=3 SV=1 28 | MARPLLGKTSSVRRRLESLSACSIFFFLRKFCQKMASLVFLNSPVYQMSNILLTERRQVD 29 | RAMGGSDDDGVMVVALSPSDFKTVLGSALLAVERDMVHVVPKYLQTPGILHDMLVLLTPI 30 | FGEALSVDMSGATDVMVQQIATAGFVDVDPLHSSVSWKDNVSCPVALLAVSNAVRTMMGQ 31 | PCQVTLIIDVGTQNILRDLVNLPVEMSGDLQVMAYTKDPLGKVPAVGVSVFDSGSVQKGD 32 | AHSVGAPDGLVSFHTHPVSSAVELNYHAGWPSNVDMSSLLTMKNLMHVVVAEEGLWTMAR 33 | TLSMQRLTKVLTDAEKDVMRAAAFNLFLPLNELRVMGTKDSNNKSLKTYFEVFETFTIGA 34 | LMKHSGVTPTAFVDRRWLDNTIYHMGFIPWGRDMRFVVEYDLDGTNPFLNTVPTLMSVKR 35 | KAKIQEMFDNMVSRMVTS 36 | >sp|Q6GZX1|004R_FRG3G 
Uncharacterized protein 004R OS=Frog virus 3 (isolate Goorha) GN=FV3-004R PE=4 SV=1 37 | MNAKYDTDQGVGRMLFLGTIGLAVVVGGLMAYGYYYDGKTPSSGTSFHTASPSFSSRYRY 38 | >sp|Q197F5|005L_IIV3 Uncharacterized protein 005L OS=Invertebrate iridescent virus 3 GN=IIV3-005L PE=3 SV=1 39 | MRYTVLIALQGALLLLLLIDDGQGQSPYPYPGMPCNSSRQCGLGTCVHSRCAHCSSDGTL 40 | CSPEDPTMVWPCCPESSCQLVVGLPSLVNHYNCLPNQCTDSSQCPGGFGCMTRRSKCELC 41 | KADGEACNSPYLDWRKDKECCSGYCHTEARGLEGVCIDPKKIFCTPKNPWQLAPYPPSYH 42 | QPTTLRPPTSLYDSWLMSGFLVKSTTAPSTQEEEDDY 43 | >sp|Q6GZX0|005R_FRG3G Uncharacterized protein 005R OS=Frog virus 3 (isolate Goorha) GN=FV3-005R PE=4 SV=1 44 | MQNPLPEVMSPEHDKRTTTPMSKEANKFIRELDKKPGDLAVVSDFVKRNTGKRLPIGKRS 45 | NLYVRICDLSGTIYMGETFILESWEELYLPEPTKMEVLGTLESCCGIPPFPEWIVMVGED 46 | QCVYAYGDEEILLFAYSVKQLVEEGIQETGISYKYPDDISDVDEEVLQQDEEIQKIRKKT 47 | REFVDKDAQEFQDFLNSLDASLLS 48 | >sp|Q91G88|006L_IIV6 Putative KilA-N domain-containing protein 006L OS=Invertebrate iridescent virus 6 GN=IIV6-006L PE=3 SV=1 49 | MDSLNEVCYEQIKGTFYKGLFGDFPLIVDKKTGCFNATKLCVLGGKRFVDWNKTLRSKKL 50 | IQYYETRCDIKTESLLYEIKGDNNDEITKQITGTYLPKEFILDIASWISVEFYDKCNNII 51 | INYFVNEYKTMDKKTLQSKINEVEEKMQKLLNEKEEELQEKNDKIDELILFSKRMEEDRK 52 | KDREMMIKQEKMLRELGIHLEDVSSQNNELIEKVDEQVEQNAVLNFKIDNIQNKLEIAVE 53 | DRAPQPKQNLKRERFILLKRNDDYYPYYTIRAQDINARSALKRQKNLYNEVSVLLDLTCH 54 | PNSKTLYVRVKDELKQKGVVFNLCKVSISNSKINEEELIKAMETINDEKRDV 55 | >sp|Q6GZW9|006R_FRG3G Uncharacterized protein 006R OS=Frog virus 3 (isolate Goorha) GN=FV3-006R PE=4 SV=1 56 | MYKMYFLKDQKFSLSGTIRINDKTQSEYGSVWCPGLSITGLHHDAIDHNMFEEMETEIIE 57 | YLGPWVQAEYRRIKG 58 | >sp|Q6GZW8|007R_FRG3G Uncharacterized protein 007R OS=Frog virus 3 (isolate Goorha) GN=FV3-007R PE=4 SV=1 59 | MRSIKPLRCCNAHGRHVSQEYGRCTLLLFREKLFLQTGLVCNKQCNAPNNDGAESKHHGI 60 | HHGSRGALALRGAGVHLLASAALGPRVLAGLVPTGRSVQGSVGQCGRVAQIGRARDVAAR 61 | KQESYCEK 62 | >sp|Q197F3|007R_IIV3 Uncharacterized protein 007R OS=Invertebrate iridescent virus 3 GN=IIV3-007R PE=4 SV=1 63 | MEAKNITIDNTTYNFFKFYNINQPLTNLKYLNSERLCFSNAVMGKIVDDASTITITYHRV 64 
| YFGISGPKPRQVADLGEYYDVNELLNYDTYTKTQEFAQKYNSLVKPTIDAKNWSGNELVL 65 | LVGNEWYCKTFGKAGSKNVFLYNMIPTIYRDEPQHQEQILKKFMFFNATKNVEQNPNFLD 66 | NVPEEYYHLLLPKSWVEKNLSDKYRKIMETEHKPLVFSCEPAFSFGLCRNTQDKNESYQL 67 | SLCLYEREKPRDAEIVWAAKYDELAAMVRDYLKKTPEFKKYRSFISCMKGLSWKNNEIGD 68 | KDGPKLYPKVIFNRKKGEFVTIFTKDDDVEPETIEDPRTILDRRCVVQAALRLESVFVHN 69 | KVAIQLRINDVLISEWKEASSKPQPLILRRHRFTKPSSSVAKSTSPSLRNSGSDESDLNQ 70 | SDSDKEDERVVPVPKTKRIVKTVKLPN 71 | >sp|Q197F2|008L_IIV3 Uncharacterized protein 008L OS=Invertebrate iridescent virus 3 GN=IIV3-008L PE=4 SV=1 72 | MSFKVYDPIAELIATQFPTSNPDLQIINNDVLVVSPHKITLPMGPQNAGDVTNKAYVDQA 73 | VMSAAVPVASSTTVGTIQMAGDLEGSSGTNPIIAANKITLNKLQKIGPKMVIGNPNSDWN 74 | NTQEIELDSSFRIVDNRLNAGIVPISSTDPNKSNTVIPAPQQNGLFYLDSSGRVWVWAEH 75 | YYKCITPSRYISKWMGVGDFQELTVGQSVMWDSGRPSIETVSTQGLEVEWISSTNFTLSS 76 | LYLIPIVVKVTICIPLLGQPDQMAKFVLYSVSSAQQPRTGIVLTTDSSRSSAPIVSEYIT 77 | VNWFEPKSYSVQLKEVNSDSGTTVTICSDKWLANPFLDCWITIEEVG 78 | >sp|Q6GZW6|009L_FRG3G Putative helicase 009L OS=Frog virus 3 (isolate Goorha) GN=FV3-009L PE=4 SV=1 79 | MDTSPYDFLKLYPWLSRGEADKGTLLDAFPGETFEQSLASDVAMRRAVQDDPAFGHQKLV 80 | ETFLSEDTPYRELLLFHAPGTGKTCTVVSVAERAKEKGLTRGCIVLARGAALLRNFLHEL 81 | VFNCGTGGRYIPEGYADMGDQERTRKMRKAVSSYYQFRTYETFAKSVATMSAEAIRARYD 82 | RFVIVMDEVHHLRSVQAEGVNTYSAISRFLRTVRGCVKMLLTGTPMTNEPGELADVLNLI 83 | LPQDKTIRPEDGIFSNSGDLLKPDELAERVRGRVSYLKAARPDAGLTFAGEVLGGTGMTH 84 | LRLVRLEMSAFQSDAYASAWDQDAGDRNIFSNSRQCSLAVMPDRRWGSAAEARNPSQVRR 85 | MAGQNLAEYSVKYDYLVRVASSSPKTFAYCEYVNGSGLSLLSDILLANGWRRATGRETTP 86 | GKRFALLTASQKNIHKIVQRFNHEDNVDGAYISLLLGSRVVAEGLTFKEVRHTVILTPHW 87 | NYTETAQAIARSWRAGSHDRLKARGEAVAVTVHRLVAVPRGRDTPRSIDSDMYAVSEVKD 88 | KRIKAVERILMTSAADCSLLRSRNLYPSEFDGSRECEYGRCAYRCSNVSVEPGPLPALLG 89 | ASAAEAVAQVRLDGGGDPAIMKVDMSTLWAEVTAGRRYVNRWGDGAVLRAEGGRLELSAP 90 | YGSSEEGRWGDFYKTRNLCYAKMDQDHLRADDLRDSLPQEVEELLTVSPVETIGETASAM 91 | PQEVATAILMACVQARADGKTLNVVRRDALLDFYKGFYAMGPSGWTVWLHARGANAKVYD 92 | GRRWNPADEDTLEFLAARSAKFTDTRIGYYGLYNPNLKDFCIRDVTQGKRDKVDLRKLTV 93 | 
GRRCVDWDQRTLVHIVARLMKIDGRRDFMPHATLREMRELAEQDPLHEPSDLTSKEACRR 94 | FLFWTQKGDNKFRRQDICKAMEKWFIENDLMEDNFDCGHQHKRRGKFA 95 | >sp|Q91G85|009R_IIV6 Uncharacterized protein 009R OS=Invertebrate iridescent virus 6 GN=IIV6-009R PE=3 SV=1 96 | MIKLFCVLAAFISINSACQSSHQQREEFTVATYHSSSICTTYCYSNCVVASQHKGLNVES 97 | YTCDKPDPYGRETVCKCTLIKCHDI 98 | >sp|Q6GZW5|010R_FRG3G Uncharacterized protein 010R OS=Frog virus 3 (isolate Goorha) GN=FV3-010R PE=4 SV=1 99 | MKMDTDCRHWIVLASVPVLTVLAFKGEGALALAGLLVMAAVAMYRDRTEKKYSAARAPSP 100 | IAGHKTAYVTDPSAFAAGTVPVYPAPSNMGSDRFEGWVGGVLTGVGSSHLDHRKFAERQL 101 | VDRREKMVGYGWTKSFF 102 | >sp|Q197E9|011L_IIV3 Uncharacterized protein 011L OS=Invertebrate iridescent virus 3 GN=IIV3-011L PE=4 SV=1 103 | MMESPKYKKSTCSVTNLGGTCILPQKGATAPKAKDVSPELLVNKMDNLCQDWARTRNEYN 104 | KVHIEQAPTDSYFGVVHSHTPKKKYTSRDSDSEPEATSTRRSATAQRAANLKSSPVDQWS 105 | TTPPQPQPQPAAPTVKKTCASSPPAALSVKRTCTSPPPPPVLIDDDTGEDAFYDTNDPDI 106 | FYDIENGVSELETEGPKRPVYYQRNIRYPIDGSVPQESEQWYDPIDDEFLASSGDVVSLE 107 | PSPIAAFQPTPPKTVQFVPMPEEIIVPPPPPPKTVVDEGVQAMPYTVDQMIQTDFEESPL 108 | LANVNLRTIPIEEVNPNFSPVLMQDMVRDSFVFGTVAQRVMASQRVKQFFKELIEQDVSL 109 | AGRMCMDSGSPQLNLYNSLMGVKLLYRWRSSTTFYRAIVPEIDEPVQVMQDVLSSSEWAK 110 | FDSQAGIPPKMVYIHYKLLNDLVKTLICPNFQLTHAALVCVDCRPEAVGSDGLQDGRQRR 111 | CSNLVSEYHEMTLEDLFNTIKPADLNAKNIILSVLFQMLYAVATVQKQFGMGGLFANADS 112 | VHVRRIQPGGFWHYTVNGLRYSVPNYGYLVILTNFTDVVNYRPDFATTRYFGRRQAKVVP 113 | TRNWYKFVPFTTRYRPFVTVDPITQAKTTAYAPNPPTEGITINEFYKDSSDLRPSVPVDL 114 | NDMITFPVPEFHLTICRLFSFFSKFYDSNFIGNDPFVRNLVDRYSQPFEFPDVYWPEDGV 115 | SRVLACYTIEEIYPNWVDGDTDYVIESYNLD 116 | >sp|Q6GZW4|011R_FRG3G Uncharacterized protein 011R OS=Frog virus 3 (isolate Goorha) GN=FV3-011R PE=4 SV=1 117 | MTSVKTIAMLAMLVIVAALIYMGYRTFTSMQSKLNELESRVNAPQLRPPVMSPIVPLNFI 118 | ESEDLDKELD 119 | >sp|Q6GZW3|012L_FRG3G Uncharacterized protein 012L OS=Frog virus 3 (isolate Goorha) GN=FV3-012L PE=4 SV=1 120 | MCAKLVEMAFGPVNADSPPLTAEEKESAVEKLVGSKPFPALKKKYHDKVPAQDPKYCLFS 121 | FVEVLPSCDIKAAGAEEMCSCCIKRRRGQVFGVACVRGTAHTLAKAKQKADKLVGDYDSV 122 | 
HVVQTCHVGRPFPLVSSGMAQETVAPSAMEAAEAAMDAKSAEKRKERMRQKLEMRKREQE 123 | IKARNRKLLEDPSCDPDAEEETDLERYATLRVKTTCLLENAKNASAQIKEYLASMRKSAE 124 | AVVAMEAADPTLVENYPGLIRDSRAKMGVSKQDTEAFLKMSSFDCLTAASELETMGF 125 | >sp|Q197E7|013L_IIV3 Uncharacterized protein IIV3-013L OS=Invertebrate iridescent virus 3 GN=IIV3-013L PE=4 SV=1 126 | MYYRDQYGNVKYAPEGMGPHHAASSSHHSAQHHHMTKENFSMDDVHSWFEKYKMWFLYAL 127 | ILALIFGVFMWWSKYNHDKKRSLNTASIFY 128 | >sp|Q6GZW2|013R_FRG3G Uncharacterized protein 013R OS=Frog virus 3 (isolate Goorha) GN=FV3-013R PE=4 SV=1 129 | MANSVAFSSMTWYSPLASDNLYDICVDKVHNRVLCLCHSFGCCTNAVVIWILPSFDEFTP 130 | QTLSCKGP 131 | >sp|Q6GZW1|014R_FRG3G Uncharacterized protein 014R OS=Frog virus 3 (isolate Goorha) GN=FV3-014R PE=4 SV=1 132 | METLVQAYLDIQGKIAEFRREIKALRVEEKAITANLFEAMGEAGVESIRISEDRYLVAEE 133 | KPKRTRSKQQFYQAAEGEGFTQEDVDRLMSLSRGAVTGSSSNVKIRKSAPARNEEDDDG 134 | >sp|Q6GZW0|015R_FRG3G Uncharacterized protein 015R OS=Frog virus 3 (isolate Goorha) GN=FV3-015R PE=4 SV=1 135 | MEQVPIKEMRLSDLRPNNKSIDTDLGGTKLVVIGKPGSGKSTLIKALLDSKRHIIPCAVV 136 | ISGSEEANGFYKGVVPDLFIYHQFSPSIIDRIHRRQVKAKAEMGSKKSWLLVVIDDCMDN 137 | AKMFNDKEVRALFKNGRHWNVLVVIANQYVMDLTPDLRSSVDGVFLFRENNVTYRDKTYA 138 | NFASVVPKKLYPTVMETVCQNYRCMFIDNTKATDNWHDSVFWYKAPYSKSAVAPFGARSY 139 | WKYACSKTGEEMPAVFDNVKILGDLLLKELPEAGEALVTYGGKDGPSDNEDGPSDDEDGP 140 | SDDEEGLSKDGVSEYYQSDLDD 141 | >sp|Q6GZV8|017L_FRG3G Uncharacterized protein 017L OS=Frog virus 3 (isolate Goorha) GN=FV3-017L PE=4 SV=1 142 | METMSDYSKEVSEALSALRGELSALSAAISNTVRAGSYSAPVAKDCKAGHCDSKAVLKSL 143 | SRSARDLDSAVEAVSSNCEWASSGYGKQIARALRDDAVRVKREVESTRDAVDVVTPSCCV 144 | QGLAEEAGKLSEMAAVYRCMATVFETADSHGVREMLAKVDGLKQTMSGFKRLLGKTAEID 145 | GLSDSVIRLGRSIGEVLPATEGKAMRDLVKQCERLNGLVVDGSRKVEEQCSKLRDMASQS 146 | YVVADLASQYDVLGGKAQEALSASDALEQAAAVALRAKAAADAVAKSLDSLDVKKLDRLL 147 | EQASAVSGLLAKKNDLDAVVTSLAGLEALVAKKDELYKICAAVNSVDKSKLELLNVKPDR 148 | LKSLTEQTVVVSQMTTALATFNEDKLDSVLGKYMQMHRFLGMATQLKLMSDSLAEFQPAK 149 | MAQMAAAASQLKDFLTDQTVSRLEKVSAAVDATDVTKYASAFSDGGMVSDMTKAYETVKA 150 | 
FAAVVNSLDSKKLKLVAECAKK 151 | >sp|Q6GZV7|018L_FRG3G Uncharacterized protein 018L OS=Frog virus 3 (isolate Goorha) GN=FV3-018L PE=3 SV=1 152 | MQNSKTDMCAALWAVTGLVLNVAVRFALEPFKESMGQGWHTAARVAVNGAIVLALADRLS 153 | DSPVTMTLFVMALSASPE 154 | >sp|Q6GZV6|019R_FRG3G Putative serine/threonine-protein kinase 019R OS=Frog virus 3 (isolate Goorha) GN=FV3-019R PE=3 SV=1 155 | MATNYCDEFERNPTRNPRTGRTIKRGGPVFRALERECSDGAARVFPAAAVRGAAAARAAS 156 | PRVAAASPCPEFARDPTRNPRTGRPIKRGGPVFRALERECADYGGASPRRVSPARAFPNR 157 | RVSPARRQSPAEAAEASPCPEFARDPTRNPRTGRTIKRGGPTYRALEAECADYGRLSPIR 158 | SPWSDWSSTGLSPFRSHMRKSPARRSPARRSPARRSLARYTEHLTSDSETEVDYDARNVI 159 | RSQVGPGGVCERFAADPTRNPVTGSPLSRNDPLYTDLMEICKGYPDTPLTKSLTGEGTDD 160 | DTCEAFCRDPTRNPVTGQKMRRNGIEYQMFAEECDCSGISRPSGVSRTSGTSGSSGSSAS 161 | SRPPNSFEAPGASSRPPNSFEASGAARVPGTPSVSRGEPRWMSSISTRHNYDESNPMSVA 162 | FRLRHVKDIRKFLRTVRPGRSGFCATDKGGWLGSAAVSDNVIGQGSWGSVHMVKFRDFPE 163 | EFVVKEAVLMSVSEKHRYKPTVVWDEWAAGSVPDEVVVNNMVTEIAATGMTPFVPLTAGA 164 | GACDSCNPQLLEKAAKVTKCYLQAMEAADFSLDRVLPTMSPDQAASALAQILLGLQSLQT 165 | TLGIMHNDIKAHNILVKRVPPGGYWKVTDSFNGQVFYIPNEGYLCMLADYGVVRLVKPAV 166 | GMDTLYGTRNARFVPRDVGRWGKGAGTEYVVTPIRSKISVVVRGGRFVGVEPNKAVRYWK 167 | NTDTSKVGDVITTNNVFYMGYDIEPDMQVQLDDTNSFPVWESRGDVADCVRTFVGGKRAS 168 | QPGFHRLFYKKTGSAWEKAAETVAKQNPLFSGFTLDGSGLKYIRAATACAYIFPGMAVPR 169 | PGEREIESFTM 170 | >sp|Q6GZV5|020R_FRG3G Uncharacterized protein 020R OS=Frog virus 3 (isolate Goorha) GN=FV3-020R PE=4 SV=1 171 | MLQNYAIVLGMAVAVAIWYFFKIEEEAPPGPNPPKPDPPKPDPPKMHMPKKKPHWMDPHL 172 | TGSQTVQYSRNRSMGDPIRGDLPIIPRDDGWFSTAANPAHTLHAGALSMIAPASTGGGLT 173 | VNKLISAYADKGNAMSGRHNSPSYYGSS 174 | >sp|Q6GZV4|021L_FRG3G Uncharacterized protein 021L OS=Frog virus 3 (isolate Goorha) GN=FV3-021L PE=4 SV=1 175 | METIVLVPRQDQETFSDSRPVLDGDLMLEFLENKIRHPVRRRQPRVVPVTSSDPEVVDDE 176 | DDEDQSDDSDEERQRLYFQYMVLKRMYPTEVIPEMTTYSNVAIMREKYKLLTRRLSLDKH 177 | INEWKKYIIVGMCIMELVMTKLNFDASGFARYQIKSLGAYDQLLAEMADKYYEATPQSSV 178 | EMRLMTTMGMNMAVFMLGKLLGGQMDFLGLLENAFGSSS 179 | >sp|Q197D8|022L_IIV3 Transmembrane 
protein 022L OS=Invertebrate iridescent virus 3 GN=IIV3-022L PE=4 SV=1 180 | MSFVHKLPTFYTAGVGAIIGGLSLRFNGAKFLSDWYINKYNDSVPAWSLQTCHWAGIALY 181 | CVGWVTLASVIYLKHRDNSILKGSILSCIVISAVWSILEYNQDMFVSNPKLPLISCAMLV 182 | SSLAALVALKYHIKDIFTILGAAIIIILAEYVVLPYQRQYNIVDGIGLPLLLLGFFILYQ 183 | VFSVPNPSTPTGVMVPKPEDEWDIEMAPLNHRDRQVPESELENVK 184 | >sp|Q6GZV2|023R_FRG3G Uncharacterized protein 023R OS=Frog virus 3 (isolate Goorha) GN=FV3-023R PE=4 SV=1 185 | MRVSQTSWIVSRMLEYPRGGFFYSTDMACMMEGLAEELAGGHKDEVLIVSGRNGDDEVFK 186 | EFPNVRAADGLKGPNSIDPETKLVLIIDVSPTAISNALAATLQEFLIPVWVFCNHTRTLT 187 | ASVTRRLGYKLWPKGTYTPYICEKAGVSEVVTYNQPESEKFVAFMSAARQIMDKRKSKKT 188 | MQELAFLPHLAFAEIAMEGDQEMTPTLTAKKVSDIKDEQVNELASAMFRTGKLSHLDMLS 189 | VPDCVYSCGEALKREVAKAKANRERFVVALRNAQYKKYTAGLLEAGTPVKTFTEVIKNWG 190 | AYDTIFLPMGVDWTYTGGSNLIRMMMTPGSHKTVTFVPESDDVHEFCHNKPTVNTMGVES 191 | AATGLAAELNRRWRRDNPVDAS 192 | >sp|Q197D7|023R_IIV3 Uncharacterized protein 023R OS=Invertebrate iridescent virus 3 GN=IIV3-023R PE=4 SV=1 193 | MGSYMLFDSLIKLVENRNPLNHEQKLWLIDVINNTLNLEGKEKLYSLLIVHNKQQTKIYD 194 | PKEPFYDIEKIPVQLQLVWYEFTKMHLKSQNEDRRRKMSLYAGRSP 195 | >sp|Q6GZV1|024R_FRG3G Uncharacterized protein 024R OS=Frog virus 3 (isolate Goorha) GN=FV3-024R PE=3 SV=1 196 | MWQYLPILLMTMISQLEWTVAAVKRYPAGGFITGDKLSRVFEALPWRVAVVSDEPEKYEG 197 | FPILTEEDPAVFEDADCILFAVSDPKCVTGAMKSVFMASSKTAWVVYDGTETRATVRSWM 198 | RRLWRAETYVPLLTHRGFVTDVCVYSQPDSERYVSVMTATAHFYSNRLEVLEEMAFVPHL 199 | AYAKLAMGRYTVLDGCMSVKGSADVAPLNRSMWFLTAAAIPHGEIDTDSLFSDPGAVYSC 200 | GSALREALGSLPEGSTSVVAVRNSSYRKYVRGILGPNFRVETFTNVVKTWGVYDYVLLPM 201 | GISDSYKQGRDLMEKLEMPGGHRVVTFAPENYTVNEVHLNRPLKYAIKRMDLITPMVLRH 202 | VSLNK 203 | >sp|Q197D5|025R_IIV3 Uncharacterized protein 025R OS=Invertebrate iridescent virus 3 GN=IIV3-025R PE=3 SV=1 204 | MNYSVIWAITILILGLVLTLAWARQNPTHPINPLVLNYHTKPSPKRHRMVLVVESFASVD 205 | ALVELVENILSQTIRVASITVVSQRPDHLRQVPLLHQTCTFSRASGLSALFKETSGTLVV 206 | FISKEGFHHFQSPTLLETIDQRGVTAEQTLPGIVLRNTDMPGIDLTTVYRQQRLGLGN 207 | >sp|Q91G70|026R_IIV6 Uncharacterized protein 026R 
OS=Invertebrate iridescent virus 6 GN=IIV6-026R PE=4 SV=1 208 | MAISFFSDTSYIIKSILLISLFSIIPLEDEVTKLKSSSLRETSELNKEEGITTCLYTFN 209 | >sp|Q6GZU9|027R_FRG3G Uncharacterized protein 027R OS=Frog virus 3 (isolate Goorha) GN=FV3-027R PE=4 SV=1 210 | MANFLQDVNCETVSEYDGPDASIPEGVWEGYVGHDHAALWRTWSYIYECCKKGTLVQFRG 211 | GKLVTFSMFDNPRFSNGAGIDAQKVLDLEDRARELQGYGPVNRRTDVMPVDRWTLNGPLL 212 | RYDKMVLEDVGGTGSNRTMVRAQLEALQDERDVPDCDFILNVRDYPLLRRDGTRPYPQVY 213 | GKGRRLPEPWARGGPHVPVVSMCSGPTYADIAVPTYECIAHAYTSSGRTLPAGGRFVKTP 214 | SADSLPAWRDRKALAVFRGSSTGAGTSTEDNQRLRALQISMSRPDLADVGITKWNLRPRK 215 | TERYDGYRIIEPWQFGRKSPYPAAAKPMTPEQIAGYKYVLCLWGHAPAFRLARDLSLGSV 216 | VLLPSRPPGQEGLDMWHSSVLKPWTHYIPVRGDLSDLEKRIEWCRDNDAECEKIAAAGME 217 | ASLNLLGWEGQLDRWMDVLRSVRLECCPGGYDMPPSPSLVSDSMCVRQMVSFPRYEDIPQ 218 | PSSPMPVLPRCSGTLRGWGLAASLGWDLGDAAEVLNVKRSTAVLSKTVFNNLIYRTPHLR 219 | YTFGVAASDPESTAAVILSEKLKGAVTMRSWLEDSRAWARGRNVASVLCQVSQALLEAQA 220 | AAGTVFGDLSLDTILVVPNPLPEYIYHDGTGGSFGLKLMPGDKWAVVTYGDYTRARIRVL 221 | KGDGRKGHLAVVGPQPVYTKLSERKWHDICCLVSCILRTARTSKRPAARALAAAVARAAG 222 | VKRPDMDAEALEATPYEAREEPLTRFGPAEFINGLVREFKLEEGGWAWTEKNKNIEKVLR 223 | PWERGLPLYPVRLWLSGDRKEAMRACVSSVLKAAPPRPATAAGAHHTFQTYLRTVGADLD 224 | SFPEWAAAAAHLKRLWKSPGSLPAGSASLRAPSVPPPCHGPAWALPFGTRTPGEFPSWFD 225 | PSCLGDWTEAMGQGAPLDLENGPAKAGSDPVAVHSAWETASQLSFEEDGWTESEPRPVRR 226 | EAHVRAKERH 227 | >sp|Q6GZU8|028R_FRG3G Uncharacterized protein 028R OS=Frog virus 3 (isolate Goorha) GN=FV3-028R PE=4 SV=1 228 | MDPNVLKNLSLMLSRRAGVSGGEPPRMIEWPEYGQRSEPCGSQTVWYVDRPVGAPFIKAF 229 | ASEVEERGGGILIHAGKVTFDSAKKLAAMKEVQVFDVKYFSFDLMAVVPEHSLWKRPGDK 230 | GYPEKTAQSFPKIMASDPVCRYHGFRPRDLVHVKPHDVYIVC 231 | >sp|Q197D2|028R_IIV3 Uncharacterized protein 028R OS=Invertebrate iridescent virus 3 GN=IIV3-028R PE=4 SV=1 232 | MDQYITLVELYIYDCNLFKSKNLKSFYKVHRVPEGDIVPKRRGGQLAGVTKSWVETNLVH 233 | FPLWLSEWDETRWGVLNHYPLESWLEKNVSSKVPVNPVMWNFDSECLVYFFHNGRRTPFL 234 | TPKGVVKLQVFYNLMSGKEVEWFYEISNGFLKPHLHQLSNVRELVRLKHAPVVVGAGGPR 235 | 
LVTEGVYSLRDDDFVVDCSQIAAVKRAIERGESHQSLRKYQCPLFVALTDKFQDTVKLVE 236 | KKFEVQLNELKAETTIQVLREQLRQEKKLKEQVLSLTQSFIPTIGGRGEEFGKPDETPSS 237 | ASVGDDNFPSSTNHTFEARRRPSSLSSGGALKPSKIL 238 | >sp|Q6GZU7|029L_FRG3G Uncharacterized protein 029L OS=Frog virus 3 (isolate Goorha) GN=FV3-029L PE=4 SV=1 239 | MRRMRSGFKHCAIPIDICRWEYILSPLILQDLQGPQQGGSVAVDVTVRCSVRFVHLPHYG 240 | GFNHGTVQRRVDPDDCRILRQLHIVLSLRLCLIDRDRL 241 | >sp|Q91G67|029R_IIV6 Uncharacterized protein 029R OS=Invertebrate iridescent virus 6 GN=IIV6-029R PE=4 SV=1 242 | MVERLGIAVEDRSPKLRKQAIRERFVLFKKNTERVEKYEYYAIRGQSIYINGRLSKLQSE 243 | RYPKMIILLDIFCQPNPRNLFLRFKERIDGKSEWENNFTYAGNNIGCTKEMESDMIRIFN 244 | ELDDEKRDV 245 | >sp|Q197D0|030L_IIV3 uncharacterized protein 030L OS=Invertebrate iridescent virus 3 GN=IIV3-030L PE=4 SV=1 246 | MHPTLKSNAGEWSQPIVNLFYSNFSGNCKALLQYIDNAGITDHIPIKFINVDNPTMRSVV 247 | SAKISHVPALVVLQDDQMSLYVAESVWEWFDNYRTPPPLADGATVDSQASENGEKEAQPT 248 | PPKEGLLTVLELAKQMRKEREQQT 249 | >sp|Q6GZU6|030R_FRG3G Uncharacterized protein 030R OS=Frog virus 3 (isolate Goorha) GN=FV3-030L PE=4 SV=1 250 | MSLYLLLGLKILRYLKMVIVLRCHSAFLLSVKFLREKRRLKMYLGIMLGF 251 | >sp|Q6GZU5|031R_FRG3G Uncharacterized protein 031R OS=Frog virus 3 (isolate Goorha) GN=FV3-031R PE=4 SV=1 252 | MDTPCKLFCIELKEGYVPGTVSHNHMMPYFLAGSGWPVEITFHAATVELKTQEDFPPAIG 253 | IGIHNMTGVPVVETPHSGRMHFVFIFHSKSGRFSATYKCIPVPVVVRDYKTVASVSLTTL 254 | SLEDIVGVKLFGTACDRSS 255 | >sp|Q6GZU4|032R_FRG3G Uncharacterized protein 032R OS=Frog virus 3 (isolate Goorha) GN=FV3-032R PE=4 SV=1 256 | MVTVTELRATAKNLGIRGYSTMRKAELEEAIRDHGRVSEARVASPRRSPARSPRKSPAGR 257 | KSPSKSPAGRKSPSKSPAGRKSPSKSPAGRKSPSKSPAGRKSPSKSPAGRKSPSKSPVRK 258 | SPSKSPVRKSPRKSPAAKLQAGDRPASMNICKNLPKQRLVDIATEMGIDLNRESDGKPKT 259 | KDQLCADIMGGAGRKSPRKSPSRSPVRKSPSRSPVRKSPVRSPRKSPVRVPSPVRSPVKE 260 | KTPVRSPARSEDAGSDLAPRPRRGKAVRLDYDEDDDYSYGASTDNLFSGNKEIPFPTRKR 261 | RTRKPEKVFVDVRSPHTLTDSEDEDDMVEVPELEDKEITMPGVLSPYSDEIVERGYVSQG 262 | GADYINYIYRTEYALESDESFARGARPKTNKRDSDRAVREAAAAAAIARALDRRSQSGND 263 | 
EPAVRRRSAPTDSSRESRRDREPQRDIAEPQRDIAEPQRDIAEPQRDIAEPRKVRFREAG 264 | SADVRVFERDEPKEYGRVPVRPPLFMPAGEPLQPLKFRPKTPKIDDTIHRAQMVLPSKPS 265 | QKETDNYYKQFAGEAVRPSEPVQWDKDDQVLYHKVPAWDDSSYAAAVSAWPMSVDPKQAE 266 | SVFAEFEQLSAQDSDLIKVRKSIMKALGY 267 | >sp|Q197C8|032R_IIV3 Uncharacterized protein 032R OS=Invertebrate iridescent virus 3 GN=IIV3-032R PE=4 SV=1 268 | MKLMLEIVKNISEPVGKLAIWFNETYQVDVSETINKWNELTGMNITVQENAVSADDTTAE 269 | ETEYSVVVNENPTRTAARTRKESKTAAKPRKMQIPKTKDVCQHIFKSGSRAGEQCTTKPK 270 | NNALFCSAHRVRNSVTSNATEASEKTVAKTNGTAAPQKRGVKSKSPTVIPSDFDDSDSSS 271 | SATRGLRKAPTLSPRKPPPTTTTASSAQEEEDEQQAHFSGSSSPPPKNNGNGAVYSDSSS 272 | DEDDDDAHHTTVIPLLKKGARKPLDENVQFTSDSSDEED 273 | >sp|Q91G65|032R_IIV6 Uncharacterized protein 032R OS=Invertebrate iridescent virus 6 GN=IIV6-032R PE=4 SV=1 274 | MGVYKFCYNKKKEVGQVAVLQKERLIFYIVTKEKSYLKPTLANFSNAIDSLYNECLLRKC 275 | CKLAIPKIGCCLDRLYWKTVKNIIIDKLCKKGIEVVVYYI 276 | >sp|Q6GZU3|033R_FRG3G Transmembrane protein 033R OS=Frog virus 3 (isolate Goorha) GN=FV3-033R PE=4 SV=1 277 | MSGIQLDKETILKYSSAALVALSAVVAVMMVSNNSESWKPILVGAVVAASGAAAYQSWWP 278 | KQS 279 | >sp|Q6GZU2|034R_FRG3G Uncharacterized protein 034R OS=Frog virus 3 (isolate Goorha) GN=FV3-034R PE=4 SV=1 280 | MSAGHLRKRRYVKVGDIHDMGPILGGVHDVSSPPPNVHYQQQDDHNDPGCMIHYPGEGWF 281 | SSMSTVEKLMLGAVIVAAVVVGVRMFMSSGNSSATSSFSTAPYFMG 282 | >sp|Q91G63|034R_IIV6 Uncharacterized protein 034R OS=Invertebrate iridescent virus 6 GN=IIV6-034R PE=4 SV=1 283 | MKQNLLILLSLLLVVVAIMWWLYEKKKEVPLPPPTPPTPPTPTGVPFLPMYAGLSSPVQY 284 | NPADYLYGWEKYPHGPAWSFGDRVPYAEAKNALGGHFGGGLYSPRDPILESKLGGVYIGN 285 | DLYTVGGVGGDGHW 286 | >sp|Q6GZU1|035L_FRG3G Uncharacterized protein 035L OS=Frog virus 3 (isolate Goorha) GN=FV3-035L PE=4 SV=1 287 | MIWVWPATGRGPGWWGIRRDPWGPEDSSCPCPRLPLSPTGPVGHSGPMGQCHPPVPSYRR 288 | GRRDQKDPPLRRQTSPPLPPHPWDRPLPWVPWIPLDLCRHGDPRHPWDPGAQSGYPRVRE 289 | VRGVPADRPLRPCPRQGPRTAATRKESSCRIPS 290 | >sp|Q6GZU0|036L_FRG3G Uncharacterized protein 036L OS=Frog virus 3 (isolate Goorha) GN=FV3-036L PE=4 SV=1 291 | 
MTLPDVSGSLGPLSPGTNGTLWAVGPRVVRYQIPALAYLTPGALWTLRTRGTSLTSGPIG 292 | TRDSIRTLHAVHYDVWTLGPLGPLGPTSPRGPSARPCRLQTDSLHSTDARCYRCKMLQMQ 293 | DATDARCKKDMSPFSFPGILEPSHLVGSLKSPRVDPGVPCRPLALWGHPYQCLRLVPLYQ 294 | RCLHPHCFPAAPGRPWDPWCRPDRLDP 295 | >sp|Q197C3|037L_IIV3 Uncharacterized protein 037L OS=Invertebrate iridescent virus 3 GN=IIV3-037L PE=4 SV=1 296 | MNAATSGIQLNAQTLSQQPAMNTPLIHRSFRDDYTGLVSAGDGLYKRKLKVPSTTRCNKF 297 | KWCSIGWSIGALIIFLVYKLEKPHVQPTSNGNLSLIEPEKLVSESQLIQKILNATTPQTT 298 | TPEIPSSTEPQELVTEILNTTTPQTTTPEIPSSTEPQELVTEIPSSTEPQEEIFSIFKSP 299 | KPEEPGGINSIPQYEQESNNVEDEPPPNKPEEEEDHDNQPLEERHTVPILGDVIIRNKTI 300 | IIDGGNETIIIKP 301 | >sp|Q6GZT9|037R_FRG3G uncharacterized protein 037R OS=Frog virus 3 (isolate Goorha) GN=FV3-037R PE=4 SV=1 302 | MQVFLDLDETLIHSIPVSRLGWTKSKPYPVKPFTVQDAGTPLSVMMGSSKAVNDGRKRLA 303 | TRLSLFKRTVLTDHIMCWRPTLRTFLNGLFASGYKINVWTAASKPYALEVVKALNLKSYG 304 | MGLLVTAQDYPKGSVKRLKYLTGLDAVKIPLSNTAIVDDREEVKRAQPTRAVHIKPFTAS 305 | SANTACSESDELKRVTASLAIIAGRSRRR 306 | >sp|Q6GZT7|039R_FRG3G Uncharacterized protein 039R OS=Frog virus 3 (isolate Goorha) GN=FV3-039R PE=4 SV=1 307 | MTSYCDTLKALAAESDSTGSERATIRMYMAMFSDASLRPAVSDTVASILGTDSLDHEDAE 308 | MMLKFKLLFFSGSANASATSHYPKADDPQRFARSVSRGPSRVRRPARNSASRPVRR 309 | >sp|Q6GZT6|040R_FRG3G Uncharacterized protein 040R OS=Frog virus 3 (isolate Goorha) GN=FV3-040R PE=3 SV=1 310 | MIRALCTIVLIAAGVAVALYLSLVYGYYMSVGVQDASWLTALTGNRPDAKVPFFDKAVGE 311 | APEDKVAYTERPYPVSSTQSPTTTQSPTTTTLKPTTMAVLASIGATPTPVVCHNVRGDMQ 312 | GIACNVVMKKTVAAALKVQPEAKKDNVNAQYRYGMWTPLRRSRSPFGVWNIPKKLAIAAP 313 | DV 314 | >sp|Q197C0|040R_IIV3 Uncharacterized protein 040R OS=Invertebrate iridescent virus 3 GN=IIV3-040R PE=4 SV=1 315 | MVTMAIKNFHIQDDRLKNGRGNKTMSESDYNTSDSGGWVLVRKKRDRSTRPPDVVDRWSN 316 | STSTFPMGLDQIKIKRNGCVNTY 317 | >sp|Q91G57|041L_IIV6 Uncharacterized protein 041L OS=Invertebrate iridescent virus 6 GN=IIV6-041L PE=4 SV=1 318 | MNFIRENETKYVLSTYQSMTPKNLMEYLLKYNYDNDCVYIFNNLPKDLQKEVDDLAKEVV 319 | 
KANDEQIKAQDEQIKANDQKLKQLDVMIEFMKQYNKQLDNDIYLLEHQLENKRELNRQLG 320 | IF 321 | >sp|Q6GZT5|041R_FRG3G Uncharacterized protein 041R OS=Frog virus 3 (isolate Goorha) GN=FV3-041R PE=4 SV=1 322 | MRVVVNAKALEVPVGMSFTEWTRTLSPGSSPRFLAWNPVRPRTFKDVTDPFWNGKVFDLL 323 | GVVNGKDDLLFPASEIQEWLEYAPNVDLAELERIFVATHRHRGMMGFAAAVQDSLVHVDP 324 | DSVDVTRVKDGLHKELDEHASKAAATDVRLKRLRSVKPVDGFSDPVLIRTVFSVTVPEFG 325 | DRTAYEIVDSAVPTGSCPYISAGPFVKTIPGFKPAPEWPAQTAHAEGAVFFKADAEFPDT 326 | KPLKDMYRKYSGAAVVPGDVTYPAVITFDVPQGSRHVPPEDFAARVAESLSLDLRGRPLV 327 | EMGRVVSVRLDGMRFRPYVLTDLLVSDPDASHVMQTDELNRAHKIKGTVYAQVCGTGQTV 328 | SFQEKTDEDSGEAYISLRVRARDRKGVEELMEAAGRVMAIYSRRESEIVSFYALYDKTVA 329 | KEAAPPRPPRKSKAPEPTGDKADRKLLRTLAPDIFLPTYSRKCLHMPVILRGAELEDARK 330 | KGLNLMDFPLFGESERLTYACKHPQHPYPGLRANLLPNKAKYPFVPCCYSKDQAVRPNSK 331 | WTAYTTGNAEARRQGRIREGVMQAEPLPEGALIFLRRVLGQETGSKFFALRTTGVPETPV 332 | NAVHVAVFQRSLTAEEQAEERAAMALDPSAMGACAQELYVEPDVDWDRWRREMGDPNVPF 333 | NLLKYFRALETRYDCDIYIMDNKGIIHTKAVRGRLRYRSRRPTVILHLREESCVPVMTPP 334 | SDWTRGPVRNGILTFSPIDPITVKLHDLYQDSRPVYVDGVRVPPLRSDWLPCSGQVVDRA 335 | GKARVFVVTPTGKMSRGSFTLVTWPMPPLAAPILRTDTGFPRGRSDSPLSFLGSRFVPSG 336 | YRRSVETGAIREITGILDGACEACLLTHDPVLVPDPSWSDGGPPVYEDPVPSRALEGFTG 337 | AEKKARMLVEYAKKAISIREGSCTQESVRSFAANGGFVVSPGALDGMKVFNPRFEAPGPF 338 | AEADWAVKVPDVKTARRLVYALRVASVNGTCPVQEYASASLVPNFYKTSTDFVQSPAYTI 339 | NVWRNDLDQSAVKKTRRAVVDWERGLAVPWPLPETELGFSYSLRFAGISRTFMAMNHPTW 340 | ESAAFAALTWAKSGYCPGVTSNQIPEGEKVPTYACVKGMKPAKVLESGDGTLKLDKSSYG 341 | DVRVSGVMIYRASEGKPMQYVSLLM 342 | >sp|Q6GZT4|042L_FRG3G Uncharacterized protein 042L OS=Frog virus 3 (isolate Goorha) GN=FV3-042L PE=4 SV=1 343 | MFAPPSSLFVPATAPAPSTSGFTIPANLRRDAYVCPFATAEKERKEREQQQPASKGLNHD 344 | LAAQEPLHPSLVSRFPSNYRGSFLR 345 | >sp|Q91G56|042R_IIV6 Uncharacterized protein 042R OS=Invertebrate iridescent virus 6 GN=IIV6-042R PE=4 SV=1 346 | MATLQQAQQQNNQLTQQNNQLTQQNNQLTQRVNELTRFLEDANRKIQIKENVIKSSEAEN 347 | RKNLAEINRLHSENHRLIQQSTRTICQKCSMRSN 348 | >sp|Q91G55|043L_IIV6 Uncharacterized protein 043L OS=Invertebrate 
iridescent virus 6 GN=IIV6-043L PE=4 SV=1 349 | MDLINNKLNIEIQKFCLDLEKKYNINYNNLIDLWFNKESTERLIKCEVNLENKIKFNQKY 350 | NSDTIKIMNILFLICSDGVFGKIENNDVKPLTDEDEKICVKFGYKIMIGCLNDIPI 351 | >sp|Q6GZT3|043R_FRG3G Uncharacterized protein 043R OS=Frog virus 3 (isolate Goorha) GN=FV3-043R PE=4 SV=1 352 | MEEVDGCAGPNSEAGALTAGALTAGAFAVTAGAGVAGAGVAGVGWCSWCSWCSWCWCSWC 353 | SWCWCSWCWCSWCWCSWCWCSWCWCSWCWCSWCWCSWCLSKGWEDRGGLEGCKSCKGWCL 354 | CSHCWCWCSWCWCSWCSWCLSKGWEDRGGLEGCKSCKGWCLCSHCRCWSIN 355 | >sp|Q197B6|044L_IIV3 Putative serine/threonine-protein kinase 040L OS=Invertebrate iridescent virus 3 GN=IIV3-044L PE=3 SV=1 356 | MPLSVFAEEFAEKSVKRYIGQGLWLPCNLSDYYYYQEFHDEGGYGSIHRVMDKATGNEVI 357 | MKHSYKLDFSPGILPEWWSKFGSLTDDLRERVVSNHQLRVSREAQILVQASTVLPEMKLH 358 | DYFDDGESFILIMDYGGRSLENIASSHKKKITNLVRYRAYKGNWFYKNWLKQVVDYMIKI 359 | YHKIKILYDIGIYHNDLKPENVLVDGDHITIIDFGVADFVPDENERKTWSCYDFRGTIDY 360 | IPPEVGTTGSFDPWHQTVWCFGVMLYFLSFMEYPFHIDNQFLEYALEGEKLDKLPEPFAQ 361 | LIRECLSVDPDKRPLTSLLDRLTELHHHLQTIDVW 362 | >sp|Q6GZN9|044R_FRG3G Uncharacterized protein 044R OS=Frog virus 3 (isolate Goorha) GN=FV3-044R PE=4 SV=1 363 | MVVRLAVRANMPKDSLARDSLPKDSLARDFLSDKTSPTDGTQSSDRYLLKIVTAVDYVHL 364 | T 365 | >sp|Q91G54|044R_IIV6 Uncharacterized protein 044R OS=Invertebrate iridescent virus 6 GN=IIV6-044R PE=4 SV=1 366 | MYLYQKIKNCLLLTMYQKKNKSHMYDILQSYLYYQKPIPKNLYSHPKKNLYLNIHHYKNI 367 | NKDLM 368 | >sp|Q6GZT1|045L_FRG3G Uncharacterized protein 045L OS=Frog virus 3 (isolate Goorha) GN=FV3-045L PE=4 SV=1 369 | MDDVEYRTEFSARERAGGDIEEGLELFGPATFKGMEGDPVQRFYNGIESAGRNLIRDGHI 370 | KLNKQEQTRLLSSVLRITYPNYKNPMGTVLGFYVTDGGRGPIDKGRLSHVQSFMEEVTDM 371 | DLRDLIRYCRLWLALK 372 | >sp|Q197B5|045R_IIV3 Uncharacterized protein 045R OS=Invertebrate iridescent virus 3 GN=IIV3-045R PE=4 SV=1 373 | MYKCSQGAMNTEKVMEKFVIQSRFREMYPDKAKAIAGMTVPARYADSVEDMVAFANEKIR 374 | VQKAKVEAEKNARQAMGAPAKFDKYGKYKY 375 | >sp|Q6GZT0|046L_FRG3G Uncharacterized protein 046L OS=Frog virus 3 (isolate Goorha) GN=FV3-046L PE=4 SV=1 376 | 
MYSVRNSGCSVGCSPRQGASPIMFGPSLGAMLSAPVVRASAPVVRASSPVVKRKSLVKRK 377 | SPVKRSPLKKRSQMRTSPCEA 378 | >sp|Q6GZS9|047L_FRG3G Uncharacterized protein 047L OS=Frog virus 3 (isolate Goorha) GN=FV3-047L PE=4 SV=1 379 | MHTISDWLNLNAPVRCFHVRQLSESEWRFTVNDTIRVVASVDGPWTVDAKGVEDLKMHKL 380 | YVPGPAKCWTRARDKAMAAALAEAVSESETCAADIVRPAVAKNTPRRPVVKRRVDAVKPA 381 | APDNLESWTKDDWYELDL 382 | >sp|Q6GZS8|048L_FRG3G Uncharacterized protein 048L OS=Frog virus 3 (isolate Goorha) GN=FV3-048L PE=4 SV=1 383 | MTAKTLDPSDYNVRDDSTTGMFTPVDRFVCDPESDRIIVRKIPPEWTIGNSMRFVHFTKE 384 | FTQTFDPSESPSNIVRHTNGKKK 385 | >sp|Q6GZS7|049L_FRG3G Putative SAP domain-containing protein 049L OS=Frog virus 3 (isolate Goorha) GN=FV3-049L PE=4 SV=1 386 | MAAPKAEGEDKPKRVRKSRAKPKPETKEVKKPKSKEFCTADDSSDDYNEVKPSPAMIALM 387 | AVKEIPESEDVPDKSDSEAEAPVPAIVKKRRTPPKKAESSDDKKLDEATGEQVIDEDALS 388 | KLTIQTLKGMCKTRNLKISGNKAALVQRLIEADGIAHIIPTTATVVQKVKKTKRPAVFSK 389 | VDSELKLIPCPGREHMLMDEATGLVFLDEDPSTAVGFIEHGEVFGLDSEHMTVCKNMGIR 390 | YSWTEDYLC 391 | >sp|Q91G50|049L_IIV6 Transmembrane protein 049L OS=Invertebrate iridescent virus 6 GN=IIV6-049L PE=4 SV=1 392 | MDKIEELKIEELKIEIPQRKTKFFHDSENSDKRDEEETLNPTITSKAKILIKSKNFWIET 393 | LIFVISVFGALCVAFGIMLIGFLLWLVSNTISILYFIKQKQYPLSLQQMVFLITTCIGVY 394 | NNV 395 | >sp|Q197B1|049R_IIV3 Uncharacterized protein 049R OS=Invertebrate iridescent virus 3 GN=IIV3-049R PE=4 SV=1 396 | MLRIENTVCKSACRVDSATAQPVYSSFDGENFKAEIHSKLDSFERKLNASPTYRDEEGGG 397 | NPEHYETLSQEINDLQSQIENLSLEVENLQGSSSSPSNVAAALAELSQSIRTIKEQLEAN 398 | RKERYNLTVTVANLTAAVNAAKKTGSESTTATATTTTNYETQLKAFEAQIKALDNQLQTQ 399 | KNLVQTTSVEAKNDRDSLRKTIEVIRLTVKTLQDQVESQTGPKKRRKSPIENQPTAGSEL 400 | ATLTTNLTFLTQRVEKLSQGVATHTTAMFTLEETMKKVHTTLQEATASNTNNIDAIRTRV 401 | QELADKIALFDQVQYSVGYEMAKKNPDSTKLRTDLDSAISTVNEEKKSLLTVKDSVQSLK 402 | TQLDELKRTLENDGDVSSLRQTVHDMASSIRDETATIYNKINALEEGLKRGGQTTTTPLT 403 | QLQTRVEEIDKTIVKWNNQHGEWTTRLNKLEAGVSNNQTLMNRFIQQVNGDVNPLKELPA 404 | ELETFKMTITNTWAQLNKKFLDFSAKTDTSVDNFTKKFTEIHPQIASLVDKMDQQIRDNP 405 | 
HTTEKLMDEIRQLKSAMTRLGTQSSGKPIFSINTKSSYNEKSKKTIFGHPGIIFPETVKI 406 | SSIYITLAAKEADGKEDARLFELTATSTHNNITSTIKQFEKKCTEETILEDYNPPLVIDA 407 | QTKLVLSCNQKVFGVAIFTLQYS 408 | >sp|Q6GZS6|050L_FRG3G Uncharacterized protein 050L OS=Frog virus 3 (isolate Goorha) GN=FV3-050L PE=4 SV=1 409 | MQVYSPSKISQQLETFLNSVANGLGHTMSHAMSQTFSETIVASVAKKAPKTSVLAAAQAA 410 | MQAEDKVSKPKKVKKTKSYADAAPKRVKKVKAPKEDTVVSEPEEAVVEQQEKQQPEKAVV 411 | EQQEKQQPEEAVVEQQEKQQPEEAVVEQQEKQQPEEAVVESEQPEQPEQPERQQQAQPER 412 | QQQAQPERQQQAQPEEAEDAEQEPVEQPTAKPKKVRKTQTESEDKPKRG 413 | >sp|Q6GZS5|051R_FRG3G Uncharacterized protein 051R OS=Frog virus 3 (isolate Goorha) GN=FV3-051R PE=4 SV=1 414 | MTVRITTTTGYPHGLLNPTVPLPMSRYTLVQETAQDILYTPMGKELALMGTCTKTECKYV 415 | RMQEDAHLLAEASKKALTMRLEQNPELKDLLASSGSQPIVYGDTRLVAHLTLLRGVSVLS 416 | PEGMVFSEDAMRKLYKGTIDMFVSDPSSLLNVDRATLTLESLRAMVKASGNWPSTSATAA 417 | VAIPEAPVTGVSVSDKVVMEHSASVYATQKMDFERSLLIRHLLAMDPAAEADVSHLVSRM 418 | DARTRAASSRLAAMYHDGLLDSAVTDGLVPPDQRLLEPMSTPSTPEVHAPQDGMSFEVPH 419 | VLTFAGGPVKVDDHVYDTPLHYAYNLAIRRMFADFGEGDLDDVHVSQVSVIYSDMLDKWI 420 | DAMYPQTLWRLMTEKFSGNQSCLAVLLSTDGADVKWTGRTEEESFLISDMMGQIKTGWIR 421 | SGPPSSSPLSSADIAGTDFFYGWLSYMSRTYATALNVISEMTLARLLDLPDIPEEVRQPT 422 | DREQAALGSDYVRSAWRVCYSEFVHKFEGKNLFASVDYCVKTHLKALKVSRDSVTGTAKT 423 | LSAKGYGTLIALPVIRLAMKS 424 | >sp|Q6GZS4|052L_FRG3G Uncharacterized protein 052L OS=Frog virus 3 (isolate Goorha) GN=FV3-052L PE=3 SV=1 425 | MVKYVVTGGCGFLGSHIVKCILKYAPEVTEVVAYDINISHIMTMWSSKLKVVRGDVMDVM 426 | ALAKAVDGADVVIHTAGIVDVWYRHTDDEIYRVNVSGTKNVLMCCINAGVQVLVNTSSME 427 | VVGPNTTSGVFVRGGERTPYNTVHDHVYPLSKDRAEKLVKHYTGVAAAPGMPALKTCSLR 428 | PTGIYGEGCDLLEKFFHDTVNAGNVAYGGSPPDSEHGRVYVGNVAWMHLLAARALLAGGE 429 | SAHKVNGEAFFCYDDSPYMSYDAFNAELFEDRGFGYVYVPYWVMKPMAAYNDLKRKFLGC 430 | FGVKRSPILNSYTLALARTSFTVKTSKARRMFGYMPLYEWSEAKRRTKDWISTLK 431 | >sp|Q197A7|053L_IIV3 Uncharacterized protein 053L OS=Invertebrate iridescent virus 3 GN=IIV3-053L PE=4 SV=1 432 | MEQYLQAFEFVEEMVVLPKYLSWELYHHLAVLLREKYPKTYKNKGYIFNIKVKSILDNRI 433 | 
TPTGQIVLVVMFQSDLYVPQVGHVFTERIRVNSVDDRYQWITIEPLTVFLRSNIPYKPNT 434 | LVTVQICSIKMDNTLCFGTILD 435 | >sp|Q6GZS3|053R_FRG3G Putative myristoylated protein 053R OS=Frog virus 3 (isolate Goorha) GN=FV3-053R PE=3 SV=1 436 | MGAAESINTVNIVTKAYAKIMTTMVTDQDITADQSQVFSIDHVKGDVVIKGDVFTQMLVI 437 | NLASLMKAIATQSAQDQLIDNIAQQAQAAVSGLNLAQYAYVSNNIDRLITACVQMSTDMR 438 | VSCKSKVTMTQSFSVTDVEGDVRVTGVKFNQFANILSSCAMDASVNNDQARDIVSQIKQR 439 | GDAKASGLDPTTLIVIIVLVMVGAPMGAGFMAGRRAIGPLLASVGLIGGGAVALGYVPRP 440 | VKIEGFSSDPDFTLAQPAATVKGLTFTAAVAKLKSTDGYGALFWKNYDVKGTTAVKLQET 441 | LSYFAPAGYDPASWAGVGDSAPPFRIFPGLYQGKGDPGARPRAAYGYAGPVAGPKKGDAY 442 | LDGDTGSYYVLGDSWKMRGTISGHQNGRTDYWGTVDPTTTAALTGSERYIWVDPFTLVKS 443 | TVWLFTGSPKKWTQQQTAPLDIPLTNTPSDFNVWVYKDDTAVQAVKWSSVGAGVAGAALT 444 | ASALLMPDSVASSEMSPAVGTGTPAIGTGSPAVGTGFPAHRG 445 | >sp|Q6GZS2|054L_FRG3G Uncharacterized protein 054L OS=Frog virus 3 (isolate Goorha) GN=FV3-054L PE=4 SV=1 446 | MPLRLCQGRKDRASDPVRDDGSPPRLFVSQVCRRAPKDPQGFQGHRGGQNVGDCSPIFHQ 447 | EKKQVMRRFYSLCEWK 448 | >sp|Q197A6|054L_IIV3 Uncharacterized protein 054L OS=Invertebrate iridescent virus 3 GN=IIV3-054L PE=4 SV=1 449 | MASEATVESVETKVESPIVESPVDQGLLESIKNFMDDLAVVTENENFQDYHTIVRRIDET 450 | KVKSYNKLVGGFREFFSLNKTALMEGNFEGLIEPHISYKTESGSFFFNFQTTYLETDEAN 451 | QEIIKEHLNHIWAQIRSENKCPEQLYIDEIFQKLKNKDQLTMDDQLIRDLFTKFQTANFN 452 | VTALIRAGCSKAREFLTNNGSQKSSSTFRLIETIENVNVDNFTQMDFMALISKISAIFSE 453 | SGESNPLNLCLSSLFGGGNTNQPSLTSMFPFPTPPLPDNVLLDNLDQLTLEQQSETTGDD 454 | DHHSFEPEK 455 | >sp|Q6GZS1|055L_FRG3G Putative helicase 055L OS=Frog virus 3 (isolate Goorha) GN=FV3-055L PE=4 SV=1 456 | MAKLLRLNAIDGDMPGAGEADLFTLAPGGKAYVPFAWGSRVLGCKPPPAHGAARERGSVS 457 | LRPHQKGVLKEAWGHVTSKGYCMLKCPPGFGKTFMALELWRRLGLPALVLTNRRVLATQW 458 | RDSATRFLPDSRVFTSGTPPPDALPRDLYVTGPASLRNRRIKAKDSPAKFLLIVDEAHQL 459 | TSPVSCRVLLSVRPSHLLGLSATPMRYDDYHAALGAFFGREDSTVDRVDPRPHEVEILST 460 | GVHIEPEFSKITGKMDWNSVIKAQSDNPERDAALADRMLLRPDVKWLVLCKRVDHVKRMA 461 | ETLSSRSGKKVDVLHGSKDEWDRDAWCVVGTYSKAGTGFDACERTGLCLAADVDRYFEQC 462 | 
LGRLRANGGTVLDPVDDLGVLRKHSKNREAVYIAAGCTIKKTKCDASRPSQSTPTPTGSS 463 | QPAPRTRRPQR 464 | >sp|Q67475|055R_FRG3G Uncharacterized protein 055R OS=Frog virus 3 (isolate Goorha) GN=FV3-055R PE=4 SV=1 465 | MLPQNSQVVHGVQDGPPVGPQPAQALLKVPVDVRRQAQAGPLAGVEPRPRLGVGAHHTPG 466 | VPVPLILGAVQHVHLLPGPRGQCLGHPLDVVHPLAQHQPLYVGPEEHPVGQGGVPLGVIG 467 | LGLDHRVPVHLARDLAKLGLYVHARAEYLHLVGPGVDPVHRAVLPAEKGPQCSVVVVVPH 468 | GCSAQTQQVRGPHRQEDPTRHGGRQLVGLIHNQEKFCGRVLGLDPPVSQRSRTRHIQVPG 469 | QGVGRRGAGCKDPRVRKEPGRRVPPLSRQHPPVCQDEGGQPQPPPQLQGHEGLAEAGRAL 470 | EHAVPLGGDVAPGLLQDPFLVRSERDGAPLPGCAVSGRRFASQDPGTPGEGDVGLSPGRE 471 | GKQVRFPRAGHVSIYRVEP 472 | >sp|Q6GZR9|056R_FRG3G Uncharacterized protein 056R OS=Frog virus 3 (isolate Goorha) GN=FV3-056R PE=4 SV=1 473 | MGVYSPAPRTPRGPWNIRIRFLSWSNSFLLEVKKNYGDVYLCDVCPVRPPGLQAPREQPV 474 | LHDRKVLHLYGQDSGVRDVQEVLWNPVSHQEVRRDNHGVLHGRRRARVRQAEEGRRRRQE 475 | GHRFRDWERLHQRVEGCPGLQGHGF 476 | >sp|O55703|056R_IIV6 Uncharacterized protein 056R OS=Invertebrate iridescent virus 6 GN=IIV6-056R PE=4 SV=1 477 | MEQKIDKKNSYSFGITSSTTVHVLGEVVAIGGILYYTHSQVNQLNTKIASLEKQILDLTN 478 | ILKHLSPHSFQQLQSSPTTQSTPPLPQSTPQSQQSQQSAVLPQRPSFLGGSTPKESQSFP 479 | GVETRSLGEKTTRGKLKSPHPSQIPVTQENFHYPYQTMNKTMRWEFLKPVESDESEDETN 480 | CQNGVCTLQKQEKNVTFNGSVEQLKYGNFSPQRSTKTMIGSPSIRPLIPHESIESVSESI 481 | ESSQDQSFSSRETISGNFKSKDDHQLSESEINQLVSKAIRTKK 482 | >sp|Q197A3|057L_IIV3 Uncharacterized protein 057L OS=Invertebrate iridescent virus 3 GN=IIV3-057L PE=4 SV=1 483 | MFKIYRTSCMGQHQSQFLHSGTVVQTVDGVTTTSFFQPCLVFPFSIEIISISLVSLNTTN 484 | ETKLIKMSIMENSELVDYNESAYTLAHLPGKQMTYLKYPAPFTIRQHQPFFFVHHGDLGD 485 | ASLTLEYRIK 486 | >sp|Q6GZR8|057R_FRG3G Putative phosphotransferase 057R OS=Frog virus 3 (isolate Goorha) GN=FV3-057R PE=4 SV=1 487 | MAMVSNVKYFADALQGTQGKVGTFTVLGENVFFKRGDGTDTVCGLEMVAGRILRARSDVH 488 | FCEPKYFVEMDDGEKVCSFELLDCKPLGSMAPGRKGKKSVGSVTQYLSGLYQTFAAAAAA 489 | HSVGVVHSDLHTGNVMLCPEPVSHYVYNLGGGEMLSLETNGVRAVVVDLGMARIPGKNTV 490 | ACDIFVHVGHVVNGRPDYAADVRTLTLGSCYDMVMMCASGKPSLEERMLCYEVMAAYNNL 491 | 
FAGVCAPSKGGWFVDHYPSMCAVMEATIPDSVASRGGGSWLLAVANMCKLLVPRPYVKRA 492 | CGKEKAHAMWMTLFTELGLTAKKSISKVDMVDAVQRLRAIADGSEIPPASLMKAACAVGL 493 | LTASVAEACYEKVEEIKASHVGMLRWKDALDAWVRLPVRCSGSVPKLGSTVILHTESGTE 494 | ETVVTQSMLRQIVKTREALDMAQAASDAVWTDTAYYEADDELMKGAHEESAEDFATSFLK 495 | GGTTGPIAKRCRLILKSL 496 | >sp|Q6GZR7|058R_FRG3G Uncharacterized protein 058R OS=Frog virus 3 (isolate Goorha) GN=FV3-058R PE=4 SV=1 497 | MEIRDTTVGLDRPVQSGAWDPGATREQLALAGISGRCDLGGRDEDLWSRKSQKDETFKDC 498 | ERRRGGEAPRLLCDKWRRDREAEAVRRRVQSQRRQGGREDPDGPARVRGRRRRVAVCGHS 499 | PDRGPAREEHKGPAGGSDHKDLGHHKGPEVGSGLRGPQARQVCGRSRGHEVQRVQKGLRQ 500 | GVQGHGCREVALDARLCLVRVLGRSGQKGRHGRLSVASRRLDGQEKVEKRGLFSLGA 501 | -------------------------------------------------------------------------------- /Presentations and Tutorials/Protvec demo 2017/figures/Protein_sentences.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Presentations and Tutorials/Protvec demo 2017/figures/Protein_sentences.png -------------------------------------------------------------------------------- /Presentations and Tutorials/Protvec demo 2017/figures/Protein_sentences2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Presentations and Tutorials/Protvec demo 2017/figures/Protein_sentences2.png -------------------------------------------------------------------------------- /Presentations and Tutorials/Protvec demo 2017/figures/Skip_gram_cbow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Presentations and Tutorials/Protvec demo 2017/figures/Skip_gram_cbow.png -------------------------------------------------------------------------------- /Presentations and 
Tutorials/Protvec demo 2017/figures/linear-relationships.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Presentations and Tutorials/Protvec demo 2017/figures/linear-relationships.png -------------------------------------------------------------------------------- /Presentations and Tutorials/Protvec demo 2017/figures/protein_vectors_wlabel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Presentations and Tutorials/Protvec demo 2017/figures/protein_vectors_wlabel.png -------------------------------------------------------------------------------- /Presentations and Tutorials/Protvec demo 2017/figures/protein_words.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Presentations and Tutorials/Protvec demo 2017/figures/protein_words.png -------------------------------------------------------------------------------- /Presentations and Tutorials/Protvec demo 2017/figures/proteinsequence.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Presentations and Tutorials/Protvec demo 2017/figures/proteinsequence.png -------------------------------------------------------------------------------- /Presentations and Tutorials/Protvec demo 2017/figures/relationships_plus_vectors2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Presentations and Tutorials/Protvec demo 2017/figures/relationships_plus_vectors2.png 
-------------------------------------------------------------------------------- /Presentations and Tutorials/Protvec demo 2017/figures/table_overview_vocab2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Presentations and Tutorials/Protvec demo 2017/figures/table_overview_vocab2.png -------------------------------------------------------------------------------- /Presentations and Tutorials/Protvec demo 2017/helpers.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | """ 5 | Copyright (C) 2017 by Samo Turk and Sabrina Jaeger, BioMed X GmbH 6 | This work is licensed under the Creative Commons Attribution-ShareAlike 4.0 License. 7 | To view a copy of this license, visit https://creativecommons.org/licenses/by-sa/4.0/ or send a letter 8 | to Creative Commons, 543 Howard Street, 5th Floor, San Francisco, California, 94105, USA. 9 | """ 10 | 11 | class nGram: 12 | """Class for storing n-grams with useful default depiction in jupyter. 13 | >>>nGram(split_ngrams('ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ',3))""" 14 | 15 | def __init__(self, n_gram): 16 | self.n_gram = n_gram 17 | def __len___(self): 18 | return len(self.n_gram) 19 | def __str__(self): # Stringe representation 20 | return 'n-gram with %i sentences' %len(self.n_gram) 21 | __repr__ = __str__ # Default representation 22 | def __contains__(self, word): # Contains method enable usage of "'Word' in nGram" 23 | if word in [item for sublist in self.n_gram for item in sublist]: 24 | return True 25 | else: 26 | return False 27 | contains = __contains__ # nGram.contains('word') 28 | def __iter__(self): # Iterate over sentences (for sentence in nGram:...) 
29 | for x in self.n_gram: 30 | yield x 31 | def _repr_html_(self): # default jupyter representation 32 | colors = ['Red','Maroon','Yellow','Olive','Lime','Green','Aqua','Teal','Blue','Navy','Fuchsia','Purple'] 33 | html = "" 34 | for i,ng in enumerate(self.n_gram): 35 | ng_2 = '' 36 | for n,c in zip(ng, colors[:len(ng)]): # depicts only as many as we have colors 37 | ng_2 += '' %c 38 | ng_2 += n 39 | ng_2 += '' 40 | html += " "*i + ng_2 41 | if len(ng) > len(colors): # append ... if we run out of colors 42 | html += "..." 43 | html += "\n" 44 | html += "" 45 | return html 46 | 47 | class DfVec: 48 | """ 49 | Helper class to store vectors in a pandas DataFrame 50 | 51 | Parameters 52 | ---------- 53 | vec: np.array 54 | """ 55 | def __init__(self, vec): 56 | self.vec = vec 57 | def __str__(self): 58 | return "%d dimensional vector" % len(self.vec) 59 | def __repr__(self): 60 | return "%d dimensional vector" % len(self.vec) 61 | def __len__(self): 62 | return len(self.vec) 63 | 64 | def confusion_matrix_plot(cm, names, title='Confusion matrix', cmap=plt.cm.Blues): 65 | """Creates confusion matrix plot from confusion_matrix(observations, predictions). 
66 | You can calculate confusion matrix with help of ``sklearn.metrics.confusion_matrix`` 67 | 68 | Parameters 69 | ---------- 70 | cm : np.array 71 | Confusion matrix 72 | names : list 73 | Names of classes 74 | title : str 75 | Title of the plot 76 | cmap : plt.cm 77 | Matplotlib colormap 78 | 79 | Returns 80 | ------- 81 | plt.figure 82 | matplotlib figure 83 | """ 84 | fig = plt.figure() 85 | axes = fig.add_axes([0, 0, 1, 1]) 86 | im = axes.imshow(cm, interpolation='nearest', cmap=cmap) 87 | fig.colorbar(im) 88 | tick_marks = np.arange(len(names)) 89 | plt.xticks(tick_marks, names, rotation=45) 90 | plt.yticks(tick_marks, names,) 91 | axes.set_ylabel('True value') 92 | axes.set_xlabel('Predicted value') 93 | axes.set_title(title) 94 | return fig 95 | -------------------------------------------------------------------------------- /Presentations and Tutorials/Protvec demo 2017/trained_models/model_SwissProt_small: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Presentations and Tutorials/Protvec demo 2017/trained_models/model_SwissProt_small -------------------------------------------------------------------------------- /Presentations and Tutorials/Protvec demo 2017/trained_models/swissprot_reviewed_protvec: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Presentations and Tutorials/Protvec demo 2017/trained_models/swissprot_reviewed_protvec -------------------------------------------------------------------------------- /Presentations and Tutorials/RDKit UGM 2014/biomedx.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Presentations and Tutorials/RDKit UGM 2014/biomedx.png 
-------------------------------------------------------------------------------- /Presentations and Tutorials/RDKit UGM 2014/hackaton/XLSX export.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "", 4 | "signature": "sha256:d1c239959bbfc2922992b270c32e0a3f52ded415ef8c116012f7cc9a9a74d1dd" 5 | }, 6 | "nbformat": 3, 7 | "nbformat_minor": 0, 8 | "worksheets": [ 9 | { 10 | "cells": [ 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "# Demo of SaveXlsxFromFrame function" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "collapsed": false, 21 | "input": [ 22 | "import rdkit.Chem as Chem\n", 23 | "from rdkit.Chem import Draw\n", 24 | "from rdkit.Chem import PandasTools\n", 25 | "from rdkit.Chem.Draw import IPythonConsole # Enables RDKit IPython integration\n", 26 | "import pandas as pd" 27 | ], 28 | "language": "python", 29 | "metadata": {}, 30 | "outputs": [], 31 | "prompt_number": 1 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "Load data" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "collapsed": false, 43 | "input": [ 44 | "df = pd.read_csv('drugs.smi', delimiter=\"\\s\")" 45 | ], 46 | "language": "python", 47 | "metadata": {}, 48 | "outputs": [ 49 | { 50 | "output_type": "stream", 51 | "stream": "stderr", 52 | "text": [ 53 | "/usr/lib/python2.7/site-packages/pandas/io/parsers.py:635: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support regex separators; you can avoid this warning by specifying engine='python'.\n", 54 | " ParserWarning)\n" 55 | ] 56 | } 57 | ], 58 | "prompt_number": 2 59 | }, 60 | { 61 | "cell_type": "code", 62 | "collapsed": false, 63 | "input": [ 64 | "df.columns" 65 | ], 66 | "language": "python", 67 | "metadata": {}, 68 | "outputs": [ 69 | { 70 | "metadata": {}, 71 | "output_type": "pyout", 72 | "prompt_number": 3, 73 | "text": [ 74 | "Index([u'SMILES', 
u'Name'], dtype='object')" 75 | ] 76 | } 77 | ], 78 | "prompt_number": 3 79 | }, 80 | { 81 | "cell_type": "code", 82 | "collapsed": false, 83 | "input": [ 84 | "smiles = 'SMILES'\n", 85 | "names = 'Name'" 86 | ], 87 | "language": "python", 88 | "metadata": {}, 89 | "outputs": [], 90 | "prompt_number": 4 91 | }, 92 | { 93 | "cell_type": "code", 94 | "collapsed": false, 95 | "input": [ 96 | "df = df[df[smiles].notnull()]" 97 | ], 98 | "language": "python", 99 | "metadata": {}, 100 | "outputs": [], 101 | "prompt_number": 5 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "Add mol objects" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "collapsed": false, 113 | "input": [ 114 | "PandasTools.AddMoleculeColumnToFrame(df, smilesCol=smiles)" 115 | ], 116 | "language": "python", 117 | "metadata": {}, 118 | "outputs": [], 119 | "prompt_number": 6 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "Add some columns with number" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "collapsed": false, 131 | "input": [ 132 | "from rdkit.Chem import Descriptors\n", 133 | "df['logp'] = df['ROMol'].map(Descriptors.MolLogP)\n", 134 | "df['mw'] = df['ROMol'].map(Descriptors.MolWt)\n", 135 | "df['hac'] = df['ROMol'].map(Descriptors.HeavyAtomCount)" 136 | ], 137 | "language": "python", 138 | "metadata": {}, 139 | "outputs": [], 140 | "prompt_number": 7 141 | }, 142 | { 143 | "cell_type": "code", 144 | "collapsed": false, 145 | "input": [ 146 | "len(df)" 147 | ], 148 | "language": "python", 149 | "metadata": {}, 150 | "outputs": [ 151 | { 152 | "metadata": {}, 153 | "output_type": "pyout", 154 | "prompt_number": 8, 155 | "text": [ 156 | "1000" 157 | ] 158 | } 159 | ], 160 | "prompt_number": 8 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "Check dtypes of columns" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "collapsed": 
false, 172 | "input": [ 173 | "df.dtypes" 174 | ], 175 | "language": "python", 176 | "metadata": {}, 177 | "outputs": [ 178 | { 179 | "metadata": {}, 180 | "output_type": "pyout", 181 | "prompt_number": 9, 182 | "text": [ 183 | "SMILES object\n", 184 | "Name object\n", 185 | "ROMol object\n", 186 | "logp float64\n", 187 | "mw float64\n", 188 | "hac int64\n", 189 | "dtype: object" 190 | ] 191 | } 192 | ], 193 | "prompt_number": 9 194 | }, 195 | { 196 | "cell_type": "code", 197 | "collapsed": false, 198 | "input": [ 199 | "df.head(1)" 200 | ], 201 | "language": "python", 202 | "metadata": {}, 203 | "outputs": [ 204 | { 205 | "html": [ 206 | "
\n", 207 | "\n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | "
SMILESNameROMollogpmwhac
0 NC1=NCC(c2ccccc2)O1 CHEMBL106258 \"Mol\"/ 1.0726 162.192 12
\n", 231 | "
" 232 | ], 233 | "metadata": {}, 234 | "output_type": "pyout", 235 | "prompt_number": 10, 236 | "text": [ 237 | " SMILES Name ROMol logp mw hac\n", 238 | "0 NC1=NCC(c2ccccc2)O1 CHEMBL106258 \"Mol\"/ 1.0726 162.192 12" 239 | ] 240 | } 241 | ], 242 | "prompt_number": 10 243 | }, 244 | { 245 | "cell_type": "code", 246 | "collapsed": false, 247 | "input": [ 248 | "len(df)" 249 | ], 250 | "language": "python", 251 | "metadata": {}, 252 | "outputs": [ 253 | { 254 | "metadata": {}, 255 | "output_type": "pyout", 256 | "prompt_number": 11, 257 | "text": [ 258 | "1000" 259 | ] 260 | } 261 | ], 262 | "prompt_number": 11 263 | }, 264 | { 265 | "cell_type": "code", 266 | "collapsed": false, 267 | "input": [ 268 | "import numpy as np\n", 269 | "import os\n", 270 | "from cStringIO import StringIO\n", 271 | "\n", 272 | "def SaveXlsxFromFrame(frame, outFile, molCol='ROMol', size=(300,300)):\n", 273 | " \"\"\"\n", 274 | " Saves pandas DataFrame as a xlsx file with embedded images.\n", 275 | " It maps numpy data types to excel cell types:\n", 276 | " int, float -> number\n", 277 | " datetime -> datetime\n", 278 | " object -> string (limited to 32k character - xlsx limitations)\n", 279 | " \n", 280 | " Cells with compound images are a bit larger than images due to excel.\n", 281 | " Column width weirdness explained (from xlsxwriter docs):\n", 282 | " The width corresponds to the column width value that is specified in Excel. \n", 283 | " It is approximately equal to the length of a string in the default font of Calibri 11. 
\n", 284 | " Unfortunately, there is no way to specify \u201cAutoFit\u201d for a column in the Excel file format.\n", 285 | " This feature is only available at runtime from within Excel.\n", 286 | " \"\"\"\n", 287 | " \n", 288 | " import xlsxwriter # don't want to make this a RDKit dependency\n", 289 | " \n", 290 | " cols = list(frame.columns)\n", 291 | " print cols\n", 292 | " cols.remove(molCol)\n", 293 | " dataTypes = dict(frame.dtypes)\n", 294 | "\n", 295 | " workbook = xlsxwriter.Workbook(outFile) # New workbook\n", 296 | " worksheet = workbook.add_worksheet() # New work sheet\n", 297 | " worksheet.set_column('A:A', size[0]/6.) # column width\n", 298 | " \n", 299 | " # Write first row with column names\n", 300 | " c2 = 1\n", 301 | " for x in cols:\n", 302 | " worksheet.write_string(0, c2, x)\n", 303 | " c2 += 1\n", 304 | " \n", 305 | " c = 1\n", 306 | " for index, row in frame.iterrows():\n", 307 | " image_data = StringIO()\n", 308 | " img = Draw.MolToImage(row[molCol], size=size)\n", 309 | " img.save(image_data, format='PNG')\n", 310 | " \n", 311 | " worksheet.set_row(c, height=size[1]) # looks like height is not in px?\n", 312 | " worksheet.insert_image(c, 0, \"f\", {'image_data': image_data})\n", 313 | "\n", 314 | " \n", 315 | " for c2, x in enumerate(cols, start=1):\n", 316 | " if str(dataTypes[x]) == \"object\":\n", 317 | " worksheet.write_string(c, c2, str(row[x])[:32000]) # string length is limited in xlsx\n", 318 | " elif ('float' in str(dataTypes[x])) or ('int' in str(dataTypes[x])):\n", 319 | " if (row[x] != np.nan) or (row[x] != np.inf):\n", 320 | " worksheet.write_number(c, c2, row[x])\n", 321 | " elif 'datetime' in str(dataTypes[x]):\n", 322 | " worksheet.write_datetime(c, c2, row[x])\n", 323 | " c += 1\n", 324 | "\n", 325 | " workbook.close()\n", 326 | " image_data.close()" 327 | ], 328 | "language": "python", 329 | "metadata": {}, 330 | "outputs": [], 331 | "prompt_number": 12 332 | }, 333 | { 334 | "cell_type": "markdown", 335 | "metadata": {}, 
336 | "source": [ 337 | "#### Let's write selected columns and the first 500 compounds" 338 | ] 339 | }, 340 | { 341 | "cell_type": "code", 342 | "collapsed": false, 343 | "input": [ 344 | "% time SaveXlsxFromFrame(df[['ROMol', smiles, names, 'logp', 'hac']][:500], 'demo.xlsx', size=(200,200))" 345 | ], 346 | "language": "python", 347 | "metadata": {}, 348 | "outputs": [ 349 | { 350 | "output_type": "stream", 351 | "stream": "stdout", 352 | "text": [ 353 | "['ROMol', 'SMILES', 'Name', 'logp', 'hac']\n", 354 | "CPU times: user 4.52 s, sys: 46.7 ms, total: 4.57 s" 355 | ] 356 | }, 357 | { 358 | "output_type": "stream", 359 | "stream": "stdout", 360 | "text": [ 361 | "\n", 362 | "Wall time: 4.58 s\n" 363 | ] 364 | } 365 | ], 366 | "prompt_number": 13 367 | }, 368 | { 369 | "cell_type": "markdown", 370 | "metadata": {}, 371 | "source": [ 372 | "\n", 373 | "\n", 374 | "Copyright (C) 2014 by Samo Turk, [BioMed X GmbH](http://bio.mx)\n", 375 | "\n", 376 | "This work is licensed under the Creative Commons Attribution-ShareAlike 3.0 License. 
To view a copy of this license, visit http://creativecommons.org/licenses/by-sa/3.0/ or send a letter to Creative Commons, 543 Howard Street, 5th Floor, San Francisco, California, 94105, USA.\n" 377 | ] 378 | } 379 | ], 380 | "metadata": {} 381 | } 382 | ] 383 | } -------------------------------------------------------------------------------- /Presentations and Tutorials/RDKit UGM 2014/hackaton/drugs.smi: -------------------------------------------------------------------------------- 1 | SMILES Name 2 | NC1=NCC(c2ccccc2)O1 CHEMBL106258 3 | C=CC1(C)CC(OC(=O)CSCCN(CC)CC)C2(C)C3C(=O)CCC3(CCC2C)C(C)C1O CHEMBL498466 4 | CCCCn1c2ncn(CC(C)=O)c2c(=O)n(CCCC)c1=O CHEMBL277465 5 | CCC(C(N)=O)N1CCCC1=O CHEMBL1286 6 | C(C1CO1)N1CCC(C2CCN(CC3CO3)CC2)CC1 CHEMBL462393 7 | CN1CCN(c2c(F)cc3c(=O)c(C(=O)O)cn4c3c2SCC4)CC1 CHEMBL295619 8 | ClC(Cl)Cl CHEMBL44618 9 | CNCCCCOc1ccccc1Cc1ccccc1 CHEMBL1192517 10 | CCCCCN(CCCOC)C(=O)C(CCC(=O)O)NC(=O)c1ccc(Cl)c(Cl)c1 CHEMBL550781 11 | CCN(CC)CCNC(C(=O)OCCC(C)C)c1ccccc1 CHEMBL253592 12 | CCCC(CCC)C(=O)NCC(N)=O CHEMBL471638 13 | O=c1[nH]c2cc(C(F)(F)F)c(N3CCOCC3)cc2n(CP(=O)(O)O)c1=O CHEMBL19892 14 | CCN(c1nncs1)P(=O)(N1CC1)N1CC1 CHEMBL474928 15 | Nc1ccncc1N CHEMBL354077 16 | CN(C)S(=O)(=O)NC1CC2c3cccc4c3c(cn4C)CC2N(C)C1 CHEMBL12314 17 | O=P(O)(O)C(Cl)(Cl)P(=O)(O)O CHEMBL12318 18 | O=C(C=CC=Cc1ccc2c(c1)OCO2)N1CCCCC1 CHEMBL43185 19 | [Na+].O=S(=O)([O-])NC1CCCCC1 CHEMBL273977 20 | CC(C)NCC(O)COc1cccc2ccccc21 CHEMBL275742 21 | CC(C)NCC(O)COc1ccc(COCCOC(C)C)cc1 CHEMBL645 22 | NCCCS(=O)(=O)O CHEMBL149082 23 | CC(C)(CC1Cc2ccccc2C1)NCC(O)COc1cc(CCC(=O)O)cc(F)c1F CHEMBL1198855 24 | CC(C)Nc1cccnc1N1CCN(C(=O)c2cc3cc(NS(C)(=O)=O)ccc3[nH]2)CC1 CHEMBL593 25 | CC(=O)C(=Cc1cc(O)c(O)c([N+](=O)[O-])c1)C(C)=O CHEMBL167055 26 | NCCCNc1ccc2c3c(nn2CCNCCO)-c2c(O)ccc(O)c2C(=O)c31 CHEMBL203666 27 | COC(=O)C1C(O)CCC2CN3CCc4c5ccccc5[nH]c4C3CC21 CHEMBL15245 28 | CN1CCc2cccc3c2C1Cc1ccc(O)c(O)c1-3 CHEMBL53 29 | CC(C(O)c1ccc(O)cc1)N1CCC(Cc2ccccc2)CC1 CHEMBL305187 30 | 
CC(C)NCC(O)COc1ccc(CC(N)=O)cc1 CHEMBL24 31 | CC12CCC(=O)C=C1CCC1C3CCC(C(=O)CO)C3(C=O)CC(O)C12 CHEMBL273453 32 | OCC1OC(O)(CO)C(O)C1O CHEMBL604608 33 | CCN(CC)CCc1nc(-c2ccccc2)no1 CHEMBL1620875 34 | Cc1ccc(OCC(O)CNC(C)(C)C)c2oc(=O)ccc12 CHEMBL349807 35 | Cc1c(C)c(=O)oc2cc(OCc3nnc(C(C)C)s3)ccc12 CHEMBL19004 36 | C=CCN(C1=NCCN1)c1c(Cl)cccc1Cl CHEMBL278581 37 | CN1c2ccc(Cl)cc2C2c3ccccc3CCN2CC1=O CHEMBL1723823 38 | CCCCCCCC(=O)O CHEMBL324846 39 | CCOC1OC2OC3(C)CCC4C(C)CCC(C1C)C42OO3 CHEMBL301267 40 | CNCC(O)c1ccc(O)c(O)c1 CHEMBL1740 41 | C=C1CC(N)C(C(=O)O)C1 CHEMBL343803 42 | CC(=O)Nc1ccc(C(=O)O)cc1 CHEMBL112687 43 | CCc1c(C)n2cc(C(=O)c3ccccc3)nc2nc1OC CHEMBL281164 44 | Cc1cccc(OCC(O)CNC(C)(C)C)c1C CHEMBL347795 45 | CC(=O)Nc1cccc(N2CCN(CCCCNS(=O)(=O)CC3CCCCC3)CC2)c1 CHEMBL209821 46 | CCn1c2ccc(OC(C)=O)cc2c(C)c1-c1ccc(OC(C)=O)cc1 CHEMBL32227 47 | O=C(NP(=O)(N1CC1)N1CC1)OCc1ccccc1 CHEMBL504088 48 | CC(C)(C)C(=O)C(Oc1ccc(Cl)cc1)n1ccnc1 CHEMBL1437764 49 | CCOC(=S)SSC(=S)OCC CHEMBL331743 50 | O=C1c2ccccc2OC=C(Cl)N1CCCCN1CC=C(c2ccccn2)CC1 CHEMBL345237 51 | NCC(CC(=O)O)c1ccc(Cl)cc1 CHEMBL301742 52 | CC(=O)c1ccc2c(c1)C(=O)C1(CC1)O2 CHEMBL18098 53 | CCCCCCCC(=O)OC(O)C(O)CO CHEMBL1200882 54 | COc1ccc(CCNCC(O)c2ccc(O)cc2)cc1OC CHEMBL493682 55 | Cc1cc(-c2ccc(O)cc2)nnc1NCCN1CCOCC1 CHEMBL150980 56 | CCCCCCCCCCCCCC(=O)O CHEMBL111077 57 | CC(C)n1cc2c3c1cccc3C1CC(C(=O)NC3CCCCC3)CN(C)C1C2 CHEMBL160293 58 | NC(=O)Cc1cccc(C(=O)c2ccccc2)c1N CHEMBL1021 59 | CCCC(NC(C)C(=O)N1C(C(=O)O)CC2CCCCC21)C(=O)O CHEMBL1201368 60 | Cc1c(F)cc2c(=O)c(C(=O)O)cn3c2c1CCC3C CHEMBL170052 61 | CN(C)CCN1C(=O)c2cccc3cc([N+](=O)[O-])cc(c32)C1=O CHEMBL43482 62 | [Cl-].[Cl-].[Sr+2] CHEMBL2219640 63 | CN(C)CCOC(C)(c1ccccc1)c1ccccc1 CHEMBL1742465 64 | CN(C)S(=O)(=O)c1ccc2c(c1)N(CCCN1CCN(C)CC1)c1ccccc1S2 CHEMBL609109 65 | CCN(CC)C(=O)N1CCN(C)CC1 CHEMBL684 66 | COc1cc2c(cc1OCCCF)CCN1CC(CC(C)C)C(O)CC21 CHEMBL579217 67 | O=C(Nc1cccc(C2=NCCN2)c1)Nc1cccc(C2=NCCN2)c1 CHEMBL427342 68 | CC1COC2(c3ccccc3Cl)c3cc(Cl)ccc3NC(=O)CN12 
CHEMBL1743261 69 | CCCCCCCCCCCCOCCCN CHEMBL1887483 70 | CCOC(=O)C(C#N)=C(c1ccccc1)c1ccccc1 CHEMBL1889451 71 | O=C(NCC1CCCCN1)c1cc(OCC(F)(F)F)ccc1OCC(F)(F)F CHEMBL652 72 | Cc1cnc(NCCCN(C)C)c2c1[nH]c1ccc3cc(O)ccc3c12 CHEMBL1908351 73 | O=C1NC(=O)C2C1C1C2C2C=CC1C1C(=O)NC(=O)C12 CHEMBL1908340 74 | CC(O)C(=O)OP(=O)(O)O CHEMBL2111170 75 | CCOC(=O)C1(c2ccccc2)CCN(CCCNc2ccccc2)CC1 CHEMBL2110995 76 | COc1ccc(Cl)c2c1CCCC2N(C)C CHEMBL2111119 77 | C#CCOC1CN2CCC1CC2 CHEMBL2111051 78 | CCC(=O)OC(CC(=O)O)C[N+](C)(C)C CHEMBL1516469 79 | C=CCC1(CC(C)C)C(=O)NC(=S)NC1=O CHEMBL2110937 80 | CCNC(C)C(O)c1ccc(O)c(O)c1 CHEMBL2110629 81 | [Cl-].[Cl-].[Ra+2] CHEMBL2111187 82 | OC1CN2CCCC(O)C2C1O CHEMBL371197 83 | CC12CCC3C(CCC4CC(=O)CCC43C)C1CCC2C(=O)COC(=O)CCC(=O)O CHEMBL2110850 84 | Cc1cc(OCCN(C)C)c(C(C)C)cc1OC(=O)OC(C)C CHEMBL2110682 85 | COc1ccc2c(C)c(C(C)=O)c(C)nc2c1 CHEMBL2104051 86 | Cc1cccc2c1NC(=O)C2(c1ccc(OS(=O)(=O)O)cc1)c1ccc(OS(=O)(=O)O)cc1 CHEMBL2104944 87 | NCC1CC(=O)N(Cc2ccccc2)C1 CHEMBL2104682 88 | O=[N+]([O-])c1ccc(-c2nc3ccccc3c(N(CCO)CCO)n2)o1 CHEMBL2107101 89 | CCN(CC)CCOc1c2occc2c(O)c2c(=O)cc(C)oc12 CHEMBL2104025 90 | C[N+](C)(C)CC(O)CC(=O)[O-] CHEMBL1149 91 | CCCCOC(=O)c1ccc(N)cc1 CHEMBL127516 92 | O=C1CN=C(c2ccccc2Cl)c2cc([N+](=O)[O-])ccc2N1 CHEMBL452 93 | CC1=CC(=O)NS(=O)(=O)O1 CHEMBL176687 94 | CC(C)(C)NCC(O)COc1ccccc1OCC1CCCO1 CHEMBL347830 95 | CC12CCC3c4ccc(O)cc4CCC3C1CCC2O CHEMBL286452 96 | CNC(C)C(O)c1ccc(O)c(O)c1 CHEMBL416557 97 | CC(=O)OC1CCC2C3C(C)CC4=CC(=O)CCC4C3CCC12C CHEMBL452329 98 | CC(=O)NC(Cc1ccccc1)C(=O)O CHEMBL134570 99 | COc1ccc2c3c1OC1CC(O)C=CC31CCN(C)C2 CHEMBL659 100 | c1ccc2c(c1)Nc1ccccc1S2 CHEMBL828 101 | CCC(=O)NS(=O)(=O)c1ccc(-c2c(C)onc2-c2ccccc2)cc1 CHEMBL1206690 102 | CN1C2CCC1CC(OC(=O)c1c[nH]c3ccccc13)C2 CHEMBL56564 103 | CN(C)CCN1C(=O)c2cccc3cc(N)cc(c32)C1=O CHEMBL428676 104 | CC(C)CC(N)C(=O)O CHEMBL291962 105 | CC(CN1CC(=O)NC(=O)C1)N1CC(=O)NC(=O)C1 CHEMBL444186 106 | O=C(c1ccc2nonc2c1)N1CCCCC1 CHEMBL1276138 107 | 
COc1ccc(S(=O)(=O)Nc2nnc(CC(C)C)s2)cc1 CHEMBL1481457 108 | CC(C)NCC(O)c1cccc(O)c1 CHEMBL327122 109 | O=C(COc1ccc(Cl)cc1)N1CCN(Cc2ccc3c(c2)OCO3)CC1 CHEMBL254857 110 | COc1cc(C=C2CCCC(=Cc3ccc(O)c(OC)c3)C2=O)ccc1O CHEMBL17205 111 | Cc1onc(-c2c(F)cccc2Cl)c1C(=O)NC1C(=O)N2C1SC(C)(C)C2C(=O)O CHEMBL222645 112 | CC12C=CC(=O)C=C1CCC1C2CCC2(C)OC(=O)CCC12 CHEMBL1571 113 | OCCN1CCN(C2CC(c3ccc(F)cc3)c3ccc(C(F)(F)F)cc32)CC1 CHEMBL95636 114 | CN(C)CCC1c2ccc(Cl)cc2CCc2cccnc21 CHEMBL1738982 115 | CC12CCC3C(C=CC4=CC(=O)CCC43C)C1CCC2(O)CCC(=O)O CHEMBL1616951 116 | O=C1C=C(N2CC2)C(=O)C(N2CC2)=C1N1CC1 CHEMBL313302 117 | CN1C(=O)CC(c2ccccc2)C1=O CHEMBL797 118 | CCN(CC)CC1CCCCN1CC(=O)N1c2ccccc2C(=O)Nc2cccnc21 CHEMBL17045 119 | OCCN1CCN(CCCN2c3ccccc3C=Cc3ccccc32)CC1 CHEMBL370753 120 | O=C(O)CCCCCCCNC(=O)c1ccccc1O CHEMBL16503 121 | CSc1ccc(C(=O)c2[nH]c(=O)[nH]c2C)cc1 CHEMBL249856 122 | CS(=O)(=O)Nc1ccc(OCC(O)CNCCOc2ccc(-n3ccnc3)cc2)cc1 CHEMBL99585 123 | CCc1nc(N)nc(N)c1-c1ccc(Cl)c(Cl)c1 CHEMBL21799 124 | CCCCCCCCCCC(=O)O CHEMBL108030 125 | O=C(O)C(S)C(S)C(=O)O CHEMBL1201073 126 | C=C(c1ccccc1OCc1cccc(Cl)c1)n1ccnc1 CHEMBL27289 127 | NCCCC(N)C(=O)O CHEMBL446143 128 | NCC(Cl)C(O)c1cnc(N)[nH]1 CHEMBL1171272 129 | OC(CCl)CN1CC[N+]2(CC1)CC[N+]1(CCN(CC(O)CCl)CC1)CC2 CHEMBL1187562 130 | CN1CCC(OC(c2ccccc2)c2ccccc2)CC1 CHEMBL1492 131 | CN1c2ccccc2C(NCCCCCCC(=O)O)c2ccc(Cl)cc2S1(=O)=O CHEMBL1289110 132 | CC(=O)Oc1ccc(C2(c3ccc(OC(C)=O)cc3)C(=O)Nc3ccccc32)cc1 CHEMBL1402684 133 | COc1ccc(C(=O)c2ccccc2O)c(O)c1 CHEMBL1326877 134 | CC1=CC(=O)c2ccccc2C1=O CHEMBL590 135 | CC1(C)SCCN(S(=O)(=O)c2ccc(Oc3ccncc3)cc2)C1C(=O)NO CHEMBL75094 136 | CCOC(=O)NC(C)(C)Cc1ccc(Cl)cc1 CHEMBL1697686 137 | CC(CCc1ccc(O)cc1)NCCc1ccc(O)c(O)c1 CHEMBL926 138 | Cc1cc(N)c2cc(NC(=O)Nc3ccc4nc(C)cc(N)c4c3)ccc2n1 CHEMBL87223 139 | COc1ccc(N2CC(CN3CCC(O)(c4ccc5c(c4)OCO5)CC3)OC2=O)cc1 CHEMBL45686 140 | NC(=O)CCCCC1CCSS1 CHEMBL1403899 141 | CC(C)(C)[SiH2]O[Si](C)(C)O[SiH2]C(C)(C)C CHEMBL1200838 142 | NC(=O)c1ccc([N+](=O)[O-])cc1Cl 
CHEMBL1450565 143 | Cc1cccc(N2CC(CO)OC2=O)c1 CHEMBL18116 144 | Nc1cc2[nH]cnc2c(=O)[nH]1 CHEMBL31882 145 | CC1CC2=CC(=O)CCC2(C)C2CCC3(C)C(CCC3(C)O)C12 CHEMBL259548 146 | COc1ccc2c(c1)c1c3n2CCN(C)C3=NCC1 CHEMBL1619017 147 | NCc1cccnc1 CHEMBL1988272 148 | O=P(O)(O)Oc1ccc(C(c2ccc(OP(=O)(O)O)cc2)c2ccccn2)cc1 CHEMBL2111180 149 | [Na+].NC(CCC(=O)[O-])C(=O)O CHEMBL2107256 150 | [Na+].[K+].O=C([O-])C(O)C(O)C(=O)[O-] CHEMBL2219738 151 | CCCN(c1ccncc1)n1ccc2ccccc21 CHEMBL29835 152 | CC(C)NCC(O)c1ccc(O)c(NS(C)(=O)=O)c1 CHEMBL30746 153 | CCOc1ccc(S(=O)(=O)O)c2cccnc12 CHEMBL1356732 154 | CCOCn1cnc2c1c(=O)n(CCCCC(C)(C)O)c(=O)n2C CHEMBL2104934 155 | CN1CCCCC1C(C)(C)OC(=O)C(c1ccccc1)c1ccccc1 CHEMBL2106597 156 | CCOc1ccc2nc(SC(C)(C(=O)O)c3ccccc3)sc2c1 CHEMBL2107083 157 | O=C(c1ccccc1)N1CC2COCC2C1 CHEMBL2106524 158 | C=CC1(O)CCC2C3CCC4=C(CCC(=O)C4)C3CCC21C CHEMBL2106812 159 | O=c1[nH]c2ccccc2c2cc(CO)nn21 CHEMBL2106923 160 | CC(=O)NC(CSCC(C)C(=O)c1ccccc1)C(=O)O CHEMBL2104015 161 | Cc1cccc(Cl)c1NC(=O)C=C1SCC(=O)N1C CHEMBL2106933 162 | COc1ccc(C2Sc3ccccc3N(CCN(C)C(C)C)C(=O)C2OC(C)=O)cc1 CHEMBL2107305 163 | Cc1nnc(NS(=O)(=O)c2ccc(NC(=O)c3ccccc3C(=O)O)cc2)s1 CHEMBL2106914 164 | COc1ccc(-n2c(C)c[nH]c2=O)cc1 CHEMBL2104723 165 | CCCC(C)(O)C1CC23C=CC1(OC)C1Oc4c5c(ccc4OC(C)=O)CC2N(C)CCC513 CHEMBL2104593 166 | CCOC(=O)C1(c2cccc(O)c2)CCN(C)CC1 CHEMBL1182665 167 | CN(CCC1c2ccccc2COc2ccccc21)CC(O)COc1ccccc1 CHEMBL2106287 168 | CCCCCCn1c(=O)c2c(ncn2C)n(C)c1=O CHEMBL2105338 169 | CC(NCCC(=O)C1CCCCC1)C(O)c1ccccc1 CHEMBL2104023 170 | O=C1CCCCC1c1c(O)ccc2ccccc21 CHEMBL2106809 171 | O=C1N=C2SCCN2C(=O)C1Cc1ccc(Cl)cc1 CHEMBL2106465 172 | NC(Cc1ccccn1)c1ccccc1 CHEMBL2107647 173 | N=C(N)C(O)c1ccc2c(c1)OCO2 CHEMBL2104702 174 | CC12CCC3c4ccc(OC5CCCC5)cc4CCC3C1CC(O)C2O CHEMBL2107004 175 | CCOC(=O)N(Cc1ccccc1)C1CC1 CHEMBL2104204 176 | CN1CCC23c4c5c(O)ccc4CC1C2CCC(C)(O)C3O5 CHEMBL2104436 177 | CC1NC(C2CC2)CC1c1ccc(Cl)cc1 CHEMBL2105225 178 | CN(C)CCCOC1(Cc2ccccc2)CCCCCC1 CHEMBL2110767 179 | 
CCN(CC)CCCNCc1cc(Cl)c2cccnc2c1O CHEMBL2110791 180 | CNC1C(O)C(OC2C(N)CC(N)C(OC3OC(CN)C(O)C(O)C3O)C2O)OCC1(C)O CHEMBL2110604 181 | CC(CSC(=O)C(C)(C)C)C(=O)N(CC(=O)O)C1CCCC1 CHEMBL2051970 182 | CN1CCC23c4c5c(O)ccc4CC1C2CCCC3O5 CHEMBL2106274 183 | OC(CN1CCCCC1)c1cc(-c2ccccc2)on1 CHEMBL2105453 184 | COc1cc(C=CC(N)=O)cc(OC)c1OC CHEMBL2107098 185 | O=C(c1ccc(Cl)cc1)n1cc(Cc2nnn[nH]2)c2ccccc21 CHEMBL2104903 186 | Cc1cc(C(=O)c2cc(I)c(O)c(I)c2)c(C)o1 CHEMBL2107414 187 | NC(=O)C(CCN1CCN2CCCC2C1)(c1ccccc1)c1ccccc1 CHEMBL2106959 188 | CC(C)NC(=N)NC(=N)Nc1ccc(Cl)c(Cl)c1 CHEMBL1213553 189 | O=c1[nH]c2ccccc2n1C1CCN(CCCc2noc3cc(F)ccc23)CC1 CHEMBL2111040 190 | O=C1CCC2C3Cc4cccc(O)c4C2(CCN3CC2CC2)C1 CHEMBL2104366 191 | CC(=O)c1ccc2c(c1)N(CC(C)N(C)C)c1ccccc1S2 CHEMBL2104054 192 | C=C(CC(=O)c1ccc(-c2ccccc2Cl)cc1)C(=O)O CHEMBL2104844 193 | Cc1cc(NS(=O)(=O)c2ccc(N)cc2)n(-c2cccc(Cl)c2)n1 CHEMBL2104945 194 | O=S1(=O)CCNCN1 CHEMBL2104848 195 | COc1ccc(C(=O)N=c2[cH-][n+](N3C(C)CCCC3C)no2)cc1 CHEMBL2104889 196 | CCc1c(-c2ccccc2)[nH]n(C2CCN(C)CC2)c1=O CHEMBL2104576 197 | C=C(C)CNCC(=O)N(C)c1ccc(Cl)cc1C(=O)c1ccccc1 CHEMBL2106239 198 | CC(C)C1(C(C)C)OCC(CO)O1 CHEMBL2104577 199 | CC(=O)OCC(=O)OCCC(SC(=O)c1ccco1)=C(C)N(C=O)Cc1cnc(C)nc1N CHEMBL2104090 200 | CC1CC(OC(=O)CN2CCCC2=O)CC(C)(C)C1 CHEMBL2105253 201 | CC1(C)C2CCC1(C)C(=O)C2Br CHEMBL2106181 202 | CC1C=CC(C)N1CC(=O)N=C(N)N CHEMBL2104785 203 | CN1C(CC(=O)c2ccccc2)CCCC1CC(O)c1ccccc1 CHEMBL2103769 204 | NCCOC(c1ccc(F)cc1)c1ccc(F)cc1 CHEMBL2105476 205 | CN1C2CCC1CC(OC(=O)C(C)(CO)c1ccccc1)C2 CHEMBL2104939 206 | CCOc1ccccc1C(=O)NN=C(C)C(=O)O CHEMBL2107047 207 | O=C(NCCCN1CCOCC1)c1ccc(Cl)cc1 CHEMBL2104298 208 | COc1ccc2[nH]c(C)c(CC(=O)NO)c2c1 CHEMBL2104618 209 | c1ccc(C2(c3ccccc3)OCC(C3CCCCN3)O2)cc1 CHEMBL72982 210 | CC(=O)OC(Cc1ccccc1)(c1ccccc1)C(C)CN1CCCC1 CHEMBL2110705 211 | CN1C2CCC1CC(NC(=O)c1cc(Cl)cc3c1OC(C)(C)C3)C2 CHEMBL2111147 212 | CNCCCC1(c2ccccc2)OC(C)(C)c2ccccc21 CHEMBL299233 213 | CCN(CC)CCn1c(=O)oc2ccccc2c1=O CHEMBL2110855 214 | 
CNCC(O)CC12CCC(C3CCCCC31)C1CCCCC12 CHEMBL2111067 215 | [Ca+2].[Ca+2].[Ca+2].O=P([O-])([O-])[O-].O=P([O-])([O-])[O-] CHEMBL2106566 216 | [K+].[K+].NC(CC(=O)[O-])C(=O)[O-] CHEMBL2106910 217 | CC1CCCCN1CCCOC(=O)c1ccccc1 CHEMBL127865 218 | CC1CC(OC(=O)c2ccccc2)CC(C)(C)N1 CHEMBL2110936 219 | CCCc1cc2c(s1)CNC1CCc3cc(OC(C)=O)c(OC(C)=O)cc3C21 CHEMBL2104655 220 | CC(CN(C(=O)c1ccc(C#N)cc1)c1ccccn1)N1CCN(c2cccc3c2OCCO3)CC1 CHEMBL372205 221 | CCC(=O)N(c1ccccn1)C(C)CN1CCCCC1 CHEMBL2110985 222 | CCCCN1CCC(CNC(=O)c2c3ccccc3n3c2OCCC3)CC1 CHEMBL356359 223 | OC(c1ccccc1)(c1ccccc1)C1CCNCC1 CHEMBL127508 224 | CC(C)(C)NCC(O)COc1ccccc1-c1ccccc1 CHEMBL2104145 225 | CN(C)CCN(Cc1ccsc1)c1ccccn1 CHEMBL2107485 226 | CCNC1=Nc2ccc(Cl)cc2C(c2ccccc2)S1 CHEMBL2106208 227 | CC1=C(C(=O)O)N2C(=O)C(NC(=O)C(N)c3ccc(O)c(Cl)c3)C2SC1 CHEMBL2106477 228 | CCCOC(=O)c1ccc(N)cc1 CHEMBL2107010 229 | CCCn1c2nc(C34CC5CC3CC(C4)C5)[nH]c2c(=O)n(CCC)c1=O CHEMBL2103819 230 | CCCCc1ccc(C(N)=O)nc1 CHEMBL2106646 231 | CN1CCC23CCCCC2C1Cc1ccc(O)cc13 CHEMBL20803 232 | CN(C)C(=O)COc1ccc2c(c1)CC(NCC(O)c1ccc(O)c(CCO)c1)CC2 CHEMBL2111083 233 | CCCCCCC(C(C)O)n1cnc2c1nc[nH]c2=O CHEMBL2304038 234 | CC(Oc1ccccc1)C(=O)NC1C(=O)N2C1SC(C)(C)C2C(=O)O CHEMBL1614637 235 | C#CCNC1CCc2ccc(OC(=O)N(C)CC)cc21 CHEMBL255231 236 | C=CCc1ccccc1OCC(CNC(C)C)=NO CHEMBL2105963 237 | NC(=O)c1cnc2[nH]ccc2c1NC1C2CC3CC1CC(O)(C3)C2 CHEMBL3137308 238 | CCC1(O)C(=O)OCc2c(=O)n3c(cc21)-c1nc2ccccc2c(C=NOC(C)(C)C)c1C3 CHEMBL113051 239 | O=C1c2ccccc2CN2C(=O)c3ccccc3CN12 CHEMBL1536675 240 | Cc1cnc(C(=O)NCCc2ccc(S(=O)(=O)NC(=O)NC3CCCCC3)cc2)cn1 CHEMBL1073 241 | CCN(CC)CCNc1ccc(CO)c2sc3ccccc3c(=O)c12 CHEMBL22077 242 | CCCCCCCCCCCCCCCC(O)C(N)CO CHEMBL1442934 243 | NC(C(=O)NC1C(=O)N2C(C(=O)O)=C(CSc3c[nH]nn3)CSC12)c1ccc(O)cc1 CHEMBL1095284 244 | CSCC1CN(N=Cc2ccc([N+](=O)[O-])o2)C(=O)O1 CHEMBL514315 245 | CCOC(=O)c1ccccc1C(=O)OCC CHEMBL388558 246 | Sc1[nH]cnc2nncc1-2 CHEMBL119423 247 | NC(Cc1ccccc1)C(=O)O CHEMBL301523 248 | CC1CC(=O)NN=C1c1cc(-n2ccnc2)cs1 CHEMBL2104670 
249 | CC1NC(=O)COC1c1ccccc1 CHEMBL2106282 250 | NC(=O)NCCN1CCN(c2cccc(C(F)(F)F)c2)CC1 CHEMBL2106753 251 | Cc1ccc(F)cc1NC1=NCCN1 CHEMBL2106760 252 | Fc1ccc(NC(=S)Nc2cc(Cl)cc(Cl)c2)cc1 CHEMBL2105198 253 | CCCCCC(C(=O)N1CCCC1C(=O)CC(CCCNC(=N)N)C(=O)O)N(CC)C(=O)CN CHEMBL2107753 254 | CC(C(=O)O)c1ccc(C(=O)c2cccs2)c(Cl)c1 CHEMBL2104170 255 | Cc1cc(=O)oc2cc(OCCN3CCOCC3)cc(OCCN3CCOCC3)c12 CHEMBL2105164 256 | C=CCC1CN(C)CCC1(OC(=O)CC)c1ccccc1 CHEMBL2103995 257 | Cc1cc(N(C)C)ccc1[PH](=O)O CHEMBL2105341 258 | CC(C)N1CCNC1=S CHEMBL2103937 259 | CCC1(O)CCN2CCc3ccc(C)cc3C2C1 CHEMBL2105550 260 | Cc1cc(C)nc(NS(=O)(=O)c2ccc(N=Nc3ccc(O)c(C(=O)O)c3)cc2)n1 CHEMBL2107040 261 | CC(C)[N+](C)(C)CC(O)COc1cccc2ccccc21 CHEMBL1710851 262 | CN(C)CCN1c2ccccc2SC(c2ccccc2)CC1=O CHEMBL2111123 263 | CC(C)NCC(O)c1ccccc1Cl CHEMBL1902627 264 | C=CCC(Cc1cc(OC)c(OC)c(OC)c1)NC CHEMBL2111182 265 | COC1CC2CC(OC(=O)C(O)(c3ccccc3)c3ccccc3)CC1[N+]2(C)C CHEMBL2111174 266 | CCCCCOc1ccccc1C(N)=O CHEMBL2105369 267 | COc1ccc(N(CCCC(=O)O)C(=O)c2ccc(Cl)cc2)cc1 CHEMBL2107589 268 | CC(=O)NC(CCSC(=O)C(C)(C)Oc1ccc(Cl)cc1)C(=O)O CHEMBL2107469 269 | COCCCP(CCCOC)CCCOC CHEMBL2107255 270 | CC(C)Oc1ccccc1-c1cc(=O)c2cc(C(=O)O)ccc2o1 CHEMBL2106345 271 | CCC(C#N)(CC(C)N(C)C)c1ccccc1 CHEMBL2104277 272 | CN(CCO)c1nc2c(c(=O)n(C)c(=O)n2C)n1C CHEMBL2104570 273 | CCOC(=O)C1(c2ccccc2)CCN(CCOCc2ccccc2)CC1 CHEMBL2104234 274 | Cc1cc(O)c2c3[nH]c(-c4ccco4)nc3ccc2n1 CHEMBL2104697 275 | O=C(O)CSC(SCC(=O)O)SCC(=O)O CHEMBL2105322 276 | Oc1ccc(C(O)CNC2CCC2)cc1O CHEMBL2106796 277 | CCOP(=S)(OCC)Oc1cc(C)nc(N(C)C)n1 CHEMBL2106985 278 | CCCCOc1ccc(C(=O)OCCOCCN(CC)CC)cc1N CHEMBL2110889 279 | O=C(O)CCCC=CCC1C(O)CC(O)C1C=CC(O)COc1cccc(C(F)(F)F)c1 CHEMBL2220419 280 | CCCN1CCCC2c3cc(O)ccc3OCC21 CHEMBL2220428 281 | C#Cc1cnc2nc(O)n(C(CC)CC)c2n1 CHEMBL3039529 282 | O=c1ccc2c(C(O)CNCCCCCCOCC(F)(F)c3ccccc3)ccc(O)c2[nH]1 CHEMBL3039530 283 | Cn1ncc(Cl)c1-c1cc(C(=O)NC(CN)Cc2ccc(F)c(F)c2)oc1Cl CHEMBL3137336 284 | Oc1cccc(O)c1O CHEMBL307145 285 | 
O=C(O)c1ccccc1C(=O)Nc1ccc(S(=O)(=O)Nc2nccs2)cc1 CHEMBL1524273 286 | CC12CCC3C(CCC4CC(O)CCC43C)C13OC3CC2c1ccc(=O)oc1 CHEMBL250172 287 | CC(O)C(N)C(=O)O CHEMBL291747 288 | O=C(Nc1ccc(O)cc1)c1ccccc1O CHEMBL389507 289 | O=c1oc2cc(O)ccc2s1 CHEMBL442687 290 | OC(Cn1ccnc1)c1ccc(CCc2ccccc2)cc1 CHEMBL41849 291 | CC(=N)N1CCC(SC2=C(C(=O)O)N3C(=O)C(C(C)O)C3C2)C1 CHEMBL339323 292 | CN(C(=O)C=Cc1ccoc1)C1CCC2(O)C3Cc4ccc(O)c5c4C2(CCN3CC2CC2)C1O5 CHEMBL267495 293 | CC(C)(C)NCC(O)COc1cc(Cl)ccc1Cl CHEMBL156791 294 | COc1ccc(-c2cc(C(F)F)nn2-c2ccc(S(N)(=O)=O)cc2)cc1F CHEMBL28636 295 | O=c1[nH]c2n[nH]cc2c(=O)[nH]1 CHEMBL859 296 | COc1ccc2c(c1)C13CCCCC1C(C2)N(C)CC3 CHEMBL22207 297 | Cc1c(C(N)=O)cc([N+](=O)[O-])cc1[N+](=O)[O-] CHEMBL472565 298 | N#CC(c1ccc(Cl)cc1)c1c(Cl)cc(-n2ncc(=O)[nH]c2=O)cc1Cl CHEMBL284733 299 | CCCC(=O)OC(C(Cl)(Cl)Cl)P(=O)(OC)OC CHEMBL1570266 300 | CCC1Nc2cc(Cl)c(S(N)(=O)=O)cc2S(=O)(=O)N1 CHEMBL2106262 301 | CC[N+]1(CCOC(=O)c2cc(OC)c(OC)c(OC)c2)CCCC1 CHEMBL2111183 302 | C[N+]1(CC2CC2)CCC23CC(=O)CCC2(O)C1Cc1ccc(C(N)=O)c(O)c13 CHEMBL2364619 303 | COc1ccc2c(c1)[nH]c1c2CCN2CC3C(C)OC(OC)=C(C(=O)OCCN(C)C)C3CC12 CHEMBL2106350 304 | Cn1ncnc1C1c2n[nH]c(=O)c3cc(F)cc(c32)NC1c1ccc(F)cc1 CHEMBL3137320 305 | CCc1nn(CCCN2CCN(c3cccc(Cl)c3)CC2)c(=O)n1CC CHEMBL1743259 306 | NS(=O)(=O)c1ccc(C(=O)O)cc1 CHEMBL414 307 | Cc1ncc2c(c1O)COC2c1ccc(Cl)cc1 CHEMBL191886 308 | CC(=O)C1CCC2C3CCC4CC(O)CCC4(C)C3C(=O)CC12C CHEMBL190279 309 | CC(=O)NC(CCC([O-])=C[N+]#N)C(=O)O CHEMBL475736 310 | Cc1cc(Cn2nnc3c2nc(N)nc3-c2ccco2)ccc1N CHEMBL447664 311 | CCC(CO[N+](=O)[O-])(CO[N+](=O)[O-])CO[N+](=O)[O-] CHEMBL488280 312 | Cc1ccc(S(=O)(=O)C(CC#Cc2ccc(Cl)cc2)C(=O)O)cc1 CHEMBL267770 313 | Nc1nc2[nH]cnc2c(=S)[nH]1 CHEMBL727 314 | OCC(O)C(O)C(OC1OC(CO)C(O)C(O)C1O)C(O)CO CHEMBL63558 315 | COc1ccc(-c2nc(C(F)(F)F)[nH]c2-c2ccc(OC)cc2)cc1 CHEMBL430150 316 | CCN(CC)CCNC(=O)c1cc(S(C)(=O)=O)ccc1OC CHEMBL84158 317 | CC1=NN(CCOc2ccc3ccccc3c2)C(=O)C1 CHEMBL8425 318 | Cc1nc(N)nc(N)c1-c1ccc(Cl)c(Cl)c1 CHEMBL264373 319 | 
CC(=O)NC(CC(=O)O)C(=O)NC(CCC(=O)O)C(=O)O CHEMBL1329032 320 | CCCCC1=NC2(CCCC2)C(=O)N1Cc1ccc(-c2ccccc2-c2nn[nH]n2)cc1 CHEMBL1513 321 | OCC(Br)(Br)Br CHEMBL1697852 322 | Oc1ccc2[nH]cc(CCCCN3CC=C(c4ccccc4)CC3)c2c1 CHEMBL431367 323 | CCc1nc(N)nc(N)c1-c1ccc(Cl)cc1 CHEMBL36 324 | ClC(Cl)=C(Cl)Cl CHEMBL114062 325 | OCC1OC(n2cnc3c2ncnc3S)C(O)C1O CHEMBL448290 326 | Cc1ccc(-n2nccn2)c(C(=O)N2CCN(c3nc4cc(Cl)ccc4o3)CCC2C)c1 CHEMBL1083659 327 | CNc1nc(Cl)c(SC)c(N2CCN(C)CC2)n1 CHEMBL407641 328 | CC(=O)Oc1cccc2c1C(=O)c1c(OC(C)=O)cc(C(=O)O)cc1C2=O CHEMBL41286 329 | CC(=O)OCC1=C(C(=O)O)N2C(=O)C(NC(=O)CSc3ccncc3)C2SC1 CHEMBL1599 330 | CCCCCN(C)CCC(O)(P(=O)(O)O)P(=O)(O)O CHEMBL997 331 | Fc1ccc(Cn2c3ccccc3nc2NC2CCNCC2)cc1 CHEMBL61301 332 | CC1(O)CCC2C3CCC4=CC(=O)CCC4=C3C=CC21C CHEMBL166444 333 | Cc1ccc(S(=O)(=O)NC(=O)NN2CC3CCCC3C2)cc1 CHEMBL427216 334 | CCc1cccc2cc(C(O)CNC(C)(C)C)oc21 CHEMBL296035 335 | Cc1nc2c(NCc3c(C)cccc3C)cc(C(=O)NCCO)cn2c1C CHEMBL497011 336 | C=CCNc1nc(NCC=C)nc(N2CCN(C(c3ccc(F)cc3)c3ccc(F)cc3)CC2)n1 CHEMBL1183717 337 | OCc1ccccc1OC1OC(CO)C(O)C(O)C1O CHEMBL462997 338 | CNNCc1ccc(C(=O)NC(C)C)cc1 CHEMBL1321 339 | CCN(CC)C(=O)C1CN2CCc3cc(OC)c(OC)cc3C2CC1OC(C)=O CHEMBL1201250 340 | CN(C)CCC(c1ccc(Cl)cc1)c1ccccn1 CHEMBL1201353 341 | O=C(Nc1ncc([N+](=O)[O-])s1)c1cccs1 CHEMBL1082354 342 | Cc1cccc(C(=O)O)c1O CHEMBL448399 343 | Cc1c2[nH]c3ccc(O)cc3c2c(C)c2c[n+](C)ccc12 CHEMBL16699 344 | CCCn1cnc2c1c(=O)n(CCCCC(C)=O)c(=O)n2C CHEMBL1079905 345 | COc1cc2c(cc1OC)-c1cc(=Nc3c(C)cc(C)cc3C)n(C)c(=O)n1CC2 CHEMBL285913 346 | CC(Cl)C(=O)Nc1ccc(C2=NNC(=O)CC2C)cc1 CHEMBL116368 347 | COc1ccc(Cl)cc1C(=O)NCCc1ccc(C(=O)O)cc1 CHEMBL149930 348 | [K+].[K+].[K+].O=C([O-])CC(O)(CC(=O)[O-])C(=O)[O-] CHEMBL1200458 349 | CC(C)S(=O)(=O)n1c(N)nc2ccc(C(=NO)c3ccccc3)cc21 CHEMBL283403 350 | CCOP(=S)(OCC)Oc1ccc(Cl)cc1Cl CHEMBL1396626 351 | NS(=O)(=O)c1ccc(N2C(=O)CC(c3ccccc3)C2=O)c(Cl)c1 CHEMBL315877 352 | O=C1NCCN1CCN1CCN(C2CC(c3ccc(F)cc3)c3ccccc32)CC1 CHEMBL73461 353 | O=C(O)c1cc(-c2ccccc2)nc2ccccc21 
CHEMBL348000 354 | CC(C(=O)O)c1ccc(C(=O)c2ccccc2)s1 CHEMBL365795 355 | CCOCc1nc2c(c3ccccc3nc2N)n1CC(C)(C)O CHEMBL383322 356 | Cn1cnc2c1cc(C(=O)NOCCO)c(Nc1ccc(Br)cc1Cl)c2F CHEMBL1614701 357 | CC(=O)OC1(C(C)=O)CCC2C3C=C(Cl)C4=CC(=O)C5CC5C4(C)C3CCC21C CHEMBL139835 358 | COc1cc2c(c(OC)c1OC)-c1ccc(OC)c(=O)cc1C(NC(C)=O)CC2 CHEMBL107 359 | CC(O)(CC(=O)O)C1CCCCC1 CHEMBL52091 360 | Nc1nccs1 CHEMBL344760 361 | CCOC(=O)NC1=C(N2CC2)C(=O)C(NC(=O)OCC)=C(N2CC2)C1=O CHEMBL36016 362 | O=C(O)CCCCC(=O)O CHEMBL1157 363 | C=CCN1CCCC1CNC(=O)c1cc2nn[nH]c2cc1OC CHEMBL290194 364 | O=c1[nH]c2ccccc2n1CCCN1CCN(C(c2ccccc2)c2ccccc2)CC1 CHEMBL13828 365 | CC1CS(=O)(=O)CCN1N=Cc1ccc([N+](=O)[O-])o1 CHEMBL290960 366 | COc1cc(C2c3cc4c(cc3C(O)C3COC(=O)C32)OCO4)cc(OC)c1OC CHEMBL61 367 | NS(=O)(=O)Oc1ccc2c(c1)oc(=O)c1c2CCCCC1 CHEMBL286738 368 | NCCCCC(N)C(=O)O CHEMBL8085 369 | O=c1ccc2ccccc2o1 CHEMBL6466 370 | CNC(=O)OCc1cccc(COC(=O)NC)n1 CHEMBL1620144 371 | CC(COc1ccccc1)NN CHEMBL1909286 372 | C(COCCOCC1CO1)OCCOCC1CO1 CHEMBL460287 373 | CC(N)(Cc1ccc(O)cc1)C(=O)O CHEMBL1330596 374 | C[N+](C)(C)CCOP(=O)([O-])OCC(O)CO CHEMBL1567463 375 | CCn1cc(C(=O)O)c(=O)c2cc(F)c(N3CCN(C)CC3)cc21 CHEMBL267648 376 | CCNC1CN(CCCOC)S(=O)(=O)c2sc(S(N)(=O)=O)cc21 CHEMBL220491 377 | Cc1cccc(O)c1 CHEMBL298312 378 | O=P1(N(CCCl)CCCl)OCCCN1CCCl CHEMBL462019 379 | O=C(O)CCc1nc2ccccc2[nH]1 CHEMBL596009 380 | OCc1cc(C(O)CNCCCCCCOCCOCc2c(Cl)cccc2Cl)ccc1O CHEMBL1198857 381 | CC1CC(=O)NN=C1c1ccc(NN=C(C#N)C#N)cc1 CHEMBL313136 382 | COc1ccc(-c2c(Cl)ncn2-c2ccc(S(N)(=O)=O)cc2)cc1F CHEMBL435381 383 | O=C1c2cccc3cc([N+](=O)[O-])cc(c32)C(=O)N1CCN1CCCC1 CHEMBL46874 384 | O=C(C1CC(N2CCN(c3ncccn3)CC2)CN1)N1CCC(F)(F)C1 CHEMBL515387 385 | CCOC(=O)CC(O)(CC(=O)OCC)C(=O)OCC CHEMBL464988 386 | Cc1[nH]c2cccc(OCC(O)CNCCOc3ccccc3)c2c1Cl CHEMBL1742464 387 | Cc1ccc(C23CC2CNC3)cc1 CHEMBL511099 388 | C=CC(=O)Nc1cc2c(cc1OCCCN1CCOCC1)ncnc2Nc1ccc(F)c(Cl)c1 CHEMBL31965 389 | CC(Oc1c(Cl)cccc1Cl)C1=NCCN1 CHEMBL17860 390 | CN1c2ccc(Cl)cc2C(c2ccccc2)=NCC1=S 
CHEMBL1891606 391 | CCc1ccc(C(=O)C(C)CN2CCCC2)cc1 CHEMBL1797127 392 | CN1CCC(=C2c3ccccc3CCc3cccnc32)CC1 CHEMBL946 393 | C#CC1(O)CCC2C3CCC4=CC(=O)CCC4=C3C=CC21C CHEMBL1908319 394 | CN(C)c1ccc(C2CC3(C)C(CCC3(O)C=CCO)C3CCC4=CC(=O)CCC4=C23)cc1 CHEMBL1908329 395 | CC1(O)CCC2C3CCC4=C(O)C(=O)CCC4(C)C3CCC21C CHEMBL1908006 396 | NC(=O)C1CCN(CCCN2c3ccccc3Sc3ccc(Cl)cc32)CC1 CHEMBL1909072 397 | COC(=O)CCC[N+](C)(C)C CHEMBL1896433 398 | O=S(=O)(O)CCCS(=O)(=O)O CHEMBL2111092 399 | CC[N+](C)(C)CCSCC[N+](C)(C)CC CHEMBL2110709 400 | c1c(CCc2ccncc2)c2ccccc2n1Cc1ccccc1 CHEMBL2110797 401 | C=C(CN(C)C)C(Cc1ccccc1)(OC(=O)CC)c1ccccc1 CHEMBL2104265 402 | CCN(CC)Cc1ccc2oc(-c3ccccc3)c(C)c(=O)c2c1 CHEMBL2104916 403 | CC(C)(CO)NCC(O)c1ccccc1 CHEMBL2106269 404 | OCC(O)C1OC2OC(C(Cl)(Cl)Cl)OC2C1O CHEMBL2104181 405 | CNC(C)C(=O)c1ccc(OC)cc1 CHEMBL2106893 406 | O=C(O)c1ccccc1C(=O)c1ccc(O)c(-c2ccccc2)c1 CHEMBL2104248 407 | CC(=O)N=c1sccn1CC(O)c1cccs1 CHEMBL2105962 408 | COc1cc(OC)c(C(=O)OCCN2CCOCC2)c(OC)c1 CHEMBL2106750 409 | O=C1NC(=O)C2(c3ccc(Cl)cc3)CC12 CHEMBL2104097 410 | CN1CCN(C2=Nc3cc(Cl)ccc3Cc3sccc32)CC1 CHEMBL2105484 411 | Oc1cccc(Oc2nnn[nH]2)c1 CHEMBL2106850 412 | O=C(CCn1nnc(-c2cccc(Br)c2)n1)N1CCCCC1 CHEMBL2106508 413 | COc1cc(C(=O)NC(COc2ccccc2)CN2CCCC2)cc(OC)c1OC CHEMBL2106241 414 | CC(N)C(=O)NC(CCC(=O)C=[N+]=[N-])C(=O)NC(CCC(=O)C=[N+]=[N-])C(=O)O CHEMBL2105946 415 | CC(=O)NC(CSC(=O)c1ccccc1OC(C)=O)C(=O)O CHEMBL2104494 416 | O=S(=O)(O)c1ccc2[nH]c(-c3ccccc3)nc2c1 CHEMBL1987518 417 | COC1OC(C(COCc2ccccc2)OCc2ccccc2)C(OCc2ccccc2)C1O CHEMBL2105173 418 | NC(=O)c1[nH]nc(C2OC(CO)C(O)C2O)c1O CHEMBL2105330 419 | CCCCCCCCCNc1ncnc2[nH]ccc21 CHEMBL2106899 420 | COc1cc(C(=O)OCCCNCC2CC2(c2ccccc2)c2ccccc2)cc(OC)c1OC CHEMBL2107590 421 | CC1=CC(=NOCC(=O)O)c2ccccc2C1=O CHEMBL2110691 422 | Cn1c2ncn(CC(O)CN3CCN(CCCSc4ccccc4)CC3)c2c(=O)n(C)c1=O CHEMBL155962 423 | CN1C2=CC(=O)C(=NNC(N)=O)C=C2CC1S(=O)(=O)O CHEMBL1697827 424 | CC1(NC2=NS(=O)(=O)C3SC(Cl)=CC3N2)CC1 CHEMBL2107789 425 | 
CC(C(=O)O)c1cccc2c(-c3ccccc3)coc21 CHEMBL2107396 426 | CC(C)CN1CC2CN(C(C)C)CC(C1)C21CCCCC1 CHEMBL2104579 427 | CCN(CC)CCOc1ccc(C(=O)CCc2ccccc2)cc1 CHEMBL2104343 428 | Nc1c(NC(=O)C(F)(F)C(F)F)cc(C(F)(F)F)cc1[N+](=O)[O-] CHEMBL2106844 429 | c1ccc2c(c1)Sc1ccccc1N2CCCN1CCN(CCC2OCCCO2)CC1 CHEMBL2106987 430 | CC(C)N1CCN=C1CN1CCCCSc2ccc(Cl)cc21 CHEMBL2104610 431 | CCN1CCCC1CNC(=O)c1cc(C(C)=O)ccc1OC CHEMBL2106206 432 | COc1ccccc1Oc1c(CS(=O)(=O)c2ccc(C)cn2)nc(-c2ccncc2)nc1OC CHEMBL2107803 433 | CCCN1CC(NC(=O)N(CC)CC)CC2c3cccc4[nH]cc(c43)CC21 CHEMBL2106986 434 | COc1ccc(C=NNC(=O)c2ccncc2)cc1OC CHEMBL2104481 435 | CC1(C)OC2C3OC(C)(C)OCC3OC2(C(=O)O)O1 CHEMBL2110720 436 | COc1cc2nc(N(C)CCCNC(=O)C3CCCO3)nc(N)c2cc1OC CHEMBL709 437 | Cc1ccsc1C=CC1=NCCCN1C CHEMBL1240978 438 | O=C(Cn1ccnc1)c1ccc2ccccc2c1 CHEMBL416801 439 | Oc1ccc(CCCCNCC(O)c2ccc(O)c(O)c2)cc1 CHEMBL1201251 440 | Cc1cccc(Nc2cc(Cl)nc(SCC(=O)NCCO)n2)c1C CHEMBL1908320 441 | CCCC(C)(O)C1CC23C=CC1(O)C1Oc4c5c(ccc4O)CC2N(C)CCC513 CHEMBL1908334 442 | NC(=O)C1c2ccccc2CCc2ccccc21 CHEMBL1868301 443 | CC(CN1CCCC1)C(=O)c1ccc(C(F)(F)F)cc1 CHEMBL1951050 444 | Cc1c(CN2CCN(C(=O)C(C)O)CC2)sc2c1nc(-c1cnc(N)nc1)nc2N1CCOCC1 CHEMBL1922094 445 | [O-][n+]1cccc(CO)c1 CHEMBL1985503 446 | O=[N+]([O-])c1cccc(C2CN3CCSC3=N2)c1 CHEMBL2110956 447 | C=CCOc1cc(Cl)ccc1C(=O)NCCN(CC)CC CHEMBL2103989 448 | O=[N+]([O-])OCC(O[N+](=O)[O-])C(CO[N+](=O)[O-])O[N+](=O)[O-] CHEMBL2107583 449 | CCCCC(=O)OC1C(C=O)=CC(O)C(O)C1O CHEMBL2105321 450 | CC(C)C(CCN(C)C)(C(N)=O)c1cccc2ccccc21 CHEMBL2107641 451 | CCCCOCCOC(=O)C(c1ccccc1)N1CCCCC1 CHEMBL2104630 452 | CCC1C(O)N2C3CC45c6ccccc6N(C)C4C2CC1C3C5O CHEMBL2105617 453 | NC(CCC(=O)NCCS(=O)(=O)O)C(=O)O CHEMBL2106758 454 | CC=C(C)C(=O)OC1CC2CCC(C1)N2C CHEMBL2107120 455 | CN(C)c1cccc2c1cccc2S(=O)(=O)Nc1ccc(NC(=O)C(C)(C)COC(=O)CN)cc1 CHEMBL2104762 456 | NNC(=O)OC1CC2CCC(C1)N2C(=O)Oc1ccccc1 CHEMBL2104515 457 | OC(OCC1OC(O)C(O)C(O)C1O)C(Cl)(Cl)Cl CHEMBL2105990 458 | CCOC(=O)C1(c2ccccc2)CCN(CCC(O)c2ccccc2)CC1 CHEMBL2105385 459 
| CCCCc1ccc(NC(=O)CN(CC(=O)O)CC(=O)O)cc1 CHEMBL2104651 460 | CC(=O)C1(O)CCC2C3CCC4=CC(=O)C=CC4(C)C3C(O)CC21C CHEMBL2105540 461 | CC(=O)NS(=O)(=O)c1cc(N)ccc1Sc1ccc(N)cc1 CHEMBL2110764 462 | CC(CO)(CO)NCc1cc2c3ccccc3ccc2c2ccccc12 CHEMBL61495 463 | CN1CCC23c4c5c(O)ccc4CC1C2CCC(=O)C3(C)O5 CHEMBL2110992 464 | COc1cc(O)c(C(=O)Nc2nc(C(=O)NCCN(C(C)C)C(C)C)cs2)cc1OC CHEMBL2107723 465 | Cc1ccccc1C(=O)N1CCC(=NOS(=O)(=O)O)c2ccc(Cl)cc21 CHEMBL2104373 466 | Nc1nc(N)c2c(n1)nc(N)nc2-c1ccccc1 CHEMBL2107705 467 | CC(CCc1ccc(O)cc1)NCC(O)c1ccc(O)cc1 CHEMBL2103767 468 | Cc1cc(O)c(Cl)c(C)c1Cl CHEMBL2104302 469 | COC1OC2OC3(CN4CCCCC4)C(O)CC(C13)C2C CHEMBL2107624 470 | C=CC1CNCCC1CCCc1ccnc2ccc(OC)cc12 CHEMBL2104497 471 | CC(S)C(=O)NCC(=O)O CHEMBL1314 472 | CC1=C(C)C(=O)C(CCCCC#CCCCC#CCO)=C(C)C1=O CHEMBL304818 473 | OCC(O)C(O)C(O)C(O)CO CHEMBL16105 474 | CC1(C)NC(c2ccccc2)C(=O)N1C1C(=O)N2C1SC(C)(C)C2C(=O)O CHEMBL1201116 475 | Cc1c(C)n(Cc2ccccc2)c2ccc(C(=O)OCCN(C)C)cc12 CHEMBL1518149 476 | CC12OC(=O)C1(C(O)C1C=CCCC1)NC(=O)C2CCCl CHEMBL371405 477 | CN(C)CCCC1(c2ccc(F)cc2)OCc2cc(C#N)ccc21 CHEMBL1508 478 | CC#CCn1c(N2CCCC(N)C2)nc2c1c(=O)n(Cc1nc(C)c3ccccc3n1)c(=O)n2C CHEMBL237500 479 | Cc1c(=S)ssc1-c1cnccn1 CHEMBL178459 480 | c1ccc2c(c1)sc1ccccc12 CHEMBL219828 481 | CC(=O)OC1CN2CCC1CC2 CHEMBL20835 482 | COc1cc2c(cc1OC)C(c1ccccc1)CN(C)CC2 CHEMBL343569 483 | C=CCC1(C(C)CCC)C(=O)NC(=S)NC1=O CHEMBL440 484 | [O-][S+](Cc1ccccn1)c1nc2ccccc2[nH]1 CHEMBL9861 485 | C[N+]1(CCCCC[N+]2(C)CCCC2)CCCC1 CHEMBL1271 486 | CCN(CC)CCNC(=O)c1ccc(NC(C)=O)cc1 CHEMBL1097 487 | CCCCC1CC(=O)C2(O)OC3C(NC)C(O)C(NC)C(O)C3OC2O1 CHEMBL1614655 488 | COCCOC(=O)C1=C(C)NC(C)=C(C(=O)OC(C)C)C1c1cccc([N+](=O)[O-])c1 CHEMBL1428 489 | CCc1nn(C2CCCC2)c2c1CCn1c(-c3cccs3)nnc1-2 CHEMBL217899 490 | COc1ccc2c(=O)c(C)c(-c3ccccc3)oc2c1CN(C)C CHEMBL519364 491 | CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(C)CC3)ccc1OCC)[nH]c2=O CHEMBL192 492 | CC[N+](C)(C)CCN(C)CC[N+](C)(C)CC CHEMBL1180418 493 | CC(C)COCC(CN(Cc1ccccc1)c1ccccc1)N1CCCC1 CHEMBL1008 494 
| CCOP(=O)(OCC)SCC[N+](C)(C)C CHEMBL1201341 495 | O=C(OC1CN2CCC1CC2)N1CCc2ccccc2C1c1ccccc1 CHEMBL1734 496 | CNCCCC1c2ccccc2C=Cc2ccccc21 CHEMBL668 497 | OCC1OC(O)C(O)C(O)C1O CHEMBL1233058 498 | C=C(Br)CC1(C(C)CC)C(=O)NC(=O)NC1=O CHEMBL469253 499 | OCCN1CCN(CCC=C2c3ccccc3Sc3ccc(C(F)(F)F)cc32)CC1 CHEMBL54661 500 | Cc1cccc(OCC(O)CNC(C)C)c1 CHEMBL67096 501 | CCn1c(=O)c2ccccc2n(-c2cccc([N+](=O)[O-])c2)c1=O CHEMBL88990 502 | O=C1c2ccccc2C(=O)C1c1ccc(Br)cc1 CHEMBL288496 503 | CC1(C)CCCC(NC(=O)C(S)Cc2ccccc2)C(=O)N1CC(=O)O CHEMBL107747 504 | C=C(C)C1CC=C(C)CC1 CHEMBL449062 505 | [Zn+2].O=S(=O)([O-])[O-] CHEMBL1200929 506 | CCN(CC)CCOc1ccccc1OC(=Cc1ccccc1)C(C)=O CHEMBL164840 507 | CC(C)NCC(O)c1ccc([N+](=O)[O-])cc1 CHEMBL127349 508 | COc1cc([S+](C)[O-])ccc1-c1nc2ccc[nH]c-2n1 CHEMBL286020 509 | Cc1cc(C)cc(NC(=O)Cc2ccc(OC(C)(C)C(=O)O)cc2)c1 CHEMBL18901 510 | O=NN(CCCl)C(=O)NCCO CHEMBL284907 511 | NC(C(=O)O)c1ccc(CO)c(O)c1 CHEMBL472562 512 | CC(C)Nc1ncccn1 CHEMBL1740513 513 | CC1(c2ccccc2)OC(C(=O)O)=CC1=O CHEMBL278488 514 | CC(C)(C)NCC(O)c1cc(Cl)c(N)c(C(F)(F)F)c1 CHEMBL86749 515 | FC(F)(F)COCC(F)(F)F CHEMBL477874 516 | Oc1ccc(Nc2ccnc3cc(Cl)ccc32)cc1CN1CCCC1 CHEMBL1213257 517 | O=C(C(O)CS(=O)(=O)c1ccc2cc(Cl)ccc2c1)N1CCC(N2CCCNC2=O)CC1 CHEMBL1095032 518 | O=C(Nc1ccc([N+](=O)[O-])cc1Cl)c1cc(Cl)ccc1O CHEMBL1448 519 | CCCn1c2nc[nH]c2c(=O)[nH]c1=O CHEMBL279898 520 | CCN(CC)CCNC(=O)c1cc(Cl)c(NC(=O)COc2ccc(Cl)cc2)cc1OC CHEMBL10445 521 | CC(CN1CCOCC1)C(C(=O)N1CCCC1)(c1ccccc1)c1ccccc1 CHEMBL431928 522 | CC1OC(n2cc(F)c(=O)[nH]c2=O)C(O)C1O CHEMBL1130 523 | CCCCC1(COC(=O)CCC(=O)O)C(=O)N(c2ccccc2)N(c2ccccc2)C1=O CHEMBL1414320 524 | COc1c2ccoc2c(OC)c2oc(CSC)cc(=O)c12 CHEMBL297550 525 | O=C(O)CCC(=O)c1ccc(C2CCCCC2)c(Cl)c1 CHEMBL1697824 526 | CCCN1CCC(c2cccc(S(C)(=O)=O)c2)CC1 CHEMBL596802 527 | Cc1cc(O)cc(C)c1Cl CHEMBL398440 528 | CN(C)C(=O)CCCOC(=O)C(C)(C)Oc1ccc(Cl)cc1 CHEMBL1697831 529 | CC1(C)CC1C(=O)NC(=CCCCCSCC(N)C(=O)O)C(=O)O CHEMBL766 530 | CCOCCP(CCOCC)CCP(CCOCC)CCOCC CHEMBL1615784 531 | 
COCCc1ccc(OCC(O)C(C)(C)N)cc1 CHEMBL1742421 532 | CCCN1CCCC2Cc3n[nH]cc3CC21 CHEMBL240773 533 | CNc1ccc(C=Cc2ccc(OCCOCCOCCF)nc2)cc1 CHEMBL1908919 534 | CN1CCN(c2cc3c(nn2)Oc2ccccc2N3C)CC1 CHEMBL1886755 535 | CCCCOC(=O)CC(CC(=O)OCCCC)(OC(C)=O)C(=O)OCCCC CHEMBL1904556 536 | Cc1ccccc1N1CCN(CCc2nnc3n2CCCC3)CC1 CHEMBL1201216 537 | CCc1nn(CCCN2CCN(c3cccc(Cl)c3)CC2)c(=O)n1CCOc1ccccc1 CHEMBL623 538 | c1ccc2c(c1)Sc1ccccc1N2CC12CCN(CC1)CC2 CHEMBL1908311 539 | C=NC(C(=O)NC1C(=O)N2C1SC(C)(C)C2C(=O)O)c1ccccc1 CHEMBL1908324 540 | Oc1ccccc1-c1nnco1 CHEMBL1903897 541 | CN(C)C1C(O)=C(C(N)=O)C(=O)C2(O)C(O)=C3C(=O)c4c(O)cccc4C(C)(O)C3CC12 CHEMBL1236180 542 | Cc1cc(NC(=O)c2c(C)cccc2C)no1 CHEMBL1958077 543 | Cc1c(Cl)c(=O)oc2cc(OP(=O)(OCCCl)OCCCl)ccc12 CHEMBL1897362 544 | COc1ccc(F)cc1CC(O)(C1CCOCC1)C1CNCCO1 CHEMBL2103857 545 | COc1ccc2c(c1)CC(C)N(CCc1ccc(N)cc1)CC2 CHEMBL2110599 546 | OCCN1CCN(CCC=C2c3ccccc3COc3ccc(Cl)cc32)CC1 CHEMBL2110968 547 | CC(C)(C)C(O)C=Cc1ccc2c(c1)OCO2 CHEMBL1983350 548 | CC(C)CN(CC(C)C)CC(COC1c2[nH]cnc2N(C)C(=O)N1C)OC(=O)c1ccccc1 CHEMBL2106742 549 | NC(=O)C(O)C(O)C(O)C(O)C=O CHEMBL2107404 550 | CCOc1cc(OCC)cc(OCCN2CCOCC2)c1 CHEMBL2106361 551 | O=C1CN=C(c2ccccc2F)c2cc(Cl)ccc2N1CC1CC1 CHEMBL2106743 552 | CCOC(=O)C(CCc1ccccc1)NC(C)C(=O)N1C2CCC(CC2)C1C(=O)O CHEMBL2106476 553 | O=C1NC(=O)C2=C(CCCC2)C1=Cc1ccccc1 CHEMBL2107135 554 | CCCCCOc1ccccc1C(=CSC)n1ccnc1 CHEMBL2107447 555 | CC(=O)OC(C)C(=O)C1(O)CCC2C3CCC4=CC(=O)C=CC4(C)C3(F)C(O)CC21C CHEMBL2107401 556 | CN(C)CCC1(c2ccccc2)CCc2ccccc2C1=O CHEMBL2104267 557 | C=CCN1CCC23c4c5c(OC(C)=O)ccc4CC1C2C=CC(OC(C)=O)C3O5 CHEMBL2106214 558 | CC(C)=CCN1CCC2(C)c3cc(O)ccc3CC1C2(C)C CHEMBL2106302 559 | CC(C)c1ccc(C(=O)N(C)CCN2CCN(C(=O)c3cccc4ccoc43)CC2)cc1 CHEMBL2104002 560 | Cn1c2ncn(CCOC(O)C(Cl)(Cl)Cl)c2c(=O)n(C)c1=O CHEMBL2104491 561 | CC(N)C(=O)c1ccccc1 CHEMBL2104047 562 | CSCCC(NC(C)=O)C(=O)Oc1ccc(NC(C)=O)cc1 CHEMBL2107605 563 | NN=c1ccc(C(N)=O)n[nH]1 CHEMBL2106246 564 | 
CC(=O)OC1C=COC=C2CC34SSC5(CC6=COC=CC(O)C6N5C3=O)C(=O)N4C21 CHEMBL2104550 565 | CCN(CC)CCOC(=O)C1N2C(=O)C(NC(=O)c3c(OC)cccc3OC)C2SC1(C)C CHEMBL2105448 566 | CC1(c2ccccc2)CC(=O)N(CN2CCOCC2)C1=O CHEMBL2105127 567 | CC1(C)OC(C)(C)C(=CNCCNC=C2C(=O)C(C)(C)OC2(C)C)C1=O CHEMBL2106748 568 | [Ca+2].O=P([O-])([O-])OCC(O)CO CHEMBL2106124 569 | [Na+].NC(CCC(=O)O)C(=O)[O-] CHEMBL2106738 570 | [Na+].O=CC(O)C(O)C(O)C(O)C(=O)[O-] CHEMBL2104556 571 | [K+].O=C([O-])c1ccccc1 CHEMBL2105241 572 | [K+].O=C([O-])C(O)C(O)C(=O)O CHEMBL2105248 573 | NC(CN(Cc1ccccc1)c1ccccc1)=NO CHEMBL2110778 574 | Cc1c(CCOP(=O)(O)O)sc[n+]1Cc1cnc(C)nc1N CHEMBL1236378 575 | CCCCCC(C)(O)C=CC1C(O)CC(=O)C1CC=CCCCC(=O)O CHEMBL2105959 576 | CC(COc1ccccc1Cc1ccccc1)N1CCCCC1 CHEMBL2105910 577 | COc1cc(C2c3ccc(N)c(N)c3OC(N)=C2C#N)cc(Br)c1OC CHEMBL2103852 578 | O=C(NN1CCCCCC1)NS(=O)(=O)c1ccc(Cl)cc1 CHEMBL2106693 579 | CC12CCC3C(CCC4CC5SC5CC43C)C1CCC2O CHEMBL2106161 580 | CCOc1c(N2CCOCC2)cnn(C)c1=O CHEMBL2103954 581 | Cc1cc(S(C)(=O)=O)c(S(C)(=O)=O)cc1C(=O)NC(=N)N CHEMBL2107802 582 | CC(C)(Oc1ccc(C(O)c2ccc(Cl)cc2)cc1)C(=O)O CHEMBL2106581 583 | CCCC(=O)c1ccccc1OCCN(C(C)C)C(C)C CHEMBL2107306 584 | CC(C)(C)C(=O)NNCc1ccccc1 CHEMBL2106941 585 | CCCC(CC)C1(CC)C(=O)NC(=O)NC1=O CHEMBL2107053 586 | O=C(COc1ccc(Cl)cc1)OCCNC12CC3CC(CC(C3)C1)C2 CHEMBL2104053 587 | CN(C)C1CC(c2ccccc2)c2ccccc21 CHEMBL1190753 588 | c1cn(Cc2ccccc2)c(-c2nnn[nH]2)n1 CHEMBL2105919 589 | CC1(O)CCC2C3CC4=CC(=O)CCC4(C)C3CCC21C CHEMBL2103992 590 | OCC1(CO)COC(C(Cl)(Cl)Cl)OC1 CHEMBL2103943 591 | COC(=O)Nc1nc2ccc(C3(c4ccc(F)cc4)OCCO3)cc2[nH]1 CHEMBL2104600 592 | NC(Cc1cccc(N(CCCl)CCCl)c1)C(=O)O CHEMBL2107075 593 | CCC(C)OC(=O)N1CCCCC1CCO CHEMBL2104314 594 | COC(=O)c1ccccc1C(=O)c1ccc(OCCN2CCCCC2)cc1 CHEMBL2104821 595 | Cc1c2ccc(NC(=O)N3CCOCC3)cc2oc(=O)c1CCN1CCOCC1 CHEMBL2105094 596 | CCCCOCCOC(=O)c1cccnc1 CHEMBL2105161 597 | CCCNC(=O)c1ccc2c(c1)N(C(C)CN1CCCC1)c1ccccc1S2 CHEMBL2104029 598 | CCC1(CO)CCCN2CCc3c4ccccc4[nH]c3C21 CHEMBL2105559 599 | 
CN1C2CCC1CC(NC(=O)c1cn(C3CCCCC3)c3ccccc3c1=O)C2 CHEMBL2110961 600 | CN1CCC2=C(C1)C(c1ccccc1)c1ccccc12 CHEMBL278398 601 | CC(CCc1ccc(O)cc1)NCC(O)c1ccc(O)cc1 CHEMBL509336 602 | Cc1[nH]cnc1CSCCNC(=N)NCCCc1c[nH]cn1 CHEMBL12608 603 | CC(C)NNCc1ccc2c(c1)OCO2 CHEMBL2111045 604 | C[S+](C)CCOC(=O)C(c1ccccc1)C1CCCCC1 CHEMBL2111042 605 | CC12CCC3C(C4CC4C4=CC(=O)CCC43C)C1CCC2(O)CCC(=O)O CHEMBL2111001 606 | Cc1ccc(O)c(-n2nc3ccccc3n2)c1 CHEMBL1564747 607 | O=C(O)COc1ccccc1C=NNC(=O)c1ccncc1 CHEMBL1590674 608 | C#CCC1(OC(=O)CC)CCN(CCc2ccccc2)CC1 CHEMBL1742418 609 | COc1ccc(CCNCC(O)COc2ccc(OCC(C)=O)c3c2CCC(=O)N3)cc1OC CHEMBL1742442 610 | NC(Cc1c[nH]cn1)C(=O)O CHEMBL17962 611 | CC(NC(CCc1ccccc1)C(=O)O)C(=O)N1Cc2ccccc2CC1C(=O)O CHEMBL1733 612 | [K+].O=[N+]([O-])[O-] CHEMBL1644029 613 | NC(CC(O)COc1cccc2ccccc21)=NO CHEMBL357513 614 | CNC(=O)Oc1ccccc1OC(C)C CHEMBL446060 615 | O=C1NCN(c2ccccc2)C12CCN(CCCOc1ccc(F)cc1)CC2 CHEMBL79834 616 | CCCC(Nc1nc(-c2ccc(NC(=O)NCC)c(OC)c2)ncc1C)c1cccnc1 CHEMBL552212 617 | CC(C(=O)O)c1ccc2c(c1)[nH]c1ccc(Cl)cc12 CHEMBL1316 618 | C=C1C(CO)C(O)CC1n1cnc2c1nc(N)[nH]c2=O CHEMBL713 619 | CC1CCC2C(C)C(N3CCS(=O)(=O)CC3)OC3OC4(C)CCC1C23OO4 CHEMBL516268 620 | CN(C)CCC(N)(P(=O)(O)O)P(=O)(O)O CHEMBL122455 621 | CCN(CC)CCOC(=O)C(O)(c1ccccc1)c1ccccc1 CHEMBL70352 622 | OCCNCCO CHEMBL119604 623 | OCC1CNCC(O)C1O CHEMBL206468 624 | Cc1ccc(S(=O)(=O)NC(=O)NC2CCCCC2)cc1 CHEMBL312394 625 | CCCOC(=O)c1ccc(O)cc1 CHEMBL194014 626 | CC(CCC(=O)O)C1CCC2C3C(=O)CC4CC(=O)CCC4(C)C3CC(=O)C12C CHEMBL514446 627 | O=P(O)(O)OCC1OC(O)(COP(=O)(O)O)C(O)C1O CHEMBL97893 628 | O=C(O)CN(CCN(CC(=O)O)CC(=O)O)CCN(CC(=O)O)CC(=O)O CHEMBL780 629 | Cn1c2ncn(CCOC(=O)C(C)(C)Oc3ccc(Cl)cc3)c2c(=O)n(C)c1=O CHEMBL1318939 630 | NC(CS)C(=O)O CHEMBL863 631 | Cc1ccccc1CNc1ncnc2c1ncn2C1OC(CO)C(O)C1O CHEMBL331382 632 | O=C(O)C(Cc1cc(=O)[nH]c2ccccc12)NC(=O)c1ccc(Cl)cc1 CHEMBL1697771 633 | Cc1nn(C)c2c1C(c1cccc(Cl)c1)=NCCN2 CHEMBL174697 634 | Cc1nnc2n1-c1ccc(Cl)cc1C(c1ccccc1Cl)=NC2 CHEMBL646 635 | 
CC1(C)c2cc(C3=NNC(=O)CC3)ccc2NC1=O CHEMBL38224 636 | O=C1C[N+]([O-])=C(c2ccccc2)c2cc(Cl)ccc2N1 CHEMBL1597677 637 | NCCc1ccc(O)c(O)c1 CHEMBL59 638 | CC1(O)CCC2C3CC=C4CC(O)CCC4(C)C3CCC21C CHEMBL1079446 639 | Cn1c2ncn(CC(=O)O)c2c(=O)n(C)c1=O CHEMBL70246 640 | CN1C2CCC1CC(OC1c3ccccc3CCc3ccccc31)C2 CHEMBL1946186 641 | CCCCCC(=O)OCC(=O)C1C(C)CC2C3CC(F)C4=CC(=O)C=CC4(C)C3C(O)CC21C CHEMBL2107415 642 | FC(F)(Cl)Cl CHEMBL2106634 643 | CCCCCC1Nc2cc(C(F)(F)F)c(S(N)(=O)=O)cc2S(=O)(=O)N1 CHEMBL2107203 644 | CC(I)C1OCC(CO)O1 CHEMBL2105827 645 | NC(=O)c1cc[n+](CC2=C(C(=O)[O-])N3C(=O)C(NC(=O)Cc4cccs4)C3SC2)cc1 CHEMBL2105567 646 | O=P(O)(O)OC1C(O)C(O)C(O)C(OP(=O)(O)O)C1OP(=O)(O)O CHEMBL2106016 647 | CN(C)CCN(Cc1cccs1)c1ccccc1 CHEMBL2111026 648 | CCOCC1COc2c(Cl)c3c(cc2O1)[nH]cc(C(=O)OCC)c3=O CHEMBL2104710 649 | COc1ccc2cc(C(C)CO)ccc2c1 CHEMBL2105135 650 | O=c1nc2cccccc-2n1Cc1ccccc1 CHEMBL2104035 651 | Cc1cc(C)[n+]([N-]C(=O)c2ccc(Cl)c(S(N)(=O)=O)c2)c(C)c1 CHEMBL2106604 652 | Cc1c2c(cn1C)NC(=O)CN=C2c1ccccc1 CHEMBL2104741 653 | CCC(CC)(C(=O)O)c1cccc2ccccc21 CHEMBL2106771 654 | COc1cc2c(cc1OC)C(CCc1ccc(Cl)cc1)N(C)CC2 CHEMBL2106915 655 | CN(C)CC1COC2(O1)c1ccccc1CCc1ccccc12 CHEMBL2104552 656 | Oc1ccc(C2CNCc3c(O)c(O)ccc32)cc1O CHEMBL2105532 657 | O=C(OCc1ccccc1)C1(CCN2CCCCCC2)CCCCC1=O CHEMBL2105937 658 | CCC(CCCCC1CCC2CC(=O)CC12)OC CHEMBL2104105 659 | COc1ccc2c(c1OC)C13CCN(C)C(C2)C1(O)CCC(O)C3 CHEMBL2104603 660 | CCN(CC)CC(=O)Nc1c(C)cccc1C(=O)OC CHEMBL2105549 661 | CCC(OC(N)=O)C1CCCCC1 CHEMBL2104652 662 | CCc1oc2ccccc2c1C(=O)c1ccc(OCCN(CC)CC)cc1 CHEMBL2106228 663 | CCc1nncn1C1CCCCC1 CHEMBL2105027 664 | CCN1CCCC1CNS(=O)(=O)c1cc(Cl)c(N)cc1OC CHEMBL2105073 665 | COC(=O)c1c2ccccc2n2c1C1CN(C)CCN1c1ccccc1C2 CHEMBL2110702 666 | CN(C)CCOCCOC(=O)N1c2ccccc2Sc2ccccc21 CHEMBL2110862 667 | CC1NCc2cc(-c3ccc4c(=O)c(C(=O)O)cn(C5CC5)c4c3OC(F)F)ccc21 CHEMBL215303 668 | O=C(O)CSc1nnc(Br)n1-c1ccc(C2CC2)c2ccccc21 CHEMBL2105720 669 | Clc1ccc([I+]c2ccc(Cl)cc2Cl)c(Cl)c1 CHEMBL2110834 670 | 
CCN(CC)CCOC(=O)C1(c2ccc(C)c(C)c2)CCCC1 CHEMBL2110967 671 | CC(=O)OC1(C(C)CN(C)C)CCCCC1c1ccccc1 CHEMBL2110963 672 | CCCCCCCCCCCC[N+](C)(C)CC(=O)OCC CHEMBL2111046 673 | Clc1ccc(OCC2=NCCc3ccccc32)cc1 CHEMBL2110933 674 | O=C(O)CN1CCN(CC(=O)O)CCN(C(CO)C(O)CO)CCN(CC(=O)O)CC1 CHEMBL2111181 675 | C#CCOCC1COC(Cn2ccnc2)(c2ccc(Cl)cc2Cl)O1 CHEMBL2110654 676 | [Ca+2].[Br-].[Br-] CHEMBL2105852 677 | [Mg+2].O=C([O-])C(O)C(O)C(O)C(O)CO.O=C([O-])C(O)C(O)C(O)C(O)CO CHEMBL2107145 678 | Cn1cccc1C(=O)C(C#N)C(=O)Nc1ccccc1 CHEMBL2111033 679 | [Mg+2].[Mg+2].[Mg+2].O=P([O-])([O-])[O-].O=P([O-])([O-])[O-] CHEMBL2106873 680 | CC(=O)C1=C(O)C(N)C2Cc3c(C)c4ccc(C)c(O)c4c(O)c3C(=O)C2(O)C1=O CHEMBL2110884 681 | N=C(N)c1ccc(C=C2c3ccccc3-c3ccccc32)cc1 CHEMBL2111004 682 | CCN(CC)C(C)CC(=O)Nc1ccccc1 CHEMBL2104563 683 | CC(=NCc1ccccc1)c1ccccc1O CHEMBL2105284 684 | CC(Cc1ccccc1)NCCC#N CHEMBL2105566 685 | CCN(CC)CC(C)(C)COC(=O)C(COP(=O)(O)O)c1ccccc1 CHEMBL2104488 686 | CCOC(=O)c1nc2c3cc(C)ccc3n(CC)c(=O)n2n1 CHEMBL2105192 687 | Cc1cc(Cl)cc(C(=NCCCC(N)=O)c2ccc(Cl)cc2)c1O CHEMBL2104937 688 | COc1ccccc1N1CCN(CC(O)c2ccc(O)c(O)c2)CC1 CHEMBL2105227 689 | CC(ON=C1c2ncncc2-c2cc(Br)cc(Br)c21)C(=O)NCCN(C)C CHEMBL2106169 690 | CC(=O)Oc1ccc([N+](=O)[O-])c(OC(C)=O)c1C(=O)Nc1cc(C(F)(F)F)cc(C(F)(F)F)c1 CHEMBL2104705 691 | COc1ccccc1N1CCN(CCc2c(C)[nH]c3cc(OC)c(OC)cc32)CC1 CHEMBL2105252 692 | COc1ccc(OC)c(C(O)C(C)NC(C)(C)C)c1 CHEMBL289093 693 | Cn1ncc(Cl)c1-c1cc(C(=O)NC(CN)Cc2cccc(F)c2)sc1Cl CHEMBL2219422 694 | O=C(c1ccco1)N(c1cnccn1)C1CCN(CCc2ccccc2)CC1 CHEMBL161084 695 | COc1c(-c2ccc3cc(NS(C)(=O)=O)ccc3c2)cc(-n2ccc(=O)[nH]c2=O)cc1C(C)(C)C CHEMBL3137312 696 | CCC1(c2ccccc2)OCC(C2CCCCN2)O1 CHEMBL305904 697 | CN(C)CCC(O)(P(=O)(O)O)P(=O)(O)O CHEMBL55812 698 | COc1cc2nccc(Oc3ccc(NC(=O)NC4CC4)c(Cl)c3)c2cc1C(N)=O CHEMBL1289601 699 | NC(=O)Oc1ccc(Cc2ccccc2)cc1 CHEMBL608856 700 | COCC1CN=C(c2ccccc2Cl)c2cc(Br)ccc2N1C CHEMBL1290783 701 | CCCCOC(=O)c1ccc(O)cc1 CHEMBL459008 702 | CCOC(=O)C(CCc1ccccc1)NC(C)C(=O)N1C(=O)N(C)CC1C(=O)O 
CHEMBL317094 703 | O=C1c2ccccc2C(=O)C1c1ccc(F)cc1 CHEMBL24924 704 | CCC(O)C(CC(C)N(C)C)(c1ccccc1)c1ccccc1 CHEMBL162243 705 | CCOC(=O)Oc1ccc(CCNC(=O)C(CCSC)NC(C)=O)cc1OC(=O)OCC CHEMBL2106351 706 | O=C(OCCOCCN1CCCCC1)N1c2ccccc2Sc2cccnc21 CHEMBL2104900 707 | CC(Oc1c(Cl)cccc1Cl)C1=NCCN1 CHEMBL2107417 708 | CCCC1(CC(=O)O)OCCc2c3ccccc3[nH]c21 CHEMBL1968386 709 | CCN(CC)CCN1c2ccccc2Sc2ccccc21 CHEMBL1620412 710 | C=C1CC(C)C2C3Cc4ccc(O)cc4C2(CCN3CC2CCC2)C1 CHEMBL2111060 711 | CCOCCOC(=O)C=Cc1ccc(OC)cc1 CHEMBL2104045 712 | COc1ccccc1OCC1CNC(=O)O1 CHEMBL2104790 713 | Oc1cc2c(cc1O)C(COc1ccc(Cl)cc1)NCC2 CHEMBL2106010 714 | CCN(CC)CCOCC(O)COc1ccccc1OC CHEMBL2107454 715 | NC(=O)c1ccccc1OCC(=O)OCC1CCCO1 CHEMBL2104233 716 | COc1cc(N)c(Cl)cc1C(=O)NC1CCNCC1OC CHEMBL2107465 717 | COC1=CC(=O)OC1C(O)c1ccccc1Cl CHEMBL2106442 718 | CCCC(=O)OC1(C(=O)COC(C)=O)CCC2C3CC(C)C4=CC(=O)CCC4(C)C3C(O)CC21C CHEMBL2104661 719 | CC(C)C(=O)OC(OC(=O)NCC(CC(=O)O)c1ccc(Cl)cc1)C(C)C CHEMBL2107312 720 | CCc1c(C(=O)C(N)=O)c2c(cccc2OCC(=O)OC)n1Cc1ccccc1 CHEMBL2105659 721 | CN(C)CCOC(=O)c1ccc(Cl)c(N)c1 CHEMBL2106019 722 | Cc1nc2cc(C(c3ccccc3)n3ccnc3)ccc2[nH]1 CHEMBL2105112 723 | CCOC(=O)OC(C)OC(=O)C1N2C(=O)C(N=CN3CCCCCC3)C2SC1(C)C CHEMBL2106658 724 | NC(=O)c1ccc(Cl)c(S(N)(=O)=O)c1 CHEMBL2107090 725 | CNCC1CCc2sc(C)nc2C1 CHEMBL2104686 726 | O=C1OC2C(O)C(O)OC2C1O CHEMBL2107425 727 | CC(S)C(=O)NCC(=O)O CHEMBL2106444 728 | COc1ccc(C(O)CN2CCN(C(c3ccccc3)c3ccccc3)CC2)cc1OC CHEMBL2074664 729 | N=C(N)c1ccc(C=Cc2ccc(C(=N)N)cc2)cc1 CHEMBL142304 730 | CN(C)CCC=C1c2ccccc2C(C)(C)c2ccccc21 CHEMBL110094 731 | O=C1c2ccccc2S(=O)(=O)N1CCCCN1CCN(c2ncccn2)CC1 CHEMBL8412 732 | OC(CNc1ncccn1)c1ccccc1 CHEMBL2110896 733 | O=c1[nH]c2ccccc2c(=O)n1CCCN1CCN(c2cccc(Cl)c2)CC1 CHEMBL2110792 734 | CN(C)CCC=C1c2cccn2CCc2ccc(Cl)cc21 CHEMBL2110942 735 | OCCCN1CCN(CCCC2c3ccccc3Sc3ccc(Cl)cc32)CC1 CHEMBL2110631 736 | CC1(C)SC2C(NC(=O)CSc3ccccc3)C(=O)N2C1C(=O)O CHEMBL2111130 737 | CCCc1c2oc(C(=O)O)cc(=O)c2cc2c(=O)cc(C(=O)O)oc21 CHEMBL2110970 738 | 
CC(C)NC1C2CCC(CC2)C1(O)c1ccc(Cl)c(Cl)c1 CHEMBL2110879 739 | CC(=O)C=C(C)c1ccc(-c2ccccc2)cc1 CHEMBL2104473 740 | N=C(N)NC(=O)c1ccc(N2CCN(C(=O)c3ccc[nH]3)CC2)c(C(F)(F)F)c1 CHEMBL2105423 741 | CC(=C(CCO)SSCC1CCCO1)N(C=O)Cc1cnc(C)nc1N CHEMBL1740659 742 | C=CCNc1nc(C(=O)NN=CC=Cc2ccc([N+](=O)[O-])o2)cs1 CHEMBL2105145 743 | CCN(CC)CCN1C(=O)c2ccccc2C1c1ccccc1 CHEMBL2104536 744 | CC(C)OC(=O)OC1CCCCC1 CHEMBL2104038 745 | CC(C)(C)NCC(O)COCC1COc2ccccc2O1 CHEMBL2106691 746 | COC(=O)C1(c2ccccc2)CCCN(C)C(C)C1 CHEMBL2104520 747 | CC(=O)NC(C(O)CC(=O)C(=O)O)C(O)C(O)C(O)CO CHEMBL2105945 748 | CC[N+](C)(C)CCOC(=O)C(O)(c1ccccc1)c1ccccc1 CHEMBL2110848 749 | [Ca+2].O=P[O-].O=P[O-] CHEMBL2218895 750 | CSC[S+]([O-])CC(CO)NC(=O)C=Cc1c(C)nc(O)nc1O CHEMBL2303630 751 | Cc1cnc(NC(=O)Nc2cc(Br)c(C)cc2OCC2CNCCO2)cn1 CHEMBL3039517 752 | Cc1nc2ccccc2n1Cc1ccc(Cl)cc1 CHEMBL152649 753 | O=c1n(Cl)c(=O)n(Cl)c(=O)n1Cl CHEMBL1698868 754 | OCCOCCOCCN1CCN(C(c2ccccc2)c2ccc(Cl)cc2)CC1 CHEMBL2104263 755 | CCN(CC)CCOC(=O)N1CCC(Cc2ccccc2)CC1 CHEMBL2105607 756 | CCC(C)(C)NCC(O)COc1cc(CCC(=O)c2ccc(C)cc2)ccc1OC CHEMBL2104075 757 | CCCCCCNC(=N)NC(=N)N CHEMBL2106939 758 | O=C(N1CCCC1)C(CCN1CCOCC1)(c1ccccc1)c1ccccc1 CHEMBL2106152 759 | [Ca+2].[Ca+2].[Ca+2].O=C([O-])CC(O)(CC(=O)[O-])C(=O)[O-].O=C([O-])CC(O)(CC(=O)[O-])C(=O)[O-] CHEMBL2106123 760 | C[N+](C)(C)CCOC(=O)C(O)(c1ccccc1)C1CC2C=CC1C2 CHEMBL2110818 761 | Cc1ccc(C(C)OC(=O)C2(C)CCC(C(=O)O)C2(C)C)cc1 CHEMBL2110648 762 | O=C(Nc1ccc(Cl)c(S(=O)(=O)N2CCNCC2)c1O)Nc1cccc(F)c1Cl CHEMBL2178579 763 | CCC(=O)N(c1ccc(Cl)c(Cl)c1)C1CCCC1N(C)C CHEMBL20679 764 | Cl.CCCCNc1ccc(C(=O)OCCN(CC)CC)cc1 CHEMBL593548 765 | CCN(CC)CCOC(=O)CC(O)(c1ccccc1)c1ccccc1 CHEMBL2107672 766 | COC1C(O)C(N)C(OC2OC(C(C)N)CCC2N)C(O)C1N(C)C(=O)CN CHEMBL3084803 767 | Oc1cc(O)c(Cl)cn1 CHEMBL1730601 768 | [Na+].O=C([O-])CCC(=O)O CHEMBL1200345 769 | Cc1ccc2c(-c3ccccc3)nc(=O)n(C(C)C)c2c1 CHEMBL268501 770 | CC12CCC3C(CCC4=CC(=O)CCC43C)C1CCC2=O CHEMBL274826 771 | CCC(c1ccc(O)cc1)C(CC)c1ccc(O)cc1 CHEMBL9225 772 | 
CCOC(=O)C(C)(C)Oc1ccc(Cl)cc1 CHEMBL565 773 | CC(O)C1C(=O)N2C(C(=O)O)=C(C3CCCO3)SC12 CHEMBL556262 774 | Nc1c2c(nc3c1CCCC3)CCC2 CHEMBL130880 775 | CC1(c2nc3cccc(C(N)=O)c3[nH]2)CCCN1 CHEMBL506871 776 | O=C(O)C=CC(=O)O CHEMBL503160 777 | O=C1NC(CCCl)Oc2ccccc21 CHEMBL124815 778 | O=c1[nH]cnc2c(CN3CC(O)C(CO)C3)c[nH]c21 CHEMBL269864 779 | O=[N+]([O-])c1cn2c(n1)OCC(OCc1ccc(OC(F)(F)F)cc1)C2 CHEMBL227875 780 | CCCCC(CC)CNC(=O)CC(C)O CHEMBL452859 781 | Cc1nnc(SCC2=C(C(=O)O)N3C(=O)C(NC(=O)Cn4cnnn4)C3SC2)s1 CHEMBL1435 782 | Cc1cc(NC(=O)C(=O)c2cc(Cc3ccc(C#N)cc3)n3ccccc23)sn1 CHEMBL271068 783 | CC(O)(CC(=O)O)CC(=O)O CHEMBL50444 784 | COc1ccccc1O CHEMBL13766 785 | c1ccc(Cn2c3ccccc3nc2N2CCNCC2)cc1 CHEMBL56900 786 | COc1cc(OC)c(Cl)c2c1C(=O)C1(O2)C(OC)=CC(=O)CC1C CHEMBL562 787 | CCN(CC)CCCC(C)Nc1cc(OC)cc2cccnc21 CHEMBL472698 788 | CC(CCC(=O)O)C1CCC2C3C(O)CC4CC(O)CCC4(C)C3CCC12C CHEMBL1551 789 | Cc1cc(NS(=O)(=O)c2ccc(N)cc2)nc(C)n1 CHEMBL485696 790 | Cn1c2ncn(CCO)c2c(=O)n(C)c1=O CHEMBL699 791 | CC(C)NCC(O)COc1ccccc1-n1cccc1 CHEMBL27077 792 | CCCCCCCCC=CCCCCCCC(N)(CO)C(=O)O CHEMBL1200394 793 | O=[N+]([O-])OC1COC2C(O)COC12 CHEMBL1311 794 | CCOC(=O)c1c(C)cc2c(CO)nnc(O)c2c1C CHEMBL153427 795 | O=C(C=Cc1cccc(F)c1)NC1CC1 CHEMBL132663 796 | CC(Cc1cccc(C(F)(F)F)c1)NCCOC(=O)c1ccccc1 CHEMBL400599 797 | Cc1cc(C)cc(C(=O)NCc2ccncc2)c1 CHEMBL1581974 798 | Nc1ccc(C(=O)Nc2ccccc2N)cc1 CHEMBL1351761 799 | CCC(O)c1ccccc1 CHEMBL1397202 800 | CCn1cc(C(=O)O)c(=O)c2cnc(N3CCCC3)nc21 CHEMBL311350 801 | O=C(O)C1CSC(c2ccccc2O)N1C(=O)CCS CHEMBL309962 802 | [N-]=[N+]=CC(=O)OCC(N)C(=O)O CHEMBL1095699 803 | C=CC1(C)CC(=O)C2(O)C(C)(O1)C(OC(C)=O)C(O)C1C(C)(C)CCC(O)C12C CHEMBL52606 804 | CC(=O)NN=Cc1ccc([N+](=O)[O-])o1 CHEMBL1565322 805 | O=[As]O[As]=O CHEMBL1200978 806 | CC(C)N(CCC(CCN1CCCCC1)(C(N)=O)c1ccccc1Cl)C(C)C CHEMBL276177 807 | [Cl-].C[N+](C)(C)CCO CHEMBL282468 808 | COP(=O)(OC)C(O)C(Cl)(Cl)Cl CHEMBL167150 809 | CN(C)CCCC1(c2ccc(F)cc2)OCc2cc(C#N)ccc21 CHEMBL549 810 | C=CCC1(C(C)C)C(=O)NC(=O)NC1=O CHEMBL7863 
811 | O=c1n(CC2CO2)c(=O)n(CC2CO2)c(=O)n1CC1CO1 CHEMBL453863 812 | CS(=O)(=O)OCCCNCCCOS(C)(=O)=O CHEMBL96292 813 | OCCN1CCN(CCC=C2c3ccccc3Sc3ccc(Cl)cc32)CC1 CHEMBL87385 814 | CCC(C)C(C)(COC(N)=O)COC(N)=O CHEMBL1200922 815 | CC1CC(=O)NN=C1c1ccc2c(c1)OCC(=O)N2 CHEMBL46765 816 | C#CC1(O)C=CC2C3CCC4=CC(=O)CCC4C3CCC21CC CHEMBL1213583 817 | Oc1cc(O)c2c(c1)OC(c1ccc(O)c(O)c1)C(O)C2O CHEMBL123809 818 | COc1ccc(C2(C#N)CCC(C(=O)O)CC2)cc1OC1CCCC1 CHEMBL511115 819 | CN1CCC(=C2c3ccccc3C(O)c3ccccc32)CC1 CHEMBL440557 820 | NCCc1c[nH]cn1 CHEMBL90 821 | C#CC1(O)CCC2C3CCc4cc(O)ccc4C3C(OC)CC21C CHEMBL1628161 822 | CCCOC(C(=O)OC1CCN(C)CC1)(c1ccccc1)c1ccccc1 CHEMBL1078261 823 | O=C(CCCN1CC=C(n2c(=O)[nH]c3ccccc32)CC1)c1ccc(F)cc1 CHEMBL1108 824 | COc1cc(Cc2cnc(N)nc2N)cc(OC)c1N CHEMBL122351 825 | [Na+].O=C1OC(C(O)CO)C([O-])=C1O CHEMBL591665 826 | Cc1cc2nc3c(=O)[nH]c(=O)nc-3n(CC(O)C(O)C(O)COP(=O)(O)O)c2cc1C CHEMBL1201794 827 | Nc1ncnc2c1ncn2C1OC(CO)C(O)C1O CHEMBL1090 828 | COCC(NC(C)=O)C(=O)NCc1ccccc1 CHEMBL58323 829 | CCN(CC)C(=S)S CHEMBL961 830 | CC(C)(C)C1(O)CCN2CC3c4ccccc4CCc4cccc(c43)C2C1 CHEMBL8514 831 | CCC1(c2ccc(N)cc2)CCC(=O)NC1=O CHEMBL488 832 | [Na+].O=C([O-])C(Cl)Cl CHEMBL306823 833 | CCOC(=O)COc1ccc2c(c1)CC(NCC(O)c1cccc(Cl)c1)CC2 CHEMBL1193948 834 | CCOC(=O)C(CCc1ccccc1)NC(C)C(=O)N1CC2(CC1C(=O)O)SCCS2 CHEMBL431 835 | C[N+]1(C)CCN(CC(O)(c2ccccc2)C2CCCCC2)CC1 CHEMBL1201325 836 | COC(=O)C1=CCCN(C)C1 CHEMBL7303 837 | CCCN1CC(CSC)CC2c3cccc4[nH]cc(c43)CC21 CHEMBL531 838 | CN(C)CCCN1c2ccccc2Sc2ccccc21 CHEMBL564 839 | CN(CCCl)CCCl CHEMBL427 840 | NC(=O)C(c1ccccc1)(c1ccccc1)C1CCN(CCc2ccc3c(c2)CCO3)C1 CHEMBL1346 841 | C#CCOc1ccc2c(C)cc(=O)oc2c1 CHEMBL1377940 842 | O=[N+]([O-])c1ccc(Nc2ccc(N=C=S)cc2)cc1 CHEMBL93385 843 | O=C(C(c1ccccc1)c1ccccc1)N1CCN(CC(O)COc2cccc3ncccc32)CC1 CHEMBL65067 844 | O=C(C[S+]([O-])C(c1ccccc1)c1ccccc1)NO CHEMBL93077 845 | COCN1c2ccc([N+](=O)[O-])cc2C(c2ccccc2)=NCC1=O CHEMBL146817 846 | CN(C(=O)c1c(O)c2ccccc2n(C)c1=O)c1ccccc1 CHEMBL11672 847 | 
COC(S)=NCCn1c(C)ncc1[N+](=O)[O-] CHEMBL135000 848 | CCC1(CC)C(=O)NCC(C)C1=O CHEMBL1200790 849 | NCCCCCC(O)(P(=O)(O)O)P(=O)(O)O CHEMBL55214 850 | COc1ccc2c(c1)CN(C)CC2c1ccc(Cl)c(Cl)c1 CHEMBL287257 851 | O=S(=O)(c1nc(-c2ccc(F)cc2)c(-c2ccc(F)cc2)[nH]1)C(F)(F)C(F)F CHEMBL18647 852 | CC(C)NCC(O)c1ccc2ccccc2c1 CHEMBL16476 853 | CC(=O)c1ccc2c(c1)N(CCCN1CCC(CCO)CC1)c1ccccc1S2 CHEMBL1584 854 | O=C1CN(N=Cc2ccc(-c3ccc([N+](=O)[O-])cc3)o2)C(=O)N1 CHEMBL1201288 855 | CCOC(=O)C1(c2ccccc2)CCN(CCc2ccccc2)CC1 CHEMBL285517 856 | O=C(O)CCNC(=O)c1ccccc1 CHEMBL1231530 857 | Oc1ccc(OCc2ccccc2)cc1 CHEMBL1388 858 | Cn1c(COC(N)=O)ncc1[N+](=O)[O-] CHEMBL290299 859 | O=C1CSC2(CCN(CCCN3c4ccccc4Sc4ccc(Cl)cc43)CC2)N1 CHEMBL1483796 860 | FC(F)(F)C(F)(F)C(F)(F)F CHEMBL1663 861 | COc1ccc(-c2noc(CC(=O)O)c2-c2ccc(OC)cc2)cc1 CHEMBL259972 862 | NCCCC(=O)O CHEMBL96 863 | CCNC1(c2ccccc2)CCCCC1 CHEMBL279924 864 | CCC(C(=O)O)C1(O)CCCCC1 CHEMBL1697739 865 | CC(C)OP(=O)(F)OC(C)C CHEMBL1025 866 | CC(N)C(O)c1ccccc1 CHEMBL61006 867 | Cc1cc(=O)oc2cc(O)ccc12 CHEMBL12208 868 | CC(C)n1c2ccccc2n2cnc(-c3noc(C4CC4)n3)c2c1=O CHEMBL279867 869 | Cc1ncc([N+](=O)[O-])n1CC(C)O CHEMBL498847 870 | O=C(O)CNC(=O)CNC(=O)CNC(=O)CS CHEMBL1615778 871 | CCN(CC)CCNC(=O)c1ccc(N)c(Cl)c1 CHEMBL1618378 872 | CCCOc1cc(N)ccc1C(=O)OCCN(CC)CC CHEMBL1195 873 | CC(C)NCCCC1(C(N)=O)c2ccccc2-c2ccccc21 CHEMBL1201242 874 | Cc1cc(S(=O)(=O)O)c2c(C)ccc(C(C)C)cc1-2 CHEMBL1852533 875 | CN(C)C1CCc2[nH]c3c(cc(F)cc3F)c2C1 CHEMBL1882682 876 | COC1OC2OC3(C)CCC4C(C)CCC(C1C)C42OO3 CHEMBL566534 877 | CC(C)C(=O)Nc1cccc(C2CN3CCSC3=N2)c1 CHEMBL2110807 878 | CNC1(C)C2CCC(C2)C1(C)C CHEMBL2103881 879 | CC1CN(N=Cc2ccc([N+](=O)[O-])o2)C(=O)O1 CHEMBL2107517 880 | OC(C1=CC(=C(c2ccccn2)c2ccccn2)C=C1)(c1ccccn1)c1ccccn1 CHEMBL2105304 881 | COC(=O)C(C)c1cccc(C(c2ccccc2)n2ccnc2)c1 CHEMBL2107439 882 | [O-][N+]1=C(c2ccccc2)c2cc(Cl)ccc2N=C(NCC2CC2)C1 CHEMBL2104165 883 | Cc1ccccc1OCC(O)CN1CC=CCC1 CHEMBL2107693 884 | O=C(Nc1ccc(CC2CCC(C(O)c3ccccc3)N2)cc1)C1CCc2nccc(=O)n21 
CHEMBL2107826 885 | O=C1c2ccccc2C(=O)C1c1ccc(C(F)(F)F)cc1 CHEMBL2105129 886 | CCC(C)(CC)OC(N)=O CHEMBL2104208 887 | COc1ccccc1OCCNCC(O)COc1cccc2[nH]nnc21 CHEMBL2104914 888 | C[N+](C)(CCO)CC(=O)[O-] CHEMBL2105274 889 | CC(C)(O)CC(C)(O)c1ccc(Cl)cc1 CHEMBL2106273 890 | CN(C)C(=O)COC1c2ccccc2CCc2ccccc21 CHEMBL2104766 891 | CN(C)C(=O)NC1CCC(CCN2CCN(c3cccc(Cl)c3Cl)CC2)CC1 CHEMBL2028019 892 | OCc1cccc(Cc2c[nH]cn2)c1O CHEMBL2104355 893 | c1cc(CNCc2ccncc2)ccn1 CHEMBL2103958 894 | C[N+]1(C)CCC(OC(=O)C(O)(c2cccs2)c2ccccc2)C1 CHEMBL2110853 895 | CC[N+]1(CC)CCC(OC(=O)C(O)(c2ccccc2)c2ccccc2)C1 CHEMBL2110788 896 | c1ccc2c(c1)CCN1CCNCC21 CHEMBL2104690 897 | CC(CC#N)N(C)CC(=O)N(C)c1ccc(Cl)cc1C(=O)c1ccccc1F CHEMBL2104077 898 | O=C(O)CCCNC(=O)c1ccc(Cl)cc1O CHEMBL2107776 899 | O=C(O)CC1(CO)CCCCC1 CHEMBL2110826 900 | CN(C(=O)CNC1CC1)c1ccc(Cl)cc1C(=O)c1ccccc1Cl CHEMBL2104597 901 | COC(=O)CCC=C=CCC1C(O)CC(O)C1C=CC(O)COc1ccccc1 CHEMBL2104232 902 | O=c1cc(CN2CCOCC2)c2cc(O)c(O)cc2o1 CHEMBL2104742 903 | CC(C)C(OC(=O)c1cccnc1)c1ccc(Cl)cc1 CHEMBL2107111 904 | CCCCN(C(=O)C(C)C)c1nc(C)co1 CHEMBL2105068 905 | CCc1cc(S(=O)(=O)O)c2cc(C(C)C)cccc1-2 CHEMBL2105570 906 | COc1cc2c(cc1OC)S(=O)(=O)OC(C(=O)NC(C)CC(C)(C)N(C)C)C2 CHEMBL2105495 907 | O=C1COC(c2ccc(C(F)(F)F)cc2)CN1 CHEMBL2105520 908 | COC(=O)c1cnc2cc(OC(C)C)c(OC(C)C)cc2c1O CHEMBL2105380 909 | Nc1ncc(S(=O)(=O)c2ccc(N)cc2)s1 CHEMBL2107139 910 | COc1ccc2c3c1OC1C(OC(=O)c4cccnc4)CCC4C(C2)N(C)CCC314 CHEMBL2104623 911 | NC(=O)c1ccc(Oc2ccc(CNCCC3CCOCC3)cc2F)nc1 CHEMBL2103878 912 | OCC(O)C(O)C(O)C(O)COC1OC(CO)C(O)C(O)C1O CHEMBL2104398 913 | CCCCOCC(O)COc1ccccc1 CHEMBL2104223 914 | CN(C)CCN(C)n1cc(-c2ccccc2)c2ccccc21 CHEMBL2104611 915 | CCCCC(C)C(O)C=CC1C(O)CC2OC(=C(C#N)CCCC(=O)O)CC21 CHEMBL2104724 916 | Cn1c2ncn(CCCNC3COC4C(O[N+](=O)[O-])COC34)c2c(=O)n(C)c1=O CHEMBL2107483 917 | CCOC(=O)C1(c2ccccc2)CCC=CC1N(C)C CHEMBL2104560 918 | CCNC1=Nc2ccc(Cl)cc2C(C)(c2ccccc2)O1 CHEMBL2106227 919 | COc1c2c(cc3c1CN(C)CC3)OCO2 CHEMBL1606295 920 | 
CCN(CC)CCOC1(c2ccccc2)CCN(CCNc2ccccc2)CC1 CHEMBL2110911 921 | CCn1cc(C(=O)O)c(=O)c2cc3c(cc21)CCO3 CHEMBL2110851 922 | CCC1CC(=O)C2Oc3c4c(ccc3OC)CC3C1C42CCN3CC1CC1 CHEMBL2110790 923 | NC(=O)OCC1C(NC(=O)C(=NOCC(=O)O)c2csc(N)n2)C(=O)N1S(=O)(=O)O CHEMBL1614658 924 | CN(C)c1cnccn1 CHEMBL2110770 925 | CN=C(NC)NCc1ccc(OC)cc1 CHEMBL2110972 926 | COC(=O)C1CC2=CC(=O)CCC2(C)C2CCC3(C)C(CCC3(O)CCC(=O)O)C12 CHEMBL2110977 927 | CCCCN(CCCC)C(=O)OCC[N+](C)(C)CC CHEMBL2110746 928 | CNC(=O)CCN1CCN(CCC=C2c3ccccc3Sc3ccc(Cl)cc32)CC1 CHEMBL2110894 929 | CC[N+](C)(C)CCOC(=O)CCC(=O)OCC[N+](C)(C)CC CHEMBL2111153 930 | NCc1ccccc1Sc1ccccc1CO CHEMBL310160 931 | C[N+]1(C)CCC(OC(=O)C(O)(c2ccccc2)c2ccccc2)CC1 CHEMBL2110714 932 | NC1CCCC1c1ccccc1 CHEMBL2110918 933 | N.O=C(O)c1ccccc1 CHEMBL2105970 934 | S=C=Nc1ccc(N=C=S)cc1 CHEMBL2104676 935 | COC(=O)Nc1cc(N2CC=CCC2)nc2nc(=O)on21 CHEMBL2104058 936 | CC1C[S+]([O-])C(C)(C)C(C(=O)O)N1 CHEMBL2105470 937 | c1csc(Cc2ccccc2OCC2CNCCO2)c1 CHEMBL2105438 938 | O=C1CCCC(=O)C1C(=S)Nc1ccc(Cl)cc1 CHEMBL2105264 939 | N#CCNC(=O)c1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)cc3)n2)cc1 CHEMBL1078178 940 | CCOP(=S)(OCC)Oc1cc(C)nc(C(C)C)n1 CHEMBL388560 941 | OC(O)C(Cl)(Cl)Cl CHEMBL455917 942 | O=C1c2ccccc2S(=O)(=O)N1C1CCCNC1=O CHEMBL72115 943 | O=C(O)c1ccccc1Nc1cccc(Cl)c1Cl CHEMBL23115 944 | Nc1ccc(OCCCCCN2C(=O)c3ccccc3C2=O)cc1 CHEMBL94192 945 | CC(C)C1CCC(C)CC1O CHEMBL256087 946 | CC(=O)Nc1ccc(OC(=O)c2ccccc2O)cc1 CHEMBL92590 947 | N=C(N)NN=Cc1c(Cl)cccc1Cl CHEMBL420 948 | O=[N+]([O-])OCC(CO)(CO[N+](=O)[O-])CO[N+](=O)[O-] CHEMBL466660 949 | CN(C)CC1CCn2cc(c3ccccc32)C2=C(c3cn(c4ccccc34)CCO1)C(=O)NC2=O CHEMBL91829 950 | C#CC1(OC(N)=O)CCCCC1 CHEMBL1576 951 | BrC(Br)Br CHEMBL345248 952 | Cc1cc2cc3c(C)cc(=O)oc3c(C)c2o1 CHEMBL1475 953 | N#Cc1cc(-c2ccccc2)nnc1NCCN1CCOCC1 CHEMBL150365 954 | CC(N)C(O)c1ccc(O)c(O)c1 CHEMBL677 955 | COc1cc2c(cc1OC)C1CC(=O)C(CC(C)C)CN1CC2 CHEMBL117785 956 | CCOC(=O)C1=C(C)NC(C)=C(C(=O)OC)C1c1cccc(Cl)c1Cl CHEMBL1480 957 | CCC1(C)CC(=O)NC1=O CHEMBL696 958 | 
C=C1CC2C3C=C(C)C4=CC(=O)CCC4(C)C3CCC2(C)C1(OC(C)=O)C(C)=O CHEMBL1328968 959 | c1ccc(Cc2nc3ccccc3[nH]2)cc1 CHEMBL355063 960 | CCCCCCCCC=CCCCCCCCC(=O)OCC(O)CO CHEMBL428593 961 | CCOC(=O)C(CCc1ccccc1)NC1CCCN2CCCC(C(=O)O)N2C1=O CHEMBL515606 962 | CCC1(c2c[nH]cn2)Cc2ccc(F)cc2C1 CHEMBL1255582 963 | CC(=O)NS(=O)(=O)c1ccc(N)cc1 CHEMBL455 964 | Nc1c(CC(=O)O)cccc1C(=O)c1ccc(Br)cc1 CHEMBL1077 965 | CCS(=O)(=O)c1ccc(F)cc1 CHEMBL93309 966 | Cc1cc(CC(=O)O)n(C)c1C(=O)c1ccc(Cl)cc1 CHEMBL19490 967 | CCCCCc1ccc(C)cc1O CHEMBL1512677 968 | CC1(C)SC2C(Br)C(=O)N2C1C(=O)O CHEMBL73622 969 | CCn1cc(C(=O)O)c(=O)c2ccc(Cc3ccccc3)nc21 CHEMBL35337 970 | CCN(CC)CCOC(=O)C(c1ccccc1)c1ccccc1 CHEMBL353846 971 | CNC1CCCN(c2c(F)cc3c(=O)c(C(=O)O)cn(C4CC4)c3c2OC)C1 CHEMBL1210954 972 | CC(C)CNC1CCS(=O)(=O)c2sc(S(N)(=O)=O)cc21 CHEMBL417975 973 | CNC1=Nc2ccc(Cl)cc2C(c2ccccc2)=[N+]([O-])C1 CHEMBL451 974 | NC12CC3CC(CC(C3)C1)C2 CHEMBL660 975 | CN1CCCN=C1COC(=O)C(O)(c1ccccc1)C1CCCCC1 CHEMBL1495 976 | Nc1c2ccccc2nc2c1CCCC2 CHEMBL95 977 | O=C(O)COCCN1CCN(C(c2ccccc2)c2ccc(Cl)cc2)CC1 CHEMBL1201191 978 | O=CCCCC=O CHEMBL1235482 979 | O=C1C(O)N=C(c2ccccc2F)c2cc(Cl)ccc2N1CCO CHEMBL64677 980 | CC12CC(O)C3C(CC(F)C4=CC(=O)C=CC43C)C1CCC2(O)C(=O)CO CHEMBL1200774 981 | O=C(O)Cc1nn(Cc2nc3cc(C(F)(F)F)ccc3s2)c(=O)c2ccccc12 CHEMBL10372 982 | Cc1cc(Cl)ccc1N=C1SCS1 CHEMBL93885 983 | CN1c2c(=O)nc(N)[nH]c2NCC1CNc1ccc(C(=O)NC(CCC(=O)O)C(=O)O)cc1 CHEMBL1231574 984 | CNC(=N)NC(=O)Nc1c(C)cccc1C CHEMBL448416 985 | CCCCC(C)(O)CC=CC1C(O)CC(=O)C1CCC=CCCC(=O)OC CHEMBL266979 986 | Cc1cccc(C(C)c2c[nH]cn2)c1C CHEMBL778 987 | COc1ccc(Cc2cnc(N)nc2N)cc1OC CHEMBL19633 988 | Oc1ccc(Cl)cc1 CHEMBL57053 989 | NCCCCC(NC1CCc2ccccc2N(CC(=O)O)C1=O)C(=O)O CHEMBL430554 990 | CCOC(=O)c1ccc(OC(=O)CCCCCN=C(N)N)cc1 CHEMBL87563 991 | COc1ccc(C=Cc2cc(OC)c(OC)c(OC)c2)cc1OP(=O)(O)O CHEMBL1206232 992 | CN1CCC(=C2c3ccsc3CCc3ccccc32)CC1 CHEMBL294951 993 | CN(C)CCCOC1C(C(O)CO)OC2OC(C)(C)OC12 CHEMBL1316321 994 | CC1(C(=O)O)CSC(c2ccc(O)cc2O)=N1 CHEMBL432481 995 | 
CCN(CC)C(=O)NC1C=C2c3cccc4[nH]cc(c43)CC2N(C)C1 CHEMBL157138 996 | CC(C[N+](C)(C)C)OC(N)=O CHEMBL1482 997 | Cc1ccc(C(=CCN2CCCC2)c2cccc(C=CC(=O)O)n2)cc1 CHEMBL1224 998 | CCOC(=O)C(C)(N)Cc1ccc(O)c(O)c1 CHEMBL1201233 999 | CNS(=O)(=O)CCc1ccc2[nH]cc(C3CCN(C)CC3)c2c1 CHEMBL1278 1000 | CC(CN1CC(=O)NC(=O)C1)N1CC(=O)NC(=O)C1 CHEMBL1738 1001 | Cc1cc(O)ccc1Cl CHEMBL1230222 1002 | -------------------------------------------------------------------------------- /Presentations and Tutorials/RDKit UGM 2014/merck.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Presentations and Tutorials/RDKit UGM 2014/merck.jpeg -------------------------------------------------------------------------------- /Presentations and Tutorials/RDKit UGM 2014/rdkit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Presentations and Tutorials/RDKit UGM 2014/rdkit.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | #### Code snippets from Team-SKI 2 | This repository contains code snippets that shall demonstrate the use of RDKit, pandas, and other python libaries for common computer-aided drug design tasks. 
3 | 4 | ``` 5 | ├── Cheminformatics 6 | │ └── scaffold-analysis 7 | │ 8 | ├── Kinase-Inhibitors 9 | │ └── approved-or-in-clinical-trials 10 | │ 11 | ├── Presentations and Tutorials 12 | │ ├── Molecular Modelling Workshop 2014 13 | │ ├── Protvec demo 2017 14 | │ └── RDKit UGM 2014 15 | │ └── hackaton 16 | │ 17 | └── Virtual-Screening 18 | ``` 19 | These projects were supported by [BioMed X](http://bio.mx/) Innovation Center, Heidelberg 20 | 21 | --- 22 | #### Presentations and tutorials 23 | * **Molecular Modelling Workshop 2014** 24 | *Scaffold analysis in Python with RDKit and pandas* 25 | Jupyter notebook: [view](https://github.com/Team-SKI/snippets/blob/master/Presentations%20and%20Tutorials/Molecular%20Modelling%20Workshop%202014/Scaffold%20analysis%20in%20Python%20with%20RDKit%20and%20pandas.ipynb) 26 | 27 | * **Protvec demo 2017** 28 | *Demo of encoding protein sequences via vectors* 29 | 30 | * **RDKit UGM 2014** - [RDKit UGM](https://github.com/rdkit/UGM_2014) 31 | *Scaffold analysis of ChEMBL data with pandas and RDKit* 32 | Jupyter notebook: [view](https://github.com/Team-SKI/snippets/blob/master/Presentations%20and%20Tutorials/RDKit%20UGM%202014/Scaffold%20analysis%20of%20ChEMBL%20data%20with%20pandas%20and%20RDKit.ipynb) 33 | 34 | *hackathon contribution* 35 | Demo of the SaveXlsxFromFrame function that can export a pandas DataFrame to Excel, including images of molecules. 36 | Jupyter notebook: [view](https://github.com/Team-SKI/snippets/blob/master/Presentations%20and%20Tutorials/RDKit%20UGM%202014/hackaton/XLSX%20export.ipynb) -- Resulting demo xlsx: [download](https://github.com/Team-SKI/snippets/blob/master/IPython/rdkit_hackaton/demo.xlsx) 37 | 38 | --- 39 | #### Cheminformatics 40 | * **Scaffold analysis in Python with RDKit and pandas** 41 | Internal presentation given at BioMed X team meeting, March 2014. 
42 | Jupyter notebook: [view](https://github.com/Team-SKI/snippets/blob/master/Cheminformatics/scaffold-analysis/Scaffold%20analysis%20%26%20Schnellkurs%20in%20chemoinformatics.ipynb) 43 | 44 | * **Markdown usage** 45 | Jupyter notebook: [view](https://github.com/Team-SKI/snippets/blob/master/Cheminformatics/markdown-usage.ipynb) 46 | 47 | * **RDKit pandas integration** 48 | Demo of new functions that were integrated in [RDKit](https://github.com/rdkit/rdkit/commit/8269bc9002cf3c6b106c847d86bcbabc016b697e), 2013. 49 | Jupyter notebook: [view](https://github.com/Team-SKI/snippets/blob/master/Cheminformatics/RDKit-pandas-integration.ipynb) 50 | 51 | * **Rendering of images in IPython** 52 | Example of how to use object representations. 53 | Jupyter notebook: [view](https://github.com/Team-SKI/snippets/blob/master/Cheminformatics/rendering-of-images-in-IPython.ipynb) 54 | 55 | --- 56 | #### Kinase-Inhibitors 57 | * **approved or in clinical trials** 58 | Notebook that extracts all kinase inhibitors that are in clinical trials or are on the market. 59 | Jupyter notebook: [view](https://github.com/Team-SKI/snippets/blob/master/Kinase-Inhibitors/approved-or-in-clinical-trials/Kinase%20inhibitors%20-%20approved%20or%20in%20clinical%20trials.ipynb) 60 | 61 | --- 62 | #### Virtual-Screening 63 | * **ligand-3D-conformations** 64 | [prepare_for_docking.py](https://github.com/Team-SKI/snippets/blob/master/Virtual-Screening/ligand-3D-conformations/prepare_for_docking.py): Script that uses Open Babel to generate 3D structures of compounds. 65 | For usage info run `prepare_for_docking.py -h` 66 | 67 | * **filtering** 68 | [filter_pains.py](https://github.com/Team-SKI/snippets/blob/master/Virtual-Screening/filtering/filter_pains.py): Script that uses RDKit to remove PAINS compounds from SDF or SMILES files. 
#!/usr/bin/env python
"""Generate protonated, energy-minimized 3D structures with Open Babel.

Every molecule of the input file gets hydrogens added for a given pH, a 3D
conformation built and optimized with a force field, and is then written to
the output file. For usage info run ``prepare_for_docking.py -h``.
"""

__author__ = "Samo Turk"
__copyright__ = "Copyright (C) 2014 by BioMed X GmbH"
__credits__ = ["Simone Fulle", "Katra Kolsek"]
__license__ = "GPL"
__version__ = "0.1"
__email__ = "turk@bio.mx"
__status__ = "Development"

import argparse
import sys
import os

try:
    from openbabel import pybel  # Open Babel >= 3.0 package layout
except ImportError:
    try:
        import pybel  # Open Babel 2.x top-level module
    except ImportError:
        # Keep the module importable (e.g. for ``-h``); main() reports the
        # missing dependency once it is actually needed.
        pybel = None

# Limit acceptable in and out formats
INFORMATS = ('smi', 'ism', 'sdf', 'mol2', 'mol', 'pdb')
OUTFORMATS = ('mol2', 'sdf', 'pdbqt')


def gen3D(mol, pH=7.4, forcefield="MMFF94", s1=10, s2=500):
    """
    Add hydrogens for the given pH, generate a 3D conformation and minimize it.

    The molecule is modified in place and also returned.

    - mol: pybel molecule (anything exposing OBMol, make3D and localopt)
    - pH: defaults to 7.4
    - forcefield: options - MMFF94, UFF or Ghemical, defaults to MMFF94
    - s1: how many steps for make3D, defaults to 10
    - s2: how many steps for localopt, defaults to 500
    """
    # AddHydrogens(polaronly, correctForPH, pH): all hydrogens, pH-corrected.
    mol.OBMol.AddHydrogens(False, True, pH)
    mol.make3D(forcefield=forcefield, steps=s1)
    mol.localopt(forcefield=forcefield, steps=s2)
    return mol


def arg_parser():
    """
    Build the command line parser of the script.

    Input and output files are mandatory; numeric options are converted by
    argparse, so invalid values produce a usage error instead of a traceback.
    """
    parser = argparse.ArgumentParser(description='Generate 3D structures of molecules.')
    parser.add_argument('-i', '--infile', required=True, help="Specifies input file")
    parser.add_argument('-o', '--outfile', required=True, help="Specifies output file")
    parser.add_argument('-p', '--ph', type=float, default=7.4,
                        help="Specifies pH for hydrogen addition")
    parser.add_argument('-f', '--forcefield', default='MMFF94',
                        help="Specifies forcefield. MMFF94, UFF or Ghemical, defaults to MMFF94")
    parser.add_argument('-s1', '--steps1', type=int, default=10,
                        help="Specifies how many steps for make3D, defaults to 10")
    parser.add_argument('-s2', '--steps2', type=int, default=500,
                        help="Specifies how many steps for localopt, defaults to 500")

    return parser


def _file_format(path):
    """Return the lower-cased file extension of path without the dot."""
    return os.path.splitext(path)[1][1:].lower()


def _clear_terminal():
    """Clear the terminal so the progress report stays on one screen."""
    os.system('cls' if os.name == 'nt' else 'clear')


def main(argv=None):
    """
    Run the conversion described by the command line arguments.

    - argv: argument list without the program name, defaults to sys.argv[1:]

    Returns the process exit status: 0 on success, 1 when a file format is
    not supported or Open Babel cannot be imported.
    """
    if argv is None:
        argv = sys.argv[1:]
    # With no arguments at all, show the help text instead of a usage error.
    args = arg_parser().parse_args(argv if argv else ['-h'])

    informat = _file_format(args.infile)
    outformat = _file_format(args.outfile)
    if informat not in INFORMATS or outformat not in OUTFORMATS:
        sys.stderr.write(
            "Format either of infile or outfile not recognized!\n"
            "Supported input formats: {0}\n"
            "Supported output formats: {1}\n".format(
                " ".join(INFORMATS), " ".join(OUTFORMATS)))
        return 1

    if pybel is None:
        sys.stderr.write(
            "Open Babel Python bindings (pybel) are required but could not be imported.\n")
        return 1

    # First pass only counts the molecules so that progress can be reported;
    # the file is re-read below because readfile() yields molecules lazily.
    num_mols = sum(1 for _ in pybel.readfile(informat, args.infile))
    _clear_terminal()
    molsdetected = str(num_mols) + " molecules detected."
    print(molsdetected)

    output = pybel.Outputfile(outformat, args.outfile, overwrite=True)
    try:
        for i, mol in enumerate(pybel.readfile(informat, args.infile), 1):
            mol = gen3D(mol, pH=args.ph, forcefield=args.forcefield,
                        s1=args.steps1, s2=args.steps2)
            _clear_terminal()
            print(molsdetected)
            print("Processing molecule titled: " + str(mol.title))
            output.write(mol)
            print(str(i) + " out of " + str(num_mols))
    finally:
        # Always close, so molecules written so far reach the disk even if a
        # later molecule fails.
        output.close()
    return 0


if __name__ == "__main__":
    sys.exit(main())