├── Cheminformatics
│   ├── RDKit-pandas-integration.ipynb
│   ├── markdown-usage.ipynb
│   ├── rendering-of-images-in-IPython.ipynb
│   ├── scaffold-analysis
│   │   ├── Scaffold analysis & Schnellkurs in chemoinformatics.ipynb
│   │   ├── biomedx.png
│   │   ├── merck.jpeg
│   │   ├── pi.gif
│   │   └── rdkit.png
│   └── somemols.smi
├── Kinase-Inhibitors
│   └── approved-or-in-clinical-trials
│       ├── Kinase inhibitors - approved or in clinical trials.ipynb
│       ├── chembl_drugs.txt
│       ├── kin_inh.png
│       ├── kin_inh.smi
│       ├── kin_inh_approved.smi
│       ├── kin_inh_approved_2.smi
│       ├── kin_inh_approved_3.smi
│       └── kin_inh_phase4.png
├── LICENSE.md
├── Machine Learning
│   ├── Sklearn to ete3 trees.ipynb
│   ├── tree.dot
│   └── tree.png
├── Presentations and Tutorials
│   ├── Molecular Modelling Workshop 2014
│   │   ├── Scaffold analysis in Python with RDKit and pandas.ipynb
│   │   ├── approved.sdf
│   │   ├── biomedx.png
│   │   ├── merck.jpeg
│   │   └── rdkit.png
│   ├── Protvec demo 2017
│   │   ├── ProtVec.ipynb
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── biovec
│   │   │   ├── .gitignore
│   │   │   ├── README.md
│   │   │   ├── biovec
│   │   │   │   ├── __init__.py
│   │   │   │   ├── binary_amino.py
│   │   │   │   └── models.py
│   │   │   ├── setup.py
│   │   │   └── trained_models
│   │   │       └── swissprot_reviewed_protvec
│   │   ├── corpus.txt
│   │   ├── data
│   │   │   ├── family_classification_and_sequence_small.tab
│   │   │   └── uniprot_sprot_small.fasta
│   │   ├── figures
│   │   │   ├── Protein_sentences.png
│   │   │   ├── Protein_sentences2.png
│   │   │   ├── Skip_gram_cbow.png
│   │   │   ├── linear-relationships.png
│   │   │   ├── protein_vectors_wlabel.png
│   │   │   ├── protein_words.png
│   │   │   ├── proteinsequence.png
│   │   │   ├── relationships_plus_vectors2.png
│   │   │   └── table_overview_vocab2.png
│   │   ├── helpers.py
│   │   └── trained_models
│   │       ├── model_SwissProt_small
│   │       └── swissprot_reviewed_protvec
│   └── RDKit UGM 2014
│       ├── Scaffold analysis of ChEMBL data with pandas and RDKit.ipynb
│       ├── biomedx.png
│       ├── hackaton
│       │   ├── XLSX export.ipynb
│       │   └── drugs.smi
│       ├── merck.jpeg
│       └── rdkit.png
├── README.md
└── Virtual-Screening
    ├── filtering
    │   └── filter_pains.py
    └── ligand-3D-conformations
        ├── prepare_for_docking.py
        ├── test.sdf
        └── test.smi
/Cheminformatics/markdown-usage.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# BioMed X web page\n",
8 | "## Internal presentation on Markdown usage on our web page\n",
9 | "### [http://bio.mx/](http://bio.mx/)\n",
10 | "\n",
11 | "### Team pages\n",
12 | "Thank you all for helping. Great thanks to **Marco Tidona** for coding!\n",
13 | "\n",
14 | "### Notes and missing stuff\n",
15 | "- Use Twitter!\n",
16 | "- Each group should be able to update their own page\n",
17 | "\n",
18 | "##### More info on http://192.168.96.60/dokuwiki/doku.php?id=admin:web-page\n",
19 | "<br>\n",
20 | "<br>\n",
21 | "<br>\n",
22 | "<br>\n",
23 | "<br>\n",
24 | "<br>\n",
25 | "<br>\n",
26 | "<br>\n",
27 | "<br>\n",
28 | "<br>\n",
29 | " \n",
30 | " "
31 | ]
32 | },
33 | {
34 | "cell_type": "markdown",
35 | "metadata": {},
36 | "source": [
37 | "# Markdown\n",
38 | "- Plain text formatting syntax\n",
39 | "- You can format text without html knowledge\n",
40 | "- http://daringfireball.net/projects/markdown/\n",
41 | "- http://192.168.96.60/dokuwiki/doku.php?id=admin:web-page\n",
42 | "<br>\n",
43 | "<br>\n",
44 | "<br>\n",
45 | "<br>\n",
46 | "<br>\n",
47 | "<br>\n",
48 | "<br>\n",
49 | "<br>"
50 | ]
51 | },
52 | {
53 | "cell_type": "markdown",
54 | "metadata": {},
55 | "source": [
56 | "# Headings\n",
57 | "\n",
58 | "# Header 1\n",
59 | "## Header 2\n",
60 | "### Header 3 (in our case names)\n",
61 | "#### Header 4 (person's position)\n",
62 | "##### Header 5 (previous work & contact)"
63 | ]
64 | },
65 | {
66 | "cell_type": "markdown",
67 | "metadata": {},
68 | "source": [
69 | "# Lists\n",
70 | "- list item 1\n",
71 | "- list item 2"
72 | ]
73 | },
74 | {
75 | "cell_type": "markdown",
76 | "metadata": {},
77 | "source": [
78 | "# Links\n",
79 | "[LINK](http://www.google.com) \n",
80 | "E-mail: "
81 | ]
82 | },
83 | {
84 | "cell_type": "markdown",
85 | "metadata": {},
86 | "source": [
87 | "# Images\n",
88 | ""
89 | ]
90 | },
91 | {
92 | "cell_type": "markdown",
93 | "metadata": {},
94 | "source": [
95 | "# Normal text is easy. You just write\n",
96 | "**important** *italic* \n",
97 | "\n",
98 | "Two spaces followed by Enter introduce a new line. \n",
99 | "**See new line.** \n",
100 | "If you omit the spaces and just press Enter, there will be no new line.\n",
101 | "**See no new line.**"
102 | ]
103 | },
104 | {
105 | "cell_type": "code",
106 | "execution_count": 0,
107 | "metadata": {
108 | "collapsed": false
109 | },
110 | "outputs": [],
111 | "source": []
112 | }
113 | ],
114 | "metadata": {
115 | "kernelspec": {
116 | "display_name": "Python 2",
117 | "language": "python",
118 | "name": "python2"
119 | },
120 | "language_info": {
121 | "codemirror_mode": {
122 | "name": "ipython",
123 | "version": 2
124 | },
125 | "file_extension": ".py",
126 | "mimetype": "text/x-python",
127 | "name": "python",
128 | "nbconvert_exporter": "python",
129 | "pygments_lexer": "ipython2",
130 | "version": "2.7.10"
131 | }
132 | },
133 | "nbformat": 4,
134 | "nbformat_minor": 0
135 | }
136 |
--------------------------------------------------------------------------------
/Cheminformatics/rendering-of-images-in-IPython.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Custom objects and their rendering in IPython\n",
8 | "### Example of how to use object representations"
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": 1,
14 | "metadata": {
15 | "collapsed": false
16 | },
17 | "outputs": [],
18 | "source": [
19 | "import rdkit.Chem as Chem\n",
20 | "from rdkit.Chem import PandasTools\n",
21 | "from rdkit.Chem import Draw\n",
22 | "from rdkit.Chem import Descriptors\n",
23 | "from rdkit.Chem.Draw import IPythonConsole # Enables RDKit IPython integration"
24 | ]
25 | },
26 | {
27 | "cell_type": "markdown",
28 | "metadata": {},
29 | "source": [
30 | "### Let's say you need to create a custom object and you'd like to control the default representation"
31 | ]
32 | },
33 | {
34 | "cell_type": "markdown",
35 | "metadata": {},
36 | "source": [
37 | "We'll create an object that can store multiple molecules"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": 2,
43 | "metadata": {
44 | "collapsed": false
45 | },
46 | "outputs": [],
47 | "source": [
48 | "class primitiveMolsObject():\n",
49 | " def __init__(self, mols=None):\n",
50 | " self.mols = mols\n",
51 | " self.num = len(mols) # Return number of mols"
52 | ]
53 | },
54 | {
55 | "cell_type": "markdown",
56 | "metadata": {},
57 | "source": [
58 | "Get some mols and put them in a list"
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "execution_count": 3,
64 | "metadata": {
65 | "collapsed": false
66 | },
67 | "outputs": [],
68 | "source": [
69 | "mol1 = Chem.MolFromSmiles('NC(=O)CS(=O)C(c1ccccc1)c1ccccc1')"
70 | ]
71 | },
72 | {
73 | "cell_type": "code",
74 | "execution_count": 4,
75 | "metadata": {
76 | "collapsed": false
77 | },
78 | "outputs": [],
79 | "source": [
80 | "mol2 = Chem.MolFromSmiles('CCC(OC(C)=O)C(CC(C)N(C)C)(c1ccccc1)c1ccccc1')"
81 | ]
82 | },
83 | {
84 | "cell_type": "code",
85 | "execution_count": 5,
86 | "metadata": {
87 | "collapsed": false
88 | },
89 | "outputs": [],
90 | "source": [
91 | "mol3 = Chem.MolFromSmiles(' Cc1ccccc1C(OCCN(C)C)c1ccccc1')"
92 | ]
93 | },
94 | {
95 | "cell_type": "code",
96 | "execution_count": 6,
97 | "metadata": {
98 | "collapsed": false
99 | },
100 | "outputs": [],
101 | "source": [
102 | "mols = [mol1, mol2, mol3]"
103 | ]
104 | },
105 | {
106 | "cell_type": "markdown",
107 | "metadata": {},
108 | "source": [
109 | "Create a model instance with your mols"
110 | ]
111 | },
112 | {
113 | "cell_type": "code",
114 | "execution_count": 7,
115 | "metadata": {
116 | "collapsed": false
117 | },
118 | "outputs": [],
119 | "source": [
120 | "MyMols = primitiveMolsObject(mols)"
121 | ]
122 | },
123 | {
124 | "cell_type": "code",
125 | "execution_count": 8,
126 | "metadata": {
127 | "collapsed": false
128 | },
129 | "outputs": [
130 | {
131 | "data": {
132 | "text/plain": [
133 | "<__main__.primitiveMolsObject instance at 0x7f6cd9567908>"
134 | ]
135 | },
136 | "execution_count": 8,
137 | "metadata": {},
138 | "output_type": "execute_result"
139 | }
140 | ],
141 | "source": [
142 | "MyMols"
143 | ]
144 | },
145 | {
146 | "cell_type": "markdown",
147 | "metadata": {},
148 | "source": [
149 | "Default rendering gives very little info about contents"
150 | ]
151 | },
152 | {
153 | "cell_type": "code",
154 | "execution_count": 9,
155 | "metadata": {
156 | "collapsed": false
157 | },
158 | "outputs": [
159 | {
160 | "data": {
161 | "text/plain": [
162 | "[,\n",
163 | " ,\n",
164 | " ]"
165 | ]
166 | },
167 | "execution_count": 9,
168 | "metadata": {},
169 | "output_type": "execute_result"
170 | }
171 | ],
172 | "source": [
173 | "MyMols.mols"
174 | ]
175 | },
176 | {
177 | "cell_type": "code",
178 | "execution_count": 10,
179 | "metadata": {
180 | "collapsed": false
181 | },
182 | "outputs": [
183 | {
184 | "data": {
185 | "text/plain": [
186 | "3"
187 | ]
188 | },
189 | "execution_count": 10,
190 | "metadata": {},
191 | "output_type": "execute_result"
192 | }
193 | ],
194 | "source": [
195 | "MyMols.num"
196 | ]
197 | },
198 | {
199 | "cell_type": "markdown",
200 | "metadata": {},
201 | "source": [
202 | "### Luckily, default representations of objects in IPython can be easily controlled with \\_repr\\_html\\_"
203 | ]
204 | },
205 | {
206 | "cell_type": "markdown",
207 | "metadata": {},
208 | "source": [
209 | "Let's define the object again, this time with functions that control representation"
210 | ]
211 | },
212 | {
213 | "cell_type": "code",
214 | "execution_count": 11,
215 | "metadata": {
216 | "collapsed": false
217 | },
218 | "outputs": [],
219 | "source": [
220 | "class primitiveMolsObject2():\n",
221 | " def __init__(self, mols=None):\n",
222 | " self.mols = mols\n",
223 | " self.num = len(mols) # Return number of mols\n",
224 | " \n",
225 | " def _repr_html_(self):\n",
226 | " # Default representation in IPython\n",
227 | " smilesString = ''\n",
228 | " for mol in mols:\n",
229 | " smilesString += Chem.MolToSmiles(mol) + \", \" \n",
230 | " return smilesString #'<img src=\"data:image/png;base64,%s\"/>' %s"
231 | ]
232 | },
233 | {
234 | "cell_type": "code",
235 | "execution_count": 12,
236 | "metadata": {
237 | "collapsed": false
238 | },
239 | "outputs": [],
240 | "source": [
241 | "MyMols2 = primitiveMolsObject2(mols)"
242 | ]
243 | },
244 | {
245 | "cell_type": "markdown",
246 | "metadata": {},
247 | "source": [
248 | "Much better! We can actually see what hides behind an object"
249 | ]
250 | },
251 | {
252 | "cell_type": "code",
253 | "execution_count": 13,
254 | "metadata": {
255 | "collapsed": false
256 | },
257 | "outputs": [
258 | {
259 | "data": {
260 | "text/html": [
261 | "NC(=O)CS(=O)C(c1ccccc1)c1ccccc1, CCC(OC(C)=O)C(CC(C)N(C)C)(c1ccccc1)c1ccccc1, Cc1ccccc1C(OCCN(C)C)c1ccccc1, "
262 | ],
263 | "text/plain": [
264 | "<__main__.primitiveMolsObject2 instance at 0x7f6cd958c050>"
265 | ]
266 | },
267 | "execution_count": 13,
268 | "metadata": {},
269 | "output_type": "execute_result"
270 | }
271 | ],
272 | "source": [
273 | "MyMols2"
274 | ]
275 | },
276 | {
277 | "cell_type": "markdown",
278 | "metadata": {},
279 | "source": [
280 | "#### Or we can add graphic representation"
281 | ]
282 | },
283 | {
284 | "cell_type": "code",
285 | "execution_count": 14,
286 | "metadata": {
287 | "collapsed": false
288 | },
289 | "outputs": [],
290 | "source": [
291 | "from base64 import b64encode\n",
292 | "from StringIO import StringIO\n",
293 | "\n",
294 | "class primitiveMolsObject3():\n",
295 | " def __init__(self, mols=None):\n",
296 | " self.mols = mols\n",
297 | " self.num = len(mols) # Return number of mols\n",
298 | " \n",
299 | " def _repr_html_(self):\n",
300 | " # Default representation in IPython\n",
301 | " sio = StringIO()\n",
302 | " Draw.MolsToGridImage(self.mols).save(sio,format='PNG')\n",
303 | " s = b64encode(sio.getvalue()) # Encode in base64\n",
304 | " return '<img src=\"data:image/png;base64,%s\"/>' %s"
305 | ]
306 | },
307 | {
308 | "cell_type": "code",
309 | "execution_count": 15,
310 | "metadata": {
311 | "collapsed": false
312 | },
313 | "outputs": [],
314 | "source": [
315 | "MyOtherMols = primitiveMolsObject3(mols)"
316 | ]
317 | },
318 | {
319 | "cell_type": "markdown",
320 | "metadata": {},
321 | "source": [
322 | "Much better! We can actually see what hides behind an object"
323 | ]
324 | },
325 | {
326 | "cell_type": "code",
327 | "execution_count": 16,
328 | "metadata": {
329 | "collapsed": false
330 | },
331 | "outputs": [
332 | {
333 | "data": {
334 | "text/html": [
335 | "
"
336 | ],
337 | "text/plain": [
338 | "<__main__.primitiveMolsObject3 instance at 0x7f6cd958cbd8>"
339 | ]
340 | },
341 | "execution_count": 16,
342 | "metadata": {},
343 | "output_type": "execute_result"
344 | }
345 | ],
346 | "source": [
347 | "MyOtherMols"
348 | ]
349 | },
350 | {
351 | "cell_type": "markdown",
352 | "metadata": {},
353 | "source": [
354 | "Copyright (C) 2014 by Samo Turk, BioMed X GmbH\n",
355 | "\n",
356 | "This work is licensed under the Creative Commons Attribution-ShareAlike 3.0 License. To view a copy of this license, visit http://creativecommons.org/licenses/by-sa/3.0/ or send a letter to Creative Commons, 543 Howard Street, 5th Floor, San Francisco, California, 94105, USA.\n"
357 | ]
358 | }
359 | ],
360 | "metadata": {
361 | "kernelspec": {
362 | "display_name": "Python 2",
363 | "language": "python",
364 | "name": "python2"
365 | },
366 | "language_info": {
367 | "codemirror_mode": {
368 | "name": "ipython",
369 | "version": 2
370 | },
371 | "file_extension": ".py",
372 | "mimetype": "text/x-python",
373 | "name": "python",
374 | "nbconvert_exporter": "python",
375 | "pygments_lexer": "ipython2",
376 | "version": "2.7.10"
377 | }
378 | },
379 | "nbformat": 4,
380 | "nbformat_minor": 0
381 | }
382 |
--------------------------------------------------------------------------------
/Cheminformatics/scaffold-analysis/biomedx.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Cheminformatics/scaffold-analysis/biomedx.png
--------------------------------------------------------------------------------
/Cheminformatics/scaffold-analysis/merck.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Cheminformatics/scaffold-analysis/merck.jpeg
--------------------------------------------------------------------------------
/Cheminformatics/scaffold-analysis/pi.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Cheminformatics/scaffold-analysis/pi.gif
--------------------------------------------------------------------------------
/Cheminformatics/scaffold-analysis/rdkit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Cheminformatics/scaffold-analysis/rdkit.png
--------------------------------------------------------------------------------
/Cheminformatics/somemols.smi:
--------------------------------------------------------------------------------
1 | SMILES Name
2 | C[S+](CCC(N)C(=O)O)CC1OC(n2cnc3c2ncnc3N)C(O)C1O DB00118
3 | Nc1ncnc2c1ncn2C1OC(COP(=O)(O)O)C(O)C1O DB00131
4 | Nc1ncnc2c1ncn2C1OC(CO)C(O)C1O DB00194
5 | Nc1nc(Cl)nc2c1ncn2C1CC(O)C(CO)O1 DB00242
6 | Nc1nc(Cl)nc2c1ncn2C1OC(CO)C(O)C1F DB00631
7 | Nc1nc(F)nc2c1ncn2C1OC(COP(=O)(O)O)C(O)C1O DB01073
8 | COc1nc(N)nc2c1ncn2C1OC(CO)C(O)C1O DB01280
9 |
--------------------------------------------------------------------------------
/Kinase-Inhibitors/approved-or-in-clinical-trials/kin_inh.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Kinase-Inhibitors/approved-or-in-clinical-trials/kin_inh.png
--------------------------------------------------------------------------------
/Kinase-Inhibitors/approved-or-in-clinical-trials/kin_inh.smi:
--------------------------------------------------------------------------------
1 | SMILES Name
2 | Cc1cc2c(F)c(Oc3ncnn4cc(OCC(C)OC(=O)C(C)N)c(C)c34)ccc2[nH]1 Brivanib
3 | CCOc1cc2ncc(C#N)c(Nc3ccc(F)c(Cl)c3)c2cc1NC(=O)C=CCN(C)C Pelitinib
4 | COc1cc2c(Oc3ccc(NC(=O)C4(C(=O)Nc5ccc(F)cc5)CC4)cc3F)ccnc2cc1OCCCN1CCOCC1 Foretinib
5 | COc1cc2nccc(Oc3ccc(NC(=O)Nc4cc(C)on4)c(Cl)c3)c2cc1OC Tivozanib
6 | COc1cc(Nc2c(C#N)cnc3cc(OCCCN4CCN(C)CC4)c(OC)cc23)c(Cl)cc1Cl Bosutinib
7 | CC1CCN(C(=O)CC#N)CC1N(C)c1ncnc2[nH]ccc12 Tofacitinib
8 | Clc1ccc(Nc2nnc(Cc3ccncc3)c3ccccc23)cc1 Vatalanib
9 | CCc1cc2c(cc1N1CCC(N3CCOCC3)CC1)C(C)(C)c1[nH]c3cc(C#N)ccc3c1C2=O Alectinib
10 | Cn1ncc(Cl)c1-c1cc(C(=O)NC(CN)Cc2ccc(F)c(F)c2)oc1Cl Uprosertib
11 | Cc1ccc(C(=O)Nc2ccc(CN3CCN(C)CC3)c(C(F)(F)F)c2)cc1C#Cc1cnc2cccnn12 Ponatinib
12 | N#CCC(C1CCCC1)n1cc(-c2ncnc3[nH]ccc23)cn1 Ruxolitinib
13 | C=CC(=O)N1CCCC(n2nc(-c3ccc(Oc4ccccc4)cc3)c3c(N)ncnc32)C1 Ibrutinib
14 | Cn1ncc(Cl)c1-c1cc(C(=O)NC(CN)Cc2cccc(F)c2)sc1Cl Afuresertib
15 | CCC(CO)Nc1nc(NCc2ccccc2)c2ncn(C(C)C)c2n1 Seliciclib
16 | O=C(Nc1nc2cccc(-c3ccc(CN4CCS(=O)(=O)CC4)cc3)n2n1)C1CC1 Filgotinib
17 | CCc1nc(C(N)=O)c(Nc2ccc(N3CCC(N4CCN(C)CC4)CC3)c(OC)c2)nc1NC1CCOCC1 Gilteritinib
18 | CN(C)C1CCN(C(=O)c2ccc(NC(=O)Nc3ccc(-c4nc(N5CCOCC5)nc(N5CCOCC5)n4)cc3)cc2)CC1 Gedatolisib
19 | CNC(=O)c1cc(Oc2ccc(NC(=O)Nc3ccc(Cl)c(C(F)(F)F)c3)cc2)ccn1 Sorafenib
20 | Cc1nc(Nc2ncc(C(=O)Nc3c(C)cccc3Cl)s2)cc(N2CCN(CCO)CC2)n1 Dasatinib
21 | COc1cc2c(Oc3ccc4[nH]c(C)cc4c3F)ncnc2cc1OCCCN1CCCC1 Cediranib
22 | COC(=O)c1ccc2c(c1)NC(=O)C2=C(Nc1ccc(N(C)C(=O)CN2CCN(C)CC2)cc1)c1ccccc1 Nintedanib
23 | Cn1cnc2c(F)c(Nc3ccc(Br)cc3Cl)c(C(=O)NOCCO)cc21 Selumetinib
24 | CCS(=O)(=O)N1CC(CC#N)(n2cc(-c3ncnc4[nH]ccc34)cn2)C1 Baricitinib
25 | FC(F)(F)c1ccc(C=Cc2nc(COc3ccc(CCCCn4ccnn4)cc3)co2)cc1 Mubritinib
26 | Cc1cc2c(F)c(Oc3ncnn4cc(OCC(C)O)c(C)c34)ccc2[nH]1 Brivanib
27 | COc1cc(OC)c(C=CS(=O)(=O)Cc2ccc(OC)c(OP(=O)(O)O)c2)c(OC)c1 Briciclib
28 | NC(=O)c1cnc2[nH]ccc2c1NC1C2CC3CC1CC(O)(C3)C2 Peficitinib
29 | COC1CC2CCC(C)C(O)(O2)C(=O)C(=O)N2CCCCC2C(=O)OC(C(C)CC2CCC(OP(C)(C)=O)C(OC)C2)CC(=O)C(C)C=C(C)C(O)C(OC)C(=O)C(C)CC(C)C=CC=CC=C1C Ridaforolimus
30 | COc1cc2nccc(Oc3ccc(NC(=O)NC4CC4)c(Cl)c3)c2cc1C(N)=O Lenvatinib
31 | CCC1C(=O)N(C)c2cnc(Nc3ccc(C(=O)NC4CCC(N5CCN(CC6CC6)CC5)CC4)cc3OC)nc2N1C(C)C Volasertib
32 | COc1cc2c(N3CCN(C(=O)Nc4ccc(OC(C)C)cc4)CC3)ncnc2cc1OCCCN1CCCCC1 Tandutinib
33 | Cc1nc(-c2cn3c(n2)-c2ccc(-c4cnn(C(C)(C)C(N)=O)c4)cc2OCC3)n(C(C)C)n1 Taselisib
34 | Cc1cccc(-c2nn3c(c2-c2ccnc4ccc(C(N)=O)cc24)CCC3)n1 Galunisertib
35 | COc1ncc(-c2ccc3nccc(-c4ccnnc4)c3c2)cc1NS(=O)(=O)c1ccc(F)cc1F Omipalisib
36 | OC1CCC(Nc2ncc3nc(Nc4c(F)cc(F)cc4F)n(C4CCOC4)c3n2)CC1 Tanzisertib
37 | CN1CCN(c2ccc3nc(-c4c(N)c5c(F)cccc5[nH]c4=O)[nH]c3c2)CC1 Dovitinib
38 | CC(C)(C)c1nc(-c2cccc(NS(=O)(=O)c3c(F)cccc3F)c2F)c(-c2ccnc(N)n2)s1 Dabrafenib
39 | Cc1cnc(Nc2ccc(OCCN3CCCC3)cc2)nc1Nc1cccc(S(=O)(=O)NC(C)(C)C)c1 Fedratinib
40 | C=CC(=O)Nc1cc2c(Nc3ccc(F)c(Cl)c3)ncnc2cc1OCCCN1CCOCC1 Canertinib
41 | COC1CC2CCC(C)C(O)(O2)C(=O)C(=O)N2CCCCC2C(=O)OC(C(C)CC2CCC(OCCO)C(OC)C2)CC(=O)C(C)C=C(C)C(O)C(OC)C(=O)C(C)CC(C)C=CC=CC=C1C Everolimus
42 | CCN(CC)CCNC(=O)c1c(C)[nH]c(C=C2C(=O)Nc3ccc(F)cc32)c1C Sunitinib
43 | Cc1c(F)cc(C(=O)NC2CC2)cc1-c1ccc(C(=O)NCC(C)(C)C)cn1 Losmapimod
44 | COC1CC2CCC(C)C(O)(O2)C(=O)C(=O)N2CCCCC2C(=O)OC(C(C)CC2CCC(O)C(OC)C2)CC(=O)C(C)C=C(C)C(O)C(OC)C(=O)C(C)CC(C)C=CC=CC=C1C Sirolimus
45 | CC12OC(CC1(O)CO)n1c3ccccc3c3c4c(c5c6ccccc6n2c5c31)CNC4=O Lestaurtinib
46 | COc1cc2nccc(Oc3ccc(NC(=O)C4(C(=O)Nc5ccc(F)cc5)CC4)cc3)c2cc1OC Cabozantinib
47 | COc1cc2c(Nc3ccc(Sc4nccn4C)c(Cl)c3)c(C#N)cnc2cc1N1CCC(N2CCCC2)CC1 Balamapimod
48 | CC1(C)CN(C(=O)c2ccc(-c3cccc4nc(NC(=O)C5CC5)nn34)cc2)C1 Solcitinib
49 | CCN1CCN(Cc2ccc(Nc3ncc(F)c(-c4cc(F)c5nc(C)n(C(C)C)c5c4)n3)nc2)CC1 Abemaciclib
50 | CNC(=O)c1cc(Oc2ccc(NC(=O)Nc3ccc(Cl)c(C(F)(F)F)c3)c(F)c2)ccn1 Regorafenib
51 | Cc1ccc(-n2nc(C(C)(C)C)cc2NC(=O)Nc2ccc(OCCN3CCOCC3)c3ccccc23)cc1 Doramapimod
52 | COc1cc(OC)c(C=CS(=O)(=O)Cc2ccc(OC)c(NCC(=O)O)c2)c(OC)c1 Rigosertib
53 | COc1cc(Nc2ncc(F)c(Nc3ccc4c(n3)N(COP(=O)(O)O)C(=O)C(C)(C)O4)n2)cc(OC)c1OC Fostamatinib
54 | Cc1cc(Nc2cc(N3CCN(C)CC3)nc(Sc3ccc(NC(=O)C4CC4)cc3)n2)n[nH]1 Tozasertib
55 | CC1(O)CC(c2nc(-c3ccc4ccc(-c5ccccc5)nc4c3)c3c(N)nccn23)C1 Linsitinib
56 | COC1CC2CCC(C)C(O)(O2)C(=O)C(=O)N2CCCCC2C(=O)OC(C(C)CC2CCC(O)C(OC)C2)CCC(C)C=C(C)C(O)C(OC)C(=O)C(C)CC(C)C=CC=CC=C1C Olcorolimus
57 | Cc1cc(C)c(C=C2C(=O)Nc3ccccc32)[nH]1 Semaxanib
58 | CC1COC(Nc2ccc3ncnc(Nc4ccc(OCc5nccs5)c(Cl)c4)c3c2)=N1 Varlitinib
59 | CC(C)NCC(C(=O)N1CCN(c2ncnc3c2C(C)CC3O)CC1)c1ccc(Cl)cc1 Ipatasertib
60 | Cc1ccc(F)c(NC(=O)Nc2ccc(-c3cccc4[nH]nc(N)c34)cc2)c1 Linifanib
61 | CCOCCOC1CCC(CC(C)C2CC(=O)C(C)C=C(C)C(O)C(OC)C(=O)C(C)CC(C)C=CC=CC=C(C)C(OC)CC3CCC(C)C(O)(O3)C(=O)C(=O)N3CCCCC3C(=O)O2)CC1OC Umirolimus
62 | CCC1C=C(C)CC(C)CC(OC)C2OC(O)(C(=O)C(=O)N3CCCCC3C(=O)OC(C(C)=CC3CCC(Cl)C(OC)C3)C(C)C(O)CC1=O)C(C)CC2OC Pimecrolimus
63 | CNC(=O)c1ccc(Nc2ncc(C(F)(F)F)c(NCc3nccnc3N(C)S(C)(=O)=O)n2)cc1 Defactinib
64 | Cc1ccc(NC(=O)c2ccc(CN3CCC(N(C)C)C3)c(C(F)(F)F)c2)cc1Nc1nccc(-c2cncnc2)n1 Bafetinib
65 | CC(Oc1cc(-c2cnn(C3CCNCC3)c2)cnc1N)c1c(Cl)ccc(F)c1Cl Crizotinib
66 | Cc1cn(-c2cc(NC(=O)c3ccc(C)c(Nc4nccc(-c5cccnc5)n4)c3)cc(C(F)(F)F)c2)cn1 Nilotinib
67 | CCC(Nc1ncnc2[nH]cnc12)c1nc2cccc(F)c2c(=O)n1-c1ccccc1 Idelalisib
68 | COc1cc2c(Nc3ccc(Br)cc3F)ncnc2cc1OCC1CCN(C)CC1 Vandetanib
69 | Cc1c(CN2CCN(C(=O)C(C)O)CC2)sc2c(N3CCOCC3)nc(-c3cnc(N)nc3)nc12 Apitolisib
70 | O=C1NC(=O)C(c2cn3c4c(cccc24)CCC3)C1c1c[nH]c2ccccc12 Tivantinib
71 | Nc1ncc(-c2cnn(CCO)c2)c2scc(-c3ccc(NC(=O)Nc4cccc(F)c4)cc3)c12 Ilorasertib
72 | CC1(COc2ccc3c(c2)ncn3-c2ccc3cccc(N4CCC(N)CC4)c3n2)COC1 Crenolanib
73 | CC(=NNC(=N)N)c1cc(NC(=O)CCCCCCCCC(=O)Nc2cc(C(C)=NNC(=N)N)cc(C(C)=NNC(=N)N)c2)cc(C(C)=NNC(=N)N)c1 Semapimod
74 | CC(C)(C)Cn1c(N)nc2ccc(-c3[nH]c(C(C)(C)C)nc3-c3ccc(F)cc3)nc21 Ralimetinib
75 | C=CC(=O)Nc1cccc(Nc2nc(Nc3ccc(OCCOC)cc3)ncc2F)c1 Spebrutinib
76 | CC1(C)CNc2cc(NC(=O)c3cccnc3NCc3ccncc3)ccc21 Motesanib
77 | CC1CN(C(=O)c2cc3c(C(=O)C(=O)N(C)C)cn(C)c3cc2Cl)C(C)CN1Cc1ccc(F)cc1 Talmapimod
78 | C1=CCOCc2cc(ccc2OCCN2CCCC2)Nc2nccc(n2)-c2cccc(c2)COC1 Pacritinib
79 | COC1CC2CCC(C)C(O)(O2)C(=O)C(=O)N2CCCCC2C(=O)OC(C(C)CC2CCC(n3cnnn3)C(OC)C2)CC(=O)C(C)C=C(C)C(O)C(OC)C(=O)C(C)CC(C)C=CC=CC=C1C Zotarolimus
80 | Cc1cnc(NC(=O)Nc2cc(Br)c(C)cc2OCC2CNCCO2)cn1 Rabusertib
81 | Cc1cc(Nc2ncc(Cl)c(Nc3ccccc3S(=O)(=O)C(C)C)n2)c(OC(C)C)cc1C1CCNCC1 Ceritinib
82 | CN1CCN(C2CCN(C(=O)Nc3cc(Oc4ccc(NC(=O)C5(C(=O)Nc6ccc(F)cc6)CC5)c(F)c4)ccn3)CC2)CC1 Golvatinib
83 | CCC(C)(Nc1ccnc(-c2c[nH]c3ncccc23)n1)C(=O)NCC(F)(F)F Decernotinib
84 | Cc1[nH]c(C=C2C(=O)Nc3ccc(F)cc32)c(C)c1C(=O)NCCN1CCCC1 Toceranib
85 | CC(C)(C)c1cc(NC(=O)Nc2ccc(-c3cn4c(n3)sc3cc(OCCN5CCOCC5)ccc34)cc2)no1 Quizartinib
86 | CNC(=O)c1cc(Oc2ccc(NC(=O)Nc3cc(C(C)(C)C)nn3-c3ccc4ncccc4c3)c(F)c2)ccn1 Rebastinib
87 | CC(=O)c1c(C)c2cnc(Nc3ccc(N4CCNCC4)cn3)nc2n(C2CCCC2)c1=O Palbociclib
88 | COc1cc2ncnc(Nc3ccc(F)c(Cl)c3)c2cc1OCCCN1CCOCC1 Gefitinib
89 | CNC(=O)c1ccccc1Sc1ccc2c(C=Cc3ccccn3)n[nH]c2c1 Axitinib
90 | CN1CCN(CCOc2cc(OC3CCOCC3)c3c(Nc4c(Cl)ccc5c4OCO5)ncnc3c2)CC1 Saracatinib
91 | CC(Nc1ncnc2nc[nH]c12)c1cc2cccc(Cl)c2c(=O)n1-c1ccccc1 Duvelisib
92 | Cn1cnc2c(F)c(Nc3ccc(Br)cc3F)c(C(=O)NOCCO)cc21 Binimetinib
93 | Cn1c(=O)n(-c2ccc(C(C)(C)C#N)cc2)c2c3cc(-c4cnc5ccccc5c4)ccc3ncc21 Dactolisib
94 | CN(C)CC=CC(=O)Nc1cc2c(Nc3ccc(F)c(Cl)c3)ncnc2cc1OC1CCOC1 Afatinib
95 | Cc1cc(Nc2cc(CN3CCOCC3)c3nc(C)c(Cc4ccc(Cl)cc4F)n3n2)n[nH]1 Gandotinib
96 | CS(=O)(=O)CCNCc1ccc(-c2ccc3ncnc(Nc4ccc(OCc5cccc(F)c5)c(Cl)c4)c3c2)o1 Lapatinib
97 | COC1CC2CCC(C)C(O)(O2)C(=O)C(=O)N2CCCCC2C(=O)OC(C(C)CC2CCC(OC(=O)C(C)(CO)CO)C(OC)C2)CC(=O)C(C)C=C(C)C(O)C(OC)C(=O)C(C)CC(C)C=CC=CC=C1C Temsirolimus
98 | CNS(=O)(=O)CC1CCC(N(C)c2[nH]cnc3nccc2-3)CC1 Oclacitinib
99 | COc1cc(Nc2ncc3c(n2)-c2ccc(Cl)cc2C(c2c(F)cccc2OC)=NC3)ccc1C(=O)O Alisertib
100 | CCc1cnn2c(NCc3ccc[n+]([O-])c3)cc(N3CCCCC3CCO)nc12 Dinaciclib
101 | COC(=O)NC(C)CNc1nccc(-c2cn(C(C)C)nc2-c2cc(Cl)cc(NS(C)(=O)=O)c2F)n1 Encorafenib
102 | Cc1cc(F)ccc1-c1nc(NC(CO)CO)nc2c1ccc(=O)n2-c1c(F)cccc1F Dilmapimod
103 | O=C(c1ccc(F)c(F)c1Nc1ccc(I)cc1F)N1CC(O)(C2CCCCN2)C1 Cobimetinib
104 | COc1cc2ncnc(Nc3ccc(F)c(Cl)c3)c2cc1NC(=O)C=CCN1CCCCC1 Dacomitinib
105 | Cc1ccc(Nc2nccc(N(C)c3ccc4c(C)n(C)nc4c3)n2)cc1S(N)(=O)=O Pazopanib
106 | CCOc1cc2ncc(C#N)c(Nc3ccc(OCc4ccccn4)c(Cl)c3)c2cc1NC(=O)C=CCN(C)C Neratinib
107 | Cn1c(=O)c(Oc2ccc(F)cc2F)cc2cnc(NC(CCO)CCO)nc21 Pamapimod
108 | CCCS(=O)(=O)Nc1ccc(F)c(C(=O)c2c[nH]c3ncc(-c4ccc(Cl)cc4)cc23)c1F Vemurafenib
109 | C#Cc1cccc(Nc2ncnc3cc(OCCOC)c(OCCOC)cc23)c1 Erlotinib
110 | S=C(NCc1ccc2c(c1)OCO2)N1CCN(c2ncnc3c2oc2ccccc23)CC1 Amuvatinib
111 | O=C(NCC(O)CO)c1ccncc1Nc1ccc(I)cc1F Pimasertib
112 | CC(=O)Nc1cccc(-n2c(=O)n(C3CC3)c(=O)c3c(Nc4ccc(I)cc4F)n(C)c(=O)c(C)c32)c1 Trametinib
113 | C=CCC1C=C(C)CC(C)CC(OC)C2OC(O)(C(=O)C(=O)N3CCCCC3C(=O)OC(C(C)=CC3CCC(O)C(OC)C3)C(C)C(O)CC1=O)C(C)CC2OC Tacrolimus
114 | N#CCNC(=O)c1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)cc3)n2)cc1 Momelotinib
115 | Cc1ccc(NC(=O)c2ccc(CN3CCN(C)CC3)cc2)cc1Nc1nccc(-c2cccnc2)n1 Imatinib
116 |
--------------------------------------------------------------------------------
/Kinase-Inhibitors/approved-or-in-clinical-trials/kin_inh_approved.smi:
--------------------------------------------------------------------------------
1 | SMILES Name
2 | COc1cc(Nc2c(C#N)cnc3cc(OCCCN4CCN(C)CC4)c(OC)cc23)c(Cl)cc1Cl Bosutinib
3 | CC1CCN(C(=O)CC#N)CC1N(C)c1ncnc2[nH]ccc12 Tofacitinib
4 | Cc1ccc(C(=O)Nc2ccc(CN3CCN(C)CC3)c(C(F)(F)F)c2)cc1C#Cc1cnc2cccnn12 Ponatinib
5 | N#CCC(C1CCCC1)n1cc(-c2ncnc3[nH]ccc23)cn1 Ruxolitinib
6 | C=CC(=O)N1CCCC(n2nc(-c3ccc(Oc4ccccc4)cc3)c3c(N)ncnc32)C1 Ibrutinib
7 | CNC(=O)c1cc(Oc2ccc(NC(=O)Nc3ccc(Cl)c(C(F)(F)F)c3)cc2)ccn1 Sorafenib
8 | Cc1nc(Nc2ncc(C(=O)Nc3c(C)cccc3Cl)s2)cc(N2CCN(CCO)CC2)n1 Dasatinib
9 | COC(=O)c1ccc2c(c1)NC(=O)C2=C(Nc1ccc(N(C)C(=O)CN2CCN(C)CC2)cc1)c1ccccc1 Nintedanib
10 | CC(C)(C)c1nc(-c2cccc(NS(=O)(=O)c3c(F)cccc3F)c2F)c(-c2ccnc(N)n2)s1 Dabrafenib
11 | COC1CC2CCC(C)C(O)(O2)C(=O)C(=O)N2CCCCC2C(=O)OC(C(C)CC2CCC(OCCO)C(OC)C2)CC(=O)C(C)C=C(C)C(O)C(OC)C(=O)C(C)CC(C)C=CC=CC=C1C Everolimus
12 | CCN(CC)CCNC(=O)c1c(C)[nH]c(C=C2C(=O)Nc3ccc(F)cc32)c1C Sunitinib
13 | COC1CC2CCC(C)C(O)(O2)C(=O)C(=O)N2CCCCC2C(=O)OC(C(C)CC2CCC(O)C(OC)C2)CC(=O)C(C)C=C(C)C(O)C(OC)C(=O)C(C)CC(C)C=CC=CC=C1C Sirolimus
14 | COc1cc2nccc(Oc3ccc(NC(=O)C4(C(=O)Nc5ccc(F)cc5)CC4)cc3)c2cc1OC Cabozantinib
15 | CNC(=O)c1cc(Oc2ccc(NC(=O)Nc3ccc(Cl)c(C(F)(F)F)c3)c(F)c2)ccn1 Regorafenib
16 | CCC1C=C(C)CC(C)CC(OC)C2OC(O)(C(=O)C(=O)N3CCCCC3C(=O)OC(C(C)=CC3CCC(Cl)C(OC)C3)C(C)C(O)CC1=O)C(C)CC2OC Pimecrolimus
17 | CC(Oc1cc(-c2cnn(C3CCNCC3)c2)cnc1N)c1c(Cl)ccc(F)c1Cl Crizotinib
18 | Cc1cn(-c2cc(NC(=O)c3ccc(C)c(Nc4nccc(-c5cccnc5)n4)c3)cc(C(F)(F)F)c2)cn1 Nilotinib
19 | CCC(Nc1ncnc2[nH]cnc12)c1nc2cccc(F)c2c(=O)n1-c1ccccc1 Idelalisib
20 | COc1cc2c(Nc3ccc(Br)cc3F)ncnc2cc1OCC1CCN(C)CC1 Vandetanib
21 | Cc1cc(Nc2ncc(Cl)c(Nc3ccccc3S(=O)(=O)C(C)C)n2)c(OC(C)C)cc1C1CCNCC1 Ceritinib
22 | COc1cc2ncnc(Nc3ccc(F)c(Cl)c3)c2cc1OCCCN1CCOCC1 Gefitinib
23 | CNC(=O)c1ccccc1Sc1ccc2c(C=Cc3ccccn3)n[nH]c2c1 Axitinib
24 | CN(C)CC=CC(=O)Nc1cc2c(Nc3ccc(F)c(Cl)c3)ncnc2cc1OC1CCOC1 Afatinib
25 | CS(=O)(=O)CCNCc1ccc(-c2ccc3ncnc(Nc4ccc(OCc5cccc(F)c5)c(Cl)c4)c3c2)o1 Lapatinib
26 | COC1CC2CCC(C)C(O)(O2)C(=O)C(=O)N2CCCCC2C(=O)OC(C(C)CC2CCC(OC(=O)C(C)(CO)CO)C(OC)C2)CC(=O)C(C)C=C(C)C(O)C(OC)C(=O)C(C)CC(C)C=CC=CC=C1C Temsirolimus
27 | Cc1ccc(Nc2nccc(N(C)c3ccc4c(C)n(C)nc4c3)n2)cc1S(N)(=O)=O Pazopanib
28 | CCCS(=O)(=O)Nc1ccc(F)c(C(=O)c2c[nH]c3ncc(-c4ccc(Cl)cc4)cc23)c1F Vemurafenib
29 | C#Cc1cccc(Nc2ncnc3cc(OCCOC)c(OCCOC)cc23)c1 Erlotinib
30 | CC(=O)Nc1cccc(-n2c(=O)n(C3CC3)c(=O)c3c(Nc4ccc(I)cc4F)n(C)c(=O)c(C)c32)c1 Trametinib
31 | C=CCC1C=C(C)CC(C)CC(OC)C2OC(O)(C(=O)C(=O)N3CCCCC3C(=O)OC(C(C)=CC3CCC(O)C(OC)C3)C(C)C(O)CC1=O)C(C)CC2OC Tacrolimus
32 | Cc1ccc(NC(=O)c2ccc(CN3CCN(C)CC3)cc2)cc1Nc1nccc(-c2cccnc2)n1 Imatinib
33 |
--------------------------------------------------------------------------------
/Kinase-Inhibitors/approved-or-in-clinical-trials/kin_inh_approved_2.smi:
--------------------------------------------------------------------------------
1 | COc1cc(Nc2c(C#N)cnc3cc(OCCCN4CCN(C)CC4)c(OC)cc23)c(Cl)cc1Cl Bosutinib
2 | CC1CCN(C(=O)CC#N)CC1N(C)c1ncnc2[nH]ccc12 Tofacitinib
3 | Cc1ccc(C(=O)Nc2ccc(CN3CCN(C)CC3)c(C(F)(F)F)c2)cc1C#Cc1cnc2cccnn12 Ponatinib
4 | N#CCC(C1CCCC1)n1cc(-c2ncnc3[nH]ccc23)cn1 Ruxolitinib
5 | C=CC(=O)N1CCCC(n2nc(-c3ccc(Oc4ccccc4)cc3)c3c(N)ncnc32)C1 Ibrutinib
6 | CNC(=O)c1cc(Oc2ccc(NC(=O)Nc3ccc(Cl)c(C(F)(F)F)c3)cc2)ccn1 Sorafenib
7 | Cc1nc(Nc2ncc(C(=O)Nc3c(C)cccc3Cl)s2)cc(N2CCN(CCO)CC2)n1 Dasatinib
8 | COC(=O)c1ccc2c(c1)NC(=O)C2=C(Nc1ccc(N(C)C(=O)CN2CCN(C)CC2)cc1)c1ccccc1 Nintedanib
9 | CC(C)(C)c1nc(-c2cccc(NS(=O)(=O)c3c(F)cccc3F)c2F)c(-c2ccnc(N)n2)s1 Dabrafenib
10 | COC1CC2CCC(C)C(O)(O2)C(=O)C(=O)N2CCCCC2C(=O)OC(C(C)CC2CCC(OCCO)C(OC)C2)CC(=O)C(C)C=C(C)C(O)C(OC)C(=O)C(C)CC(C)C=CC=CC=C1C Everolimus
11 | CCN(CC)CCNC(=O)c1c(C)[nH]c(C=C2C(=O)Nc3ccc(F)cc32)c1C Sunitinib
12 | COC1CC2CCC(C)C(O)(O2)C(=O)C(=O)N2CCCCC2C(=O)OC(C(C)CC2CCC(O)C(OC)C2)CC(=O)C(C)C=C(C)C(O)C(OC)C(=O)C(C)CC(C)C=CC=CC=C1C Sirolimus
13 | COc1cc2nccc(Oc3ccc(NC(=O)C4(C(=O)Nc5ccc(F)cc5)CC4)cc3)c2cc1OC Cabozantinib
14 | CNC(=O)c1cc(Oc2ccc(NC(=O)Nc3ccc(Cl)c(C(F)(F)F)c3)c(F)c2)ccn1 Regorafenib
15 | CCC1C=C(C)CC(C)CC(OC)C2OC(O)(C(=O)C(=O)N3CCCCC3C(=O)OC(C(C)=CC3CCC(Cl)C(OC)C3)C(C)C(O)CC1=O)C(C)CC2OC Pimecrolimus
16 | CC(Oc1cc(-c2cnn(C3CCNCC3)c2)cnc1N)c1c(Cl)ccc(F)c1Cl Crizotinib
17 | Cc1cn(-c2cc(NC(=O)c3ccc(C)c(Nc4nccc(-c5cccnc5)n4)c3)cc(C(F)(F)F)c2)cn1 Nilotinib
18 | CCC(Nc1ncnc2[nH]cnc12)c1nc2cccc(F)c2c(=O)n1-c1ccccc1 Idelalisib
19 | COc1cc2c(Nc3ccc(Br)cc3F)ncnc2cc1OCC1CCN(C)CC1 Vandetanib
20 | Cc1cc(Nc2ncc(Cl)c(Nc3ccccc3S(=O)(=O)C(C)C)n2)c(OC(C)C)cc1C1CCNCC1 Ceritinib
21 | COc1cc2ncnc(Nc3ccc(F)c(Cl)c3)c2cc1OCCCN1CCOCC1 Gefitinib
22 | CNC(=O)c1ccccc1Sc1ccc2c(C=Cc3ccccn3)n[nH]c2c1 Axitinib
23 | CN(C)CC=CC(=O)Nc1cc2c(Nc3ccc(F)c(Cl)c3)ncnc2cc1OC1CCOC1 Afatinib
24 | CS(=O)(=O)CCNCc1ccc(-c2ccc3ncnc(Nc4ccc(OCc5cccc(F)c5)c(Cl)c4)c3c2)o1 Lapatinib
25 | COC1CC2CCC(C)C(O)(O2)C(=O)C(=O)N2CCCCC2C(=O)OC(C(C)CC2CCC(OC(=O)C(C)(CO)CO)C(OC)C2)CC(=O)C(C)C=C(C)C(O)C(OC)C(=O)C(C)CC(C)C=CC=CC=C1C Temsirolimus
26 | Cc1ccc(Nc2nccc(N(C)c3ccc4c(C)n(C)nc4c3)n2)cc1S(N)(=O)=O Pazopanib
27 | CCCS(=O)(=O)Nc1ccc(F)c(C(=O)c2c[nH]c3ncc(-c4ccc(Cl)cc4)cc23)c1F Vemurafenib
28 | C#Cc1cccc(Nc2ncnc3cc(OCCOC)c(OCCOC)cc23)c1 Erlotinib
29 | CC(=O)Nc1cccc(-n2c(=O)n(C3CC3)c(=O)c3c(Nc4ccc(I)cc4F)n(C)c(=O)c(C)c32)c1 Trametinib
30 | C=CCC1C=C(C)CC(C)CC(OC)C2OC(O)(C(=O)C(=O)N3CCCCC3C(=O)OC(C(C)=CC3CCC(O)C(OC)C3)C(C)C(O)CC1=O)C(C)CC2OC Tacrolimus
31 | Cc1ccc(NC(=O)c2ccc(CN3CCN(C)CC3)cc2)cc1Nc1nccc(-c2cccnc2)n1 Imatinib
32 |
--------------------------------------------------------------------------------
/Kinase-Inhibitors/approved-or-in-clinical-trials/kin_inh_approved_3.smi:
--------------------------------------------------------------------------------
1 | COc1cc(Nc2c(C#N)cnc3cc(OCCCN4CCN(C)CC4)c(OC)cc23)c(Cl)cc1Cl Bosutinib
2 | CC1CCN(C(=O)CC#N)CC1N(C)c1ncnc2[nH]ccc12 Tofacitinib
3 | Cc1ccc(C(=O)Nc2ccc(CN3CCN(C)CC3)c(C(F)(F)F)c2)cc1C#Cc1cnc2cccnn12 Ponatinib
4 | N#CCC(C1CCCC1)n1cc(c2ncnc3[nH]ccc23)cn1 Ruxolitinib
5 | C=CC(=O)N1CCCC(n2nc(c3ccc(Oc4ccccc4)cc3)c3c(N)ncnc23)C1 Ibrutinib
6 | CNC(=O)c1cc(Oc2ccc(NC(=O)Nc3ccc(Cl)c(C(F)(F)F)c3)cc2)ccn1 Sorafenib
7 | Cc1nc(Nc2ncc(C(=O)Nc3c(C)cccc3Cl)s2)cc(N2CCN(CCO)CC2)n1 Dasatinib
8 | COC(=O)c1ccc2c(c1)NC(=O)C2=C(Nc1ccc(N(C)C(=O)CN2CCN(C)CC2)cc1)c1ccccc1 Nintedanib
9 | CC(C)(C)c1nc(c2cccc(NS(=O)(=O)c3c(F)cccc3F)c2F)c(c2ccnc(N)n2)s1 Dabrafenib
10 | COC1CC2CCC(C)C(O)(O2)C(=O)C(=O)N2CCCCC2C(=O)OC(C(C)CC2CCC(OCCO)C(OC)C2)CC(=O)C(C)C=C(C)C(O)C(OC)C(=O)C(C)CC(C)C=CC=CC=C1C Everolimus
11 | CCN(CC)CCNC(=O)c1c(C)[nH]c(C=C2C(=O)Nc3ccc(F)cc23)c1C Sunitinib
12 | COC1CC2CCC(C)C(O)(O2)C(=O)C(=O)N2CCCCC2C(=O)OC(C(C)CC2CCC(O)C(OC)C2)CC(=O)C(C)C=C(C)C(O)C(OC)C(=O)C(C)CC(C)C=CC=CC=C1C Sirolimus
13 | COc1cc2nccc(Oc3ccc(NC(=O)C4(C(=O)Nc5ccc(F)cc5)CC4)cc3)c2cc1OC Cabozantinib
14 | CNC(=O)c1cc(Oc2ccc(NC(=O)Nc3ccc(Cl)c(C(F)(F)F)c3)c(F)c2)ccn1 Regorafenib
15 | CCC1C=C(C)CC(C)CC(OC)C2OC(O)(C(=O)C(=O)N3CCCCC3C(=O)OC(C(=CC3CCC(Cl)C(OC)C3)C)C(C)C(O)CC1=O)C(C)CC2OC Pimecrolimus
16 | CC(Oc1cc(c2cnn(C3CCNCC3)c2)cnc1N)c1c(Cl)ccc(F)c1Cl Crizotinib
17 | Cc1cn(c2cc(NC(=O)c3ccc(C)c(Nc4nccc(c5cccnc5)n4)c3)cc(C(F)(F)F)c2)cn1 Nilotinib
18 | CCC(Nc1ncnc2[nH]cnc12)c1nc2cccc(F)c2c(=O)n1c1ccccc1 Idelalisib
19 | COc1cc2c(Nc3ccc(Br)cc3F)ncnc2cc1OCC1CCN(C)CC1 Vandetanib
20 | Cc1cc(Nc2ncc(Cl)c(Nc3ccccc3S(=O)(=O)C(C)C)n2)c(OC(C)C)cc1C1CCNCC1 Ceritinib
21 | COc1cc2ncnc(Nc3ccc(F)c(Cl)c3)c2cc1OCCCN1CCOCC1 Gefitinib
22 | CNC(=O)c1ccccc1Sc1ccc2c(C=Cc3ccccn3)n[nH]c2c1 Axitinib
23 | CN(C)CC=CC(=O)Nc1cc2c(Nc3ccc(F)c(Cl)c3)ncnc2cc1OC1CCOC1 Afatinib
24 | CS(=O)(=O)CCNCc1ccc(c2ccc3ncnc(Nc4ccc(OCc5cccc(F)c5)c(Cl)c4)c3c2)o1 Lapatinib
25 | COC1CC2CCC(C)C(O)(O2)C(=O)C(=O)N2CCCCC2C(=O)OC(C(C)CC2CCC(OC(=O)C(C)(CO)CO)C(OC)C2)CC(=O)C(C)C=C(C)C(O)C(OC)C(=O)C(C)CC(C)C=CC=CC=C1C Temsirolimus
26 | Cc1ccc(Nc2nccc(N(C)c3ccc4c(C)n(C)nc4c3)n2)cc1S(=O)(=O)N Pazopanib
27 | CCCS(=O)(=O)Nc1ccc(F)c(C(=O)c2c[nH]c3ncc(c4ccc(Cl)cc4)cc23)c1F Vemurafenib
28 | C#Cc1cccc(Nc2ncnc3cc(OCCOC)c(OCCOC)cc23)c1 Erlotinib
29 | CC(=O)Nc1cccc(n2c(=O)n(C3CC3)c(=O)c3c(Nc4ccc(I)cc4F)n(C)c(=O)c(C)c23)c1 Trametinib
30 | C=CCC1C=C(C)CC(C)CC(OC)C2OC(O)(C(=O)C(=O)N3CCCCC3C(=O)OC(C(=CC3CCC(O)C(OC)C3)C)C(C)C(O)CC1=O)C(C)CC2OC Tacrolimus
31 | Cc1ccc(NC(=O)c2ccc(CN3CCN(C)CC3)cc2)cc1Nc1nccc(c2cccnc2)n1 Imatinib
32 |
--------------------------------------------------------------------------------
/Kinase-Inhibitors/approved-or-in-clinical-trials/kin_inh_phase4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Kinase-Inhibitors/approved-or-in-clinical-trials/kin_inh_phase4.png
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | Code in this repository is copyright (C) 2013-2016 by Team SKI @ BioMed X GmbH
2 |
3 | This work is licensed under the Creative Commons Attribution-ShareAlike 3.0
4 | License. To view a copy of this license, visit
5 | http://creativecommons.org/licenses/by-sa/3.0/ or send a letter to Creative
6 | Commons, 543 Howard Street, 5th Floor, San Francisco, California, 94105, USA.
7 |
--------------------------------------------------------------------------------
/Machine Learning/tree.dot:
--------------------------------------------------------------------------------
1 | digraph Tree {
2 | node [shape=box] ;
3 | 0 [label="X[3] <= 0.8\ngini = 0.6667\nsamples = 150\nvalue = [50, 50, 50]"] ;
4 | 1 [label="gini = 0.0\nsamples = 50\nvalue = [50, 0, 0]"] ;
5 | 0 -> 1 [labeldistance=2.5, labelangle=45, headlabel="True"] ;
6 | 2 [label="X[3] <= 1.75\ngini = 0.5\nsamples = 100\nvalue = [0, 50, 50]"] ;
7 | 0 -> 2 [labeldistance=2.5, labelangle=-45, headlabel="False"] ;
8 | 3 [label="X[2] <= 4.95\ngini = 0.168\nsamples = 54\nvalue = [0, 49, 5]"] ;
9 | 2 -> 3 ;
10 | 4 [label="X[3] <= 1.65\ngini = 0.0408\nsamples = 48\nvalue = [0, 47, 1]"] ;
11 | 3 -> 4 ;
12 | 5 [label="gini = 0.0\nsamples = 47\nvalue = [0, 47, 0]"] ;
13 | 4 -> 5 ;
14 | 6 [label="gini = 0.0\nsamples = 1\nvalue = [0, 0, 1]"] ;
15 | 4 -> 6 ;
16 | 7 [label="X[3] <= 1.55\ngini = 0.4444\nsamples = 6\nvalue = [0, 2, 4]"] ;
17 | 3 -> 7 ;
18 | 8 [label="gini = 0.0\nsamples = 3\nvalue = [0, 0, 3]"] ;
19 | 7 -> 8 ;
20 | 9 [label="X[0] <= 6.95\ngini = 0.4444\nsamples = 3\nvalue = [0, 2, 1]"] ;
21 | 7 -> 9 ;
22 | 10 [label="gini = 0.0\nsamples = 2\nvalue = [0, 2, 0]"] ;
23 | 9 -> 10 ;
24 | 11 [label="gini = 0.0\nsamples = 1\nvalue = [0, 0, 1]"] ;
25 | 9 -> 11 ;
26 | 12 [label="X[2] <= 4.85\ngini = 0.0425\nsamples = 46\nvalue = [0, 1, 45]"] ;
27 | 2 -> 12 ;
28 | 13 [label="X[0] <= 5.95\ngini = 0.4444\nsamples = 3\nvalue = [0, 1, 2]"] ;
29 | 12 -> 13 ;
30 | 14 [label="gini = 0.0\nsamples = 1\nvalue = [0, 1, 0]"] ;
31 | 13 -> 14 ;
32 | 15 [label="gini = 0.0\nsamples = 2\nvalue = [0, 0, 2]"] ;
33 | 13 -> 15 ;
34 | 16 [label="gini = 0.0\nsamples = 43\nvalue = [0, 0, 43]"] ;
35 | 12 -> 16 ;
36 | }
--------------------------------------------------------------------------------
/Machine Learning/tree.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Machine Learning/tree.png
--------------------------------------------------------------------------------
/Presentations and Tutorials/Molecular Modelling Workshop 2014/biomedx.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Presentations and Tutorials/Molecular Modelling Workshop 2014/biomedx.png
--------------------------------------------------------------------------------
/Presentations and Tutorials/Molecular Modelling Workshop 2014/merck.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Presentations and Tutorials/Molecular Modelling Workshop 2014/merck.jpeg
--------------------------------------------------------------------------------
/Presentations and Tutorials/Molecular Modelling Workshop 2014/rdkit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Presentations and Tutorials/Molecular Modelling Workshop 2014/rdkit.png
--------------------------------------------------------------------------------
/Presentations and Tutorials/Protvec demo 2017/README.md:
--------------------------------------------------------------------------------
1 | ## Demonstration of biovec (protein sequence embeddings)
2 | * Described by Asgari and Mofrad in [A Continuous Distributed Representation of Biological Sequences](http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0141287).
3 |
4 | This folder contains a notebook that shows how to use the [biovec](https://github.com/kyu999/biovec) module to generate vectors from protein sequences and how to use those for clustering and protein family classification (with deep learning).
5 |
6 | ### Usage:
7 | * Clone the repository with `git clone https://github.com/Team-SKI/snippets.git`
8 | * Change directory to `Presentations and Tutorials/Protvec demo 2017`
9 | * Download the biovec submodule with `git submodule init` followed by `git submodule update`
10 | * Run `jupyter notebook`
11 |
12 | ### Dependencies:
13 | ```
14 | pandas
15 | numpy
16 | matplotlib
17 | scikit-learn
18 | gensim
19 | keras (with TensorFlow or Theano)
20 | seaborn
21 | ```
22 |
23 | ##### License and copyright:
24 |
25 | Copyright (C) 2017 by Sabrina Jaeger and Samo Turk, BioMed X GmbH
26 |
27 | This work is licensed under the Creative Commons Attribution-ShareAlike 4.0 License. To view a copy of this license, visit https://creativecommons.org/licenses/by-sa/4.0/ or send a letter to Creative Commons, 543 Howard Street, 5th Floor, San Francisco, California, 94105, USA.
28 |
29 |
30 |
31 |
32 |
--------------------------------------------------------------------------------
/Presentations and Tutorials/Protvec demo 2017/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Presentations and Tutorials/Protvec demo 2017/__init__.py
--------------------------------------------------------------------------------
/Presentations and Tutorials/Protvec demo 2017/biovec/.gitignore:
--------------------------------------------------------------------------------
1 | data/
2 | dist/
3 | build/
4 | biovec.egg-info
5 | *__pycache__
6 | *pyc
7 |
--------------------------------------------------------------------------------
/Presentations and Tutorials/Protvec demo 2017/biovec/README.md:
--------------------------------------------------------------------------------
1 | BioVec
2 | --------------------------------------
3 |
4 | ### Example
5 |
6 | ```
7 | import biovec
8 |
9 | pv = biovec.ProtVec("some_fasta_file.fasta", out="output_corpusfile_path.txt")
10 | pv["QAT"]
11 | pv.to_vecs("ATATQSQSMTEEL")
12 | pv.save('model_file_path')
13 |
14 | pv2 = biovec.models.load_protvec('model_file_path')
15 | ```
16 |
17 | ### Trained Model
18 |
19 | This package includes an already trained model in '/trained_models'.
20 | swissprot_reviewed_protvec is a ProtVec model trained on all reviewed Swiss-Prot proteins (551,754 proteins as of 14/07/2016).
21 |
22 | # Source
23 | Paper: [ProtVec: A Continuous Distributed Representation of
24 | Biological Sequences](http://arxiv.org/pdf/1503.05140v1.pdf)
25 |
26 | ### Abstract
27 | 通常生物情報は文字の配列で表現されるが、それをベクトルとして表現することによってより分析しやすく情報を収納することができるのではないかと提案されている。具体的な適用範囲としては、
28 |
29 | 1. family classification
30 | 2. protein visualization
31 | 3. structure prediction
32 | 4. disordered protein identification
33 | 5. protein-protein interaction prediction.
34 |
35 | など。
36 | classificationやpredictionはわかりやすい使い方だが、個人的にはprotein visualizationが最も効用が大きいのではないかと感じた。短い配列や、構造が既知の配列でない限り、現状簡単にタンパク質の全容を掴む方法が一般的に普及していないように感じるので、このような表現方法は一定の有用性があると考える。
37 | この考えは一見奇妙に映るが自然言語ではある程度認知されており、word2vecなどは記憶に新しい。
38 |
39 | ### ProtVec実装
40 |
41 | [前処理]
42 | * UniProtのSwiss-Protの全データを収集する
43 | * 各配列を3つのn-gramのリストに変換する
44 |
45 | ```
46 | 'AGAMQSASM' => [['AGA', 'MQS', 'ASM'], ['GAM','QSA'], ['AMQ', 'SAS']]
47 | ```
48 |
49 | * word2vecに読み込ませるために、変換した配列をテキストファイル形式に書き出す
50 |
51 | [モデル構築]
52 |
53 | word2vecのライブラリを用いれば基本大丈夫そう。
54 | いろいろあるけど、gensimをここでは使う。ただしSkip-gramを論文では採用しているので注意。
55 | gensimではsgパラメータを1に設定するとskip-gramになる。
56 | > sg defines the training algorithm. By default (sg=0), CBOW is used. Otherwise (sg=1), skip-gram is employed.
57 |
58 | 前処理では上記のように単語をn-gramのリストへ変換するが、sequenceをqueryとしてモデルに投げるときは逆に再変換する必要がある。訓練済みモデルにn-gramを指定することで、対応するベクトルを得ることができる。論文ではそのベクトルの和をその配列に対応するベクトルとして扱っている。
59 |
60 | ```
61 | seq = 'AGAMQSASM'
62 | n_grams = split_to_grams(seq)
63 | gram_vecs = [to_gram_vec(n_gram) for n_gram in n_grams]
64 | seq_vec = sum(gram_vecs)
65 | ```
66 |
67 | 論文での、ベクトルの次元数は100。
68 | 元の配列に対応するベクトルはn-gramベクトルの和なので次元数は変わらず、100次元。
69 | negative samplingも行っているので忘れずに。
70 |
71 | ### Visualization of ProtVec
72 | [sklearn.manifold.TSNE](http://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html)
73 | t-SNEによって2次元もしくは3次元に次元圧縮を行った後、可視化している。しかしこの圧縮方法が適しているのか、なぜこの方法を取ったのかは言及されていない。scikit-learnではt-SNEは非推奨されており、密なデータであればPCA、疎なデータであればTruncatedSVDが勧められている。圧縮後の次元数がとても低いためだろうか。
74 |
75 | > It is highly recommended to use another dimensionality reduction method (e.g. PCA for dense data or TruncatedSVD for sparse data) to reduce the number of dimensions to a reasonable amount (e.g. 50) if the number of features is very high. This will suppress some noise and speed up the computation of pairwise distances between samples. For more tips see Laurens van der Maaten’s FAQ.
76 |
77 | [Lipschitz continuity](http://izumi-math.jp/F_Wada/fixpoint_theorem.pdf)
78 |
79 | ### Abstract of the paper
80 |
81 | > We propose a new approach for representing biological sequences. This method, named protein-vectors or ProtVec for short, can be utilized in bioinformatics applications such as family classification, protein visualization, structure prediction, disordered protein identification, and protein-protein interaction prediction. Using the Skip-gram neural networks, protein sequences are represented with a single dense n-dimensional vector. This method was evaluated by classifying protein sequences obtained from Swiss-Prot belonging to 7,027 protein families where an average family classification accuracy of 94%±0.03% was obtained, outperforming existing family classification methods. In addition, our model was used to predict disordered proteins from structured proteins. Two databases of disordered sequences were used: the DisProt database as well as a database featuring the disordered regions of nucleoporins rich with phenylalanine-glycine repeats (FG-Nups). Using support vector machine classifiers, FG-Nup sequences were distinguished from structured Protein Data Bank (PDB) sequences with 99.81\% accuracy, and unstructured DisProt sequences from structured DisProt sequences with 100.0\% accuracy. These results indicate that by only providing sequence data for various proteins into this model, information about protein structure can be determined with high accuracy. This so-called embedding model needs to be trained only once and can then be used to ascertain a diverse set of information regarding the proteins of interest. In addition, this representation can be considered as pre-training for various applications of deep learning in bioinformatics.
82 |
83 | ### References
84 | 1. [Disordered Proteins](https://en.wikipedia.org/wiki/Intrinsically_disordered_proteins)
85 | 2. [DisProt](http://www.disprot.org/)
86 | 3. [gensim word2vec](https://radimrehurek.com/gensim/models/word2vec.html)
87 | 4. [NIPS2013読み会: Distributed Representations of Words and Phrases and their Compositionality](http://www.slideshare.net/unnonouno/nips2013-distributed-representations-of-words-and-phrases-and-their-compositionality)
88 | 5. [Skip gram shirakawa_20141121
89 | ](http://www.slideshare.net/nttdata-msi/skip-gram-shirakawa20141121-41833306)
90 | 6. [論文紹介「Distributed Representations of Words and Phrases and their Compositionality」](http://qiita.com/nishio/items/3860fe198d65d173af6b)
91 | 7. [sklearn.manifold.TSNE](http://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html)
92 | 8. [Lipschitz continuity](http://izumi-math.jp/F_Wada/fixpoint_theorem.pdf)
93 |
--------------------------------------------------------------------------------
/Presentations and Tutorials/Protvec demo 2017/biovec/biovec/__init__.py:
--------------------------------------------------------------------------------
1 | from .models import ProtVec
2 |
--------------------------------------------------------------------------------
/Presentations and Tutorials/Protvec demo 2017/biovec/biovec/binary_amino.py:
--------------------------------------------------------------------------------
'''
Binary representation of amino acid residue and amino acid sequence
e.g.
    'A' => [0, 0, 0, 0, 0]
    'AGGP' => [[0, 0, 0, 0, 0], [0, 0, 1, 0, 1], [0, 0, 1, 0, 1], [0, 1, 1, 0, 0]]
'''

# Maps each one-letter amino acid symbol to a 5-element list of 0/1 values.
# Read as binary numbers, the codes count upwards in alphabetical order of the
# symbols (A=0, C=1, ... Q=13), then jump to R=15 and continue S=16 ... Y=20.
# NOTE(review): the value 14 ([0, 1, 1, 1, 0]) is never assigned -- confirm
# whether that gap before 'R' is intentional.
AMINO_ACID_BINARY_TABLE = {
    'A': [0, 0, 0, 0, 0],
    'C': [0, 0, 0, 0, 1],
    'D': [0, 0, 0, 1, 0],
    'E': [0, 0, 0, 1, 1],
    'F': [0, 0, 1, 0, 0],
    'G': [0, 0, 1, 0, 1],
    'H': [0, 0, 1, 1, 0],
    'I': [0, 0, 1, 1, 1],
    'K': [0, 1, 0, 0, 0],
    'L': [0, 1, 0, 0, 1],
    'M': [0, 1, 0, 1, 0],
    'N': [0, 1, 0, 1, 1],
    'P': [0, 1, 1, 0, 0],
    'Q': [0, 1, 1, 0, 1],
    'R': [0, 1, 1, 1, 1],
    'S': [1, 0, 0, 0, 0],
    'T': [1, 0, 0, 0, 1],
    'V': [1, 0, 0, 1, 0],
    'W': [1, 0, 0, 1, 1],
    'Y': [1, 0, 1, 0, 0]
}
30 |
31 |
def convert_amino_to_binary(amino):
    '''
    Convert a one-letter amino acid symbol to its 5-element binary list.

    "A" => [0, 0, 0, 0, 0]

    Args:
        amino: key to look up in AMINO_ACID_BINARY_TABLE.
    Returns:
        The list stored in AMINO_ACID_BINARY_TABLE for ``amino`` (the same
        list object, not a copy), or None when the symbol is not in the table.
    '''
    # dict.has_key() was removed in Python 3; .get() gives the same
    # "value or None" result on both Python 2 and Python 3.
    return AMINO_ACID_BINARY_TABLE.get(amino)
40 |
41 |
def convert_amino_acid_sequence_to_vector(sequence):
    '''
    Encode every residue of ``sequence`` with convert_amino_to_binary.

    "AGGP" => [[0, 0, 0, 0, 0], [0, 0, 1, 0, 1], [0, 0, 1, 0, 1], [0, 1, 1, 0, 0]]

    Returns the list of per-residue encodings, or None as soon as the
    sequence is found to contain at least one residue that has no encoding.
    '''
    encoded = list(map(convert_amino_to_binary, sequence))
    # A single unknown residue invalidates the whole sequence.
    for bits in encoded:
        if bits is None:
            return None
    return encoded
50 |
--------------------------------------------------------------------------------
/Presentations and Tutorials/Protvec demo 2017/biovec/biovec/models.py:
--------------------------------------------------------------------------------
1 | from gensim.models import word2vec
2 | from Bio import SeqIO
3 | import sys
4 | from gensim.models import word2vec
5 |
6 |
def split_ngrams(seq, n):
    """
    Cut ``seq`` into non-overlapping n-grams for three start offsets (0, 1, 2).

    'AGAMQSASM' => [['AGA', 'MQS', 'ASM'], ['GAM','QSA'], ['AMQ', 'SAS']]

    Trailing characters that do not fill a complete n-gram are dropped.
    The result always holds exactly three lists, whatever ``n`` is.
    """
    frames = (seq, seq[1:], seq[2:])
    # zip(*[iterator] * n) pulls n consecutive items from one shared iterator,
    # which yields the sequence in fixed-size chunks.
    return [
        ["".join(chunk) for chunk in zip(*[iter(frame)] * n)]
        for frame in frames
    ]
19 |
20 |
def generate_corpusfile(corpus_fname, n, out):
    '''
    Args:
        corpus_fname: path of the FASTA file to read
        n: n-gram length passed to split_ngrams
        out: output corpus file path (opened in "w" mode, so it is overwritten)
    Description:
        Protvec uses word2vec inside, and it requires to load corpus file
        to generate corpus.
        Every record of the FASTA file is split with split_ngrams and each
        resulting n-gram list is written as one space-separated line.
        A "." is written to stdout per record as a progress indicator.
    '''
    # The context manager closes the file even if parsing or writing raises.
    with open(out, "w") as f:
        for r in SeqIO.parse(corpus_fname, "fasta"):
            ngram_patterns = split_ngrams(r.seq, n)
            for ngram_pattern in ngram_patterns:
                f.write(" ".join(ngram_pattern) + "\n")
            sys.stdout.write(".")
39 |
40 |
def load_protvec(model_fname):
    """
    Load a previously saved model from ``model_fname``.

    Delegates to word2vec.Word2Vec.load and returns its result unchanged.
    """
    model = word2vec.Word2Vec.load(model_fname)
    return model
43 |
44 |
class ProtVec(word2vec.Word2Vec):
    """
    word2vec model whose "words" are protein n-grams.

    The corpus is either supplied directly or generated from a FASTA file
    with generate_corpusfile before the underlying Word2Vec is initialised.
    """

    def __init__(self, corpus_fname=None, corpus=None, n=3, size=100, out="corpus.txt", sg=1, window=25, min_count=2, workers=3):
        """
        Either corpus_fname or corpus is required. When both are given,
        corpus_fname wins: the corpus is regenerated from the FASTA file and
        the passed-in corpus object is ignored.

        corpus_fname: fasta file for corpus
        corpus: corpus object implemented by gensim
        n: n of n-gram
        size: forwarded to Word2Vec.__init__ as ``size``
        out: corpus output file path
        sg: forwarded to Word2Vec.__init__ as ``sg``
        window: forwarded to Word2Vec.__init__ as ``window``
        min_count: least appearance count in corpus. if the n-gram appear k times which is below min_count, the model does not remember the n-gram
        workers: forwarded to Word2Vec.__init__ as ``workers``

        Raises Exception when neither corpus_fname nor corpus is given.
        """

        self.n = n
        self.size = size
        self.corpus_fname = corpus_fname

        if corpus is None and corpus_fname is None:
            raise Exception("Either corpus_fname or corpus is needed!")

        if corpus_fname is not None:
            # Parenthesised single-argument form prints the same text on
            # Python 2 and is valid syntax on Python 3.
            print('Generate Corpus file from fasta file...')
            generate_corpusfile(corpus_fname, n, out)
            corpus = word2vec.Text8Corpus(out)

        word2vec.Word2Vec.__init__(self, corpus, size=size, sg=sg, window=window, min_count=min_count, workers=workers)

    def to_vecs(self, seq):
        """
        convert sequence to three n-length vectors
        e.g. 'AGAMQSASM' => [ array([ ... * 100 ], array([ ... * 100 ], array([ ... * 100 ] ]

        The sequence is split with split_ngrams; for each of the resulting
        n-gram lists the vectors of its n-grams are summed.
        A list without any n-gram (sequence too short) contributes the
        integer 0, because that is what sum() returns for an empty list.

        Raises Exception when an n-gram is not known to the model.
        """
        ngram_patterns = split_ngrams(seq, self.n)

        protvecs = []
        for ngrams in ngram_patterns:
            ngram_vecs = []
            for ngram in ngrams:
                try:
                    vec = self[ngram]
                except KeyError:
                    # Only a failed lookup means "unknown n-gram"; anything
                    # else (including KeyboardInterrupt) must propagate.
                    raise Exception("Model has never trained this n-gram: " + ngram)
                ngram_vecs.append(vec)
            protvecs.append(sum(ngram_vecs))
        return protvecs
89 |
--------------------------------------------------------------------------------
/Presentations and Tutorials/Protvec demo 2017/biovec/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 |
# Distribution metadata for the biovec package.
setup(
    name='biovec',
    version='0.1',
    description='The implementation of biovec',
    url='https://github.com/kyu999/biovec',
    author='Takashi Kyue',
    author_email='kyukokkyou999@gmail.com',
    license='MIT',
    packages=['biovec'],
    zip_safe=False,
)
12 |
--------------------------------------------------------------------------------
/Presentations and Tutorials/Protvec demo 2017/biovec/trained_models/swissprot_reviewed_protvec:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Presentations and Tutorials/Protvec demo 2017/biovec/trained_models/swissprot_reviewed_protvec
--------------------------------------------------------------------------------
/Presentations and Tutorials/Protvec demo 2017/data/uniprot_sprot_small.fasta:
--------------------------------------------------------------------------------
1 | >sp|Q6GZX4|001R_FRG3G Putative transcription factor 001R OS=Frog virus 3 (isolate Goorha) GN=FV3-001R PE=4 SV=1
2 | MAFSAEDVLKEYDRRRRMEALLLSLYYPNDRKLLDYKEWSPPRVQVECPKAPVEWNNPPS
3 | EKGLIVGHFSGIKYKGEKAQASEVDVNKMCCWVSKFKDAMRRYQGIQTCKIPGKVLSDLD
4 | AKIKAYNLTVEGVEGFVRYSRVTKQHVAAFLKELRHSKQYENVNLIHYILTDKRVDIQHL
5 | EKDLVKDFKALVESAHRMRQGHMINVKYILYQLLKKHGHGPDGPDILTVKTGSKGVLYDD
6 | SFRKIYTDLGWKFTPL
7 | >sp|Q6GZX3|002L_FRG3G Uncharacterized protein 002L OS=Frog virus 3 (isolate Goorha) GN=FV3-002L PE=4 SV=1
8 | MSIIGATRLQNDKSDTYSAGPCYAGGCSAFTPRGTCGKDWDLGEQTCASGFCTSQPLCAR
9 | IKKTQVCGLRYSSKGKDPLVSAEWDSRGAPYVRCTYDADLIDTQAQVDQFVSMFGESPSL
10 | AERYCMRGVKNTAGELVSRVSSDADPAGGWCRKWYSAHRGPDQDAALGSFCIKNPGAADC
11 | KCINRASDPVYQKVKTLHAYPDQCWYVPCAADVGELKMGTQRDTPTNCPTQVCQIVFNML
12 | DDGSVTMDDVKNTINCDFSKYVPPPPPPKPTPPTPPTPPTPPTPPTPPTPPTPRPVHNRK
13 | VMFFVAGAVLVAILISTVRW
14 | >sp|Q197F8|002R_IIV3 Uncharacterized protein 002R OS=Invertebrate iridescent virus 3 GN=IIV3-002R PE=4 SV=1
15 | MASNTVSAQGGSNRPVRDFSNIQDVAQFLLFDPIWNEQPGSIVPWKMNREQALAERYPEL
16 | QTSEPSEDYSGPVESLELLPLEIKLDIMQYLSWEQISWCKHPWLWTRWYKDNVVRVSAIT
17 | FEDFQREYAFPEKIQEIHFTDTRAEEIKAILETTPNVTRLVIRRIDDMNYNTHGDLGLDD
18 | LEFLTHLMVEDACGFTDFWAPSLTHLTIKNLDMHPRWFGPVMDGIKSMQSTLKYLYIFET
19 | YGVNKPFVQWCTDNIETFYCTNSYRYENVPRPIYVWVLFQEDEWHGYRVEDNKFHRRYMY
20 | STILHKRDTDWVENNPLKTPAQVEMYKFLLRISQLNRDGTGYESDSDPENEHFDDESFSS
21 | GEEDSSDEDDPTWAPDSDDSDWETETEEEPSVAARILEKGKLTITNLMKSLGFKPKPKKI
22 | QSIDRYFCSLDSNYNSEDEDFEYDSDSEDDDSDSEDDC
23 | >sp|Q197F7|003L_IIV3 Uncharacterized protein 003L OS=Invertebrate iridescent virus 3 GN=IIV3-003L PE=4 SV=1
24 | MYQAINPCPQSWYGSPQLEREIVCKMSGAPHYPNYYPVHPNALGGAWFDTSLNARSLTTT
25 | PSLTTCTPPSLAACTPPTSLGMVDSPPHINPPRRIGTLCFDFGSAKSPQRCECVASDRPS
26 | TTSNTAPDTYRLLITNSKTRKNNYGTCRLEPLTYGI
27 | >sp|Q6GZX2|003R_FRG3G Uncharacterized protein 3R OS=Frog virus 3 (isolate Goorha) GN=FV3-003R PE=3 SV=1
28 | MARPLLGKTSSVRRRLESLSACSIFFFLRKFCQKMASLVFLNSPVYQMSNILLTERRQVD
29 | RAMGGSDDDGVMVVALSPSDFKTVLGSALLAVERDMVHVVPKYLQTPGILHDMLVLLTPI
30 | FGEALSVDMSGATDVMVQQIATAGFVDVDPLHSSVSWKDNVSCPVALLAVSNAVRTMMGQ
31 | PCQVTLIIDVGTQNILRDLVNLPVEMSGDLQVMAYTKDPLGKVPAVGVSVFDSGSVQKGD
32 | AHSVGAPDGLVSFHTHPVSSAVELNYHAGWPSNVDMSSLLTMKNLMHVVVAEEGLWTMAR
33 | TLSMQRLTKVLTDAEKDVMRAAAFNLFLPLNELRVMGTKDSNNKSLKTYFEVFETFTIGA
34 | LMKHSGVTPTAFVDRRWLDNTIYHMGFIPWGRDMRFVVEYDLDGTNPFLNTVPTLMSVKR
35 | KAKIQEMFDNMVSRMVTS
36 | >sp|Q6GZX1|004R_FRG3G Uncharacterized protein 004R OS=Frog virus 3 (isolate Goorha) GN=FV3-004R PE=4 SV=1
37 | MNAKYDTDQGVGRMLFLGTIGLAVVVGGLMAYGYYYDGKTPSSGTSFHTASPSFSSRYRY
38 | >sp|Q197F5|005L_IIV3 Uncharacterized protein 005L OS=Invertebrate iridescent virus 3 GN=IIV3-005L PE=3 SV=1
39 | MRYTVLIALQGALLLLLLIDDGQGQSPYPYPGMPCNSSRQCGLGTCVHSRCAHCSSDGTL
40 | CSPEDPTMVWPCCPESSCQLVVGLPSLVNHYNCLPNQCTDSSQCPGGFGCMTRRSKCELC
41 | KADGEACNSPYLDWRKDKECCSGYCHTEARGLEGVCIDPKKIFCTPKNPWQLAPYPPSYH
42 | QPTTLRPPTSLYDSWLMSGFLVKSTTAPSTQEEEDDY
43 | >sp|Q6GZX0|005R_FRG3G Uncharacterized protein 005R OS=Frog virus 3 (isolate Goorha) GN=FV3-005R PE=4 SV=1
44 | MQNPLPEVMSPEHDKRTTTPMSKEANKFIRELDKKPGDLAVVSDFVKRNTGKRLPIGKRS
45 | NLYVRICDLSGTIYMGETFILESWEELYLPEPTKMEVLGTLESCCGIPPFPEWIVMVGED
46 | QCVYAYGDEEILLFAYSVKQLVEEGIQETGISYKYPDDISDVDEEVLQQDEEIQKIRKKT
47 | REFVDKDAQEFQDFLNSLDASLLS
48 | >sp|Q91G88|006L_IIV6 Putative KilA-N domain-containing protein 006L OS=Invertebrate iridescent virus 6 GN=IIV6-006L PE=3 SV=1
49 | MDSLNEVCYEQIKGTFYKGLFGDFPLIVDKKTGCFNATKLCVLGGKRFVDWNKTLRSKKL
50 | IQYYETRCDIKTESLLYEIKGDNNDEITKQITGTYLPKEFILDIASWISVEFYDKCNNII
51 | INYFVNEYKTMDKKTLQSKINEVEEKMQKLLNEKEEELQEKNDKIDELILFSKRMEEDRK
52 | KDREMMIKQEKMLRELGIHLEDVSSQNNELIEKVDEQVEQNAVLNFKIDNIQNKLEIAVE
53 | DRAPQPKQNLKRERFILLKRNDDYYPYYTIRAQDINARSALKRQKNLYNEVSVLLDLTCH
54 | PNSKTLYVRVKDELKQKGVVFNLCKVSISNSKINEEELIKAMETINDEKRDV
55 | >sp|Q6GZW9|006R_FRG3G Uncharacterized protein 006R OS=Frog virus 3 (isolate Goorha) GN=FV3-006R PE=4 SV=1
56 | MYKMYFLKDQKFSLSGTIRINDKTQSEYGSVWCPGLSITGLHHDAIDHNMFEEMETEIIE
57 | YLGPWVQAEYRRIKG
58 | >sp|Q6GZW8|007R_FRG3G Uncharacterized protein 007R OS=Frog virus 3 (isolate Goorha) GN=FV3-007R PE=4 SV=1
59 | MRSIKPLRCCNAHGRHVSQEYGRCTLLLFREKLFLQTGLVCNKQCNAPNNDGAESKHHGI
60 | HHGSRGALALRGAGVHLLASAALGPRVLAGLVPTGRSVQGSVGQCGRVAQIGRARDVAAR
61 | KQESYCEK
62 | >sp|Q197F3|007R_IIV3 Uncharacterized protein 007R OS=Invertebrate iridescent virus 3 GN=IIV3-007R PE=4 SV=1
63 | MEAKNITIDNTTYNFFKFYNINQPLTNLKYLNSERLCFSNAVMGKIVDDASTITITYHRV
64 | YFGISGPKPRQVADLGEYYDVNELLNYDTYTKTQEFAQKYNSLVKPTIDAKNWSGNELVL
65 | LVGNEWYCKTFGKAGSKNVFLYNMIPTIYRDEPQHQEQILKKFMFFNATKNVEQNPNFLD
66 | NVPEEYYHLLLPKSWVEKNLSDKYRKIMETEHKPLVFSCEPAFSFGLCRNTQDKNESYQL
67 | SLCLYEREKPRDAEIVWAAKYDELAAMVRDYLKKTPEFKKYRSFISCMKGLSWKNNEIGD
68 | KDGPKLYPKVIFNRKKGEFVTIFTKDDDVEPETIEDPRTILDRRCVVQAALRLESVFVHN
69 | KVAIQLRINDVLISEWKEASSKPQPLILRRHRFTKPSSSVAKSTSPSLRNSGSDESDLNQ
70 | SDSDKEDERVVPVPKTKRIVKTVKLPN
71 | >sp|Q197F2|008L_IIV3 Uncharacterized protein 008L OS=Invertebrate iridescent virus 3 GN=IIV3-008L PE=4 SV=1
72 | MSFKVYDPIAELIATQFPTSNPDLQIINNDVLVVSPHKITLPMGPQNAGDVTNKAYVDQA
73 | VMSAAVPVASSTTVGTIQMAGDLEGSSGTNPIIAANKITLNKLQKIGPKMVIGNPNSDWN
74 | NTQEIELDSSFRIVDNRLNAGIVPISSTDPNKSNTVIPAPQQNGLFYLDSSGRVWVWAEH
75 | YYKCITPSRYISKWMGVGDFQELTVGQSVMWDSGRPSIETVSTQGLEVEWISSTNFTLSS
76 | LYLIPIVVKVTICIPLLGQPDQMAKFVLYSVSSAQQPRTGIVLTTDSSRSSAPIVSEYIT
77 | VNWFEPKSYSVQLKEVNSDSGTTVTICSDKWLANPFLDCWITIEEVG
78 | >sp|Q6GZW6|009L_FRG3G Putative helicase 009L OS=Frog virus 3 (isolate Goorha) GN=FV3-009L PE=4 SV=1
79 | MDTSPYDFLKLYPWLSRGEADKGTLLDAFPGETFEQSLASDVAMRRAVQDDPAFGHQKLV
80 | ETFLSEDTPYRELLLFHAPGTGKTCTVVSVAERAKEKGLTRGCIVLARGAALLRNFLHEL
81 | VFNCGTGGRYIPEGYADMGDQERTRKMRKAVSSYYQFRTYETFAKSVATMSAEAIRARYD
82 | RFVIVMDEVHHLRSVQAEGVNTYSAISRFLRTVRGCVKMLLTGTPMTNEPGELADVLNLI
83 | LPQDKTIRPEDGIFSNSGDLLKPDELAERVRGRVSYLKAARPDAGLTFAGEVLGGTGMTH
84 | LRLVRLEMSAFQSDAYASAWDQDAGDRNIFSNSRQCSLAVMPDRRWGSAAEARNPSQVRR
85 | MAGQNLAEYSVKYDYLVRVASSSPKTFAYCEYVNGSGLSLLSDILLANGWRRATGRETTP
86 | GKRFALLTASQKNIHKIVQRFNHEDNVDGAYISLLLGSRVVAEGLTFKEVRHTVILTPHW
87 | NYTETAQAIARSWRAGSHDRLKARGEAVAVTVHRLVAVPRGRDTPRSIDSDMYAVSEVKD
88 | KRIKAVERILMTSAADCSLLRSRNLYPSEFDGSRECEYGRCAYRCSNVSVEPGPLPALLG
89 | ASAAEAVAQVRLDGGGDPAIMKVDMSTLWAEVTAGRRYVNRWGDGAVLRAEGGRLELSAP
90 | YGSSEEGRWGDFYKTRNLCYAKMDQDHLRADDLRDSLPQEVEELLTVSPVETIGETASAM
91 | PQEVATAILMACVQARADGKTLNVVRRDALLDFYKGFYAMGPSGWTVWLHARGANAKVYD
92 | GRRWNPADEDTLEFLAARSAKFTDTRIGYYGLYNPNLKDFCIRDVTQGKRDKVDLRKLTV
93 | GRRCVDWDQRTLVHIVARLMKIDGRRDFMPHATLREMRELAEQDPLHEPSDLTSKEACRR
94 | FLFWTQKGDNKFRRQDICKAMEKWFIENDLMEDNFDCGHQHKRRGKFA
95 | >sp|Q91G85|009R_IIV6 Uncharacterized protein 009R OS=Invertebrate iridescent virus 6 GN=IIV6-009R PE=3 SV=1
96 | MIKLFCVLAAFISINSACQSSHQQREEFTVATYHSSSICTTYCYSNCVVASQHKGLNVES
97 | YTCDKPDPYGRETVCKCTLIKCHDI
98 | >sp|Q6GZW5|010R_FRG3G Uncharacterized protein 010R OS=Frog virus 3 (isolate Goorha) GN=FV3-010R PE=4 SV=1
99 | MKMDTDCRHWIVLASVPVLTVLAFKGEGALALAGLLVMAAVAMYRDRTEKKYSAARAPSP
100 | IAGHKTAYVTDPSAFAAGTVPVYPAPSNMGSDRFEGWVGGVLTGVGSSHLDHRKFAERQL
101 | VDRREKMVGYGWTKSFF
102 | >sp|Q197E9|011L_IIV3 Uncharacterized protein 011L OS=Invertebrate iridescent virus 3 GN=IIV3-011L PE=4 SV=1
103 | MMESPKYKKSTCSVTNLGGTCILPQKGATAPKAKDVSPELLVNKMDNLCQDWARTRNEYN
104 | KVHIEQAPTDSYFGVVHSHTPKKKYTSRDSDSEPEATSTRRSATAQRAANLKSSPVDQWS
105 | TTPPQPQPQPAAPTVKKTCASSPPAALSVKRTCTSPPPPPVLIDDDTGEDAFYDTNDPDI
106 | FYDIENGVSELETEGPKRPVYYQRNIRYPIDGSVPQESEQWYDPIDDEFLASSGDVVSLE
107 | PSPIAAFQPTPPKTVQFVPMPEEIIVPPPPPPKTVVDEGVQAMPYTVDQMIQTDFEESPL
108 | LANVNLRTIPIEEVNPNFSPVLMQDMVRDSFVFGTVAQRVMASQRVKQFFKELIEQDVSL
109 | AGRMCMDSGSPQLNLYNSLMGVKLLYRWRSSTTFYRAIVPEIDEPVQVMQDVLSSSEWAK
110 | FDSQAGIPPKMVYIHYKLLNDLVKTLICPNFQLTHAALVCVDCRPEAVGSDGLQDGRQRR
111 | CSNLVSEYHEMTLEDLFNTIKPADLNAKNIILSVLFQMLYAVATVQKQFGMGGLFANADS
112 | VHVRRIQPGGFWHYTVNGLRYSVPNYGYLVILTNFTDVVNYRPDFATTRYFGRRQAKVVP
113 | TRNWYKFVPFTTRYRPFVTVDPITQAKTTAYAPNPPTEGITINEFYKDSSDLRPSVPVDL
114 | NDMITFPVPEFHLTICRLFSFFSKFYDSNFIGNDPFVRNLVDRYSQPFEFPDVYWPEDGV
115 | SRVLACYTIEEIYPNWVDGDTDYVIESYNLD
116 | >sp|Q6GZW4|011R_FRG3G Uncharacterized protein 011R OS=Frog virus 3 (isolate Goorha) GN=FV3-011R PE=4 SV=1
117 | MTSVKTIAMLAMLVIVAALIYMGYRTFTSMQSKLNELESRVNAPQLRPPVMSPIVPLNFI
118 | ESEDLDKELD
119 | >sp|Q6GZW3|012L_FRG3G Uncharacterized protein 012L OS=Frog virus 3 (isolate Goorha) GN=FV3-012L PE=4 SV=1
120 | MCAKLVEMAFGPVNADSPPLTAEEKESAVEKLVGSKPFPALKKKYHDKVPAQDPKYCLFS
121 | FVEVLPSCDIKAAGAEEMCSCCIKRRRGQVFGVACVRGTAHTLAKAKQKADKLVGDYDSV
122 | HVVQTCHVGRPFPLVSSGMAQETVAPSAMEAAEAAMDAKSAEKRKERMRQKLEMRKREQE
123 | IKARNRKLLEDPSCDPDAEEETDLERYATLRVKTTCLLENAKNASAQIKEYLASMRKSAE
124 | AVVAMEAADPTLVENYPGLIRDSRAKMGVSKQDTEAFLKMSSFDCLTAASELETMGF
125 | >sp|Q197E7|013L_IIV3 Uncharacterized protein IIV3-013L OS=Invertebrate iridescent virus 3 GN=IIV3-013L PE=4 SV=1
126 | MYYRDQYGNVKYAPEGMGPHHAASSSHHSAQHHHMTKENFSMDDVHSWFEKYKMWFLYAL
127 | ILALIFGVFMWWSKYNHDKKRSLNTASIFY
128 | >sp|Q6GZW2|013R_FRG3G Uncharacterized protein 013R OS=Frog virus 3 (isolate Goorha) GN=FV3-013R PE=4 SV=1
129 | MANSVAFSSMTWYSPLASDNLYDICVDKVHNRVLCLCHSFGCCTNAVVIWILPSFDEFTP
130 | QTLSCKGP
131 | >sp|Q6GZW1|014R_FRG3G Uncharacterized protein 014R OS=Frog virus 3 (isolate Goorha) GN=FV3-014R PE=4 SV=1
132 | METLVQAYLDIQGKIAEFRREIKALRVEEKAITANLFEAMGEAGVESIRISEDRYLVAEE
133 | KPKRTRSKQQFYQAAEGEGFTQEDVDRLMSLSRGAVTGSSSNVKIRKSAPARNEEDDDG
134 | >sp|Q6GZW0|015R_FRG3G Uncharacterized protein 015R OS=Frog virus 3 (isolate Goorha) GN=FV3-015R PE=4 SV=1
135 | MEQVPIKEMRLSDLRPNNKSIDTDLGGTKLVVIGKPGSGKSTLIKALLDSKRHIIPCAVV
136 | ISGSEEANGFYKGVVPDLFIYHQFSPSIIDRIHRRQVKAKAEMGSKKSWLLVVIDDCMDN
137 | AKMFNDKEVRALFKNGRHWNVLVVIANQYVMDLTPDLRSSVDGVFLFRENNVTYRDKTYA
138 | NFASVVPKKLYPTVMETVCQNYRCMFIDNTKATDNWHDSVFWYKAPYSKSAVAPFGARSY
139 | WKYACSKTGEEMPAVFDNVKILGDLLLKELPEAGEALVTYGGKDGPSDNEDGPSDDEDGP
140 | SDDEEGLSKDGVSEYYQSDLDD
141 | >sp|Q6GZV8|017L_FRG3G Uncharacterized protein 017L OS=Frog virus 3 (isolate Goorha) GN=FV3-017L PE=4 SV=1
142 | METMSDYSKEVSEALSALRGELSALSAAISNTVRAGSYSAPVAKDCKAGHCDSKAVLKSL
143 | SRSARDLDSAVEAVSSNCEWASSGYGKQIARALRDDAVRVKREVESTRDAVDVVTPSCCV
144 | QGLAEEAGKLSEMAAVYRCMATVFETADSHGVREMLAKVDGLKQTMSGFKRLLGKTAEID
145 | GLSDSVIRLGRSIGEVLPATEGKAMRDLVKQCERLNGLVVDGSRKVEEQCSKLRDMASQS
146 | YVVADLASQYDVLGGKAQEALSASDALEQAAAVALRAKAAADAVAKSLDSLDVKKLDRLL
147 | EQASAVSGLLAKKNDLDAVVTSLAGLEALVAKKDELYKICAAVNSVDKSKLELLNVKPDR
148 | LKSLTEQTVVVSQMTTALATFNEDKLDSVLGKYMQMHRFLGMATQLKLMSDSLAEFQPAK
149 | MAQMAAAASQLKDFLTDQTVSRLEKVSAAVDATDVTKYASAFSDGGMVSDMTKAYETVKA
150 | FAAVVNSLDSKKLKLVAECAKK
151 | >sp|Q6GZV7|018L_FRG3G Uncharacterized protein 018L OS=Frog virus 3 (isolate Goorha) GN=FV3-018L PE=3 SV=1
152 | MQNSKTDMCAALWAVTGLVLNVAVRFALEPFKESMGQGWHTAARVAVNGAIVLALADRLS
153 | DSPVTMTLFVMALSASPE
154 | >sp|Q6GZV6|019R_FRG3G Putative serine/threonine-protein kinase 019R OS=Frog virus 3 (isolate Goorha) GN=FV3-019R PE=3 SV=1
155 | MATNYCDEFERNPTRNPRTGRTIKRGGPVFRALERECSDGAARVFPAAAVRGAAAARAAS
156 | PRVAAASPCPEFARDPTRNPRTGRPIKRGGPVFRALERECADYGGASPRRVSPARAFPNR
157 | RVSPARRQSPAEAAEASPCPEFARDPTRNPRTGRTIKRGGPTYRALEAECADYGRLSPIR
158 | SPWSDWSSTGLSPFRSHMRKSPARRSPARRSPARRSLARYTEHLTSDSETEVDYDARNVI
159 | RSQVGPGGVCERFAADPTRNPVTGSPLSRNDPLYTDLMEICKGYPDTPLTKSLTGEGTDD
160 | DTCEAFCRDPTRNPVTGQKMRRNGIEYQMFAEECDCSGISRPSGVSRTSGTSGSSGSSAS
161 | SRPPNSFEAPGASSRPPNSFEASGAARVPGTPSVSRGEPRWMSSISTRHNYDESNPMSVA
162 | FRLRHVKDIRKFLRTVRPGRSGFCATDKGGWLGSAAVSDNVIGQGSWGSVHMVKFRDFPE
163 | EFVVKEAVLMSVSEKHRYKPTVVWDEWAAGSVPDEVVVNNMVTEIAATGMTPFVPLTAGA
164 | GACDSCNPQLLEKAAKVTKCYLQAMEAADFSLDRVLPTMSPDQAASALAQILLGLQSLQT
165 | TLGIMHNDIKAHNILVKRVPPGGYWKVTDSFNGQVFYIPNEGYLCMLADYGVVRLVKPAV
166 | GMDTLYGTRNARFVPRDVGRWGKGAGTEYVVTPIRSKISVVVRGGRFVGVEPNKAVRYWK
167 | NTDTSKVGDVITTNNVFYMGYDIEPDMQVQLDDTNSFPVWESRGDVADCVRTFVGGKRAS
168 | QPGFHRLFYKKTGSAWEKAAETVAKQNPLFSGFTLDGSGLKYIRAATACAYIFPGMAVPR
169 | PGEREIESFTM
170 | >sp|Q6GZV5|020R_FRG3G Uncharacterized protein 020R OS=Frog virus 3 (isolate Goorha) GN=FV3-020R PE=4 SV=1
171 | MLQNYAIVLGMAVAVAIWYFFKIEEEAPPGPNPPKPDPPKPDPPKMHMPKKKPHWMDPHL
172 | TGSQTVQYSRNRSMGDPIRGDLPIIPRDDGWFSTAANPAHTLHAGALSMIAPASTGGGLT
173 | VNKLISAYADKGNAMSGRHNSPSYYGSS
174 | >sp|Q6GZV4|021L_FRG3G Uncharacterized protein 021L OS=Frog virus 3 (isolate Goorha) GN=FV3-021L PE=4 SV=1
175 | METIVLVPRQDQETFSDSRPVLDGDLMLEFLENKIRHPVRRRQPRVVPVTSSDPEVVDDE
176 | DDEDQSDDSDEERQRLYFQYMVLKRMYPTEVIPEMTTYSNVAIMREKYKLLTRRLSLDKH
177 | INEWKKYIIVGMCIMELVMTKLNFDASGFARYQIKSLGAYDQLLAEMADKYYEATPQSSV
178 | EMRLMTTMGMNMAVFMLGKLLGGQMDFLGLLENAFGSSS
179 | >sp|Q197D8|022L_IIV3 Transmembrane protein 022L OS=Invertebrate iridescent virus 3 GN=IIV3-022L PE=4 SV=1
180 | MSFVHKLPTFYTAGVGAIIGGLSLRFNGAKFLSDWYINKYNDSVPAWSLQTCHWAGIALY
181 | CVGWVTLASVIYLKHRDNSILKGSILSCIVISAVWSILEYNQDMFVSNPKLPLISCAMLV
182 | SSLAALVALKYHIKDIFTILGAAIIIILAEYVVLPYQRQYNIVDGIGLPLLLLGFFILYQ
183 | VFSVPNPSTPTGVMVPKPEDEWDIEMAPLNHRDRQVPESELENVK
184 | >sp|Q6GZV2|023R_FRG3G Uncharacterized protein 023R OS=Frog virus 3 (isolate Goorha) GN=FV3-023R PE=4 SV=1
185 | MRVSQTSWIVSRMLEYPRGGFFYSTDMACMMEGLAEELAGGHKDEVLIVSGRNGDDEVFK
186 | EFPNVRAADGLKGPNSIDPETKLVLIIDVSPTAISNALAATLQEFLIPVWVFCNHTRTLT
187 | ASVTRRLGYKLWPKGTYTPYICEKAGVSEVVTYNQPESEKFVAFMSAARQIMDKRKSKKT
188 | MQELAFLPHLAFAEIAMEGDQEMTPTLTAKKVSDIKDEQVNELASAMFRTGKLSHLDMLS
189 | VPDCVYSCGEALKREVAKAKANRERFVVALRNAQYKKYTAGLLEAGTPVKTFTEVIKNWG
190 | AYDTIFLPMGVDWTYTGGSNLIRMMMTPGSHKTVTFVPESDDVHEFCHNKPTVNTMGVES
191 | AATGLAAELNRRWRRDNPVDAS
192 | >sp|Q197D7|023R_IIV3 Uncharacterized protein 023R OS=Invertebrate iridescent virus 3 GN=IIV3-023R PE=4 SV=1
193 | MGSYMLFDSLIKLVENRNPLNHEQKLWLIDVINNTLNLEGKEKLYSLLIVHNKQQTKIYD
194 | PKEPFYDIEKIPVQLQLVWYEFTKMHLKSQNEDRRRKMSLYAGRSP
195 | >sp|Q6GZV1|024R_FRG3G Uncharacterized protein 024R OS=Frog virus 3 (isolate Goorha) GN=FV3-024R PE=3 SV=1
196 | MWQYLPILLMTMISQLEWTVAAVKRYPAGGFITGDKLSRVFEALPWRVAVVSDEPEKYEG
197 | FPILTEEDPAVFEDADCILFAVSDPKCVTGAMKSVFMASSKTAWVVYDGTETRATVRSWM
198 | RRLWRAETYVPLLTHRGFVTDVCVYSQPDSERYVSVMTATAHFYSNRLEVLEEMAFVPHL
199 | AYAKLAMGRYTVLDGCMSVKGSADVAPLNRSMWFLTAAAIPHGEIDTDSLFSDPGAVYSC
200 | GSALREALGSLPEGSTSVVAVRNSSYRKYVRGILGPNFRVETFTNVVKTWGVYDYVLLPM
201 | GISDSYKQGRDLMEKLEMPGGHRVVTFAPENYTVNEVHLNRPLKYAIKRMDLITPMVLRH
202 | VSLNK
203 | >sp|Q197D5|025R_IIV3 Uncharacterized protein 025R OS=Invertebrate iridescent virus 3 GN=IIV3-025R PE=3 SV=1
204 | MNYSVIWAITILILGLVLTLAWARQNPTHPINPLVLNYHTKPSPKRHRMVLVVESFASVD
205 | ALVELVENILSQTIRVASITVVSQRPDHLRQVPLLHQTCTFSRASGLSALFKETSGTLVV
206 | FISKEGFHHFQSPTLLETIDQRGVTAEQTLPGIVLRNTDMPGIDLTTVYRQQRLGLGN
207 | >sp|Q91G70|026R_IIV6 Uncharacterized protein 026R OS=Invertebrate iridescent virus 6 GN=IIV6-026R PE=4 SV=1
208 | MAISFFSDTSYIIKSILLISLFSIIPLEDEVTKLKSSSLRETSELNKEEGITTCLYTFN
209 | >sp|Q6GZU9|027R_FRG3G Uncharacterized protein 027R OS=Frog virus 3 (isolate Goorha) GN=FV3-027R PE=4 SV=1
210 | MANFLQDVNCETVSEYDGPDASIPEGVWEGYVGHDHAALWRTWSYIYECCKKGTLVQFRG
211 | GKLVTFSMFDNPRFSNGAGIDAQKVLDLEDRARELQGYGPVNRRTDVMPVDRWTLNGPLL
212 | RYDKMVLEDVGGTGSNRTMVRAQLEALQDERDVPDCDFILNVRDYPLLRRDGTRPYPQVY
213 | GKGRRLPEPWARGGPHVPVVSMCSGPTYADIAVPTYECIAHAYTSSGRTLPAGGRFVKTP
214 | SADSLPAWRDRKALAVFRGSSTGAGTSTEDNQRLRALQISMSRPDLADVGITKWNLRPRK
215 | TERYDGYRIIEPWQFGRKSPYPAAAKPMTPEQIAGYKYVLCLWGHAPAFRLARDLSLGSV
216 | VLLPSRPPGQEGLDMWHSSVLKPWTHYIPVRGDLSDLEKRIEWCRDNDAECEKIAAAGME
217 | ASLNLLGWEGQLDRWMDVLRSVRLECCPGGYDMPPSPSLVSDSMCVRQMVSFPRYEDIPQ
218 | PSSPMPVLPRCSGTLRGWGLAASLGWDLGDAAEVLNVKRSTAVLSKTVFNNLIYRTPHLR
219 | YTFGVAASDPESTAAVILSEKLKGAVTMRSWLEDSRAWARGRNVASVLCQVSQALLEAQA
220 | AAGTVFGDLSLDTILVVPNPLPEYIYHDGTGGSFGLKLMPGDKWAVVTYGDYTRARIRVL
221 | KGDGRKGHLAVVGPQPVYTKLSERKWHDICCLVSCILRTARTSKRPAARALAAAVARAAG
222 | VKRPDMDAEALEATPYEAREEPLTRFGPAEFINGLVREFKLEEGGWAWTEKNKNIEKVLR
223 | PWERGLPLYPVRLWLSGDRKEAMRACVSSVLKAAPPRPATAAGAHHTFQTYLRTVGADLD
224 | SFPEWAAAAAHLKRLWKSPGSLPAGSASLRAPSVPPPCHGPAWALPFGTRTPGEFPSWFD
225 | PSCLGDWTEAMGQGAPLDLENGPAKAGSDPVAVHSAWETASQLSFEEDGWTESEPRPVRR
226 | EAHVRAKERH
227 | >sp|Q6GZU8|028R_FRG3G Uncharacterized protein 028R OS=Frog virus 3 (isolate Goorha) GN=FV3-028R PE=4 SV=1
228 | MDPNVLKNLSLMLSRRAGVSGGEPPRMIEWPEYGQRSEPCGSQTVWYVDRPVGAPFIKAF
229 | ASEVEERGGGILIHAGKVTFDSAKKLAAMKEVQVFDVKYFSFDLMAVVPEHSLWKRPGDK
230 | GYPEKTAQSFPKIMASDPVCRYHGFRPRDLVHVKPHDVYIVC
231 | >sp|Q197D2|028R_IIV3 Uncharacterized protein 028R OS=Invertebrate iridescent virus 3 GN=IIV3-028R PE=4 SV=1
232 | MDQYITLVELYIYDCNLFKSKNLKSFYKVHRVPEGDIVPKRRGGQLAGVTKSWVETNLVH
233 | FPLWLSEWDETRWGVLNHYPLESWLEKNVSSKVPVNPVMWNFDSECLVYFFHNGRRTPFL
234 | TPKGVVKLQVFYNLMSGKEVEWFYEISNGFLKPHLHQLSNVRELVRLKHAPVVVGAGGPR
235 | LVTEGVYSLRDDDFVVDCSQIAAVKRAIERGESHQSLRKYQCPLFVALTDKFQDTVKLVE
236 | KKFEVQLNELKAETTIQVLREQLRQEKKLKEQVLSLTQSFIPTIGGRGEEFGKPDETPSS
237 | ASVGDDNFPSSTNHTFEARRRPSSLSSGGALKPSKIL
238 | >sp|Q6GZU7|029L_FRG3G Uncharacterized protein 029L OS=Frog virus 3 (isolate Goorha) GN=FV3-029L PE=4 SV=1
239 | MRRMRSGFKHCAIPIDICRWEYILSPLILQDLQGPQQGGSVAVDVTVRCSVRFVHLPHYG
240 | GFNHGTVQRRVDPDDCRILRQLHIVLSLRLCLIDRDRL
241 | >sp|Q91G67|029R_IIV6 Uncharacterized protein 029R OS=Invertebrate iridescent virus 6 GN=IIV6-029R PE=4 SV=1
242 | MVERLGIAVEDRSPKLRKQAIRERFVLFKKNTERVEKYEYYAIRGQSIYINGRLSKLQSE
243 | RYPKMIILLDIFCQPNPRNLFLRFKERIDGKSEWENNFTYAGNNIGCTKEMESDMIRIFN
244 | ELDDEKRDV
245 | >sp|Q197D0|030L_IIV3 uncharacterized protein 030L OS=Invertebrate iridescent virus 3 GN=IIV3-030L PE=4 SV=1
246 | MHPTLKSNAGEWSQPIVNLFYSNFSGNCKALLQYIDNAGITDHIPIKFINVDNPTMRSVV
247 | SAKISHVPALVVLQDDQMSLYVAESVWEWFDNYRTPPPLADGATVDSQASENGEKEAQPT
248 | PPKEGLLTVLELAKQMRKEREQQT
249 | >sp|Q6GZU6|030R_FRG3G Uncharacterized protein 030R OS=Frog virus 3 (isolate Goorha) GN=FV3-030L PE=4 SV=1
250 | MSLYLLLGLKILRYLKMVIVLRCHSAFLLSVKFLREKRRLKMYLGIMLGF
251 | >sp|Q6GZU5|031R_FRG3G Uncharacterized protein 031R OS=Frog virus 3 (isolate Goorha) GN=FV3-031R PE=4 SV=1
252 | MDTPCKLFCIELKEGYVPGTVSHNHMMPYFLAGSGWPVEITFHAATVELKTQEDFPPAIG
253 | IGIHNMTGVPVVETPHSGRMHFVFIFHSKSGRFSATYKCIPVPVVVRDYKTVASVSLTTL
254 | SLEDIVGVKLFGTACDRSS
255 | >sp|Q6GZU4|032R_FRG3G Uncharacterized protein 032R OS=Frog virus 3 (isolate Goorha) GN=FV3-032R PE=4 SV=1
256 | MVTVTELRATAKNLGIRGYSTMRKAELEEAIRDHGRVSEARVASPRRSPARSPRKSPAGR
257 | KSPSKSPAGRKSPSKSPAGRKSPSKSPAGRKSPSKSPAGRKSPSKSPAGRKSPSKSPVRK
258 | SPSKSPVRKSPRKSPAAKLQAGDRPASMNICKNLPKQRLVDIATEMGIDLNRESDGKPKT
259 | KDQLCADIMGGAGRKSPRKSPSRSPVRKSPSRSPVRKSPVRSPRKSPVRVPSPVRSPVKE
260 | KTPVRSPARSEDAGSDLAPRPRRGKAVRLDYDEDDDYSYGASTDNLFSGNKEIPFPTRKR
261 | RTRKPEKVFVDVRSPHTLTDSEDEDDMVEVPELEDKEITMPGVLSPYSDEIVERGYVSQG
262 | GADYINYIYRTEYALESDESFARGARPKTNKRDSDRAVREAAAAAAIARALDRRSQSGND
263 | EPAVRRRSAPTDSSRESRRDREPQRDIAEPQRDIAEPQRDIAEPQRDIAEPRKVRFREAG
264 | SADVRVFERDEPKEYGRVPVRPPLFMPAGEPLQPLKFRPKTPKIDDTIHRAQMVLPSKPS
265 | QKETDNYYKQFAGEAVRPSEPVQWDKDDQVLYHKVPAWDDSSYAAAVSAWPMSVDPKQAE
266 | SVFAEFEQLSAQDSDLIKVRKSIMKALGY
267 | >sp|Q197C8|032R_IIV3 Uncharacterized protein 032R OS=Invertebrate iridescent virus 3 GN=IIV3-032R PE=4 SV=1
268 | MKLMLEIVKNISEPVGKLAIWFNETYQVDVSETINKWNELTGMNITVQENAVSADDTTAE
269 | ETEYSVVVNENPTRTAARTRKESKTAAKPRKMQIPKTKDVCQHIFKSGSRAGEQCTTKPK
270 | NNALFCSAHRVRNSVTSNATEASEKTVAKTNGTAAPQKRGVKSKSPTVIPSDFDDSDSSS
271 | SATRGLRKAPTLSPRKPPPTTTTASSAQEEEDEQQAHFSGSSSPPPKNNGNGAVYSDSSS
272 | DEDDDDAHHTTVIPLLKKGARKPLDENVQFTSDSSDEED
273 | >sp|Q91G65|032R_IIV6 Uncharacterized protein 032R OS=Invertebrate iridescent virus 6 GN=IIV6-032R PE=4 SV=1
274 | MGVYKFCYNKKKEVGQVAVLQKERLIFYIVTKEKSYLKPTLANFSNAIDSLYNECLLRKC
275 | CKLAIPKIGCCLDRLYWKTVKNIIIDKLCKKGIEVVVYYI
276 | >sp|Q6GZU3|033R_FRG3G Transmembrane protein 033R OS=Frog virus 3 (isolate Goorha) GN=FV3-033R PE=4 SV=1
277 | MSGIQLDKETILKYSSAALVALSAVVAVMMVSNNSESWKPILVGAVVAASGAAAYQSWWP
278 | KQS
279 | >sp|Q6GZU2|034R_FRG3G Uncharacterized protein 034R OS=Frog virus 3 (isolate Goorha) GN=FV3-034R PE=4 SV=1
280 | MSAGHLRKRRYVKVGDIHDMGPILGGVHDVSSPPPNVHYQQQDDHNDPGCMIHYPGEGWF
281 | SSMSTVEKLMLGAVIVAAVVVGVRMFMSSGNSSATSSFSTAPYFMG
282 | >sp|Q91G63|034R_IIV6 Uncharacterized protein 034R OS=Invertebrate iridescent virus 6 GN=IIV6-034R PE=4 SV=1
283 | MKQNLLILLSLLLVVVAIMWWLYEKKKEVPLPPPTPPTPPTPTGVPFLPMYAGLSSPVQY
284 | NPADYLYGWEKYPHGPAWSFGDRVPYAEAKNALGGHFGGGLYSPRDPILESKLGGVYIGN
285 | DLYTVGGVGGDGHW
286 | >sp|Q6GZU1|035L_FRG3G Uncharacterized protein 035L OS=Frog virus 3 (isolate Goorha) GN=FV3-035L PE=4 SV=1
287 | MIWVWPATGRGPGWWGIRRDPWGPEDSSCPCPRLPLSPTGPVGHSGPMGQCHPPVPSYRR
288 | GRRDQKDPPLRRQTSPPLPPHPWDRPLPWVPWIPLDLCRHGDPRHPWDPGAQSGYPRVRE
289 | VRGVPADRPLRPCPRQGPRTAATRKESSCRIPS
290 | >sp|Q6GZU0|036L_FRG3G Uncharacterized protein 036L OS=Frog virus 3 (isolate Goorha) GN=FV3-036L PE=4 SV=1
291 | MTLPDVSGSLGPLSPGTNGTLWAVGPRVVRYQIPALAYLTPGALWTLRTRGTSLTSGPIG
292 | TRDSIRTLHAVHYDVWTLGPLGPLGPTSPRGPSARPCRLQTDSLHSTDARCYRCKMLQMQ
293 | DATDARCKKDMSPFSFPGILEPSHLVGSLKSPRVDPGVPCRPLALWGHPYQCLRLVPLYQ
294 | RCLHPHCFPAAPGRPWDPWCRPDRLDP
295 | >sp|Q197C3|037L_IIV3 Uncharacterized protein 037L OS=Invertebrate iridescent virus 3 GN=IIV3-037L PE=4 SV=1
296 | MNAATSGIQLNAQTLSQQPAMNTPLIHRSFRDDYTGLVSAGDGLYKRKLKVPSTTRCNKF
297 | KWCSIGWSIGALIIFLVYKLEKPHVQPTSNGNLSLIEPEKLVSESQLIQKILNATTPQTT
298 | TPEIPSSTEPQELVTEILNTTTPQTTTPEIPSSTEPQELVTEIPSSTEPQEEIFSIFKSP
299 | KPEEPGGINSIPQYEQESNNVEDEPPPNKPEEEEDHDNQPLEERHTVPILGDVIIRNKTI
300 | IIDGGNETIIIKP
301 | >sp|Q6GZT9|037R_FRG3G uncharacterized protein 037R OS=Frog virus 3 (isolate Goorha) GN=FV3-037R PE=4 SV=1
302 | MQVFLDLDETLIHSIPVSRLGWTKSKPYPVKPFTVQDAGTPLSVMMGSSKAVNDGRKRLA
303 | TRLSLFKRTVLTDHIMCWRPTLRTFLNGLFASGYKINVWTAASKPYALEVVKALNLKSYG
304 | MGLLVTAQDYPKGSVKRLKYLTGLDAVKIPLSNTAIVDDREEVKRAQPTRAVHIKPFTAS
305 | SANTACSESDELKRVTASLAIIAGRSRRR
306 | >sp|Q6GZT7|039R_FRG3G Uncharacterized protein 039R OS=Frog virus 3 (isolate Goorha) GN=FV3-039R PE=4 SV=1
307 | MTSYCDTLKALAAESDSTGSERATIRMYMAMFSDASLRPAVSDTVASILGTDSLDHEDAE
308 | MMLKFKLLFFSGSANASATSHYPKADDPQRFARSVSRGPSRVRRPARNSASRPVRR
309 | >sp|Q6GZT6|040R_FRG3G Uncharacterized protein 040R OS=Frog virus 3 (isolate Goorha) GN=FV3-040R PE=3 SV=1
310 | MIRALCTIVLIAAGVAVALYLSLVYGYYMSVGVQDASWLTALTGNRPDAKVPFFDKAVGE
311 | APEDKVAYTERPYPVSSTQSPTTTQSPTTTTLKPTTMAVLASIGATPTPVVCHNVRGDMQ
312 | GIACNVVMKKTVAAALKVQPEAKKDNVNAQYRYGMWTPLRRSRSPFGVWNIPKKLAIAAP
313 | DV
314 | >sp|Q197C0|040R_IIV3 Uncharacterized protein 040R OS=Invertebrate iridescent virus 3 GN=IIV3-040R PE=4 SV=1
315 | MVTMAIKNFHIQDDRLKNGRGNKTMSESDYNTSDSGGWVLVRKKRDRSTRPPDVVDRWSN
316 | STSTFPMGLDQIKIKRNGCVNTY
317 | >sp|Q91G57|041L_IIV6 Uncharacterized protein 041L OS=Invertebrate iridescent virus 6 GN=IIV6-041L PE=4 SV=1
318 | MNFIRENETKYVLSTYQSMTPKNLMEYLLKYNYDNDCVYIFNNLPKDLQKEVDDLAKEVV
319 | KANDEQIKAQDEQIKANDQKLKQLDVMIEFMKQYNKQLDNDIYLLEHQLENKRELNRQLG
320 | IF
321 | >sp|Q6GZT5|041R_FRG3G Uncharacterized protein 041R OS=Frog virus 3 (isolate Goorha) GN=FV3-041R PE=4 SV=1
322 | MRVVVNAKALEVPVGMSFTEWTRTLSPGSSPRFLAWNPVRPRTFKDVTDPFWNGKVFDLL
323 | GVVNGKDDLLFPASEIQEWLEYAPNVDLAELERIFVATHRHRGMMGFAAAVQDSLVHVDP
324 | DSVDVTRVKDGLHKELDEHASKAAATDVRLKRLRSVKPVDGFSDPVLIRTVFSVTVPEFG
325 | DRTAYEIVDSAVPTGSCPYISAGPFVKTIPGFKPAPEWPAQTAHAEGAVFFKADAEFPDT
326 | KPLKDMYRKYSGAAVVPGDVTYPAVITFDVPQGSRHVPPEDFAARVAESLSLDLRGRPLV
327 | EMGRVVSVRLDGMRFRPYVLTDLLVSDPDASHVMQTDELNRAHKIKGTVYAQVCGTGQTV
328 | SFQEKTDEDSGEAYISLRVRARDRKGVEELMEAAGRVMAIYSRRESEIVSFYALYDKTVA
329 | KEAAPPRPPRKSKAPEPTGDKADRKLLRTLAPDIFLPTYSRKCLHMPVILRGAELEDARK
330 | KGLNLMDFPLFGESERLTYACKHPQHPYPGLRANLLPNKAKYPFVPCCYSKDQAVRPNSK
331 | WTAYTTGNAEARRQGRIREGVMQAEPLPEGALIFLRRVLGQETGSKFFALRTTGVPETPV
332 | NAVHVAVFQRSLTAEEQAEERAAMALDPSAMGACAQELYVEPDVDWDRWRREMGDPNVPF
333 | NLLKYFRALETRYDCDIYIMDNKGIIHTKAVRGRLRYRSRRPTVILHLREESCVPVMTPP
334 | SDWTRGPVRNGILTFSPIDPITVKLHDLYQDSRPVYVDGVRVPPLRSDWLPCSGQVVDRA
335 | GKARVFVVTPTGKMSRGSFTLVTWPMPPLAAPILRTDTGFPRGRSDSPLSFLGSRFVPSG
336 | YRRSVETGAIREITGILDGACEACLLTHDPVLVPDPSWSDGGPPVYEDPVPSRALEGFTG
337 | AEKKARMLVEYAKKAISIREGSCTQESVRSFAANGGFVVSPGALDGMKVFNPRFEAPGPF
338 | AEADWAVKVPDVKTARRLVYALRVASVNGTCPVQEYASASLVPNFYKTSTDFVQSPAYTI
339 | NVWRNDLDQSAVKKTRRAVVDWERGLAVPWPLPETELGFSYSLRFAGISRTFMAMNHPTW
340 | ESAAFAALTWAKSGYCPGVTSNQIPEGEKVPTYACVKGMKPAKVLESGDGTLKLDKSSYG
341 | DVRVSGVMIYRASEGKPMQYVSLLM
342 | >sp|Q6GZT4|042L_FRG3G Uncharacterized protein 042L OS=Frog virus 3 (isolate Goorha) GN=FV3-042L PE=4 SV=1
343 | MFAPPSSLFVPATAPAPSTSGFTIPANLRRDAYVCPFATAEKERKEREQQQPASKGLNHD
344 | LAAQEPLHPSLVSRFPSNYRGSFLR
345 | >sp|Q91G56|042R_IIV6 Uncharacterized protein 042R OS=Invertebrate iridescent virus 6 GN=IIV6-042R PE=4 SV=1
346 | MATLQQAQQQNNQLTQQNNQLTQQNNQLTQRVNELTRFLEDANRKIQIKENVIKSSEAEN
347 | RKNLAEINRLHSENHRLIQQSTRTICQKCSMRSN
348 | >sp|Q91G55|043L_IIV6 Uncharacterized protein 043L OS=Invertebrate iridescent virus 6 GN=IIV6-043L PE=4 SV=1
349 | MDLINNKLNIEIQKFCLDLEKKYNINYNNLIDLWFNKESTERLIKCEVNLENKIKFNQKY
350 | NSDTIKIMNILFLICSDGVFGKIENNDVKPLTDEDEKICVKFGYKIMIGCLNDIPI
351 | >sp|Q6GZT3|043R_FRG3G Uncharacterized protein 043R OS=Frog virus 3 (isolate Goorha) GN=FV3-043R PE=4 SV=1
352 | MEEVDGCAGPNSEAGALTAGALTAGAFAVTAGAGVAGAGVAGVGWCSWCSWCSWCWCSWC
353 | SWCWCSWCWCSWCWCSWCWCSWCWCSWCWCSWCWCSWCLSKGWEDRGGLEGCKSCKGWCL
354 | CSHCWCWCSWCWCSWCSWCLSKGWEDRGGLEGCKSCKGWCLCSHCRCWSIN
355 | >sp|Q197B6|044L_IIV3 Putative serine/threonine-protein kinase 040L OS=Invertebrate iridescent virus 3 GN=IIV3-044L PE=3 SV=1
356 | MPLSVFAEEFAEKSVKRYIGQGLWLPCNLSDYYYYQEFHDEGGYGSIHRVMDKATGNEVI
357 | MKHSYKLDFSPGILPEWWSKFGSLTDDLRERVVSNHQLRVSREAQILVQASTVLPEMKLH
358 | DYFDDGESFILIMDYGGRSLENIASSHKKKITNLVRYRAYKGNWFYKNWLKQVVDYMIKI
359 | YHKIKILYDIGIYHNDLKPENVLVDGDHITIIDFGVADFVPDENERKTWSCYDFRGTIDY
360 | IPPEVGTTGSFDPWHQTVWCFGVMLYFLSFMEYPFHIDNQFLEYALEGEKLDKLPEPFAQ
361 | LIRECLSVDPDKRPLTSLLDRLTELHHHLQTIDVW
362 | >sp|Q6GZN9|044R_FRG3G Uncharacterized protein 044R OS=Frog virus 3 (isolate Goorha) GN=FV3-044R PE=4 SV=1
363 | MVVRLAVRANMPKDSLARDSLPKDSLARDFLSDKTSPTDGTQSSDRYLLKIVTAVDYVHL
364 | T
365 | >sp|Q91G54|044R_IIV6 Uncharacterized protein 044R OS=Invertebrate iridescent virus 6 GN=IIV6-044R PE=4 SV=1
366 | MYLYQKIKNCLLLTMYQKKNKSHMYDILQSYLYYQKPIPKNLYSHPKKNLYLNIHHYKNI
367 | NKDLM
368 | >sp|Q6GZT1|045L_FRG3G Uncharacterized protein 045L OS=Frog virus 3 (isolate Goorha) GN=FV3-045L PE=4 SV=1
369 | MDDVEYRTEFSARERAGGDIEEGLELFGPATFKGMEGDPVQRFYNGIESAGRNLIRDGHI
370 | KLNKQEQTRLLSSVLRITYPNYKNPMGTVLGFYVTDGGRGPIDKGRLSHVQSFMEEVTDM
371 | DLRDLIRYCRLWLALK
372 | >sp|Q197B5|045R_IIV3 Uncharacterized protein 045R OS=Invertebrate iridescent virus 3 GN=IIV3-045R PE=4 SV=1
373 | MYKCSQGAMNTEKVMEKFVIQSRFREMYPDKAKAIAGMTVPARYADSVEDMVAFANEKIR
374 | VQKAKVEAEKNARQAMGAPAKFDKYGKYKY
375 | >sp|Q6GZT0|046L_FRG3G Uncharacterized protein 046L OS=Frog virus 3 (isolate Goorha) GN=FV3-046L PE=4 SV=1
376 | MYSVRNSGCSVGCSPRQGASPIMFGPSLGAMLSAPVVRASAPVVRASSPVVKRKSLVKRK
377 | SPVKRSPLKKRSQMRTSPCEA
378 | >sp|Q6GZS9|047L_FRG3G Uncharacterized protein 047L OS=Frog virus 3 (isolate Goorha) GN=FV3-047L PE=4 SV=1
379 | MHTISDWLNLNAPVRCFHVRQLSESEWRFTVNDTIRVVASVDGPWTVDAKGVEDLKMHKL
380 | YVPGPAKCWTRARDKAMAAALAEAVSESETCAADIVRPAVAKNTPRRPVVKRRVDAVKPA
381 | APDNLESWTKDDWYELDL
382 | >sp|Q6GZS8|048L_FRG3G Uncharacterized protein 048L OS=Frog virus 3 (isolate Goorha) GN=FV3-048L PE=4 SV=1
383 | MTAKTLDPSDYNVRDDSTTGMFTPVDRFVCDPESDRIIVRKIPPEWTIGNSMRFVHFTKE
384 | FTQTFDPSESPSNIVRHTNGKKK
385 | >sp|Q6GZS7|049L_FRG3G Putative SAP domain-containing protein 049L OS=Frog virus 3 (isolate Goorha) GN=FV3-049L PE=4 SV=1
386 | MAAPKAEGEDKPKRVRKSRAKPKPETKEVKKPKSKEFCTADDSSDDYNEVKPSPAMIALM
387 | AVKEIPESEDVPDKSDSEAEAPVPAIVKKRRTPPKKAESSDDKKLDEATGEQVIDEDALS
388 | KLTIQTLKGMCKTRNLKISGNKAALVQRLIEADGIAHIIPTTATVVQKVKKTKRPAVFSK
389 | VDSELKLIPCPGREHMLMDEATGLVFLDEDPSTAVGFIEHGEVFGLDSEHMTVCKNMGIR
390 | YSWTEDYLC
391 | >sp|Q91G50|049L_IIV6 Transmembrane protein 049L OS=Invertebrate iridescent virus 6 GN=IIV6-049L PE=4 SV=1
392 | MDKIEELKIEELKIEIPQRKTKFFHDSENSDKRDEEETLNPTITSKAKILIKSKNFWIET
393 | LIFVISVFGALCVAFGIMLIGFLLWLVSNTISILYFIKQKQYPLSLQQMVFLITTCIGVY
394 | NNV
395 | >sp|Q197B1|049R_IIV3 Uncharacterized protein 049R OS=Invertebrate iridescent virus 3 GN=IIV3-049R PE=4 SV=1
396 | MLRIENTVCKSACRVDSATAQPVYSSFDGENFKAEIHSKLDSFERKLNASPTYRDEEGGG
397 | NPEHYETLSQEINDLQSQIENLSLEVENLQGSSSSPSNVAAALAELSQSIRTIKEQLEAN
398 | RKERYNLTVTVANLTAAVNAAKKTGSESTTATATTTTNYETQLKAFEAQIKALDNQLQTQ
399 | KNLVQTTSVEAKNDRDSLRKTIEVIRLTVKTLQDQVESQTGPKKRRKSPIENQPTAGSEL
400 | ATLTTNLTFLTQRVEKLSQGVATHTTAMFTLEETMKKVHTTLQEATASNTNNIDAIRTRV
401 | QELADKIALFDQVQYSVGYEMAKKNPDSTKLRTDLDSAISTVNEEKKSLLTVKDSVQSLK
402 | TQLDELKRTLENDGDVSSLRQTVHDMASSIRDETATIYNKINALEEGLKRGGQTTTTPLT
403 | QLQTRVEEIDKTIVKWNNQHGEWTTRLNKLEAGVSNNQTLMNRFIQQVNGDVNPLKELPA
404 | ELETFKMTITNTWAQLNKKFLDFSAKTDTSVDNFTKKFTEIHPQIASLVDKMDQQIRDNP
405 | HTTEKLMDEIRQLKSAMTRLGTQSSGKPIFSINTKSSYNEKSKKTIFGHPGIIFPETVKI
406 | SSIYITLAAKEADGKEDARLFELTATSTHNNITSTIKQFEKKCTEETILEDYNPPLVIDA
407 | QTKLVLSCNQKVFGVAIFTLQYS
408 | >sp|Q6GZS6|050L_FRG3G Uncharacterized protein 050L OS=Frog virus 3 (isolate Goorha) GN=FV3-050L PE=4 SV=1
409 | MQVYSPSKISQQLETFLNSVANGLGHTMSHAMSQTFSETIVASVAKKAPKTSVLAAAQAA
410 | MQAEDKVSKPKKVKKTKSYADAAPKRVKKVKAPKEDTVVSEPEEAVVEQQEKQQPEKAVV
411 | EQQEKQQPEEAVVEQQEKQQPEEAVVEQQEKQQPEEAVVESEQPEQPEQPERQQQAQPER
412 | QQQAQPERQQQAQPEEAEDAEQEPVEQPTAKPKKVRKTQTESEDKPKRG
413 | >sp|Q6GZS5|051R_FRG3G Uncharacterized protein 051R OS=Frog virus 3 (isolate Goorha) GN=FV3-051R PE=4 SV=1
414 | MTVRITTTTGYPHGLLNPTVPLPMSRYTLVQETAQDILYTPMGKELALMGTCTKTECKYV
415 | RMQEDAHLLAEASKKALTMRLEQNPELKDLLASSGSQPIVYGDTRLVAHLTLLRGVSVLS
416 | PEGMVFSEDAMRKLYKGTIDMFVSDPSSLLNVDRATLTLESLRAMVKASGNWPSTSATAA
417 | VAIPEAPVTGVSVSDKVVMEHSASVYATQKMDFERSLLIRHLLAMDPAAEADVSHLVSRM
418 | DARTRAASSRLAAMYHDGLLDSAVTDGLVPPDQRLLEPMSTPSTPEVHAPQDGMSFEVPH
419 | VLTFAGGPVKVDDHVYDTPLHYAYNLAIRRMFADFGEGDLDDVHVSQVSVIYSDMLDKWI
420 | DAMYPQTLWRLMTEKFSGNQSCLAVLLSTDGADVKWTGRTEEESFLISDMMGQIKTGWIR
421 | SGPPSSSPLSSADIAGTDFFYGWLSYMSRTYATALNVISEMTLARLLDLPDIPEEVRQPT
422 | DREQAALGSDYVRSAWRVCYSEFVHKFEGKNLFASVDYCVKTHLKALKVSRDSVTGTAKT
423 | LSAKGYGTLIALPVIRLAMKS
424 | >sp|Q6GZS4|052L_FRG3G Uncharacterized protein 052L OS=Frog virus 3 (isolate Goorha) GN=FV3-052L PE=3 SV=1
425 | MVKYVVTGGCGFLGSHIVKCILKYAPEVTEVVAYDINISHIMTMWSSKLKVVRGDVMDVM
426 | ALAKAVDGADVVIHTAGIVDVWYRHTDDEIYRVNVSGTKNVLMCCINAGVQVLVNTSSME
427 | VVGPNTTSGVFVRGGERTPYNTVHDHVYPLSKDRAEKLVKHYTGVAAAPGMPALKTCSLR
428 | PTGIYGEGCDLLEKFFHDTVNAGNVAYGGSPPDSEHGRVYVGNVAWMHLLAARALLAGGE
429 | SAHKVNGEAFFCYDDSPYMSYDAFNAELFEDRGFGYVYVPYWVMKPMAAYNDLKRKFLGC
430 | FGVKRSPILNSYTLALARTSFTVKTSKARRMFGYMPLYEWSEAKRRTKDWISTLK
431 | >sp|Q197A7|053L_IIV3 Uncharacterized protein 053L OS=Invertebrate iridescent virus 3 GN=IIV3-053L PE=4 SV=1
432 | MEQYLQAFEFVEEMVVLPKYLSWELYHHLAVLLREKYPKTYKNKGYIFNIKVKSILDNRI
433 | TPTGQIVLVVMFQSDLYVPQVGHVFTERIRVNSVDDRYQWITIEPLTVFLRSNIPYKPNT
434 | LVTVQICSIKMDNTLCFGTILD
435 | >sp|Q6GZS3|053R_FRG3G Putative myristoylated protein 053R OS=Frog virus 3 (isolate Goorha) GN=FV3-053R PE=3 SV=1
436 | MGAAESINTVNIVTKAYAKIMTTMVTDQDITADQSQVFSIDHVKGDVVIKGDVFTQMLVI
437 | NLASLMKAIATQSAQDQLIDNIAQQAQAAVSGLNLAQYAYVSNNIDRLITACVQMSTDMR
438 | VSCKSKVTMTQSFSVTDVEGDVRVTGVKFNQFANILSSCAMDASVNNDQARDIVSQIKQR
439 | GDAKASGLDPTTLIVIIVLVMVGAPMGAGFMAGRRAIGPLLASVGLIGGGAVALGYVPRP
440 | VKIEGFSSDPDFTLAQPAATVKGLTFTAAVAKLKSTDGYGALFWKNYDVKGTTAVKLQET
441 | LSYFAPAGYDPASWAGVGDSAPPFRIFPGLYQGKGDPGARPRAAYGYAGPVAGPKKGDAY
442 | LDGDTGSYYVLGDSWKMRGTISGHQNGRTDYWGTVDPTTTAALTGSERYIWVDPFTLVKS
443 | TVWLFTGSPKKWTQQQTAPLDIPLTNTPSDFNVWVYKDDTAVQAVKWSSVGAGVAGAALT
444 | ASALLMPDSVASSEMSPAVGTGTPAIGTGSPAVGTGFPAHRG
445 | >sp|Q6GZS2|054L_FRG3G Uncharacterized protein 054L OS=Frog virus 3 (isolate Goorha) GN=FV3-054L PE=4 SV=1
446 | MPLRLCQGRKDRASDPVRDDGSPPRLFVSQVCRRAPKDPQGFQGHRGGQNVGDCSPIFHQ
447 | EKKQVMRRFYSLCEWK
448 | >sp|Q197A6|054L_IIV3 Uncharacterized protein 054L OS=Invertebrate iridescent virus 3 GN=IIV3-054L PE=4 SV=1
449 | MASEATVESVETKVESPIVESPVDQGLLESIKNFMDDLAVVTENENFQDYHTIVRRIDET
450 | KVKSYNKLVGGFREFFSLNKTALMEGNFEGLIEPHISYKTESGSFFFNFQTTYLETDEAN
451 | QEIIKEHLNHIWAQIRSENKCPEQLYIDEIFQKLKNKDQLTMDDQLIRDLFTKFQTANFN
452 | VTALIRAGCSKAREFLTNNGSQKSSSTFRLIETIENVNVDNFTQMDFMALISKISAIFSE
453 | SGESNPLNLCLSSLFGGGNTNQPSLTSMFPFPTPPLPDNVLLDNLDQLTLEQQSETTGDD
454 | DHHSFEPEK
455 | >sp|Q6GZS1|055L_FRG3G Putative helicase 055L OS=Frog virus 3 (isolate Goorha) GN=FV3-055L PE=4 SV=1
456 | MAKLLRLNAIDGDMPGAGEADLFTLAPGGKAYVPFAWGSRVLGCKPPPAHGAARERGSVS
457 | LRPHQKGVLKEAWGHVTSKGYCMLKCPPGFGKTFMALELWRRLGLPALVLTNRRVLATQW
458 | RDSATRFLPDSRVFTSGTPPPDALPRDLYVTGPASLRNRRIKAKDSPAKFLLIVDEAHQL
459 | TSPVSCRVLLSVRPSHLLGLSATPMRYDDYHAALGAFFGREDSTVDRVDPRPHEVEILST
460 | GVHIEPEFSKITGKMDWNSVIKAQSDNPERDAALADRMLLRPDVKWLVLCKRVDHVKRMA
461 | ETLSSRSGKKVDVLHGSKDEWDRDAWCVVGTYSKAGTGFDACERTGLCLAADVDRYFEQC
462 | LGRLRANGGTVLDPVDDLGVLRKHSKNREAVYIAAGCTIKKTKCDASRPSQSTPTPTGSS
463 | QPAPRTRRPQR
464 | >sp|Q67475|055R_FRG3G Uncharacterized protein 055R OS=Frog virus 3 (isolate Goorha) GN=FV3-055R PE=4 SV=1
465 | MLPQNSQVVHGVQDGPPVGPQPAQALLKVPVDVRRQAQAGPLAGVEPRPRLGVGAHHTPG
466 | VPVPLILGAVQHVHLLPGPRGQCLGHPLDVVHPLAQHQPLYVGPEEHPVGQGGVPLGVIG
467 | LGLDHRVPVHLARDLAKLGLYVHARAEYLHLVGPGVDPVHRAVLPAEKGPQCSVVVVVPH
468 | GCSAQTQQVRGPHRQEDPTRHGGRQLVGLIHNQEKFCGRVLGLDPPVSQRSRTRHIQVPG
469 | QGVGRRGAGCKDPRVRKEPGRRVPPLSRQHPPVCQDEGGQPQPPPQLQGHEGLAEAGRAL
470 | EHAVPLGGDVAPGLLQDPFLVRSERDGAPLPGCAVSGRRFASQDPGTPGEGDVGLSPGRE
471 | GKQVRFPRAGHVSIYRVEP
472 | >sp|Q6GZR9|056R_FRG3G Uncharacterized protein 056R OS=Frog virus 3 (isolate Goorha) GN=FV3-056R PE=4 SV=1
473 | MGVYSPAPRTPRGPWNIRIRFLSWSNSFLLEVKKNYGDVYLCDVCPVRPPGLQAPREQPV
474 | LHDRKVLHLYGQDSGVRDVQEVLWNPVSHQEVRRDNHGVLHGRRRARVRQAEEGRRRRQE
475 | GHRFRDWERLHQRVEGCPGLQGHGF
476 | >sp|O55703|056R_IIV6 Uncharacterized protein 056R OS=Invertebrate iridescent virus 6 GN=IIV6-056R PE=4 SV=1
477 | MEQKIDKKNSYSFGITSSTTVHVLGEVVAIGGILYYTHSQVNQLNTKIASLEKQILDLTN
478 | ILKHLSPHSFQQLQSSPTTQSTPPLPQSTPQSQQSQQSAVLPQRPSFLGGSTPKESQSFP
479 | GVETRSLGEKTTRGKLKSPHPSQIPVTQENFHYPYQTMNKTMRWEFLKPVESDESEDETN
480 | CQNGVCTLQKQEKNVTFNGSVEQLKYGNFSPQRSTKTMIGSPSIRPLIPHESIESVSESI
481 | ESSQDQSFSSRETISGNFKSKDDHQLSESEINQLVSKAIRTKK
482 | >sp|Q197A3|057L_IIV3 Uncharacterized protein 057L OS=Invertebrate iridescent virus 3 GN=IIV3-057L PE=4 SV=1
483 | MFKIYRTSCMGQHQSQFLHSGTVVQTVDGVTTTSFFQPCLVFPFSIEIISISLVSLNTTN
484 | ETKLIKMSIMENSELVDYNESAYTLAHLPGKQMTYLKYPAPFTIRQHQPFFFVHHGDLGD
485 | ASLTLEYRIK
486 | >sp|Q6GZR8|057R_FRG3G Putative phosphotransferase 057R OS=Frog virus 3 (isolate Goorha) GN=FV3-057R PE=4 SV=1
487 | MAMVSNVKYFADALQGTQGKVGTFTVLGENVFFKRGDGTDTVCGLEMVAGRILRARSDVH
488 | FCEPKYFVEMDDGEKVCSFELLDCKPLGSMAPGRKGKKSVGSVTQYLSGLYQTFAAAAAA
489 | HSVGVVHSDLHTGNVMLCPEPVSHYVYNLGGGEMLSLETNGVRAVVVDLGMARIPGKNTV
490 | ACDIFVHVGHVVNGRPDYAADVRTLTLGSCYDMVMMCASGKPSLEERMLCYEVMAAYNNL
491 | FAGVCAPSKGGWFVDHYPSMCAVMEATIPDSVASRGGGSWLLAVANMCKLLVPRPYVKRA
492 | CGKEKAHAMWMTLFTELGLTAKKSISKVDMVDAVQRLRAIADGSEIPPASLMKAACAVGL
493 | LTASVAEACYEKVEEIKASHVGMLRWKDALDAWVRLPVRCSGSVPKLGSTVILHTESGTE
494 | ETVVTQSMLRQIVKTREALDMAQAASDAVWTDTAYYEADDELMKGAHEESAEDFATSFLK
495 | GGTTGPIAKRCRLILKSL
496 | >sp|Q6GZR7|058R_FRG3G Uncharacterized protein 058R OS=Frog virus 3 (isolate Goorha) GN=FV3-058R PE=4 SV=1
497 | MEIRDTTVGLDRPVQSGAWDPGATREQLALAGISGRCDLGGRDEDLWSRKSQKDETFKDC
498 | ERRRGGEAPRLLCDKWRRDREAEAVRRRVQSQRRQGGREDPDGPARVRGRRRRVAVCGHS
499 | PDRGPAREEHKGPAGGSDHKDLGHHKGPEVGSGLRGPQARQVCGRSRGHEVQRVQKGLRQ
500 | GVQGHGCREVALDARLCLVRVLGRSGQKGRHGRLSVASRRLDGQEKVEKRGLFSLGA
501 |
--------------------------------------------------------------------------------
/Presentations and Tutorials/Protvec demo 2017/figures/Protein_sentences.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Presentations and Tutorials/Protvec demo 2017/figures/Protein_sentences.png
--------------------------------------------------------------------------------
/Presentations and Tutorials/Protvec demo 2017/figures/Protein_sentences2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Presentations and Tutorials/Protvec demo 2017/figures/Protein_sentences2.png
--------------------------------------------------------------------------------
/Presentations and Tutorials/Protvec demo 2017/figures/Skip_gram_cbow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Presentations and Tutorials/Protvec demo 2017/figures/Skip_gram_cbow.png
--------------------------------------------------------------------------------
/Presentations and Tutorials/Protvec demo 2017/figures/linear-relationships.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Presentations and Tutorials/Protvec demo 2017/figures/linear-relationships.png
--------------------------------------------------------------------------------
/Presentations and Tutorials/Protvec demo 2017/figures/protein_vectors_wlabel.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Presentations and Tutorials/Protvec demo 2017/figures/protein_vectors_wlabel.png
--------------------------------------------------------------------------------
/Presentations and Tutorials/Protvec demo 2017/figures/protein_words.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Presentations and Tutorials/Protvec demo 2017/figures/protein_words.png
--------------------------------------------------------------------------------
/Presentations and Tutorials/Protvec demo 2017/figures/proteinsequence.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Presentations and Tutorials/Protvec demo 2017/figures/proteinsequence.png
--------------------------------------------------------------------------------
/Presentations and Tutorials/Protvec demo 2017/figures/relationships_plus_vectors2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Presentations and Tutorials/Protvec demo 2017/figures/relationships_plus_vectors2.png
--------------------------------------------------------------------------------
/Presentations and Tutorials/Protvec demo 2017/figures/table_overview_vocab2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Presentations and Tutorials/Protvec demo 2017/figures/table_overview_vocab2.png
--------------------------------------------------------------------------------
/Presentations and Tutorials/Protvec demo 2017/helpers.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 |
4 | """
5 | Copyright (C) 2017 by Samo Turk and Sabrina Jaeger, BioMed X GmbH
6 | This work is licensed under the Creative Commons Attribution-ShareAlike 4.0 License.
7 | To view a copy of this license, visit https://creativecommons.org/licenses/by-sa/4.0/ or send a letter
8 | to Creative Commons, 543 Howard Street, 5th Floor, San Francisco, California, 94105, USA.
9 | """
10 |
class nGram:
    """Container for n-gram "sentences" with a colourised Jupyter depiction.

    ``n_gram`` is stored as given. The methods below treat it as a sized
    iterable of sentences, each sentence being an iterable of word strings.

    Example (``split_ngrams`` is not defined in this module):

    >>> nGram(split_ngrams('ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ', 3))
    """

    def __init__(self, n_gram):
        self.n_gram = n_gram

    def __len__(self):
        """Return the number of sentences."""
        return len(self.n_gram)

    # The method used to be spelled ``__len___`` (three trailing underscores),
    # which Python never called for ``len(obj)``. The old name is kept as an
    # alias so explicit callers of the misspelled attribute keep working.
    __len___ = __len__

    def __str__(self):  # String representation
        return 'n-gram with %i sentences' % len(self.n_gram)

    __repr__ = __str__  # Default representation

    def __contains__(self, word):  # Enables "'Word' in nGram"
        """Return True if ``word`` equals any word of any sentence."""
        # Lazy flattening: stops at the first match instead of building a
        # full list of every word on each lookup.
        return word in (item for sublist in self.n_gram for item in sublist)

    contains = __contains__  # nGram.contains('word')

    def __iter__(self):  # Iterate over sentences (for sentence in nGram: ...)
        for x in self.n_gram:
            yield x

    def _repr_html_(self):  # Default Jupyter representation
        """Return an HTML depiction with each word of a sentence coloured.

        Sentence ``i`` is indented by ``i`` spaces. Only as many words as
        there are colours are drawn; longer sentences get a trailing "...".
        Words are inserted verbatim (no HTML escaping).
        """
        colors = ['Red', 'Maroon', 'Yellow', 'Olive', 'Lime', 'Green',
                  'Aqua', 'Teal', 'Blue', 'Navy', 'Fuchsia', 'Purple']
        # NOTE(review): the previous format strings were empty ('' % c raised
        # TypeError), apparently because the markup had been stripped. The
        # <pre>/<span> tags below are a reconstruction -- confirm against the
        # intended rendering.
        lines = []
        for i, ng in enumerate(self.n_gram):
            # zip() stops at the shorter input, so at most len(colors) words
            # are depicted.
            colored = ''.join(
                '<span style="color:%s">%s</span>' % (c, n)
                for n, c in zip(ng, colors)
            )
            line = " " * i + colored
            if len(ng) > len(colors):  # append ... if we run out of colors
                line += "..."
            lines.append(line + "\n")
        # <pre> keeps the leading spaces and newlines significant.
        return "<pre>" + "".join(lines) + "</pre>"
46 |
class DfVec:
    """
    Helper class to store vectors in a pandas DataFrame.

    The wrapped vector is kept on ``self.vec``; the string and default
    representations only report its length, and ``len()`` is forwarded
    to the wrapped vector.

    Parameters
    ----------
    vec: np.array
    """

    def __init__(self, vec):
        self.vec = vec

    def __len__(self):
        return len(self.vec)

    def __str__(self):
        dimensions = len(self.vec)
        return "%d dimensional vector" % dimensions

    # repr() and str() produce the same short summary.
    __repr__ = __str__
63 |
def confusion_matrix_plot(cm, names, title='Confusion matrix', cmap=plt.cm.Blues):
    """Draw a confusion matrix as an image with a colour bar.

    The matrix itself can be computed with
    ``sklearn.metrics.confusion_matrix(observations, predictions)``.

    Parameters
    ----------
    cm : np.array
        Confusion matrix
    names : list
        Names of classes; used as tick labels on both axes
    title : str
        Title of the plot
    cmap : plt.cm
        Matplotlib colormap

    Returns
    -------
    plt.figure
        matplotlib figure
    """
    figure = plt.figure()
    # A single axes spanning the whole figure area.
    ax = figure.add_axes([0, 0, 1, 1])
    heatmap = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    figure.colorbar(heatmap)

    # One tick per class. plt.xticks/plt.yticks act on pyplot's current axes
    # -- presumably ``ax`` at this point; verify if ticks land elsewhere.
    positions = np.arange(len(names))
    plt.xticks(positions, names, rotation=45)
    plt.yticks(positions, names)

    ax.set_title(title)
    ax.set_xlabel('Predicted value')
    ax.set_ylabel('True value')
    return figure
95 |
--------------------------------------------------------------------------------
/Presentations and Tutorials/Protvec demo 2017/trained_models/model_SwissProt_small:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Presentations and Tutorials/Protvec demo 2017/trained_models/model_SwissProt_small
--------------------------------------------------------------------------------
/Presentations and Tutorials/Protvec demo 2017/trained_models/swissprot_reviewed_protvec:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Presentations and Tutorials/Protvec demo 2017/trained_models/swissprot_reviewed_protvec
--------------------------------------------------------------------------------
/Presentations and Tutorials/RDKit UGM 2014/biomedx.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Presentations and Tutorials/RDKit UGM 2014/biomedx.png
--------------------------------------------------------------------------------
/Presentations and Tutorials/RDKit UGM 2014/hackaton/XLSX export.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "metadata": {
3 | "name": "",
4 | "signature": "sha256:d1c239959bbfc2922992b270c32e0a3f52ded415ef8c116012f7cc9a9a74d1dd"
5 | },
6 | "nbformat": 3,
7 | "nbformat_minor": 0,
8 | "worksheets": [
9 | {
10 | "cells": [
11 | {
12 | "cell_type": "markdown",
13 | "metadata": {},
14 | "source": [
15 | "# Demo of SaveXlsxFromFrame function"
16 | ]
17 | },
18 | {
19 | "cell_type": "code",
20 | "collapsed": false,
21 | "input": [
22 | "import rdkit.Chem as Chem\n",
23 | "from rdkit.Chem import Draw\n",
24 | "from rdkit.Chem import PandasTools\n",
25 | "from rdkit.Chem.Draw import IPythonConsole # Enables RDKit IPython integration\n",
26 | "import pandas as pd"
27 | ],
28 | "language": "python",
29 | "metadata": {},
30 | "outputs": [],
31 | "prompt_number": 1
32 | },
33 | {
34 | "cell_type": "markdown",
35 | "metadata": {},
36 | "source": [
37 | "Load data"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "collapsed": false,
43 | "input": [
44 | "df = pd.read_csv('drugs.smi', delimiter=\"\\s\")"
45 | ],
46 | "language": "python",
47 | "metadata": {},
48 | "outputs": [
49 | {
50 | "output_type": "stream",
51 | "stream": "stderr",
52 | "text": [
53 | "/usr/lib/python2.7/site-packages/pandas/io/parsers.py:635: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support regex separators; you can avoid this warning by specifying engine='python'.\n",
54 | " ParserWarning)\n"
55 | ]
56 | }
57 | ],
58 | "prompt_number": 2
59 | },
60 | {
61 | "cell_type": "code",
62 | "collapsed": false,
63 | "input": [
64 | "df.columns"
65 | ],
66 | "language": "python",
67 | "metadata": {},
68 | "outputs": [
69 | {
70 | "metadata": {},
71 | "output_type": "pyout",
72 | "prompt_number": 3,
73 | "text": [
74 | "Index([u'SMILES', u'Name'], dtype='object')"
75 | ]
76 | }
77 | ],
78 | "prompt_number": 3
79 | },
80 | {
81 | "cell_type": "code",
82 | "collapsed": false,
83 | "input": [
84 | "smiles = 'SMILES'\n",
85 | "names = 'Name'"
86 | ],
87 | "language": "python",
88 | "metadata": {},
89 | "outputs": [],
90 | "prompt_number": 4
91 | },
92 | {
93 | "cell_type": "code",
94 | "collapsed": false,
95 | "input": [
96 | "df = df[df[smiles].notnull()]"
97 | ],
98 | "language": "python",
99 | "metadata": {},
100 | "outputs": [],
101 | "prompt_number": 5
102 | },
103 | {
104 | "cell_type": "markdown",
105 | "metadata": {},
106 | "source": [
107 | "Add RDKit molecule objects"
108 | ]
109 | },
110 | {
111 | "cell_type": "code",
112 | "collapsed": false,
113 | "input": [
114 | "PandasTools.AddMoleculeColumnToFrame(df, smilesCol=smiles)"
115 | ],
116 | "language": "python",
117 | "metadata": {},
118 | "outputs": [],
119 | "prompt_number": 6
120 | },
121 | {
122 | "cell_type": "markdown",
123 | "metadata": {},
124 | "source": [
125 | "Add some columns with numbers"
126 | ]
127 | },
128 | {
129 | "cell_type": "code",
130 | "collapsed": false,
131 | "input": [
132 | "from rdkit.Chem import Descriptors\n",
133 | "df['logp'] = df['ROMol'].map(Descriptors.MolLogP)\n",
134 | "df['mw'] = df['ROMol'].map(Descriptors.MolWt)\n",
135 | "df['hac'] = df['ROMol'].map(Descriptors.HeavyAtomCount)"
136 | ],
137 | "language": "python",
138 | "metadata": {},
139 | "outputs": [],
140 | "prompt_number": 7
141 | },
142 | {
143 | "cell_type": "code",
144 | "collapsed": false,
145 | "input": [
146 | "len(df)"
147 | ],
148 | "language": "python",
149 | "metadata": {},
150 | "outputs": [
151 | {
152 | "metadata": {},
153 | "output_type": "pyout",
154 | "prompt_number": 8,
155 | "text": [
156 | "1000"
157 | ]
158 | }
159 | ],
160 | "prompt_number": 8
161 | },
162 | {
163 | "cell_type": "markdown",
164 | "metadata": {},
165 | "source": [
166 | "Check dtypes of columns"
167 | ]
168 | },
169 | {
170 | "cell_type": "code",
171 | "collapsed": false,
172 | "input": [
173 | "df.dtypes"
174 | ],
175 | "language": "python",
176 | "metadata": {},
177 | "outputs": [
178 | {
179 | "metadata": {},
180 | "output_type": "pyout",
181 | "prompt_number": 9,
182 | "text": [
183 | "SMILES object\n",
184 | "Name object\n",
185 | "ROMol object\n",
186 | "logp float64\n",
187 | "mw float64\n",
188 | "hac int64\n",
189 | "dtype: object"
190 | ]
191 | }
192 | ],
193 | "prompt_number": 9
194 | },
195 | {
196 | "cell_type": "code",
197 | "collapsed": false,
198 | "input": [
199 | "df.head(1)"
200 | ],
201 | "language": "python",
202 | "metadata": {},
203 | "outputs": [
204 | {
205 | "html": [
206 | "\n",
207 | "
\n",
208 | " \n",
209 | " \n",
210 | " | \n",
211 | " SMILES | \n",
212 | " Name | \n",
213 | " ROMol | \n",
214 | " logp | \n",
215 | " mw | \n",
216 | " hac | \n",
217 | "
\n",
218 | " \n",
219 | " \n",
220 | " \n",
221 | " 0 | \n",
222 | " NC1=NCC(c2ccccc2)O1 | \n",
223 | " CHEMBL106258 | \n",
224 | "  | \n",
225 | " 1.0726 | \n",
226 | " 162.192 | \n",
227 | " 12 | \n",
228 | "
\n",
229 | " \n",
230 | "
\n",
231 | "
"
232 | ],
233 | "metadata": {},
234 | "output_type": "pyout",
235 | "prompt_number": 10,
236 | "text": [
237 | " SMILES Name ROMol logp mw hac\n",
238 | "0 NC1=NCC(c2ccccc2)O1 CHEMBL106258
1.0726 162.192 12"
239 | ]
240 | }
241 | ],
242 | "prompt_number": 10
243 | },
244 | {
245 | "cell_type": "code",
246 | "collapsed": false,
247 | "input": [
248 | "len(df)"
249 | ],
250 | "language": "python",
251 | "metadata": {},
252 | "outputs": [
253 | {
254 | "metadata": {},
255 | "output_type": "pyout",
256 | "prompt_number": 11,
257 | "text": [
258 | "1000"
259 | ]
260 | }
261 | ],
262 | "prompt_number": 11
263 | },
264 | {
265 | "cell_type": "code",
266 | "collapsed": false,
267 | "input": [
268 | "import numpy as np\n",
269 | "import os\n",
270 | "from cStringIO import StringIO\n",
271 | "\n",
272 | "def SaveXlsxFromFrame(frame, outFile, molCol='ROMol', size=(300,300)):\n",
273 | " \"\"\"\n",
274 | " Saves a pandas DataFrame as an xlsx file with embedded images.\n",
275 | " It maps numpy data types to excel cell types:\n",
276 | " int, float -> number\n",
277 | " datetime -> datetime\n",
278 | " object -> string (limited to 32k characters - xlsx limitation)\n",
279 | " \n",
280 | " Cells with compound images are a bit larger than images due to excel.\n",
281 | " Column width weirdness explained (from xlsxwriter docs):\n",
282 | " The width corresponds to the column width value that is specified in Excel. \n",
283 | " It is approximately equal to the length of a string in the default font of Calibri 11. \n",
284 | " Unfortunately, there is no way to specify \u201cAutoFit\u201d for a column in the Excel file format.\n",
285 | " This feature is only available at runtime from within Excel.\n",
286 | " \"\"\"\n",
287 | " \n",
288 | " import xlsxwriter # don't want to make this a RDKit dependency\n",
289 | " \n",
290 | " cols = list(frame.columns)\n",
291 | " print cols\n",
292 | " cols.remove(molCol)\n",
293 | " dataTypes = dict(frame.dtypes)\n",
294 | "\n",
295 | " workbook = xlsxwriter.Workbook(outFile) # New workbook\n",
296 | " worksheet = workbook.add_worksheet() # New work sheet\n",
297 | " worksheet.set_column('A:A', size[0]/6.) # column width\n",
298 | " \n",
299 | " # Write first row with column names\n",
300 | " c2 = 1\n",
301 | " for x in cols:\n",
302 | " worksheet.write_string(0, c2, x)\n",
303 | " c2 += 1\n",
304 | " \n",
305 | " c = 1\n",
306 | " for index, row in frame.iterrows():\n",
307 | " image_data = StringIO()\n",
308 | " img = Draw.MolToImage(row[molCol], size=size)\n",
309 | " img.save(image_data, format='PNG')\n",
310 | " \n",
311 | " worksheet.set_row(c, height=size[1]) # looks like height is not in px?\n",
312 | " worksheet.insert_image(c, 0, \"f\", {'image_data': image_data})\n",
313 | "\n",
314 | " \n",
315 | " for c2, x in enumerate(cols, start=1):\n",
316 | " if str(dataTypes[x]) == \"object\":\n",
317 | " worksheet.write_string(c, c2, str(row[x])[:32000]) # string length is limited in xlsx\n",
318 | " elif ('float' in str(dataTypes[x])) or ('int' in str(dataTypes[x])):\n",
319 | " if (row[x] != np.nan) or (row[x] != np.inf):\n",
320 | " worksheet.write_number(c, c2, row[x])\n",
321 | " elif 'datetime' in str(dataTypes[x]):\n",
322 | " worksheet.write_datetime(c, c2, row[x])\n",
323 | " c += 1\n",
324 | "\n",
325 | " workbook.close()\n",
326 | " image_data.close()"
327 | ],
328 | "language": "python",
329 | "metadata": {},
330 | "outputs": [],
331 | "prompt_number": 12
332 | },
333 | {
334 | "cell_type": "markdown",
335 | "metadata": {},
336 | "source": [
337 | "#### Let's write selected columns and the first 500 compounds"
338 | ]
339 | },
340 | {
341 | "cell_type": "code",
342 | "collapsed": false,
343 | "input": [
344 | "% time SaveXlsxFromFrame(df[['ROMol', smiles, names, 'logp', 'hac']][:500], 'demo.xlsx', size=(200,200))"
345 | ],
346 | "language": "python",
347 | "metadata": {},
348 | "outputs": [
349 | {
350 | "output_type": "stream",
351 | "stream": "stdout",
352 | "text": [
353 | "['ROMol', 'SMILES', 'Name', 'logp', 'hac']\n",
354 | "CPU times: user 4.52 s, sys: 46.7 ms, total: 4.57 s"
355 | ]
356 | },
357 | {
358 | "output_type": "stream",
359 | "stream": "stdout",
360 | "text": [
361 | "\n",
362 | "Wall time: 4.58 s\n"
363 | ]
364 | }
365 | ],
366 | "prompt_number": 13
367 | },
368 | {
369 | "cell_type": "markdown",
370 | "metadata": {},
371 | "source": [
372 | "\n",
373 | "\n",
374 | "Copyright (C) 2014 by Samo Turk, [BioMed X GmbH](http://bio.mx)\n",
375 | "\n",
376 | "This work is licensed under the Creative Commons Attribution-ShareAlike 3.0 License. To view a copy of this license, visit http://creativecommons.org/licenses/by-sa/3.0/ or send a letter to Creative Commons, 543 Howard Street, 5th Floor, San Francisco, California, 94105, USA.\n"
377 | ]
378 | }
379 | ],
380 | "metadata": {}
381 | }
382 | ]
383 | }
--------------------------------------------------------------------------------
/Presentations and Tutorials/RDKit UGM 2014/hackaton/drugs.smi:
--------------------------------------------------------------------------------
1 | SMILES Name
2 | NC1=NCC(c2ccccc2)O1 CHEMBL106258
3 | C=CC1(C)CC(OC(=O)CSCCN(CC)CC)C2(C)C3C(=O)CCC3(CCC2C)C(C)C1O CHEMBL498466
4 | CCCCn1c2ncn(CC(C)=O)c2c(=O)n(CCCC)c1=O CHEMBL277465
5 | CCC(C(N)=O)N1CCCC1=O CHEMBL1286
6 | C(C1CO1)N1CCC(C2CCN(CC3CO3)CC2)CC1 CHEMBL462393
7 | CN1CCN(c2c(F)cc3c(=O)c(C(=O)O)cn4c3c2SCC4)CC1 CHEMBL295619
8 | ClC(Cl)Cl CHEMBL44618
9 | CNCCCCOc1ccccc1Cc1ccccc1 CHEMBL1192517
10 | CCCCCN(CCCOC)C(=O)C(CCC(=O)O)NC(=O)c1ccc(Cl)c(Cl)c1 CHEMBL550781
11 | CCN(CC)CCNC(C(=O)OCCC(C)C)c1ccccc1 CHEMBL253592
12 | CCCC(CCC)C(=O)NCC(N)=O CHEMBL471638
13 | O=c1[nH]c2cc(C(F)(F)F)c(N3CCOCC3)cc2n(CP(=O)(O)O)c1=O CHEMBL19892
14 | CCN(c1nncs1)P(=O)(N1CC1)N1CC1 CHEMBL474928
15 | Nc1ccncc1N CHEMBL354077
16 | CN(C)S(=O)(=O)NC1CC2c3cccc4c3c(cn4C)CC2N(C)C1 CHEMBL12314
17 | O=P(O)(O)C(Cl)(Cl)P(=O)(O)O CHEMBL12318
18 | O=C(C=CC=Cc1ccc2c(c1)OCO2)N1CCCCC1 CHEMBL43185
19 | [Na+].O=S(=O)([O-])NC1CCCCC1 CHEMBL273977
20 | CC(C)NCC(O)COc1cccc2ccccc21 CHEMBL275742
21 | CC(C)NCC(O)COc1ccc(COCCOC(C)C)cc1 CHEMBL645
22 | NCCCS(=O)(=O)O CHEMBL149082
23 | CC(C)(CC1Cc2ccccc2C1)NCC(O)COc1cc(CCC(=O)O)cc(F)c1F CHEMBL1198855
24 | CC(C)Nc1cccnc1N1CCN(C(=O)c2cc3cc(NS(C)(=O)=O)ccc3[nH]2)CC1 CHEMBL593
25 | CC(=O)C(=Cc1cc(O)c(O)c([N+](=O)[O-])c1)C(C)=O CHEMBL167055
26 | NCCCNc1ccc2c3c(nn2CCNCCO)-c2c(O)ccc(O)c2C(=O)c31 CHEMBL203666
27 | COC(=O)C1C(O)CCC2CN3CCc4c5ccccc5[nH]c4C3CC21 CHEMBL15245
28 | CN1CCc2cccc3c2C1Cc1ccc(O)c(O)c1-3 CHEMBL53
29 | CC(C(O)c1ccc(O)cc1)N1CCC(Cc2ccccc2)CC1 CHEMBL305187
30 | CC(C)NCC(O)COc1ccc(CC(N)=O)cc1 CHEMBL24
31 | CC12CCC(=O)C=C1CCC1C3CCC(C(=O)CO)C3(C=O)CC(O)C12 CHEMBL273453
32 | OCC1OC(O)(CO)C(O)C1O CHEMBL604608
33 | CCN(CC)CCc1nc(-c2ccccc2)no1 CHEMBL1620875
34 | Cc1ccc(OCC(O)CNC(C)(C)C)c2oc(=O)ccc12 CHEMBL349807
35 | Cc1c(C)c(=O)oc2cc(OCc3nnc(C(C)C)s3)ccc12 CHEMBL19004
36 | C=CCN(C1=NCCN1)c1c(Cl)cccc1Cl CHEMBL278581
37 | CN1c2ccc(Cl)cc2C2c3ccccc3CCN2CC1=O CHEMBL1723823
38 | CCCCCCCC(=O)O CHEMBL324846
39 | CCOC1OC2OC3(C)CCC4C(C)CCC(C1C)C42OO3 CHEMBL301267
40 | CNCC(O)c1ccc(O)c(O)c1 CHEMBL1740
41 | C=C1CC(N)C(C(=O)O)C1 CHEMBL343803
42 | CC(=O)Nc1ccc(C(=O)O)cc1 CHEMBL112687
43 | CCc1c(C)n2cc(C(=O)c3ccccc3)nc2nc1OC CHEMBL281164
44 | Cc1cccc(OCC(O)CNC(C)(C)C)c1C CHEMBL347795
45 | CC(=O)Nc1cccc(N2CCN(CCCCNS(=O)(=O)CC3CCCCC3)CC2)c1 CHEMBL209821
46 | CCn1c2ccc(OC(C)=O)cc2c(C)c1-c1ccc(OC(C)=O)cc1 CHEMBL32227
47 | O=C(NP(=O)(N1CC1)N1CC1)OCc1ccccc1 CHEMBL504088
48 | CC(C)(C)C(=O)C(Oc1ccc(Cl)cc1)n1ccnc1 CHEMBL1437764
49 | CCOC(=S)SSC(=S)OCC CHEMBL331743
50 | O=C1c2ccccc2OC=C(Cl)N1CCCCN1CC=C(c2ccccn2)CC1 CHEMBL345237
51 | NCC(CC(=O)O)c1ccc(Cl)cc1 CHEMBL301742
52 | CC(=O)c1ccc2c(c1)C(=O)C1(CC1)O2 CHEMBL18098
53 | CCCCCCCC(=O)OC(O)C(O)CO CHEMBL1200882
54 | COc1ccc(CCNCC(O)c2ccc(O)cc2)cc1OC CHEMBL493682
55 | Cc1cc(-c2ccc(O)cc2)nnc1NCCN1CCOCC1 CHEMBL150980
56 | CCCCCCCCCCCCCC(=O)O CHEMBL111077
57 | CC(C)n1cc2c3c1cccc3C1CC(C(=O)NC3CCCCC3)CN(C)C1C2 CHEMBL160293
58 | NC(=O)Cc1cccc(C(=O)c2ccccc2)c1N CHEMBL1021
59 | CCCC(NC(C)C(=O)N1C(C(=O)O)CC2CCCCC21)C(=O)O CHEMBL1201368
60 | Cc1c(F)cc2c(=O)c(C(=O)O)cn3c2c1CCC3C CHEMBL170052
61 | CN(C)CCN1C(=O)c2cccc3cc([N+](=O)[O-])cc(c32)C1=O CHEMBL43482
62 | [Cl-].[Cl-].[Sr+2] CHEMBL2219640
63 | CN(C)CCOC(C)(c1ccccc1)c1ccccc1 CHEMBL1742465
64 | CN(C)S(=O)(=O)c1ccc2c(c1)N(CCCN1CCN(C)CC1)c1ccccc1S2 CHEMBL609109
65 | CCN(CC)C(=O)N1CCN(C)CC1 CHEMBL684
66 | COc1cc2c(cc1OCCCF)CCN1CC(CC(C)C)C(O)CC21 CHEMBL579217
67 | O=C(Nc1cccc(C2=NCCN2)c1)Nc1cccc(C2=NCCN2)c1 CHEMBL427342
68 | CC1COC2(c3ccccc3Cl)c3cc(Cl)ccc3NC(=O)CN12 CHEMBL1743261
69 | CCCCCCCCCCCCOCCCN CHEMBL1887483
70 | CCOC(=O)C(C#N)=C(c1ccccc1)c1ccccc1 CHEMBL1889451
71 | O=C(NCC1CCCCN1)c1cc(OCC(F)(F)F)ccc1OCC(F)(F)F CHEMBL652
72 | Cc1cnc(NCCCN(C)C)c2c1[nH]c1ccc3cc(O)ccc3c12 CHEMBL1908351
73 | O=C1NC(=O)C2C1C1C2C2C=CC1C1C(=O)NC(=O)C12 CHEMBL1908340
74 | CC(O)C(=O)OP(=O)(O)O CHEMBL2111170
75 | CCOC(=O)C1(c2ccccc2)CCN(CCCNc2ccccc2)CC1 CHEMBL2110995
76 | COc1ccc(Cl)c2c1CCCC2N(C)C CHEMBL2111119
77 | C#CCOC1CN2CCC1CC2 CHEMBL2111051
78 | CCC(=O)OC(CC(=O)O)C[N+](C)(C)C CHEMBL1516469
79 | C=CCC1(CC(C)C)C(=O)NC(=S)NC1=O CHEMBL2110937
80 | CCNC(C)C(O)c1ccc(O)c(O)c1 CHEMBL2110629
81 | [Cl-].[Cl-].[Ra+2] CHEMBL2111187
82 | OC1CN2CCCC(O)C2C1O CHEMBL371197
83 | CC12CCC3C(CCC4CC(=O)CCC43C)C1CCC2C(=O)COC(=O)CCC(=O)O CHEMBL2110850
84 | Cc1cc(OCCN(C)C)c(C(C)C)cc1OC(=O)OC(C)C CHEMBL2110682
85 | COc1ccc2c(C)c(C(C)=O)c(C)nc2c1 CHEMBL2104051
86 | Cc1cccc2c1NC(=O)C2(c1ccc(OS(=O)(=O)O)cc1)c1ccc(OS(=O)(=O)O)cc1 CHEMBL2104944
87 | NCC1CC(=O)N(Cc2ccccc2)C1 CHEMBL2104682
88 | O=[N+]([O-])c1ccc(-c2nc3ccccc3c(N(CCO)CCO)n2)o1 CHEMBL2107101
89 | CCN(CC)CCOc1c2occc2c(O)c2c(=O)cc(C)oc12 CHEMBL2104025
90 | C[N+](C)(C)CC(O)CC(=O)[O-] CHEMBL1149
91 | CCCCOC(=O)c1ccc(N)cc1 CHEMBL127516
92 | O=C1CN=C(c2ccccc2Cl)c2cc([N+](=O)[O-])ccc2N1 CHEMBL452
93 | CC1=CC(=O)NS(=O)(=O)O1 CHEMBL176687
94 | CC(C)(C)NCC(O)COc1ccccc1OCC1CCCO1 CHEMBL347830
95 | CC12CCC3c4ccc(O)cc4CCC3C1CCC2O CHEMBL286452
96 | CNC(C)C(O)c1ccc(O)c(O)c1 CHEMBL416557
97 | CC(=O)OC1CCC2C3C(C)CC4=CC(=O)CCC4C3CCC12C CHEMBL452329
98 | CC(=O)NC(Cc1ccccc1)C(=O)O CHEMBL134570
99 | COc1ccc2c3c1OC1CC(O)C=CC31CCN(C)C2 CHEMBL659
100 | c1ccc2c(c1)Nc1ccccc1S2 CHEMBL828
101 | CCC(=O)NS(=O)(=O)c1ccc(-c2c(C)onc2-c2ccccc2)cc1 CHEMBL1206690
102 | CN1C2CCC1CC(OC(=O)c1c[nH]c3ccccc13)C2 CHEMBL56564
103 | CN(C)CCN1C(=O)c2cccc3cc(N)cc(c32)C1=O CHEMBL428676
104 | CC(C)CC(N)C(=O)O CHEMBL291962
105 | CC(CN1CC(=O)NC(=O)C1)N1CC(=O)NC(=O)C1 CHEMBL444186
106 | O=C(c1ccc2nonc2c1)N1CCCCC1 CHEMBL1276138
107 | COc1ccc(S(=O)(=O)Nc2nnc(CC(C)C)s2)cc1 CHEMBL1481457
108 | CC(C)NCC(O)c1cccc(O)c1 CHEMBL327122
109 | O=C(COc1ccc(Cl)cc1)N1CCN(Cc2ccc3c(c2)OCO3)CC1 CHEMBL254857
110 | COc1cc(C=C2CCCC(=Cc3ccc(O)c(OC)c3)C2=O)ccc1O CHEMBL17205
111 | Cc1onc(-c2c(F)cccc2Cl)c1C(=O)NC1C(=O)N2C1SC(C)(C)C2C(=O)O CHEMBL222645
112 | CC12C=CC(=O)C=C1CCC1C2CCC2(C)OC(=O)CCC12 CHEMBL1571
113 | OCCN1CCN(C2CC(c3ccc(F)cc3)c3ccc(C(F)(F)F)cc32)CC1 CHEMBL95636
114 | CN(C)CCC1c2ccc(Cl)cc2CCc2cccnc21 CHEMBL1738982
115 | CC12CCC3C(C=CC4=CC(=O)CCC43C)C1CCC2(O)CCC(=O)O CHEMBL1616951
116 | O=C1C=C(N2CC2)C(=O)C(N2CC2)=C1N1CC1 CHEMBL313302
117 | CN1C(=O)CC(c2ccccc2)C1=O CHEMBL797
118 | CCN(CC)CC1CCCCN1CC(=O)N1c2ccccc2C(=O)Nc2cccnc21 CHEMBL17045
119 | OCCN1CCN(CCCN2c3ccccc3C=Cc3ccccc32)CC1 CHEMBL370753
120 | O=C(O)CCCCCCCNC(=O)c1ccccc1O CHEMBL16503
121 | CSc1ccc(C(=O)c2[nH]c(=O)[nH]c2C)cc1 CHEMBL249856
122 | CS(=O)(=O)Nc1ccc(OCC(O)CNCCOc2ccc(-n3ccnc3)cc2)cc1 CHEMBL99585
123 | CCc1nc(N)nc(N)c1-c1ccc(Cl)c(Cl)c1 CHEMBL21799
124 | CCCCCCCCCCC(=O)O CHEMBL108030
125 | O=C(O)C(S)C(S)C(=O)O CHEMBL1201073
126 | C=C(c1ccccc1OCc1cccc(Cl)c1)n1ccnc1 CHEMBL27289
127 | NCCCC(N)C(=O)O CHEMBL446143
128 | NCC(Cl)C(O)c1cnc(N)[nH]1 CHEMBL1171272
129 | OC(CCl)CN1CC[N+]2(CC1)CC[N+]1(CCN(CC(O)CCl)CC1)CC2 CHEMBL1187562
130 | CN1CCC(OC(c2ccccc2)c2ccccc2)CC1 CHEMBL1492
131 | CN1c2ccccc2C(NCCCCCCC(=O)O)c2ccc(Cl)cc2S1(=O)=O CHEMBL1289110
132 | CC(=O)Oc1ccc(C2(c3ccc(OC(C)=O)cc3)C(=O)Nc3ccccc32)cc1 CHEMBL1402684
133 | COc1ccc(C(=O)c2ccccc2O)c(O)c1 CHEMBL1326877
134 | CC1=CC(=O)c2ccccc2C1=O CHEMBL590
135 | CC1(C)SCCN(S(=O)(=O)c2ccc(Oc3ccncc3)cc2)C1C(=O)NO CHEMBL75094
136 | CCOC(=O)NC(C)(C)Cc1ccc(Cl)cc1 CHEMBL1697686
137 | CC(CCc1ccc(O)cc1)NCCc1ccc(O)c(O)c1 CHEMBL926
138 | Cc1cc(N)c2cc(NC(=O)Nc3ccc4nc(C)cc(N)c4c3)ccc2n1 CHEMBL87223
139 | COc1ccc(N2CC(CN3CCC(O)(c4ccc5c(c4)OCO5)CC3)OC2=O)cc1 CHEMBL45686
140 | NC(=O)CCCCC1CCSS1 CHEMBL1403899
141 | CC(C)(C)[SiH2]O[Si](C)(C)O[SiH2]C(C)(C)C CHEMBL1200838
142 | NC(=O)c1ccc([N+](=O)[O-])cc1Cl CHEMBL1450565
143 | Cc1cccc(N2CC(CO)OC2=O)c1 CHEMBL18116
144 | Nc1cc2[nH]cnc2c(=O)[nH]1 CHEMBL31882
145 | CC1CC2=CC(=O)CCC2(C)C2CCC3(C)C(CCC3(C)O)C12 CHEMBL259548
146 | COc1ccc2c(c1)c1c3n2CCN(C)C3=NCC1 CHEMBL1619017
147 | NCc1cccnc1 CHEMBL1988272
148 | O=P(O)(O)Oc1ccc(C(c2ccc(OP(=O)(O)O)cc2)c2ccccn2)cc1 CHEMBL2111180
149 | [Na+].NC(CCC(=O)[O-])C(=O)O CHEMBL2107256
150 | [Na+].[K+].O=C([O-])C(O)C(O)C(=O)[O-] CHEMBL2219738
151 | CCCN(c1ccncc1)n1ccc2ccccc21 CHEMBL29835
152 | CC(C)NCC(O)c1ccc(O)c(NS(C)(=O)=O)c1 CHEMBL30746
153 | CCOc1ccc(S(=O)(=O)O)c2cccnc12 CHEMBL1356732
154 | CCOCn1cnc2c1c(=O)n(CCCCC(C)(C)O)c(=O)n2C CHEMBL2104934
155 | CN1CCCCC1C(C)(C)OC(=O)C(c1ccccc1)c1ccccc1 CHEMBL2106597
156 | CCOc1ccc2nc(SC(C)(C(=O)O)c3ccccc3)sc2c1 CHEMBL2107083
157 | O=C(c1ccccc1)N1CC2COCC2C1 CHEMBL2106524
158 | C=CC1(O)CCC2C3CCC4=C(CCC(=O)C4)C3CCC21C CHEMBL2106812
159 | O=c1[nH]c2ccccc2c2cc(CO)nn21 CHEMBL2106923
160 | CC(=O)NC(CSCC(C)C(=O)c1ccccc1)C(=O)O CHEMBL2104015
161 | Cc1cccc(Cl)c1NC(=O)C=C1SCC(=O)N1C CHEMBL2106933
162 | COc1ccc(C2Sc3ccccc3N(CCN(C)C(C)C)C(=O)C2OC(C)=O)cc1 CHEMBL2107305
163 | Cc1nnc(NS(=O)(=O)c2ccc(NC(=O)c3ccccc3C(=O)O)cc2)s1 CHEMBL2106914
164 | COc1ccc(-n2c(C)c[nH]c2=O)cc1 CHEMBL2104723
165 | CCCC(C)(O)C1CC23C=CC1(OC)C1Oc4c5c(ccc4OC(C)=O)CC2N(C)CCC513 CHEMBL2104593
166 | CCOC(=O)C1(c2cccc(O)c2)CCN(C)CC1 CHEMBL1182665
167 | CN(CCC1c2ccccc2COc2ccccc21)CC(O)COc1ccccc1 CHEMBL2106287
168 | CCCCCCn1c(=O)c2c(ncn2C)n(C)c1=O CHEMBL2105338
169 | CC(NCCC(=O)C1CCCCC1)C(O)c1ccccc1 CHEMBL2104023
170 | O=C1CCCCC1c1c(O)ccc2ccccc21 CHEMBL2106809
171 | O=C1N=C2SCCN2C(=O)C1Cc1ccc(Cl)cc1 CHEMBL2106465
172 | NC(Cc1ccccn1)c1ccccc1 CHEMBL2107647
173 | N=C(N)C(O)c1ccc2c(c1)OCO2 CHEMBL2104702
174 | CC12CCC3c4ccc(OC5CCCC5)cc4CCC3C1CC(O)C2O CHEMBL2107004
175 | CCOC(=O)N(Cc1ccccc1)C1CC1 CHEMBL2104204
176 | CN1CCC23c4c5c(O)ccc4CC1C2CCC(C)(O)C3O5 CHEMBL2104436
177 | CC1NC(C2CC2)CC1c1ccc(Cl)cc1 CHEMBL2105225
178 | CN(C)CCCOC1(Cc2ccccc2)CCCCCC1 CHEMBL2110767
179 | CCN(CC)CCCNCc1cc(Cl)c2cccnc2c1O CHEMBL2110791
180 | CNC1C(O)C(OC2C(N)CC(N)C(OC3OC(CN)C(O)C(O)C3O)C2O)OCC1(C)O CHEMBL2110604
181 | CC(CSC(=O)C(C)(C)C)C(=O)N(CC(=O)O)C1CCCC1 CHEMBL2051970
182 | CN1CCC23c4c5c(O)ccc4CC1C2CCCC3O5 CHEMBL2106274
183 | OC(CN1CCCCC1)c1cc(-c2ccccc2)on1 CHEMBL2105453
184 | COc1cc(C=CC(N)=O)cc(OC)c1OC CHEMBL2107098
185 | O=C(c1ccc(Cl)cc1)n1cc(Cc2nnn[nH]2)c2ccccc21 CHEMBL2104903
186 | Cc1cc(C(=O)c2cc(I)c(O)c(I)c2)c(C)o1 CHEMBL2107414
187 | NC(=O)C(CCN1CCN2CCCC2C1)(c1ccccc1)c1ccccc1 CHEMBL2106959
188 | CC(C)NC(=N)NC(=N)Nc1ccc(Cl)c(Cl)c1 CHEMBL1213553
189 | O=c1[nH]c2ccccc2n1C1CCN(CCCc2noc3cc(F)ccc23)CC1 CHEMBL2111040
190 | O=C1CCC2C3Cc4cccc(O)c4C2(CCN3CC2CC2)C1 CHEMBL2104366
191 | CC(=O)c1ccc2c(c1)N(CC(C)N(C)C)c1ccccc1S2 CHEMBL2104054
192 | C=C(CC(=O)c1ccc(-c2ccccc2Cl)cc1)C(=O)O CHEMBL2104844
193 | Cc1cc(NS(=O)(=O)c2ccc(N)cc2)n(-c2cccc(Cl)c2)n1 CHEMBL2104945
194 | O=S1(=O)CCNCN1 CHEMBL2104848
195 | COc1ccc(C(=O)N=c2[cH-][n+](N3C(C)CCCC3C)no2)cc1 CHEMBL2104889
196 | CCc1c(-c2ccccc2)[nH]n(C2CCN(C)CC2)c1=O CHEMBL2104576
197 | C=C(C)CNCC(=O)N(C)c1ccc(Cl)cc1C(=O)c1ccccc1 CHEMBL2106239
198 | CC(C)C1(C(C)C)OCC(CO)O1 CHEMBL2104577
199 | CC(=O)OCC(=O)OCCC(SC(=O)c1ccco1)=C(C)N(C=O)Cc1cnc(C)nc1N CHEMBL2104090
200 | CC1CC(OC(=O)CN2CCCC2=O)CC(C)(C)C1 CHEMBL2105253
201 | CC1(C)C2CCC1(C)C(=O)C2Br CHEMBL2106181
202 | CC1C=CC(C)N1CC(=O)N=C(N)N CHEMBL2104785
203 | CN1C(CC(=O)c2ccccc2)CCCC1CC(O)c1ccccc1 CHEMBL2103769
204 | NCCOC(c1ccc(F)cc1)c1ccc(F)cc1 CHEMBL2105476
205 | CN1C2CCC1CC(OC(=O)C(C)(CO)c1ccccc1)C2 CHEMBL2104939
206 | CCOc1ccccc1C(=O)NN=C(C)C(=O)O CHEMBL2107047
207 | O=C(NCCCN1CCOCC1)c1ccc(Cl)cc1 CHEMBL2104298
208 | COc1ccc2[nH]c(C)c(CC(=O)NO)c2c1 CHEMBL2104618
209 | c1ccc(C2(c3ccccc3)OCC(C3CCCCN3)O2)cc1 CHEMBL72982
210 | CC(=O)OC(Cc1ccccc1)(c1ccccc1)C(C)CN1CCCC1 CHEMBL2110705
211 | CN1C2CCC1CC(NC(=O)c1cc(Cl)cc3c1OC(C)(C)C3)C2 CHEMBL2111147
212 | CNCCCC1(c2ccccc2)OC(C)(C)c2ccccc21 CHEMBL299233
213 | CCN(CC)CCn1c(=O)oc2ccccc2c1=O CHEMBL2110855
214 | CNCC(O)CC12CCC(C3CCCCC31)C1CCCCC12 CHEMBL2111067
215 | [Ca+2].[Ca+2].[Ca+2].O=P([O-])([O-])[O-].O=P([O-])([O-])[O-] CHEMBL2106566
216 | [K+].[K+].NC(CC(=O)[O-])C(=O)[O-] CHEMBL2106910
217 | CC1CCCCN1CCCOC(=O)c1ccccc1 CHEMBL127865
218 | CC1CC(OC(=O)c2ccccc2)CC(C)(C)N1 CHEMBL2110936
219 | CCCc1cc2c(s1)CNC1CCc3cc(OC(C)=O)c(OC(C)=O)cc3C21 CHEMBL2104655
220 | CC(CN(C(=O)c1ccc(C#N)cc1)c1ccccn1)N1CCN(c2cccc3c2OCCO3)CC1 CHEMBL372205
221 | CCC(=O)N(c1ccccn1)C(C)CN1CCCCC1 CHEMBL2110985
222 | CCCCN1CCC(CNC(=O)c2c3ccccc3n3c2OCCC3)CC1 CHEMBL356359
223 | OC(c1ccccc1)(c1ccccc1)C1CCNCC1 CHEMBL127508
224 | CC(C)(C)NCC(O)COc1ccccc1-c1ccccc1 CHEMBL2104145
225 | CN(C)CCN(Cc1ccsc1)c1ccccn1 CHEMBL2107485
226 | CCNC1=Nc2ccc(Cl)cc2C(c2ccccc2)S1 CHEMBL2106208
227 | CC1=C(C(=O)O)N2C(=O)C(NC(=O)C(N)c3ccc(O)c(Cl)c3)C2SC1 CHEMBL2106477
228 | CCCOC(=O)c1ccc(N)cc1 CHEMBL2107010
229 | CCCn1c2nc(C34CC5CC3CC(C4)C5)[nH]c2c(=O)n(CCC)c1=O CHEMBL2103819
230 | CCCCc1ccc(C(N)=O)nc1 CHEMBL2106646
231 | CN1CCC23CCCCC2C1Cc1ccc(O)cc13 CHEMBL20803
232 | CN(C)C(=O)COc1ccc2c(c1)CC(NCC(O)c1ccc(O)c(CCO)c1)CC2 CHEMBL2111083
233 | CCCCCCC(C(C)O)n1cnc2c1nc[nH]c2=O CHEMBL2304038
234 | CC(Oc1ccccc1)C(=O)NC1C(=O)N2C1SC(C)(C)C2C(=O)O CHEMBL1614637
235 | C#CCNC1CCc2ccc(OC(=O)N(C)CC)cc21 CHEMBL255231
236 | C=CCc1ccccc1OCC(CNC(C)C)=NO CHEMBL2105963
237 | NC(=O)c1cnc2[nH]ccc2c1NC1C2CC3CC1CC(O)(C3)C2 CHEMBL3137308
238 | CCC1(O)C(=O)OCc2c(=O)n3c(cc21)-c1nc2ccccc2c(C=NOC(C)(C)C)c1C3 CHEMBL113051
239 | O=C1c2ccccc2CN2C(=O)c3ccccc3CN12 CHEMBL1536675
240 | Cc1cnc(C(=O)NCCc2ccc(S(=O)(=O)NC(=O)NC3CCCCC3)cc2)cn1 CHEMBL1073
241 | CCN(CC)CCNc1ccc(CO)c2sc3ccccc3c(=O)c12 CHEMBL22077
242 | CCCCCCCCCCCCCCCC(O)C(N)CO CHEMBL1442934
243 | NC(C(=O)NC1C(=O)N2C(C(=O)O)=C(CSc3c[nH]nn3)CSC12)c1ccc(O)cc1 CHEMBL1095284
244 | CSCC1CN(N=Cc2ccc([N+](=O)[O-])o2)C(=O)O1 CHEMBL514315
245 | CCOC(=O)c1ccccc1C(=O)OCC CHEMBL388558
246 | Sc1[nH]cnc2nncc1-2 CHEMBL119423
247 | NC(Cc1ccccc1)C(=O)O CHEMBL301523
248 | CC1CC(=O)NN=C1c1cc(-n2ccnc2)cs1 CHEMBL2104670
249 | CC1NC(=O)COC1c1ccccc1 CHEMBL2106282
250 | NC(=O)NCCN1CCN(c2cccc(C(F)(F)F)c2)CC1 CHEMBL2106753
251 | Cc1ccc(F)cc1NC1=NCCN1 CHEMBL2106760
252 | Fc1ccc(NC(=S)Nc2cc(Cl)cc(Cl)c2)cc1 CHEMBL2105198
253 | CCCCCC(C(=O)N1CCCC1C(=O)CC(CCCNC(=N)N)C(=O)O)N(CC)C(=O)CN CHEMBL2107753
254 | CC(C(=O)O)c1ccc(C(=O)c2cccs2)c(Cl)c1 CHEMBL2104170
255 | Cc1cc(=O)oc2cc(OCCN3CCOCC3)cc(OCCN3CCOCC3)c12 CHEMBL2105164
256 | C=CCC1CN(C)CCC1(OC(=O)CC)c1ccccc1 CHEMBL2103995
257 | Cc1cc(N(C)C)ccc1[PH](=O)O CHEMBL2105341
258 | CC(C)N1CCNC1=S CHEMBL2103937
259 | CCC1(O)CCN2CCc3ccc(C)cc3C2C1 CHEMBL2105550
260 | Cc1cc(C)nc(NS(=O)(=O)c2ccc(N=Nc3ccc(O)c(C(=O)O)c3)cc2)n1 CHEMBL2107040
261 | CC(C)[N+](C)(C)CC(O)COc1cccc2ccccc21 CHEMBL1710851
262 | CN(C)CCN1c2ccccc2SC(c2ccccc2)CC1=O CHEMBL2111123
263 | CC(C)NCC(O)c1ccccc1Cl CHEMBL1902627
264 | C=CCC(Cc1cc(OC)c(OC)c(OC)c1)NC CHEMBL2111182
265 | COC1CC2CC(OC(=O)C(O)(c3ccccc3)c3ccccc3)CC1[N+]2(C)C CHEMBL2111174
266 | CCCCCOc1ccccc1C(N)=O CHEMBL2105369
267 | COc1ccc(N(CCCC(=O)O)C(=O)c2ccc(Cl)cc2)cc1 CHEMBL2107589
268 | CC(=O)NC(CCSC(=O)C(C)(C)Oc1ccc(Cl)cc1)C(=O)O CHEMBL2107469
269 | COCCCP(CCCOC)CCCOC CHEMBL2107255
270 | CC(C)Oc1ccccc1-c1cc(=O)c2cc(C(=O)O)ccc2o1 CHEMBL2106345
271 | CCC(C#N)(CC(C)N(C)C)c1ccccc1 CHEMBL2104277
272 | CN(CCO)c1nc2c(c(=O)n(C)c(=O)n2C)n1C CHEMBL2104570
273 | CCOC(=O)C1(c2ccccc2)CCN(CCOCc2ccccc2)CC1 CHEMBL2104234
274 | Cc1cc(O)c2c3[nH]c(-c4ccco4)nc3ccc2n1 CHEMBL2104697
275 | O=C(O)CSC(SCC(=O)O)SCC(=O)O CHEMBL2105322
276 | Oc1ccc(C(O)CNC2CCC2)cc1O CHEMBL2106796
277 | CCOP(=S)(OCC)Oc1cc(C)nc(N(C)C)n1 CHEMBL2106985
278 | CCCCOc1ccc(C(=O)OCCOCCN(CC)CC)cc1N CHEMBL2110889
279 | O=C(O)CCCC=CCC1C(O)CC(O)C1C=CC(O)COc1cccc(C(F)(F)F)c1 CHEMBL2220419
280 | CCCN1CCCC2c3cc(O)ccc3OCC21 CHEMBL2220428
281 | C#Cc1cnc2nc(O)n(C(CC)CC)c2n1 CHEMBL3039529
282 | O=c1ccc2c(C(O)CNCCCCCCOCC(F)(F)c3ccccc3)ccc(O)c2[nH]1 CHEMBL3039530
283 | Cn1ncc(Cl)c1-c1cc(C(=O)NC(CN)Cc2ccc(F)c(F)c2)oc1Cl CHEMBL3137336
284 | Oc1cccc(O)c1O CHEMBL307145
285 | O=C(O)c1ccccc1C(=O)Nc1ccc(S(=O)(=O)Nc2nccs2)cc1 CHEMBL1524273
286 | CC12CCC3C(CCC4CC(O)CCC43C)C13OC3CC2c1ccc(=O)oc1 CHEMBL250172
287 | CC(O)C(N)C(=O)O CHEMBL291747
288 | O=C(Nc1ccc(O)cc1)c1ccccc1O CHEMBL389507
289 | O=c1oc2cc(O)ccc2s1 CHEMBL442687
290 | OC(Cn1ccnc1)c1ccc(CCc2ccccc2)cc1 CHEMBL41849
291 | CC(=N)N1CCC(SC2=C(C(=O)O)N3C(=O)C(C(C)O)C3C2)C1 CHEMBL339323
292 | CN(C(=O)C=Cc1ccoc1)C1CCC2(O)C3Cc4ccc(O)c5c4C2(CCN3CC2CC2)C1O5 CHEMBL267495
293 | CC(C)(C)NCC(O)COc1cc(Cl)ccc1Cl CHEMBL156791
294 | COc1ccc(-c2cc(C(F)F)nn2-c2ccc(S(N)(=O)=O)cc2)cc1F CHEMBL28636
295 | O=c1[nH]c2n[nH]cc2c(=O)[nH]1 CHEMBL859
296 | COc1ccc2c(c1)C13CCCCC1C(C2)N(C)CC3 CHEMBL22207
297 | Cc1c(C(N)=O)cc([N+](=O)[O-])cc1[N+](=O)[O-] CHEMBL472565
298 | N#CC(c1ccc(Cl)cc1)c1c(Cl)cc(-n2ncc(=O)[nH]c2=O)cc1Cl CHEMBL284733
299 | CCCC(=O)OC(C(Cl)(Cl)Cl)P(=O)(OC)OC CHEMBL1570266
300 | CCC1Nc2cc(Cl)c(S(N)(=O)=O)cc2S(=O)(=O)N1 CHEMBL2106262
301 | CC[N+]1(CCOC(=O)c2cc(OC)c(OC)c(OC)c2)CCCC1 CHEMBL2111183
302 | C[N+]1(CC2CC2)CCC23CC(=O)CCC2(O)C1Cc1ccc(C(N)=O)c(O)c13 CHEMBL2364619
303 | COc1ccc2c(c1)[nH]c1c2CCN2CC3C(C)OC(OC)=C(C(=O)OCCN(C)C)C3CC12 CHEMBL2106350
304 | Cn1ncnc1C1c2n[nH]c(=O)c3cc(F)cc(c32)NC1c1ccc(F)cc1 CHEMBL3137320
305 | CCc1nn(CCCN2CCN(c3cccc(Cl)c3)CC2)c(=O)n1CC CHEMBL1743259
306 | NS(=O)(=O)c1ccc(C(=O)O)cc1 CHEMBL414
307 | Cc1ncc2c(c1O)COC2c1ccc(Cl)cc1 CHEMBL191886
308 | CC(=O)C1CCC2C3CCC4CC(O)CCC4(C)C3C(=O)CC12C CHEMBL190279
309 | CC(=O)NC(CCC([O-])=C[N+]#N)C(=O)O CHEMBL475736
310 | Cc1cc(Cn2nnc3c2nc(N)nc3-c2ccco2)ccc1N CHEMBL447664
311 | CCC(CO[N+](=O)[O-])(CO[N+](=O)[O-])CO[N+](=O)[O-] CHEMBL488280
312 | Cc1ccc(S(=O)(=O)C(CC#Cc2ccc(Cl)cc2)C(=O)O)cc1 CHEMBL267770
313 | Nc1nc2[nH]cnc2c(=S)[nH]1 CHEMBL727
314 | OCC(O)C(O)C(OC1OC(CO)C(O)C(O)C1O)C(O)CO CHEMBL63558
315 | COc1ccc(-c2nc(C(F)(F)F)[nH]c2-c2ccc(OC)cc2)cc1 CHEMBL430150
316 | CCN(CC)CCNC(=O)c1cc(S(C)(=O)=O)ccc1OC CHEMBL84158
317 | CC1=NN(CCOc2ccc3ccccc3c2)C(=O)C1 CHEMBL8425
318 | Cc1nc(N)nc(N)c1-c1ccc(Cl)c(Cl)c1 CHEMBL264373
319 | CC(=O)NC(CC(=O)O)C(=O)NC(CCC(=O)O)C(=O)O CHEMBL1329032
320 | CCCCC1=NC2(CCCC2)C(=O)N1Cc1ccc(-c2ccccc2-c2nn[nH]n2)cc1 CHEMBL1513
321 | OCC(Br)(Br)Br CHEMBL1697852
322 | Oc1ccc2[nH]cc(CCCCN3CC=C(c4ccccc4)CC3)c2c1 CHEMBL431367
323 | CCc1nc(N)nc(N)c1-c1ccc(Cl)cc1 CHEMBL36
324 | ClC(Cl)=C(Cl)Cl CHEMBL114062
325 | OCC1OC(n2cnc3c2ncnc3S)C(O)C1O CHEMBL448290
326 | Cc1ccc(-n2nccn2)c(C(=O)N2CCN(c3nc4cc(Cl)ccc4o3)CCC2C)c1 CHEMBL1083659
327 | CNc1nc(Cl)c(SC)c(N2CCN(C)CC2)n1 CHEMBL407641
328 | CC(=O)Oc1cccc2c1C(=O)c1c(OC(C)=O)cc(C(=O)O)cc1C2=O CHEMBL41286
329 | CC(=O)OCC1=C(C(=O)O)N2C(=O)C(NC(=O)CSc3ccncc3)C2SC1 CHEMBL1599
330 | CCCCCN(C)CCC(O)(P(=O)(O)O)P(=O)(O)O CHEMBL997
331 | Fc1ccc(Cn2c3ccccc3nc2NC2CCNCC2)cc1 CHEMBL61301
332 | CC1(O)CCC2C3CCC4=CC(=O)CCC4=C3C=CC21C CHEMBL166444
333 | Cc1ccc(S(=O)(=O)NC(=O)NN2CC3CCCC3C2)cc1 CHEMBL427216
334 | CCc1cccc2cc(C(O)CNC(C)(C)C)oc21 CHEMBL296035
335 | Cc1nc2c(NCc3c(C)cccc3C)cc(C(=O)NCCO)cn2c1C CHEMBL497011
336 | C=CCNc1nc(NCC=C)nc(N2CCN(C(c3ccc(F)cc3)c3ccc(F)cc3)CC2)n1 CHEMBL1183717
337 | OCc1ccccc1OC1OC(CO)C(O)C(O)C1O CHEMBL462997
338 | CNNCc1ccc(C(=O)NC(C)C)cc1 CHEMBL1321
339 | CCN(CC)C(=O)C1CN2CCc3cc(OC)c(OC)cc3C2CC1OC(C)=O CHEMBL1201250
340 | CN(C)CCC(c1ccc(Cl)cc1)c1ccccn1 CHEMBL1201353
341 | O=C(Nc1ncc([N+](=O)[O-])s1)c1cccs1 CHEMBL1082354
342 | Cc1cccc(C(=O)O)c1O CHEMBL448399
343 | Cc1c2[nH]c3ccc(O)cc3c2c(C)c2c[n+](C)ccc12 CHEMBL16699
344 | CCCn1cnc2c1c(=O)n(CCCCC(C)=O)c(=O)n2C CHEMBL1079905
345 | COc1cc2c(cc1OC)-c1cc(=Nc3c(C)cc(C)cc3C)n(C)c(=O)n1CC2 CHEMBL285913
346 | CC(Cl)C(=O)Nc1ccc(C2=NNC(=O)CC2C)cc1 CHEMBL116368
347 | COc1ccc(Cl)cc1C(=O)NCCc1ccc(C(=O)O)cc1 CHEMBL149930
348 | [K+].[K+].[K+].O=C([O-])CC(O)(CC(=O)[O-])C(=O)[O-] CHEMBL1200458
349 | CC(C)S(=O)(=O)n1c(N)nc2ccc(C(=NO)c3ccccc3)cc21 CHEMBL283403
350 | CCOP(=S)(OCC)Oc1ccc(Cl)cc1Cl CHEMBL1396626
351 | NS(=O)(=O)c1ccc(N2C(=O)CC(c3ccccc3)C2=O)c(Cl)c1 CHEMBL315877
352 | O=C1NCCN1CCN1CCN(C2CC(c3ccc(F)cc3)c3ccccc32)CC1 CHEMBL73461
353 | O=C(O)c1cc(-c2ccccc2)nc2ccccc21 CHEMBL348000
354 | CC(C(=O)O)c1ccc(C(=O)c2ccccc2)s1 CHEMBL365795
355 | CCOCc1nc2c(c3ccccc3nc2N)n1CC(C)(C)O CHEMBL383322
356 | Cn1cnc2c1cc(C(=O)NOCCO)c(Nc1ccc(Br)cc1Cl)c2F CHEMBL1614701
357 | CC(=O)OC1(C(C)=O)CCC2C3C=C(Cl)C4=CC(=O)C5CC5C4(C)C3CCC21C CHEMBL139835
358 | COc1cc2c(c(OC)c1OC)-c1ccc(OC)c(=O)cc1C(NC(C)=O)CC2 CHEMBL107
359 | CC(O)(CC(=O)O)C1CCCCC1 CHEMBL52091
360 | Nc1nccs1 CHEMBL344760
361 | CCOC(=O)NC1=C(N2CC2)C(=O)C(NC(=O)OCC)=C(N2CC2)C1=O CHEMBL36016
362 | O=C(O)CCCCC(=O)O CHEMBL1157
363 | C=CCN1CCCC1CNC(=O)c1cc2nn[nH]c2cc1OC CHEMBL290194
364 | O=c1[nH]c2ccccc2n1CCCN1CCN(C(c2ccccc2)c2ccccc2)CC1 CHEMBL13828
365 | CC1CS(=O)(=O)CCN1N=Cc1ccc([N+](=O)[O-])o1 CHEMBL290960
366 | COc1cc(C2c3cc4c(cc3C(O)C3COC(=O)C32)OCO4)cc(OC)c1OC CHEMBL61
367 | NS(=O)(=O)Oc1ccc2c(c1)oc(=O)c1c2CCCCC1 CHEMBL286738
368 | NCCCCC(N)C(=O)O CHEMBL8085
369 | O=c1ccc2ccccc2o1 CHEMBL6466
370 | CNC(=O)OCc1cccc(COC(=O)NC)n1 CHEMBL1620144
371 | CC(COc1ccccc1)NN CHEMBL1909286
372 | C(COCCOCC1CO1)OCCOCC1CO1 CHEMBL460287
373 | CC(N)(Cc1ccc(O)cc1)C(=O)O CHEMBL1330596
374 | C[N+](C)(C)CCOP(=O)([O-])OCC(O)CO CHEMBL1567463
375 | CCn1cc(C(=O)O)c(=O)c2cc(F)c(N3CCN(C)CC3)cc21 CHEMBL267648
376 | CCNC1CN(CCCOC)S(=O)(=O)c2sc(S(N)(=O)=O)cc21 CHEMBL220491
377 | Cc1cccc(O)c1 CHEMBL298312
378 | O=P1(N(CCCl)CCCl)OCCCN1CCCl CHEMBL462019
379 | O=C(O)CCc1nc2ccccc2[nH]1 CHEMBL596009
380 | OCc1cc(C(O)CNCCCCCCOCCOCc2c(Cl)cccc2Cl)ccc1O CHEMBL1198857
381 | CC1CC(=O)NN=C1c1ccc(NN=C(C#N)C#N)cc1 CHEMBL313136
382 | COc1ccc(-c2c(Cl)ncn2-c2ccc(S(N)(=O)=O)cc2)cc1F CHEMBL435381
383 | O=C1c2cccc3cc([N+](=O)[O-])cc(c32)C(=O)N1CCN1CCCC1 CHEMBL46874
384 | O=C(C1CC(N2CCN(c3ncccn3)CC2)CN1)N1CCC(F)(F)C1 CHEMBL515387
385 | CCOC(=O)CC(O)(CC(=O)OCC)C(=O)OCC CHEMBL464988
386 | Cc1[nH]c2cccc(OCC(O)CNCCOc3ccccc3)c2c1Cl CHEMBL1742464
387 | Cc1ccc(C23CC2CNC3)cc1 CHEMBL511099
388 | C=CC(=O)Nc1cc2c(cc1OCCCN1CCOCC1)ncnc2Nc1ccc(F)c(Cl)c1 CHEMBL31965
389 | CC(Oc1c(Cl)cccc1Cl)C1=NCCN1 CHEMBL17860
390 | CN1c2ccc(Cl)cc2C(c2ccccc2)=NCC1=S CHEMBL1891606
391 | CCc1ccc(C(=O)C(C)CN2CCCC2)cc1 CHEMBL1797127
392 | CN1CCC(=C2c3ccccc3CCc3cccnc32)CC1 CHEMBL946
393 | C#CC1(O)CCC2C3CCC4=CC(=O)CCC4=C3C=CC21C CHEMBL1908319
394 | CN(C)c1ccc(C2CC3(C)C(CCC3(O)C=CCO)C3CCC4=CC(=O)CCC4=C23)cc1 CHEMBL1908329
395 | CC1(O)CCC2C3CCC4=C(O)C(=O)CCC4(C)C3CCC21C CHEMBL1908006
396 | NC(=O)C1CCN(CCCN2c3ccccc3Sc3ccc(Cl)cc32)CC1 CHEMBL1909072
397 | COC(=O)CCC[N+](C)(C)C CHEMBL1896433
398 | O=S(=O)(O)CCCS(=O)(=O)O CHEMBL2111092
399 | CC[N+](C)(C)CCSCC[N+](C)(C)CC CHEMBL2110709
400 | c1c(CCc2ccncc2)c2ccccc2n1Cc1ccccc1 CHEMBL2110797
401 | C=C(CN(C)C)C(Cc1ccccc1)(OC(=O)CC)c1ccccc1 CHEMBL2104265
402 | CCN(CC)Cc1ccc2oc(-c3ccccc3)c(C)c(=O)c2c1 CHEMBL2104916
403 | CC(C)(CO)NCC(O)c1ccccc1 CHEMBL2106269
404 | OCC(O)C1OC2OC(C(Cl)(Cl)Cl)OC2C1O CHEMBL2104181
405 | CNC(C)C(=O)c1ccc(OC)cc1 CHEMBL2106893
406 | O=C(O)c1ccccc1C(=O)c1ccc(O)c(-c2ccccc2)c1 CHEMBL2104248
407 | CC(=O)N=c1sccn1CC(O)c1cccs1 CHEMBL2105962
408 | COc1cc(OC)c(C(=O)OCCN2CCOCC2)c(OC)c1 CHEMBL2106750
409 | O=C1NC(=O)C2(c3ccc(Cl)cc3)CC12 CHEMBL2104097
410 | CN1CCN(C2=Nc3cc(Cl)ccc3Cc3sccc32)CC1 CHEMBL2105484
411 | Oc1cccc(Oc2nnn[nH]2)c1 CHEMBL2106850
412 | O=C(CCn1nnc(-c2cccc(Br)c2)n1)N1CCCCC1 CHEMBL2106508
413 | COc1cc(C(=O)NC(COc2ccccc2)CN2CCCC2)cc(OC)c1OC CHEMBL2106241
414 | CC(N)C(=O)NC(CCC(=O)C=[N+]=[N-])C(=O)NC(CCC(=O)C=[N+]=[N-])C(=O)O CHEMBL2105946
415 | CC(=O)NC(CSC(=O)c1ccccc1OC(C)=O)C(=O)O CHEMBL2104494
416 | O=S(=O)(O)c1ccc2[nH]c(-c3ccccc3)nc2c1 CHEMBL1987518
417 | COC1OC(C(COCc2ccccc2)OCc2ccccc2)C(OCc2ccccc2)C1O CHEMBL2105173
418 | NC(=O)c1[nH]nc(C2OC(CO)C(O)C2O)c1O CHEMBL2105330
419 | CCCCCCCCCNc1ncnc2[nH]ccc21 CHEMBL2106899
420 | COc1cc(C(=O)OCCCNCC2CC2(c2ccccc2)c2ccccc2)cc(OC)c1OC CHEMBL2107590
421 | CC1=CC(=NOCC(=O)O)c2ccccc2C1=O CHEMBL2110691
422 | Cn1c2ncn(CC(O)CN3CCN(CCCSc4ccccc4)CC3)c2c(=O)n(C)c1=O CHEMBL155962
423 | CN1C2=CC(=O)C(=NNC(N)=O)C=C2CC1S(=O)(=O)O CHEMBL1697827
424 | CC1(NC2=NS(=O)(=O)C3SC(Cl)=CC3N2)CC1 CHEMBL2107789
425 | CC(C(=O)O)c1cccc2c(-c3ccccc3)coc21 CHEMBL2107396
426 | CC(C)CN1CC2CN(C(C)C)CC(C1)C21CCCCC1 CHEMBL2104579
427 | CCN(CC)CCOc1ccc(C(=O)CCc2ccccc2)cc1 CHEMBL2104343
428 | Nc1c(NC(=O)C(F)(F)C(F)F)cc(C(F)(F)F)cc1[N+](=O)[O-] CHEMBL2106844
429 | c1ccc2c(c1)Sc1ccccc1N2CCCN1CCN(CCC2OCCCO2)CC1 CHEMBL2106987
430 | CC(C)N1CCN=C1CN1CCCCSc2ccc(Cl)cc21 CHEMBL2104610
431 | CCN1CCCC1CNC(=O)c1cc(C(C)=O)ccc1OC CHEMBL2106206
432 | COc1ccccc1Oc1c(CS(=O)(=O)c2ccc(C)cn2)nc(-c2ccncc2)nc1OC CHEMBL2107803
433 | CCCN1CC(NC(=O)N(CC)CC)CC2c3cccc4[nH]cc(c43)CC21 CHEMBL2106986
434 | COc1ccc(C=NNC(=O)c2ccncc2)cc1OC CHEMBL2104481
435 | CC1(C)OC2C3OC(C)(C)OCC3OC2(C(=O)O)O1 CHEMBL2110720
436 | COc1cc2nc(N(C)CCCNC(=O)C3CCCO3)nc(N)c2cc1OC CHEMBL709
437 | Cc1ccsc1C=CC1=NCCCN1C CHEMBL1240978
438 | O=C(Cn1ccnc1)c1ccc2ccccc2c1 CHEMBL416801
439 | Oc1ccc(CCCCNCC(O)c2ccc(O)c(O)c2)cc1 CHEMBL1201251
440 | Cc1cccc(Nc2cc(Cl)nc(SCC(=O)NCCO)n2)c1C CHEMBL1908320
441 | CCCC(C)(O)C1CC23C=CC1(O)C1Oc4c5c(ccc4O)CC2N(C)CCC513 CHEMBL1908334
442 | NC(=O)C1c2ccccc2CCc2ccccc21 CHEMBL1868301
443 | CC(CN1CCCC1)C(=O)c1ccc(C(F)(F)F)cc1 CHEMBL1951050
444 | Cc1c(CN2CCN(C(=O)C(C)O)CC2)sc2c1nc(-c1cnc(N)nc1)nc2N1CCOCC1 CHEMBL1922094
445 | [O-][n+]1cccc(CO)c1 CHEMBL1985503
446 | O=[N+]([O-])c1cccc(C2CN3CCSC3=N2)c1 CHEMBL2110956
447 | C=CCOc1cc(Cl)ccc1C(=O)NCCN(CC)CC CHEMBL2103989
448 | O=[N+]([O-])OCC(O[N+](=O)[O-])C(CO[N+](=O)[O-])O[N+](=O)[O-] CHEMBL2107583
449 | CCCCC(=O)OC1C(C=O)=CC(O)C(O)C1O CHEMBL2105321
450 | CC(C)C(CCN(C)C)(C(N)=O)c1cccc2ccccc21 CHEMBL2107641
451 | CCCCOCCOC(=O)C(c1ccccc1)N1CCCCC1 CHEMBL2104630
452 | CCC1C(O)N2C3CC45c6ccccc6N(C)C4C2CC1C3C5O CHEMBL2105617
453 | NC(CCC(=O)NCCS(=O)(=O)O)C(=O)O CHEMBL2106758
454 | CC=C(C)C(=O)OC1CC2CCC(C1)N2C CHEMBL2107120
455 | CN(C)c1cccc2c1cccc2S(=O)(=O)Nc1ccc(NC(=O)C(C)(C)COC(=O)CN)cc1 CHEMBL2104762
456 | NNC(=O)OC1CC2CCC(C1)N2C(=O)Oc1ccccc1 CHEMBL2104515
457 | OC(OCC1OC(O)C(O)C(O)C1O)C(Cl)(Cl)Cl CHEMBL2105990
458 | CCOC(=O)C1(c2ccccc2)CCN(CCC(O)c2ccccc2)CC1 CHEMBL2105385
459 | CCCCc1ccc(NC(=O)CN(CC(=O)O)CC(=O)O)cc1 CHEMBL2104651
460 | CC(=O)C1(O)CCC2C3CCC4=CC(=O)C=CC4(C)C3C(O)CC21C CHEMBL2105540
461 | CC(=O)NS(=O)(=O)c1cc(N)ccc1Sc1ccc(N)cc1 CHEMBL2110764
462 | CC(CO)(CO)NCc1cc2c3ccccc3ccc2c2ccccc12 CHEMBL61495
463 | CN1CCC23c4c5c(O)ccc4CC1C2CCC(=O)C3(C)O5 CHEMBL2110992
464 | COc1cc(O)c(C(=O)Nc2nc(C(=O)NCCN(C(C)C)C(C)C)cs2)cc1OC CHEMBL2107723
465 | Cc1ccccc1C(=O)N1CCC(=NOS(=O)(=O)O)c2ccc(Cl)cc21 CHEMBL2104373
466 | Nc1nc(N)c2c(n1)nc(N)nc2-c1ccccc1 CHEMBL2107705
467 | CC(CCc1ccc(O)cc1)NCC(O)c1ccc(O)cc1 CHEMBL2103767
468 | Cc1cc(O)c(Cl)c(C)c1Cl CHEMBL2104302
469 | COC1OC2OC3(CN4CCCCC4)C(O)CC(C13)C2C CHEMBL2107624
470 | C=CC1CNCCC1CCCc1ccnc2ccc(OC)cc12 CHEMBL2104497
471 | CC(S)C(=O)NCC(=O)O CHEMBL1314
472 | CC1=C(C)C(=O)C(CCCCC#CCCCC#CCO)=C(C)C1=O CHEMBL304818
473 | OCC(O)C(O)C(O)C(O)CO CHEMBL16105
474 | CC1(C)NC(c2ccccc2)C(=O)N1C1C(=O)N2C1SC(C)(C)C2C(=O)O CHEMBL1201116
475 | Cc1c(C)n(Cc2ccccc2)c2ccc(C(=O)OCCN(C)C)cc12 CHEMBL1518149
476 | CC12OC(=O)C1(C(O)C1C=CCCC1)NC(=O)C2CCCl CHEMBL371405
477 | CN(C)CCCC1(c2ccc(F)cc2)OCc2cc(C#N)ccc21 CHEMBL1508
478 | CC#CCn1c(N2CCCC(N)C2)nc2c1c(=O)n(Cc1nc(C)c3ccccc3n1)c(=O)n2C CHEMBL237500
479 | Cc1c(=S)ssc1-c1cnccn1 CHEMBL178459
480 | c1ccc2c(c1)sc1ccccc12 CHEMBL219828
481 | CC(=O)OC1CN2CCC1CC2 CHEMBL20835
482 | COc1cc2c(cc1OC)C(c1ccccc1)CN(C)CC2 CHEMBL343569
483 | C=CCC1(C(C)CCC)C(=O)NC(=S)NC1=O CHEMBL440
484 | [O-][S+](Cc1ccccn1)c1nc2ccccc2[nH]1 CHEMBL9861
485 | C[N+]1(CCCCC[N+]2(C)CCCC2)CCCC1 CHEMBL1271
486 | CCN(CC)CCNC(=O)c1ccc(NC(C)=O)cc1 CHEMBL1097
487 | CCCCC1CC(=O)C2(O)OC3C(NC)C(O)C(NC)C(O)C3OC2O1 CHEMBL1614655
488 | COCCOC(=O)C1=C(C)NC(C)=C(C(=O)OC(C)C)C1c1cccc([N+](=O)[O-])c1 CHEMBL1428
489 | CCc1nn(C2CCCC2)c2c1CCn1c(-c3cccs3)nnc1-2 CHEMBL217899
490 | COc1ccc2c(=O)c(C)c(-c3ccccc3)oc2c1CN(C)C CHEMBL519364
491 | CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(C)CC3)ccc1OCC)[nH]c2=O CHEMBL192
492 | CC[N+](C)(C)CCN(C)CC[N+](C)(C)CC CHEMBL1180418
493 | CC(C)COCC(CN(Cc1ccccc1)c1ccccc1)N1CCCC1 CHEMBL1008
494 | CCOP(=O)(OCC)SCC[N+](C)(C)C CHEMBL1201341
495 | O=C(OC1CN2CCC1CC2)N1CCc2ccccc2C1c1ccccc1 CHEMBL1734
496 | CNCCCC1c2ccccc2C=Cc2ccccc21 CHEMBL668
497 | OCC1OC(O)C(O)C(O)C1O CHEMBL1233058
498 | C=C(Br)CC1(C(C)CC)C(=O)NC(=O)NC1=O CHEMBL469253
499 | OCCN1CCN(CCC=C2c3ccccc3Sc3ccc(C(F)(F)F)cc32)CC1 CHEMBL54661
500 | Cc1cccc(OCC(O)CNC(C)C)c1 CHEMBL67096
501 | CCn1c(=O)c2ccccc2n(-c2cccc([N+](=O)[O-])c2)c1=O CHEMBL88990
502 | O=C1c2ccccc2C(=O)C1c1ccc(Br)cc1 CHEMBL288496
503 | CC1(C)CCCC(NC(=O)C(S)Cc2ccccc2)C(=O)N1CC(=O)O CHEMBL107747
504 | C=C(C)C1CC=C(C)CC1 CHEMBL449062
505 | [Zn+2].O=S(=O)([O-])[O-] CHEMBL1200929
506 | CCN(CC)CCOc1ccccc1OC(=Cc1ccccc1)C(C)=O CHEMBL164840
507 | CC(C)NCC(O)c1ccc([N+](=O)[O-])cc1 CHEMBL127349
508 | COc1cc([S+](C)[O-])ccc1-c1nc2ccc[nH]c-2n1 CHEMBL286020
509 | Cc1cc(C)cc(NC(=O)Cc2ccc(OC(C)(C)C(=O)O)cc2)c1 CHEMBL18901
510 | O=NN(CCCl)C(=O)NCCO CHEMBL284907
511 | NC(C(=O)O)c1ccc(CO)c(O)c1 CHEMBL472562
512 | CC(C)Nc1ncccn1 CHEMBL1740513
513 | CC1(c2ccccc2)OC(C(=O)O)=CC1=O CHEMBL278488
514 | CC(C)(C)NCC(O)c1cc(Cl)c(N)c(C(F)(F)F)c1 CHEMBL86749
515 | FC(F)(F)COCC(F)(F)F CHEMBL477874
516 | Oc1ccc(Nc2ccnc3cc(Cl)ccc32)cc1CN1CCCC1 CHEMBL1213257
517 | O=C(C(O)CS(=O)(=O)c1ccc2cc(Cl)ccc2c1)N1CCC(N2CCCNC2=O)CC1 CHEMBL1095032
518 | O=C(Nc1ccc([N+](=O)[O-])cc1Cl)c1cc(Cl)ccc1O CHEMBL1448
519 | CCCn1c2nc[nH]c2c(=O)[nH]c1=O CHEMBL279898
520 | CCN(CC)CCNC(=O)c1cc(Cl)c(NC(=O)COc2ccc(Cl)cc2)cc1OC CHEMBL10445
521 | CC(CN1CCOCC1)C(C(=O)N1CCCC1)(c1ccccc1)c1ccccc1 CHEMBL431928
522 | CC1OC(n2cc(F)c(=O)[nH]c2=O)C(O)C1O CHEMBL1130
523 | CCCCC1(COC(=O)CCC(=O)O)C(=O)N(c2ccccc2)N(c2ccccc2)C1=O CHEMBL1414320
524 | COc1c2ccoc2c(OC)c2oc(CSC)cc(=O)c12 CHEMBL297550
525 | O=C(O)CCC(=O)c1ccc(C2CCCCC2)c(Cl)c1 CHEMBL1697824
526 | CCCN1CCC(c2cccc(S(C)(=O)=O)c2)CC1 CHEMBL596802
527 | Cc1cc(O)cc(C)c1Cl CHEMBL398440
528 | CN(C)C(=O)CCCOC(=O)C(C)(C)Oc1ccc(Cl)cc1 CHEMBL1697831
529 | CC1(C)CC1C(=O)NC(=CCCCCSCC(N)C(=O)O)C(=O)O CHEMBL766
530 | CCOCCP(CCOCC)CCP(CCOCC)CCOCC CHEMBL1615784
531 | COCCc1ccc(OCC(O)C(C)(C)N)cc1 CHEMBL1742421
532 | CCCN1CCCC2Cc3n[nH]cc3CC21 CHEMBL240773
533 | CNc1ccc(C=Cc2ccc(OCCOCCOCCF)nc2)cc1 CHEMBL1908919
534 | CN1CCN(c2cc3c(nn2)Oc2ccccc2N3C)CC1 CHEMBL1886755
535 | CCCCOC(=O)CC(CC(=O)OCCCC)(OC(C)=O)C(=O)OCCCC CHEMBL1904556
536 | Cc1ccccc1N1CCN(CCc2nnc3n2CCCC3)CC1 CHEMBL1201216
537 | CCc1nn(CCCN2CCN(c3cccc(Cl)c3)CC2)c(=O)n1CCOc1ccccc1 CHEMBL623
538 | c1ccc2c(c1)Sc1ccccc1N2CC12CCN(CC1)CC2 CHEMBL1908311
539 | C=NC(C(=O)NC1C(=O)N2C1SC(C)(C)C2C(=O)O)c1ccccc1 CHEMBL1908324
540 | Oc1ccccc1-c1nnco1 CHEMBL1903897
541 | CN(C)C1C(O)=C(C(N)=O)C(=O)C2(O)C(O)=C3C(=O)c4c(O)cccc4C(C)(O)C3CC12 CHEMBL1236180
542 | Cc1cc(NC(=O)c2c(C)cccc2C)no1 CHEMBL1958077
543 | Cc1c(Cl)c(=O)oc2cc(OP(=O)(OCCCl)OCCCl)ccc12 CHEMBL1897362
544 | COc1ccc(F)cc1CC(O)(C1CCOCC1)C1CNCCO1 CHEMBL2103857
545 | COc1ccc2c(c1)CC(C)N(CCc1ccc(N)cc1)CC2 CHEMBL2110599
546 | OCCN1CCN(CCC=C2c3ccccc3COc3ccc(Cl)cc32)CC1 CHEMBL2110968
547 | CC(C)(C)C(O)C=Cc1ccc2c(c1)OCO2 CHEMBL1983350
548 | CC(C)CN(CC(C)C)CC(COC1c2[nH]cnc2N(C)C(=O)N1C)OC(=O)c1ccccc1 CHEMBL2106742
549 | NC(=O)C(O)C(O)C(O)C(O)C=O CHEMBL2107404
550 | CCOc1cc(OCC)cc(OCCN2CCOCC2)c1 CHEMBL2106361
551 | O=C1CN=C(c2ccccc2F)c2cc(Cl)ccc2N1CC1CC1 CHEMBL2106743
552 | CCOC(=O)C(CCc1ccccc1)NC(C)C(=O)N1C2CCC(CC2)C1C(=O)O CHEMBL2106476
553 | O=C1NC(=O)C2=C(CCCC2)C1=Cc1ccccc1 CHEMBL2107135
554 | CCCCCOc1ccccc1C(=CSC)n1ccnc1 CHEMBL2107447
555 | CC(=O)OC(C)C(=O)C1(O)CCC2C3CCC4=CC(=O)C=CC4(C)C3(F)C(O)CC21C CHEMBL2107401
556 | CN(C)CCC1(c2ccccc2)CCc2ccccc2C1=O CHEMBL2104267
557 | C=CCN1CCC23c4c5c(OC(C)=O)ccc4CC1C2C=CC(OC(C)=O)C3O5 CHEMBL2106214
558 | CC(C)=CCN1CCC2(C)c3cc(O)ccc3CC1C2(C)C CHEMBL2106302
559 | CC(C)c1ccc(C(=O)N(C)CCN2CCN(C(=O)c3cccc4ccoc43)CC2)cc1 CHEMBL2104002
560 | Cn1c2ncn(CCOC(O)C(Cl)(Cl)Cl)c2c(=O)n(C)c1=O CHEMBL2104491
561 | CC(N)C(=O)c1ccccc1 CHEMBL2104047
562 | CSCCC(NC(C)=O)C(=O)Oc1ccc(NC(C)=O)cc1 CHEMBL2107605
563 | NN=c1ccc(C(N)=O)n[nH]1 CHEMBL2106246
564 | CC(=O)OC1C=COC=C2CC34SSC5(CC6=COC=CC(O)C6N5C3=O)C(=O)N4C21 CHEMBL2104550
565 | CCN(CC)CCOC(=O)C1N2C(=O)C(NC(=O)c3c(OC)cccc3OC)C2SC1(C)C CHEMBL2105448
566 | CC1(c2ccccc2)CC(=O)N(CN2CCOCC2)C1=O CHEMBL2105127
567 | CC1(C)OC(C)(C)C(=CNCCNC=C2C(=O)C(C)(C)OC2(C)C)C1=O CHEMBL2106748
568 | [Ca+2].O=P([O-])([O-])OCC(O)CO CHEMBL2106124
569 | [Na+].NC(CCC(=O)O)C(=O)[O-] CHEMBL2106738
570 | [Na+].O=CC(O)C(O)C(O)C(O)C(=O)[O-] CHEMBL2104556
571 | [K+].O=C([O-])c1ccccc1 CHEMBL2105241
572 | [K+].O=C([O-])C(O)C(O)C(=O)O CHEMBL2105248
573 | NC(CN(Cc1ccccc1)c1ccccc1)=NO CHEMBL2110778
574 | Cc1c(CCOP(=O)(O)O)sc[n+]1Cc1cnc(C)nc1N CHEMBL1236378
575 | CCCCCC(C)(O)C=CC1C(O)CC(=O)C1CC=CCCCC(=O)O CHEMBL2105959
576 | CC(COc1ccccc1Cc1ccccc1)N1CCCCC1 CHEMBL2105910
577 | COc1cc(C2c3ccc(N)c(N)c3OC(N)=C2C#N)cc(Br)c1OC CHEMBL2103852
578 | O=C(NN1CCCCCC1)NS(=O)(=O)c1ccc(Cl)cc1 CHEMBL2106693
579 | CC12CCC3C(CCC4CC5SC5CC43C)C1CCC2O CHEMBL2106161
580 | CCOc1c(N2CCOCC2)cnn(C)c1=O CHEMBL2103954
581 | Cc1cc(S(C)(=O)=O)c(S(C)(=O)=O)cc1C(=O)NC(=N)N CHEMBL2107802
582 | CC(C)(Oc1ccc(C(O)c2ccc(Cl)cc2)cc1)C(=O)O CHEMBL2106581
583 | CCCC(=O)c1ccccc1OCCN(C(C)C)C(C)C CHEMBL2107306
584 | CC(C)(C)C(=O)NNCc1ccccc1 CHEMBL2106941
585 | CCCC(CC)C1(CC)C(=O)NC(=O)NC1=O CHEMBL2107053
586 | O=C(COc1ccc(Cl)cc1)OCCNC12CC3CC(CC(C3)C1)C2 CHEMBL2104053
587 | CN(C)C1CC(c2ccccc2)c2ccccc21 CHEMBL1190753
588 | c1cn(Cc2ccccc2)c(-c2nnn[nH]2)n1 CHEMBL2105919
589 | CC1(O)CCC2C3CC4=CC(=O)CCC4(C)C3CCC21C CHEMBL2103992
590 | OCC1(CO)COC(C(Cl)(Cl)Cl)OC1 CHEMBL2103943
591 | COC(=O)Nc1nc2ccc(C3(c4ccc(F)cc4)OCCO3)cc2[nH]1 CHEMBL2104600
592 | NC(Cc1cccc(N(CCCl)CCCl)c1)C(=O)O CHEMBL2107075
593 | CCC(C)OC(=O)N1CCCCC1CCO CHEMBL2104314
594 | COC(=O)c1ccccc1C(=O)c1ccc(OCCN2CCCCC2)cc1 CHEMBL2104821
595 | Cc1c2ccc(NC(=O)N3CCOCC3)cc2oc(=O)c1CCN1CCOCC1 CHEMBL2105094
596 | CCCCOCCOC(=O)c1cccnc1 CHEMBL2105161
597 | CCCNC(=O)c1ccc2c(c1)N(C(C)CN1CCCC1)c1ccccc1S2 CHEMBL2104029
598 | CCC1(CO)CCCN2CCc3c4ccccc4[nH]c3C21 CHEMBL2105559
599 | CN1C2CCC1CC(NC(=O)c1cn(C3CCCCC3)c3ccccc3c1=O)C2 CHEMBL2110961
600 | CN1CCC2=C(C1)C(c1ccccc1)c1ccccc12 CHEMBL278398
601 | CC(CCc1ccc(O)cc1)NCC(O)c1ccc(O)cc1 CHEMBL509336
602 | Cc1[nH]cnc1CSCCNC(=N)NCCCc1c[nH]cn1 CHEMBL12608
603 | CC(C)NNCc1ccc2c(c1)OCO2 CHEMBL2111045
604 | C[S+](C)CCOC(=O)C(c1ccccc1)C1CCCCC1 CHEMBL2111042
605 | CC12CCC3C(C4CC4C4=CC(=O)CCC43C)C1CCC2(O)CCC(=O)O CHEMBL2111001
606 | Cc1ccc(O)c(-n2nc3ccccc3n2)c1 CHEMBL1564747
607 | O=C(O)COc1ccccc1C=NNC(=O)c1ccncc1 CHEMBL1590674
608 | C#CCC1(OC(=O)CC)CCN(CCc2ccccc2)CC1 CHEMBL1742418
609 | COc1ccc(CCNCC(O)COc2ccc(OCC(C)=O)c3c2CCC(=O)N3)cc1OC CHEMBL1742442
610 | NC(Cc1c[nH]cn1)C(=O)O CHEMBL17962
611 | CC(NC(CCc1ccccc1)C(=O)O)C(=O)N1Cc2ccccc2CC1C(=O)O CHEMBL1733
612 | [K+].O=[N+]([O-])[O-] CHEMBL1644029
613 | NC(CC(O)COc1cccc2ccccc21)=NO CHEMBL357513
614 | CNC(=O)Oc1ccccc1OC(C)C CHEMBL446060
615 | O=C1NCN(c2ccccc2)C12CCN(CCCOc1ccc(F)cc1)CC2 CHEMBL79834
616 | CCCC(Nc1nc(-c2ccc(NC(=O)NCC)c(OC)c2)ncc1C)c1cccnc1 CHEMBL552212
617 | CC(C(=O)O)c1ccc2c(c1)[nH]c1ccc(Cl)cc12 CHEMBL1316
618 | C=C1C(CO)C(O)CC1n1cnc2c1nc(N)[nH]c2=O CHEMBL713
619 | CC1CCC2C(C)C(N3CCS(=O)(=O)CC3)OC3OC4(C)CCC1C23OO4 CHEMBL516268
620 | CN(C)CCC(N)(P(=O)(O)O)P(=O)(O)O CHEMBL122455
621 | CCN(CC)CCOC(=O)C(O)(c1ccccc1)c1ccccc1 CHEMBL70352
622 | OCCNCCO CHEMBL119604
623 | OCC1CNCC(O)C1O CHEMBL206468
624 | Cc1ccc(S(=O)(=O)NC(=O)NC2CCCCC2)cc1 CHEMBL312394
625 | CCCOC(=O)c1ccc(O)cc1 CHEMBL194014
626 | CC(CCC(=O)O)C1CCC2C3C(=O)CC4CC(=O)CCC4(C)C3CC(=O)C12C CHEMBL514446
627 | O=P(O)(O)OCC1OC(O)(COP(=O)(O)O)C(O)C1O CHEMBL97893
628 | O=C(O)CN(CCN(CC(=O)O)CC(=O)O)CCN(CC(=O)O)CC(=O)O CHEMBL780
629 | Cn1c2ncn(CCOC(=O)C(C)(C)Oc3ccc(Cl)cc3)c2c(=O)n(C)c1=O CHEMBL1318939
630 | NC(CS)C(=O)O CHEMBL863
631 | Cc1ccccc1CNc1ncnc2c1ncn2C1OC(CO)C(O)C1O CHEMBL331382
632 | O=C(O)C(Cc1cc(=O)[nH]c2ccccc12)NC(=O)c1ccc(Cl)cc1 CHEMBL1697771
633 | Cc1nn(C)c2c1C(c1cccc(Cl)c1)=NCCN2 CHEMBL174697
634 | Cc1nnc2n1-c1ccc(Cl)cc1C(c1ccccc1Cl)=NC2 CHEMBL646
635 | CC1(C)c2cc(C3=NNC(=O)CC3)ccc2NC1=O CHEMBL38224
636 | O=C1C[N+]([O-])=C(c2ccccc2)c2cc(Cl)ccc2N1 CHEMBL1597677
637 | NCCc1ccc(O)c(O)c1 CHEMBL59
638 | CC1(O)CCC2C3CC=C4CC(O)CCC4(C)C3CCC21C CHEMBL1079446
639 | Cn1c2ncn(CC(=O)O)c2c(=O)n(C)c1=O CHEMBL70246
640 | CN1C2CCC1CC(OC1c3ccccc3CCc3ccccc31)C2 CHEMBL1946186
641 | CCCCCC(=O)OCC(=O)C1C(C)CC2C3CC(F)C4=CC(=O)C=CC4(C)C3C(O)CC21C CHEMBL2107415
642 | FC(F)(Cl)Cl CHEMBL2106634
643 | CCCCCC1Nc2cc(C(F)(F)F)c(S(N)(=O)=O)cc2S(=O)(=O)N1 CHEMBL2107203
644 | CC(I)C1OCC(CO)O1 CHEMBL2105827
645 | NC(=O)c1cc[n+](CC2=C(C(=O)[O-])N3C(=O)C(NC(=O)Cc4cccs4)C3SC2)cc1 CHEMBL2105567
646 | O=P(O)(O)OC1C(O)C(O)C(O)C(OP(=O)(O)O)C1OP(=O)(O)O CHEMBL2106016
647 | CN(C)CCN(Cc1cccs1)c1ccccc1 CHEMBL2111026
648 | CCOCC1COc2c(Cl)c3c(cc2O1)[nH]cc(C(=O)OCC)c3=O CHEMBL2104710
649 | COc1ccc2cc(C(C)CO)ccc2c1 CHEMBL2105135
650 | O=c1nc2cccccc-2n1Cc1ccccc1 CHEMBL2104035
651 | Cc1cc(C)[n+]([N-]C(=O)c2ccc(Cl)c(S(N)(=O)=O)c2)c(C)c1 CHEMBL2106604
652 | Cc1c2c(cn1C)NC(=O)CN=C2c1ccccc1 CHEMBL2104741
653 | CCC(CC)(C(=O)O)c1cccc2ccccc21 CHEMBL2106771
654 | COc1cc2c(cc1OC)C(CCc1ccc(Cl)cc1)N(C)CC2 CHEMBL2106915
655 | CN(C)CC1COC2(O1)c1ccccc1CCc1ccccc12 CHEMBL2104552
656 | Oc1ccc(C2CNCc3c(O)c(O)ccc32)cc1O CHEMBL2105532
657 | O=C(OCc1ccccc1)C1(CCN2CCCCCC2)CCCCC1=O CHEMBL2105937
658 | CCC(CCCCC1CCC2CC(=O)CC12)OC CHEMBL2104105
659 | COc1ccc2c(c1OC)C13CCN(C)C(C2)C1(O)CCC(O)C3 CHEMBL2104603
660 | CCN(CC)CC(=O)Nc1c(C)cccc1C(=O)OC CHEMBL2105549
661 | CCC(OC(N)=O)C1CCCCC1 CHEMBL2104652
662 | CCc1oc2ccccc2c1C(=O)c1ccc(OCCN(CC)CC)cc1 CHEMBL2106228
663 | CCc1nncn1C1CCCCC1 CHEMBL2105027
664 | CCN1CCCC1CNS(=O)(=O)c1cc(Cl)c(N)cc1OC CHEMBL2105073
665 | COC(=O)c1c2ccccc2n2c1C1CN(C)CCN1c1ccccc1C2 CHEMBL2110702
666 | CN(C)CCOCCOC(=O)N1c2ccccc2Sc2ccccc21 CHEMBL2110862
667 | CC1NCc2cc(-c3ccc4c(=O)c(C(=O)O)cn(C5CC5)c4c3OC(F)F)ccc21 CHEMBL215303
668 | O=C(O)CSc1nnc(Br)n1-c1ccc(C2CC2)c2ccccc21 CHEMBL2105720
669 | Clc1ccc([I+]c2ccc(Cl)cc2Cl)c(Cl)c1 CHEMBL2110834
670 | CCN(CC)CCOC(=O)C1(c2ccc(C)c(C)c2)CCCC1 CHEMBL2110967
671 | CC(=O)OC1(C(C)CN(C)C)CCCCC1c1ccccc1 CHEMBL2110963
672 | CCCCCCCCCCCC[N+](C)(C)CC(=O)OCC CHEMBL2111046
673 | Clc1ccc(OCC2=NCCc3ccccc32)cc1 CHEMBL2110933
674 | O=C(O)CN1CCN(CC(=O)O)CCN(C(CO)C(O)CO)CCN(CC(=O)O)CC1 CHEMBL2111181
675 | C#CCOCC1COC(Cn2ccnc2)(c2ccc(Cl)cc2Cl)O1 CHEMBL2110654
676 | [Ca+2].[Br-].[Br-] CHEMBL2105852
677 | [Mg+2].O=C([O-])C(O)C(O)C(O)C(O)CO.O=C([O-])C(O)C(O)C(O)C(O)CO CHEMBL2107145
678 | Cn1cccc1C(=O)C(C#N)C(=O)Nc1ccccc1 CHEMBL2111033
679 | [Mg+2].[Mg+2].[Mg+2].O=P([O-])([O-])[O-].O=P([O-])([O-])[O-] CHEMBL2106873
680 | CC(=O)C1=C(O)C(N)C2Cc3c(C)c4ccc(C)c(O)c4c(O)c3C(=O)C2(O)C1=O CHEMBL2110884
681 | N=C(N)c1ccc(C=C2c3ccccc3-c3ccccc32)cc1 CHEMBL2111004
682 | CCN(CC)C(C)CC(=O)Nc1ccccc1 CHEMBL2104563
683 | CC(=NCc1ccccc1)c1ccccc1O CHEMBL2105284
684 | CC(Cc1ccccc1)NCCC#N CHEMBL2105566
685 | CCN(CC)CC(C)(C)COC(=O)C(COP(=O)(O)O)c1ccccc1 CHEMBL2104488
686 | CCOC(=O)c1nc2c3cc(C)ccc3n(CC)c(=O)n2n1 CHEMBL2105192
687 | Cc1cc(Cl)cc(C(=NCCCC(N)=O)c2ccc(Cl)cc2)c1O CHEMBL2104937
688 | COc1ccccc1N1CCN(CC(O)c2ccc(O)c(O)c2)CC1 CHEMBL2105227
689 | CC(ON=C1c2ncncc2-c2cc(Br)cc(Br)c21)C(=O)NCCN(C)C CHEMBL2106169
690 | CC(=O)Oc1ccc([N+](=O)[O-])c(OC(C)=O)c1C(=O)Nc1cc(C(F)(F)F)cc(C(F)(F)F)c1 CHEMBL2104705
691 | COc1ccccc1N1CCN(CCc2c(C)[nH]c3cc(OC)c(OC)cc32)CC1 CHEMBL2105252
692 | COc1ccc(OC)c(C(O)C(C)NC(C)(C)C)c1 CHEMBL289093
693 | Cn1ncc(Cl)c1-c1cc(C(=O)NC(CN)Cc2cccc(F)c2)sc1Cl CHEMBL2219422
694 | O=C(c1ccco1)N(c1cnccn1)C1CCN(CCc2ccccc2)CC1 CHEMBL161084
695 | COc1c(-c2ccc3cc(NS(C)(=O)=O)ccc3c2)cc(-n2ccc(=O)[nH]c2=O)cc1C(C)(C)C CHEMBL3137312
696 | CCC1(c2ccccc2)OCC(C2CCCCN2)O1 CHEMBL305904
697 | CN(C)CCC(O)(P(=O)(O)O)P(=O)(O)O CHEMBL55812
698 | COc1cc2nccc(Oc3ccc(NC(=O)NC4CC4)c(Cl)c3)c2cc1C(N)=O CHEMBL1289601
699 | NC(=O)Oc1ccc(Cc2ccccc2)cc1 CHEMBL608856
700 | COCC1CN=C(c2ccccc2Cl)c2cc(Br)ccc2N1C CHEMBL1290783
701 | CCCCOC(=O)c1ccc(O)cc1 CHEMBL459008
702 | CCOC(=O)C(CCc1ccccc1)NC(C)C(=O)N1C(=O)N(C)CC1C(=O)O CHEMBL317094
703 | O=C1c2ccccc2C(=O)C1c1ccc(F)cc1 CHEMBL24924
704 | CCC(O)C(CC(C)N(C)C)(c1ccccc1)c1ccccc1 CHEMBL162243
705 | CCOC(=O)Oc1ccc(CCNC(=O)C(CCSC)NC(C)=O)cc1OC(=O)OCC CHEMBL2106351
706 | O=C(OCCOCCN1CCCCC1)N1c2ccccc2Sc2cccnc21 CHEMBL2104900
707 | CC(Oc1c(Cl)cccc1Cl)C1=NCCN1 CHEMBL2107417
708 | CCCC1(CC(=O)O)OCCc2c3ccccc3[nH]c21 CHEMBL1968386
709 | CCN(CC)CCN1c2ccccc2Sc2ccccc21 CHEMBL1620412
710 | C=C1CC(C)C2C3Cc4ccc(O)cc4C2(CCN3CC2CCC2)C1 CHEMBL2111060
711 | CCOCCOC(=O)C=Cc1ccc(OC)cc1 CHEMBL2104045
712 | COc1ccccc1OCC1CNC(=O)O1 CHEMBL2104790
713 | Oc1cc2c(cc1O)C(COc1ccc(Cl)cc1)NCC2 CHEMBL2106010
714 | CCN(CC)CCOCC(O)COc1ccccc1OC CHEMBL2107454
715 | NC(=O)c1ccccc1OCC(=O)OCC1CCCO1 CHEMBL2104233
716 | COc1cc(N)c(Cl)cc1C(=O)NC1CCNCC1OC CHEMBL2107465
717 | COC1=CC(=O)OC1C(O)c1ccccc1Cl CHEMBL2106442
718 | CCCC(=O)OC1(C(=O)COC(C)=O)CCC2C3CC(C)C4=CC(=O)CCC4(C)C3C(O)CC21C CHEMBL2104661
719 | CC(C)C(=O)OC(OC(=O)NCC(CC(=O)O)c1ccc(Cl)cc1)C(C)C CHEMBL2107312
720 | CCc1c(C(=O)C(N)=O)c2c(cccc2OCC(=O)OC)n1Cc1ccccc1 CHEMBL2105659
721 | CN(C)CCOC(=O)c1ccc(Cl)c(N)c1 CHEMBL2106019
722 | Cc1nc2cc(C(c3ccccc3)n3ccnc3)ccc2[nH]1 CHEMBL2105112
723 | CCOC(=O)OC(C)OC(=O)C1N2C(=O)C(N=CN3CCCCCC3)C2SC1(C)C CHEMBL2106658
724 | NC(=O)c1ccc(Cl)c(S(N)(=O)=O)c1 CHEMBL2107090
725 | CNCC1CCc2sc(C)nc2C1 CHEMBL2104686
726 | O=C1OC2C(O)C(O)OC2C1O CHEMBL2107425
727 | CC(S)C(=O)NCC(=O)O CHEMBL2106444
728 | COc1ccc(C(O)CN2CCN(C(c3ccccc3)c3ccccc3)CC2)cc1OC CHEMBL2074664
729 | N=C(N)c1ccc(C=Cc2ccc(C(=N)N)cc2)cc1 CHEMBL142304
730 | CN(C)CCC=C1c2ccccc2C(C)(C)c2ccccc21 CHEMBL110094
731 | O=C1c2ccccc2S(=O)(=O)N1CCCCN1CCN(c2ncccn2)CC1 CHEMBL8412
732 | OC(CNc1ncccn1)c1ccccc1 CHEMBL2110896
733 | O=c1[nH]c2ccccc2c(=O)n1CCCN1CCN(c2cccc(Cl)c2)CC1 CHEMBL2110792
734 | CN(C)CCC=C1c2cccn2CCc2ccc(Cl)cc21 CHEMBL2110942
735 | OCCCN1CCN(CCCC2c3ccccc3Sc3ccc(Cl)cc32)CC1 CHEMBL2110631
736 | CC1(C)SC2C(NC(=O)CSc3ccccc3)C(=O)N2C1C(=O)O CHEMBL2111130
737 | CCCc1c2oc(C(=O)O)cc(=O)c2cc2c(=O)cc(C(=O)O)oc21 CHEMBL2110970
738 | CC(C)NC1C2CCC(CC2)C1(O)c1ccc(Cl)c(Cl)c1 CHEMBL2110879
739 | CC(=O)C=C(C)c1ccc(-c2ccccc2)cc1 CHEMBL2104473
740 | N=C(N)NC(=O)c1ccc(N2CCN(C(=O)c3ccc[nH]3)CC2)c(C(F)(F)F)c1 CHEMBL2105423
741 | CC(=C(CCO)SSCC1CCCO1)N(C=O)Cc1cnc(C)nc1N CHEMBL1740659
742 | C=CCNc1nc(C(=O)NN=CC=Cc2ccc([N+](=O)[O-])o2)cs1 CHEMBL2105145
743 | CCN(CC)CCN1C(=O)c2ccccc2C1c1ccccc1 CHEMBL2104536
744 | CC(C)OC(=O)OC1CCCCC1 CHEMBL2104038
745 | CC(C)(C)NCC(O)COCC1COc2ccccc2O1 CHEMBL2106691
746 | COC(=O)C1(c2ccccc2)CCCN(C)C(C)C1 CHEMBL2104520
747 | CC(=O)NC(C(O)CC(=O)C(=O)O)C(O)C(O)C(O)CO CHEMBL2105945
748 | CC[N+](C)(C)CCOC(=O)C(O)(c1ccccc1)c1ccccc1 CHEMBL2110848
749 | [Ca+2].O=P[O-].O=P[O-] CHEMBL2218895
750 | CSC[S+]([O-])CC(CO)NC(=O)C=Cc1c(C)nc(O)nc1O CHEMBL2303630
751 | Cc1cnc(NC(=O)Nc2cc(Br)c(C)cc2OCC2CNCCO2)cn1 CHEMBL3039517
752 | Cc1nc2ccccc2n1Cc1ccc(Cl)cc1 CHEMBL152649
753 | O=c1n(Cl)c(=O)n(Cl)c(=O)n1Cl CHEMBL1698868
754 | OCCOCCOCCN1CCN(C(c2ccccc2)c2ccc(Cl)cc2)CC1 CHEMBL2104263
755 | CCN(CC)CCOC(=O)N1CCC(Cc2ccccc2)CC1 CHEMBL2105607
756 | CCC(C)(C)NCC(O)COc1cc(CCC(=O)c2ccc(C)cc2)ccc1OC CHEMBL2104075
757 | CCCCCCNC(=N)NC(=N)N CHEMBL2106939
758 | O=C(N1CCCC1)C(CCN1CCOCC1)(c1ccccc1)c1ccccc1 CHEMBL2106152
759 | [Ca+2].[Ca+2].[Ca+2].O=C([O-])CC(O)(CC(=O)[O-])C(=O)[O-].O=C([O-])CC(O)(CC(=O)[O-])C(=O)[O-] CHEMBL2106123
760 | C[N+](C)(C)CCOC(=O)C(O)(c1ccccc1)C1CC2C=CC1C2 CHEMBL2110818
761 | Cc1ccc(C(C)OC(=O)C2(C)CCC(C(=O)O)C2(C)C)cc1 CHEMBL2110648
762 | O=C(Nc1ccc(Cl)c(S(=O)(=O)N2CCNCC2)c1O)Nc1cccc(F)c1Cl CHEMBL2178579
763 | CCC(=O)N(c1ccc(Cl)c(Cl)c1)C1CCCC1N(C)C CHEMBL20679
764 | Cl.CCCCNc1ccc(C(=O)OCCN(CC)CC)cc1 CHEMBL593548
765 | CCN(CC)CCOC(=O)CC(O)(c1ccccc1)c1ccccc1 CHEMBL2107672
766 | COC1C(O)C(N)C(OC2OC(C(C)N)CCC2N)C(O)C1N(C)C(=O)CN CHEMBL3084803
767 | Oc1cc(O)c(Cl)cn1 CHEMBL1730601
768 | [Na+].O=C([O-])CCC(=O)O CHEMBL1200345
769 | Cc1ccc2c(-c3ccccc3)nc(=O)n(C(C)C)c2c1 CHEMBL268501
770 | CC12CCC3C(CCC4=CC(=O)CCC43C)C1CCC2=O CHEMBL274826
771 | CCC(c1ccc(O)cc1)C(CC)c1ccc(O)cc1 CHEMBL9225
772 | CCOC(=O)C(C)(C)Oc1ccc(Cl)cc1 CHEMBL565
773 | CC(O)C1C(=O)N2C(C(=O)O)=C(C3CCCO3)SC12 CHEMBL556262
774 | Nc1c2c(nc3c1CCCC3)CCC2 CHEMBL130880
775 | CC1(c2nc3cccc(C(N)=O)c3[nH]2)CCCN1 CHEMBL506871
776 | O=C(O)C=CC(=O)O CHEMBL503160
777 | O=C1NC(CCCl)Oc2ccccc21 CHEMBL124815
778 | O=c1[nH]cnc2c(CN3CC(O)C(CO)C3)c[nH]c21 CHEMBL269864
779 | O=[N+]([O-])c1cn2c(n1)OCC(OCc1ccc(OC(F)(F)F)cc1)C2 CHEMBL227875
780 | CCCCC(CC)CNC(=O)CC(C)O CHEMBL452859
781 | Cc1nnc(SCC2=C(C(=O)O)N3C(=O)C(NC(=O)Cn4cnnn4)C3SC2)s1 CHEMBL1435
782 | Cc1cc(NC(=O)C(=O)c2cc(Cc3ccc(C#N)cc3)n3ccccc23)sn1 CHEMBL271068
783 | CC(O)(CC(=O)O)CC(=O)O CHEMBL50444
784 | COc1ccccc1O CHEMBL13766
785 | c1ccc(Cn2c3ccccc3nc2N2CCNCC2)cc1 CHEMBL56900
786 | COc1cc(OC)c(Cl)c2c1C(=O)C1(O2)C(OC)=CC(=O)CC1C CHEMBL562
787 | CCN(CC)CCCC(C)Nc1cc(OC)cc2cccnc21 CHEMBL472698
788 | CC(CCC(=O)O)C1CCC2C3C(O)CC4CC(O)CCC4(C)C3CCC12C CHEMBL1551
789 | Cc1cc(NS(=O)(=O)c2ccc(N)cc2)nc(C)n1 CHEMBL485696
790 | Cn1c2ncn(CCO)c2c(=O)n(C)c1=O CHEMBL699
791 | CC(C)NCC(O)COc1ccccc1-n1cccc1 CHEMBL27077
792 | CCCCCCCCC=CCCCCCCC(N)(CO)C(=O)O CHEMBL1200394
793 | O=[N+]([O-])OC1COC2C(O)COC12 CHEMBL1311
794 | CCOC(=O)c1c(C)cc2c(CO)nnc(O)c2c1C CHEMBL153427
795 | O=C(C=Cc1cccc(F)c1)NC1CC1 CHEMBL132663
796 | CC(Cc1cccc(C(F)(F)F)c1)NCCOC(=O)c1ccccc1 CHEMBL400599
797 | Cc1cc(C)cc(C(=O)NCc2ccncc2)c1 CHEMBL1581974
798 | Nc1ccc(C(=O)Nc2ccccc2N)cc1 CHEMBL1351761
799 | CCC(O)c1ccccc1 CHEMBL1397202
800 | CCn1cc(C(=O)O)c(=O)c2cnc(N3CCCC3)nc21 CHEMBL311350
801 | O=C(O)C1CSC(c2ccccc2O)N1C(=O)CCS CHEMBL309962
802 | [N-]=[N+]=CC(=O)OCC(N)C(=O)O CHEMBL1095699
803 | C=CC1(C)CC(=O)C2(O)C(C)(O1)C(OC(C)=O)C(O)C1C(C)(C)CCC(O)C12C CHEMBL52606
804 | CC(=O)NN=Cc1ccc([N+](=O)[O-])o1 CHEMBL1565322
805 | O=[As]O[As]=O CHEMBL1200978
806 | CC(C)N(CCC(CCN1CCCCC1)(C(N)=O)c1ccccc1Cl)C(C)C CHEMBL276177
807 | [Cl-].C[N+](C)(C)CCO CHEMBL282468
808 | COP(=O)(OC)C(O)C(Cl)(Cl)Cl CHEMBL167150
809 | CN(C)CCCC1(c2ccc(F)cc2)OCc2cc(C#N)ccc21 CHEMBL549
810 | C=CCC1(C(C)C)C(=O)NC(=O)NC1=O CHEMBL7863
811 | O=c1n(CC2CO2)c(=O)n(CC2CO2)c(=O)n1CC1CO1 CHEMBL453863
812 | CS(=O)(=O)OCCCNCCCOS(C)(=O)=O CHEMBL96292
813 | OCCN1CCN(CCC=C2c3ccccc3Sc3ccc(Cl)cc32)CC1 CHEMBL87385
814 | CCC(C)C(C)(COC(N)=O)COC(N)=O CHEMBL1200922
815 | CC1CC(=O)NN=C1c1ccc2c(c1)OCC(=O)N2 CHEMBL46765
816 | C#CC1(O)C=CC2C3CCC4=CC(=O)CCC4C3CCC21CC CHEMBL1213583
817 | Oc1cc(O)c2c(c1)OC(c1ccc(O)c(O)c1)C(O)C2O CHEMBL123809
818 | COc1ccc(C2(C#N)CCC(C(=O)O)CC2)cc1OC1CCCC1 CHEMBL511115
819 | CN1CCC(=C2c3ccccc3C(O)c3ccccc32)CC1 CHEMBL440557
820 | NCCc1c[nH]cn1 CHEMBL90
821 | C#CC1(O)CCC2C3CCc4cc(O)ccc4C3C(OC)CC21C CHEMBL1628161
822 | CCCOC(C(=O)OC1CCN(C)CC1)(c1ccccc1)c1ccccc1 CHEMBL1078261
823 | O=C(CCCN1CC=C(n2c(=O)[nH]c3ccccc32)CC1)c1ccc(F)cc1 CHEMBL1108
824 | COc1cc(Cc2cnc(N)nc2N)cc(OC)c1N CHEMBL122351
825 | [Na+].O=C1OC(C(O)CO)C([O-])=C1O CHEMBL591665
826 | Cc1cc2nc3c(=O)[nH]c(=O)nc-3n(CC(O)C(O)C(O)COP(=O)(O)O)c2cc1C CHEMBL1201794
827 | Nc1ncnc2c1ncn2C1OC(CO)C(O)C1O CHEMBL1090
828 | COCC(NC(C)=O)C(=O)NCc1ccccc1 CHEMBL58323
829 | CCN(CC)C(=S)S CHEMBL961
830 | CC(C)(C)C1(O)CCN2CC3c4ccccc4CCc4cccc(c43)C2C1 CHEMBL8514
831 | CCC1(c2ccc(N)cc2)CCC(=O)NC1=O CHEMBL488
832 | [Na+].O=C([O-])C(Cl)Cl CHEMBL306823
833 | CCOC(=O)COc1ccc2c(c1)CC(NCC(O)c1cccc(Cl)c1)CC2 CHEMBL1193948
834 | CCOC(=O)C(CCc1ccccc1)NC(C)C(=O)N1CC2(CC1C(=O)O)SCCS2 CHEMBL431
835 | C[N+]1(C)CCN(CC(O)(c2ccccc2)C2CCCCC2)CC1 CHEMBL1201325
836 | COC(=O)C1=CCCN(C)C1 CHEMBL7303
837 | CCCN1CC(CSC)CC2c3cccc4[nH]cc(c43)CC21 CHEMBL531
838 | CN(C)CCCN1c2ccccc2Sc2ccccc21 CHEMBL564
839 | CN(CCCl)CCCl CHEMBL427
840 | NC(=O)C(c1ccccc1)(c1ccccc1)C1CCN(CCc2ccc3c(c2)CCO3)C1 CHEMBL1346
841 | C#CCOc1ccc2c(C)cc(=O)oc2c1 CHEMBL1377940
842 | O=[N+]([O-])c1ccc(Nc2ccc(N=C=S)cc2)cc1 CHEMBL93385
843 | O=C(C(c1ccccc1)c1ccccc1)N1CCN(CC(O)COc2cccc3ncccc32)CC1 CHEMBL65067
844 | O=C(C[S+]([O-])C(c1ccccc1)c1ccccc1)NO CHEMBL93077
845 | COCN1c2ccc([N+](=O)[O-])cc2C(c2ccccc2)=NCC1=O CHEMBL146817
846 | CN(C(=O)c1c(O)c2ccccc2n(C)c1=O)c1ccccc1 CHEMBL11672
847 | COC(S)=NCCn1c(C)ncc1[N+](=O)[O-] CHEMBL135000
848 | CCC1(CC)C(=O)NCC(C)C1=O CHEMBL1200790
849 | NCCCCCC(O)(P(=O)(O)O)P(=O)(O)O CHEMBL55214
850 | COc1ccc2c(c1)CN(C)CC2c1ccc(Cl)c(Cl)c1 CHEMBL287257
851 | O=S(=O)(c1nc(-c2ccc(F)cc2)c(-c2ccc(F)cc2)[nH]1)C(F)(F)C(F)F CHEMBL18647
852 | CC(C)NCC(O)c1ccc2ccccc2c1 CHEMBL16476
853 | CC(=O)c1ccc2c(c1)N(CCCN1CCC(CCO)CC1)c1ccccc1S2 CHEMBL1584
854 | O=C1CN(N=Cc2ccc(-c3ccc([N+](=O)[O-])cc3)o2)C(=O)N1 CHEMBL1201288
855 | CCOC(=O)C1(c2ccccc2)CCN(CCc2ccccc2)CC1 CHEMBL285517
856 | O=C(O)CCNC(=O)c1ccccc1 CHEMBL1231530
857 | Oc1ccc(OCc2ccccc2)cc1 CHEMBL1388
858 | Cn1c(COC(N)=O)ncc1[N+](=O)[O-] CHEMBL290299
859 | O=C1CSC2(CCN(CCCN3c4ccccc4Sc4ccc(Cl)cc43)CC2)N1 CHEMBL1483796
860 | FC(F)(F)C(F)(F)C(F)(F)F CHEMBL1663
861 | COc1ccc(-c2noc(CC(=O)O)c2-c2ccc(OC)cc2)cc1 CHEMBL259972
862 | NCCCC(=O)O CHEMBL96
863 | CCNC1(c2ccccc2)CCCCC1 CHEMBL279924
864 | CCC(C(=O)O)C1(O)CCCCC1 CHEMBL1697739
865 | CC(C)OP(=O)(F)OC(C)C CHEMBL1025
866 | CC(N)C(O)c1ccccc1 CHEMBL61006
867 | Cc1cc(=O)oc2cc(O)ccc12 CHEMBL12208
868 | CC(C)n1c2ccccc2n2cnc(-c3noc(C4CC4)n3)c2c1=O CHEMBL279867
869 | Cc1ncc([N+](=O)[O-])n1CC(C)O CHEMBL498847
870 | O=C(O)CNC(=O)CNC(=O)CNC(=O)CS CHEMBL1615778
871 | CCN(CC)CCNC(=O)c1ccc(N)c(Cl)c1 CHEMBL1618378
872 | CCCOc1cc(N)ccc1C(=O)OCCN(CC)CC CHEMBL1195
873 | CC(C)NCCCC1(C(N)=O)c2ccccc2-c2ccccc21 CHEMBL1201242
874 | Cc1cc(S(=O)(=O)O)c2c(C)ccc(C(C)C)cc1-2 CHEMBL1852533
875 | CN(C)C1CCc2[nH]c3c(cc(F)cc3F)c2C1 CHEMBL1882682
876 | COC1OC2OC3(C)CCC4C(C)CCC(C1C)C42OO3 CHEMBL566534
877 | CC(C)C(=O)Nc1cccc(C2CN3CCSC3=N2)c1 CHEMBL2110807
878 | CNC1(C)C2CCC(C2)C1(C)C CHEMBL2103881
879 | CC1CN(N=Cc2ccc([N+](=O)[O-])o2)C(=O)O1 CHEMBL2107517
880 | OC(C1=CC(=C(c2ccccn2)c2ccccn2)C=C1)(c1ccccn1)c1ccccn1 CHEMBL2105304
881 | COC(=O)C(C)c1cccc(C(c2ccccc2)n2ccnc2)c1 CHEMBL2107439
882 | [O-][N+]1=C(c2ccccc2)c2cc(Cl)ccc2N=C(NCC2CC2)C1 CHEMBL2104165
883 | Cc1ccccc1OCC(O)CN1CC=CCC1 CHEMBL2107693
884 | O=C(Nc1ccc(CC2CCC(C(O)c3ccccc3)N2)cc1)C1CCc2nccc(=O)n21 CHEMBL2107826
885 | O=C1c2ccccc2C(=O)C1c1ccc(C(F)(F)F)cc1 CHEMBL2105129
886 | CCC(C)(CC)OC(N)=O CHEMBL2104208
887 | COc1ccccc1OCCNCC(O)COc1cccc2[nH]nnc21 CHEMBL2104914
888 | C[N+](C)(CCO)CC(=O)[O-] CHEMBL2105274
889 | CC(C)(O)CC(C)(O)c1ccc(Cl)cc1 CHEMBL2106273
890 | CN(C)C(=O)COC1c2ccccc2CCc2ccccc21 CHEMBL2104766
891 | CN(C)C(=O)NC1CCC(CCN2CCN(c3cccc(Cl)c3Cl)CC2)CC1 CHEMBL2028019
892 | OCc1cccc(Cc2c[nH]cn2)c1O CHEMBL2104355
893 | c1cc(CNCc2ccncc2)ccn1 CHEMBL2103958
894 | C[N+]1(C)CCC(OC(=O)C(O)(c2cccs2)c2ccccc2)C1 CHEMBL2110853
895 | CC[N+]1(CC)CCC(OC(=O)C(O)(c2ccccc2)c2ccccc2)C1 CHEMBL2110788
896 | c1ccc2c(c1)CCN1CCNCC21 CHEMBL2104690
897 | CC(CC#N)N(C)CC(=O)N(C)c1ccc(Cl)cc1C(=O)c1ccccc1F CHEMBL2104077
898 | O=C(O)CCCNC(=O)c1ccc(Cl)cc1O CHEMBL2107776
899 | O=C(O)CC1(CO)CCCCC1 CHEMBL2110826
900 | CN(C(=O)CNC1CC1)c1ccc(Cl)cc1C(=O)c1ccccc1Cl CHEMBL2104597
901 | COC(=O)CCC=C=CCC1C(O)CC(O)C1C=CC(O)COc1ccccc1 CHEMBL2104232
902 | O=c1cc(CN2CCOCC2)c2cc(O)c(O)cc2o1 CHEMBL2104742
903 | CC(C)C(OC(=O)c1cccnc1)c1ccc(Cl)cc1 CHEMBL2107111
904 | CCCCN(C(=O)C(C)C)c1nc(C)co1 CHEMBL2105068
905 | CCc1cc(S(=O)(=O)O)c2cc(C(C)C)cccc1-2 CHEMBL2105570
906 | COc1cc2c(cc1OC)S(=O)(=O)OC(C(=O)NC(C)CC(C)(C)N(C)C)C2 CHEMBL2105495
907 | O=C1COC(c2ccc(C(F)(F)F)cc2)CN1 CHEMBL2105520
908 | COC(=O)c1cnc2cc(OC(C)C)c(OC(C)C)cc2c1O CHEMBL2105380
909 | Nc1ncc(S(=O)(=O)c2ccc(N)cc2)s1 CHEMBL2107139
910 | COc1ccc2c3c1OC1C(OC(=O)c4cccnc4)CCC4C(C2)N(C)CCC314 CHEMBL2104623
911 | NC(=O)c1ccc(Oc2ccc(CNCCC3CCOCC3)cc2F)nc1 CHEMBL2103878
912 | OCC(O)C(O)C(O)C(O)COC1OC(CO)C(O)C(O)C1O CHEMBL2104398
913 | CCCCOCC(O)COc1ccccc1 CHEMBL2104223
914 | CN(C)CCN(C)n1cc(-c2ccccc2)c2ccccc21 CHEMBL2104611
915 | CCCCC(C)C(O)C=CC1C(O)CC2OC(=C(C#N)CCCC(=O)O)CC21 CHEMBL2104724
916 | Cn1c2ncn(CCCNC3COC4C(O[N+](=O)[O-])COC34)c2c(=O)n(C)c1=O CHEMBL2107483
917 | CCOC(=O)C1(c2ccccc2)CCC=CC1N(C)C CHEMBL2104560
918 | CCNC1=Nc2ccc(Cl)cc2C(C)(c2ccccc2)O1 CHEMBL2106227
919 | COc1c2c(cc3c1CN(C)CC3)OCO2 CHEMBL1606295
920 | CCN(CC)CCOC1(c2ccccc2)CCN(CCNc2ccccc2)CC1 CHEMBL2110911
921 | CCn1cc(C(=O)O)c(=O)c2cc3c(cc21)CCO3 CHEMBL2110851
922 | CCC1CC(=O)C2Oc3c4c(ccc3OC)CC3C1C42CCN3CC1CC1 CHEMBL2110790
923 | NC(=O)OCC1C(NC(=O)C(=NOCC(=O)O)c2csc(N)n2)C(=O)N1S(=O)(=O)O CHEMBL1614658
924 | CN(C)c1cnccn1 CHEMBL2110770
925 | CN=C(NC)NCc1ccc(OC)cc1 CHEMBL2110972
926 | COC(=O)C1CC2=CC(=O)CCC2(C)C2CCC3(C)C(CCC3(O)CCC(=O)O)C12 CHEMBL2110977
927 | CCCCN(CCCC)C(=O)OCC[N+](C)(C)CC CHEMBL2110746
928 | CNC(=O)CCN1CCN(CCC=C2c3ccccc3Sc3ccc(Cl)cc32)CC1 CHEMBL2110894
929 | CC[N+](C)(C)CCOC(=O)CCC(=O)OCC[N+](C)(C)CC CHEMBL2111153
930 | NCc1ccccc1Sc1ccccc1CO CHEMBL310160
931 | C[N+]1(C)CCC(OC(=O)C(O)(c2ccccc2)c2ccccc2)CC1 CHEMBL2110714
932 | NC1CCCC1c1ccccc1 CHEMBL2110918
933 | N.O=C(O)c1ccccc1 CHEMBL2105970
934 | S=C=Nc1ccc(N=C=S)cc1 CHEMBL2104676
935 | COC(=O)Nc1cc(N2CC=CCC2)nc2nc(=O)on21 CHEMBL2104058
936 | CC1C[S+]([O-])C(C)(C)C(C(=O)O)N1 CHEMBL2105470
937 | c1csc(Cc2ccccc2OCC2CNCCO2)c1 CHEMBL2105438
938 | O=C1CCCC(=O)C1C(=S)Nc1ccc(Cl)cc1 CHEMBL2105264
939 | N#CCNC(=O)c1ccc(-c2ccnc(Nc3ccc(N4CCOCC4)cc3)n2)cc1 CHEMBL1078178
940 | CCOP(=S)(OCC)Oc1cc(C)nc(C(C)C)n1 CHEMBL388560
941 | OC(O)C(Cl)(Cl)Cl CHEMBL455917
942 | O=C1c2ccccc2S(=O)(=O)N1C1CCCNC1=O CHEMBL72115
943 | O=C(O)c1ccccc1Nc1cccc(Cl)c1Cl CHEMBL23115
944 | Nc1ccc(OCCCCCN2C(=O)c3ccccc3C2=O)cc1 CHEMBL94192
945 | CC(C)C1CCC(C)CC1O CHEMBL256087
946 | CC(=O)Nc1ccc(OC(=O)c2ccccc2O)cc1 CHEMBL92590
947 | N=C(N)NN=Cc1c(Cl)cccc1Cl CHEMBL420
948 | O=[N+]([O-])OCC(CO)(CO[N+](=O)[O-])CO[N+](=O)[O-] CHEMBL466660
949 | CN(C)CC1CCn2cc(c3ccccc32)C2=C(c3cn(c4ccccc34)CCO1)C(=O)NC2=O CHEMBL91829
950 | C#CC1(OC(N)=O)CCCCC1 CHEMBL1576
951 | BrC(Br)Br CHEMBL345248
952 | Cc1cc2cc3c(C)cc(=O)oc3c(C)c2o1 CHEMBL1475
953 | N#Cc1cc(-c2ccccc2)nnc1NCCN1CCOCC1 CHEMBL150365
954 | CC(N)C(O)c1ccc(O)c(O)c1 CHEMBL677
955 | COc1cc2c(cc1OC)C1CC(=O)C(CC(C)C)CN1CC2 CHEMBL117785
956 | CCOC(=O)C1=C(C)NC(C)=C(C(=O)OC)C1c1cccc(Cl)c1Cl CHEMBL1480
957 | CCC1(C)CC(=O)NC1=O CHEMBL696
958 | C=C1CC2C3C=C(C)C4=CC(=O)CCC4(C)C3CCC2(C)C1(OC(C)=O)C(C)=O CHEMBL1328968
959 | c1ccc(Cc2nc3ccccc3[nH]2)cc1 CHEMBL355063
960 | CCCCCCCCC=CCCCCCCCC(=O)OCC(O)CO CHEMBL428593
961 | CCOC(=O)C(CCc1ccccc1)NC1CCCN2CCCC(C(=O)O)N2C1=O CHEMBL515606
962 | CCC1(c2c[nH]cn2)Cc2ccc(F)cc2C1 CHEMBL1255582
963 | CC(=O)NS(=O)(=O)c1ccc(N)cc1 CHEMBL455
964 | Nc1c(CC(=O)O)cccc1C(=O)c1ccc(Br)cc1 CHEMBL1077
965 | CCS(=O)(=O)c1ccc(F)cc1 CHEMBL93309
966 | Cc1cc(CC(=O)O)n(C)c1C(=O)c1ccc(Cl)cc1 CHEMBL19490
967 | CCCCCc1ccc(C)cc1O CHEMBL1512677
968 | CC1(C)SC2C(Br)C(=O)N2C1C(=O)O CHEMBL73622
969 | CCn1cc(C(=O)O)c(=O)c2ccc(Cc3ccccc3)nc21 CHEMBL35337
970 | CCN(CC)CCOC(=O)C(c1ccccc1)c1ccccc1 CHEMBL353846
971 | CNC1CCCN(c2c(F)cc3c(=O)c(C(=O)O)cn(C4CC4)c3c2OC)C1 CHEMBL1210954
972 | CC(C)CNC1CCS(=O)(=O)c2sc(S(N)(=O)=O)cc21 CHEMBL417975
973 | CNC1=Nc2ccc(Cl)cc2C(c2ccccc2)=[N+]([O-])C1 CHEMBL451
974 | NC12CC3CC(CC(C3)C1)C2 CHEMBL660
975 | CN1CCCN=C1COC(=O)C(O)(c1ccccc1)C1CCCCC1 CHEMBL1495
976 | Nc1c2ccccc2nc2c1CCCC2 CHEMBL95
977 | O=C(O)COCCN1CCN(C(c2ccccc2)c2ccc(Cl)cc2)CC1 CHEMBL1201191
978 | O=CCCCC=O CHEMBL1235482
979 | O=C1C(O)N=C(c2ccccc2F)c2cc(Cl)ccc2N1CCO CHEMBL64677
980 | CC12CC(O)C3C(CC(F)C4=CC(=O)C=CC43C)C1CCC2(O)C(=O)CO CHEMBL1200774
981 | O=C(O)Cc1nn(Cc2nc3cc(C(F)(F)F)ccc3s2)c(=O)c2ccccc12 CHEMBL10372
982 | Cc1cc(Cl)ccc1N=C1SCS1 CHEMBL93885
983 | CN1c2c(=O)nc(N)[nH]c2NCC1CNc1ccc(C(=O)NC(CCC(=O)O)C(=O)O)cc1 CHEMBL1231574
984 | CNC(=N)NC(=O)Nc1c(C)cccc1C CHEMBL448416
985 | CCCCC(C)(O)CC=CC1C(O)CC(=O)C1CCC=CCCC(=O)OC CHEMBL266979
986 | Cc1cccc(C(C)c2c[nH]cn2)c1C CHEMBL778
987 | COc1ccc(Cc2cnc(N)nc2N)cc1OC CHEMBL19633
988 | Oc1ccc(Cl)cc1 CHEMBL57053
989 | NCCCCC(NC1CCc2ccccc2N(CC(=O)O)C1=O)C(=O)O CHEMBL430554
990 | CCOC(=O)c1ccc(OC(=O)CCCCCN=C(N)N)cc1 CHEMBL87563
991 | COc1ccc(C=Cc2cc(OC)c(OC)c(OC)c2)cc1OP(=O)(O)O CHEMBL1206232
992 | CN1CCC(=C2c3ccsc3CCc3ccccc32)CC1 CHEMBL294951
993 | CN(C)CCCOC1C(C(O)CO)OC2OC(C)(C)OC12 CHEMBL1316321
994 | CC1(C(=O)O)CSC(c2ccc(O)cc2O)=N1 CHEMBL432481
995 | CCN(CC)C(=O)NC1C=C2c3cccc4[nH]cc(c43)CC2N(C)C1 CHEMBL157138
996 | CC(C[N+](C)(C)C)OC(N)=O CHEMBL1482
997 | Cc1ccc(C(=CCN2CCCC2)c2cccc(C=CC(=O)O)n2)cc1 CHEMBL1224
998 | CCOC(=O)C(C)(N)Cc1ccc(O)c(O)c1 CHEMBL1201233
999 | CNS(=O)(=O)CCc1ccc2[nH]cc(C3CCN(C)CC3)c2c1 CHEMBL1278
1000 | CC(CN1CC(=O)NC(=O)C1)N1CC(=O)NC(=O)C1 CHEMBL1738
1001 | Cc1cc(O)ccc1Cl CHEMBL1230222
1002 |
--------------------------------------------------------------------------------
/Presentations and Tutorials/RDKit UGM 2014/merck.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Presentations and Tutorials/RDKit UGM 2014/merck.jpeg
--------------------------------------------------------------------------------
/Presentations and Tutorials/RDKit UGM 2014/rdkit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Team-SKI/snippets/1fc4d05e544c29a7376415a6a7223af302f98788/Presentations and Tutorials/RDKit UGM 2014/rdkit.png
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | #### Code snippets from Team-SKI
2 | This repository contains code snippets that demonstrate the use of RDKit, pandas, and other Python libraries for common computer-aided drug design tasks.
3 |
4 | ```
5 | ├── Cheminformatics
6 | │ └── scaffold-analysis
7 | │
8 | ├── Kinase inhibitors
9 | │ └── approved-or-in-clinical-trials
10 | │
11 | ├── Presentations and Tutorials
12 | │ ├── Molecular Modelling Workshop 2014
13 | │ ├── Protvec demo 2017
14 | │ └── RDKit UGM 2014
15 | │ └── hackaton
16 | │
17 | └── Virtual-Screening
18 | ```
19 | These projects were supported by [BioMed X](http://bio.mx/) Innovation Center, Heidelberg
20 |
21 | ---
22 | #### Presentations and tutorials
23 | * **Molecular Modelling Workshop 2014**
24 | *Scaffold analysis in Python with RDKit and pandas*
25 | Jupyter notebook: [view](https://github.com/Team-SKI/snippets/blob/master/Presentations%20and%20tutorials/Molecular%20Modelling%20Workshop%202014/Scaffold%20analysis%20in%20Python%20with%20RDKit%20and%20pandas%20-%20MMWS%20Erlangen%202014.ipynb)
26 |
27 | * **Protvec demo 2017**
28 | *Demo of encoding protein sequences via vectors*
29 |
30 | * **RDKit UGM 2014** - [RDKit UGM](https://github.com/rdkit/UGM_2014)
31 | *Scaffold analysis of ChEMBL data with pandas and RDKit*
32 | Jupyter notebook: [view](https://github.com/Team-SKI/snippets/blob/master/Presentations%20and%20tutorials/RDKit%20UGM%202014/Scaffold%20analysis%20of%20ChEMBL%20data%20with%20pandas%20and%20RDKit%20-%20RDKit%20UGM2014.ipynb)
33 |
34 | *hackaton contribution*
35 | Demo of SaveXlsxFromFrame function that can export PandasDataFrame to Excel including images of molecules.
36 | Jupyter notebook: [view](https://github.com/Team-SKI/snippets/blob/master/Presentations%20and%20tutorials/RDKit%20UGM%202014/rdkit_hackaton/XLSX%20export.ipynb) -- Resulting demo xlsx: [download](https://github.com/Team-SKI/snippets/blob/master/IPython/rdkit_hackaton/demo.xlsx)
37 |
38 | ---
39 | #### Cheminformatics
40 | * **Scaffold analysis in Python with RDKit and pandas**
41 | Internal presentation given at BioMed X team meeting, March 2014.
42 | Jupyter notebook: [view](https://github.com/Team-SKI/snippets/blob/master/Cheminformatics/Basics/Scaffold%20analysis%20%26%20Schnellkurs%20in%20chemoinformatics.ipynb)
43 |
44 | * **Markdown usage**
45 | Jupyter notebook: [view](https://github.com/Team-SKI/snippets/blob/master/Cheminformatics/Basics/Markdown%20demo.ipynb)
46 |
47 | * **RDKit pandas integration**
48 | Demo of new functions that were integrated in [RDKit](https://github.com/rdkit/rdkit/commit/8269bc9002cf3c6b106c847d86bcbabc016b697e), 2013.
49 | Jupyter notebook: [view](https://github.com/Team-SKI/snippets/blob/master/Cheminformatics/Basics/RDKit%26pandas%20demo%20of%20new%20functions.ipynb)
50 |
51 | * **Rendering of images in IPython**
52 | Example of how to use object representations.
53 | Jupyter notebook: [view](https://github.com/Team-SKI/snippets/blob/master/Cheminformatics/Basics/Custom%20objects%20and%20their%20rendering%20in%20IPython.ipynb)
54 |
55 | ---
56 | #### Kinase-Inhibitors
57 | * **approved or in clinical trials**
58 | Notebook that extracts all kinase inhibitors that are in clinical trials or are on the market.
59 | Jupyter notebook: [view](https://github.com/Team-SKI/snippets/blob/master/Kinase%20inhibitors/Kinase%20inhibitors%20-%20approved%20or%20in%20clinical%20trials.ipynb)
60 |
61 | ---
62 | #### Virtual-Screening
63 | * **ligand-3D-conformations**
64 | [prepare_for_docking.py](https://github.com/Team-SKI/snippets/blob/master/Structural%20bioinformatics/prepare_for_docking.py): Script that uses Open Babel to generate 3D structures of compounds.
65 | For usage info run `prepare_for_docking.py -h`
66 |
67 | * **filtering**
68 | [filter_pains.py](https://github.com/Team-SKI/snippets/blob/master/Cheminformatics/Screening/filter_pains.py): Script that uses RDKit to remove PAINS compounds from SDF or SMILES files.
69 | For usage info run `filter_pains.py -h`
70 |
--------------------------------------------------------------------------------
/Virtual-Screening/ligand-3D-conformations/prepare_for_docking.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | __author__ = "Samo Turk"
4 | __copyright__ = "Copyright (C) 2014 by BioMed X GmbH"
5 | __credits__ = ["Simone Fulle", "Katra Kolsek"]
6 | __license__ = "GPL"
7 | __version__ = "0.1"
8 | __email__ = "turk@bio.mx"
9 | __status__ = "Development"
10 |
11 | import argparse
12 | import sys
13 | import os
14 | import pybel
15 |
def gen3D(mol, pH=7.4, forcefield="MMFF94", s1=10, s2=500):
    """
    Protonate a molecule for a given pH, build a 3D conformation and minimize it.

    The molecule is modified in place and also returned.

    - mol: molecule object exposing ``OBMol``, ``make3D`` and ``localopt``
      (presumably a pybel Molecule - confirm against callers)
    - pH: pH used for hydrogen addition, defaults to 7.4
    - forcefield: MMFF94, UFF or Ghemical, defaults to MMFF94
    - s1: number of steps passed to make3D, defaults to 10
    - s2: number of steps passed to localopt, defaults to 500
    """
    # The same forcefield is used for both the build and the minimization.
    shared_options = {"forcefield": forcefield}

    # NOTE(review): the positional flags look like Open Babel's
    # (polar-only=False, correct-for-pH=True) - confirm against the OBMol API.
    ob_molecule = mol.OBMol
    ob_molecule.AddHydrogens(False, True, pH)

    mol.make3D(steps=s1, **shared_options)
    mol.localopt(steps=s2, **shared_options)
    return mol
28 |
def arg_parser():
    """
    Build the command-line parser for the 3D structure generation script.

    Options:
    - -i/--infile: input file (required)
    - -o/--outfile: output file (required)
    - -p/--ph: pH for hydrogen addition, parsed as float, defaults to 7.4
    - -f/--forcefield: MMFF94, UFF or Ghemical, defaults to MMFF94
    - -s1/--steps1: steps for make3D, parsed as int, defaults to 10
    - -s2/--steps2: steps for localopt, parsed as int, defaults to 500

    Returns the configured argparse.ArgumentParser. Missing required options
    or non-numeric values for the numeric options make ``parse_args`` exit
    with a usage error instead of failing later with a traceback.
    """
    parser = argparse.ArgumentParser(description='Generate 3D structures of molecules.')
    # Both files are mandatory: the script derives the formats from their extensions.
    parser.add_argument('-i', '--infile', required=True, help="Specifies input file")
    parser.add_argument('-o', '--outfile', required=True, help="Specifies output file")
    # Numeric options are validated at parse time so bad input is reported by argparse.
    parser.add_argument('-p', '--ph', type=float, default=7.4, help="Specifies pH for hydrogen addition")
    parser.add_argument('-f', '--forcefield', default='MMFF94', help="Specifies forcefield. MMFF94, UFF or Ghemical, defaults to MMFF94")
    parser.add_argument('-s1', '--steps1', type=int, default=10, help="Specifies how many steps for make3D, defaults to 10")
    parser.add_argument('-s2', '--steps2', type=int, default=500, help="Specifies how many steps for localopt, defaults to 500")

    return parser
39 |
def main(argv=None):
    """
    Command-line entry point: read molecules, generate 3D structures, write them out.

    - argv: list of command-line arguments (without the program name);
      defaults to sys.argv[1:]. With no arguments the help text is shown.

    Input and output formats are taken from the file extensions and must be
    in the supported lists below; otherwise a message is printed and the
    process exits with status 1.
    """
    parser = arg_parser()
    if argv is None:
        argv = sys.argv[1:]
    # With no arguments at all, show the help text instead of failing.
    args = parser.parse_args(argv or ['-h'])

    # Guard against only one of the two files being given, which would
    # otherwise fail below with an AttributeError on None.
    if not args.infile or not args.outfile:
        parser.error("both --infile and --outfile must be specified")

    # Formats are derived from the extensions; lower-cased so that e.g.
    # "ligands.SDF" is accepted as well.
    informat = args.infile.split(".")[-1].lower()
    outformat = args.outfile.split(".")[-1].lower()

    # Limit acceptable in and out formats
    informats = ['smi', 'ism', 'sdf', 'mol2', 'mol', 'pdb']
    outformats = ['mol2', 'sdf', 'pdbqt']

    if informat not in informats or outformat not in outformats:
        # Sorted so the list of formats is printed in a stable order.
        print("Format either of infile or outfile not recognized!\nSupported formats are: " + " ".join(sorted(set(informats + outformats))))
        sys.exit(1)

    # First pass over the input only counts molecules, for progress reporting.
    numMols = sum(1 for _ in pybel.readfile(informat, args.infile))

    clear_command = 'cls' if os.name == 'nt' else 'clear'
    os.system(clear_command)  # Clear the terminal
    molsdetected = str(numMols) + " molecules detected."
    print(molsdetected)

    output = pybel.Outputfile(outformat, args.outfile, overwrite=True)
    try:
        # Second pass: the reader is re-created because the first one was
        # consumed by counting.
        for i, mol in enumerate(pybel.readfile(informat, args.infile), 1):
            mol = gen3D(mol, pH=float(args.ph), forcefield=args.forcefield, s1=int(args.steps1), s2=int(args.steps2))
            os.system(clear_command)
            print(molsdetected)
            print("Processing molecule titled: " + str(mol.title))
            output.write(mol)
            print(str(i) + " out of " + str(numMols))
    finally:
        # Always close the output file, even if a molecule fails to process.
        output.close()


if __name__ == "__main__":
    main()
78 |
--------------------------------------------------------------------------------