├── ChEMBL_API_example_for_webinar.ipynb └── ChEMBL_webresource_client_examples.ipynb /ChEMBL_API_example_for_webinar.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Using a list of Schistosome parasite genes, find compounds that are active on these targets:" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "* Aim: Find novel therapeutic targets for the parasite Schistosoma mansoni. \n", 15 | "* Background: Schistosome parasites kill 250,000 people every year. Treatment of schistosomiasis relies on the drug praziquantel (only!). \n", 16 | "* But what other targets could be druggable? And are there any existing marketed drugs for these targets? \n", 17 | "* For further detail see comment at https://www.science.org/doi/10.1126/science.abe0710 \n", 18 | "* and scientific paper: https://www.science.org/doi/10.1126/science.abb7699 " 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "## Method 1: Use the ChEMBL python client to access the API" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "* Available to install from Python Package Index by typing: \n", 40 | "pip install chembl_webresource_client \n", 41 | "* See https://github.com/chembl/chembl_webresource_client \n", 42 | " \n", 43 | "* Pros: simple to use, tailored for ChEMBL API endpoints\n", 44 | "* Cons: The keyword 'only' is limited to main fields, so it can't search within nested fields (e.g. in the molecule API, .only(['molecule_properties__alogp']) is equivalent to .only(['molecule_properties']). 
" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 17, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "###############################\n", 54 | "#This cell imports relevant python modules:\n", 55 | "###############################\n", 56 | "import pandas as pd #Use pandas python module to view and analyse data\n", 57 | "from chembl_webresource_client.new_client import new_client #Import ChEMBL python client" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "## 1a: Find compounds that are active on the specified targets: " 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 20, 70 | "metadata": {}, 71 | "outputs": [ 72 | { 73 | "name": "stdout", 74 | "output_type": "stream", 75 | "text": [ 76 | "(83, 7)\n" 77 | ] 78 | }, 79 | { 80 | "data": { 81 | "text/html": [ 82 | "

\n", 83 | "\n", 96 | "\n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | "

	molecule_chembl_id	molecule_pref_name	parent_molecule_chembl_id	pchembl_value	target_chembl_id	target_pref_name	value
0	CHEMBL574738	AST-487	CHEMBL574738	5.92	CHEMBL5552	Serine/threonine-protein kinase 25	1200.0
1	CHEMBL522892	DOVITINIB	CHEMBL522892	5.92	CHEMBL5552	Serine/threonine-protein kinase 25	1200.0
2	CHEMBL607707	PELITINIB	CHEMBL607707	5.47	CHEMBL5552	Serine/threonine-protein kinase 25	3400.0
3	CHEMBL191003	JNJ-7706621	CHEMBL191003	6.44	CHEMBL5552	Serine/threonine-protein kinase 25	360.0
4	CHEMBL608533	MIDOSTAURIN	CHEMBL608533	5.75	CHEMBL5552	Serine/threonine-protein kinase 25	1800.0
...	...	...	...	...	...	...	...
78	CHEMBL388978	STAUROSPORINE	CHEMBL388978	8.57	CHEMBL5552	Serine/threonine-protein kinase 25	2.71
79	CHEMBL388978	STAUROSPORINE	CHEMBL388978	7.93	CHEMBL1075195	Serine/threonine-protein kinase TAO2	1.17
80	CHEMBL388978	STAUROSPORINE	CHEMBL388978	8.72	CHEMBL5552	Serine/threonine-protein kinase 25	1.9
81	CHEMBL4569508	None	CHEMBL4569508	5.57	CHEMBL1075195	Serine/threonine-protein kinase TAO2	2700.0
82	CHEMBL4568087	None	CHEMBL4568087	6.38	CHEMBL1075195	Serine/threonine-protein kinase TAO2	420.0

\n", 222 | "

83 rows × 7 columns

\n", 223 | "

" 224 | ], 225 | "text/plain": [ 226 | " molecule_chembl_id molecule_pref_name parent_molecule_chembl_id \\\n", 227 | "0 CHEMBL574738 AST-487 CHEMBL574738 \n", 228 | "1 CHEMBL522892 DOVITINIB CHEMBL522892 \n", 229 | "2 CHEMBL607707 PELITINIB CHEMBL607707 \n", 230 | "3 CHEMBL191003 JNJ-7706621 CHEMBL191003 \n", 231 | "4 CHEMBL608533 MIDOSTAURIN CHEMBL608533 \n", 232 | ".. ... ... ... \n", 233 | "78 CHEMBL388978 STAUROSPORINE CHEMBL388978 \n", 234 | "79 CHEMBL388978 STAUROSPORINE CHEMBL388978 \n", 235 | "80 CHEMBL388978 STAUROSPORINE CHEMBL388978 \n", 236 | "81 CHEMBL4569508 None CHEMBL4569508 \n", 237 | "82 CHEMBL4568087 None CHEMBL4568087 \n", 238 | "\n", 239 | " pchembl_value target_chembl_id target_pref_name \\\n", 240 | "0 5.92 CHEMBL5552 Serine/threonine-protein kinase 25 \n", 241 | "1 5.92 CHEMBL5552 Serine/threonine-protein kinase 25 \n", 242 | "2 5.47 CHEMBL5552 Serine/threonine-protein kinase 25 \n", 243 | "3 6.44 CHEMBL5552 Serine/threonine-protein kinase 25 \n", 244 | "4 5.75 CHEMBL5552 Serine/threonine-protein kinase 25 \n", 245 | ".. ... ... ... \n", 246 | "78 8.57 CHEMBL5552 Serine/threonine-protein kinase 25 \n", 247 | "79 7.93 CHEMBL1075195 Serine/threonine-protein kinase TAO2 \n", 248 | "80 8.72 CHEMBL5552 Serine/threonine-protein kinase 25 \n", 249 | "81 5.57 CHEMBL1075195 Serine/threonine-protein kinase TAO2 \n", 250 | "82 6.38 CHEMBL1075195 Serine/threonine-protein kinase TAO2 \n", 251 | "\n", 252 | " value \n", 253 | "0 1200.0 \n", 254 | "1 1200.0 \n", 255 | "2 3400.0 \n", 256 | "3 360.0 \n", 257 | "4 1800.0 \n", 258 | ".. ... 
\n", 259 | "78 2.71 \n", 260 | "79 1.17 \n", 261 | "80 1.9 \n", 262 | "81 2700.0 \n", 263 | "82 420.0 \n", 264 | "\n", 265 | "[83 rows x 7 columns]" 266 | ] 267 | }, 268 | "execution_count": 20, 269 | "metadata": {}, 270 | "output_type": "execute_result" 271 | } 272 | ], 273 | "source": [ 274 | "###############################\n", 275 | "#Set up the call to the ChEMBL 'activity' API:\n", 276 | "###############################\n", 277 | "activities = new_client.activity.filter(target_chembl_id__in=['CHEMBL5552','CHEMBL1075195'] ##Specify a list of example targets (#Serine/threonine-protein kinase 25 (STK25) & Serine-threonine kinases TAO2)\n", 278 | " , pchembl_value__gte=5 ##Specify a minimum threshold of the pChEMBL activity value. Note that pCHEMBL = -log10(IC50, XC50, AC50, Ki, Kd, potency). Greater than or equal to 5 (10um) is a typical minimum rule of thumb for binding activity between a compound and a protein target. \n", 279 | " , assay_type='B' ##Only look for Binding Assays\n", 280 | " ).only(['target_chembl_id', 'target_pref_name', 'parent_molecule_chembl_id' ## Specify which fields (columns) to extract\n", 281 | " ,'molecule_chembl_id','molecule_pref_name', 'pchembl_value'])\n", 282 | "#Convert the list of results into a Pandas dataframe:\n", 283 | "act_df = pd.DataFrame(activities)\n", 284 | "act_df" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": null, 290 | "metadata": {}, 291 | "outputs": [], 292 | "source": [] 293 | }, 294 | { 295 | "cell_type": "markdown", 296 | "metadata": {}, 297 | "source": [ 298 | "## 1b: Using the active compounds from the previous step, call the 'molecule' API to find their molecular properties etc., so that the compound list can be prioritised" 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": 3, 304 | "metadata": {}, 305 | "outputs": [ 306 | { 307 | "name": "stdout", 308 | "output_type": "stream", 309 | "text": [ 310 | "There are 55 compounds initially identified as 
active on the known targets. e.g.['CHEMBL4568087', 'CHEMBL1721885']...\n" 311 | ] 312 | } 313 | ], 314 | "source": [ 315 | "###############################\n", 316 | "#First find the list of compounds that are within the act_df dataframe:\n", 317 | "###############################\n", 318 | "cmpd_chembl_ids = list(set(act_df['molecule_chembl_id']))\n", 319 | "print(\"There are {} compounds initially identified as active on the known targets. e.g.{}...\".format(len(cmpd_chembl_ids),cmpd_chembl_ids[0:2]))" 320 | ] 321 | }, 322 | { 323 | "cell_type": "code", 324 | "execution_count": 5, 325 | "metadata": {}, 326 | "outputs": [ 327 | { 328 | "data": { 329 | "text/html": [ 330 | "

\n", 331 | "\n", 344 | "\n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | "

	max_phase	molecule_chembl_id	molecule_hierarchy	molecule_properties	pref_name
0	4	CHEMBL535	{'molecule_chembl_id': 'CHEMBL535', 'parent_ch...	{'alogp': '3.33', 'aromatic_rings': 2, 'cx_log...	SUNITINIB
1	1	CHEMBL296468	{'molecule_chembl_id': 'CHEMBL296468', 'parent...	{'alogp': '3.66', 'aromatic_rings': 2, 'cx_log...	BMS-387032
2	4	CHEMBL180022	{'molecule_chembl_id': 'CHEMBL180022', 'parent...	{'alogp': '5.93', 'aromatic_rings': 4, 'cx_log...	NERATINIB
3	2	CHEMBL475251	{'molecule_chembl_id': 'CHEMBL475251', 'parent...	{'alogp': '3.63', 'aromatic_rings': 3, 'cx_log...	R-406

\n", 390 | "

" 391 | ], 392 | "text/plain": [ 393 | " max_phase molecule_chembl_id \\\n", 394 | "0 4 CHEMBL535 \n", 395 | "1 1 CHEMBL296468 \n", 396 | "2 4 CHEMBL180022 \n", 397 | "3 2 CHEMBL475251 \n", 398 | "\n", 399 | " molecule_hierarchy \\\n", 400 | "0 {'molecule_chembl_id': 'CHEMBL535', 'parent_ch... \n", 401 | "1 {'molecule_chembl_id': 'CHEMBL296468', 'parent... \n", 402 | "2 {'molecule_chembl_id': 'CHEMBL180022', 'parent... \n", 403 | "3 {'molecule_chembl_id': 'CHEMBL475251', 'parent... \n", 404 | "\n", 405 | " molecule_properties pref_name \n", 406 | "0 {'alogp': '3.33', 'aromatic_rings': 2, 'cx_log... SUNITINIB \n", 407 | "1 {'alogp': '3.66', 'aromatic_rings': 2, 'cx_log... BMS-387032 \n", 408 | "2 {'alogp': '5.93', 'aromatic_rings': 4, 'cx_log... NERATINIB \n", 409 | "3 {'alogp': '3.63', 'aromatic_rings': 3, 'cx_log... R-406 " 410 | ] 411 | }, 412 | "execution_count": 5, 413 | "metadata": {}, 414 | "output_type": "execute_result" 415 | } 416 | ], 417 | "source": [ 418 | "###############################\n", 419 | "#Set up the call to the ChEMBL 'molecule' API:\n", 420 | "###############################\n", 421 | "\n", 422 | "#Select a few examples of the active compounds from above (i.e. a reduced list!) : \n", 423 | "cmpd_chembl_ids = \",\".join(['CHEMBL296468', 'CHEMBL180022', 'CHEMBL475251','CHEMBL535']) # Amend the format of the text string so that it is suitable for the API call\n", 424 | "\n", 425 | "molecules = new_client.molecule.filter(molecule_chembl_id__in=cmpd_chembl_ids ##Select a few examples of the active compounds (i.e. 
a reduced list!)\n", 426 | " ).only([ 'molecule_chembl_id','pref_name', 'molecule_hierarchy', 'molecule_properties', 'max_phase']) ## Specify which fields (columns) to extract\n", 427 | "\n", 428 | "#Convert the list of results into a Pandas dataframe:\n", 429 | "mol_df = pd.DataFrame(molecules)\n", 430 | "mol_df" 431 | ] 432 | }, 433 | { 434 | "cell_type": "code", 435 | "execution_count": 6, 436 | "metadata": {}, 437 | "outputs": [ 438 | { 439 | "name": "stdout", 440 | "output_type": "stream", 441 | "text": [ 442 | "(4, 18)\n" 443 | ] 444 | }, 445 | { 446 | "data": { 447 | "text/html": [ 448 | "

\n", 449 | "\n", 462 | "\n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | "

	max_phase	molecule_chembl_id	molecule_hierarchy	molecule_properties	pref_name	parent_molecule_chembl_id	cx_logd	cx_logp	cx_most_apka	cx_most_bpka	alogp	hba	hbd	mw_freebase	full_mwt	num_ro5_violations	psa	heavy_atoms
0	4	CHEMBL535	{'molecule_chembl_id': 'CHEMBL535', 'parent_ch...	{'alogp': '3.33', 'aromatic_rings': 2, 'cx_log...	SUNITINIB	CHEMBL535	1.28	2.93	11.46	9.04	3.33	3	3	398.48	398.48	0	77.23	29
1	1	CHEMBL296468	{'molecule_chembl_id': 'CHEMBL296468', 'parent...	{'alogp': '3.66', 'aromatic_rings': 2, 'cx_log...	BMS-387032	CHEMBL296468	0.33	0.94	7.94	10.17	3.66	7	2	380.54	380.54	0	80.05	25
2	4	CHEMBL180022	{'molecule_chembl_id': 'CHEMBL180022', 'parent...	{'alogp': '5.93', 'aromatic_rings': 4, 'cx_log...	NERATINIB	CHEMBL180022	3.05	4.47	12.55	8.81	5.93	8	2	557.05	557.05	2	112.40	40
3	2	CHEMBL475251	{'molecule_chembl_id': 'CHEMBL475251', 'parent...	{'alogp': '3.63', 'aromatic_rings': 3, 'cx_log...	R-406	CHEMBL475251	3.63	3.63	10.90	3.05	3.63	10	3	470.46	470.46	0	128.75	34

\n", 573 | "

" 574 | ], 575 | "text/plain": [ 576 | " max_phase molecule_chembl_id \\\n", 577 | "0 4 CHEMBL535 \n", 578 | "1 1 CHEMBL296468 \n", 579 | "2 4 CHEMBL180022 \n", 580 | "3 2 CHEMBL475251 \n", 581 | "\n", 582 | " molecule_hierarchy \\\n", 583 | "0 {'molecule_chembl_id': 'CHEMBL535', 'parent_ch... \n", 584 | "1 {'molecule_chembl_id': 'CHEMBL296468', 'parent... \n", 585 | "2 {'molecule_chembl_id': 'CHEMBL180022', 'parent... \n", 586 | "3 {'molecule_chembl_id': 'CHEMBL475251', 'parent... \n", 587 | "\n", 588 | " molecule_properties pref_name \\\n", 589 | "0 {'alogp': '3.33', 'aromatic_rings': 2, 'cx_log... SUNITINIB \n", 590 | "1 {'alogp': '3.66', 'aromatic_rings': 2, 'cx_log... BMS-387032 \n", 591 | "2 {'alogp': '5.93', 'aromatic_rings': 4, 'cx_log... NERATINIB \n", 592 | "3 {'alogp': '3.63', 'aromatic_rings': 3, 'cx_log... R-406 \n", 593 | "\n", 594 | " parent_molecule_chembl_id cx_logd cx_logp cx_most_apka cx_most_bpka alogp \\\n", 595 | "0 CHEMBL535 1.28 2.93 11.46 9.04 3.33 \n", 596 | "1 CHEMBL296468 0.33 0.94 7.94 10.17 3.66 \n", 597 | "2 CHEMBL180022 3.05 4.47 12.55 8.81 5.93 \n", 598 | "3 CHEMBL475251 3.63 3.63 10.90 3.05 3.63 \n", 599 | "\n", 600 | " hba hbd mw_freebase full_mwt num_ro5_violations psa heavy_atoms \n", 601 | "0 3 3 398.48 398.48 0 77.23 29 \n", 602 | "1 7 2 380.54 380.54 0 80.05 25 \n", 603 | "2 8 2 557.05 557.05 2 112.40 40 \n", 604 | "3 10 3 470.46 470.46 0 128.75 34 " 605 | ] 606 | }, 607 | "execution_count": 6, 608 | "metadata": {}, 609 | "output_type": "execute_result" 610 | } 611 | ], 612 | "source": [ 613 | "###########################\n", 614 | "# Convert nested cells (ie those containing a dictionary) to individual columns in the dataframe so that is it easier to filter!\n", 615 | "###########################\n", 616 | "# Molecule hierarchy: \n", 617 | "mol_df['parent_molecule_chembl_id'] = mol_df['molecule_hierarchy'].apply(lambda x: x['parent_chembl_id'])\n", 618 | "\n", 619 | "#Physicochemical properties (only report if cells are not 
null)\n", 620 | "mol_df['cx_logd'] = mol_df.loc[ mol_df['molecule_properties'].notnull(), 'molecule_properties'].apply(lambda x: x['cx_logd'])\n", 621 | "mol_df['cx_logp'] = mol_df.loc[ mol_df['molecule_properties'].notnull(), 'molecule_properties'].apply(lambda x: x['cx_logp'])\n", 622 | "mol_df['cx_most_apka'] = mol_df.loc[ mol_df['molecule_properties'].notnull(), 'molecule_properties'].apply(lambda x: x['cx_most_apka'])\n", 623 | "mol_df['cx_most_bpka'] = mol_df.loc[ mol_df['molecule_properties'].notnull(), 'molecule_properties'].apply(lambda x: x['cx_most_bpka'])\n", 624 | "mol_df['alogp'] = mol_df.loc[ mol_df['molecule_properties'].notnull(), 'molecule_properties'].apply(lambda x: x['alogp'])\n", 625 | "mol_df['hba'] = mol_df.loc[ mol_df['molecule_properties'].notnull(), 'molecule_properties'].apply(lambda x: x['hba'])\n", 626 | "mol_df['hbd'] = mol_df.loc[ mol_df['molecule_properties'].notnull(), 'molecule_properties'].apply(lambda x: x['hbd'])\n", 627 | "mol_df['mw_freebase'] = mol_df.loc[ mol_df['molecule_properties'].notnull(), 'molecule_properties'].apply(lambda x: x['mw_freebase']) #This is the mwt of the parent compound\n", 628 | "mol_df['full_mwt'] = mol_df.loc[ mol_df['molecule_properties'].notnull(), 'molecule_properties'].apply(lambda x: x['full_mwt']) #This is the mwt of the full compound including any salt\n", 629 | "mol_df['num_ro5_violations'] = mol_df.loc[ mol_df['molecule_properties'].notnull(), 'molecule_properties'].apply(lambda x: x['num_ro5_violations'])\n", 630 | "mol_df['psa'] = mol_df.loc[ mol_df['molecule_properties'].notnull(), 'molecule_properties'].apply(lambda x: x['psa'])\n", 631 | "mol_df['heavy_atoms'] = mol_df.loc[ mol_df['molecule_properties'].notnull(), 'molecule_properties'].apply(lambda x: x['heavy_atoms'])\n", 632 | "\n", 633 | "mol_df" 634 | ] 635 | }, 636 | { 637 | "cell_type": "code", 638 | "execution_count": 8, 639 | "metadata": {}, 640 | "outputs": [ 641 | { 642 | "data": { 643 | "text/html": [ 644 | "

\n", 645 | "\n", 658 | "\n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | "

	max_phase	molecule_chembl_id	molecule_hierarchy	molecule_properties	pref_name	parent_molecule_chembl_id	cx_logd	cx_logp	cx_most_apka	cx_most_bpka	alogp	hba	hbd	mw_freebase	full_mwt	num_ro5_violations	psa	heavy_atoms
0	4	CHEMBL535	{'molecule_chembl_id': 'CHEMBL535', 'parent_ch...	{'alogp': '3.33', 'aromatic_rings': 2, 'cx_log...	SUNITINIB	CHEMBL535	1.28	2.93	11.46	9.04	3.33	3	3	398.48	398.48	0	77.23	29
2	4	CHEMBL180022	{'molecule_chembl_id': 'CHEMBL180022', 'parent...	{'alogp': '5.93', 'aromatic_rings': 4, 'cx_log...	NERATINIB	CHEMBL180022	3.05	4.47	12.55	8.81	5.93	8	2	557.05	557.05	2	112.40	40

\n", 727 | "

" 728 | ], 729 | "text/plain": [ 730 | " max_phase molecule_chembl_id \\\n", 731 | "0 4 CHEMBL535 \n", 732 | "2 4 CHEMBL180022 \n", 733 | "\n", 734 | " molecule_hierarchy \\\n", 735 | "0 {'molecule_chembl_id': 'CHEMBL535', 'parent_ch... \n", 736 | "2 {'molecule_chembl_id': 'CHEMBL180022', 'parent... \n", 737 | "\n", 738 | " molecule_properties pref_name \\\n", 739 | "0 {'alogp': '3.33', 'aromatic_rings': 2, 'cx_log... SUNITINIB \n", 740 | "2 {'alogp': '5.93', 'aromatic_rings': 4, 'cx_log... NERATINIB \n", 741 | "\n", 742 | " parent_molecule_chembl_id cx_logd cx_logp cx_most_apka cx_most_bpka alogp \\\n", 743 | "0 CHEMBL535 1.28 2.93 11.46 9.04 3.33 \n", 744 | "2 CHEMBL180022 3.05 4.47 12.55 8.81 5.93 \n", 745 | "\n", 746 | " hba hbd mw_freebase full_mwt num_ro5_violations psa heavy_atoms \n", 747 | "0 3 3 398.48 398.48 0 77.23 29 \n", 748 | "2 8 2 557.05 557.05 2 112.40 40 " 749 | ] 750 | }, 751 | "execution_count": 8, 752 | "metadata": {}, 753 | "output_type": "execute_result" 754 | } 755 | ], 756 | "source": [ 757 | "###########################\n", 758 | "#Filter the compounds based on their molecular properties (e.g. molecular weight < 400 amu), or max_phase, for example:\n", 759 | "###########################\n", 760 | "\n", 761 | "#Now keep only compounds with max_phase = 4 (ie approved drugs): \n", 762 | "res = mol_df[ mol_df['max_phase'] == 4 ]\n", 763 | "\n", 764 | "#Display results:\n", 765 | "res" 766 | ] 767 | }, 768 | { 769 | "cell_type": "code", 770 | "execution_count": null, 771 | "metadata": {}, 772 | "outputs": [], 773 | "source": [] 774 | }, 775 | { 776 | "cell_type": "markdown", 777 | "metadata": {}, 778 | "source": [ 779 | "## Method 2: Use the python 'requests' module to access the API" 780 | ] 781 | }, 782 | { 783 | "cell_type": "markdown", 784 | "metadata": {}, 785 | "source": [ 786 | "* Pros: Use if don't want to install ChEMBL python client, e.g. 
if the 'requests' module is already in use as part of an existing workflow, or if using a non-Python coding language\n", 787 | "* Cons: Slightly more code required than for ChEMBL python client. Need to include code to iterate over multiple pages of records.\n", 788 | "* See https://docs.python-requests.org/en/latest/ " 789 | ] 790 | }, 791 | { 792 | "cell_type": "code", 793 | "execution_count": 9, 794 | "metadata": {}, 795 | "outputs": [], 796 | "source": [ 797 | "###############################\n", 798 | "#This cell imports relevant python modules:\n", 799 | "###############################\n", 800 | "import pandas as pd #Use pandas python module to view and analyse data\n", 801 | "import requests #This is used to access json files!" 802 | ] 803 | }, 804 | { 805 | "cell_type": "markdown", 806 | "metadata": {}, 807 | "source": [ 808 | "## 2a: Find compounds that are active on the specified targets:" 809 | ] 810 | }, 811 | { 812 | "cell_type": "code", 813 | "execution_count": 10, 814 | "metadata": {}, 815 | "outputs": [ 816 | { 817 | "name": "stdout", 818 | "output_type": "stream", 819 | "text": [ 820 | "This is the url string that calls the 'Activities' API with the initial query specification:\n", 821 | "https://www.ebi.ac.uk/chembl/api/data/activity?target_chembl_id__in=CHEMBL5552,CHEMBL1075195&pchembl_value__gte=5&assay_type=B&only=target_chembl_id,target_pref_name,parent_molecule_chembl_id,molecule_chembl_id,molecule_pref_name,pchembl_value&format=json\n" 822 | ] 823 | }, 824 | { 825 | "data": { 826 | "text/html": [ 827 | "

\n", 828 | "\n", 841 | "\n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | " \n", 932 | " \n", 933 | " \n", 934 | " \n", 935 | " \n", 936 | " \n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | " \n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | " \n", 945 | " \n", 946 | " \n", 947 | " \n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | " \n", 960 | " \n", 961 | " \n", 962 | " \n", 963 | " \n", 964 | " \n", 965 | " \n", 966 | "

	molecule_chembl_id	molecule_pref_name	parent_molecule_chembl_id	pchembl_value	target_chembl_id	target_pref_name	value
0	CHEMBL574738	AST-487	CHEMBL574738	5.92	CHEMBL5552	Serine/threonine-protein kinase 25	1200.0
1	CHEMBL522892	DOVITINIB	CHEMBL522892	5.92	CHEMBL5552	Serine/threonine-protein kinase 25	1200.0
2	CHEMBL607707	PELITINIB	CHEMBL607707	5.47	CHEMBL5552	Serine/threonine-protein kinase 25	3400.0
3	CHEMBL191003	JNJ-7706621	CHEMBL191003	6.44	CHEMBL5552	Serine/threonine-protein kinase 25	360.0
4	CHEMBL608533	MIDOSTAURIN	CHEMBL608533	5.75	CHEMBL5552	Serine/threonine-protein kinase 25	1800.0
...	...	...	...	...	...	...	...
78	CHEMBL388978	STAUROSPORINE	CHEMBL388978	8.57	CHEMBL5552	Serine/threonine-protein kinase 25	2.71
79	CHEMBL388978	STAUROSPORINE	CHEMBL388978	7.93	CHEMBL1075195	Serine/threonine-protein kinase TAO2	1.17
80	CHEMBL388978	STAUROSPORINE	CHEMBL388978	8.72	CHEMBL5552	Serine/threonine-protein kinase 25	1.9
81	CHEMBL4569508	None	CHEMBL4569508	5.57	CHEMBL1075195	Serine/threonine-protein kinase TAO2	2700.0
82	CHEMBL4568087	None	CHEMBL4568087	6.38	CHEMBL1075195	Serine/threonine-protein kinase TAO2	420.0

\n", 967 | "

83 rows × 7 columns

\n", 968 | "

" 969 | ], 970 | "text/plain": [ 971 | " molecule_chembl_id molecule_pref_name parent_molecule_chembl_id \\\n", 972 | "0 CHEMBL574738 AST-487 CHEMBL574738 \n", 973 | "1 CHEMBL522892 DOVITINIB CHEMBL522892 \n", 974 | "2 CHEMBL607707 PELITINIB CHEMBL607707 \n", 975 | "3 CHEMBL191003 JNJ-7706621 CHEMBL191003 \n", 976 | "4 CHEMBL608533 MIDOSTAURIN CHEMBL608533 \n", 977 | ".. ... ... ... \n", 978 | "78 CHEMBL388978 STAUROSPORINE CHEMBL388978 \n", 979 | "79 CHEMBL388978 STAUROSPORINE CHEMBL388978 \n", 980 | "80 CHEMBL388978 STAUROSPORINE CHEMBL388978 \n", 981 | "81 CHEMBL4569508 None CHEMBL4569508 \n", 982 | "82 CHEMBL4568087 None CHEMBL4568087 \n", 983 | "\n", 984 | " pchembl_value target_chembl_id target_pref_name \\\n", 985 | "0 5.92 CHEMBL5552 Serine/threonine-protein kinase 25 \n", 986 | "1 5.92 CHEMBL5552 Serine/threonine-protein kinase 25 \n", 987 | "2 5.47 CHEMBL5552 Serine/threonine-protein kinase 25 \n", 988 | "3 6.44 CHEMBL5552 Serine/threonine-protein kinase 25 \n", 989 | "4 5.75 CHEMBL5552 Serine/threonine-protein kinase 25 \n", 990 | ".. ... ... ... \n", 991 | "78 8.57 CHEMBL5552 Serine/threonine-protein kinase 25 \n", 992 | "79 7.93 CHEMBL1075195 Serine/threonine-protein kinase TAO2 \n", 993 | "80 8.72 CHEMBL5552 Serine/threonine-protein kinase 25 \n", 994 | "81 5.57 CHEMBL1075195 Serine/threonine-protein kinase TAO2 \n", 995 | "82 6.38 CHEMBL1075195 Serine/threonine-protein kinase TAO2 \n", 996 | "\n", 997 | " value \n", 998 | "0 1200.0 \n", 999 | "1 1200.0 \n", 1000 | "2 3400.0 \n", 1001 | "3 360.0 \n", 1002 | "4 1800.0 \n", 1003 | ".. ... 
\n", 1004 | "78 2.71 \n", 1005 | "79 1.17 \n", 1006 | "80 1.9 \n", 1007 | "81 2700.0 \n", 1008 | "82 420.0 \n", 1009 | "\n", 1010 | "[83 rows x 7 columns]" 1011 | ] 1012 | }, 1013 | "execution_count": 10, 1014 | "metadata": {}, 1015 | "output_type": "execute_result" 1016 | } 1017 | ], 1018 | "source": [ 1019 | "###############################\n", 1020 | "#Search for activity for a list of targets:\n", 1021 | "###############################\n", 1022 | "\n", 1023 | "#Specify the input parameters: \n", 1024 | "targets = ['CHEMBL5552', 'CHEMBL1075195'] # Select a few example targets: Serine/threonine-protein kinase 25 (STK25) & Serine-threonine kinases TAO2.\n", 1025 | "targets = \",\".join(targets) #Join the targets into a suitable string to fulfil the search conditions of the API\n", 1026 | "pchembl_value = 5 #Specify a minimum threshold of the pChEMBL activity value. Note that pCHEMBL = -log10(IC50, XC50, AC50, Ki, Kd, potency). Greater than or equal to 5 (10um) is a typical minimum rule of thumb for binding activity between a compound and a protein target. 
\n", 1027 | "assay_type = 'B' #Only look for Binding Assays\n", 1028 | "cols = \",\".join(['target_chembl_id', 'target_pref_name', 'parent_molecule_chembl_id','molecule_chembl_id','molecule_pref_name', 'pchembl_value']) # Only return the specified data fields \n", 1029 | "\n", 1030 | "###############################\n", 1031 | "url_stem = \"https://www.ebi.ac.uk\" #This is the stem of the url\n", 1032 | "url_string = url_stem + \"/chembl/api/data/activity?target_chembl_id__in={}&pchembl_value__gte={}&assay_type={}&only={}&format=json\".format(targets, pchembl_value, assay_type, cols) #This is the full url with the specified input parameters\n", 1033 | "url = requests.get( url_string ).json() #This calls the information back from the API using the 'requests' module, and converts it to json format\n", 1034 | "results = url['activities'] #This is a list of the results for activities\n", 1035 | "\n", 1036 | "#This 'while' loop iterates over several pages of records (if required), and collates the list of results\n", 1037 | "#Remember that there is a limit to the number of records returned in any one API call (default is 20 records, maximum is 1000 records) e.g. 
include \"&limit=50\" in the url string\n", 1038 | "#So need to iterate over several pages of records to gather all relevant information together!\n", 1039 | "while url['page_meta']['next']:\n", 1040 | " url = requests.get(url_stem + url['page_meta']['next']).json()\n", 1041 | " results = results + url['activities'] #Add result (as a list) to previous list of results\n", 1042 | "\n", 1043 | "#Convert the list of results into a Pandas dataframe:\n", 1044 | "act_df = pd.DataFrame(results)\n", 1045 | "\n", 1046 | "#Print out some useful information:\n", 1047 | "print(\"This is the url string that calls the 'Activities' API with the initial query specification:\\n{}\".format(url_string) )\n", 1048 | "act_df" 1049 | ] 1050 | }, 1051 | { 1052 | "cell_type": "code", 1053 | "execution_count": null, 1054 | "metadata": {}, 1055 | "outputs": [], 1056 | "source": [] 1057 | }, 1058 | { 1059 | "cell_type": "markdown", 1060 | "metadata": {}, 1061 | "source": [ 1062 | "## 2b: Using the active compounds from the previous step, call the 'molecule' API to find their molecular properties etc., so that the compound list can be prioritised" 1063 | ] 1064 | }, 1065 | { 1066 | "cell_type": "code", 1067 | "execution_count": 12, 1068 | "metadata": {}, 1069 | "outputs": [ 1070 | { 1071 | "name": "stdout", 1072 | "output_type": "stream", 1073 | "text": [ 1074 | "There are 55 compounds initially identified as active on the known targets. e.g.['CHEMBL4568087', 'CHEMBL1721885']...\n" 1075 | ] 1076 | } 1077 | ], 1078 | "source": [ 1079 | "###############################\n", 1080 | "#First find the list of compounds that are within the act_df dataframe:\n", 1081 | "###############################\n", 1082 | "cmpd_chembl_ids = list(set(act_df['molecule_chembl_id']))\n", 1083 | "print(\"There are {} compounds initially identified as active on the known targets. 
e.g.{}...\".format(len(cmpd_chembl_ids),cmpd_chembl_ids[0:2]))" 1084 | ] 1085 | }, 1086 | { 1087 | "cell_type": "code", 1088 | "execution_count": 13, 1089 | "metadata": {}, 1090 | "outputs": [ 1091 | { 1092 | "name": "stdout", 1093 | "output_type": "stream", 1094 | "text": [ 1095 | "This is the url string that calls the 'Molecule' API with the specified query\n", 1096 | "https://www.ebi.ac.uk/chembl/api/data/molecule?molecule_chembl_id__in=CHEMBL296468,CHEMBL180022,CHEMBL475251,CHEMBL535&only=molecule_chembl_id,pref_name,molecule_hierarchy,molecule_properties,max_phase&format=json\n" 1097 | ] 1098 | }, 1099 | { 1100 | "data": { 1101 | "text/html": [ 1102 | "

\n", 1103 | "\n", 1116 | "\n", 1117 | " \n", 1118 | " \n", 1119 | " \n", 1120 | " \n", 1121 | " \n", 1122 | " \n", 1123 | " \n", 1124 | " \n", 1125 | " \n", 1126 | " \n", 1127 | " \n", 1128 | " \n", 1129 | " \n", 1130 | " \n", 1131 | " \n", 1132 | " \n", 1133 | " \n", 1134 | " \n", 1135 | " \n", 1136 | " \n", 1137 | " \n", 1138 | " \n", 1139 | " \n", 1140 | " \n", 1141 | " \n", 1142 | " \n", 1143 | " \n", 1144 | " \n", 1145 | " \n", 1146 | " \n", 1147 | " \n", 1148 | " \n", 1149 | " \n", 1150 | " \n", 1151 | " \n", 1152 | " \n", 1153 | " \n", 1154 | " \n", 1155 | " \n", 1156 | " \n", 1157 | " \n", 1158 | " \n", 1159 | " \n", 1160 | " \n", 1161 | "

	max_phase	molecule_chembl_id	molecule_hierarchy	molecule_properties	pref_name
0	4	CHEMBL535	{'molecule_chembl_id': 'CHEMBL535', 'parent_ch...	{'alogp': '3.33', 'aromatic_rings': 2, 'cx_log...	SUNITINIB
1	1	CHEMBL296468	{'molecule_chembl_id': 'CHEMBL296468', 'parent...	{'alogp': '3.66', 'aromatic_rings': 2, 'cx_log...	BMS-387032
2	4	CHEMBL180022	{'molecule_chembl_id': 'CHEMBL180022', 'parent...	{'alogp': '5.93', 'aromatic_rings': 4, 'cx_log...	NERATINIB
3	2	CHEMBL475251	{'molecule_chembl_id': 'CHEMBL475251', 'parent...	{'alogp': '3.63', 'aromatic_rings': 3, 'cx_log...	R-406

\n", 1162 | "

" 1163 | ], 1164 | "text/plain": [ 1165 | " max_phase molecule_chembl_id \\\n", 1166 | "0 4 CHEMBL535 \n", 1167 | "1 1 CHEMBL296468 \n", 1168 | "2 4 CHEMBL180022 \n", 1169 | "3 2 CHEMBL475251 \n", 1170 | "\n", 1171 | " molecule_hierarchy \\\n", 1172 | "0 {'molecule_chembl_id': 'CHEMBL535', 'parent_ch... \n", 1173 | "1 {'molecule_chembl_id': 'CHEMBL296468', 'parent... \n", 1174 | "2 {'molecule_chembl_id': 'CHEMBL180022', 'parent... \n", 1175 | "3 {'molecule_chembl_id': 'CHEMBL475251', 'parent... \n", 1176 | "\n", 1177 | " molecule_properties pref_name \n", 1178 | "0 {'alogp': '3.33', 'aromatic_rings': 2, 'cx_log... SUNITINIB \n", 1179 | "1 {'alogp': '3.66', 'aromatic_rings': 2, 'cx_log... BMS-387032 \n", 1180 | "2 {'alogp': '5.93', 'aromatic_rings': 4, 'cx_log... NERATINIB \n", 1181 | "3 {'alogp': '3.63', 'aromatic_rings': 3, 'cx_log... R-406 " 1182 | ] 1183 | }, 1184 | "execution_count": 13, 1185 | "metadata": {}, 1186 | "output_type": "execute_result" 1187 | } 1188 | ], 1189 | "source": [ 1190 | "###############################\n", 1191 | "#For the identified compounds, extract their molecular properties and other information from the 'molecule' ChEMBL API\n", 1192 | "###############################\n", 1193 | "\n", 1194 | "#Select a few examples of the active compounds from above (i.e. a reduced list!) 
: \n", 1195 | "cmpd_chembl_ids = \",\".join(['CHEMBL296468', 'CHEMBL180022', 'CHEMBL475251','CHEMBL535']) #Amend the format of the text string so that it is suitable for the API call\n", 1196 | "cols = \",\".join([ 'molecule_chembl_id','pref_name', 'molecule_hierarchy', 'molecule_properties', 'max_phase']) # Only return the specified data fields \n", 1197 | "\n", 1198 | "###############################\n", 1199 | "url_stem = \"https://www.ebi.ac.uk\" #This is the stem of the url\n", 1200 | "url_string = url_stem + \"/chembl/api/data/molecule?molecule_chembl_id__in={}&only={}&format=json\".format(cmpd_chembl_ids,cols) #This is the full url with the specified input parameters\n", 1201 | "url = requests.get( url_string ).json() #This calls the information back from the API using the 'requests' module, and converts it to json format\n", 1202 | "results = url['molecules'] #This is a list of the results for molecules\n", 1203 | "\n", 1204 | "#This 'while' loop iterates over several pages of records (if required), and collates the list of results\n", 1205 | "#Remember that there is a limit to the number of records returned in any one API call (default is 20 records, maximum is 1000 records) e.g. 
include \"&limit=50\" in the url string\n", 1206 | "#So need to iterate over several pages of records to gather all relevant information together!\n", 1207 | "while url['page_meta']['next']:\n", 1208 | " url = requests.get(url_stem + url['page_meta']['next']).json()\n", 1209 | " results = results + url['molecules'] #Add result (as a list) to previous list of results\n", 1210 | "\n", 1211 | "#Convert the list of results into a Pandas dataframe:\n", 1212 | "mol_df = pd.DataFrame(results)\n", 1213 | "\n", 1214 | "#Print out some useful information:\n", 1215 | "print(\"This is the url string that calls the 'Molecule' API with the specified query\\n{}\".format(url_string) )\n", 1216 | "mol_df" 1217 | ] 1218 | }, 1219 | { 1220 | "cell_type": "code", 1221 | "execution_count": 14, 1222 | "metadata": {}, 1223 | "outputs": [ 1224 | { 1225 | "name": "stdout", 1226 | "output_type": "stream", 1227 | "text": [ 1228 | "(4, 18)\n" 1229 | ] 1230 | }, 1231 | { 1232 | "data": { 1233 | "text/html": [ 1234 | "

\n", 1235 | "\n", 1248 | "\n", 1249 | " \n", 1250 | " \n", 1251 | " \n", 1252 | " \n", 1253 | " \n", 1254 | " \n", 1255 | " \n", 1256 | " \n", 1257 | " \n", 1258 | " \n", 1259 | " \n", 1260 | " \n", 1261 | " \n", 1262 | " \n", 1263 | " \n", 1264 | " \n", 1265 | " \n", 1266 | " \n", 1267 | " \n", 1268 | " \n", 1269 | " \n", 1270 | " \n", 1271 | " \n", 1272 | " \n", 1273 | " \n", 1274 | " \n", 1275 | " \n", 1276 | " \n", 1277 | " \n", 1278 | " \n", 1279 | " \n", 1280 | " \n", 1281 | " \n", 1282 | " \n", 1283 | " \n", 1284 | " \n", 1285 | " \n", 1286 | " \n", 1287 | " \n", 1288 | " \n", 1289 | " \n", 1290 | " \n", 1291 | " \n", 1292 | " \n", 1293 | " \n", 1294 | " \n", 1295 | " \n", 1296 | " \n", 1297 | " \n", 1298 | " \n", 1299 | " \n", 1300 | " \n", 1301 | " \n", 1302 | " \n", 1303 | " \n", 1304 | " \n", 1305 | " \n", 1306 | " \n", 1307 | " \n", 1308 | " \n", 1309 | " \n", 1310 | " \n", 1311 | " \n", 1312 | " \n", 1313 | " \n", 1314 | " \n", 1315 | " \n", 1316 | " \n", 1317 | " \n", 1318 | " \n", 1319 | " \n", 1320 | " \n", 1321 | " \n", 1322 | " \n", 1323 | " \n", 1324 | " \n", 1325 | " \n", 1326 | " \n", 1327 | " \n", 1328 | " \n", 1329 | " \n", 1330 | " \n", 1331 | " \n", 1332 | " \n", 1333 | " \n", 1334 | " \n", 1335 | " \n", 1336 | " \n", 1337 | " \n", 1338 | " \n", 1339 | " \n", 1340 | " \n", 1341 | " \n", 1342 | " \n", 1343 | " \n", 1344 | " \n", 1345 | " \n", 1346 | " \n", 1347 | " \n", 1348 | " \n", 1349 | " \n", 1350 | " \n", 1351 | " \n", 1352 | " \n", 1353 | " \n", 1354 | " \n", 1355 | " \n", 1356 | " \n", 1357 | " \n", 1358 | "

	max_phase	molecule_chembl_id	molecule_hierarchy	molecule_properties	pref_name	parent_chembl_id	cx_logd	cx_logp	cx_most_apka	cx_most_bpka	alogp	hba	hbd	mw_freebase	full_mwt	num_ro5_violations	psa	heavy_atoms
0	4	CHEMBL535	{'molecule_chembl_id': 'CHEMBL535', 'parent_ch...	{'alogp': '3.33', 'aromatic_rings': 2, 'cx_log...	SUNITINIB	CHEMBL535	1.28	2.93	11.46	9.04	3.33	3	3	398.48	398.48	0	77.23	29
1	1	CHEMBL296468	{'molecule_chembl_id': 'CHEMBL296468', 'parent...	{'alogp': '3.66', 'aromatic_rings': 2, 'cx_log...	BMS-387032	CHEMBL296468	0.33	0.94	7.94	10.17	3.66	7	2	380.54	380.54	0	80.05	25
2	4	CHEMBL180022	{'molecule_chembl_id': 'CHEMBL180022', 'parent...	{'alogp': '5.93', 'aromatic_rings': 4, 'cx_log...	NERATINIB	CHEMBL180022	3.05	4.47	12.55	8.81	5.93	8	2	557.05	557.05	2	112.40	40
3	2	CHEMBL475251	{'molecule_chembl_id': 'CHEMBL475251', 'parent...	{'alogp': '3.63', 'aromatic_rings': 3, 'cx_log...	R-406	CHEMBL475251	3.63	3.63	10.90	3.05	3.63	10	3	470.46	470.46	0	128.75	34

\n", 1359 | "

" 1360 | ], 1361 | "text/plain": [ 1362 | " max_phase molecule_chembl_id \\\n", 1363 | "0 4 CHEMBL535 \n", 1364 | "1 1 CHEMBL296468 \n", 1365 | "2 4 CHEMBL180022 \n", 1366 | "3 2 CHEMBL475251 \n", 1367 | "\n", 1368 | " molecule_hierarchy \\\n", 1369 | "0 {'molecule_chembl_id': 'CHEMBL535', 'parent_ch... \n", 1370 | "1 {'molecule_chembl_id': 'CHEMBL296468', 'parent... \n", 1371 | "2 {'molecule_chembl_id': 'CHEMBL180022', 'parent... \n", 1372 | "3 {'molecule_chembl_id': 'CHEMBL475251', 'parent... \n", 1373 | "\n", 1374 | " molecule_properties pref_name \\\n", 1375 | "0 {'alogp': '3.33', 'aromatic_rings': 2, 'cx_log... SUNITINIB \n", 1376 | "1 {'alogp': '3.66', 'aromatic_rings': 2, 'cx_log... BMS-387032 \n", 1377 | "2 {'alogp': '5.93', 'aromatic_rings': 4, 'cx_log... NERATINIB \n", 1378 | "3 {'alogp': '3.63', 'aromatic_rings': 3, 'cx_log... R-406 \n", 1379 | "\n", 1380 | " parent_chembl_id cx_logd cx_logp cx_most_apka cx_most_bpka alogp hba hbd \\\n", 1381 | "0 CHEMBL535 1.28 2.93 11.46 9.04 3.33 3 3 \n", 1382 | "1 CHEMBL296468 0.33 0.94 7.94 10.17 3.66 7 2 \n", 1383 | "2 CHEMBL180022 3.05 4.47 12.55 8.81 5.93 8 2 \n", 1384 | "3 CHEMBL475251 3.63 3.63 10.90 3.05 3.63 10 3 \n", 1385 | "\n", 1386 | " mw_freebase full_mwt num_ro5_violations psa heavy_atoms \n", 1387 | "0 398.48 398.48 0 77.23 29 \n", 1388 | "1 380.54 380.54 0 80.05 25 \n", 1389 | "2 557.05 557.05 2 112.40 40 \n", 1390 | "3 470.46 470.46 0 128.75 34 " 1391 | ] 1392 | }, 1393 | "execution_count": 14, 1394 | "metadata": {}, 1395 | "output_type": "execute_result" 1396 | } 1397 | ], 1398 | "source": [ 1399 | "###########################\n", 1400 | "# Convert nested cells (ie those containing a dictionary) to individual columns in the dataframe so that is it easier to filter!\n", 1401 | "###########################\n", 1402 | "# Molecule hierarchy: \n", 1403 | "mol_df['parent_chembl_id'] = mol_df['molecule_hierarchy'].apply(lambda x: x['parent_chembl_id'])\n", 1404 | "\n", 1405 | "#Physicochemical properties 
(only report if cells are not null)\n", 1406 | "mol_df['cx_logd'] = mol_df.loc[ mol_df['molecule_properties'].notnull(), 'molecule_properties'].apply(lambda x: x['cx_logd'])\n", 1407 | "mol_df['cx_logp'] = mol_df.loc[ mol_df['molecule_properties'].notnull(), 'molecule_properties'].apply(lambda x: x['cx_logp'])\n", 1408 | "mol_df['cx_most_apka'] = mol_df.loc[ mol_df['molecule_properties'].notnull(), 'molecule_properties'].apply(lambda x: x['cx_most_apka'])\n", 1409 | "mol_df['cx_most_bpka'] = mol_df.loc[ mol_df['molecule_properties'].notnull(), 'molecule_properties'].apply(lambda x: x['cx_most_bpka'])\n", 1410 | "mol_df['alogp'] = mol_df.loc[ mol_df['molecule_properties'].notnull(), 'molecule_properties'].apply(lambda x: x['alogp'])\n", 1411 | "mol_df['hba'] = mol_df.loc[ mol_df['molecule_properties'].notnull(), 'molecule_properties'].apply(lambda x: x['hba'])\n", 1412 | "mol_df['hbd'] = mol_df.loc[ mol_df['molecule_properties'].notnull(), 'molecule_properties'].apply(lambda x: x['hbd'])\n", 1413 | "mol_df['mw_freebase'] = mol_df.loc[ mol_df['molecule_properties'].notnull(), 'molecule_properties'].apply(lambda x: x['mw_freebase']) #This is the mwt of the parent compound\n", 1414 | "mol_df['full_mwt'] = mol_df.loc[ mol_df['molecule_properties'].notnull(), 'molecule_properties'].apply(lambda x: x['full_mwt']) #This is the mwt of the full compound including any salt\n", 1415 | "mol_df['num_ro5_violations'] = mol_df.loc[ mol_df['molecule_properties'].notnull(), 'molecule_properties'].apply(lambda x: x['num_ro5_violations'])\n", 1416 | "mol_df['psa'] = mol_df.loc[ mol_df['molecule_properties'].notnull(), 'molecule_properties'].apply(lambda x: x['psa'])\n", 1417 | "mol_df['heavy_atoms'] = mol_df.loc[ mol_df['molecule_properties'].notnull(), 'molecule_properties'].apply(lambda x: x['heavy_atoms'])\n", 1418 | "\n", 1419 | "mol_df" 1420 | ] 1421 | }, 1422 | { 1423 | "cell_type": "code", 1424 | "execution_count": 16, 1425 | "metadata": {}, 1426 | "outputs": [ 1427 | { 1428 | 
"data": { 1429 | "text/html": [ 1430 | "

\n", 1431 | "\n", 1444 | "\n", 1445 | " \n", 1446 | " \n", 1447 | " \n", 1448 | " \n", 1449 | " \n", 1450 | " \n", 1451 | " \n", 1452 | " \n", 1453 | " \n", 1454 | " \n", 1455 | " \n", 1456 | " \n", 1457 | " \n", 1458 | " \n", 1459 | " \n", 1460 | " \n", 1461 | " \n", 1462 | " \n", 1463 | " \n", 1464 | " \n", 1465 | " \n", 1466 | " \n", 1467 | " \n", 1468 | " \n", 1469 | " \n", 1470 | " \n", 1471 | " \n", 1472 | " \n", 1473 | " \n", 1474 | " \n", 1475 | " \n", 1476 | " \n", 1477 | " \n", 1478 | " \n", 1479 | " \n", 1480 | " \n", 1481 | " \n", 1482 | " \n", 1483 | " \n", 1484 | " \n", 1485 | " \n", 1486 | " \n", 1487 | " \n", 1488 | " \n", 1489 | " \n", 1490 | " \n", 1491 | " \n", 1492 | " \n", 1493 | " \n", 1494 | " \n", 1495 | " \n", 1496 | " \n", 1497 | " \n", 1498 | " \n", 1499 | " \n", 1500 | " \n", 1501 | " \n", 1502 | " \n", 1503 | " \n", 1504 | " \n", 1505 | " \n", 1506 | " \n", 1507 | " \n", 1508 | " \n", 1509 | " \n", 1510 | " \n", 1511 | " \n", 1512 | "

	max_phase	molecule_chembl_id	molecule_hierarchy	molecule_properties	pref_name	parent_chembl_id	cx_logd	cx_logp	cx_most_apka	cx_most_bpka	alogp	hba	hbd	mw_freebase	full_mwt	num_ro5_violations	psa	heavy_atoms
0	4	CHEMBL535	{'molecule_chembl_id': 'CHEMBL535', 'parent_ch...	{'alogp': '3.33', 'aromatic_rings': 2, 'cx_log...	SUNITINIB	CHEMBL535	1.28	2.93	11.46	9.04	3.33	3	3	398.48	398.48	0	77.23	29
2	4	CHEMBL180022	{'molecule_chembl_id': 'CHEMBL180022', 'parent...	{'alogp': '5.93', 'aromatic_rings': 4, 'cx_log...	NERATINIB	CHEMBL180022	3.05	4.47	12.55	8.81	5.93	8	2	557.05	557.05	2	112.40	40

\n", 1513 | "

" 1514 | ], 1515 | "text/plain": [ 1516 | " max_phase molecule_chembl_id \\\n", 1517 | "0 4 CHEMBL535 \n", 1518 | "2 4 CHEMBL180022 \n", 1519 | "\n", 1520 | " molecule_hierarchy \\\n", 1521 | "0 {'molecule_chembl_id': 'CHEMBL535', 'parent_ch... \n", 1522 | "2 {'molecule_chembl_id': 'CHEMBL180022', 'parent... \n", 1523 | "\n", 1524 | " molecule_properties pref_name \\\n", 1525 | "0 {'alogp': '3.33', 'aromatic_rings': 2, 'cx_log... SUNITINIB \n", 1526 | "2 {'alogp': '5.93', 'aromatic_rings': 4, 'cx_log... NERATINIB \n", 1527 | "\n", 1528 | " parent_chembl_id cx_logd cx_logp cx_most_apka cx_most_bpka alogp hba hbd \\\n", 1529 | "0 CHEMBL535 1.28 2.93 11.46 9.04 3.33 3 3 \n", 1530 | "2 CHEMBL180022 3.05 4.47 12.55 8.81 5.93 8 2 \n", 1531 | "\n", 1532 | " mw_freebase full_mwt num_ro5_violations psa heavy_atoms \n", 1533 | "0 398.48 398.48 0 77.23 29 \n", 1534 | "2 557.05 557.05 2 112.40 40 " 1535 | ] 1536 | }, 1537 | "execution_count": 16, 1538 | "metadata": {}, 1539 | "output_type": "execute_result" 1540 | } 1541 | ], 1542 | "source": [ 1543 | "###########################\n", 1544 | "#Filter the compounds based on their molecular properties (e.g. 
molecular weight < 400 amu), or max_phase, for example:\n", 1545 | "###########################\n", 1546 | "\n", 1547 | "#Now keep only compounds with max_phase = 4 (ie approved drugs), for example: \n", 1548 | "res = mol_df[ mol_df['max_phase'] == 4 ]\n", 1549 | "\n", 1550 | "#Display results:\n", 1551 | "res" 1552 | ] 1553 | }, 1554 | { 1555 | "cell_type": "code", 1556 | "execution_count": null, 1557 | "metadata": {}, 1558 | "outputs": [], 1559 | "source": [] 1560 | }, 1561 | { 1562 | "cell_type": "code", 1563 | "execution_count": null, 1564 | "metadata": {}, 1565 | "outputs": [], 1566 | "source": [] 1567 | } 1568 | ], 1569 | "metadata": { 1570 | "kernelspec": { 1571 | "display_name": "Python (my-rdkit-env)", 1572 | "language": "python", 1573 | "name": "my-rdkit-env" 1574 | }, 1575 | "language_info": { 1576 | "codemirror_mode": { 1577 | "name": "ipython", 1578 | "version": 3 1579 | }, 1580 | "file_extension": ".py", 1581 | "mimetype": "text/x-python", 1582 | "name": "python", 1583 | "nbconvert_exporter": "python", 1584 | "pygments_lexer": "ipython3", 1585 | "version": "3.7.3" 1586 | } 1587 | }, 1588 | "nbformat": 4, 1589 | "nbformat_minor": 4 1590 | } 1591 | -------------------------------------------------------------------------------- /ChEMBL_webresource_client_examples.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# ChEMBL webresource client examples\n", 8 | "\n", 9 | "The library helps to access ChEMBL data and cheminformatics tools from Python. You don't need to know how to write SQL. You don't need to know how to interact with REST APIs. You don't need to compile or install any cheminformatics frameworks. Results are cached.\n", 10 | "\n", 11 | "The client handles interaction with the HTTPS protocol and caches all results in the local file system for faster retrieval. 
Abstracting away all network-related tasks, the client provides the end user with a convenient interface, giving the impression of working with a local resource. The design is based on the Django QuerySet interface. The client also implements lazy evaluation of results, which means it will only evaluate a request for data when a value is required. This approach reduces the number of network requests and increases performance." 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "1. [Compounds](#section1)\n", 19 | "1. [Drugs](#section2)\n", 20 | "1. [Targets](#section3)\n", 21 | "1. [Activities](#section4)\n", 22 | "1. [Assays](#section5)\n", 23 | "1. [Tissues](#section6)\n", 24 | "1. [Cells](#section7)\n", 25 | "1. [Utils](#section8)" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "## Available data entities\n", 33 | "\n", 34 | "You can list available data entities using the following code" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 1, 40 | "metadata": {}, 41 | "outputs": [ 42 | { 43 | "name": "stdout", 44 | "output_type": "stream", 45 | "text": [ 46 | "['activity', 'activity_supplementary_data_by_activity', 'assay', 'assay_class', 'atc_class', 'binding_site', 'biotherapeutic', 'cell_line', 'chembl_id_lookup', 'compound_record', 'compound_structural_alert', 'description', 'document', 'document_similarity', 'drug', 'drug_indication', 'drug_warning', 'go_slim', 'image', 'mechanism', 'metabolism', 'molecule', 'molecule_form', 'official', 'organism', 'protein_class', 'similarity', 'source', 'substructure', 'target', 'target_component', 'target_relation', 'tissue', 'xref_source']\n" 47 | ] 48 | } 49 | ], 50 | "source": [ 51 | "from chembl_webresource_client.new_client import new_client\n", 52 | "\n", 53 | "available_resources = [resource for resource in dir(new_client) if not resource.startswith('_')]\n", 54 | "print(available_resources)" 55 | ] 56 | 
}, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "## Available filters\n", 62 | "\n", 63 | "The design of the client is based on Django QuerySet (https://docs.djangoproject.com/en/1.11/ref/models/querysets) and the most important lookup types are supported. These are:\n", 64 | "\n", 65 | "- exact\n", 66 | "- iexact\n", 67 | "- contains\n", 68 | "- icontains\n", 69 | "- in\n", 70 | "- gt\n", 71 | "- gte\n", 72 | "- lt\n", 73 | "- lte\n", 74 | "- startswith\n", 75 | "- istartswith\n", 76 | "- endswith\n", 77 | "- iendswith\n", 78 | "- range\n", 79 | "- isnull\n", 80 | "- regex\n", 81 | "- iregex" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "## `Only` operator\n", 89 | "\n", 90 | "`only` is a special method that limits the results to a selected set of database fields. `only` takes a list of fields (as a single argument) to be included in the results. Note that the specified fields have to exist in the API endpoint against which `only` is executed. Using `only` normally makes an API call faster because returning less information saves bandwidth. The API logic will also check if any SQL joins are necessary to return the specified field(s) and will exclude unnecessary joins, which critically improves performance.\n", 91 | "\n", 92 | "Please note that `only` has one limitation: any specified fields will ignore nested fields i.e. calling only(['molecule_properties__alogp']) is equivalent to only(['molecule_properties']).\n", 93 | "\n", 94 | "For many-to-many relationships `only` will not make any SQL join optimisation." 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": {}, 100 | "source": [ 101 | "# 1. Compounds\n", 102 | "- Compounds in ChEMBL usually have associated bioactivity data. 
For example, the activity of a compound may have been measured in an experiment against a particular target and results in a certain IC50 value that has been published in the scientific literature.\n", 103 | "\n", 104 | "- Compound records may be retrieved in a number of ways, such as a lookup of an individual compound using various identifiers or by searching for compounds via similarity." 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "## Find a compound by pref_name using the `molecule` endpoint\n", 112 | "- Note the double underscore to filter for a (case insensitive) `iexact` name match within the 'pref_name' database field" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 2, 118 | "metadata": {}, 119 | "outputs": [ 120 | { 121 | "data": { 122 | "text/plain": [ 123 | "[{'atc_classifications': ['B01AC06', 'N02BA01', 'N02BA51', 'A01AD05', 'N02BA71'], 'availability_type': 2, 'biotherapeutic': None, 'black_box_warning': 0, 'chebi_par_id': 15365, 'chirality': 2, 'cross_references': [{'xref_id': 'aspirin', 'xref_name': 'aspirin', 'xref_src': 'DailyMed'}, {'xref_id': '144203627', 'xref_name': 'SID: 144203627', 'xref_src': 'PubChem'}, {'xref_id': '144209315', 'xref_name': 'SID: 144209315', 'xref_src': 'PubChem'}, {'xref_id': '144210466', 'xref_name': 'SID: 144210466', 'xref_src': 'PubChem'}, {'xref_id': '170465039', 'xref_name': 'SID: 170465039', 'xref_src': 'PubChem'}, {'xref_id': '17389202', 'xref_name': 'SID: 17389202', 'xref_src': 'PubChem'}, {'xref_id': '17390036', 'xref_name': 'SID: 17390036', 'xref_src': 'PubChem'}, {'xref_id': '174007205', 'xref_name': 'SID: 174007205', 'xref_src': 'PubChem'}, {'xref_id': '26747283', 'xref_name': 'SID: 26747283', 'xref_src': 'PubChem'}, {'xref_id': '26752858', 'xref_name': 'SID: 26752858', 'xref_src': 'PubChem'}, {'xref_id': '47193676', 'xref_name': 'SID: 47193676', 'xref_src': 'PubChem'}, {'xref_id': '50105490', 'xref_name': 'SID: 50105490', 
'xref_src': 'PubChem'}, {'xref_id': '85230910', 'xref_name': 'SID: 85230910', 'xref_src': 'PubChem'}, {'xref_id': '87798', 'xref_name': 'SID: 87798', 'xref_src': 'PubChem'}, {'xref_id': '90340586', 'xref_name': 'SID: 90340586', 'xref_src': 'PubChem'}, {'xref_id': '14', 'xref_name': 'aspirin', 'xref_src': 'TG-GATEs'}, {'xref_id': 'Aspirin', 'xref_name': None, 'xref_src': 'Wikipedia'}], 'dosed_ingredient': True, 'first_approval': 1950, 'first_in_class': 0, 'helm_notation': None, 'indication_class': 'Analgesic; Antirheumatic; Antipyretic', 'inorganic_flag': 0, 'max_phase': 4, 'molecule_chembl_id': 'CHEMBL25', 'molecule_hierarchy': {'molecule_chembl_id': 'CHEMBL25', 'parent_chembl_id': 'CHEMBL25'}, 'molecule_properties': {'alogp': '1.31', 'aromatic_rings': 1, 'cx_logd': '-2.16', 'cx_logp': '1.24', 'cx_most_apka': '3.41', 'cx_most_bpka': None, 'full_molformula': 'C9H8O4', 'full_mwt': '180.16', 'hba': 3, 'hba_lipinski': 4, 'hbd': 1, 'hbd_lipinski': 1, 'heavy_atoms': 13, 'molecular_species': 'ACID', 'mw_freebase': '180.16', 'mw_monoisotopic': '180.0423', 'num_lipinski_ro5_violations': 0, 'num_ro5_violations': 0, 'psa': '63.60', 'qed_weighted': '0.55', 'ro3_pass': 'N', 'rtb': 2}, 'molecule_structures': {'canonical_smiles': 'CC(=O)Oc1ccccc1C(=O)O', 'molfile': '\\n RDKit 2D\\n\\n 13 13 0 0 0 0 0 0 0 0999 V2000\\n 8.8810 -2.1206 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 8.8798 -2.9479 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 9.5946 -3.3607 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 10.3110 -2.9474 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 10.3081 -2.1170 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 9.5928 -1.7078 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 11.0210 -1.7018 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 11.7369 -2.1116 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\\n 11.0260 -3.3588 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\\n 11.0273 -4.1837 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 11.7423 -4.5949 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 10.3136 -4.5972 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\\n 11.0178 -0.8769 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\\n 1 
2 2 0\\n 5 7 1 0\\n 3 4 2 0\\n 7 8 2 0\\n 4 9 1 0\\n 4 5 1 0\\n 9 10 1 0\\n 2 3 1 0\\n 10 11 1 0\\n 5 6 2 0\\n 10 12 2 0\\n 6 1 1 0\\n 7 13 1 0\\nM END\\n\\n> \\nCHEMBL25\\n\\n> \\nASPIRIN\\n\\n', 'standard_inchi': 'InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)', 'standard_inchi_key': 'BSYNRYMUTXBXSQ-UHFFFAOYSA-N'}, 'molecule_synonyms': [{'molecule_synonym': '8-hour bayer', 'syn_type': 'TRADE_NAME', 'synonyms': '8-HOUR BAYER'}, {'molecule_synonym': 'Acetosalic Acid', 'syn_type': 'TRADE_NAME', 'synonyms': 'Acetosalic Acid'}, {'molecule_synonym': 'Acetylsalic acid', 'syn_type': 'TRADE_NAME', 'synonyms': 'ACETYLSALIC ACID'}, {'molecule_synonym': 'Acetylsalicylic Acid', 'syn_type': 'INN', 'synonyms': 'Acetylsalicylic Acid'}, {'molecule_synonym': 'Acetylsalicylic Acid', 'syn_type': 'TRADE_NAME', 'synonyms': 'Acetylsalicylic Acid'}, {'molecule_synonym': 'Acetylsalicylic acid', 'syn_type': 'ATC', 'synonyms': 'ACETYLSALICYLIC ACID'}, {'molecule_synonym': 'Acetylsalicylic acid', 'syn_type': 'OTHER', 'synonyms': 'ACETYLSALICYLIC ACID'}, {'molecule_synonym': 'Alka rapid', 'syn_type': 'TRADE_NAME', 'synonyms': 'ALKA RAPID'}, {'molecule_synonym': 'Anadin all night', 'syn_type': 'TRADE_NAME', 'synonyms': 'ANADIN ALL NIGHT'}, {'molecule_synonym': 'Angettes 75', 'syn_type': 'TRADE_NAME', 'synonyms': 'ANGETTES 75'}, {'molecule_synonym': 'Aspirin', 'syn_type': 'USAN', 'synonyms': 'Aspirin'}, {'molecule_synonym': 'Aspirin', 'syn_type': 'BAN', 'synonyms': 'ASPIRIN'}, {'molecule_synonym': 'Aspirin', 'syn_type': 'BNF', 'synonyms': 'ASPIRIN'}, {'molecule_synonym': 'Aspirin', 'syn_type': 'FDA', 'synonyms': 'ASPIRIN'}, {'molecule_synonym': 'Aspirin', 'syn_type': 'JAN', 'synonyms': 'ASPIRIN'}, {'molecule_synonym': 'Aspirin', 'syn_type': 'MERCK_INDEX', 'synonyms': 'ASPIRIN'}, {'molecule_synonym': 'Aspirin', 'syn_type': 'OTHER', 'synonyms': 'ASPIRIN'}, {'molecule_synonym': 'Aspirin', 'syn_type': 'TRADE_NAME', 'synonyms': 'ASPIRIN'}, {'molecule_synonym': 'Aspirin', 
'syn_type': 'USP', 'synonyms': 'ASPIRIN'}, {'molecule_synonym': 'Aspro clr', 'syn_type': 'TRADE_NAME', 'synonyms': 'ASPRO CLR'}, {'molecule_synonym': 'BAY1019036', 'syn_type': 'RESEARCH_CODE', 'synonyms': 'BAY1019036'}, {'molecule_synonym': 'Bayer extra strength aspirin for migraine pain', 'syn_type': 'TRADE_NAME', 'synonyms': 'BAYER EXTRA STRENGTH ASPIRIN FOR MIGRAINE PAIN'}, {'molecule_synonym': 'Danamep', 'syn_type': 'TRADE_NAME', 'synonyms': 'DANAMEP'}, {'molecule_synonym': 'Disprin cv', 'syn_type': 'TRADE_NAME', 'synonyms': 'DISPRIN CV'}, {'molecule_synonym': 'Disprin direct', 'syn_type': 'TRADE_NAME', 'synonyms': 'DISPRIN DIRECT'}, {'molecule_synonym': 'Durlaza', 'syn_type': 'TRADE_NAME', 'synonyms': 'DURLAZA'}, {'molecule_synonym': 'Ecotrin', 'syn_type': 'TRADE_NAME', 'synonyms': 'Ecotrin'}, {'molecule_synonym': 'Enprin', 'syn_type': 'TRADE_NAME', 'synonyms': 'ENPRIN'}, {'molecule_synonym': 'Equi-Prin', 'syn_type': 'TRADE_NAME', 'synonyms': 'Equi-Prin'}, {'molecule_synonym': 'Gencardia', 'syn_type': 'TRADE_NAME', 'synonyms': 'GENCARDIA'}, {'molecule_synonym': 'Levius', 'syn_type': 'TRADE_NAME', 'synonyms': 'LEVIUS'}, {'molecule_synonym': 'Max strgh aspro clr', 'syn_type': 'TRADE_NAME', 'synonyms': 'MAX STRGH ASPRO CLR'}, {'molecule_synonym': 'Measurin', 'syn_type': 'TRADE_NAME', 'synonyms': 'MEASURIN'}, {'molecule_synonym': 'Micropirin ec', 'syn_type': 'TRADE_NAME', 'synonyms': 'MICROPIRIN EC'}, {'molecule_synonym': 'NSC-27223', 'syn_type': 'RESEARCH_CODE', 'synonyms': 'NSC-27223'}, {'molecule_synonym': 'NSC-406186', 'syn_type': 'RESEARCH_CODE', 'synonyms': 'NSC-406186'}, {'molecule_synonym': 'Nu-seals 300', 'syn_type': 'TRADE_NAME', 'synonyms': 'NU-SEALS 300'}, {'molecule_synonym': 'Nu-seals 600', 'syn_type': 'TRADE_NAME', 'synonyms': 'NU-SEALS 600'}, {'molecule_synonym': 'Nu-seals 75', 'syn_type': 'TRADE_NAME', 'synonyms': 'NU-SEALS 75'}, {'molecule_synonym': 'Nu-seals cardio 75', 'syn_type': 'TRADE_NAME', 'synonyms': 'NU-SEALS CARDIO 75'}, 
{'molecule_synonym': 'Paynocil', 'syn_type': 'TRADE_NAME', 'synonyms': 'PAYNOCIL'}, {'molecule_synonym': 'Platet', 'syn_type': 'TRADE_NAME', 'synonyms': 'PLATET'}, {'molecule_synonym': 'Platet 300', 'syn_type': 'TRADE_NAME', 'synonyms': 'PLATET 300'}, {'molecule_synonym': 'Postmi 300', 'syn_type': 'TRADE_NAME', 'synonyms': 'POSTMI 300'}, {'molecule_synonym': 'Postmi 75', 'syn_type': 'TRADE_NAME', 'synonyms': 'POSTMI 75'}, {'molecule_synonym': 'Salicylic Acid Acetate', 'syn_type': 'TRADE_NAME', 'synonyms': 'Salicylic Acid Acetate'}, {'molecule_synonym': 'Vazalore', 'syn_type': 'TRADE_NAME', 'synonyms': 'VAZALORE'}], 'molecule_type': 'Small molecule', 'natural_product': 0, 'oral': True, 'parenteral': False, 'polymer_flag': False, 'pref_name': 'ASPIRIN', 'prodrug': 0, 'structure_type': 'MOL', 'therapeutic_flag': True, 'topical': False, 'usan_stem': None, 'usan_stem_definition': None, 'usan_substem': None, 'usan_year': None, 'withdrawn_class': None, 'withdrawn_country': None, 'withdrawn_flag': False, 'withdrawn_reason': None, 'withdrawn_year': None}]" 124 | ] 125 | }, 126 | "execution_count": 2, 127 | "metadata": {}, 128 | "output_type": "execute_result" 129 | } 130 | ], 131 | "source": [ 132 | "from chembl_webresource_client.new_client import new_client #Import ChEMBL python client\n", 133 | "\n", 134 | "molecule = new_client.molecule\n", 135 | "mols = molecule.filter(pref_name__iexact='aspirin')\n", 136 | "mols" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "metadata": {}, 142 | "source": [ 143 | "## Find a compound by its synonyms\n", 144 | "\n", 145 | "- In some cases a compound may be more commonly known by a synonym than its preferred name in ChEMBL (pref_name)\n", 146 | "- The 'molecule_synonym' field is nested within the 'molecule_synonyms' field hence the first double underscore, followed by a second double underscore to filter for a (case insensitive) iexact name match within the 'molecule_synonym' database field\n", 147 | "- Use the `only` method to 
specify which database fields you want to be included in response" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 3, 153 | "metadata": {}, 154 | "outputs": [ 155 | { 156 | "data": { 157 | "text/plain": [ 158 | "[{'molecule_chembl_id': 'CHEMBL192'}, {'molecule_chembl_id': 'CHEMBL1737'}]" 159 | ] 160 | }, 161 | "execution_count": 3, 162 | "metadata": {}, 163 | "output_type": "execute_result" 164 | } 165 | ], 166 | "source": [ 167 | "from chembl_webresource_client.new_client import new_client\n", 168 | "\n", 169 | "molecule = new_client.molecule\n", 170 | "mols = molecule.filter(molecule_synonyms__molecule_synonym__iexact='viagra').only('molecule_chembl_id')\n", 171 | "mols" 172 | ] 173 | }, 174 | { 175 | "cell_type": "markdown", 176 | "metadata": {}, 177 | "source": [ 178 | "## Get a single compound by ChEMBL id\n", 179 | "\n", 180 | "- All the main entities in the ChEMBL database have a ChEMBL ID. This is a stable identifier designed for straightforward lookup of data." 
181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": 4, 186 | "metadata": {}, 187 | "outputs": [ 188 | { 189 | "data": { 190 | "text/plain": [ 191 | "[{'molecule_chembl_id': 'CHEMBL192', 'molecule_structures': {'canonical_smiles': 'CCCc1nn(C)c2c(=O)[nH]c(-c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3OCC)nc12', 'molfile': '\\n RDKit 2D\\n\\n 33 36 0 0 0 0 0 0 0 0999 V2000\\n 2.1000 -0.0042 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 2.1000 0.7000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n -1.5375 -0.0042 0.0000 S 0 0 0 0 0 0 0 0 0 0 0 0\\n 1.4917 -0.3667 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\\n 0.8792 -0.0042 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 2.8042 0.9083 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\\n 1.4917 1.0625 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 0.8792 0.6833 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\\n 3.2042 0.3458 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\\n 2.8042 -0.2417 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 0.2875 -0.3750 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n -2.1583 -0.3750 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\\n -0.9333 -0.3750 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n -0.3208 -0.0333 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n -1.1875 0.6083 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\\n -1.8958 0.6083 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\\n -3.3958 -1.0917 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\\n -2.7833 -0.0042 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n -2.1583 -1.0917 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 0.2875 -1.1125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 1.4917 1.7708 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\\n -0.9333 -1.1125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n -0.3208 -1.4542 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n -3.3958 -0.3750 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n -2.7833 -1.4417 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 3.0750 1.5750 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 2.8042 -0.9500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 0.8792 -1.4542 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\\n -3.9958 -1.4292 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 1.4958 -1.1000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 3.4167 -1.3125 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 2.1125 -1.4500 0.0000 C 0 0 
0 0 0 0 0 0 0 0 0 0\\n 4.0375 -0.9542 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 2 1 2 0\\n 3 13 1 0\\n 4 1 1 0\\n 5 4 2 0\\n 6 2 1 0\\n 7 2 1 0\\n 8 5 1 0\\n 9 10 2 0\\n 10 1 1 0\\n 11 5 1 0\\n 12 3 1 0\\n 13 14 2 0\\n 14 11 1 0\\n 15 3 2 0\\n 16 3 2 0\\n 17 25 1 0\\n 18 12 1 0\\n 19 12 1 0\\n 20 11 2 0\\n 21 7 2 0\\n 22 23 2 0\\n 23 20 1 0\\n 24 18 1 0\\n 25 19 1 0\\n 26 6 1 0\\n 27 10 1 0\\n 28 20 1 0\\n 29 17 1 0\\n 30 28 1 0\\n 31 27 1 0\\n 32 30 1 0\\n 33 31 1 0\\n 9 6 1 0\\n 8 7 1 0\\n 22 13 1 0\\n 17 24 1 0\\nM END\\n\\n> \\nCHEMBL192\\n\\n> \\nSILDENAFIL\\n\\n', 'standard_inchi': 'InChI=1S/C22H30N6O4S/c1-5-7-17-19-20(27(4)25-17)22(29)24-21(23-19)16-14-15(8-9-18(16)32-6-2)33(30,31)28-12-10-26(3)11-13-28/h8-9,14H,5-7,10-13H2,1-4H3,(H,23,24,29)', 'standard_inchi_key': 'BNRNXUUZRGQAQC-UHFFFAOYSA-N'}, 'pref_name': 'SILDENAFIL'}]" 192 | ] 193 | }, 194 | "execution_count": 4, 195 | "metadata": {}, 196 | "output_type": "execute_result" 197 | } 198 | ], 199 | "source": [ 200 | "from chembl_webresource_client.new_client import new_client\n", 201 | "\n", 202 | "molecule = new_client.molecule\n", 203 | "m1 = molecule.filter(chembl_id='CHEMBL192').only(['molecule_chembl_id', 'pref_name', 'molecule_structures'])\n", 204 | "m1" 205 | ] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "metadata": {}, 210 | "source": [ 211 | "## Get many compounds using a list of their identifiers (ChEMBL_id)\n", 212 | "- Use the double underscore followed by the `in` keyword to find a list of molecule_chembl_id's" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": 5, 218 | "metadata": {}, 219 | "outputs": [ 220 | { 221 | "data": { 222 | "text/plain": [ 223 | "[{'molecule_chembl_id': 'CHEMBL25', 'pref_name': 'ASPIRIN'}, {'molecule_chembl_id': 'CHEMBL27', 'pref_name': 'PROPRANOLOL'}, {'molecule_chembl_id': 'CHEMBL192', 'pref_name': 'SILDENAFIL'}]" 224 | ] 225 | }, 226 | "execution_count": 5, 227 | "metadata": {}, 228 | "output_type": "execute_result" 229 | } 230 | ], 
231 | "source": [ 232 | "from chembl_webresource_client.new_client import new_client\n", 233 | "\n", 234 | "molecule = new_client.molecule\n", 235 | "mols = molecule.filter(molecule_chembl_id__in=['CHEMBL25', 'CHEMBL192', 'CHEMBL27']).only(['molecule_chembl_id', 'pref_name'])\n", 236 | "mols" 237 | ] 238 | }, 239 | { 240 | "cell_type": "markdown", 241 | "metadata": {}, 242 | "source": [ 243 | "## Display a compound image using the `image` endpoint" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": 6, 249 | "metadata": {}, 250 | "outputs": [ 251 | { 252 | "data": { 253 | "image/svg+xml": [ 254 | "" 286 | ], 287 | "text/plain": [ 288 | "" 289 | ] 290 | }, 291 | "execution_count": 6, 292 | "metadata": {}, 293 | "output_type": "execute_result" 294 | } 295 | ], 296 | "source": [ 297 | "from chembl_webresource_client.new_client import new_client\n", 298 | "from IPython.display import SVG\n", 299 | "\n", 300 | "image = new_client.image\n", 301 | "image.set_format('svg')\n", 302 | "SVG(image.get('CHEMBL25'))" 303 | ] 304 | }, 305 | { 306 | "cell_type": "markdown", 307 | "metadata": {}, 308 | "source": [ 309 | "## Get a single compound by standard inchi key" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": 7, 315 | "metadata": {}, 316 | "outputs": [ 317 | { 318 | "data": { 319 | "text/plain": [ 320 | "[{'molecule_chembl_id': 'CHEMBL25', 'molecule_structures': {'canonical_smiles': 'CC(=O)Oc1ccccc1C(=O)O', 'molfile': '\\n RDKit 2D\\n\\n 13 13 0 0 0 0 0 0 0 0999 V2000\\n 8.8810 -2.1206 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 8.8798 -2.9479 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 9.5946 -3.3607 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 10.3110 -2.9474 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 10.3081 -2.1170 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 9.5928 -1.7078 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 11.0210 -1.7018 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 11.7369 -2.1116 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\\n 11.0260 -3.3588 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 
0\\n 11.0273 -4.1837 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 11.7423 -4.5949 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 10.3136 -4.5972 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\\n 11.0178 -0.8769 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\\n 1 2 2 0\\n 5 7 1 0\\n 3 4 2 0\\n 7 8 2 0\\n 4 9 1 0\\n 4 5 1 0\\n 9 10 1 0\\n 2 3 1 0\\n 10 11 1 0\\n 5 6 2 0\\n 10 12 2 0\\n 6 1 1 0\\n 7 13 1 0\\nM END\\n\\n> \\nCHEMBL25\\n\\n> \\nASPIRIN\\n\\n', 'standard_inchi': 'InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)', 'standard_inchi_key': 'BSYNRYMUTXBXSQ-UHFFFAOYSA-N'}, 'pref_name': 'ASPIRIN'}]" 321 | ] 322 | }, 323 | "execution_count": 7, 324 | "metadata": {}, 325 | "output_type": "execute_result" 326 | } 327 | ], 328 | "source": [ 329 | "from chembl_webresource_client.new_client import new_client\n", 330 | "\n", 331 | "molecule = new_client.molecule\n", 332 | "mol = molecule.filter(molecule_structures__standard_inchi_key='BSYNRYMUTXBXSQ-UHFFFAOYSA-N').only(['molecule_chembl_id', 'pref_name', 'molecule_structures'])\n", 333 | "mol" 334 | ] 335 | }, 336 | { 337 | "cell_type": "markdown", 338 | "metadata": {}, 339 | "source": [ 340 | "## Find compounds similar to a given SMILES query, with a similarity threshold of 70% using the `similarity` endpoint" 341 | ] 342 | }, 343 | { 344 | "cell_type": "code", 345 | "execution_count": 8, 346 | "metadata": {}, 347 | "outputs": [ 348 | { 349 | "name": "stdout", 350 | "output_type": "stream", 351 | "text": [ 352 | "{'molecule_chembl_id': 'CHEMBL477888', 'similarity': '85.4166686534881591796875'}\n", 353 | "{'molecule_chembl_id': 'CHEMBL477889', 'similarity': '85.4166686534881591796875'}\n", 354 | "{'molecule_chembl_id': 'CHEMBL478779', 'similarity': '85.4166686534881591796875'}\n", 355 | "{'molecule_chembl_id': 'CHEMBL2304268', 'similarity': '70.1754391193389892578125'}\n" 356 | ] 357 | } 358 | ], 359 | "source": [ 360 | "from chembl_webresource_client.new_client import new_client\n", 361 | "\n", 362 | "similarity = new_client.similarity\n", 363 | 
"res = similarity.filter(smiles=\"CO[C@@H](CCC#C\\C=C/CCCC(C)CCCCC=C)C(=O)[O-]\", similarity=70).only(['molecule_chembl_id', 'similarity'])\n", 364 | "for i in res:\n", 365 | " print(i)" 366 | ] 367 | }, 368 | { 369 | "cell_type": "markdown", 370 | "metadata": {}, 371 | "source": [ 372 | "## Find compounds similar to aspirin (CHEMBL25) with similarity threshold of 70% using the `similarity` endpoint\n", 373 | "- Use pandas python module to view the result as a table of data" 374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "execution_count": 9, 379 | "metadata": {}, 380 | "outputs": [ 381 | { 382 | "data": { 383 | "text/html": [ 384 | "

\n", 385 | "\n", 398 | "\n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | "

	molecule_chembl_id	pref_name	similarity
0	CHEMBL2296002	None	100
1	CHEMBL1697753	ASPIRIN DL-LYSINE	100
2	CHEMBL3833325	CARBASPIRIN CALCIUM	88.8888895511627197265625
3	CHEMBL3833404	CARBASPIRIN	88.8888895511627197265625
4	CHEMBL350343	DIPLOSALSALATE	85.7142865657806396484375
5	CHEMBL4515737	None	70.3703701496124267578125
6	CHEMBL1451173	DIPYROCETYL	69.9999988079071044921875

\n", 452 | "

" 453 | ], 454 | "text/plain": [ 455 | " molecule_chembl_id pref_name similarity\n", 456 | "0 CHEMBL2296002 None 100\n", 457 | "1 CHEMBL1697753 ASPIRIN DL-LYSINE 100\n", 458 | "2 CHEMBL3833325 CARBASPIRIN CALCIUM 88.8888895511627197265625\n", 459 | "3 CHEMBL3833404 CARBASPIRIN 88.8888895511627197265625\n", 460 | "4 CHEMBL350343 DIPLOSALSALATE 85.7142865657806396484375\n", 461 | "5 CHEMBL4515737 None 70.3703701496124267578125\n", 462 | "6 CHEMBL1451173 DIPYROCETYL 69.9999988079071044921875" 463 | ] 464 | }, 465 | "execution_count": 9, 466 | "metadata": {}, 467 | "output_type": "execute_result" 468 | } 469 | ], 470 | "source": [ 471 | "from chembl_webresource_client.new_client import new_client\n", 472 | "import pandas as pd #Use pandas python module to view data\n", 473 | "\n", 474 | "similarity = new_client.similarity\n", 475 | "res = similarity.filter(chembl_id='CHEMBL25', similarity=70).only(['molecule_chembl_id', 'pref_name', 'similarity'])\n", 476 | "pd.DataFrame(res)" 477 | ] 478 | }, 479 | { 480 | "cell_type": "markdown", 481 | "metadata": {}, 482 | "source": [ 483 | "## Find compounds with the same connectivity" 484 | ] 485 | }, 486 | { 487 | "cell_type": "code", 488 | "execution_count": 10, 489 | "metadata": {}, 490 | "outputs": [ 491 | { 492 | "data": { 493 | "text/html": [ 494 | "

\n", 495 | "\n", 508 | "\n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | "

	molecule_chembl_id	pref_name
0	CHEMBL1431	METFORMIN
1	CHEMBL1703	METFORMIN HYDROCHLORIDE
2	CHEMBL3094198	None

\n", 534 | "

" 535 | ], 536 | "text/plain": [ 537 | " molecule_chembl_id pref_name\n", 538 | "0 CHEMBL1431 METFORMIN\n", 539 | "1 CHEMBL1703 METFORMIN HYDROCHLORIDE\n", 540 | "2 CHEMBL3094198 None" 541 | ] 542 | }, 543 | "execution_count": 10, 544 | "metadata": {}, 545 | "output_type": "execute_result" 546 | } 547 | ], 548 | "source": [ 549 | "from chembl_webresource_client.new_client import new_client\n", 550 | "import pandas as pd #Use pandas python module to view data\n", 551 | "\n", 552 | "molecule = new_client.molecule\n", 553 | "res = molecule.filter(molecule_structures__canonical_smiles__connectivity='CN(C)C(=N)N=C(N)N').only(['molecule_chembl_id', 'pref_name'])\n", 554 | "pd.DataFrame(res)" 555 | ] 556 | }, 557 | { 558 | "cell_type": "markdown", 559 | "metadata": {}, 560 | "source": [ 561 | "## Get all biotherapeutic compounds" 562 | ] 563 | }, 564 | { 565 | "cell_type": "code", 566 | "execution_count": 11, 567 | "metadata": {}, 568 | "outputs": [ 569 | { 570 | "data": { 571 | "text/plain": [ 572 | "22963" 573 | ] 574 | }, 575 | "execution_count": 11, 576 | "metadata": {}, 577 | "output_type": "execute_result" 578 | } 579 | ], 580 | "source": [ 581 | "from chembl_webresource_client.new_client import new_client\n", 582 | "\n", 583 | "molecule = new_client.molecule\n", 584 | "biotherapeutics = molecule.filter(biotherapeutic__isnull=False)\n", 585 | "len(biotherapeutics)" 586 | ] 587 | }, 588 | { 589 | "cell_type": "markdown", 590 | "metadata": {}, 591 | "source": [ 592 | "## Get compounds with molecular weight <= 300\n", 593 | "- The `lte` keyword means 'less than or equal to'" 594 | ] 595 | }, 596 | { 597 | "cell_type": "code", 598 | "execution_count": 12, 599 | "metadata": {}, 600 | "outputs": [ 601 | { 602 | "data": { 603 | "text/plain": [ 604 | "367682" 605 | ] 606 | }, 607 | "execution_count": 12, 608 | "metadata": {}, 609 | "output_type": "execute_result" 610 | } 611 | ], 612 | "source": [ 613 | "from chembl_webresource_client.new_client import new_client\n", 614 | 
"\n", 615 | "molecule = new_client.molecule\n", 616 | "light_molecules = molecule.filter(molecule_properties__mw_freebase__lte=300)\n", 617 | "\n", 618 | "len(light_molecules)" 619 | ] 620 | }, 621 | { 622 | "cell_type": "markdown", 623 | "metadata": {}, 624 | "source": [ 625 | "## Get compounds with molecular weight <= 300 AND a pref_name ending with nib (ie a small molecule inhibitor)\n", 626 | "- Note the double underscore to access the 'mw_freebase' database field that is nested within 'molecule_properties' field\n", 627 | "- Multiple `filter` conditions can be combined into a list separated by \",\"" 628 | ] 629 | }, 630 | { 631 | "cell_type": "code", 632 | "execution_count": 13, 633 | "metadata": {}, 634 | "outputs": [ 635 | { 636 | "data": { 637 | "text/html": [ 638 | "

\n", 639 | "\n", 652 | "\n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | "

	molecule_chembl_id	pref_name
0	CHEMBL276711	SEMAXANIB
1	CHEMBL4594348	ELSUBRUTINIB

\n", 673 | "

" 674 | ], 675 | "text/plain": [ 676 | " molecule_chembl_id pref_name\n", 677 | "0 CHEMBL276711 SEMAXANIB\n", 678 | "1 CHEMBL4594348 ELSUBRUTINIB" 679 | ] 680 | }, 681 | "execution_count": 13, 682 | "metadata": {}, 683 | "output_type": "execute_result" 684 | } 685 | ], 686 | "source": [ 687 | "from chembl_webresource_client.new_client import new_client\n", 688 | "import pandas as pd #Use pandas python module to view data as tables\n", 689 | "\n", 690 | "molecule = new_client.molecule\n", 691 | "light_nib_molecules = molecule.filter(molecule_properties__mw_freebase__lte=300, pref_name__iendswith=\"nib\").only(['molecule_chembl_id', 'pref_name'])\n", 692 | "\n", 693 | "pd.DataFrame(light_nib_molecules)" 694 | ] 695 | }, 696 | { 697 | "cell_type": "markdown", 698 | "metadata": {}, 699 | "source": [ 700 | "## Get all compounds in ChEMBL with no Rule-of-Five violations" 701 | ] 702 | }, 703 | { 704 | "cell_type": "code", 705 | "execution_count": 14, 706 | "metadata": {}, 707 | "outputs": [ 708 | { 709 | "data": { 710 | "text/plain": [ 711 | "1441706" 712 | ] 713 | }, 714 | "execution_count": 14, 715 | "metadata": {}, 716 | "output_type": "execute_result" 717 | } 718 | ], 719 | "source": [ 720 | "from chembl_webresource_client.new_client import new_client\n", 721 | "\n", 722 | "molecule = new_client.molecule\n", 723 | "no_violations = molecule.filter(molecule_properties__num_ro5_violations=0)\n", 724 | "len(no_violations)" 725 | ] 726 | }, 727 | { 728 | "cell_type": "markdown", 729 | "metadata": {}, 730 | "source": [ 731 | "# 2. Drugs\n", 732 | "- By contrast to compounds, marketed drugs and clinical candidates in ChEMBL normally have curated information for their synonyms, disease indication, mechanism of action, safety warnings etc, and do not necessarily have to have associated measured bioactivity data. 
\n", 733 | "- Clinical candidates are drugs that are progressing through Phases 1, 2 and 3 of the drug discovery pipeline, while approved drugs are considered to be Phase 4 (i.e. max_phase=4).\n", 734 | "- Note that the data in the `drug` API endpoint is aggregated onto the parent drug form within each compound family" 735 | ] 736 | }, 737 | { 738 | "cell_type": "markdown", 739 | "metadata": {}, 740 | "source": [ 741 | "## Filter drugs by approval year and name using the `drug` endpoint" 742 | ] 743 | }, 744 | { 745 | "cell_type": "code", 746 | "execution_count": 15, 747 | "metadata": {}, 748 | "outputs": [ 749 | { 750 | "data": { 751 | "text/html": [ 752 | "

\n", 753 | "\n", 766 | "\n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | "

	applicants	atc_code_description	development_phase	first_approval	molecule_chembl_id	research_codes	synonyms	usan_stem	usan_stem_definition	usan_year
0	None	None	4	1987	CHEMBL611	None	[Terazosin (BAN, INN, MI), Terazosin hydrochlo...	-azosin	antihypertensives (prazosin type)	1980
1	None	None	4	1990	CHEMBL707	None	[Doxazosin mesilate (JAN), Doxazosin mesylate ...	-azosin	antihypertensives (prazosin type)	1981

\n", 811 | "

" 812 | ], 813 | "text/plain": [ 814 | " applicants atc_code_description development_phase first_approval \\\n", 815 | "0 None None 4 1987 \n", 816 | "1 None None 4 1990 \n", 817 | "\n", 818 | " molecule_chembl_id research_codes \\\n", 819 | "0 CHEMBL611 None \n", 820 | "1 CHEMBL707 None \n", 821 | "\n", 822 | " synonyms usan_stem \\\n", 823 | "0 [Terazosin (BAN, INN, MI), Terazosin hydrochlo... -azosin \n", 824 | "1 [Doxazosin mesilate (JAN), Doxazosin mesylate ... -azosin \n", 825 | "\n", 826 | " usan_stem_definition usan_year \n", 827 | "0 antihypertensives (prazosin type) 1980 \n", 828 | "1 antihypertensives (prazosin type) 1981 " 829 | ] 830 | }, 831 | "execution_count": 15, 832 | "metadata": {}, 833 | "output_type": "execute_result" 834 | } 835 | ], 836 | "source": [ 837 | "from chembl_webresource_client.new_client import new_client\n", 838 | "import pandas as pd #Use pandas python module to view data as tables\n", 839 | "\n", 840 | "drug = new_client.drug\n", 841 | "res = drug.filter(first_approval__gte=1980).filter(usan_stem=\"-azosin\").only(['development_phase','first_approval','molecule_chembl_id','synonyms', 'usan_stem','usan_stem_definition', 'usan_year'])\n", 842 | "pd.DataFrame(res)" 843 | ] 844 | }, 845 | { 846 | "cell_type": "markdown", 847 | "metadata": {}, 848 | "source": [ 849 | "## Find compounds that have been recently approved as marketed drugs and are immunosuppressants\n", 850 | "- Use the `molecule` endpoint to find compounds (an alternative would be to use the `drug` endpoint)\n", 851 | "- Use `order_by` to sort the result" 852 | ] 853 | }, 854 | { 855 | "cell_type": "code", 856 | "execution_count": 17, 857 | "metadata": {}, 858 | "outputs": [ 859 | { 860 | "data": { 861 | "text/html": [ 862 | "

\n", 863 | "\n", 876 | "\n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | "

	atc_classifications	first_approval	indication_class	max_phase	molecule_chembl_id	molecule_type	pref_name
0	[L04AA47]	2020	None	4	CHEMBL2109334	Antibody	INEBILIZUMAB
1	[L04AA38]	2020	None	4	CHEMBL3707247	Small molecule	OZANIMOD
2	[L04AC19]	2020	None	4	CHEMBL3833307	Antibody	SATRALIZUMAB
3	[L04AD03]	2021	None	4	CHEMBL2218919	Protein	VOCLOSPORIN

\n", 932 | "

" 933 | ], 934 | "text/plain": [ 935 | " atc_classifications first_approval indication_class max_phase \\\n", 936 | "0 [L04AA47] 2020 None 4 \n", 937 | "1 [L04AA38] 2020 None 4 \n", 938 | "2 [L04AC19] 2020 None 4 \n", 939 | "3 [L04AD03] 2021 None 4 \n", 940 | "\n", 941 | " molecule_chembl_id molecule_type pref_name \n", 942 | "0 CHEMBL2109334 Antibody INEBILIZUMAB \n", 943 | "1 CHEMBL3707247 Small molecule OZANIMOD \n", 944 | "2 CHEMBL3833307 Antibody SATRALIZUMAB \n", 945 | "3 CHEMBL2218919 Protein VOCLOSPORIN " 946 | ] 947 | }, 948 | "execution_count": 17, 949 | "metadata": {}, 950 | "output_type": "execute_result" 951 | } 952 | ], 953 | "source": [ 954 | "from chembl_webresource_client.new_client import new_client\n", 955 | "import pandas as pd #Use pandas python module to view data as tables\n", 956 | "\n", 957 | "molecule = new_client.molecule\n", 958 | "approved_drugs = molecule.filter(max_phase=4 #approved drugs only\n", 959 | " , atc_classifications__level2='L04' #ATC classification as Immunosuppressants\n", 960 | " , first_approval__gte=2020 #first_approval in or after 2020\n", 961 | " ).order_by('first_approval').only(['atc_classifications','first_approval','indication_class','max_phase','pref_name','molecule_type', 'molecule_chembl_id'])\n", 962 | "\n", 963 | "pd.DataFrame(approved_drugs) #Convert the list of results into a Pandas dataframe" 964 | ] 965 | }, 966 | { 967 | "cell_type": "markdown", 968 | "metadata": {}, 969 | "source": [ 970 | "## Get Phase 3 clinical candidates for lung cancer, and examine their molecular properties\n", 971 | "- First use the `drug_indication` API endpoint, specifying 'max_phase_for_ind' to be 3 (i.e. clinical candidates in Phase 3 progressing through the drug discovery pipeline for lung cancer)\n", 972 | "- And then feed the results into the `molecule` API endpoint to extract their molecular properties" 973 | ] 974 | }, 975 | { 976 | "cell_type": "code", 977 | "execution_count": 18, 978 | "metadata": {}, 979 | "outputs": [ 980 | 
{ 981 | "data": { 982 | "text/html": [ 983 | "

\n", 984 | "\n", 997 | "\n", 998 | " \n", 999 | " \n", 1000 | " \n", 1001 | " \n", 1002 | " \n", 1003 | " \n", 1004 | " \n", 1005 | " \n", 1006 | " \n", 1007 | " \n", 1008 | " \n", 1009 | " \n", 1010 | " \n", 1011 | " \n", 1012 | " \n", 1013 | " \n", 1014 | " \n", 1015 | " \n", 1016 | " \n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1028 | " \n", 1029 | " \n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | " \n", 1035 | " \n", 1036 | " \n", 1037 | " \n", 1038 | "

	molecule_chembl_id	molecule_properties	mw_freebase
0	CHEMBL25	{'alogp': '1.31', 'aromatic_rings': 1, 'cx_log...	180.16
1	CHEMBL38	{'alogp': '5.60', 'aromatic_rings': 0, 'cx_log...	300.44
2	CHEMBL45	{'alogp': '1.86', 'aromatic_rings': 2, 'cx_log...	232.28
3	CHEMBL481	{'alogp': '4.09', 'aromatic_rings': 3, 'cx_log...	586.69
4	CHEMBL84	{'alogp': '1.85', 'aromatic_rings': 3, 'cx_log...	421.45

\n", 1039 | "

" 1040 | ], 1041 | "text/plain": [ 1042 | " molecule_chembl_id molecule_properties \\\n", 1043 | "0 CHEMBL25 {'alogp': '1.31', 'aromatic_rings': 1, 'cx_log... \n", 1044 | "1 CHEMBL38 {'alogp': '5.60', 'aromatic_rings': 0, 'cx_log... \n", 1045 | "2 CHEMBL45 {'alogp': '1.86', 'aromatic_rings': 2, 'cx_log... \n", 1046 | "3 CHEMBL481 {'alogp': '4.09', 'aromatic_rings': 3, 'cx_log... \n", 1047 | "4 CHEMBL84 {'alogp': '1.85', 'aromatic_rings': 3, 'cx_log... \n", 1048 | "\n", 1049 | " mw_freebase \n", 1050 | "0 180.16 \n", 1051 | "1 300.44 \n", 1052 | "2 232.28 \n", 1053 | "3 586.69 \n", 1054 | "4 421.45 " 1055 | ] 1056 | }, 1057 | "execution_count": 18, 1058 | "metadata": {}, 1059 | "output_type": "execute_result" 1060 | } 1061 | ], 1062 | "source": [ 1063 | "from chembl_webresource_client.new_client import new_client\n", 1064 | "import pandas as pd #Use pandas python module to view data as tables\n", 1065 | "\n", 1066 | "drug_indication = new_client.drug_indication\n", 1067 | "molecules = new_client.molecule\n", 1068 | "\n", 1069 | "lung_cancer_ind = drug_indication.filter(efo_term__icontains=\"LUNG CARCINOMA\", max_phase_for_ind = 3)\n", 1070 | "lung_cancer_phase3 = molecules.filter(molecule_chembl_id__in=[x['molecule_chembl_id'] for x in lung_cancer_ind]).only(['molecule_chembl_id','molecule_properties'])\n", 1071 | "lung_cancer_phase3 = pd.DataFrame(lung_cancer_phase3) #Convert results to dataframe\n", 1072 | "\n", 1073 | "#Expand out nested cells for 'mw_freebase':\n", 1074 | "lung_cancer_phase3['mw_freebase'] = lung_cancer_phase3.loc[ lung_cancer_phase3['molecule_properties'].notnull(), 'molecule_properties'].apply(lambda x: x['mw_freebase']) #This is the mwt of the parent compound\n", 1075 | "\n", 1076 | "lung_cancer_phase3.head() #Display first 5 rows of result" 1077 | ] 1078 | }, 1079 | { 1080 | "cell_type": "markdown", 1081 | "metadata": {}, 1082 | "source": [ 1083 | "## What is the (therapeutic) mechanism of action for Sodium channel protein type V alpha 
subunit? For which drugs?" 1084 | ] 1085 | }, 1086 | { 1087 | "cell_type": "code", 1088 | "execution_count": 48, 1089 | "metadata": {}, 1090 | "outputs": [ 1091 | { 1092 | "data": { 1093 | "text/html": [ 1094 | "

\n", 1095 | "\n", 1108 | "\n", 1109 | " \n", 1110 | " \n", 1111 | " \n", 1112 | " \n", 1113 | " \n", 1114 | " \n", 1115 | " \n", 1116 | " \n", 1117 | " \n", 1118 | " \n", 1119 | " \n", 1120 | " \n", 1121 | " \n", 1122 | " \n", 1123 | " \n", 1124 | " \n", 1125 | " \n", 1126 | " \n", 1127 | " \n", 1128 | " \n", 1129 | " \n", 1130 | " \n", 1131 | " \n", 1132 | " \n", 1133 | " \n", 1134 | " \n", 1135 | " \n", 1136 | " \n", 1137 | " \n", 1138 | " \n", 1139 | " \n", 1140 | " \n", 1141 | " \n", 1142 | " \n", 1143 | " \n", 1144 | " \n", 1145 | " \n", 1146 | " \n", 1147 | " \n", 1148 | " \n", 1149 | " \n", 1150 | " \n", 1151 | " \n", 1152 | " \n", 1153 | " \n", 1154 | " \n", 1155 | " \n", 1156 | " \n", 1157 | " \n", 1158 | " \n", 1159 | " \n", 1160 | " \n", 1161 | " \n", 1162 | " \n", 1163 | " \n", 1164 | " \n", 1165 | " \n", 1166 | " \n", 1167 | " \n", 1168 | " \n", 1169 | " \n", 1170 | " \n", 1171 | " \n", 1172 | " \n", 1173 | " \n", 1174 | " \n", 1175 | " \n", 1176 | " \n", 1177 | " \n", 1178 | " \n", 1179 | " \n", 1180 | " \n", 1181 | " \n", 1182 | " \n", 1183 | " \n", 1184 | " \n", 1185 | "

	action_type	max_phase	mechanism_comment	mechanism_of_action	molecule_chembl_id	target_chembl_id
0	BLOCKER	4	None	Sodium channel protein type V alpha subunit bl...	CHEMBL1200822	CHEMBL1980
1	BLOCKER	4	None	Sodium channel protein type V alpha subunit bl...	CHEMBL1200612	CHEMBL1980
2	BLOCKER	4	None	Sodium channel protein type V alpha subunit bl...	CHEMBL1404	CHEMBL1980
3	BLOCKER	2	None	Sodium channel protein type V alpha subunit bl...	CHEMBL3545040	CHEMBL1980
4	BLOCKER	2	L-type calcium channel (CACNA1C, CACNA1D, CACN...	Sodium channel protein type V alpha subunit bl...	CHEMBL3545169	CHEMBL1980
5	BLOCKER	4	Prenylamine is a potent sodium channel (NaCh) ...	Sodium channel protein type V alpha subunit bl...	CHEMBL24072	CHEMBL1980
6	BLOCKER	3	Also partially blocks the hERG channel.	Sodium channel protein type V alpha subunit bl...	CHEMBL2107383	CHEMBL1980

\n", 1186 | "

" 1187 | ], 1188 | "text/plain": [ 1189 | " action_type max_phase mechanism_comment \\\n", 1190 | "0 BLOCKER 4 None \n", 1191 | "1 BLOCKER 4 None \n", 1192 | "2 BLOCKER 4 None \n", 1193 | "3 BLOCKER 2 None \n", 1194 | "4 BLOCKER 2 L-type calcium channel (CACNA1C, CACNA1D, CACN... \n", 1195 | "5 BLOCKER 4 Prenylamine is a potent sodium channel (NaCh) ... \n", 1196 | "6 BLOCKER 3 Also partially blocks the hERG channel. \n", 1197 | "\n", 1198 | " mechanism_of_action molecule_chembl_id \\\n", 1199 | "0 Sodium channel protein type V alpha subunit bl... CHEMBL1200822 \n", 1200 | "1 Sodium channel protein type V alpha subunit bl... CHEMBL1200612 \n", 1201 | "2 Sodium channel protein type V alpha subunit bl... CHEMBL1404 \n", 1202 | "3 Sodium channel protein type V alpha subunit bl... CHEMBL3545040 \n", 1203 | "4 Sodium channel protein type V alpha subunit bl... CHEMBL3545169 \n", 1204 | "5 Sodium channel protein type V alpha subunit bl... CHEMBL24072 \n", 1205 | "6 Sodium channel protein type V alpha subunit bl... 
CHEMBL2107383 \n", 1206 | "\n", 1207 | " target_chembl_id \n", 1208 | "0 CHEMBL1980 \n", 1209 | "1 CHEMBL1980 \n", 1210 | "2 CHEMBL1980 \n", 1211 | "3 CHEMBL1980 \n", 1212 | "4 CHEMBL1980 \n", 1213 | "5 CHEMBL1980 \n", 1214 | "6 CHEMBL1980 " 1215 | ] 1216 | }, 1217 | "execution_count": 48, 1218 | "metadata": {}, 1219 | "output_type": "execute_result" 1220 | } 1221 | ], 1222 | "source": [ 1223 | "from chembl_webresource_client.new_client import new_client\n", 1224 | "import pandas as pd #Use pandas python module to view data as tables\n", 1225 | "\n", 1226 | "mechanism = new_client.mechanism\n", 1227 | "res = mechanism.filter(target_chembl_id='CHEMBL1980' #Sodium channel protein type V alpha subunit target\n", 1228 | " ).only(['action_type','max_phase','mechanism_comment','mechanism_of_action','molecule_chembl_id', 'target_chembl_id'])\n", 1229 | "res = pd.DataFrame(res)\n", 1230 | "res" 1231 | ] 1232 | }, 1233 | { 1234 | "cell_type": "markdown", 1235 | "metadata": {}, 1236 | "source": [ 1237 | "## Find marketed drugs that have been subsequently withdrawn for hepatotoxicity reasons " 1238 | ] 1239 | }, 1240 | { 1241 | "cell_type": "code", 1242 | "execution_count": 19, 1243 | "metadata": {}, 1244 | "outputs": [ 1245 | { 1246 | "data": { 1247 | "text/html": [ 1248 | "

\n", 1249 | "\n", 1262 | "\n", 1263 | " \n", 1264 | " \n", 1265 | " \n", 1266 | " \n", 1267 | " \n", 1268 | " \n", 1269 | " \n", 1270 | " \n", 1271 | " \n", 1272 | " \n", 1273 | " \n", 1274 | " \n", 1275 | " \n", 1276 | " \n", 1277 | " \n", 1278 | " \n", 1279 | " \n", 1280 | " \n", 1281 | " \n", 1282 | " \n", 1283 | " \n", 1284 | " \n", 1285 | " \n", 1286 | " \n", 1287 | " \n", 1288 | " \n", 1289 | " \n", 1290 | " \n", 1291 | " \n", 1292 | " \n", 1293 | " \n", 1294 | " \n", 1295 | " \n", 1296 | " \n", 1297 | " \n", 1298 | " \n", 1299 | " \n", 1300 | " \n", 1301 | " \n", 1302 | " \n", 1303 | " \n", 1304 | " \n", 1305 | " \n", 1306 | " \n", 1307 | " \n", 1308 | " \n", 1309 | " \n", 1310 | " \n", 1311 | " \n", 1312 | " \n", 1313 | " \n", 1314 | " \n", 1315 | " \n", 1316 | " \n", 1317 | " \n", 1318 | " \n", 1319 | " \n", 1320 | " \n", 1321 | " \n", 1322 | " \n", 1323 | " \n", 1324 | " \n", 1325 | " \n", 1326 | " \n", 1327 | " \n", 1328 | " \n", 1329 | " \n", 1330 | " \n", 1331 | " \n", 1332 | " \n", 1333 | " \n", 1334 | " \n", 1335 | " \n", 1336 | " \n", 1337 | " \n", 1338 | " \n", 1339 | "

	molecule_chembl_id	parent_molecule_chembl_id	warning_class	warning_country	warning_description	warning_id	warning_refs	warning_type	warning_year
0	CHEMBL2105740	CHEMBL282724	Hepatotoxicity	European Union	Liver toxicity	2049	[{'ref_id': 'Public_statement/2011/03/WC500102...	Withdrawn	2010
1	CHEMBL1341	CHEMBL1341	Hepatotoxicity	United States	Serious, irreversible, and even fatal nephroto...	2059	[{'ref_id': 'FR-2014-07-02/pdf/2014-15371.pdf'...	Withdrawn	2001
2	CHEMBL3183453	CHEMBL36506	Hepatotoxicity	United States	Relatively common skin reactions; Jaundice; He...	2069	[{'ref_id': 'FR-2014-07-02/pdf/2014-15371.pdf'...	Withdrawn	2009
3	CHEMBL1200498	CHEMBL1201197	Hepatotoxicity	European Union; United States	Liver toxicity; Serious liver injury leading t...	2070	[{'ref_id': 'public-statement-trovan/trovan-iv...	Withdrawn	2006
4	CHEMBL54349	CHEMBL54349	Hepatotoxicity	France	Hepatitis	2076	[{'ref_id': '10.1177/009286150103500134', 'ref...	Withdrawn	1993

\n", 1340 | "

" 1341 | ], 1342 | "text/plain": [ 1343 | " molecule_chembl_id parent_molecule_chembl_id warning_class \\\n", 1344 | "0 CHEMBL2105740 CHEMBL282724 Hepatotoxicity \n", 1345 | "1 CHEMBL1341 CHEMBL1341 Hepatotoxicity \n", 1346 | "2 CHEMBL3183453 CHEMBL36506 Hepatotoxicity \n", 1347 | "3 CHEMBL1200498 CHEMBL1201197 Hepatotoxicity \n", 1348 | "4 CHEMBL54349 CHEMBL54349 Hepatotoxicity \n", 1349 | "\n", 1350 | " warning_country \\\n", 1351 | "0 European Union \n", 1352 | "1 United States \n", 1353 | "2 United States \n", 1354 | "3 European Union; United States \n", 1355 | "4 France \n", 1356 | "\n", 1357 | " warning_description warning_id \\\n", 1358 | "0 Liver toxicity 2049 \n", 1359 | "1 Serious, irreversible, and even fatal nephroto... 2059 \n", 1360 | "2 Relatively common skin reactions; Jaundice; He... 2069 \n", 1361 | "3 Liver toxicity; Serious liver injury leading t... 2070 \n", 1362 | "4 Hepatitis 2076 \n", 1363 | "\n", 1364 | " warning_refs warning_type \\\n", 1365 | "0 [{'ref_id': 'Public_statement/2011/03/WC500102... Withdrawn \n", 1366 | "1 [{'ref_id': 'FR-2014-07-02/pdf/2014-15371.pdf'... Withdrawn \n", 1367 | "2 [{'ref_id': 'FR-2014-07-02/pdf/2014-15371.pdf'... Withdrawn \n", 1368 | "3 [{'ref_id': 'public-statement-trovan/trovan-iv... Withdrawn \n", 1369 | "4 [{'ref_id': '10.1177/009286150103500134', 'ref... 
Withdrawn \n", 1370 | "\n", 1371 | " warning_year \n", 1372 | "0 2010 \n", 1373 | "1 2001 \n", 1374 | "2 2009 \n", 1375 | "3 2006 \n", 1376 | "4 1993 " 1377 | ] 1378 | }, 1379 | "execution_count": 19, 1380 | "metadata": {}, 1381 | "output_type": "execute_result" 1382 | } 1383 | ], 1384 | "source": [ 1385 | "from chembl_webresource_client.new_client import new_client\n", 1386 | "import pandas as pd #Use pandas python module to view data as tables\n", 1387 | "\n", 1388 | "drug_warning = new_client.drug_warning\n", 1389 | "res = drug_warning.filter(warning_type='Withdrawn', warning_class='Hepatotoxicity')\n", 1390 | "res = pd.DataFrame(res)\n", 1391 | "res.head() #Display top five rows only" 1392 | ] 1393 | }, 1394 | { 1395 | "cell_type": "markdown", 1396 | "metadata": {}, 1397 | "source": [ 1398 | "## Find FDA drugs that carry a black box warning for cardiotoxicity" 1399 | ] 1400 | }, 1401 | { 1402 | "cell_type": "code", 1403 | "execution_count": 20, 1404 | "metadata": {}, 1405 | "outputs": [ 1406 | { 1407 | "data": { 1408 | "text/html": [ 1409 | "

\n", 1410 | "\n", 1423 | "\n", 1424 | " \n", 1425 | " \n", 1426 | " \n", 1427 | " \n", 1428 | " \n", 1429 | " \n", 1430 | " \n", 1431 | " \n", 1432 | " \n", 1433 | " \n", 1434 | " \n", 1435 | " \n", 1436 | " \n", 1437 | " \n", 1438 | " \n", 1439 | " \n", 1440 | " \n", 1441 | " \n", 1442 | " \n", 1443 | " \n", 1444 | " \n", 1445 | " \n", 1446 | " \n", 1447 | " \n", 1448 | " \n", 1449 | " \n", 1450 | " \n", 1451 | " \n", 1452 | " \n", 1453 | " \n", 1454 | " \n", 1455 | " \n", 1456 | " \n", 1457 | " \n", 1458 | " \n", 1459 | " \n", 1460 | " \n", 1461 | " \n", 1462 | " \n", 1463 | " \n", 1464 | " \n", 1465 | " \n", 1466 | " \n", 1467 | " \n", 1468 | " \n", 1469 | " \n", 1470 | " \n", 1471 | " \n", 1472 | " \n", 1473 | " \n", 1474 | " \n", 1475 | " \n", 1476 | " \n", 1477 | " \n", 1478 | " \n", 1479 | " \n", 1480 | " \n", 1481 | " \n", 1482 | " \n", 1483 | " \n", 1484 | " \n", 1485 | " \n", 1486 | " \n", 1487 | " \n", 1488 | " \n", 1489 | " \n", 1490 | " \n", 1491 | " \n", 1492 | " \n", 1493 | " \n", 1494 | " \n", 1495 | " \n", 1496 | " \n", 1497 | " \n", 1498 | " \n", 1499 | " \n", 1500 | " \n", 1501 | " \n", 1502 | " \n", 1503 | " \n", 1504 | " \n", 1505 | " \n", 1506 | " \n", 1507 | " \n", 1508 | " \n", 1509 | " \n", 1510 | " \n", 1511 | " \n", 1512 | " \n", 1513 | " \n", 1514 | " \n", 1515 | " \n", 1516 | " \n", 1517 | " \n", 1518 | " \n", 1519 | " \n", 1520 | " \n", 1521 | " \n", 1522 | " \n", 1523 | " \n", 1524 | " \n", 1525 | " \n", 1526 | " \n", 1527 | " \n", 1528 | " \n", 1529 | " \n", 1530 | " \n", 1531 | " \n", 1532 | " \n", 1533 | " \n", 1534 | " \n", 1535 | " \n", 1536 | " \n", 1537 | " \n", 1538 | " \n", 1539 | " \n", 1540 | " \n", 1541 | " \n", 1542 | " \n", 1543 | " \n", 1544 | " \n", 1545 | " \n", 1546 | " \n", 1547 | " \n", 1548 | " \n", 1549 | " \n", 1550 | " \n", 1551 | " \n", 1552 | " \n", 1553 | " \n", 1554 | " \n", 1555 | " \n", 1556 | " \n", 1557 | " \n", 1558 | " \n", 1559 | " \n", 1560 | " \n", 1561 | " \n", 1562 | " \n", 1563 | " \n", 1564 | " 
\n", 1565 | " \n", 1566 | " \n", 1567 | " \n", 1568 | " \n", 1569 | " \n", 1570 | " \n", 1571 | " \n", 1572 | "

	molecule_chembl_id	parent_molecule_chembl_id	warning_class	warning_country	warning_description	warning_id	warning_refs	warning_type	warning_year
0	CHEMBL270190	CHEMBL270190	Cardiotoxicity	United States	None	81	[{'ref_id': '77a67dc6-35d3-48ff-9d18-292d4d442...	Black Box Warning	None
1	CHEMBL1083993	CHEMBL633	Cardiotoxicity	United States	None	98	[{'ref_id': '730039c2-0a32-4775-855d-98b2207e9...	Black Box Warning	None
2	CHEMBL501	CHEMBL405	Cardiotoxicity	United States	None	109	[{'ref_id': 'f469fb38-0380-4621-9db3-a4f429126...	Black Box Warning	None
3	CHEMBL2362016	CHEMBL2362016	Cardiotoxicity	United States	None	135	[{'ref_id': 'a482eccd-8837-47ea-904d-2f2c294d1...	Black Box Warning	None
4	CHEMBL24	CHEMBL24	Cardiotoxicity	United States	None	145	[{'ref_id': '06c0a04f-a77e-4871-9bb4-13abe2cbb...	Black Box Warning	None
...	...	...	...	...	...	...	...	...	...
87	CHEMBL1201132	CHEMBL895	Cardiotoxicity	United States	None	2615	[{'ref_id': '96128b54-24d6-88a3-e053-2a95a90a1...	Black Box Warning	None
88	CHEMBL1200890	CHEMBL656	Cardiotoxicity	United States	None	2646	[{'ref_id': '1c674d3e-c80a-47a5-a3ea-e76bb4f53...	Black Box Warning	None
89	CHEMBL2007641	CHEMBL2007641	Cardiotoxicity	United States	None	2659	[{'ref_id': '17f85d17-ab71-4f5b-9fe3-0b8c822f6...	Black Box Warning	None
90	CHEMBL2107874	CHEMBL2107874	Cardiotoxicity	United States	None	2711	[{'ref_id': '471baba2-7154-4488-9891-0db2f4679...	Black Box Warning	None
91	CHEMBL2107067	CHEMBL2107067	Cardiotoxicity	United States	None	2767	[{'ref_id': 'ed7b5d41-7475-4c10-99b9-b62b3434a...	Black Box Warning	None

\n", 1573 | "

92 rows × 9 columns

\n", 1574 | "

" 1575 | ], 1576 | "text/plain": [ 1577 | " molecule_chembl_id parent_molecule_chembl_id warning_class \\\n", 1578 | "0 CHEMBL270190 CHEMBL270190 Cardiotoxicity \n", 1579 | "1 CHEMBL1083993 CHEMBL633 Cardiotoxicity \n", 1580 | "2 CHEMBL501 CHEMBL405 Cardiotoxicity \n", 1581 | "3 CHEMBL2362016 CHEMBL2362016 Cardiotoxicity \n", 1582 | "4 CHEMBL24 CHEMBL24 Cardiotoxicity \n", 1583 | ".. ... ... ... \n", 1584 | "87 CHEMBL1201132 CHEMBL895 Cardiotoxicity \n", 1585 | "88 CHEMBL1200890 CHEMBL656 Cardiotoxicity \n", 1586 | "89 CHEMBL2007641 CHEMBL2007641 Cardiotoxicity \n", 1587 | "90 CHEMBL2107874 CHEMBL2107874 Cardiotoxicity \n", 1588 | "91 CHEMBL2107067 CHEMBL2107067 Cardiotoxicity \n", 1589 | "\n", 1590 | " warning_country warning_description warning_id \\\n", 1591 | "0 United States None 81 \n", 1592 | "1 United States None 98 \n", 1593 | "2 United States None 109 \n", 1594 | "3 United States None 135 \n", 1595 | "4 United States None 145 \n", 1596 | ".. ... ... ... \n", 1597 | "87 United States None 2615 \n", 1598 | "88 United States None 2646 \n", 1599 | "89 United States None 2659 \n", 1600 | "90 United States None 2711 \n", 1601 | "91 United States None 2767 \n", 1602 | "\n", 1603 | " warning_refs warning_type \\\n", 1604 | "0 [{'ref_id': '77a67dc6-35d3-48ff-9d18-292d4d442... Black Box Warning \n", 1605 | "1 [{'ref_id': '730039c2-0a32-4775-855d-98b2207e9... Black Box Warning \n", 1606 | "2 [{'ref_id': 'f469fb38-0380-4621-9db3-a4f429126... Black Box Warning \n", 1607 | "3 [{'ref_id': 'a482eccd-8837-47ea-904d-2f2c294d1... Black Box Warning \n", 1608 | "4 [{'ref_id': '06c0a04f-a77e-4871-9bb4-13abe2cbb... Black Box Warning \n", 1609 | ".. ... ... \n", 1610 | "87 [{'ref_id': '96128b54-24d6-88a3-e053-2a95a90a1... Black Box Warning \n", 1611 | "88 [{'ref_id': '1c674d3e-c80a-47a5-a3ea-e76bb4f53... Black Box Warning \n", 1612 | "89 [{'ref_id': '17f85d17-ab71-4f5b-9fe3-0b8c822f6... Black Box Warning \n", 1613 | "90 [{'ref_id': '471baba2-7154-4488-9891-0db2f4679... 
Black Box Warning \n", 1614 | "91 [{'ref_id': 'ed7b5d41-7475-4c10-99b9-b62b3434a... Black Box Warning \n", 1615 | "\n", 1616 | " warning_year \n", 1617 | "0 None \n", 1618 | "1 None \n", 1619 | "2 None \n", 1620 | "3 None \n", 1621 | "4 None \n", 1622 | ".. ... \n", 1623 | "87 None \n", 1624 | "88 None \n", 1625 | "89 None \n", 1626 | "90 None \n", 1627 | "91 None \n", 1628 | "\n", 1629 | "[92 rows x 9 columns]" 1630 | ] 1631 | }, 1632 | "execution_count": 20, 1633 | "metadata": {}, 1634 | "output_type": "execute_result" 1635 | } 1636 | ], 1637 | "source": [ 1638 | "from chembl_webresource_client.new_client import new_client\n", 1639 | "import pandas as pd #Use pandas python module to view data as tables\n", 1640 | "\n", 1641 | "drug_warning = new_client.drug_warning\n", 1642 | "res = drug_warning.filter(warning_class__icontains='cardio', warning_type='Black Box Warning')\n", 1643 | "res = pd.DataFrame(res)\n", 1644 | "res" 1645 | ] 1646 | }, 1647 | { 1648 | "cell_type": "markdown", 1649 | "metadata": {}, 1650 | "source": [ 1651 | "# 3. 
Targets" 1652 | ] 1653 | }, 1654 | { 1655 | "cell_type": "markdown", 1656 | "metadata": {}, 1657 | "source": [ 1658 | "## Find a target by gene name" 1659 | ] 1660 | }, 1661 | { 1662 | "cell_type": "code", 1663 | "execution_count": 21, 1664 | "metadata": {}, 1665 | "outputs": [ 1666 | { 1667 | "name": "stdout", 1668 | "output_type": "stream", 1669 | "text": [ 1670 | "{'organism': 'Homo sapiens', 'pref_name': 'Bromodomain-containing protein 4', 'target_type': 'SINGLE PROTEIN'}\n", 1671 | "{'organism': 'Mus musculus', 'pref_name': 'Bromodomain-containing protein 4', 'target_type': 'SINGLE PROTEIN'}\n", 1672 | "{'organism': 'Homo sapiens', 'pref_name': 'BRD4/HDAC1', 'target_type': 'PROTEIN COMPLEX'}\n", 1673 | "{'organism': 'Homo sapiens', 'pref_name': 'Cereblon/Cullin-4A/Bromodomain-containing protein 4', 'target_type': 'PROTEIN-PROTEIN INTERACTION'}\n", 1674 | "{'organism': 'Homo sapiens', 'pref_name': 'Cereblon/Bromodomain-containing protein 4', 'target_type': 'PROTEIN-PROTEIN INTERACTION'}\n", 1675 | "{'organism': 'Homo sapiens', 'pref_name': 'von Hippel-Lindau disease tumor suppressor/Bromodomain-containing protein 4', 'target_type': 'PROTEIN-PROTEIN INTERACTION'}\n", 1676 | "{'organism': 'Homo sapiens', 'pref_name': 'Cereblon/DNA damage-binding protein 1/Bromodomain-containing protein 4', 'target_type': 'PROTEIN-PROTEIN INTERACTION'}\n", 1677 | "{'organism': 'Homo sapiens', 'pref_name': 'von Hippel-Lindau disease tumor suppressor/Elongin-B/Elongin-C/Bromodomain-containing protein 4', 'target_type': 'PROTEIN-PROTEIN INTERACTION'}\n", 1678 | "{'organism': 'Homo sapiens', 'pref_name': 'Bromodomain and extra-terminal motif (BET)', 'target_type': 'PROTEIN FAMILY'}\n", 1679 | "{'organism': 'Homo sapiens', 'pref_name': 'BRD4/E3 ubiquitin-protein ligase Mdm2', 'target_type': 'PROTEIN-PROTEIN INTERACTION'}\n", 1680 | "{'organism': 'Homo sapiens', 'pref_name': 'BRD4/E3 ubiquitin-protein ligase XIAP', 'target_type': 'PROTEIN-PROTEIN INTERACTION'}\n" 1681 | ] 1682 | } 1683 
| ], 1684 | "source": [ 1685 | "from chembl_webresource_client.new_client import new_client\n", 1686 | "\n", 1687 | "target = new_client.target\n", 1688 | "gene_name = 'BRD4'\n", 1689 | "res = target.filter(target_synonym__icontains=gene_name).only(['organism', 'pref_name', 'target_type'])\n", 1690 | "for i in res:\n", 1691 | " print(i)" 1692 | ] 1693 | }, 1694 | { 1695 | "cell_type": "markdown", 1696 | "metadata": {}, 1697 | "source": [ 1698 | "## Find a protein target using its Uniprot_id" 1699 | ] 1700 | }, 1701 | { 1702 | "cell_type": "code", 1703 | "execution_count": 22, 1704 | "metadata": {}, 1705 | "outputs": [ 1706 | { 1707 | "data": { 1708 | "text/html": [ 1709 | "

\n", 1710 | "\n", 1723 | "\n", 1724 | " \n", 1725 | " \n", 1726 | " \n", 1727 | " \n", 1728 | " \n", 1729 | " \n", 1730 | " \n", 1731 | " \n", 1732 | " \n", 1733 | " \n", 1734 | " \n", 1735 | " \n", 1736 | " \n", 1737 | " \n", 1738 | " \n", 1739 | " \n", 1740 | " \n", 1741 | " \n", 1742 | " \n", 1743 | " \n", 1744 | " \n", 1745 | " \n", 1746 | " \n", 1747 | " \n", 1748 | " \n", 1749 | " \n", 1750 | " \n", 1751 | " \n", 1752 | " \n", 1753 | " \n", 1754 | " \n", 1755 | " \n", 1756 | " \n", 1757 | " \n", 1758 | " \n", 1759 | " \n", 1760 | " \n", 1761 | " \n", 1762 | " \n", 1763 | " \n", 1764 | " \n", 1765 | " \n", 1766 | " \n", 1767 | " \n", 1768 | " \n", 1769 | " \n", 1770 | " \n", 1771 | " \n", 1772 | " \n", 1773 | " \n", 1774 | " \n", 1775 | " \n", 1776 | " \n", 1777 | " \n", 1778 | " \n", 1779 | " \n", 1780 | " \n", 1781 | " \n", 1782 | " \n", 1783 | "

	cross_references	organism	pref_name	species_group_flag	target_chembl_id	target_components	target_type	tax_id
0	[{'xref_id': 'FLT4', 'xref_name': None, 'xref_...	Homo sapiens	Vascular endothelial growth factor receptor 3	False	CHEMBL1955	[{'accession': 'P35916', 'component_descriptio...	SINGLE PROTEIN	9606
1	[]	Homo sapiens	Vascular endothelial growth factor receptor	False	CHEMBL2095227	[{'accession': 'P17948', 'component_descriptio...	PROTEIN FAMILY	9606
2	[]	Homo sapiens	Vascular endothelial growth factor receptor 2 ...	False	CHEMBL2111409	[{'accession': 'P35916', 'component_descriptio...	SELECTIVITY GROUP	9606
3	[]	Homo sapiens	Focal adhesion kinase 1/vascular endothelial g...	False	CHEMBL3301389	[{'accession': 'P35916', 'component_descriptio...	PROTEIN-PROTEIN INTERACTION	9606

\n", 1784 | "

" 1785 | ], 1786 | "text/plain": [ 1787 | " cross_references organism \\\n", 1788 | "0 [{'xref_id': 'FLT4', 'xref_name': None, 'xref_... Homo sapiens \n", 1789 | "1 [] Homo sapiens \n", 1790 | "2 [] Homo sapiens \n", 1791 | "3 [] Homo sapiens \n", 1792 | "\n", 1793 | " pref_name species_group_flag \\\n", 1794 | "0 Vascular endothelial growth factor receptor 3 False \n", 1795 | "1 Vascular endothelial growth factor receptor False \n", 1796 | "2 Vascular endothelial growth factor receptor 2 ... False \n", 1797 | "3 Focal adhesion kinase 1/vascular endothelial g... False \n", 1798 | "\n", 1799 | " target_chembl_id target_components \\\n", 1800 | "0 CHEMBL1955 [{'accession': 'P35916', 'component_descriptio... \n", 1801 | "1 CHEMBL2095227 [{'accession': 'P17948', 'component_descriptio... \n", 1802 | "2 CHEMBL2111409 [{'accession': 'P35916', 'component_descriptio... \n", 1803 | "3 CHEMBL3301389 [{'accession': 'P35916', 'component_descriptio... \n", 1804 | "\n", 1805 | " target_type tax_id \n", 1806 | "0 SINGLE PROTEIN 9606 \n", 1807 | "1 PROTEIN FAMILY 9606 \n", 1808 | "2 SELECTIVITY GROUP 9606 \n", 1809 | "3 PROTEIN-PROTEIN INTERACTION 9606 " 1810 | ] 1811 | }, 1812 | "execution_count": 22, 1813 | "metadata": {}, 1814 | "output_type": "execute_result" 1815 | } 1816 | ], 1817 | "source": [ 1818 | "from chembl_webresource_client.new_client import new_client\n", 1819 | "\n", 1820 | "target = new_client.target\n", 1821 | "uniprot_id = 'P35916' #Vascular endothelial growth factor receptor 3; Uniprot accession P35916\n", 1822 | "res = target.filter(target_components__accession=uniprot_id)\n", 1823 | "\n", 1824 | "res = pd.DataFrame(res)\n", 1825 | "res" 1826 | ] 1827 | }, 1828 | { 1829 | "cell_type": "markdown", 1830 | "metadata": {}, 1831 | "source": [ 1832 | "## Which species have available data in ChEMBL for cannabinoid receptor target(s)? What target types are they?" 
1833 | ] 1834 | }, 1835 | { 1836 | "cell_type": "code", 1837 | "execution_count": 23, 1838 | "metadata": {}, 1839 | "outputs": [ 1840 | { 1841 | "data": { 1842 | "text/html": [ 1843 | "

\n", 1844 | "\n", 1857 | "\n", 1858 | " \n", 1859 | " \n", 1860 | " \n", 1861 | " \n", 1862 | " \n", 1863 | " \n", 1864 | " \n", 1865 | " \n", 1866 | " \n", 1867 | " \n", 1868 | " \n", 1869 | " \n", 1870 | " \n", 1871 | " \n", 1872 | " \n", 1873 | " \n", 1874 | " \n", 1875 | " \n", 1876 | " \n", 1877 | " \n", 1878 | " \n", 1879 | " \n", 1880 | " \n", 1881 | " \n", 1882 | " \n", 1883 | " \n", 1884 | " \n", 1885 | " \n", 1886 | " \n", 1887 | " \n", 1888 | " \n", 1889 | " \n", 1890 | " \n", 1891 | " \n", 1892 | " \n", 1893 | " \n", 1894 | " \n", 1895 | " \n", 1896 | " \n", 1897 | " \n", 1898 | " \n", 1899 | " \n", 1900 | " \n", 1901 | " \n", 1902 | " \n", 1903 | " \n", 1904 | " \n", 1905 | " \n", 1906 | " \n", 1907 | " \n", 1908 | " \n", 1909 | " \n", 1910 | " \n", 1911 | " \n", 1912 | " \n", 1913 | " \n", 1914 | " \n", 1915 | " \n", 1916 | " \n", 1917 | " \n", 1918 | " \n", 1919 | " \n", 1920 | " \n", 1921 | " \n", 1922 | " \n", 1923 | " \n", 1924 | " \n", 1925 | " \n", 1926 | " \n", 1927 | " \n", 1928 | " \n", 1929 | " \n", 1930 | " \n", 1931 | " \n", 1932 | " \n", 1933 | " \n", 1934 | " \n", 1935 | " \n", 1936 | " \n", 1937 | " \n", 1938 | " \n", 1939 | " \n", 1940 | " \n", 1941 | " \n", 1942 | " \n", 1943 | " \n", 1944 | " \n", 1945 | " \n", 1946 | "

	organism	pref_name	target_chembl_id	target_type
0	Homo sapiens	Cannabinoid CB1 receptor	CHEMBL218	SINGLE PROTEIN
1	Mus musculus	Cannabinoid CB1 receptor	CHEMBL3037	SINGLE PROTEIN
2	Rattus norvegicus	Cannabinoid CB1 receptor	CHEMBL3571	SINGLE PROTEIN
3	Homo sapiens	Cannabinoid CB1 receptor/orexin receptor 1 com...	CHEMBL3301387	PROTEIN COMPLEX
4	Homo sapiens	Cannabinoid CB2 receptor	CHEMBL253	SINGLE PROTEIN
5	Mus musculus	Cannabinoid CB2 receptor	CHEMBL5373	SINGLE PROTEIN
6	Rattus norvegicus	Cannabinoid CB2 receptor	CHEMBL2470	SINGLE PROTEIN
7	Homo sapiens	Cannabinoid receptor	CHEMBL2096981	PROTEIN FAMILY
8	Mus musculus	Cannabinoid receptor	CHEMBL2111415	PROTEIN FAMILY
9	Rattus norvegicus	Cannabinoid receptor	CHEMBL2111385	PROTEIN FAMILY
10	Homo sapiens	Cannabinoid receptor 1/Mu-type opioid receptor	CHEMBL3885538	PROTEIN COMPLEX

\n", 1947 | "

" 1948 | ], 1949 | "text/plain": [ 1950 | " organism pref_name \\\n", 1951 | "0 Homo sapiens Cannabinoid CB1 receptor \n", 1952 | "1 Mus musculus Cannabinoid CB1 receptor \n", 1953 | "2 Rattus norvegicus Cannabinoid CB1 receptor \n", 1954 | "3 Homo sapiens Cannabinoid CB1 receptor/orexin receptor 1 com... \n", 1955 | "4 Homo sapiens Cannabinoid CB2 receptor \n", 1956 | "5 Mus musculus Cannabinoid CB2 receptor \n", 1957 | "6 Rattus norvegicus Cannabinoid CB2 receptor \n", 1958 | "7 Homo sapiens Cannabinoid receptor \n", 1959 | "8 Mus musculus Cannabinoid receptor \n", 1960 | "9 Rattus norvegicus Cannabinoid receptor \n", 1961 | "10 Homo sapiens Cannabinoid receptor 1/Mu-type opioid receptor \n", 1962 | "\n", 1963 | " target_chembl_id target_type \n", 1964 | "0 CHEMBL218 SINGLE PROTEIN \n", 1965 | "1 CHEMBL3037 SINGLE PROTEIN \n", 1966 | "2 CHEMBL3571 SINGLE PROTEIN \n", 1967 | "3 CHEMBL3301387 PROTEIN COMPLEX \n", 1968 | "4 CHEMBL253 SINGLE PROTEIN \n", 1969 | "5 CHEMBL5373 SINGLE PROTEIN \n", 1970 | "6 CHEMBL2470 SINGLE PROTEIN \n", 1971 | "7 CHEMBL2096981 PROTEIN FAMILY \n", 1972 | "8 CHEMBL2111415 PROTEIN FAMILY \n", 1973 | "9 CHEMBL2111385 PROTEIN FAMILY \n", 1974 | "10 CHEMBL3885538 PROTEIN COMPLEX " 1975 | ] 1976 | }, 1977 | "execution_count": 23, 1978 | "metadata": {}, 1979 | "output_type": "execute_result" 1980 | } 1981 | ], 1982 | "source": [ 1983 | "from chembl_webresource_client.new_client import new_client\n", 1984 | "import pandas as pd #Use pandas python module to view data as tables\n", 1985 | "\n", 1986 | "target = new_client.target\n", 1987 | "protein_name = 'cannabinoid' \n", 1988 | "res = target.filter(pref_name__icontains=protein_name).only(['organism','pref_name','target_chembl_id','target_type']).order_by(['pref_name','organism'])\n", 1989 | "\n", 1990 | "pd.DataFrame(res)" 1991 | ] 1992 | }, 1993 | { 1994 | "cell_type": "markdown", 1995 | "metadata": {}, 1996 | "source": [ 1997 | "## What does the protein classification look like for chemokine 
receptor targets?" 1998 | ] 1999 | }, 2000 | { 2001 | "cell_type": "code", 2002 | "execution_count": 24, 2003 | "metadata": {}, 2004 | "outputs": [ 2005 | { 2006 | "data": { 2007 | "text/html": [ 2008 | "

\n", 2009 | "\n", 2022 | "\n", 2023 | " \n", 2024 | " \n", 2025 | " \n", 2026 | " \n", 2027 | " \n", 2028 | " \n", 2029 | " \n", 2030 | " \n", 2031 | " \n", 2032 | " \n", 2033 | " \n", 2034 | " \n", 2035 | " \n", 2036 | " \n", 2037 | " \n", 2038 | " \n", 2039 | " \n", 2040 | " \n", 2041 | " \n", 2042 | " \n", 2043 | " \n", 2044 | " \n", 2045 | " \n", 2046 | " \n", 2047 | " \n", 2048 | " \n", 2049 | " \n", 2050 | " \n", 2051 | " \n", 2052 | " \n", 2053 | " \n", 2054 | " \n", 2055 | " \n", 2056 | " \n", 2057 | " \n", 2058 | " \n", 2059 | " \n", 2060 | " \n", 2061 | " \n", 2062 | " \n", 2063 | " \n", 2064 | " \n", 2065 | " \n", 2066 | " \n", 2067 | " \n", 2068 | " \n", 2069 | " \n", 2070 | " \n", 2071 | " \n", 2072 | " \n", 2073 | " \n", 2074 | " \n", 2075 | " \n", 2076 | " \n", 2077 | " \n", 2078 | " \n", 2079 | " \n", 2080 | " \n", 2081 | " \n", 2082 | " \n", 2083 | " \n", 2084 | " \n", 2085 | " \n", 2086 | " \n", 2087 | " \n", 2088 | " \n", 2089 | " \n", 2090 | " \n", 2091 | " \n", 2092 | " \n", 2093 | " \n", 2094 | " \n", 2095 | " \n", 2096 | " \n", 2097 | " \n", 2098 | " \n", 2099 | " \n", 2100 | " \n", 2101 | " \n", 2102 | " \n", 2103 | " \n", 2104 | " \n", 2105 | " \n", 2106 | " \n", 2107 | " \n", 2108 | " \n", 2109 | " \n", 2110 | " \n", 2111 | " \n", 2112 | " \n", 2113 | " \n", 2114 | " \n", 2115 | " \n", 2116 | " \n", 2117 | " \n", 2118 | " \n", 2119 | " \n", 2120 | " \n", 2121 | " \n", 2122 | " \n", 2123 | "

	l1	l2	l3	l4	l5	l6	l7	l8	protein_class_id
0	Membrane receptor	Family A G protein-coupled receptor	Peptide receptor (family A GPCR)	Chemokine receptor	CC chemokine receptor	None	None	None	547
1	Membrane receptor	Family A G protein-coupled receptor	Peptide receptor (family A GPCR)	Chemokine receptor	XC chemokine receptor	None	None	None	548
2	Membrane receptor	Family A G protein-coupled receptor	Peptide receptor (family A GPCR)	Chemokine receptor	CXC chemokine receptor	None	None	None	554
3	Membrane receptor	Family A G protein-coupled receptor	Peptide receptor (family A GPCR)	Chemokine receptor-like	Chemerin receptor	None	None	None	555
4	Membrane receptor	Family A G protein-coupled receptor	Peptide receptor (family A GPCR)	Chemokine receptor	CX3C chemokine receptor	None	None	None	558
5	Membrane receptor	Family A G protein-coupled receptor	Peptide receptor (family A GPCR)	Chemokine receptor	None	None	None	None	1265
6	Membrane receptor	Family A G protein-coupled receptor	Peptide receptor (family A GPCR)	Chemokine receptor-like	None	None	None	None	1270

\n", 2124 | "

" 2125 | ], 2126 | "text/plain": [ 2127 | " l1 l2 \\\n", 2128 | "0 Membrane receptor Family A G protein-coupled receptor \n", 2129 | "1 Membrane receptor Family A G protein-coupled receptor \n", 2130 | "2 Membrane receptor Family A G protein-coupled receptor \n", 2131 | "3 Membrane receptor Family A G protein-coupled receptor \n", 2132 | "4 Membrane receptor Family A G protein-coupled receptor \n", 2133 | "5 Membrane receptor Family A G protein-coupled receptor \n", 2134 | "6 Membrane receptor Family A G protein-coupled receptor \n", 2135 | "\n", 2136 | " l3 l4 \\\n", 2137 | "0 Peptide receptor (family A GPCR) Chemokine receptor \n", 2138 | "1 Peptide receptor (family A GPCR) Chemokine receptor \n", 2139 | "2 Peptide receptor (family A GPCR) Chemokine receptor \n", 2140 | "3 Peptide receptor (family A GPCR) Chemokine receptor-like \n", 2141 | "4 Peptide receptor (family A GPCR) Chemokine receptor \n", 2142 | "5 Peptide receptor (family A GPCR) Chemokine receptor \n", 2143 | "6 Peptide receptor (family A GPCR) Chemokine receptor-like \n", 2144 | "\n", 2145 | " l5 l6 l7 l8 protein_class_id \n", 2146 | "0 CC chemokine receptor None None None 547 \n", 2147 | "1 XC chemokine receptor None None None 548 \n", 2148 | "2 CXC chemokine receptor None None None 554 \n", 2149 | "3 Chemerin receptor None None None 555 \n", 2150 | "4 CX3C chemokine receptor None None None 558 \n", 2151 | "5 None None None None 1265 \n", 2152 | "6 None None None None 1270 " 2153 | ] 2154 | }, 2155 | "execution_count": 24, 2156 | "metadata": {}, 2157 | "output_type": "execute_result" 2158 | } 2159 | ], 2160 | "source": [ 2161 | "from chembl_webresource_client.new_client import new_client\n", 2162 | "import pandas as pd #Use pandas python module to view data as tables\n", 2163 | "\n", 2164 | "protein_class = new_client.protein_class\n", 2165 | "res = protein_class.filter(l4__icontains='chemokine receptor')\n", 2166 | "\n", 2167 | "pd.DataFrame(res)" 2168 | ] 2169 | }, 2170 | { 2171 | "cell_type": 
"markdown", 2172 | "metadata": {}, 2173 | "source": [ 2174 | "# 4. Activities\n", 2175 | "- The `activitity` API endpoint provides bioactivity data for compounds that have been measured against target(s) in an assay" 2176 | ] 2177 | }, 2178 | { 2179 | "cell_type": "markdown", 2180 | "metadata": {}, 2181 | "source": [ 2182 | "## Get all IC50 activities related to the hERG target" 2183 | ] 2184 | }, 2185 | { 2186 | "cell_type": "code", 2187 | "execution_count": 25, 2188 | "metadata": {}, 2189 | "outputs": [ 2190 | { 2191 | "data": { 2192 | "text/plain": [ 2193 | "13200" 2194 | ] 2195 | }, 2196 | "execution_count": 25, 2197 | "metadata": {}, 2198 | "output_type": "execute_result" 2199 | } 2200 | ], 2201 | "source": [ 2202 | "from chembl_webresource_client.new_client import new_client\n", 2203 | "\n", 2204 | "target = new_client.target\n", 2205 | "activity = new_client.activity\n", 2206 | "herg = target.filter(pref_name__iexact='hERG').only('target_chembl_id')[0]\n", 2207 | "herg_activities = activity.filter(target_chembl_id=herg['target_chembl_id']).filter(standard_type=\"IC50\")\n", 2208 | "\n", 2209 | "len(herg_activities)" 2210 | ] 2211 | }, 2212 | { 2213 | "cell_type": "markdown", 2214 | "metadata": {}, 2215 | "source": [ 2216 | "## Get all activities for a specific target with assay type B (binding):" 2217 | ] 2218 | }, 2219 | { 2220 | "cell_type": "code", 2221 | "execution_count": 26, 2222 | "metadata": {}, 2223 | "outputs": [ 2224 | { 2225 | "data": { 2226 | "text/plain": [ 2227 | "860" 2228 | ] 2229 | }, 2230 | "execution_count": 26, 2231 | "metadata": {}, 2232 | "output_type": "execute_result" 2233 | } 2234 | ], 2235 | "source": [ 2236 | "from chembl_webresource_client.new_client import new_client\n", 2237 | "\n", 2238 | "activity = new_client.activity\n", 2239 | "res = activity.filter(target_chembl_id='CHEMBL3938', assay_type='B')\n", 2240 | "\n", 2241 | "len(res)" 2242 | ] 2243 | }, 2244 | { 2245 | "cell_type": "markdown", 2246 | "metadata": {}, 2247 | 
"source": [ 2248 | "## Get all activities with a pChEMBL value for Aspirin" 2249 | ] 2250 | }, 2251 | { 2252 | "cell_type": "code", 2253 | "execution_count": 27, 2254 | "metadata": {}, 2255 | "outputs": [ 2256 | { 2257 | "data": { 2258 | "text/plain": [ 2259 | "138" 2260 | ] 2261 | }, 2262 | "execution_count": 27, 2263 | "metadata": {}, 2264 | "output_type": "execute_result" 2265 | } 2266 | ], 2267 | "source": [ 2268 | "from chembl_webresource_client.new_client import new_client\n", 2269 | "\n", 2270 | "activities = new_client.activity\n", 2271 | "res = activities.filter(molecule_chembl_id=\"CHEMBL25\", pchembl_value__isnull=False)\n", 2272 | "\n", 2273 | "len(res)" 2274 | ] 2275 | }, 2276 | { 2277 | "cell_type": "markdown", 2278 | "metadata": {}, 2279 | "source": [ 2280 | "# 5. Assays" 2281 | ] 2282 | }, 2283 | { 2284 | "cell_type": "markdown", 2285 | "metadata": {}, 2286 | "source": [ 2287 | "## Search for ADMET-related inhibitor assays (assay_type A) measured in Rat" 2288 | ] 2289 | }, 2290 | { 2291 | "cell_type": "code", 2292 | "execution_count": 28, 2293 | "metadata": {}, 2294 | "outputs": [ 2295 | { 2296 | "data": { 2297 | "text/html": [ 2298 | "

\n", 2299 | "\n", 2312 | "\n", 2313 | " \n", 2314 | " \n", 2315 | " \n", 2316 | " \n", 2317 | " \n", 2318 | " \n", 2319 | " \n", 2320 | " \n", 2321 | " \n", 2322 | " \n", 2323 | " \n", 2324 | " \n", 2325 | " \n", 2326 | " \n", 2327 | " \n", 2328 | " \n", 2329 | " \n", 2330 | " \n", 2331 | " \n", 2332 | " \n", 2333 | " \n", 2334 | " \n", 2335 | " \n", 2336 | " \n", 2337 | " \n", 2338 | " \n", 2339 | " \n", 2340 | " \n", 2341 | " \n", 2342 | " \n", 2343 | " \n", 2344 | " \n", 2345 | " \n", 2346 | " \n", 2347 | " \n", 2348 | " \n", 2349 | " \n", 2350 | " \n", 2351 | " \n", 2352 | " \n", 2353 | " \n", 2354 | " \n", 2355 | " \n", 2356 | " \n", 2357 | " \n", 2358 | " \n", 2359 | " \n", 2360 | " \n", 2361 | " \n", 2362 | " \n", 2363 | " \n", 2364 | " \n", 2365 | " \n", 2366 | " \n", 2367 | " \n", 2368 | " \n", 2369 | " \n", 2370 | " \n", 2371 | " \n", 2372 | " \n", 2373 | " \n", 2374 | " \n", 2375 | " \n", 2376 | " \n", 2377 | " \n", 2378 | " \n", 2379 | " \n", 2380 | " \n", 2381 | " \n", 2382 | " \n", 2383 | " \n", 2384 | " \n", 2385 | " \n", 2386 | " \n", 2387 | " \n", 2388 | " \n", 2389 | "

	assay_chembl_id	assay_organism	description
0	CHEMBL884521	Rattus norvegicus	Inhibition of cytochrome P450 progesterone 15-...
1	CHEMBL615148	Rattus norvegicus	Inhibition of cytochrome P450 progesterone 16-...
2	CHEMBL615199	Rattus norvegicus	Inhibition of cytochrome P450 progesterone 2-a...
3	CHEMBL883800	Rattus norvegicus	Inhibition of progesterone 6-beta-hydroxylase ...
4	CHEMBL618439	Rattus norvegicus	Inhibition of progesterone 6-beta-hydroxylase ...
...	...	...	...
582	CHEMBL4626205	Rattus norvegicus	Inhibition of rat adrenergic alpha1A receptor ...
583	CHEMBL4630381	Rattus norvegicus	Drug metabolism in selective CYP3A inhibitor K...
584	CHEMBL4630382	Rattus norvegicus	Drug metabolism in SULT inhibitor DCNP pre-tre...
585	CHEMBL4630392	Rattus norvegicus	Drug metabolism in selective CYP3A inhibitor K...
586	CHEMBL4630393	Rattus norvegicus	Drug metabolism in selective SULT inhibitor DC...

\n", 2390 | "

587 rows × 3 columns

\n", 2391 | "

" 2392 | ], 2393 | "text/plain": [ 2394 | " assay_chembl_id assay_organism \\\n", 2395 | "0 CHEMBL884521 Rattus norvegicus \n", 2396 | "1 CHEMBL615148 Rattus norvegicus \n", 2397 | "2 CHEMBL615199 Rattus norvegicus \n", 2398 | "3 CHEMBL883800 Rattus norvegicus \n", 2399 | "4 CHEMBL618439 Rattus norvegicus \n", 2400 | ".. ... ... \n", 2401 | "582 CHEMBL4626205 Rattus norvegicus \n", 2402 | "583 CHEMBL4630381 Rattus norvegicus \n", 2403 | "584 CHEMBL4630382 Rattus norvegicus \n", 2404 | "585 CHEMBL4630392 Rattus norvegicus \n", 2405 | "586 CHEMBL4630393 Rattus norvegicus \n", 2406 | "\n", 2407 | " description \n", 2408 | "0 Inhibition of cytochrome P450 progesterone 15-... \n", 2409 | "1 Inhibition of cytochrome P450 progesterone 16-... \n", 2410 | "2 Inhibition of cytochrome P450 progesterone 2-a... \n", 2411 | "3 Inhibition of progesterone 6-beta-hydroxylase ... \n", 2412 | "4 Inhibition of progesterone 6-beta-hydroxylase ... \n", 2413 | ".. ... \n", 2414 | "582 Inhibition of rat adrenergic alpha1A receptor ... \n", 2415 | "583 Drug metabolism in selective CYP3A inhibitor K... \n", 2416 | "584 Drug metabolism in SULT inhibitor DCNP pre-tre... \n", 2417 | "585 Drug metabolism in selective CYP3A inhibitor K... \n", 2418 | "586 Drug metabolism in selective SULT inhibitor DC... 
\n", 2419 | "\n", 2420 | "[587 rows x 3 columns]" 2421 | ] 2422 | }, 2423 | "execution_count": 28, 2424 | "metadata": {}, 2425 | "output_type": "execute_result" 2426 | } 2427 | ], 2428 | "source": [ 2429 | "from chembl_webresource_client.new_client import new_client\n", 2430 | "import pandas as pd #Use pandas python module to view data as tables\n", 2431 | "\n", 2432 | "assay = new_client.assay\n", 2433 | "res = assay.filter(description__icontains='inhibit', assay_type='A', assay_organism='Rattus norvegicus').only(['assay_chembl_id','assay_organism','description'])\n", 2434 | "pd.DataFrame(res)" 2435 | ] 2436 | }, 2437 | { 2438 | "cell_type": "markdown", 2439 | "metadata": {}, 2440 | "source": [ 2441 | "## Find assays that assess nephrotoxicity (kidney toxicity)\n", 2442 | "- Note that nephrotoxic drugs could be searched for using the `drug_warning` API endpoint" 2443 | ] 2444 | }, 2445 | { 2446 | "cell_type": "code", 2447 | "execution_count": 29, 2448 | "metadata": {}, 2449 | "outputs": [ 2450 | { 2451 | "data": { 2452 | "text/html": [ 2453 | "

\n", 2454 | "\n", 2467 | "\n", 2468 | " \n", 2469 | " \n", 2470 | " \n", 2471 | " \n", 2472 | " \n", 2473 | " \n", 2474 | " \n", 2475 | " \n", 2476 | " \n", 2477 | " \n", 2478 | " \n", 2479 | " \n", 2480 | " \n", 2481 | " \n", 2482 | " \n", 2483 | " \n", 2484 | " \n", 2485 | " \n", 2486 | " \n", 2487 | " \n", 2488 | " \n", 2489 | " \n", 2490 | " \n", 2491 | " \n", 2492 | " \n", 2493 | " \n", 2494 | " \n", 2495 | " \n", 2496 | " \n", 2497 | " \n", 2498 | " \n", 2499 | " \n", 2500 | " \n", 2501 | " \n", 2502 | " \n", 2503 | " \n", 2504 | " \n", 2505 | " \n", 2506 | " \n", 2507 | " \n", 2508 | " \n", 2509 | " \n", 2510 | " \n", 2511 | " \n", 2512 | " \n", 2513 | " \n", 2514 | " \n", 2515 | " \n", 2516 | " \n", 2517 | " \n", 2518 | " \n", 2519 | " \n", 2520 | " \n", 2521 | " \n", 2522 | " \n", 2523 | " \n", 2524 | " \n", 2525 | " \n", 2526 | " \n", 2527 | " \n", 2528 | " \n", 2529 | " \n", 2530 | " \n", 2531 | " \n", 2532 | " \n", 2533 | " \n", 2534 | " \n", 2535 | " \n", 2536 | " \n", 2537 | " \n", 2538 | " \n", 2539 | " \n", 2540 | " \n", 2541 | " \n", 2542 | " \n", 2543 | " \n", 2544 | "

	assay_chembl_id	assay_organism	description
0	CHEMBL730330	Mus musculus	Nephrotoxicity upon intragastric administratio...
1	CHEMBL730331	Mus musculus	Nephrotoxicity upon intragastric administratio...
2	CHEMBL729614	Mus musculus	Nephrotoxicity upon intragastric administratio...
3	CHEMBL729615	Mus musculus	Nephrotoxicity upon intragastric administratio...
4	CHEMBL729616	Mus musculus	Nephrotoxicity upon intragastric administratio...
...	...	...	...
349	CHEMBL4422918	Mus musculus	Nephrotoxicity in C57BL/6 mouse assessed as ab...
350	CHEMBL4422919	Mus musculus	Nephrotoxicity in C57BL/6 mouse assessed as ti...
351	CHEMBL4480539	Rattus norvegicus	Renal toxicity in Wistar rat assessed as effec...
352	CHEMBL4623569	Homo sapiens	Nephrotoxicity in human patients assessed as p...
353	CHEMBL4626515	Mus musculus	Renal toxicity in C57BL/6 mouse assessed effec...

\n", 2545 | "

354 rows × 3 columns

\n", 2546 | "

" 2547 | ], 2548 | "text/plain": [ 2549 | " assay_chembl_id assay_organism \\\n", 2550 | "0 CHEMBL730330 Mus musculus \n", 2551 | "1 CHEMBL730331 Mus musculus \n", 2552 | "2 CHEMBL729614 Mus musculus \n", 2553 | "3 CHEMBL729615 Mus musculus \n", 2554 | "4 CHEMBL729616 Mus musculus \n", 2555 | ".. ... ... \n", 2556 | "349 CHEMBL4422918 Mus musculus \n", 2557 | "350 CHEMBL4422919 Mus musculus \n", 2558 | "351 CHEMBL4480539 Rattus norvegicus \n", 2559 | "352 CHEMBL4623569 Homo sapiens \n", 2560 | "353 CHEMBL4626515 Mus musculus \n", 2561 | "\n", 2562 | " description \n", 2563 | "0 Nephrotoxicity upon intragastric administratio... \n", 2564 | "1 Nephrotoxicity upon intragastric administratio... \n", 2565 | "2 Nephrotoxicity upon intragastric administratio... \n", 2566 | "3 Nephrotoxicity upon intragastric administratio... \n", 2567 | "4 Nephrotoxicity upon intragastric administratio... \n", 2568 | ".. ... \n", 2569 | "349 Nephrotoxicity in C57BL/6 mouse assessed as ab... \n", 2570 | "350 Nephrotoxicity in C57BL/6 mouse assessed as ti... \n", 2571 | "351 Renal toxicity in Wistar rat assessed as effec... \n", 2572 | "352 Nephrotoxicity in human patients assessed as p... \n", 2573 | "353 Renal toxicity in C57BL/6 mouse assessed effec... 
\n", 2574 | "\n", 2575 | "[354 rows x 3 columns]" 2576 | ] 2577 | }, 2578 | "execution_count": 29, 2579 | "metadata": {}, 2580 | "output_type": "execute_result" 2581 | } 2582 | ], 2583 | "source": [ 2584 | "from chembl_webresource_client.new_client import new_client\n", 2585 | "import pandas as pd #Use pandas python module to view data as tables\n", 2586 | "\n", 2587 | "assay = new_client.assay\n", 2588 | "res = assay.filter(description__iregex='nephrotoxicity|renal toxicity').only(['assay_chembl_id','assay_organism','description'])\n", 2589 | "pd.DataFrame(res)" 2590 | ] 2591 | }, 2592 | { 2593 | "cell_type": "markdown", 2594 | "metadata": {}, 2595 | "source": [ 2596 | "# Tissues" 2597 | ] 2598 | }, 2599 | { 2600 | "cell_type": "markdown", 2601 | "metadata": {}, 2602 | "source": [ 2603 | "## Get tissue by Uberon id (UBERON is a cross-species anatomy ontology)" 2604 | ] 2605 | }, 2606 | { 2607 | "cell_type": "code", 2608 | "execution_count": 30, 2609 | "metadata": {}, 2610 | "outputs": [ 2611 | { 2612 | "data": { 2613 | "text/plain": [ 2614 | "[{'bto_id': 'BTO:0000068', 'caloha_id': 'TS-0034', 'efo_id': None, 'pref_name': 'Amniotic fluid', 'tissue_chembl_id': 'CHEMBL3638177', 'uberon_id': 'UBERON:0000173'}]" 2615 | ] 2616 | }, 2617 | "execution_count": 30, 2618 | "metadata": {}, 2619 | "output_type": "execute_result" 2620 | } 2621 | ], 2622 | "source": [ 2623 | "from chembl_webresource_client.new_client import new_client\n", 2624 | "\n", 2625 | "tissue = new_client.tissue\n", 2626 | "res = tissue.filter(uberon_id=\"UBERON:0000173\")\n", 2627 | "res" 2628 | ] 2629 | }, 2630 | { 2631 | "cell_type": "markdown", 2632 | "metadata": {}, 2633 | "source": [ 2634 | "## Get tissue by name" 2635 | ] 2636 | }, 2637 | { 2638 | "cell_type": "code", 2639 | "execution_count": 31, 2640 | "metadata": {}, 2641 | "outputs": [ 2642 | { 2643 | "data": { 2644 | "text/html": [ 2645 | "

\n", 2646 | "\n", 2659 | "\n", 2660 | " \n", 2661 | " \n", 2662 | " \n", 2663 | " \n", 2664 | " \n", 2665 | " \n", 2666 | " \n", 2667 | " \n", 2668 | " \n", 2669 | " \n", 2670 | " \n", 2671 | " \n", 2672 | " \n", 2673 | " \n", 2674 | " \n", 2675 | " \n", 2676 | " \n", 2677 | " \n", 2678 | " \n", 2679 | " \n", 2680 | " \n", 2681 | " \n", 2682 | " \n", 2683 | " \n", 2684 | " \n", 2685 | " \n", 2686 | " \n", 2687 | " \n", 2688 | " \n", 2689 | " \n", 2690 | " \n", 2691 | " \n", 2692 | " \n", 2693 | " \n", 2694 | " \n", 2695 | " \n", 2696 | " \n", 2697 | " \n", 2698 | " \n", 2699 | " \n", 2700 | " \n", 2701 | " \n", 2702 | " \n", 2703 | " \n", 2704 | " \n", 2705 | " \n", 2706 | " \n", 2707 | " \n", 2708 | " \n", 2709 | " \n", 2710 | " \n", 2711 | " \n", 2712 | " \n", 2713 | " \n", 2714 | " \n", 2715 | " \n", 2716 | " \n", 2717 | " \n", 2718 | "

	bto_id	caloha_id	efo_id	pref_name	tissue_chembl_id	uberon_id
0	None	None	None	Blood brain barrier	CHEMBL3987461	UBERON:0000120
1	BTO:0000089	TS-0079	EFO:0000296	Blood	CHEMBL3638178	UBERON:0000178
2	BTO:0001102	TS-0080	EFO:0000817	Blood vessel	CHEMBL3987656	UBERON:0001981
3	BTO:0000102	None	None	Blood clot	CHEMBL3987655	UBERON:0010210
4	None	None	None	Blood/Brain	CHEMBL3832984	None

\n", 2719 | "

" 2720 | ], 2721 | "text/plain": [ 2722 | " bto_id caloha_id efo_id pref_name tissue_chembl_id \\\n", 2723 | "0 None None None Blood brain barrier CHEMBL3987461 \n", 2724 | "1 BTO:0000089 TS-0079 EFO:0000296 Blood CHEMBL3638178 \n", 2725 | "2 BTO:0001102 TS-0080 EFO:0000817 Blood vessel CHEMBL3987656 \n", 2726 | "3 BTO:0000102 None None Blood clot CHEMBL3987655 \n", 2727 | "4 None None None Blood/Brain CHEMBL3832984 \n", 2728 | "\n", 2729 | " uberon_id \n", 2730 | "0 UBERON:0000120 \n", 2731 | "1 UBERON:0000178 \n", 2732 | "2 UBERON:0001981 \n", 2733 | "3 UBERON:0010210 \n", 2734 | "4 None " 2735 | ] 2736 | }, 2737 | "execution_count": 31, 2738 | "metadata": {}, 2739 | "output_type": "execute_result" 2740 | } 2741 | ], 2742 | "source": [ 2743 | "from chembl_webresource_client.new_client import new_client\n", 2744 | "import pandas as pd #Use pandas python module to view data as tables\n", 2745 | "\n", 2746 | "tissue = new_client.tissue\n", 2747 | "res = tissue.filter(pref_name__istartswith='blood')\n", 2748 | "res = pd.DataFrame(res)\n", 2749 | "res.head() #Display top five rows" 2750 | ] 2751 | }, 2752 | { 2753 | "cell_type": "markdown", 2754 | "metadata": {}, 2755 | "source": [ 2756 | "## Get tissue by BTO ID (Brenda Tissue Ontology)" 2757 | ] 2758 | }, 2759 | { 2760 | "cell_type": "code", 2761 | "execution_count": 32, 2762 | "metadata": {}, 2763 | "outputs": [ 2764 | { 2765 | "data": { 2766 | "text/plain": [ 2767 | "[{'bto_id': 'BTO:0001073', 'caloha_id': 'TS-0798', 'efo_id': 'EFO:0000857', 'pref_name': 'Pituitary gland', 'tissue_chembl_id': 'CHEMBL3638173', 'uberon_id': 'UBERON:0000007'}]" 2768 | ] 2769 | }, 2770 | "execution_count": 32, 2771 | "metadata": {}, 2772 | "output_type": "execute_result" 2773 | } 2774 | ], 2775 | "source": [ 2776 | "from chembl_webresource_client.new_client import new_client\n", 2777 | "\n", 2778 | "tissue = new_client.tissue\n", 2779 | "res = tissue.filter(bto_id=\"BTO:0001073\")\n", 2780 | "res" 2781 | ] 2782 | }, 2783 | { 2784 | 
"cell_type": "markdown", 2785 | "metadata": {}, 2786 | "source": [ 2787 | "## Get tissue by EFO id (Experimental Factor Ontology)" 2788 | ] 2789 | }, 2790 | { 2791 | "cell_type": "code", 2792 | "execution_count": 33, 2793 | "metadata": {}, 2794 | "outputs": [ 2795 | { 2796 | "data": { 2797 | "text/html": [ 2798 | "

\n", 2799 | "\n", 2812 | "\n", 2813 | " \n", 2814 | " \n", 2815 | " \n", 2816 | " \n", 2817 | " \n", 2818 | " \n", 2819 | " \n", 2820 | " \n", 2821 | " \n", 2822 | " \n", 2823 | " \n", 2824 | " \n", 2825 | " \n", 2826 | " \n", 2827 | " \n", 2828 | " \n", 2829 | " \n", 2830 | " \n", 2831 | " \n", 2832 | " \n", 2833 | " \n", 2834 | " \n", 2835 | "

	bto_id	caloha_id	efo_id	pref_name	tissue_chembl_id	uberon_id
0	BTO:0001281	TS-0956	EFO:0000869	Spleen	CHEMBL3559722	UBERON:0002106

\n", 2836 | "

" 2837 | ], 2838 | "text/plain": [ 2839 | " bto_id caloha_id efo_id pref_name tissue_chembl_id \\\n", 2840 | "0 BTO:0001281 TS-0956 EFO:0000869 Spleen CHEMBL3559722 \n", 2841 | "\n", 2842 | " uberon_id \n", 2843 | "0 UBERON:0002106 " 2844 | ] 2845 | }, 2846 | "execution_count": 33, 2847 | "metadata": {}, 2848 | "output_type": "execute_result" 2849 | } 2850 | ], 2851 | "source": [ 2852 | "from chembl_webresource_client.new_client import new_client\n", 2853 | "import pandas as pd #Use pandas python module to view data as tables\n", 2854 | "\n", 2855 | "tissue = new_client.tissue\n", 2856 | "res = tissue.filter(efo_id=\"EFO:0000869\") #spleen\n", 2857 | "pd.DataFrame(res)" 2858 | ] 2859 | }, 2860 | { 2861 | "cell_type": "markdown", 2862 | "metadata": {}, 2863 | "source": [ 2864 | "# Cells" 2865 | ] 2866 | }, 2867 | { 2868 | "cell_type": "markdown", 2869 | "metadata": {}, 2870 | "source": [ 2871 | "## Get cell line by cellosaurus id" 2872 | ] 2873 | }, 2874 | { 2875 | "cell_type": "code", 2876 | "execution_count": 34, 2877 | "metadata": {}, 2878 | "outputs": [ 2879 | { 2880 | "data": { 2881 | "text/html": [ 2882 | "

\n", 2883 | "\n", 2896 | "\n", 2897 | " \n", 2898 | " \n", 2899 | " \n", 2900 | " \n", 2901 | " \n", 2902 | " \n", 2903 | " \n", 2904 | " \n", 2905 | " \n", 2906 | " \n", 2907 | " \n", 2908 | " \n", 2909 | " \n", 2910 | " \n", 2911 | " \n", 2912 | " \n", 2913 | " \n", 2914 | " \n", 2915 | " \n", 2916 | " \n", 2917 | " \n", 2918 | " \n", 2919 | " \n", 2920 | " \n", 2921 | " \n", 2922 | " \n", 2923 | " \n", 2924 | " \n", 2925 | " \n", 2926 | " \n", 2927 | " \n", 2928 | " \n", 2929 | "

	cell_chembl_id	cell_description	cell_id	cell_name	cell_source_organism	cell_source_tax_id	cell_source_tissue	cellosaurus_id	cl_lincs_id	clo_id	efo_id
0	CHEMBL3307686	MDA-MB-435 (Breast metastasis of melanoma cells	687	MDA-MB-435	Homo sapiens	9606	Breast metastasis of melanoma cells	CVCL_0417	None	None	EFO_0001213

\n", 2930 | "

" 2931 | ], 2932 | "text/plain": [ 2933 | " cell_chembl_id cell_description cell_id \\\n", 2934 | "0 CHEMBL3307686 MDA-MB-435 (Breast metastasis of melanoma cells 687 \n", 2935 | "\n", 2936 | " cell_name cell_source_organism cell_source_tax_id \\\n", 2937 | "0 MDA-MB-435 Homo sapiens 9606 \n", 2938 | "\n", 2939 | " cell_source_tissue cellosaurus_id cl_lincs_id clo_id \\\n", 2940 | "0 Breast metastasis of melanoma cells CVCL_0417 None None \n", 2941 | "\n", 2942 | " efo_id \n", 2943 | "0 EFO_0001213 " 2944 | ] 2945 | }, 2946 | "execution_count": 34, 2947 | "metadata": {}, 2948 | "output_type": "execute_result" 2949 | } 2950 | ], 2951 | "source": [ 2952 | "from chembl_webresource_client.new_client import new_client\n", 2953 | "import pandas as pd #Use pandas python module to view data as tables\n", 2954 | "\n", 2955 | "cell_line = new_client.cell_line\n", 2956 | "res = cell_line.filter(cellosaurus_id=\"CVCL_0417\")\n", 2957 | "pd.DataFrame(res)" 2958 | ] 2959 | }, 2960 | { 2961 | "cell_type": "markdown", 2962 | "metadata": {}, 2963 | "source": [ 2964 | "## Get cells described as lymphoma" 2965 | ] 2966 | }, 2967 | { 2968 | "cell_type": "code", 2969 | "execution_count": 35, 2970 | "metadata": {}, 2971 | "outputs": [ 2972 | { 2973 | "data": { 2974 | "text/html": [ 2975 | "

\n", 2976 | "\n", 2989 | "\n", 2990 | " \n", 2991 | " \n", 2992 | " \n", 2993 | " \n", 2994 | " \n", 2995 | " \n", 2996 | " \n", 2997 | " \n", 2998 | " \n", 2999 | " \n", 3000 | " \n", 3001 | " \n", 3002 | " \n", 3003 | " \n", 3004 | " \n", 3005 | " \n", 3006 | " \n", 3007 | " \n", 3008 | " \n", 3009 | " \n", 3010 | " \n", 3011 | " \n", 3012 | " \n", 3013 | " \n", 3014 | " \n", 3015 | " \n", 3016 | " \n", 3017 | " \n", 3018 | " \n", 3019 | " \n", 3020 | " \n", 3021 | " \n", 3022 | " \n", 3023 | " \n", 3024 | " \n", 3025 | " \n", 3026 | " \n", 3027 | " \n", 3028 | " \n", 3029 | " \n", 3030 | " \n", 3031 | " \n", 3032 | " \n", 3033 | " \n", 3034 | " \n", 3035 | " \n", 3036 | " \n", 3037 | " \n", 3038 | " \n", 3039 | " \n", 3040 | " \n", 3041 | " \n", 3042 | " \n", 3043 | " \n", 3044 | " \n", 3045 | " \n", 3046 | " \n", 3047 | " \n", 3048 | " \n", 3049 | " \n", 3050 | " \n", 3051 | " \n", 3052 | " \n", 3053 | " \n", 3054 | " \n", 3055 | " \n", 3056 | " \n", 3057 | " \n", 3058 | " \n", 3059 | " \n", 3060 | " \n", 3061 | " \n", 3062 | " \n", 3063 | " \n", 3064 | " \n", 3065 | " \n", 3066 | " \n", 3067 | " \n", 3068 | " \n", 3069 | " \n", 3070 | " \n", 3071 | " \n", 3072 | " \n", 3073 | " \n", 3074 | " \n", 3075 | " \n", 3076 | " \n", 3077 | " \n", 3078 | " \n", 3079 | " \n", 3080 | " \n", 3081 | " \n", 3082 | " \n", 3083 | " \n", 3084 | " \n", 3085 | " \n", 3086 | " \n", 3087 | " \n", 3088 | " \n", 3089 | " \n", 3090 | " \n", 3091 | " \n", 3092 | " \n", 3093 | " \n", 3094 | " \n", 3095 | " \n", 3096 | " \n", 3097 | " \n", 3098 | " \n", 3099 | " \n", 3100 | " \n", 3101 | " \n", 3102 | " \n", 3103 | " \n", 3104 | " \n", 3105 | " \n", 3106 | " \n", 3107 | " \n", 3108 | " \n", 3109 | " \n", 3110 | " \n", 3111 | " \n", 3112 | " \n", 3113 | " \n", 3114 | " \n", 3115 | " \n", 3116 | " \n", 3117 | " \n", 3118 | " \n", 3119 | " \n", 3120 | " \n", 3121 | " \n", 3122 | " \n", 3123 | " \n", 3124 | " \n", 3125 | " \n", 3126 | " \n", 3127 | " \n", 3128 | " \n", 3129 | " \n", 3130 | " 
\n", 3131 | " \n", 3132 | " \n", 3133 | " \n", 3134 | " \n", 3135 | " \n", 3136 | " \n", 3137 | " \n", 3138 | " \n", 3139 | " \n", 3140 | " \n", 3141 | " \n", 3142 | " \n", 3143 | " \n", 3144 | " \n", 3145 | " \n", 3146 | " \n", 3147 | " \n", 3148 | " \n", 3149 | " \n", 3150 | " \n", 3151 | " \n", 3152 | " \n", 3153 | " \n", 3154 | " \n", 3155 | " \n", 3156 | " \n", 3157 | " \n", 3158 | " \n", 3159 | " \n", 3160 | " \n", 3161 | " \n", 3162 | " \n", 3163 | " \n", 3164 | " \n", 3165 | " \n", 3166 | " \n", 3167 | " \n", 3168 | " \n", 3169 | " \n", 3170 | " \n", 3171 | " \n", 3172 | " \n", 3173 | " \n", 3174 | " \n", 3175 | " \n", 3176 | " \n", 3177 | " \n", 3178 | " \n", 3179 | " \n", 3180 | " \n", 3181 | " \n", 3182 | " \n", 3183 | " \n", 3184 | " \n", 3185 | " \n", 3186 | " \n", 3187 | " \n", 3188 | " \n", 3189 | " \n", 3190 | " \n", 3191 | " \n", 3192 | " \n", 3193 | " \n", 3194 | " \n", 3195 | " \n", 3196 | " \n", 3197 | " \n", 3198 | " \n", 3199 | " \n", 3200 | " \n", 3201 | " \n", 3202 | " \n", 3203 | " \n", 3204 | " \n", 3205 | " \n", 3206 | " \n", 3207 | " \n", 3208 | " \n", 3209 | " \n", 3210 | " \n", 3211 | " \n", 3212 | " \n", 3213 | " \n", 3214 | " \n", 3215 | " \n", 3216 | " \n", 3217 | " \n", 3218 | "

	cell_chembl_id	cell_description	cell_id	cell_name	cell_source_organism	cell_source_tax_id	cell_source_tissue	cellosaurus_id	cl_lincs_id	clo_id	efo_id
0	CHEMBL3308488	CA46 (Burkitts lymphoma cells)	346	CA46	Homo sapiens	9606.0	Burkitts lymphoma cells	CVCL_1101	LCL-2017	CLO_0002168	EFO_0002124
1	CHEMBL3308006	U-937 (Histiocytic lymphoma cells)	379	U-937	Homo sapiens	9606.0	Histiocytic lymphoma cells	CVCL_0007	LCL-1125	CLO_0009465	EFO_0001257
2	CHEMBL3308007	U-937/GTB (Histiocytic lymphoma cells)	380	U-937/GTB	Homo sapiens	9606.0	Histiocytic lymphoma cells	CVCL_U631	None	None	None
3	CHEMBL3308492	BJAB (Burkitts lymphoma cells)	412	BJAB	Homo sapiens	9606.0	Burkitts lymphoma cells	CVCL_5711	None	None	EFO_0002815
4	CHEMBL3308050	W4 (Lymphoma cells)	426	W4	Mus musculus	10090.0	Lymphoma cells	CVCL_H588	None	None	None
5	CHEMBL3307500	HuT78 (T-lymphoma cells)	472	HuT78	Homo sapiens	9606.0	T-lymphoma cells	CVCL_0337	LCL-2005	CLO_0004304	EFO_0002209
6	CHEMBL3308401	P388 (Lymphoma cells)	549	P388	Mus musculus	10090.0	Lymphoma cells	CVCL_7222	None	CLO_0008321	None
7	CHEMBL3307566	P388/ADR (Lymphoma cells)	550	P388/ADR	Mus musculus	10090.0	Lymphoma cells	CVCL_IZ75	None	None	None
8	CHEMBL3307567	P388CPT45 (Lymphoma cells)	551	P388CPT45	Mus musculus	10090.0	Lymphoma cells	CVCL_D638	None	None	None
9	CHEMBL3307579	Daudi (Burkitts lymphoma cells)	564	Daudi	Homo sapiens	9606.0	Burkitts lymphoma cells	CVCL_0008	LCL-2018	CLO_0002708	EFO_0002169
10	CHEMBL3307608	Burkitts lymphoma cells	596	Burkitts lymphoma cell	None	NaN	Burkitts lymphoma cells	None	None	None	None
11	CHEMBL3307658	JIJOYE (Burkitts lymphoma cells)	654	JIYOYE	Homo sapiens	9606.0	Burkitts lymphoma cells	CVCL_1317	None	CLO_0007025	EFO_0002215
12	CHEMBL3307690	L5178Y (Lymphoma cells)	692	L5178Y	Mus musculus	10090.0	Lymphoma cells	CVCL_2097	None	CLO_0007195	None
13	CHEMBL3307722	FEPD (Anaplastic lymphoma cells)	731	FEPD	Homo sapiens	9606.0	Anaplastic lymphoma cells	CVCL_H614	None	None	None
14	CHEMBL3307875	Lymphoma cell line	949	Lymphoma cell line	None	NaN	None	None	None	None	None

\n", 3219 | "

" 3220 | ], 3221 | "text/plain": [ 3222 | " cell_chembl_id cell_description cell_id \\\n", 3223 | "0 CHEMBL3308488 CA46 (Burkitts lymphoma cells) 346 \n", 3224 | "1 CHEMBL3308006 U-937 (Histiocytic lymphoma cells) 379 \n", 3225 | "2 CHEMBL3308007 U-937/GTB (Histiocytic lymphoma cells) 380 \n", 3226 | "3 CHEMBL3308492 BJAB (Burkitts lymphoma cells) 412 \n", 3227 | "4 CHEMBL3308050 W4 (Lymphoma cells) 426 \n", 3228 | "5 CHEMBL3307500 HuT78 (T-lymphoma cells) 472 \n", 3229 | "6 CHEMBL3308401 P388 (Lymphoma cells) 549 \n", 3230 | "7 CHEMBL3307566 P388/ADR (Lymphoma cells) 550 \n", 3231 | "8 CHEMBL3307567 P388CPT45 (Lymphoma cells) 551 \n", 3232 | "9 CHEMBL3307579 Daudi (Burkitts lymphoma cells) 564 \n", 3233 | "10 CHEMBL3307608 Burkitts lymphoma cells 596 \n", 3234 | "11 CHEMBL3307658 JIJOYE (Burkitts lymphoma cells) 654 \n", 3235 | "12 CHEMBL3307690 L5178Y (Lymphoma cells) 692 \n", 3236 | "13 CHEMBL3307722 FEPD (Anaplastic lymphoma cells) 731 \n", 3237 | "14 CHEMBL3307875 Lymphoma cell line 949 \n", 3238 | "\n", 3239 | " cell_name cell_source_organism cell_source_tax_id \\\n", 3240 | "0 CA46 Homo sapiens 9606.0 \n", 3241 | "1 U-937 Homo sapiens 9606.0 \n", 3242 | "2 U-937/GTB Homo sapiens 9606.0 \n", 3243 | "3 BJAB Homo sapiens 9606.0 \n", 3244 | "4 W4 Mus musculus 10090.0 \n", 3245 | "5 HuT78 Homo sapiens 9606.0 \n", 3246 | "6 P388 Mus musculus 10090.0 \n", 3247 | "7 P388/ADR Mus musculus 10090.0 \n", 3248 | "8 P388CPT45 Mus musculus 10090.0 \n", 3249 | "9 Daudi Homo sapiens 9606.0 \n", 3250 | "10 Burkitts lymphoma cell None NaN \n", 3251 | "11 JIYOYE Homo sapiens 9606.0 \n", 3252 | "12 L5178Y Mus musculus 10090.0 \n", 3253 | "13 FEPD Homo sapiens 9606.0 \n", 3254 | "14 Lymphoma cell line None NaN \n", 3255 | "\n", 3256 | " cell_source_tissue cellosaurus_id cl_lincs_id clo_id \\\n", 3257 | "0 Burkitts lymphoma cells CVCL_1101 LCL-2017 CLO_0002168 \n", 3258 | "1 Histiocytic lymphoma cells CVCL_0007 LCL-1125 CLO_0009465 \n", 3259 | "2 Histiocytic lymphoma cells 
CVCL_U631 None None \n", 3260 | "3 Burkitts lymphoma cells CVCL_5711 None None \n", 3261 | "4 Lymphoma cells CVCL_H588 None None \n", 3262 | "5 T-lymphoma cells CVCL_0337 LCL-2005 CLO_0004304 \n", 3263 | "6 Lymphoma cells CVCL_7222 None CLO_0008321 \n", 3264 | "7 Lymphoma cells CVCL_IZ75 None None \n", 3265 | "8 Lymphoma cells CVCL_D638 None None \n", 3266 | "9 Burkitts lymphoma cells CVCL_0008 LCL-2018 CLO_0002708 \n", 3267 | "10 Burkitts lymphoma cells None None None \n", 3268 | "11 Burkitts lymphoma cells CVCL_1317 None CLO_0007025 \n", 3269 | "12 Lymphoma cells CVCL_2097 None CLO_0007195 \n", 3270 | "13 Anaplastic lymphoma cells CVCL_H614 None None \n", 3271 | "14 None None None None \n", 3272 | "\n", 3273 | " efo_id \n", 3274 | "0 EFO_0002124 \n", 3275 | "1 EFO_0001257 \n", 3276 | "2 None \n", 3277 | "3 EFO_0002815 \n", 3278 | "4 None \n", 3279 | "5 EFO_0002209 \n", 3280 | "6 None \n", 3281 | "7 None \n", 3282 | "8 None \n", 3283 | "9 EFO_0002169 \n", 3284 | "10 None \n", 3285 | "11 EFO_0002215 \n", 3286 | "12 None \n", 3287 | "13 None \n", 3288 | "14 None " 3289 | ] 3290 | }, 3291 | "execution_count": 35, 3292 | "metadata": {}, 3293 | "output_type": "execute_result" 3294 | } 3295 | ], 3296 | "source": [ 3297 | "from chembl_webresource_client.new_client import new_client\n", 3298 | "import pandas as pd #Use pandas python module to view data as tables\n", 3299 | "\n", 3300 | "cell_line = new_client.cell_line\n", 3301 | "res = cell_line.filter(cell_description__icontains=\"lymphoma\")\n", 3302 | "pd.DataFrame(res)" 3303 | ] 3304 | }, 3305 | { 3306 | "cell_type": "markdown", 3307 | "metadata": {}, 3308 | "source": [ 3309 | "# Utils" 3310 | ] 3311 | }, 3312 | { 3313 | "cell_type": "markdown", 3314 | "metadata": {}, 3315 | "source": [ 3316 | "## Convert SMILES to CTAB" 3317 | ] 3318 | }, 3319 | { 3320 | "cell_type": "code", 3321 | "execution_count": 37, 3322 | "metadata": {}, 3323 | "outputs": [ 3324 | { 3325 | "data": { 3326 | "text/plain": [ 3327 | "'\\n RDKit 
2D\\n\\n 13 13 0 0 0 0 0 0 0 0999 V2000\\n -0.9550 -1.3220 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\\n -0.9528 -0.3220 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n -0.0858 0.1764 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\\n 0.7792 -0.3254 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 0.7772 -1.3254 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 1.6422 -1.8270 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 2.5092 -1.3288 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 2.5112 -0.3288 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 1.6462 0.1728 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 1.6482 1.1728 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 0.7832 1.6746 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\\n 2.5152 1.6710 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\\n -1.8178 0.1798 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 1 2 2 0\\n 2 3 1 0\\n 3 4 1 0\\n 4 5 2 0\\n 5 6 1 0\\n 6 7 2 0\\n 7 8 1 0\\n 8 9 2 0\\n 9 10 1 0\\n 10 11 2 0\\n 10 12 1 0\\n 2 13 1 0\\n 9 4 1 0\\nM END\\n$$$$\\n'" 3328 | ] 3329 | }, 3330 | "execution_count": 37, 3331 | "metadata": {}, 3332 | "output_type": "execute_result" 3333 | } 3334 | ], 3335 | "source": [ 3336 | "from chembl_webresource_client.utils import utils\n", 3337 | "\n", 3338 | "aspirin = utils.smiles2ctab('O=C(Oc1ccccc1C(=O)O)C')\n", 3339 | "aspirin" 3340 | ] 3341 | }, 3342 | { 3343 | "cell_type": "markdown", 3344 | "metadata": {}, 3345 | "source": [ 3346 | "## Compute Maximal Common Substructure" 3347 | ] 3348 | }, 3349 | { 3350 | "cell_type": "code", 3351 | "execution_count": 38, 3352 | "metadata": {}, 3353 | "outputs": [ 3354 | { 3355 | "data": { 3356 | "text/plain": [ 3357 | "'[#6]1(-[#6]):[#6]:[#6](-[#8]-[#6]):[#6](:[#6]:[#6]:1)-[#8]'" 3358 | ] 3359 | }, 3360 | "execution_count": 38, 3361 | "metadata": {}, 3362 | "output_type": "execute_result" 3363 | } 3364 | ], 3365 | "source": [ 3366 | "from chembl_webresource_client.utils import utils\n", 3367 | "\n", 3368 | "smiles = [\"O=C(NCc1cc(OC)c(O)cc1)CCCC/C=C/C(C)C\",\n", 3369 | " \"CC(C)CCCCCC(=O)NCC1=CC(=C(C=C1)O)OC\", \"c1(C=O)cc(OC)c(O)cc1\"]\n", 3370 | "mols = [utils.smiles2ctab(smile) for smile in 
smiles]\n", 3371 | "sdf = ''.join(mols)\n", 3372 | "result = utils.mcs(sdf)\n", 3373 | "result" 3374 | ] 3375 | }, 3376 | { 3377 | "cell_type": "markdown", 3378 | "metadata": {}, 3379 | "source": [ 3380 | "## Compute various molecular descriptors" 3381 | ] 3382 | }, 3383 | { 3384 | "cell_type": "code", 3385 | "execution_count": 39, 3386 | "metadata": {}, 3387 | "outputs": [ 3388 | { 3389 | "data": { 3390 | "text/plain": [ 3391 | "{'qed': 0.5501217966938848,\n", 3392 | " 'MolWt': 180.15899999999996,\n", 3393 | " 'TPSA': 63.60000000000001,\n", 3394 | " 'HeavyAtomCount': 13,\n", 3395 | " 'NumAromaticRings': 1,\n", 3396 | " 'NumHAcceptors': 3,\n", 3397 | " 'NumHDonors': 1,\n", 3398 | " 'NumRotatableBonds': 2,\n", 3399 | " 'MolLogP': 1.3100999999999998,\n", 3400 | " 'MolecularFormula': 'C9H8O4',\n", 3401 | " 'Ro3Pass': 0,\n", 3402 | " 'NumRo5': 0,\n", 3403 | " 'MonoisotopicMolWt': 180.042258736}" 3404 | ] 3405 | }, 3406 | "execution_count": 39, 3407 | "metadata": {}, 3408 | "output_type": "execute_result" 3409 | } 3410 | ], 3411 | "source": [ 3412 | "from chembl_webresource_client.utils import utils\n", 3413 | "import json\n", 3414 | "\n", 3415 | "aspirin = utils.smiles2ctab('O=C(Oc1ccccc1C(=O)O)C')\n", 3416 | "descs = json.loads(utils.chemblDescriptors(aspirin))[0]\n", 3417 | "descs" 3418 | ] 3419 | }, 3420 | { 3421 | "cell_type": "markdown", 3422 | "metadata": {}, 3423 | "source": [ 3424 | "## Compute structural alerts" 3425 | ] 3426 | }, 3427 | { 3428 | "cell_type": "code", 3429 | "execution_count": 40, 3430 | "metadata": {}, 3431 | "outputs": [ 3432 | { 3433 | "name": "stdout", 3434 | "output_type": "stream", 3435 | "text": [ 3436 | "{'alert_id': 1030, 'alert_name': 'Ester', 'set_name': 'MLSMR', 'smarts': '[#6]-C(=O)O-[#6]'}\n", 3437 | "{'alert_id': 1069, 'alert_name': 'vinyl michael acceptor1', 'set_name': 'MLSMR', 'smarts': '[#6]-[CH1]=C-C(=O)[#6,#7,#8]'}\n" 3438 | ] 3439 | } 3440 | ], 3441 | "source": [ 3442 | "from chembl_webresource_client.utils import 
utils\n", 3443 | "\n", 3444 | "mol = utils.smiles2ctab(\"O=C(Oc1ccccc1C(=O)O)C\")\n", 3445 | "alerts = json.loads(utils.structuralAlerts(mol))\n", 3446 | "for a in alerts[0]:\n", 3447 | " print(a)" 3448 | ] 3449 | }, 3450 | { 3451 | "cell_type": "markdown", 3452 | "metadata": {}, 3453 | "source": [ 3454 | "## Standardize a molecule" 3455 | ] 3456 | }, 3457 | { 3458 | "cell_type": "code", 3459 | "execution_count": 41, 3460 | "metadata": {}, 3461 | "outputs": [ 3462 | { 3463 | "data": { 3464 | "text/plain": [ 3465 | "[{'standard_molblock': '\\n RDKit 2D\\n\\n 19 17 0 0 0 0 0 0 0 0999 V2000\\n 0.0000 -3.0000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\\n -5.5170 -1.9538 0.0000 Na 0 0 0 0 0 15 0 0 0 0 0 0\\n -2.9244 -0.4442 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\\n -2.0602 0.0590 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n -2.0638 1.0590 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\\n -1.1924 -0.4380 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n -0.3282 0.0652 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 0.5396 -0.4318 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 1.4038 0.0714 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 1.4002 1.0714 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 2.2644 1.5744 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 2.2608 2.5744 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\\n 0.5324 1.5682 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n -0.3318 1.0652 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 5.0649 2.8712 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 4.8623 1.8920 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\\n 3.8683 1.7822 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\\n 3.4567 2.6936 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\\n 4.1963 3.3666 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\\n 3 4 1 0\\n 4 5 2 0\\n 4 6 1 0\\n 6 7 1 0\\n 7 8 2 0\\n 8 9 1 0\\n 9 10 2 0\\n 10 11 1 0\\n 11 12 1 0\\n 10 13 1 0\\n 13 14 2 0\\n 14 7 1 0\\n 15 16 2 0\\n 16 17 1 0\\n 17 18 2 0\\n 18 19 1 0\\n 19 15 1 0\\nM CHG 2 2 1 3 -1\\nM END\\n'}]" 3466 | ] 3467 | }, 3468 | "execution_count": 41, 3469 | "metadata": {}, 3470 | "output_type": "execute_result" 3471 | } 3472 | ], 3473 | "source": [ 3474 | "from chembl_webresource_client.utils 
import utils\n", 3475 | "mol = utils.smiles2ctab(\"[Na]OC(=O)Cc1ccc(C[NH3+])cc1.c1nnn[n-]1.O\")\n", 3476 | "st = json.loads(utils.standardize(mol))\n", 3477 | "st" 3478 | ] 3479 | }, 3480 | { 3481 | "cell_type": "markdown", 3482 | "metadata": {}, 3483 | "source": [ 3484 | "## Calculate the parent molecule from its SMILES identifier" 3485 | ] 3486 | }, 3487 | { 3488 | "cell_type": "code", 3489 | "execution_count": 42, 3490 | "metadata": {}, 3491 | "outputs": [ 3492 | { 3493 | "data": { 3494 | "text/plain": [ 3495 | "[{'parent_molblock': '\\n RDKit 2D\\n\\n 18 18 0 0 0 0 0 0 0 0999 V2000\\n -5.5170 -1.9538 0.0000 Na 0 0 0 0 0 1 0 0 0 0 0 0\\n -2.9244 -0.4442 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\\n -2.0602 0.0590 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n -2.0638 1.0590 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\\n -1.1924 -0.4380 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n -0.3282 0.0652 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 0.5396 -0.4318 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 1.4038 0.0714 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 1.4002 1.0714 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 2.2644 1.5744 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 2.2608 2.5744 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\\n 0.5324 1.5682 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n -0.3318 1.0652 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 5.0649 2.8712 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\\n 4.8623 1.8920 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\\n 3.8683 1.7822 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\\n 3.4567 2.6936 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\\n 4.1963 3.3666 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\\n 1 2 1 0\\n 2 3 1 0\\n 3 4 2 0\\n 3 5 1 0\\n 5 6 1 0\\n 6 7 2 0\\n 7 8 1 0\\n 8 9 2 0\\n 9 10 1 0\\n 10 11 1 0\\n 9 12 1 0\\n 12 13 2 0\\n 13 6 1 0\\n 14 15 2 0\\n 15 16 1 0\\n 16 17 2 0\\n 17 18 1 0\\n 18 14 1 0\\nM END\\n',\n", 3496 | " 'exclude': False}]" 3497 | ] 3498 | }, 3499 | "execution_count": 42, 3500 | "metadata": {}, 3501 | "output_type": "execute_result" 3502 | } 3503 | ], 3504 | "source": [ 3505 | "from chembl_webresource_client.utils import utils\n", 3506 | "\n", 3507 | 
"mol = utils.smiles2ctab(\"[Na]OC(=O)Cc1ccc(C[NH3+])cc1.c1nnn[n-]1.[Na]\")\n", 3508 | "par = json.loads(utils.getParent(mol))\n", 3509 | "par" 3510 | ] 3511 | }, 3512 | { 3513 | "cell_type": "code", 3514 | "execution_count": null, 3515 | "metadata": {}, 3516 | "outputs": [], 3517 | "source": [] 3518 | } 3519 | ], 3520 | "metadata": { 3521 | "kernelspec": { 3522 | "display_name": "Python 3", 3523 | "language": "python", 3524 | "name": "python3" 3525 | }, 3526 | "language_info": { 3527 | "codemirror_mode": { 3528 | "name": "ipython", 3529 | "version": 3 3530 | }, 3531 | "file_extension": ".py", 3532 | "mimetype": "text/x-python", 3533 | "name": "python", 3534 | "nbconvert_exporter": "python", 3535 | "pygments_lexer": "ipython3", 3536 | "version": "3.7.3" 3537 | } 3538 | }, 3539 | "nbformat": 4, 3540 | "nbformat_minor": 5 3541 | } 3542 | --------------------------------------------------------------------------------