├── .idea
├── .gitignore
├── GraphCDR.iml
├── inspectionProfiles
│ └── profiles_settings.xml
├── misc.xml
└── modules.xml
├── Framework.png
├── README.md
├── __pycache__
├── data_load.cpython-36.pyc
├── data_process.cpython-36.pyc
├── graphset.cpython-36.pyc
├── model.cpython-36.pyc
└── my_utiils.cpython-36.pyc
├── data
├── CCLE
│ ├── CCLE_response.csv
│ └── CCLE_smiles.csv
├── Case_study.xlsx
├── Celline
│ ├── Cell_lines_annotations.txt
│ ├── GDSC_IC50.csv
│ ├── genomic_expression_561celllines_697genes_demap_features.csv
│ ├── genomic_methylation_561celllines_808genes_demap_features.csv
│ └── genomic_mutation_34673_demap_features.csv
└── Drug
│ ├── 1.Drug_listMon Jun 24 09_00_55 2019.csv
│ ├── 222drugs_pubchem_smiles.txt
│ ├── drug_graph_feat.zip
│ ├── drug_threshold.csv
│ └── drug_threshold.txt
└── prog
├── base_model
├── GCNConv.py
├── SGConv.py
└── extract_drug_feature.py
├── data_load.py
├── data_process.py
├── graphCDR-ccle.py
├── graphCDR.py
├── graphset.py
├── model.py
└── my_utiils.py
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 |
--------------------------------------------------------------------------------
/.idea/GraphCDR.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/Framework.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BioMedicalBigDataMiningLab/GraphCDR/656de097f7e48580e35a9cfef9898ac3948d5dfa/Framework.png
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # GraphCDR
2 | Source code and data for "GraphCDR: A graph neural network method with contrastive learning for cancer drug response prediction"
3 |
4 | 
5 |
6 | # Requirements
7 | * Python >= 3.6
8 | * PyTorch >= 1.4
9 | * PyTorch Geometric >= 1.6
10 | * hickle >= 3.4
11 | * DeepChem >= 2.4
12 | * RDKit >= 2020.09
13 |
14 | # Usage
15 | * Unzip the file data/Drug/drug_graph_feat.zip before running the scripts.
16 | * python graphCDR.py \
17 | * python graphCDR-ccle.py \
18 |
19 | # Case study
20 | Because the GDSC database measured IC50 for only a subset of cell line-drug pairs, we applied GraphCDR to predict the responses of the missing pairs. The predicted results can be found in data/Case_study.xlsx.
21 |
--------------------------------------------------------------------------------
/__pycache__/data_load.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BioMedicalBigDataMiningLab/GraphCDR/656de097f7e48580e35a9cfef9898ac3948d5dfa/__pycache__/data_load.cpython-36.pyc
--------------------------------------------------------------------------------
/__pycache__/data_process.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BioMedicalBigDataMiningLab/GraphCDR/656de097f7e48580e35a9cfef9898ac3948d5dfa/__pycache__/data_process.cpython-36.pyc
--------------------------------------------------------------------------------
/__pycache__/graphset.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BioMedicalBigDataMiningLab/GraphCDR/656de097f7e48580e35a9cfef9898ac3948d5dfa/__pycache__/graphset.cpython-36.pyc
--------------------------------------------------------------------------------
/__pycache__/model.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BioMedicalBigDataMiningLab/GraphCDR/656de097f7e48580e35a9cfef9898ac3948d5dfa/__pycache__/model.cpython-36.pyc
--------------------------------------------------------------------------------
/__pycache__/my_utiils.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BioMedicalBigDataMiningLab/GraphCDR/656de097f7e48580e35a9cfef9898ac3948d5dfa/__pycache__/my_utiils.cpython-36.pyc
--------------------------------------------------------------------------------
/data/CCLE/CCLE_smiles.csv:
--------------------------------------------------------------------------------
1 | pubchem,isosmiles
2 | 11476171,C1CN(C1)CC2CC(C2)N3C=C(C4=C(N=CN=C43)N)C5=CC(=CC=C5)OCC6=CC=CC=C6
3 | 644241,CC1=C(C=C(C=C1)C(=O)NC2=CC(=CC(=C2)C(F)(F)F)N3C=C(N=C3)C)NC4=NC=CC(=N4)C5=CN=CC=C5
4 | 6505803,C[C@H]1C[C@@H]([C@@H]([C@H](/C=C(/[C@@H]([C@H](/C=C\\C=C(\\C(=O)NC2=CC(=O)C(=C(C1)C2=O)NCC=C)/C)OC)OC(=O)N)\\C)C)O)OC
5 | 10461815,CC1=C(NC(=C1C(=O)N2CCC[C@@H]2CN3CCCC3)C)/C=C\\4/C5=C(C=CC(=C5)S(=O)(=O)CC6=C(C=CC=C6Cl)Cl)NC4=O
6 | 208908,CS(=O)(=O)CCNCC1=CC=C(O1)C2=CC3=C(C=C2)N=CN=C3NC4=CC(=C(C=C4)OCC5=CC(=CC=C5)F)Cl
7 | 216345,CC(C)OC1=C(C=CC(=C1)OC)C2=NC(C(N2C(=O)N3CCNC(=O)C3)C4=CC=C(C=C4)Cl)C5=CC=C(C=C5)Cl
8 | 10302451,CN1CCN(CC1)CCOC2=CC3=C(C(=C2)OC4CCOCC4)C(=NC=N3)NC5=C(C=CC6=C5OCO6)Cl
9 | 11626560,C[C@H](C1=C(C=CC(=C1Cl)F)Cl)OC2=C(N=CC(=C2)C3=CN(N=C3)C4CCNCC4)N
10 | 5479543,CC(C)C[C@@H](C(=O)N[C@@H](CC1=CC=CC=C1)C(=O)N)NC(=O)[C@H](CC2=CC=CC=C2)C[C@H]([C@H](CC3=CC=CC=C3)NC(=O)OC(C)(C)C)O
11 | 3081361,CN1CCC(CC1)COC2=C(C=C3C(=C2)N=CN=C3NC4=C(C=C(C=C4)Br)F)OC
12 | 6918837,CC1=C(C2=CC=CC=C2N1)CCNCC3=CC=C(C=C3)/C=C/C(=O)NO
13 | 216239,CNC(=O)C1=NC=CC(=C1)OC2=CC=C(C=C2)NC(=O)NC3=CC(=C(C=C3)Cl)C(F)(F)F
14 | 60838,CCC1=C2CN3C(=CC4=C(C3=O)COC(=O)[C@@]4(CC)O)C2=NC5=C1C=C(C=C5)OC(=O)N6CCC(CC6)N7CCCCC7
15 | 60700,CC[C@@]1(C2=C(COC1=O)C(=O)N3CC4=CC5=C(C=CC(=C5CN(C)C)O)N=C4C3=C2)O
16 | 11503417,C[C@@H](C(=O)N[C@@H](C1CCCCC1)C(=O)N2CC[C@@H]3[C@H]2CN(CC3)CCC4=CC=CC=C4)NC
17 | 10117717,C1=CC(=C(C=C1I)Cl)NC2=C(C=CC(=C2F)F)C(=O)NOCC(CO)O
18 | 5330286,CC1=C(C(=O)N(C2=NC(=NC=C12)NC3=NC=C(C=C3)N4CCNCC4)C5CCCC5)C(=O)C
19 | 36314,CC1=C2[C@H](C(=O)[C@@]3([C@H](C[C@@H]4[C@]([C@H]3[C@@H]([C@@](C2(C)C)(C[C@@H]1OC(=O)[C@@H]([C@H](C5=CC=CC=C5)NC(=O)C6=CC=CC=C6)O)O)OC(=O)C7=CC=CC=C7)(CO4)OC(=O)C)O)C)OC(=O)C
20 | 10127622,CN1C=NC2=C1C=C(C(=C2F)NC3=C(C=C(C=C3)Br)Cl)C(=O)NOCCO
21 | 24180719,CCCS(=O)(=O)NC1=C(C(=C(C=C1)F)C(=O)C2=CNC3=C2C=C(C=N3)Cl)F
22 | 11656518,CN1C2=C(C=C(C=C2)OC3=CC(=NC=C3)C4=NC=C(N4)C(F)(F)F)N=C1NC5=CC=C(C=C5)C(F)(F)F
23 | 16038120,CC(C)S(=O)(=O)C1=CC=CC=C1NC2=NC(=NC=C2Cl)NC3=C(C=C(C=C3)N4CCC(CC4)N5CCN(CC5)C)OC
24 | 135611162,CC(C(=O)O)O.CN1CCN(CC1)C2=CC3=C(C=C2)N=C(N3)C4=C(C5=C(C=CC=C5F)NC4=O)N.O
25 | 176870,COCCOC1=C(C=C2C(=C1)C(=NC=N2)NC3=CC=CC(=C3)C#C)OCCOC
26 |
--------------------------------------------------------------------------------
/data/Case_study.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BioMedicalBigDataMiningLab/GraphCDR/656de097f7e48580e35a9cfef9898ac3948d5dfa/data/Case_study.xlsx
--------------------------------------------------------------------------------
/data/Drug/1.Drug_listMon Jun 24 09_00_55 2019.csv:
--------------------------------------------------------------------------------
1 | drug_id,Name,Synonyms,Targets,Target pathway,PubCHEM,Sample Size,Count
2 | 1242,(5Z)-7-Oxozeaenol,"5Z-7-Oxozeaenol, LL-Z1640-2",TAK1,"Other, kinases",9863776,945,266
3 | 179,5-Fluorouracil,5-FU,Antimetabolite (DNA & RNA),Other,3385,968,266
4 | 86,A-443654,KIN001-139,"AKT1, AKT2, AKT3",PI3K/MTOR signaling,10172943,425,266
5 | 55,A-770041,KIN001-111,"LCK, FYN","Other, kinases",9549184,426,266
6 | 1001,AICA Ribonucleotide,"AICAR, N1-(b-D-Ribofuranosyl)-5-aminoimidazole-4-carboxamide",AMPK agonist,Metabolism,65110,872,266
7 | 171,AKT inhibitor VIII,Akti-1/2,"AKT1, AKT2, AKT3",PI3K/MTOR signaling,10196499,934,266
8 | 228,AKT inhibitor VIII,"Akti-1/2, KIN001-102","AKT1, AKT2, AKT3",PI3K/MTOR signaling,10196499,979,266
9 | 272,AR-42,"HDAC-42, AR 42, AR42",HDAC1,Chromatin histone acetylation,6918848,965,266
10 | 207,AS601245,,"JNK1, JNK2, JNK2",JNK and p38 signaling,10109823,925,266
11 | 224,AS605240,"KIN001-173, AS-605240",PI3Kgamma,PI3K/MTOR signaling,5289247,975,266
12 | 219,AT-7519,AT7519,"CDK1, CDK2, CDK4, CDK6, CDK9",Cell cycle,11338033,976,266
13 | 29,AZ628,"AZ-628, AZ 628",BRAF,ERK MAPK signaling,11676786,428,266
14 | 156,AZD6482,"AZD 6482, AZD-6482, AK-55409",PI3Kbeta,PI3K/MTOR signaling,44137675,933,266
15 | 1066,AZD6482,"AZD 6482, AZD-6482, AK-55409",PI3Kbeta,PI3K/MTOR signaling,44137675,931,266
16 | 1022,AZD7762,"AZD-7762, AZD 7762","CHEK1, CHEK2",Cell cycle,11152667,881,266
17 | 1059,AZD8055,AZD-8055,"MTORC1, MTORC2",PI3K/MTOR signaling,25262965,870,266
18 | 1032,Afatinib,"BIBW2992, Tovok, Gilotrif","ERBB2, EGFR",EGFR signaling,10184653,881,266
19 | 1377,Afatinib,"BIBW2992, Tovok, Gilotrif","ERBB2, EGFR",EGFR signaling,10184653,944,266
20 | 281,Alectinib,"CH5424802, CH 542802, Alecensa",ALK,RTK signaling,49806720,974,266
21 | 293,Amuvatinib,"MP470, MP 470, MP-470","KIT, PDGFRA, FLT3","Other, kinases",11282283,971,266
22 | 205,Avagacestat,"BMS-708163, BMS 708163","Amyloid beta20, Amyloid beta40",Other,46883536,977,266
23 | 1072,Avagacestat,"BMS-708163, BMS 708163","Amyloid beta20, Amyloid beta40",Other,46883536,957,266
24 | 1021,Axitinib,"AG-13736, Inlyta","PDGFR, KIT, VEGFR",RTK signaling,6450551,877,266
25 | 178,BAY-61-3606,"Syk Inhibitor, BAY-613606",SYK,"Other, kinases",10200390,925,266
26 | 60,BI-2536,,"PLK1, PLK2, PLK3",Cell cycle,11364421,424,266
27 | 279,BIX02189,BIX 02189,"MEK5, ERK5",ERK MAPK signaling,46931012,975,266
28 | 203,BMS-345541,"BMS345541, IKK Inhibitor 3","IKK1, IKK2","Other, kinases",9813758,980,266
29 | 63,BMS-509744,"KIN001-127, ITK inhibitor",ITK,Other,20635522,426,266
30 | 62,BMS-536924,BMS 536924,"IGF1R, IR",IGFR signaling,10390396,425,266
31 | 1091,BMS-536924,BMS 536924,"IGF1R, IR",Unclassified,10390396,956,266
32 | 184,BMS-754807,"BMS754807, BMS 754807","IGF1R, IR",IGFR signaling,24785538,924,266
33 | 222,BX-912,,PDK1 (PDPK1),PI3K/MTOR signaling,11754511,978,266
34 | 1037,BX795,BX-795,"TBK1, PDK1 (PDPK1), IKK, AURKB, AURKC",Other,10077147,878,266
35 | 274,Belinostat,"PXD101, PXD-101",HDAC1,Chromatin histone acetylation,6918638,948,266
36 | 186,Bexarotene,"LG-100069, Targretin, Targret, Targrexin, Targretyn, Bexarotenum",Retinioic X receptor (RXR) agonist,Other,82146,917,266
37 | 150,Bicalutamide,"ICI-176334, Casodex, Cosudex, ICI 176334",AR,Hormone-related,2375,927,266
38 | 1502,Bicalutamide,"ICI-176334, Casodex, Cosudex, ICI 176334",AR,Hormone-related,2375,903,266
39 | 190,Bleomycin,,dsDNA break induction,DNA replication,several,920,266
40 | 1378,Bleomycin,,dsDNA break induction,DNA replication,5460769,965,266
41 | 104,Bortezomib,"PS-341, LDP-341, Velcade",Proteasome,Protein stability and degradation,387447,426,266
42 | 1019,Bosutinib,"SKI-606, Bosulif","SRC, ABL, TEC","Other, kinases",5328940,883,266
43 | 197,Bryostatin 1,Bryostatin,PKC,"Other, kinases",5280757,923,266
44 | 276,CAY10603,,"HDAC1, HDAC6",Chromatin histone acetylation,24951314,968,266
45 | 1170,CCT-018159,"CCT018159, CCT 018159",HSP90,Protein stability and degradation,5327091,929,266
46 | 1067,CCT007093,,PPM1D,Other,2314623,952,266
47 | 54,CGP-082996,"CINK4, KIN001-021",CDK4,Cell cycle,24825971,425,266
48 | 53,CGP-60474,"KIN001-019, CGP60474, CGP 60474","CDK1,CDK2,CDK5,CDK7,CDK9, PKC",Cell cycle,644215,425,266
49 | 154,CHIR-99021,"CT 99021, CHIR99021, CHIR 99021","GSK3A, GSK3B",WNT signaling,9956119,935,266
50 | 1241,CHIR-99021,"CT 99021, CHIR99021, CHIR 99021","GSK3A, GSK3B",WNT signaling,9956119,944,266
51 | 1015,CI-1040,"CI 1040, PD-18435, PD-184352, 212631-79-3","MEK1, MEK2",ERK MAPK signaling,6918454,871,266
52 | 64,CMK,KIN001-128,RSK2,ERK MAPK signaling,16663089,425,266
53 | 152,CP466722,"CP-466722, CP 466722, 1080622-86-1",ATM,Genome integrity,44551660,980,266
54 | 255,CP724714,CP-724714,ERBB2,EGFR signaling,9874913,977,266
55 | 273,CUDC-101,CUDC 101,"HDAC1-10, EGFR, ERBB2",Other,24756910,957,266
56 | 300,CX-5461,"CX5461, CX 5461",RNA Polymerase 1,Other,25257557,971,266
57 | 249,Cabozantinib,"BMS-907351, XL-184, Cometriq","VEGFR, MET, RET, KIT, FLT1, FLT3, FLT4, TIE2,AXL","Other, kinases",25102847,975,266
58 | 1114,Cetuximab,"Erbitux, IMC-C225, C225, IMC-225, L01XC06",EGFR,EGFR signaling,85668777,899,266
59 | 1005,Cisplatin,"cis-Diammineplatinum(II) dichloride, Platinol, CIS-DDP",DNA crosslinker,DNA replication,84691,882,266
60 | 37,Crizotinib,"Xalkori, PF2341066, PF-2341066, PF 2341066","MET, ALK, ROS1",RTK signaling,11626560,433,266
61 | 17,Cyclopamine,,SMO,Other,several,421,266
62 | 1006,Cytarabine,"Ara-Cytidine, Arabinosyl Cytosine, U-19920",Antimetabolite,DNA replication,6253,878,266
63 | 165,DMOG,Dimethyloxalylglcine,HIF-PH,Metabolism,560326,934,266
64 | 1373,Dabrafenib,"GSK2118436, Tafinlar",BRAF,ERK MAPK signaling,44462760,899,266
65 | 200,Dacinostat,"NVP-LAQ824, LAQ824",HDAC1,Chromatin histone acetylation,6445533,929,266
66 | 1057,Dactolisib,"NVP-BEZ235, BEZ235","PI3K (Class 1), MTORC1, MTORC2",PI3K/MTOR signaling,11977753,871,266
67 | 1248,Daporinad,"APO866, FK866, FK866",NAMPT,Metabolism,6914657,930,266
68 | 51,Dasatinib,"BMS-354825-03, BMS-354825, Sprycel","ABL, SRC, Ephrins, PDGFR, KIT",Other,3062316,422,266
69 | 1007,Docetaxel,"RP-56976, Taxotere",Microtubule stabiliser,Mitosis,148124,882,266
70 | 1042,Doramapimod,"BIRB-796, BIRB 796","p38, JNK2",JNK and p38 signaling,156422,867,266
71 | 133,Doxorubicin,"Doxil, Rubex, Adriamycin, Adriablastin, Doxorubicine",Anthracycline,DNA replication,31703,928,266
72 | 1069,EHT-1864,EHT 1864,"RAC1, RAC2, RAC3",Cytoskeleton,9938202,955,266
73 | 1031,Elesclomol,STA-4783,HSP90,Protein stability and degradation,300471,881,266
74 | 172,Embelin,"Emberine, Embelic acid",XIAP,Apoptosis regulation,3218,929,266
75 | 88,Entinostat,MS-275,"HDAC1, HDAC3",Chromatin histone acetylation,4261,427,266
76 | 229,Enzastaurin,LY317615,PKCB,"Other, kinases",176167,978,266
77 | 201,Epothilone B,"Patupilone, EpoB, EPO906, GNF-PF-193",Microtubule stabiliser,Mitosis,448013,928,266
78 | 1,Erlotinib,"Tarceva, RG-1415, CP-358774, OSI-774, Ro-508231, R-1415",EGFR,EGFR signaling,176870,392,266
79 | 134,Etoposide,"Etopophos, Vepesid, Eposin, VP-16",TOP2,DNA replication,36462,936,266
80 | 173,FH535,,"PPARgamma, PPARdelta",WNT signaling,3463933,926,266
81 | 231,FMK,KIN001-242,RSK,"Other, kinases",none,836,266
82 | 263,FR-180204,"FR 180204, FR180204, ERK Inhibitor II","ERK1, ERK2",ERK MAPK signaling,11493598,975,266
83 | 166,FTI-277,,Farnesyl-transferase (FNTA),Other,3005532,937,266
84 | 306,Fedratinib,"TG101348, TG-101348, SAR302503, SAR-302503",JAK2,"Other, kinases",16722836,976,266
85 | 308,Foretinib,"GSK1363089, XL-880, EXEL-2880, GSK089","MET, KDR, TIE2, VEGFR3/FLT4, RON, PDGFR, FGFR1, EGFR",RTK signaling,42642645,972,266
86 | 52,GNF-2,KIN001-013,BCR-ABL,ABL signaling,5311510,423,266
87 | 226,GSK1070916,GSK-1070916,"AURKA, AURKC",Mitosis,46885626,950,266
88 | 202,GSK1904529A,"GSK-1904529A, GSK 1904529A","IGF1R, IR",IGFR signaling,25124816,928,266
89 | 127,GSK269962A,GSK 269962A,"ROCK1, ROCK2",Cytoskeleton,16095342,429,266
90 | 1192,GSK269962A,GSK 269962A,"ROCK1, ROCK2",Cytoskeleton,16095342,958,266
91 | 91,GSK319347A,"KIN001-135, IKK-3 inhibitor",IKK,Other,11626927,428,266
92 | 230,GSK429286A,,"ROCK1, ROCK2",Cytoskeleton,11373846,980,266
93 | 177,GSK650394,"GSK-650394, GSK 650394","SGK2, SGK3",Other,25022668,920,266
94 | 326,GSK690693,"GSK 690693, GSK-690693","AKT1, AKT2, AKT3",PI3K/MTOR signaling,16725726,975,266
95 | 193,GW-2580,"GX2580, CFMS receptor tyrosine kinase inhibitor",CSF1R,RTK signaling,11617559,977,266
96 | 1023,GW441756,GW 441756,NTRK1,RTK signaling,9943465,879,266
97 | 87,GW843682X,GW843682X (AN-13),PLK1,Cell cycle,9826308,426,266
98 | 1010,Gefitinib,"ZD-1839, Iressa",EGFR,EGFR signaling,123631,878,266
99 | 135,Gemcitabine,"Gemzar, LY-188011",Pyrimidine antimetabolite,DNA replication,60750,923,266
100 | 225,Genentech Cpd 10,,"AURKA, AURKB",Mitosis,none,980,266
101 | 1142,HG-5-113-01,,"LOK, LTK, TRCB, ABL(T315I)",Other,none,512,266
102 | 1143,HG-5-88-01,,"EGFR, ADCK4","Other, kinases",none,512,266
103 | 159,HG6-64-1,KIN001-206,BRAF,ERK MAPK signaling,53302361,933,266
104 | 275,I-BET-762,GSK525762A,"BRD2, BRD3, BRD4",Chromatin other,46943432,972,266
105 | 1266,ICL1100013,,N-myristoyltransferase 1/2,Other,,945,266
106 | 1230,IOX2,"IOX-2, IOX 2, AK176060",EGLN1,Other,54685215,959,266
107 | 176,IPA-3,IPA 3,PAK1,Cytoskeleton,521106,927,266
108 | 238,Idelalisib,"CAL-101, Zydelig",PI3Kdelta,PI3K/MTOR signaling,11625818,979,266
109 | 34,Imatinib,"Gleevec, STI-571","ABL, KIT, PDGFR",RTK signaling,5291,435,266
110 | 208,Ispinesib Mesylate,SB-715992,KSP,Mitosis,6450816,979,266
111 | 1043,JNK Inhibitor VIII,,JNK,JNK and p38 signaling,11624601,876,266
112 | 157,JNK-9L,"KIN001-204, JNK inhibitor 9l","JNK2, JNK3",JNK and p38 signaling,25222038,937,266
113 | 163,JQ1,"JQ-1, (+)-JQ-1","BRD2, BRD3, BRD4, BRDT",Chromatin other,46907787,937,266
114 | 1218,JQ1,"JQ-1, (+)-JQ-1","BRD2, BRD3, BRD4, BRDT",Chromatin other,46907787,944,266
115 | 164,JQ12,,"HDAC1, HDAC2",Chromatin histone acetylation,none,931,266
116 | 256,JW-7-24-1,,LCK,"Other, kinases",none,977,266
117 | 83,JW-7-52-1,NA,MTOR,PI3K/MTOR signaling,49836027,410,266
118 | 286,KIN001-236,,Angiopoietin-1 receptor,Other,none,975,266
119 | 287,KIN001-244,PDK1 inhibitor 7,PDK1 (PDPK1),"Other, kinases",56965967,974,266
120 | 290,KIN001-260,"Bayer IKKb inhibitor, ACHP",IKKB,Other,10451420,974,266
121 | 291,KIN001-266,,MAP3K8,"Other, kinases",44143370,975,266
122 | 345,KIN001-270,"CDK9 inhibitor, CDK9-IN-1",CDK9,Cell cycle,66577006,975,266
123 | 1030,KU-55933,KU55933,ATM,Genome integrity,5278396,877,266
124 | 192,LFM-A13,DDE-28,BTK,"Other, kinases",54676905,927,266
125 | 119,Lapatinib,"Tykerb, Tyverb","ERBB2, EGFR",EGFR signaling,208908,421,266
126 | 1020,Lenalidomide,"CDC-501, CC-5013, Revlimid",CRBN,Protein stability and degradation,216326,882,266
127 | 1024,Lestaurtinib,"CEP-701, SP-924, SPM-924, A-154475, KT-555","FLT3, JAK2, NTRK1, NTRK2, NTRK3","Other, kinases",126565,881,266
128 | 277,Linifanib,"ABT-869, ABT 869","VEGFR1, VEGFR2, VEGFR3, CSF1R, FLT3, KIT",RTK signaling,11485656,977,266
129 | 185,Linsitinib,"OSI-906, ASP-7487",IGF1R,IGFR signaling,11640390,924,266
130 | 194,Luminespib,"AUY922, VER-52296,NVP-AUY922, AUY",HSP90,Protein stability and degradation,10096043,918,266
131 | 9,MG-132,"LLL cpd, MG 132, MG132","Proteasome, CAPN1",Protein stability and degradation,462382,426,266
132 | 1053,MK-2206,"MK 2206, MK2206","AKT1, AKT2",PI3K/MTOR signaling,46930998,854,266
133 | 294,MPS-1-IN-1,,MPS1,Mitosis,25195352,973,266
134 | 292,Masitinib,"AB1010, Masivet","KIT, PDGFRA, PDGFRB","Other, kinases",10074640,976,266
135 | 1008,Methotrexate,"Abitrexate, Amethopterin, Rheumatrex, Trexall, Folex",Antimetabolite,DNA replication,126941,881,266
136 | 153,Midostaurin,"PKC412, benzoylstaurosporine, CGP-41251","PKC, PPK, FLT1, c-FGR, others",Other,several,938,266
137 | 136,Mitomycin-C,"Mytozytrex, NSC-26980, MMC, Mitosol, Mitozytrex",DNA crosslinker,DNA replication,5746,929,266
138 | 1029,Motesanib,"AMG-706, AMG 706, AMG706","VEGFR, RET, KIT, PDGFR",RTK signaling,11667893,877,266
139 | 260,NG-25,NG25,"TAK1, MAP4K2","Other, kinases",53340664,974,266
140 | 257,NPK76-II-72-1,,PLK3,Cell cycle,none,975,266
141 | 269,NSC-207895,"XI-006, NSC207895",MDM4,p53 pathway,42640,968,266
142 | 147,NSC-87877,NSC 87877,"SHP-1 (PTPN6), SHP-2 (PTPN11)",Other,5459322,933,266
143 | 1038,NU7441,"KU-57788, NU-7432, NU-7741",DNAPK,Genome integrity,11327430,875,266
144 | 295,NVP-BHG712,BHG712,EPHB4,Other,16747388,975,266
145 | 35,NVP-TAE684,"NVP-TAE 684, TAE684, TAE-684",ALK,RTK signaling,16038120,433,266
146 | 1011,Navitoclax,"ABT-263, ABT263, ABT 263","BCL2, BCL-XL, BCL-W",Apoptosis regulation,24978538,878,266
147 | 1013,Nilotinib,"Tasigna, AMN 107",ABL,ABL signaling,644241,833,266
148 | 1047,Nutlin-3a (-),,MDM2,p53 pathway,11433190,878,266
149 | 299,OSI-027,A-1065-5,"MTORC1, MTORC2",PI3K/MTOR signaling,44224160,971,266
150 | 298,OSI-930,OSI 930 OSI930,KIT,RTK signaling,9868037,976,266
151 | 167,OSU-03012,"AR-12, OSU 03012, OSU03012, PDK1 inhibitor AR-12",PDK1 (PDPK1),"Other, kinases",10027278,927,266
152 | 182,Obatoclax Mesylate,"GX15-070MS, Obatoclax, GX15-070","BCL2, BCL-XL, BCL-W, MCL1",Apoptosis regulation,11404337,920,266
153 | 1017,Olaparib,"AZD2281, KU0059436, Lynparza","PARP1, PARP2",Genome integrity,23725625,882,266
154 | 1495,Olaparib,"AZD2281, KU0059436, Lynparza","PARP1, PARP2",Genome integrity,23725625,934,266
155 | 283,Omipalisib,"GSK2126458, GSK-2126458, EX-8678, GSK458","PI3K (class 1), MTORC1, MTORC2",PI3K/MTOR signaling,25167777,977,266
156 | 175,PAC-1,GTPL5238,"Procaspase-3, Procaspase-7",Apoptosis regulation,6753378,916,266
157 | 1060,PD0325901,"PD-0325901, PD 0325901","MEK1, MEK2",ERK MAPK signaling,9826528,870,266
158 | 1049,PD173074,"PD-173074, PD 173074","FGFR1, FGFR3",RTK signaling,1401,878,266
159 | 1129,PF-4708671,"PF 4708671, PF4708671",S6K1,PI3K/MTOR signaling,51371303,939,266
160 | 158,PF-562271,PF-00562271,"FAK, FAK2",Cytoskeleton,11713159,918,266
161 | 1219,PFI-1,,BRD4,Chromatin other,71271629,961,266
162 | 1530,PFI-3,BDF00016096,"SMARCA2, SMARCA4, PB1",Other,78243717,874,266
163 | 6,PHA-665752,"PHA665752, PHA 665752",MET,RTK signaling,10461815,434,266
164 | 301,PHA-793887,"PHA793887, PHA 793887","CDK2, CDK7, CDK5",Cell cycle,46191454,977,266
165 | 302,PI-103,"PI-103, PI103, PI 103","PI3Kalpha, DAPK3, CLK4, PIM3, HIPK2",PI3K/MTOR signaling,9884685,966,266
166 | 303,PIK-93,"PIK 93, PIK93",PI3Kgamma,PI3K/MTOR signaling,6852167,975,266
167 | 1036,PLX-4720,"PLX4720, PLX 4720",BRAF,ERK MAPK signaling,24180719,877,266
168 | 1371,PLX-4720,"PLX4720, PLX 4720",BRAF,ERK MAPK signaling,24180719,947,266
169 | 11,Paclitaxel,"BMS-181339-01, Taxol, Onxol, Paxene, Praxel, Abraxane",Microtubule stabiliser,Mitosis,36314,426,266
170 | 1054,Palbociclib,"PD0332991, PD-0332991, PF-00080665-73","CDK4, CDK6",Cell cycle,5330286,854,266
171 | 89,Parthenolide,,HDAC1,Chromatin histone acetylation,7251185,428,266
172 | 199,Pazopanib,Votrient,"CSF1R, KIT, PDGFRA, PDGFRB",RTK signaling,10113978,925,266
173 | 282,Pelitinib,"EKB-569, EKB 569",EGFR,EGFR signaling,6445562,976,266
174 | 1529,Pevonedistat,"MLN4924, MLN 4924, MLN-4924",NAE,Other,16720766,722,266
175 | 196,Phenformin,DBI,Biguanide agent,Other,8249,966,266
176 | 1058,Pictilisib,"GDC-0941, GDC0941, RG-7621",PI3K (class 1),PI3K/MTOR signaling,17755052,870,266
177 | 1527,Pictilisib,"GDC-0941, GDC0941, RG-7621",PI3K (class 1),PI3K/MTOR signaling,17755052,914,266
178 | 1243,Piperlongumine,Piplartine,Induces reactive oxygen species,Other,637858,957,266
179 | 155,Ponatinib,"AP24534, AP-24534, KIN001-192, Iclusig","ABL, PDGFRA, VEGFR2, FGFR1, SRC, TIE2, FLT3",RTK signaling,24826799,937,266
180 | 71,Pyrimethamine,"Daraprim, Chloridine",Dihydrofolate reductase (DHFR),Other,4993,428,266
181 | 1166,QL-VIII-58,,"MTOR, ATR",Other,none,515,266
182 | 331,QL-X-138,,BTK,"Other, kinases",none,964,266
183 | 329,QL-XI-92,,DDR1,Other,none,977,266
184 | 235,QL-XII-47,,"BTK, BMX","Other, kinases",none,977,266
185 | 1203,QL-XII-61,,"BMX, BTK","Other, kinases",none,490,266
186 | 151,QS11,944328-88-5,ARFGAP1,Other,4263900,928,266
187 | 254,Quizartinib,"AC220, AC 220, AC-220, Asp-2689",FLT3,RTK signaling,24889392,976,266
188 | 1052,RO-3306,,CDK1,Cell cycle,44450571,878,266
189 | 3,Rapamycin,"AY-22989, Sirolimus, WY-090217, Torisel, Rapamune",MTORC1,PI3K/MTOR signaling,5384616,388,266
190 | 1014,Refametinib,"RDEA119, BAY-86-9766, BAY 869766","MEK1, MEK2",ERK MAPK signaling,44182295,869,266
191 | 1526,Refametinib,"RDEA119, BAY-86-9766, BAY 869766","MEK1, MEK2",ERK MAPK signaling,44182295,905,266
192 | 1175,Rucaparib,"PF-01367338, AG-014699, AG-14447, AG-14699","PARP1, PARP2",Genome integrity,9931953,953,266
193 | 206,Ruxolitinib,"INCB-18424, Ruxolitinib Phosphate, Jakafi","JAK1, JAK2","Other, kinases",25126798,979,266
194 | 41,S-Trityl-L-cysteine,"NSC 83265, Tritylcysteine",KIF11,Mitosis,76044,424,266
195 | 1025,SB216763,"SB-216763, SB 216763","GSK3A, GSK3B",WNT signaling,176158,778,266
196 | 1194,SB505124,"SB 505124, SB505124","ALK4, ALK5",RTK signaling,9858940,958,266
197 | 304,SB52334,"SB-52334, SB 52334",ALK5,RTK signaling,9967941,974,266
198 | 1061,SB590885,SB-590885,BRAF,ERK MAPK signaling,11316960,858,266
199 | 1264,SGC0946,,DOT1L,Chromatin histone methylation,56962337,938,266
200 | 1039,SL0101,"SL-0101, SL 0101-1","RSK, AURKB, PIM1, PIM3",Other,10459196,863,266
201 | 1494,SN-38,"7-Ethyl-10-Hydroxy-Camptothecin, SN 38",TOP1,DNA replication,104842,989,266
202 | 328,SNX-2112,SNX 2112,HSP90,Protein stability and degradation,24772860,967,266
203 | 258,STF-62247,STF62247,Autophagy inducer,Other,704473,972,266
204 | 111,Salubrinal,EIF-2alpha Inhibitor,EIF2A,Other,5717801,423,266
205 | 38,Saracatinib,"AZD0530, AZD-0530, AZ-10353926","ABL, SRC",RTK signaling,10302451,435,266
206 | 110,Seliciclib,"Roscovitine, CYC-202, AL-39256","CDK2, CDK7, CDK9",Cell cycle,160355,421,266
207 | 341,Selisistat,"EX-527, EX 527",SIRT1,Chromatin histone acetylation,5113032,971,266
208 | 1062,Selumetinib,"AZD6244, AZD-6244, ARRY-886","MEK1, MEK2",ERK MAPK signaling,10127622,852,266
209 | 1498,Selumetinib,"AZD6244, AZD-6244, ARRY-886","MEK1, MEK2",ERK MAPK signaling,10127622,950,266
210 | 268,Sepantronium bromide,"YM155, YM-155, YM 155",BIRC5,Apoptosis regulation,11178236,942,266
211 | 1133,Serdemetan,JNJ-26854165,MDM2,p53 pathway,11609586,950,266
212 | 170,Shikonin,Anchusin,not defined,Other,5208,934,266
213 | 30,Sorafenib,"Nexavar, 284461-73-0, BAY 43-9006","PDGFR, KIT, VEGFR, RAF",RTK signaling,216239,430,266
214 | 5,Sunitinib,"Sutent, Sunitinib Malate, SU-11248","PDGFR, KIT, VEGFR, FLT3, RET, CSF1R",RTK signaling,5329102,425,266
215 | 333,T0901317,"TO-901317, TO901317","LXR, FXR",Other,447912,969,266
216 | 221,TAK-715,"KIN001-201, TAK 715","p38alpha, p38beta",JNK and p38 signaling,9952773,980,266
217 | 94,TGX221,"TGX-221, Tgx 221",PI3Kbeta,PI3K/MTOR signaling,9907093,426,266
218 | 346,THZ-2-102-1,,CDK7,Cell cycle,none,957,266
219 | 344,THZ-2-49,,CDK9,Cell cycle,none,972,266
220 | 261,TL-1-85,,TAK,"Other, kinases",none,975,266
221 | 211,TL-2-105,,not defined,Unclassified,none,980,266
222 | 305,TPCA-1,,IKK2,"Other, kinases",9903786,975,266
223 | 1149,TW 37,"TW37, TW-37","BCL2, BCL-XL, MCL1",Apoptosis regulation,11455910,952,266
224 | 1259,Talazoparib,"BMN-673, BMN 973","PARP1, PARP2",Genome integrity,44819241,944,266
225 | 1199,Tamoxifen,"Nolvadex, Soltamox, Zynoplex, ICI-46474, Kessar",ESR1,Hormone-related,2733526,958,266
226 | 1026,Tanespimycin,"17-AAG, BMS-722782",HSP90,Protein stability and degradation,6505803,879,266
227 | 1375,Temozolomide,"Temodar, Temodal, M-39831, SCH 52365",DNA alkylating agent,DNA replication,5394,938,266
228 | 1016,Temsirolimus,"CCI-779, Torisel",MTOR,PI3K/MTOR signaling,6918289,868,266
229 | 180,Thapsigargin,Octanoic acid,SERCA,Other,446378,911,266
230 | 204,Tipifarnib,Zarnestra,Farnesyl-transferase (FNTA),Other,159324,925,266
231 | 312,Tivozanib,"AV-951, AV 951, KRN-951, KIL8951, ASP-4130","VEGFR1, VEGFR2, VEGFR3",RTK signaling,9911830,975,266
232 | 32,Tozasertib,"MK 0457,MK-0457,MK-045, VX-680 VX 680 VX-68","AURKA, AURKB, AURKC, others",Mitosis,5494449,420,266
233 | 1372,Trametinib,"GSK1120212, Mekinist","MEK1, MEK2",ERK MAPK signaling,11707110,925,266
234 | 1009,Tretinoin,"ATRA, Vesanoid, Renova, Atralin, Tretin-X, Avita",Retinoic acid,Other,444795,873,266
235 | 265,Tubastatin A,,"HDAC1, HDAC6, HDAC8",Chromatin histone acetylation,53394750,971,266
236 | 245,UNC0638,"UNC-0638, UNC 0683",G9a and GLP methyltransferases,Chromatin histone methylation,46224516,969,266
237 | 1236,UNC0638,"UNC-0638, UNC 0683",G9a and GLP methyltransferases,Chromatin histone methylation,46224516,961,266
238 | 1262,UNC1215,UNC-1215,L3MBTL3,Chromatin other,57339144,939,266
239 | 271,VNLG/124,HDAC inhibitor XV,"HDAC,RAR",Chromatin histone acetylation,24894414,971,266
240 | 262,VX-11e,"VX11e, VX11e",ERK2,ERK MAPK signaling,11634725,974,266
241 | 1028,VX-702,"VX702, VX 702",p38,JNK and p38 signaling,10341154,879,266
242 | 1018,Veliparib,"ABT-888, ABT888, ABT 888","PARP1, PARP2",Genome integrity,11960529,882,266
243 | 1004,Vinblastine,Velban,Microtubule destabiliser,Mitosis,6710780,882,266
244 | 140,Vinorelbine,"vinorelbine tartrate, Navelbine, Exelbine",Microtubule destabiliser,Mitosis,5311497,938,266
245 | 1033,Vismodegib,"GDC0449, Erivedge",SMO,Other,24776445,882,266
246 | 1012,Vorinostat,"Zolinza, SAHA, suberanilohydroxamic acid, suberoylanilide hydroxamic acid, MK-0683","HDAC inhibitor Class I, IIa, IIb, IV",Chromatin histone acetylation,5311,883,266
247 | 56,WH-4-023,KIN001-112,"SRC, LCK","Other, kinases",11844351,423,266
248 | 288,WHI-P97,"AC1L1GQE, KIN001-055",JAK3,"Other, kinases",3796,975,266
249 | 59,WZ-1-84,KIN001-123,BMX,"Other, kinases",49821040,425,266
250 | 252,WZ3105,,"SRC, ROCK2, NTRK2, FLT3, IRAK1, others",Other,none,976,266
251 | 1046,Wee1 Inhibitor,"681640, Wee1 Inhibitor","WEE1, CHEK1",Cell cycle,10384072,787,266
252 | 1268,XAV939,"NVP-XAV939, XAV-939, XAV 939","TNKS1, TNKS2",WNT signaling,2726824,947,266
253 | 1158,XMD11-85h,,"BRSK2, FLT4, MARK4, PRKCD, RET, SRPK1",Other,none,513,266
254 | 330,XMD13-2,,RIPK1,Apoptosis regulation,none,976,266
255 | 253,XMD14-99,,"ALK, CDK7, LTK, others",Other,none,977,266
256 | 332,XMD15-27,,CAMK2,"Other, kinases",none,977,266
257 | 106,XMD8-85,ERK5-IN-1,"ERK5, BET",Other,46844147,422,266
258 | 1164,XMD8-92,XMD 8-92,MAPK7,"Other, kinases",46843772,514,266
259 | 309,Y-39983,,ROCK,Cytoskeleton,9810884,975,266
260 | 1239,YK-4-279,YK 4-279,RNA helicase A,Other,44632017,816,266
261 | 310,YM201636,"YM-201636, YM 201636",PIKFYVE,Other,9956222,975,266
262 | 45,Z-LLNle-CHO,"Z-L-Norleucine-CHO, Gamma-Secretase Inhibitor 1",gamma-secretase,Other,16760646,425,266
263 | 1161,ZG-10,,JNK1,JNK and p38 signaling,none,513,266
264 | 1050,ZM447439,"ZM-447439, ZM 447439","AURKA, AURKB",Mitosis,9914412,827,266
265 | 223,ZSTK474,"KIN001-167, ZSTK-474, ZSTK 474",PI3K (class 1),PI3K/MTOR signaling,11647372,979,266
266 | 266,Zibotentan,"ZD4054, ZD-4054",Endothelin-1 receptor (EDNRA),Other,9910224,974,266
267 | 1261,rTRAIL,,TRAIL receptor agonist,Apoptosis regulation,none,949,266
--------------------------------------------------------------------------------
/data/Drug/222drugs_pubchem_smiles.txt:
--------------------------------------------------------------------------------
1 | 25102847 COc1cc2nccc(Oc3ccc(NC(=O)C4(C(=O)Nc5ccc(F)cc5)CC4)cc3)c2cc1OC
2 | 148124 CC(=O)OC12COC1CC(O)C1(C)C(=O)C(O)C3=C(C)C(OC(=O)C(O)C(NC(=O)OC(C)(C)C)c4ccccc4)CC(O)(C(OC(=O)c4ccccc4)C21)C3(C)C
3 | 5289247 O=C1NC(=O)C(=Cc2ccc3nccnc3c2)S1
4 | 9868037 O=C(Nc1ccc(OC(F)(F)F)cc1)c1sccc1NCc1ccnc2ccccc12
5 | 11364421 CCC1C(=O)N(C)c2cnc(Nc3ccc(C(=O)NC4CCN(C)CC4)cc3OC)nc2N1C1CCCC1
6 | 208908 CS(=O)(=O)CCNCc1ccc(-c2ccc3ncnc(Nc4ccc(OCc5cccc(F)c5)c(Cl)c4)c3c2)o1
7 | 4993 CCc1nc(N)nc(N)c1-c1ccc(Cl)cc1
8 | 462382 CC(C)CC(C=O)NC(=O)C(CC(C)C)NC(=O)C(CC(C)C)NC(=O)OCc1ccccc1
9 | 57339144 O=C(c1ccc(C(=O)N2CCC(N3CCCC3)CC2)c(Nc2ccccc2)c1)N1CCC(N2CCCC2)CC1
10 | 2375 CC(O)(CS(=O)(=O)c1ccc(F)cc1)C(=O)Nc1ccc(C#N)c(C(F)(F)F)c1
11 | 448013 CC(=Cc1csc(C)n1)C1CC2OC2(C)CCCC(C)C(O)C(C)C(=O)C(C)(C)C(O)CC(=O)O1
12 | 24756910 C#Cc1cccc(Nc2ncnc3cc(OC)c(OCCCCCCC(=O)NO)cc23)c1
13 | 24785538 CC1(C(=O)Nc2ccc(F)nc2)CCCN1c1nc(Nc2cc(C3CC3)[nH]n2)c2cccn2n1
14 | 637858 COc1cc(C=CC(=O)N2CCC=CC2=O)cc(OC)c1OC
15 | 11624601 CCOc1nc(NC(=O)Cc2cc(OC)ccc2OC)cc(N)c1C#N
16 | 7251185 C=C1C(=O)OC2C1CCC(C)=CCCC1(C)OC21
17 | 6445533 O=C(C=Cc1ccc(CN(CCO)CCc2c[nH]c3ccccc23)cc1)NO
18 | 11373846 CC1=C(C(=O)Nc2cc3cn[nH]c3cc2F)C(c2ccc(C(F)(F)F)cc2)CC(=O)N1
19 | 6918638 O=C(C=Cc1cccc(S(=O)(=O)Nc2ccccc2)c1)NO
20 | 23725625 O=C(c1cc(Cc2n[nH]c(=O)c3ccccc23)ccc1F)N1CCN(C(=O)C2CC2)CC1
21 | 46843772 CCOc1cc(N2CCC(O)CC2)ccc1Nc1ncc2c(n1)N(C)c1ccccc1C(=O)N2C
22 | 24889392 CC(C)(C)c1cc(NC(=O)Nc2ccc(-c3cn4c(n3)sc3cc(OCCN5CCOCC5)ccc34)cc2)no1
23 | 387447 CC(C)CC(NC(=O)C(Cc1ccccc1)NC(=O)c1cnccn1)B(O)O
24 | 11617559 COc1ccc(COc2ccc(Cc3cnc(N)nc3N)cc2OC)cc1
25 | 24772860 CC1(C)CC(=O)c2c(C(F)(F)F)nn(-c3ccc(C(N)=O)c(NC4CCC(O)CC4)c3)c2C1
26 | 10302451 CN1CCN(CCOc2cc(OC3CCOCC3)c3c(Nc4c(Cl)ccc5c4OCO5)ncnc3c2)CC1
27 | 5394 Cn1nnc2c(C(N)=O)ncn2c1=O
28 | 31703 COc1cccc2c1C(=O)c1c(O)c3c(c(O)c1C2=O)CC(O)(C(=O)CO)CC3OC1CC(N)C(O)C(C)O1
29 | 44551660 COc1cc2ncnc(-n3nc(-c4ccccn4)nc3N)c2cc1OC
30 | 6710780 CCC1(O)CC2CN(CCc3c([nH]c4ccccc34)C(C(=O)OC)(c3cc4c(cc3OC)N(C)C3C(O)(C(=O)OC)C(OC(C)=O)C5(CC)C=CCN6CCC43C65)C2)C1
31 | 24776445 CS(=O)(=O)c1ccc(C(=O)Nc2ccc(Cl)c(-c3ccccn3)c2)c(Cl)c1
32 | 6445562 CCOc1cc2ncc(C#N)c(Nc3ccc(F)c(Cl)c3)c2cc1NC(=O)C=CCN(C)C
33 | 521106 Oc1ccc2ccccc2c1SSc1c(O)ccc2ccccc12
34 | 5327091 CCC1=CC(=C2NNC(C)=C2c2ccc3c(c2)OCCO3)C(=O)C=C1O
35 | 46907787 Cc1sc2c(c1C)C(c1ccc(Cl)cc1)=NC(CC(=O)OC(C)(C)C)c1nnc(C)n1-2
36 | 82146 C=C(c1ccc(C(=O)O)cc1)c1cc2c(cc1C)C(C)(C)CCC2(C)C
37 | 10184653 CN(C)CC=CC(=O)Nc1cc2c(Nc3ccc(F)c(Cl)c3)ncnc2cc1OC1CCOC1
38 | 9874913 COCC(=O)NCC=Cc1ccc2ncnc(Nc3ccc(Oc4ccc(C)nc4)c(C)c3)c2c1
39 | 6852167 CC(=O)Nc1nc(C)c(-c2ccc(Cl)c(S(=O)(=O)NCCO)c2)s1
40 | 60750 Nc1ccn(C2OC(CO)C(O)C2(F)F)c(=O)n1
41 | 44819241 Cn1ncnc1C1C2=c3c(cc(F)cc3=NC1c1ccc(F)cc1)C(=O)NN2
42 | 16663089 Cc1ccc(-c2c(C(=O)CCl)n(CCCO)c3ncnc(N)c23)cc1
43 | 11404337 COc1cc(=C2C=c3ccccc3=N2)[nH]c1=Cc1[nH]c(C)cc1C
44 | 46943432 CCNC(=O)CC1N=C(c2ccc(Cl)cc2)c2cc(OC)ccc2-n2c(C)nnc21
45 | 5311510 NC(=O)c1cccc(-c2cc(Nc3ccc(OC(F)(F)F)cc3)ncn2)c1
46 | 3062316 Cc1nc(Nc2ncc(C(=O)Nc3c(C)cccc3Cl)s2)cc(N2CCN(CCO)CC2)n1
47 | 11640390 CC1(O)CC(c2nc(-c3ccc4ccc(-c5ccccc5)nc4c3)c3c(N)nccn23)C1
48 | 6918454 O=C(NOCC1CC1)c1ccc(F)c(F)c1Nc1ccc(I)cc1Cl
49 | 9967941 Cc1cccc(-c2[nH]c(C(C)(C)C)nc2-c2ccc3nccnc3c2)n1
50 | 9911830 COc1cc2nccc(Oc3ccc(NC(=O)Nc4cc(C)on4)c(Cl)c3)c2cc1OC
51 | 49806720 CCc1cc2c(cc1N1CCC(N3CCOCC3)CC1)C(C)(C)c1[nH]c3cc(C#N)ccc3c1C2=O
52 | 24180719 CCCS(=O)(=O)Nc1ccc(F)c(C(=O)c2c[nH]c3ncc(Cl)cc23)c1F
53 | 42642645 COc1cc2c(Oc3ccc(NC(=O)C4(C(=O)Nc5ccc(F)cc5)CC4)cc3F)ccnc2cc1OCCCN1CCOCC1
54 | 16760646 CCCCC(C=O)NC(=O)C(CC(C)C)NC(=O)C(CC(C)C)NC(=O)OCc1ccccc1
55 | 44462760 CC(C)(C)c1nc(-c2cccc(NS(=O)(=O)c3c(F)cccc3F)c2F)c(-c2ccnc(N)n2)s1
56 | 5311497 CCC1=CC2CN(C1)Cc1c([nH]c3ccccc13)C(C(=O)OC)(c1cc3c(cc1OC)N(C)C1C(O)(C(=O)OC)C(OC(C)=O)C4(CC)C=CCN5CCC31C54)C2
57 | 3796 COc1cc2ncnc(Nc3cc(Br)c(O)c(Br)c3)c2cc1OC
58 | 11676786 Cc1ccc(NC(=O)c2cccc(C(C)(C)C#N)c2)cc1Nc1ccc2ncn(C)c(=O)c2c1
59 | 17755052 CS(=O)(=O)N1CCN(Cc2cc3nc(-c4cccc5[nH]ncc45)nc(N4CCOCC4)c3s2)CC1
60 | 25022668 O=C(O)c1ccc(-c2c[nH]c3ncc(-c4ccccc4)cc23)cc1C1CCCC1
61 | 36462 COc1cc(C2c3cc4c(cc3C(OC3OC5COC(C)OC5C(O)C3O)C3COC(=O)C23)OCO4)cc(OC)c1O
62 | 36314 CC(=O)OC1C(=O)C2(C)C(O)CC3OCC3(OC(C)=O)C2C(OC(=O)c2ccccc2)C2(O)CC(OC(=O)C(O)C(NC(=O)c3ccccc3)c3ccccc3)C(C)=C1C2(C)C
63 | 10451420 N#CC1=C(N)NC(=C2C(=O)C=CC=C2OCC2CC2)C=C1C1CCNCC1
64 | 444795 CC(C=CC1=C(C)CCCC1(C)C)=CC=CC(C)=CC(=O)O
65 | 10077147 O=C(NCCCNc1nc(Nc2cccc(NC(=O)N3CCCC3)c2)ncc1I)c1cccs1
66 | 6914657 O=C(C=Cc1cccnc1)NCCCCC1CCN(C(=O)c2ccccc2)CC1
67 | 11844351 COc1ccc(N(C(=O)Oc2c(C)cccc2C)c2ccnc(Nc3ccc(N4CCN(C)CC4)cc3)n2)c(OC)c1
68 | 44143370 CCN1CCC(n2cc(CNc3cc(Cl)c4ncc(C#N)c(Nc5ccc(F)c(Cl)c5)c4c3)nn2)CC1
69 | 11667893 CC1(C)CNc2cc(NC(=O)c3cccnc3NCc3ccncc3)ccc21
70 | 46930998 Cl.Cl.NC1(c2ccc(-c3nc4ccn5c(=O)[nH]nc5c4cc3-c3ccccc3)cc2)CCC1
71 | 25257557 Cc1cnc(CNC(=O)c2c(=O)c3ccc(N4CCCN(C)CC4)nc3n3c2sc2ccccc23)cn1
72 | 6918289 COC1CC2CCC(C)C(O)(O2)C(=O)C(=O)N2CCCCC2C(=O)OC(C(C)CC2CCC(OC(=O)C(C)(CO)CO)C(OC)C2)CC(=O)C(C)C=C(C)C(O)C(OC)C(=O)C(C)CC(C)C=CC=CC=C1C
73 | 3385 O=c1[nH]cc(F)c(=O)[nH]1
74 | 1401 CCN(CC)CCCCNc1ncc2cc(-c3cc(OC)cc(OC)c3)c(NC(=O)NC(C)(C)C)nc2n1
75 | 10074640 Cc1ccc(NC(=O)c2ccc(CN3CCN(C)CC3)cc2)cc1Nc1nc(-c2cccnc2)cs1
76 | 5278396 O=c1cc(-c2cccc3c2Sc2ccccc2S3)oc(N2CCOCC2)c1
77 | 76044 NC(CSC(c1ccccc1)(c1ccccc1)c1ccccc1)C(=O)O
78 | 5329102 CCN(CC)CCNC(=O)c1c(C)[nH]c(C=C2C(=O)Nc3ccc(F)cc32)c1C
79 | 5459322 O=C1C(=NNc2ccc3cc(S(=O)(=O)O)ccc3c2)C=C(S(=O)(=O)O)c2cccnc21
80 | 300471 CN(NC(=O)CC(=O)NN(C)C(=S)c1ccccc1)C(=S)c1ccccc1
81 | 176158 Cn1cc(C2=C(c3ccc(Cl)cc3Cl)C(=O)NC2=O)c2ccccc21
82 | 20635522 COc1cc(C)c(Sc2cnc(NC(=O)c3ccc(CNC(C)C(C)C)cc3)s2)cc1C(=O)N1CCN(C(C)=O)CC1
83 | 25222038 Fc1cc(-c2ccnc(Nc3ccc(-n4cnc(N5CCOCC5)n4)cc3)n2)cc(N2CCOCC2)c1
84 | 16725726 CCn1c(-c2nonc2N)nc2c(C#CC(C)(C)O)ncc(OCC3CCCNC3)c21
85 | 9952773 CCc1nc(-c2cccc(C)c2)c(-c2ccnc(NC(=O)c3ccccc3)c2)s1
86 | 5113032 NC(=O)C1CCCc2c1[nH]c1ccc(Cl)cc21
87 | 156422 Cc1ccc(-n2nc(C(C)(C)C)cc2NC(=O)Nc2ccc(OCCN3CCOCC3)c3ccccc23)cc1
88 | 644215 OCCCNc1cc(-c2ccnc(Nc3cccc(Cl)c3)n2)ccn1
89 | 16095342 CCn1c(-c2nonc2N)nc2cnc(Oc3cccc(NC(=O)c4ccc(OCCN5CCOCC5)cc4)c3)cc21
90 | 9826308 COc1cc2ncn(-c3cc(OCc4ccccc4C(F)(F)F)c(C(N)=O)s3)c2cc1OC
91 | 85668777 C=C1N(CCCCCC(=O)O)c2ccccc2C1(C)C
92 | 25262965 COc1ccc(-c2ccc3c(N4CCOCC4C)nc(N4CCOCC4C)nc3n2)cc1CO
93 | 71271629 COc1ccccc1S(=O)(=O)Nc1ccc2c(c1)CN(C)C(=O)N2
94 | 176870 C#Cc1cccc(Nc2ncnc3cc(OCCOC)c(OCCOC)cc23)c1
95 | 11634725 Cc1cnc(Nc2ccc(F)cc2Cl)nc1-c1c[nH]c(C(=O)NC(CO)c2cccc(Cl)c2)c1
96 | 11282283 S=C(NCc1ccc2c(c1)OCO2)N1CCN(c2ncnc3c2oc2ccccc23)CC1
97 | 9938202 Cl.Cl.O=c1cc(CN2CCOCC2)occ1OCCCCCSc1ccnc2cc(C(F)(F)F)ccc12
98 | 9826528 O=C(NOCC(O)CO)c1ccc(F)c(F)c1Nc1ccc(I)cc1F
99 | 11152667 NC(=O)Nc1cc(-c2cccc(F)c2)sc1C(=O)NC1CCCNC1
100 | 704473 Cc1cccc(Nc2nc(-c3ccncc3)cs2)c1
101 | 46844147 COc1cc(N2CCN(C)CC2)ccc1Nc1ncc2c(n1)N(C)c1ccccc1C(=O)N2C
102 | 5746 COC12C(COC(N)=O)C3=C(C(=O)C(C)=C(N)C3=O)N1CC1NC12
103 | 560326 COC(=O)CNC(=O)C(=O)OC
104 | 49821040 C=CC(=O)N1CCCC(Nc2nc(Nc3ccc(N(C)C(=O)CC)cc3)nc3nc[nH]c23)C1
105 | 11977753 Cn1c(=O)n(-c2ccc(C(C)(C)C#N)cc2)c2c3cc(-c4cnc5ccccc5c4)ccc3ncc21
106 | 54676905 CC(O)=C(C#N)C(=O)Nc1cc(Br)ccc1Br
107 | 65110 NC(=O)c1ncn(C2OC(COP(=O)(O)O)C(O)C2O)c1N
108 | 49836027 CCC(=O)N1CCN(c2ccc(-n3c(=O)ccc4cnc5ccc(-c6cnc7ccccc7c6)cc5c43)cc2C(F)(F)F)CC1
109 | 11433190 COc1ccc(C2=NC(c3ccc(Cl)cc3)C(c3ccc(Cl)cc3)N2C(=O)N2CCNC(=O)C2)c(OC(C)C)c1
110 | 8249 NC(N)=NC(N)=NCCc1ccccc1
111 | 11609586 c1ccc2c(CCNc3ccc(Nc4ccncc4)cc3)c[nH]c2c1
112 | 5291 Cc1ccc(NC(=O)c2ccc(CN3CCN(C)CC3)cc2)cc1Nc1nccc(-c2cccnc2)n1
113 | 10390396 Cc1cc(N2CCOCC2)cc2c1NC(=C1C(=O)N=CC=C1NCC(O)c1cccc(Cl)c1)N2
114 | 25124816 CCc1cc(Nc2nccc(-c3c(-c4ccc(OC)c(C(=O)Nc5c(F)cccc5F)c4)nc4ccccn34)n2)c(OC)cc1N1CCC(N2CCN(S(C)(=O)=O)CC2)CC1
115 | 216239 CNC(=O)c1cc(Oc2ccc(NC(=O)Nc3ccc(Cl)c(C(F)(F)F)c3)cc2)ccn1
116 | 11455910 CC(C)c1ccccc1Cc1cc(C(=O)Nc2ccc(S(=O)(=O)c3ccccc3C(C)(C)C)cc2)c(O)c(O)c1O
117 | 5311 O=C(CCCCCCC(=O)Nc1ccccc1)NO
118 | 16038120 COc1cc(N2CCC(N3CCN(C)CC3)CC2)ccc1Nc1ncc(Cl)c(Nc2ccccc2S(=O)(=O)C(C)C)n1
119 | 3463933 Cc1cc([N+](=O)[O-])ccc1NS(=O)(=O)c1cc(Cl)ccc1Cl
120 | 4263900 COc1cc2c(cc1OC)CN(C(C)C(=O)Nc1cc([N+](=O)[O-])ccc1Cl)CC2
121 | 5280757 CCCC=CC=CC(=O)OC1C(=CC(=O)OC)CC2CC(C(C)O)OC(=O)CC(O)CC3CC(OC(C)=O)C(C)(C)C(O)(CC4CC(=CC(=O)OC)CC(C=CC(C)(C)C1(O)O2)O4)O3
122 | 11713159 CN(c1ncccc1CNc1nc(Nc2ccc3c(c2)CC(=O)N3)ncc1C(F)(F)F)S(C)(=O)=O
123 | 9956222 Nc1ccc(C(=O)Nc2cccc(-c3nc(N4CCOCC4)c4oc5ncccc5c4n3)c2)cn1
124 | 10096043 CCNC(=O)C1=C(c2ccc(CN3CCOCC3)cc2)C(=C2C=C(C(C)C)C(O)=CC2=O)ON1
125 | 6450551 CNC(=O)c1ccccc1Sc1ccc2c(C=Cc3ccccn3)n[nH]c2c1
126 | 25167777 COc1ncc(-c2ccc3nccc(-c4ccnnc4)c3c2)cc1NS(=O)(=O)c1ccc(F)cc1F
127 | 46224516 COc1cc2c(NC3CCN(C(C)C)CC3)nc(C3CCCCC3)nc2cc1OCCCN1CCCC1
128 | 5460769 Cc1c(N)nc(C(CC(N)=O)NCC(N)C(N)=O)nc1C(=O)NC(C(=O)NC(C)C(O)C(C)C(=O)NC(C(=O)NCCc1nc(-c2nc(C(=O)NCCC[S+](C)C)cs2)cs1)C(C)O)C(OC1OC(CO)C(O)C(O)C1OC1OC(CO)C(O)C(OC(N)=O)C1O)c1cnc[nH]1
129 | 5717801 O=C(C=Cc1ccccc1)NC(NC(=S)Nc1cccc2cccnc12)C(Cl)(Cl)Cl
130 | 11493598 Nc1[nH]nc2nnc(-c3c(-c4ccccc4)nn4ccccc34)cc12
131 | 123631 COc1cc2ncnc(Nc3ccc(F)c(Cl)c3)c2cc1OCCCN1CCOCC1
132 | 56965967 O=C(NC(COc1ccc2[nH]c(=O)[nH]c2c1)c1ccccc1)c1cccn(Cc2ccc(F)c(F)c2)c1=O
133 | 216326 Nc1cccc2c1CN(C1CCC(=O)NC1=O)C2=O
134 | 42640 CN1CCN(c2ccc([N+](=O)[O-])c3no[n+]([O-])c23)CC1
135 | 6753378 C=CCC1=CC=CC(=CNNC(=O)CN2CCN(Cc3ccccc3)CC2)C1=O
136 | 56962337 CC(C)N(CCCNC(=O)Nc1ccc(C(C)(C)C)cc1)CC1OC(n2cc(Br)c3c(N)ncnc32)C(O)C1O
137 | 5328940 COc1cc(Nc2c(C#N)cnc3cc(OCCCN4CCN(C)CC4)c(OC)cc23)c(Cl)cc1Cl
138 | 2726824 O=c1nc(-c2ccc(C(F)(F)F)cc2)[nH]c2c1CSCC2
139 | 9910224 COc1nc(C)cnc1NS(=O)(=O)c1cccnc1-c1ccc(-c2nnco2)cc1
140 | 44632017 COc1ccc(C(=O)CC2(O)C(=O)Nc3c(Cl)ccc(Cl)c32)cc1
141 | 3005532 COC(=O)C(CCSC)NC(=O)c1ccc(NCC(N)CS)cc1-c1ccccc1
142 | 53394750 CN1CCc2c(c3ccccc3n2Cc2ccc(C(=O)NO)cc2)C1.OOC#CF
143 | 66577006 Cc1ccc(Nc2cc(-c3cccc(N4C(=O)c5ccccc5C4=O)c3)ncn2)cc1NS(C)(=O)=O
144 | 9813758 Cc1ccc2nc(NCCN)c3ncc(C)n3c2c1
145 | 10459196 CC(=O)OC1C(C)OC(Oc2c(-c3ccc(O)cc3)oc3cc(O)cc(O)c3c2=O)C(O)C1OC(C)=O
146 | 46883536 NC(=O)C(CCC(F)(F)F)N(Cc1ccc(-c2ncon2)cc1F)S(=O)(=O)c1ccc(Cl)cc1
147 | 176167 Cn1cc(C2=C(c3cn(C4CCN(Cc5ccccn5)CC4)c4ccccc34)C(=O)NC2=O)c2ccccc21
148 | 54685215 O=C(O)CNC(=O)c1c(O)c2ccccc2n(Cc2ccccc2)c1=O
149 | 6450816 CS(=O)(=O)O.Cc1ccc(C(=O)N(CCCN)C(c2nc3cc(Cl)ccc3c(=O)n2Cc2ccccc2)C(C)C)cc1
150 | 11960529 CC1(c2nc3c(C(N)=O)cccc3[nH]2)CCCN1
151 | 10027278 NCC(=O)Nc1ccc(-n2nc(C(F)(F)F)cc2-c2ccc3c(ccc4ccccc43)c2)cc1
152 | 9858940 Cc1cccc(-c2[nH]c(C(C)(C)C)nc2-c2ccc3c(c2)OCO3)n1
153 | 11707110 CC(=O)Nc1cccc(-n2c(=O)n(C3CC3)c(=O)c3c(Nc4ccc(I)cc4F)n(C)c(=O)c(C)c32)c1
154 | 2314623 O=C1C(=Cc2cccs2)CCC1=Cc1cccs1
155 | 11178236 COCCn1c2c([n+](Cc3cnccn3)c1C)C(=O)c1ccccc1C2=O.[Br-]
156 | 9943465 Cn1cc(C=C2C(=O)Nc3cccnc32)c2ccccc21
157 | 44137675 Cc1cc(C(C)Nc2ccccc2C(=O)O)c2nc(N3CCOCC3)cc(=O)n2c1
158 | 9907093 Cc1cc(C(C)Nc2ccccc2)c2nc(N3CCOCC3)cc(=O)n2c1
159 | 11647372 FC(F)c1nc2ccccc2n1-c1nc(N2CCOCC2)nc(N2CCOCC2)n1
160 | 4261 Nc1ccccc1NC(=O)c1ccc(CNC(=O)OCc2cccnc2)cc1
161 | 9903786 NC(=O)Nc1sc(-c2ccc(F)cc2)cc1C(N)=O
162 | 9863776 COc1cc(O)c2c(c1)C=CCC(O)C(O)C(=O)C=CCC(C)OC2=O
163 | 2733526 CCC(=C(c1ccccc1)c1ccc(OCCN(C)C)cc1)c1ccccc1
164 | 11327430 O=c1cc(N2CCOCC2)oc2c(-c3cccc4c3sc3ccccc34)cccc12
165 | 5208 CC(C)=CCC(O)C1=CC(=O)c2c(O)ccc(O)c2C1=O
166 | 11626560 CC(Oc1cc(-c2cnn(C3CCNCC3)c2)cnc1N)c1c(Cl)ccc(F)c1Cl
167 | 24951314 CC(C)(C)OC(=O)Nc1ccc(-c2cc(C(=O)NCCCCCCC(=O)NO)no2)cc1
168 | 46885626 CCn1cc(-c2ccnc3[nH]c(-c4cccc(CN(C)C)c4)cc23)c(-c2ccc(NC(=O)N(C)C)cc2)n1
169 | 3218 CCCCCCCCCCCC1=C(O)C(=O)C=C(O)C1=O
170 | 44450571 O=C1N=C(NCc2cccs2)SC1=Cc1ccc2ncccc2c1
171 | 24826799 Cc1ccc(C(=O)Nc2ccc(CN3CCN(C)CC3)c(C(F)(F)F)c2)cc1C#Cc1cnc2cccnn12
172 | 11485656 Cc1ccc(F)c(NC(=O)Nc2ccc(-c3cccc4[nH]nc(N)c34)cc2)c1
173 | 46191454 CC(C)CC(=O)Nc1n[nH]c2c1CN(C(=O)C1CCN(C)CC1)C2(C)C
174 | 6505803 C=CCNC1=C2CC(C)CC(OC)C(O)C(C)C=C(C)C(OC(N)=O)C(OC)C=CC=C(C)C(=O)NC(=CC1=O)C2=O
175 | 126941 CN(Cc1cnc2nc(N)nc(N)c2n1)c1ccc(C(=O)NC(CCC(=O)O)C(=O)O)cc1
176 | 24825971 CCNc1cc(NC2CCC(O)CC2)nc(Nc2ccc3c(ccn3Cc3ccccc3)c2)n1
177 | 5494449 Cc1cc(Nc2cc(N3CCN(C)CC3)nc(Sc3ccc(NC(=O)C4CC4)cc3)n2)n[nH]1
178 | 10384072 O=C1NC(=O)c2c1c(-c1ccccc1Cl)cc1[nH]c3ccc(O)cc3c21
179 | 9810884 CC(N)c1ccc(C(=O)Nc2ccnc3[nH]ccc23)cc1
180 | 9914412 COc1cc2c(Nc3ccc(NC(=O)c4ccccc4)cc3)ncnc2cc1OCCCN1CCOCC1
181 | 11626927 COc1cc2ncn(-c3cc(OCc4ccccc4S(C)(=O)=O)c(C#N)s3)c2cc1OC
182 | 53302361 CCN1CCN(Cc2ccc(NC(=O)c3ccc(C)c(C=Cc4cnc5[nH]ccc5c4OC)c3)cc2C(F)(F)F)CC1
183 | 447912 O=S(=O)(c1ccccc1)N(CC(F)(F)F)c1ccc(C(O)(C(F)(F)F)C(F)(F)F)cc1
184 | 16720766 NS(=O)(=O)OCC1CC(n2ccc3c(NC4CCc5ccccc54)ncnc32)CC1O
185 | 16747388 Cc1ccc(C(=O)Nc2cccc(C(F)(F)F)c2)cc1Nc1nc(-c2cccnc2)nc2c1cnn2C
186 | 10109823 N#CC(c1ccnc(NCCc2cccnc2)n1)c1nc2ccccc2s1
187 | 5330286 CC(=O)c1c(C)c2cnc(Nc3ccc(N4CCNCC4)cn3)nc2n(C2CCCC2)c1=O
188 | 44224160 COc1cccc2c1=NC(=c1nc(C3CCC(C(=O)O)CC3)n3c1=C(N)N=CN3)C=2
189 | 9931953 CNCc1ccc(-c2[nH]c3cc(F)cc4c3c2CCNC4=O)cc1.O=P(O)(O)O
190 | 9884685 Oc1cccc(-c2nc(N3CCOCC3)c3oc4ncccc4c3n2)c1
191 | 11338033 O=C(NC1CCNCC1)c1[nH]ncc1NC(=O)c1c(Cl)cccc1Cl
192 | 10200390 COc1ccc(-c2cc3nccn3c(Nc3ncccc3C(N)=O)n2)cc1OC
193 | 25126798 N#CCC(C1CCCC1)n1cc(-c2ncnc3[nH]ccc23)cn1
194 | 16722836 Cc1cnc(Nc2ccc(OCCN3CCCC3)cc2)nc1Nc1cccc(S(=O)(=O)NC(C)(C)C)c1
195 | 10113978 Cc1ccc(Nc2nccc(N(C)c3ccc4c(C)n(C)nc4c3)n2)cc1S(N)(=O)=O
196 | 24894414 CCCC(=O)OCc1ccc(OC(=O)C=C(C)C=CC=C(C)C=CC2=C(C)CCCC2(C)C)cc1
197 | 9956119 Cc1cnc(-c2cnc(NCCNc3ccc(C#N)cn3)nc2-c2ccc(Cl)cc2Cl)[nH]1
198 | 10172943 Cc1[nH]nc2ccc(-c3cncc(OCC(N)Cc4c[nH]c5ccccc45)c3)cc12
199 | 53340664 CCN1CCN(Cc2ccc(NC(=O)c3ccc(C)c(Oc4ccnc5[nH]ccc45)c3)cc2C(F)(F)F)CC1
200 | 11316960 CN(C)CCOc1ccc(-c2nc(=c3ccc4c(c3)CCC=4N=O)c(=C3C=CNC=C3)[nH]2)cc1
201 | 25195352 COc1cc(N2CCC(O)CC2)ccc1Nc1cc(Nc2ccccc2S(=O)(=O)C(C)C)c2cc[nH]c2n1
202 | 6918848 CC(C)C(C(=O)Nc1ccc(C(=O)NO)cc1)c1ccccc1
203 | 84691 N.N.[Cl-].[Cl-].[Pt+2]
204 | 46931012 CN(C)Cc1cccc(NC(=C2C(=O)Nc3cc(C(=O)N(C)C)ccc32)c2ccccc2)c1
205 | 10341154 NC(=O)c1ccc(N(C(N)=O)c2c(F)cccc2F)nc1-c1ccc(F)cc1F
206 | 44182295 COc1cc(F)c(F)c(Nc2ccc(I)cc2F)c1NS(=O)(=O)C1(CC(O)CO)CC1
207 | 644241 Cc1cn(-c2cc(NC(=O)c3ccc(C)c(Nc4nccc(-c5cccnc5)n4)c3)cc(C(F)(F)F)c2)cn1
208 | 51371303 CCc1cncnc1N1CCN(Cc2nc3ccc(C(F)(F)F)cc3[nH]2)CC1
209 | 11625818 CCC(Nc1ncnc2nc[nH]c12)c1nc2cccc(F)c2c(=O)n1-c1ccccc1
210 | 11754511 O=C(Nc1cccc(Nc2ncc(Br)c(NCCc3cnc[nH]3)n2)c1)N1CCCC1
211 | 10127622 Cn1cnc2c(F)c(Nc3ccc(Br)cc3Cl)c(C(=O)NOCCO)cc21
212 | 159324 Cn1cncc1C(N)(c1ccc(Cl)cc1)c1ccc2c(c1)c(-c1cccc(Cl)c1)cc(=O)n2C
213 | 5384616 N#C[N]C(=Nc1ccccc1)NC#N
214 | 160355 CCC(CO)Nc1nc(NCc2ccccc2)c2ncn(C(C)C)c2n1
215 | 9549184 COc1cc(-c2nn(C3CCC(N4CCN(C(C)=O)CC4)CC3)c3ncnc(N)c23)ccc1NC(=O)c1cc2ccccc2n1C
216 | 104842 CCc1c2c(nc3ccc(O)cc13)-c1cc3c(c(=O)n1C2)COC(=O)C3(O)CC
217 | 10196499 O=c1[nH]c2ccccc2n1C1CCN(Cc2ccc(-c3nc4cc5[nH]cnc5cc4nc3-c3ccccc3)cc2)CC1
218 | 24978538 CC1(C)CCC(c2ccc(Cl)cc2)=C(CN2CCN(c3ccc(C(=O)NS(=O)(=O)c4ccc(NC(CCN5CCOCC5)CSc5ccccc5)c(S(=O)(=O)C(F)(F)F)c4)cc3)CC2)C1
219 | 10461815 Cc1[nH]c(C=C2C(=O)Nc3ccc(S(=O)(=O)Cc4c(Cl)cccc4Cl)cc32)c(C)c1C(=O)N1CCCC1CN1CCCC1
220 | 126565 CC12OC(CC1(O)CO)n1c3ccccc3c3c4c(c5c6ccccc6n2c5c31)CNC4=O
221 | 446378 CC=C(C)C(=O)OC1C(C)=C2C(C1OC(=O)CCCCCCC)C(C)(OC(C)=O)CC(OC(=O)CCC)C1(O)C2OC(=O)C1(C)O
222 | 6253 Nc1ccn(C2OC(CO)C(O)C2O)c(=O)n1
223 |
--------------------------------------------------------------------------------
/data/Drug/drug_graph_feat.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BioMedicalBigDataMiningLab/GraphCDR/656de097f7e48580e35a9cfef9898ac3948d5dfa/data/Drug/drug_graph_feat.zip
--------------------------------------------------------------------------------
/data/Drug/drug_threshold.csv:
--------------------------------------------------------------------------------
1 | pubchem,IC50,DrugName,DrugID
2 | 9863776,-0.60596,(5Z)-7-Oxozeaenol,1242
3 | 3385,1.1236,5-Fluorouracil,179
4 | 10172943,-2.3385,A-443654,86
5 | 9549184,1.8731,A-770041,55
6 | 65110,6.1928,AICA Ribonucleotide,1001
7 | 10196499,0.96012,AKT inhibitor VIII,171
8 | 6918848,-2.3664,AR-42,272
9 | 10109823,0.8995,AS601245,207
10 | 5289247,1.1342,AS605240,224
11 | 11338033,-1.7295,AT-7519,219
12 | 11676786,-0.034145,AZ628,29
13 | 44137675,0.71712,AZD6482,156
14 | 11152667,-2.2742,AZD7762,1022
15 | 25262965,-1.2703,AZD8055,1059
16 | 10184653,-0.22156,Afatinib,1032
17 | 49806720,2.9319,Alectinib,281
18 | 11282283,2.178,Amuvatinib,293
19 | 46883536,3.9907,Avagacestat,205
20 | 6450551,0.41918,Axitinib,1021
21 | 10200390,0.21279,BAY-61-3606,178
22 | 11364421,-4.0077,BI-2536,60
23 | 46931012,2.7699,BIX02189,279
24 | 9813758,1.8541,BMS-345541,203
25 | 20635522,1.5906,BMS-509744,63
26 | 10390396,-0.40222,BMS-536924,62
27 | 24785538,-1.1641,BMS-754807,184
28 | 11754511,0.74305,BX-912,222
29 | 10077147,0.37752,BX795,1037
30 | 6918638,-1.8784,Belinostat,274
31 | 82146,2.0167,Bexarotene,186
32 | 2375,1.8801,Bicalutamide,150
33 | 5460769,-1.4805,Bleomycin,190
34 | 387447,-7.6275,Bortezomib,104
35 | 5328940,-0.14537,Bosutinib,1019
36 | 5280757,-3.9017,Bryostatin 1,197
37 | 24951314,-1.1523,CAY10603,276
38 | 5327091,1.6851,CCT-018159,1170
39 | 2314623,3.1306,CCT007093,1067
40 | 24825971,1.202,CGP-082996,54
41 | 644215,-3.6524,CGP-60474,53
42 | 9956119,2.0114,CHIR-99021,154
43 | 6918454,0.63883,CI-1040,1015
44 | 16663089,0.35105,CMK,64
45 | 44551660,0.91394,CP466722,152
46 | 9874913,3.257,CP724714,255
47 | 24756910,-2.4009,CUDC-101,273
48 | 25257557,3.6663,CX-5461,300
49 | 25102847,1.1741,Cabozantinib,249
50 | 85668777,5.144,Cetuximab,1114
51 | 84691,1.3801,Cisplatin,1005
52 | 11626560,1.2531,Crizotinib,37
53 | 6253,-1.9516,Cytarabine,1006
54 | 560326,4.4041,DMOG,165
55 | 44462760,1.9424,Dabrafenib,1373
56 | 6445533,-4.1809,Dacinostat,200
57 | 11977753,-3.9866,Dactolisib,1057
58 | 6914657,-7.1803,Daporinad,1248
59 | 3062316,-0.28739,Dasatinib,51
60 | 148124,-6.897,Docetaxel,1007
61 | 156422,3.1334,Doramapimod,1042
62 | 31703,-3.9565,Doxorubicin,133
63 | 9938202,2.4876,EHT-1864,1069
64 | 300471,-5.3392,Elesclomol,1031
65 | 3218,1.5946,Embelin,172
66 | 4261,-1.2652,Entinostat,88
67 | 176167,1.0593,Enzastaurin,229
68 | 448013,-7.4389,Epothilone B,201
69 | 176870,1.5671,Erlotinib,1
70 | 36462,-1.2198,Etoposide,134
71 | 3463933,0.56208,FH535,173
72 | 11493598,3.832,FR-180204,263
73 | 3005532,1.1367,FTI-277,166
74 | 16722836,0.49833,Fedratinib,306
75 | 42642645,-1.2725,Foretinib,308
76 | 5311510,1.613,GNF-2,52
77 | 46885626,0.43383,GSK1070916,226
78 | 25124816,1.3827,GSK1904529A,202
79 | 16095342,0.35074,GSK269962A,127
80 | 11626927,3.0676,GSK319347A,91
81 | 11373846,3.4721,GSK429286A,230
82 | 25022668,1.0119,GSK650394,177
83 | 16725726,3.2864,GSK690693,326
84 | 11617559,4.5004,GW-2580,193
85 | 9943465,1.2426,GW441756,1023
86 | 9826308,-4.9006,GW843682X,87
87 | 123631,-0.05346,Gefitinib,1010
88 | 60750,-5.9903,Gemcitabine,135
89 | 53302361,-1.0384,HG6-64-1,159
90 | 46943432,2.0537,I-BET-762,275
91 | 54685215,-0.78038,IOX2,1230
92 | 521106,1.704,IPA-3,176
93 | 11625818,3.0467,Idelalisib,238
94 | 5291,2.0495,Imatinib,34
95 | 6450816,-4.6156,Ispinesib Mesylate,208
96 | 11624601,3.4725,JNK Inhibitor VIII,1043
97 | 25222038,-1.4101,JNK-9L,157
98 | 46907787,-1.898,JQ1,163
99 | 49836027,-4.1814,JW-7-52-1,83
100 | 56965967,1.9164,KIN001-244,287
101 | 10451420,3.9071,KIN001-260,290
102 | 44143370,2.2663,KIN001-266,291
103 | 66577006,3.7318,KIN001-270,345
104 | 5278396,2.5527,KU-55933,1030
105 | 54676905,3.6673,LFM-A13,192
106 | 208908,1.6257,Lapatinib,119
107 | 216326,2.9855,Lenalidomide,1020
108 | 126565,-2.4969,Lestaurtinib,1024
109 | 11485656,1.6449,Linifanib,277
110 | 11640390,0.50445,Linsitinib,185
111 | 10096043,-4.7212,Luminespib,194
112 | 462382,-2.0053,MG-132,9
113 | 46930998,-0.13691,MK-2206,1053
114 | 25195352,1.8522,MPS-1-IN-1,294
115 | 10074640,2.263,Masitinib,292
116 | 126941,-2.4743,Methotrexate,1008
117 | 5746,-2.9647,Mitomycin-C,136
118 | 11667893,1.7167,Motesanib,1029
119 | 53340664,1.0297,NG-25,260
120 | 42640,1.8796,NSC-207895,269
121 | 5459322,3.9149,NSC-87877,147
122 | 11327430,1.0699,NU7441,1038
123 | 16747388,1.1748,NVP-BHG712,295
124 | 16038120,-1.1256,NVP-TAE684,35
125 | 24978538,0.37896,Navitoclax,1011
126 | 644241,1.6795,Nilotinib,1013
127 | 11433190,2.8063,Nutlin-3a (-),1047
128 | 44224160,-2.2484,OSI-027,299
129 | 9868037,2.7919,OSI-930,298
130 | 10027278,0.47889,OSU-03012,167
131 | 11404337,-3.1448,Obatoclax Mesylate,182
132 | 23725625,2.2434,Olaparib,1017
133 | 25167777,-5.2594,Omipalisib,283
134 | 6753378,0.29904,PAC-1,175
135 | 9826528,-2.7331,PD0325901,1060
136 | 1401,1.3686,PD173074,1049
137 | 51371303,2.6236,PF-4708671,1129
138 | 11713159,0.49307,PF-562271,158
139 | 71271629,1.2307,PFI-1,1219
140 | 10461815,1.9064,PHA-665752,6
141 | 46191454,-0.37616,PHA-793887,301
142 | 9884685,-2.302,PI-103,302
143 | 6852167,1.1468,PIK-93,303
144 | 24180719,2.7717,PLX-4720,1036
145 | 36314,-5.6772,Paclitaxel,11
146 | 5330286,-0.42572,Palbociclib,1054
147 | 7251185,1.2425,Parthenolide,89
148 | 10113978,1.6529,Pazopanib,199
149 | 6445562,-0.47851,Pelitinib,282
150 | 16720766,-1.8842,Pevonedistat,1529
151 | 8249,5.2789,Phenformin,196
152 | 17755052,-0.76788,Pictilisib,1058
153 | 637858,0.65044,Piperlongumine,1243
154 | 24826799,-1.3235,Ponatinib,155
155 | 4993,1.5774,Pyrimethamine,71
156 | 4263900,1.3074,QS11,151
157 | 24889392,1.3162,Quizartinib,254
158 | 44450571,2.0677,RO-3306,1052
159 | 5384616,-3.6395,Rapamycin,3
160 | 44182295,-0.30417,Refametinib,1014
161 | 9931953,2.1082,Rucaparib,1175
162 | 25126798,3.2715,Ruxolitinib,206
163 | 76044,-0.71324,S-Trityl-L-cysteine,41
164 | 176158,3.2994,SB216763,1025
165 | 9858940,3.0777,SB505124,1194
166 | 9967941,2.7263,SB52334,304
167 | 11316960,2.5695,SB590885,1061
168 | 56962337,1.0076,SGC0946,1264
169 | 10459196,3.5531,SL0101,1039
170 | 104842,-6.559,SN-38,1494
171 | 24772860,-3.6872,SNX-2112,328
172 | 704473,3.3769,STF-62247,258
173 | 5717801,2.056,Salubrinal,111
174 | 10302451,0.96253,Saracatinib,38
175 | 160355,2.6035,Seliciclib,110
176 | 5113032,4.6025,Selisistat,341
177 | 10127622,1.2062,Selumetinib,1062
178 | 11178236,-6.0265,Sepantronium bromide,268
179 | 11609586,1.5335,Serdemetan,1133
180 | 5208,-1.4095,Shikonin,170
181 | 216239,0.61159,Sorafenib,30
182 | 5329102,0.50064,Sunitinib,5
183 | 447912,3.2298,T0901317,333
184 | 9952773,2.5463,TAK-715,221
185 | 9907093,2.5805,TGX221,94
186 | 9903786,0.988,TPCA-1,305
187 | 11455910,-1.9388,TW 37,1149
188 | 44819241,-0.082745,Talazoparib,1259
189 | 2733526,2.7296,Tamoxifen,1199
190 | 6505803,-3.2679,Tanespimycin,1026
191 | 5394,4.6302,Temozolomide,1375
192 | 6918289,-3.8947,Temsirolimus,1016
193 | 446378,-6.1632,Thapsigargin,180
194 | 159324,-0.31364,Tipifarnib,204
195 | 9911830,-0.39743,Tivozanib,312
196 | 5494449,-0.59242,Tozasertib,32
197 | 11707110,-2.1451,Trametinib,1372
198 | 444795,2.5611,Tretinoin,1009
199 | 53394750,3.8933,Tubastatin A,265
200 | 46224516,1.3874,UNC0638,245
201 | 57339144,1.6932,UNC1215,1262
202 | 24894414,2.9403,VNLG/124,271
203 | 11634725,1.0031,VX-11e,262
204 | 10341154,1.9488,VX-702,1028
205 | 11960529,3.0134,Veliparib,1018
206 | 6710780,-5.9201,Vinblastine,1004
207 | 5311497,-5.9536,Vinorelbine,140
208 | 24776445,3.2741,Vismodegib,1033
209 | 5311,-0.54854,Vorinostat,1012
210 | 11844351,0.88193,WH-4-023,56
211 | 3796,2.6822,WHI-P97,288
212 | 49821040,2.6003,WZ-1-84,59
213 | 10384072,0.63688,Wee1 Inhibitor,1046
214 | 2726824,2.2288,XAV939,1268
215 | 46844147,1.207,XMD8-85,106
216 | 46843772,1.7833,XMD8-92,1164
217 | 9810884,4.0613,Y-39983,309
218 | 44632017,0.030235,YK-4-279,1239
219 | 9956222,0.58406,YM201636,310
220 | 16760646,-0.60881,Z-LLNle-CHO,45
221 | 9914412,0.56209,ZM447439,1050
222 | 11647372,-1.0205,ZSTK474,223
223 | 9910224,4.8219,Zibotentan,266
224 |
--------------------------------------------------------------------------------
/data/Drug/drug_threshold.txt:
--------------------------------------------------------------------------------
1 | pubchem IC50 DrugName DrugID
2 | 9863776 -0.60596 (5Z)-7-Oxozeaenol 1242
3 | 3385 1.1236 5-Fluorouracil 179
4 | 10172943 -2.3385 A-443654 86
5 | 9549184 1.8731 A-770041 55
6 | 65110 6.1928 AICA Ribonucleotide 1001
7 | 10196499 0.96012 AKT inhibitor VIII 171
8 | 6918848 -2.3664 AR-42 272
9 | 10109823 0.8995 AS601245 207
10 | 5289247 1.1342 AS605240 224
11 | 11338033 -1.7295 AT-7519 219
12 | 11676786 -0.034145 AZ628 29
13 | 44137675 0.71712 AZD6482 156
14 | 11152667 -2.2742 AZD7762 1022
15 | 25262965 -1.2703 AZD8055 1059
16 | 10184653 -0.22156 Afatinib 1032
17 | 49806720 2.9319 Alectinib 281
18 | 11282283 2.178 Amuvatinib 293
19 | 46883536 3.9907 Avagacestat 205
20 | 6450551 0.41918 Axitinib 1021
21 | 10200390 0.21279 BAY-61-3606 178
22 | 11364421 -4.0077 BI-2536 60
23 | 46931012 2.7699 BIX02189 279
24 | 9813758 1.8541 BMS-345541 203
25 | 20635522 1.5906 BMS-509744 63
26 | 10390396 -0.40222 BMS-536924 62
27 | 24785538 -1.1641 BMS-754807 184
28 | 11754511 0.74305 BX-912 222
29 | 10077147 0.37752 BX795 1037
30 | 6918638 -1.8784 Belinostat 274
31 | 82146 2.0167 Bexarotene 186
32 | 2375 1.8801 Bicalutamide 150
33 | 5460769 -1.4805 Bleomycin 190
34 | 387447 -7.6275 Bortezomib 104
35 | 5328940 -0.14537 Bosutinib 1019
36 | 5280757 -3.9017 Bryostatin 1 197
37 | 24951314 -1.1523 CAY10603 276
38 | 5327091 1.6851 CCT-018159 1170
39 | 2314623 3.1306 CCT007093 1067
40 | 24825971 1.202 CGP-082996 54
41 | 644215 -3.6524 CGP-60474 53
42 | 9956119 2.0114 CHIR-99021 154
43 | 6918454 0.63883 CI-1040 1015
44 | 16663089 0.35105 CMK 64
45 | 44551660 0.91394 CP466722 152
46 | 9874913 3.257 CP724714 255
47 | 24756910 -2.4009 CUDC-101 273
48 | 25257557 3.6663 CX-5461 300
49 | 25102847 1.1741 Cabozantinib 249
50 | 85668777 5.144 Cetuximab 1114
51 | 84691 1.3801 Cisplatin 1005
52 | 11626560 1.2531 Crizotinib 37
53 | 6253 -1.9516 Cytarabine 1006
54 | 560326 4.4041 DMOG 165
55 | 44462760 1.9424 Dabrafenib 1373
56 | 6445533 -4.1809 Dacinostat 200
57 | 11977753 -3.9866 Dactolisib 1057
58 | 6914657 -7.1803 Daporinad 1248
59 | 3062316 -0.28739 Dasatinib 51
60 | 148124 -6.897 Docetaxel 1007
61 | 156422 3.1334 Doramapimod 1042
62 | 31703 -3.9565 Doxorubicin 133
63 | 9938202 2.4876 EHT-1864 1069
64 | 300471 -5.3392 Elesclomol 1031
65 | 3218 1.5946 Embelin 172
66 | 4261 -1.2652 Entinostat 88
67 | 176167 1.0593 Enzastaurin 229
68 | 448013 -7.4389 Epothilone B 201
69 | 176870 1.5671 Erlotinib 1
70 | 36462 -1.2198 Etoposide 134
71 | 3463933 0.56208 FH535 173
72 | 11493598 3.832 FR-180204 263
73 | 3005532 1.1367 FTI-277 166
74 | 16722836 0.49833 Fedratinib 306
75 | 42642645 -1.2725 Foretinib 308
76 | 5311510 1.613 GNF-2 52
77 | 46885626 0.43383 GSK1070916 226
78 | 25124816 1.3827 GSK1904529A 202
79 | 16095342 0.35074 GSK269962A 127
80 | 11626927 3.0676 GSK319347A 91
81 | 11373846 3.4721 GSK429286A 230
82 | 25022668 1.0119 GSK650394 177
83 | 16725726 3.2864 GSK690693 326
84 | 11617559 4.5004 GW-2580 193
85 | 9943465 1.2426 GW441756 1023
86 | 9826308 -4.9006 GW843682X 87
87 | 123631 -0.05346 Gefitinib 1010
88 | 60750 -5.9903 Gemcitabine 135
89 | 53302361 -1.0384 HG6-64-1 159
90 | 46943432 2.0537 I-BET-762 275
91 | 54685215 -0.78038 IOX2 1230
92 | 521106 1.704 IPA-3 176
93 | 11625818 3.0467 Idelalisib 238
94 | 5291 2.0495 Imatinib 34
95 | 6450816 -4.6156 Ispinesib Mesylate 208
96 | 11624601 3.4725 JNK Inhibitor VIII 1043
97 | 25222038 -1.4101 JNK-9L 157
98 | 46907787 -1.898 JQ1 163
99 | 49836027 -4.1814 JW-7-52-1 83
100 | 56965967 1.9164 KIN001-244 287
101 | 10451420 3.9071 KIN001-260 290
102 | 44143370 2.2663 KIN001-266 291
103 | 66577006 3.7318 KIN001-270 345
104 | 5278396 2.5527 KU-55933 1030
105 | 54676905 3.6673 LFM-A13 192
106 | 208908 1.6257 Lapatinib 119
107 | 216326 2.9855 Lenalidomide 1020
108 | 126565 -2.4969 Lestaurtinib 1024
109 | 11485656 1.6449 Linifanib 277
110 | 11640390 0.50445 Linsitinib 185
111 | 10096043 -4.7212 Luminespib 194
112 | 462382 -2.0053 MG-132 9
113 | 46930998 -0.13691 MK-2206 1053
114 | 25195352 1.8522 MPS-1-IN-1 294
115 | 10074640 2.263 Masitinib 292
116 | 126941 -2.4743 Methotrexate 1008
117 | 5746 -2.9647 Mitomycin-C 136
118 | 11667893 1.7167 Motesanib 1029
119 | 53340664 1.0297 NG-25 260
120 | 42640 1.8796 NSC-207895 269
121 | 5459322 3.9149 NSC-87877 147
122 | 11327430 1.0699 NU7441 1038
123 | 16747388 1.1748 NVP-BHG712 295
124 | 16038120 -1.1256 NVP-TAE684 35
125 | 24978538 0.37896 Navitoclax 1011
126 | 644241 1.6795 Nilotinib 1013
127 | 11433190 2.8063 Nutlin-3a (-) 1047
128 | 44224160 -2.2484 OSI-027 299
129 | 9868037 2.7919 OSI-930 298
130 | 10027278 0.47889 OSU-03012 167
131 | 11404337 -3.1448 Obatoclax Mesylate 182
132 | 23725625 2.2434 Olaparib 1017
133 | 25167777 -5.2594 Omipalisib 283
134 | 6753378 0.29904 PAC-1 175
135 | 9826528 -2.7331 PD0325901 1060
136 | 1401 1.3686 PD173074 1049
137 | 51371303 2.6236 PF-4708671 1129
138 | 11713159 0.49307 PF-562271 158
139 | 71271629 1.2307 PFI-1 1219
140 | 10461815 1.9064 PHA-665752 6
141 | 46191454 -0.37616 PHA-793887 301
142 | 9884685 -2.302 PI-103 302
143 | 6852167 1.1468 PIK-93 303
144 | 24180719 2.7717 PLX-4720 1036
145 | 36314 -5.6772 Paclitaxel 11
146 | 5330286 -0.42572 Palbociclib 1054
147 | 7251185 1.2425 Parthenolide 89
148 | 10113978 1.6529 Pazopanib 199
149 | 6445562 -0.47851 Pelitinib 282
150 | 16720766 -1.8842 Pevonedistat 1529
151 | 8249 5.2789 Phenformin 196
152 | 17755052 -0.76788 Pictilisib 1058
153 | 637858 0.65044 Piperlongumine 1243
154 | 24826799 -1.3235 Ponatinib 155
155 | 4993 1.5774 Pyrimethamine 71
156 | 4263900 1.3074 QS11 151
157 | 24889392 1.3162 Quizartinib 254
158 | 44450571 2.0677 RO-3306 1052
159 | 5384616 -3.6395 Rapamycin 3
160 | 44182295 -0.30417 Refametinib 1014
161 | 9931953 2.1082 Rucaparib 1175
162 | 25126798 3.2715 Ruxolitinib 206
163 | 76044 -0.71324 S-Trityl-L-cysteine 41
164 | 176158 3.2994 SB216763 1025
165 | 9858940 3.0777 SB505124 1194
166 | 9967941 2.7263 SB52334 304
167 | 11316960 2.5695 SB590885 1061
168 | 56962337 1.0076 SGC0946 1264
169 | 10459196 3.5531 SL0101 1039
170 | 104842 -6.559 SN-38 1494
171 | 24772860 -3.6872 SNX-2112 328
172 | 704473 3.3769 STF-62247 258
173 | 5717801 2.056 Salubrinal 111
174 | 10302451 0.96253 Saracatinib 38
175 | 160355 2.6035 Seliciclib 110
176 | 5113032 4.6025 Selisistat 341
177 | 10127622 1.2062 Selumetinib 1062
178 | 11178236 -6.0265 Sepantronium bromide 268
179 | 11609586 1.5335 Serdemetan 1133
180 | 5208 -1.4095 Shikonin 170
181 | 216239 0.61159 Sorafenib 30
182 | 5329102 0.50064 Sunitinib 5
183 | 447912 3.2298 T0901317 333
184 | 9952773 2.5463 TAK-715 221
185 | 9907093 2.5805 TGX221 94
186 | 9903786 0.988 TPCA-1 305
187 | 11455910 -1.9388 TW 37 1149
188 | 44819241 -0.082745 Talazoparib 1259
189 | 2733526 2.7296 Tamoxifen 1199
190 | 6505803 -3.2679 Tanespimycin 1026
191 | 5394 4.6302 Temozolomide 1375
192 | 6918289 -3.8947 Temsirolimus 1016
193 | 446378 -6.1632 Thapsigargin 180
194 | 159324 -0.31364 Tipifarnib 204
195 | 9911830 -0.39743 Tivozanib 312
196 | 5494449 -0.59242 Tozasertib 32
197 | 11707110 -2.1451 Trametinib 1372
198 | 444795 2.5611 Tretinoin 1009
199 | 53394750 3.8933 Tubastatin A 265
200 | 46224516 1.3874 UNC0638 245
201 | 57339144 1.6932 UNC1215 1262
202 | 24894414 2.9403 VNLG/124 271
203 | 11634725 1.0031 VX-11e 262
204 | 10341154 1.9488 VX-702 1028
205 | 11960529 3.0134 Veliparib 1018
206 | 6710780 -5.9201 Vinblastine 1004
207 | 5311497 -5.9536 Vinorelbine 140
208 | 24776445 3.2741 Vismodegib 1033
209 | 5311 -0.54854 Vorinostat 1012
210 | 11844351 0.88193 WH-4-023 56
211 | 3796 2.6822 WHI-P97 288
212 | 49821040 2.6003 WZ-1-84 59
213 | 10384072 0.63688 Wee1 Inhibitor 1046
214 | 2726824 2.2288 XAV939 1268
215 | 46844147 1.207 XMD8-85 106
216 | 46843772 1.7833 XMD8-92 1164
217 | 9810884 4.0613 Y-39983 309
218 | 44632017 0.030235 YK-4-279 1239
219 | 9956222 0.58406 YM201636 310
220 | 16760646 -0.60881 Z-LLNle-CHO 45
221 | 9914412 0.56209 ZM447439 1050
222 | 11647372 -1.0205 ZSTK474 223
223 | 9910224 4.8219 Zibotentan 266
224 |
--------------------------------------------------------------------------------
/prog/base_model/GCNConv.py:
--------------------------------------------------------------------------------
1 | from typing import Optional, Tuple
2 | from torch_geometric.typing import Adj, OptTensor, PairTensor
3 |
4 | import torch
5 | from torch import Tensor
6 | from torch.nn import Parameter
7 | from torch_scatter import scatter_add
8 | from torch_sparse import SparseTensor, matmul, fill_diag, sum, mul
9 | from torch_geometric.nn.conv import MessagePassing
10 | from torch_geometric.utils import add_remaining_self_loops
11 | from torch_geometric.utils.num_nodes import maybe_num_nodes
12 |
13 | from my_utiils import glorot, zeros
14 |
15 |
# TorchScript overload stub for `gcn_norm` called with an index tensor.
# The `# type:` comment below is the declared signature for this overload:
# a Tensor `edge_index` plus optional weights/node count, returning a
# PairTensor (the tuple produced by the non-SparseTensor branch of the
# implementation defined further down).
# NOTE(review): `torch.jit._overload` is understood to require the body to
# stay a lone `pass` -- confirm before adding a docstring here.
@torch.jit._overload
def gcn_norm(edge_index, edge_weight=None, num_nodes=None, improved=False,
             add_self_loops=True, dtype=None):
    # type: (Tensor, OptTensor, Optional[int], bool, bool, Optional[int]) -> PairTensor # noqa
    pass
21 |
22 |
23 | @torch.jit._overload
24 | def gcn_norm(edge_index, edge_weight=None, num_nodes=None, improved=False,
25 | add_self_loops=True, dtype=None):
26 | # type: (SparseTensor, OptTensor, Optional[int], bool, bool, Optional[int]) -> SparseTensor # noqa
27 | pass
28 |
29 |
30 | def gcn_norm(edge_index, edge_weight=None, num_nodes=None, improved=False,
31 | add_self_loops=True, dtype=None):
32 |
33 | fill_value = 2. if improved else 1.
34 |
35 | if isinstance(edge_index, SparseTensor):
36 | adj_t = edge_index
37 | if not adj_t.has_value():
38 | adj_t = adj_t.fill_value(1., dtype=dtype)
39 | if add_self_loops:
40 | adj_t = fill_diag(adj_t, fill_value)
41 | deg = sum(adj_t, dim=1)
42 | deg_inv_sqrt = deg.pow_(-0.5)
43 | deg_inv_sqrt.masked_fill_(deg_inv_sqrt == float('inf'), 0.)
44 | adj_t = mul(adj_t, deg_inv_sqrt.view(-1, 1))
45 | adj_t = mul(adj_t, deg_inv_sqrt.view(1, -1))
46 | return adj_t
47 |
48 | else:
49 | num_nodes = maybe_num_nodes(edge_index, num_nodes)
50 |
51 | if edge_weight is None:
52 | edge_weight = torch.ones((edge_index.size(1), ), dtype=dtype,
53 | device=edge_index.device)
54 |
55 | if add_self_loops:
56 | edge_index, tmp_edge_weight = add_remaining_self_loops(
57 | edge_index, edge_weight, fill_value, num_nodes)
58 | assert tmp_edge_weight is not None
59 | edge_weight = tmp_edge_weight
60 |
61 | row, col = edge_index[0], edge_index[1]
62 | deg = scatter_add(edge_weight, col, dim=0, dim_size=num_nodes)
63 | deg_inv_sqrt = deg.pow_(-0.5)
64 | deg_inv_sqrt.masked_fill_(deg_inv_sqrt == float('inf'), 0)
65 | return edge_index, deg_inv_sqrt[row] * edge_weight * deg_inv_sqrt[col]
66 |
67 |
class GCNConv(MessagePassing):
    """Graph convolution layer: ``out = norm(A) @ (x @ W) + b``.

    Args:
        in_channels: Size of each input node feature vector.
        out_channels: Size of each output node feature vector.
        improved: Forwarded to ``gcn_norm`` (self-loop weight 2 instead of 1).
        cached: When True the normalised graph computed on the first forward
            pass is stored and reused on every later call.
        add_self_loops: Forwarded to ``gcn_norm``.
        normalize: Apply ``gcn_norm`` to the graph before propagating.
        bias: Learn an additive bias of size ``out_channels``.
        **kwargs: Passed to ``MessagePassing`` (``aggr`` defaults to 'add').
    """

    _cached_edge_index: Optional[Tuple[Tensor, Tensor]]
    _cached_adj_t: Optional[SparseTensor]

    def __init__(self, in_channels: int, out_channels: int,
                 improved: bool = False, cached: bool = False,
                 add_self_loops: bool = True, normalize: bool = True,
                 bias: bool = True, **kwargs):

        kwargs.setdefault('aggr', 'add')
        super(GCNConv, self).__init__(**kwargs)

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.improved = improved
        self.cached = cached
        self.add_self_loops = add_self_loops
        self.normalize = normalize

        self._cached_edge_index = None
        self._cached_adj_t = None

        self.weight = Parameter(torch.Tensor(in_channels, out_channels))

        if bias:
            self.bias = Parameter(torch.Tensor(out_channels))
        else:
            self.register_parameter('bias', None)

        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialise weight/bias and drop any cached normalised graph."""
        glorot(self.weight)
        zeros(self.bias)
        self._cached_edge_index = None
        self._cached_adj_t = None

    def forward(self, x: Tensor, edge_index: Adj,
                edge_weight: OptTensor = None) -> Tensor:
        """Apply the convolution to node features ``x`` over ``edge_index``.

        ``edge_index`` may be a ``(2, E)`` index tensor (optionally with
        ``edge_weight``) or a ``SparseTensor`` adjacency.
        """

        if self.normalize:
            if isinstance(edge_index, Tensor):
                cache = self._cached_edge_index
                if cache is None:
                    # dtype=x.dtype keeps default edge weights in the same
                    # precision as the features (consistent with SGConv).
                    edge_index, edge_weight = gcn_norm(  # yapf: disable
                        edge_index, edge_weight, x.size(self.node_dim),
                        self.improved, self.add_self_loops, dtype=x.dtype)
                    if self.cached:
                        self._cached_edge_index = (edge_index, edge_weight)
                else:
                    edge_index, edge_weight = cache[0], cache[1]

            elif isinstance(edge_index, SparseTensor):
                cache = self._cached_adj_t
                if cache is None:
                    edge_index = gcn_norm(  # yapf: disable
                        edge_index, edge_weight, x.size(self.node_dim),
                        self.improved, self.add_self_loops, dtype=x.dtype)
                    if self.cached:
                        self._cached_adj_t = edge_index
                else:
                    edge_index = cache

        # Linear transform first, then neighbourhood aggregation.
        x = x @ self.weight

        # propagate_type: (x: Tensor, edge_weight: OptTensor)
        out = self.propagate(edge_index, x=x, edge_weight=edge_weight,
                             size=None)

        if self.bias is not None:
            out += self.bias

        return out

    def message(self, x_j: Tensor, edge_weight: OptTensor) -> Tensor:
        """Scale each neighbour feature by its edge weight (if any)."""
        return x_j if edge_weight is None else edge_weight.view(-1, 1) * x_j

    def message_and_aggregate(self, adj_t: SparseTensor, x: Tensor) -> Tensor:
        """Fused message+aggregate path used for ``SparseTensor`` input."""
        return matmul(adj_t, x, reduce=self.aggr)

    def __repr__(self):
        return '{}({}, {})'.format(self.__class__.__name__, self.in_channels,
                                   self.out_channels)
155 |
--------------------------------------------------------------------------------
/prog/base_model/SGConv.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 | from torch_geometric.typing import Adj, OptTensor
3 |
4 | from torch import Tensor
5 | from torch.nn import Linear
6 | from torch_sparse import SparseTensor, matmul
7 | from torch_geometric.nn.conv import MessagePassing
8 | from torch_geometric.nn.conv.gcn_conv import gcn_norm
9 |
10 |
class SGConv(MessagePassing):
    """Simplified graph convolution: K rounds of normalised propagation
    followed by a single linear layer.

    Args:
        in_channels: Size of each input node feature vector.
        out_channels: Size of each output node feature vector.
        K: Number of propagation steps.
        cached: Store the propagated features after the first call and reuse
            them on later calls (the graph/features passed later are ignored).
        add_self_loops: Forwarded to ``gcn_norm``.
        bias: Whether the linear layer has a bias.
        **kwargs: Passed to ``MessagePassing`` (``aggr`` defaults to 'add').
    """

    _cached_x: Optional[Tensor]

    def __init__(self, in_channels: int, out_channels: int, K: int = 1,
                 cached: bool = False, add_self_loops: bool = False,
                 bias: bool = True, **kwargs):
        kwargs.setdefault('aggr', 'add')
        super(SGConv, self).__init__(**kwargs)

        self.in_channels, self.out_channels = in_channels, out_channels
        self.K = K
        self.cached = cached
        self.add_self_loops = add_self_loops
        self._cached_x = None
        self.lin = Linear(in_channels, out_channels, bias=bias)

        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialise the linear layer and forget cached features."""
        self.lin.reset_parameters()
        self._cached_x = None

    def forward(self, x: Tensor, edge_index: Adj,
                edge_weight: OptTensor = None) -> Tensor:
        """Propagate ``x`` K times over the normalised graph, then project."""
        propagated = self._cached_x
        if propagated is not None:
            # Cached path: skip normalisation and propagation entirely.
            x = propagated
        else:
            if isinstance(edge_index, Tensor):
                edge_index, edge_weight = gcn_norm(  # yapf: disable
                    edge_index, edge_weight, x.size(self.node_dim), False,
                    self.add_self_loops, dtype=x.dtype)
            elif isinstance(edge_index, SparseTensor):
                edge_index = gcn_norm(  # yapf: disable
                    edge_index, edge_weight, x.size(self.node_dim), False,
                    self.add_self_loops, dtype=x.dtype)

            for _ in range(self.K):
                # propagate_type: (x: Tensor, edge_weight: OptTensor)
                x = self.propagate(edge_index, x=x, edge_weight=edge_weight,
                                   size=None)
            if self.cached:
                self._cached_x = x

        return self.lin(x)

    def message(self, x_j: Tensor, edge_weight: Tensor) -> Tensor:
        """Scale each neighbour feature by its normalised edge weight."""
        return edge_weight.view(-1, 1) * x_j

    def message_and_aggregate(self, adj_t: SparseTensor, x: Tensor) -> Tensor:
        """Fused message+aggregate path used for ``SparseTensor`` input."""
        return matmul(adj_t, x, reduce=self.aggr)

    def __repr__(self):
        return '{}({}, {}, K={})'.format(type(self).__name__,
                                         self.in_channels, self.out_channels,
                                         self.K)
73 |
--------------------------------------------------------------------------------
/prog/base_model/extract_drug_feature.py:
--------------------------------------------------------------------------------
1 | #Extract Drug features through Deepchem
2 | import os
3 | import rdkit
4 | import deepchem as dc
5 | from rdkit import Chem
6 | import hickle as hkl
7 | '''
8 | CanonicalSMILES = 'CC1CCCC2(C(O2)CC(OC(=O)CC(C(C(=O)C(C1O)C)(C)C)O)C(=CC3=CSC(=N3)C)C)C'
9 | mol = Chem.MolFromSmiles(CanonicalSMILES)
10 | Simles=Chem.MolToSmiles(mol)
11 | '''
# Input: tab-separated file, one drug per line: <pubchem id>\t<SMILES>.
drug_smiles_file='../../data/Drug/222drugs_pubchem_smiles.txt'
# Output: one '<pubchem id>.hkl' file per drug is written into this directory.
save_dir='drug_graph_feat'

pubchemid2smile = {}
with open(drug_smiles_file) as smiles_handle:  # close the file deterministically
    for item in smiles_handle:
        if not item.strip():
            continue  # tolerate blank / trailing empty lines
        fields = item.split('\t')
        pubchemid2smile[fields[0]] = fields[1].strip()

os.makedirs(save_dir, exist_ok=True)

# One featurizer instance is enough; it is reused for every molecule.
featurizer = dc.feat.graph_features.ConvMolFeaturizer()
for each, smiles in pubchemid2smile.items():
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # RDKit returns None for SMILES it cannot parse; fail with a clear message.
        raise ValueError('Cannot parse SMILES for PubChem id %s: %r' % (each, smiles))
    molecules = [mol]
    mol_object = featurizer.featurize(mols=molecules)
    features = mol_object[0].atom_features
    degree_list = mol_object[0].deg_list
    adj_list = mol_object[0].canon_adj_list
    # Stored order [features, adj_list, degree_list] is what data_load.dataload unpacks.
    hkl.dump([features,adj_list,degree_list],'%s/%s.hkl'%(save_dir,each))
27 |
28 |
29 |
30 |
31 |
32 |
--------------------------------------------------------------------------------
/prog/data_load.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import csv
3 | import pandas as pd
4 | import hickle as hkl
5 | import os
def dataload(Drug_info_file, IC50_threds_file, Drug_feature_file, Cell_line_info_file, Genomic_mutation_file,
             Cancer_response_exp_file, Gene_expression_file, Methylation_file):
    """Load drug features, cell-line omics tables and binarised responses.

    Args:
        Drug_info_file: CSV whose column 0 is the drug id and column 5 the
            PubChem id (rows with a non-numeric column 5 are ignored).
        IC50_threds_file: Tab-separated file with a header line, then
            ``<pubchem id>\\t<threshold>`` per line.
        Drug_feature_file: Directory of ``<pubchem id>.hkl`` files, each holding
            ``[feat_mat, adj_list, degree_list]``.
        Cell_line_info_file: Tab-separated annotations with a header line;
            column 1 is the cell-line id, the last column its cancer-type label.
        Genomic_mutation_file, Gene_expression_file, Methylation_file: CSVs
            indexed by cell-line id (first column).
        Cancer_response_exp_file: CSV of responses, rows indexed
            ``<prefix>:<drug id>``, columns are cell-line ids.

    Returns:
        ``(drug_feature, mutation_feature, gexpr_feature, methylation_feature,
        data_new, nb_celllines, nb_drugs)`` where ``data_new`` is a list of
        ``(cell line, pubchem id, label in {1, -1}, cancer type)`` tuples with
        at most one entry per (cell line, drug) pair.
    """
    #-----drug_dataload
    with open(Drug_info_file,'r') as handle:
        rows = list(csv.reader(handle))
    drugid2pubchemid = {item[0]:item[5] for item in rows if item[5].isdigit()}

    drug2thred={}
    with open(IC50_threds_file) as handle:
        for line in handle.readlines()[1:]:  # skip header
            drug2thred[str(line.split('\t')[0])]=float(line.strip().split('\t')[1])
    # (drug_threshold.csv carries the same thresholds in comma-separated form.)

    drug_pubchem_id_set = []
    drug_feature = {}
    for each in os.listdir(Drug_feature_file):
        feature_id = each.split('.')[0]
        drug_pubchem_id_set.append(feature_id)
        feat_mat,adj_list,degree_list = hkl.load('%s/%s'%(Drug_feature_file,each))
        drug_feature[feature_id] = [feat_mat,adj_list,degree_list]
    assert len(drug_pubchem_id_set)==len(drug_feature.values())
    # Hash-based membership instead of scanning the list for every pair.
    available_drugs = set(drug_pubchem_id_set)

    #-----cell line_dataload
    cellline2cancertype ={}
    with open(Cell_line_info_file) as handle:
        for line in handle.readlines()[1:]:  # skip header
            cellline_id = line.split('\t')[1]
            TCGA_label = line.strip().split('\t')[-1]
            cellline2cancertype[cellline_id] = TCGA_label
    mutation_feature = pd.read_csv(Genomic_mutation_file,sep=',',header=0,index_col=[0])
    gexpr_feature = pd.read_csv(Gene_expression_file,sep=',',header=0,index_col=[0])
    # Align the mutation rows with the expression table's cell lines.
    mutation_feature = mutation_feature.loc[list(gexpr_feature.index)]
    methylation_feature = pd.read_csv(Methylation_file,sep=',',header=0,index_col=[0])
    assert methylation_feature.shape[0]==gexpr_feature.shape[0]==mutation_feature.shape[0]
    experiment_data = pd.read_csv(Cancer_response_exp_file,sep=',',header=0,index_col=[0])

    #-----drug_cell line_pairs dataload
    drug_match_list=[item for item in experiment_data.index if item.split(':')[1] in drugid2pubchemid.keys()]
    experiment_data_filtered = experiment_data.loc[drug_match_list]
    data_idx = []
    use_thred=True
    for each_drug in experiment_data_filtered.index:
        # The PubChem id only depends on the drug row, so resolve it once.
        pubchem_id = drugid2pubchemid[each_drug.split(':')[-1]]
        if str(pubchem_id) not in available_drugs:
            continue
        for each_cellline in experiment_data_filtered.columns:
            if each_cellline not in mutation_feature.index:
                continue
            response = experiment_data_filtered.loc[each_drug,each_cellline]
            if np.isnan(response) or each_cellline not in cellline2cancertype:
                continue
            ln_IC50 = float(response)
            if use_thred:
                # Drug-specific threshold; drugs without one are skipped.
                if pubchem_id in drug2thred:
                    binary_IC50 = 1 if ln_IC50 < drug2thred[pubchem_id] else -1
                    data_idx.append((each_cellline,pubchem_id,binary_IC50,cellline2cancertype[each_cellline]))
            else:
                binary_IC50 = 1 if ln_IC50 < -2 else -1
                data_idx.append((each_cellline,pubchem_id,binary_IC50,cellline2cancertype[each_cellline]))

    #----eliminate ambiguity responses
    # Descending sort puts label 1 before -1 for the same (cell line, drug),
    # so keeping the first occurrence prefers the positive label.
    data_sort=sorted(data_idx, key=(lambda x: [x[0], x[1], x[2]]), reverse=True)
    seen_pairs = set()
    data_new = []
    for record in data_sort:
        pair = (record[0], record[1])
        if pair not in seen_pairs:
            seen_pairs.add(pair)
            data_new.append(record)
    nb_celllines = len(set([item[0] for item in data_new]))
    nb_drugs = len(set([item[1] for item in data_new]))
    print('All %d pairs across %d cell lines and %d drugs.'%(len(data_new),nb_celllines,nb_drugs))

    return drug_feature, mutation_feature, gexpr_feature, methylation_feature, data_new, nb_celllines,nb_drugs
77 |
--------------------------------------------------------------------------------
/prog/data_process.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import torch
4 | import torch.utils.data as Data
5 | from graphset import *
6 | from scipy.sparse import coo_matrix
7 |
def CalculateGraphFeat(feat_mat,adj_list):
    """Turn a per-atom neighbour list into a (2, E) edge-index array.

    Args:
        feat_mat: Atom feature matrix with one row per atom.
        adj_list: ``adj_list[i]`` holds the neighbour indices of atom ``i``;
            the resulting adjacency must be symmetric.

    Returns:
        ``[feat_mat, adj_index]`` where ``adj_index`` stacks the row and column
        indices of every non-zero adjacency entry.
    """
    n_atoms = len(adj_list)
    assert feat_mat.shape[0] == n_atoms
    adj_mat = np.zeros((n_atoms, n_atoms), dtype='float32')
    for atom, neighbours in enumerate(adj_list):
        for other in neighbours:
            adj_mat[atom, int(other)] = 1
    # Bonds are undirected, so every edge must appear in both directions.
    assert np.allclose(adj_mat, adj_mat.T)
    sources, targets = np.where(adj_mat == 1)
    edge_index = np.array(np.vstack((sources, targets)))
    return [feat_mat, edge_index]
19 |
def FeatureExtract(drug_feature):
    """Convert every row of ``drug_feature`` into ``[feat_mat, edge_index]``.

    ``drug_feature`` is indexed positionally with ``.iloc``; each row must
    unpack into ``(feat_mat, adj_list, <ignored>)``.
    """
    drug_data = []
    for position in range(len(drug_feature)):
        feat_mat, adj_list, _unused = drug_feature.iloc[position]
        drug_data.append(CalculateGraphFeat(feat_mat, adj_list))
    return drug_data
26 |
def cmask(num, ratio, seed):
    """Return a shuffled boolean mask of length ``num``.

    ``int(ratio * num)`` entries are False and the rest True. The global NumPy
    RNG is re-seeded with ``seed`` before shuffling, so the result is
    reproducible (and the global RNG state is changed as a side effect).
    """
    held_out = int(ratio * num)
    mask = np.full(num, True)
    mask[:held_out] = False
    np.random.seed(seed)
    np.random.shuffle(mask)
    return mask
33 |
def process(drug_feature, mutation_feature, gexpr_feature, methylation_feature, data_new, nb_celllines, nb_drugs,
            use_independent_testset=True):
    """Build loaders, edge list, labels and train/test masks for the model.

    Node numbering: cell lines get ids ``0 .. n_cells-1`` (sorted by name) and
    drugs get ids ``n_cells .. n_cells+n_drugs-1`` (sorted by id).

    Args:
        drug_feature: Mapping ``pubchem id -> [feat_mat, adj_list, degree_list]``.
        mutation_feature, gexpr_feature, methylation_feature: DataFrames indexed
            by cell-line id.
        data_new: Iterable of tuples whose first three items are
            ``(cell line, pubchem id, label in {1, -1})``.
        nb_celllines, nb_drugs: Number of distinct cell lines / drugs in
            ``data_new`` (used for batch sizes and mask shapes).
        use_independent_testset: True (default) holds out 10% of all pairs as
            the test set. False first keeps a random 90% of the pairs and then
            splits that subset 80/20 into train/validation.

    Returns:
        ``(drug_set, cellline_set, train_edge, label_pos, train_mask,
        test_mask, atom_shape)``. Masks and ``label_pos`` are flattened
        ``nb_celllines * nb_drugs`` tensors; ``train_edge`` lists every training
        edge in both directions as ``(node, node, label)`` rows.
    """
    #-----construct cell line-drug response pairs
    cellineid = sorted({item[0] for item in data_new})
    pubmedid = sorted({item[1] for item in data_new})
    # Dict lookups replace a per-pair scan over all ids.
    cell_index = {name: idx for idx, name in enumerate(cellineid)}
    drug_offset = len(cellineid)
    drug_index = {name: drug_offset + idx for idx, name in enumerate(pubmedid)}
    cellline_num = np.array([cell_index[item[0]] for item in data_new])
    pubmed_num = np.array([drug_index[item[1]] for item in data_new])
    IC_num = np.array([item[2] for item in data_new])
    allpairs = np.vstack((cellline_num,pubmed_num,IC_num)).T
    allpairs = allpairs[allpairs[:,2].argsort()]

    #----drug_feature_input
    drug_feature=pd.DataFrame(drug_feature).T
    drug_feature=drug_feature.loc[pubmedid]
    # Explicit positional access: first drug, first column (its atom feature matrix).
    atom_shape=drug_feature.iloc[0, 0].shape[-1]
    drug_data = FeatureExtract(drug_feature)

    #----cell line_feature_input
    gexpr_feature=gexpr_feature.loc[cellineid]
    mutation_feature=mutation_feature.loc[cellineid]
    methylation_feature=methylation_feature.loc[cellineid]
    mutation = torch.from_numpy(np.array(mutation_feature,dtype='float32'))
    # Two extra singleton axes: (N, F) -> (N, 1, 1, F).
    mutation = torch.unsqueeze(mutation, dim=1)
    mutation = torch.unsqueeze(mutation, dim=1)
    gexpr = torch.from_numpy(np.array(gexpr_feature,dtype='float32'))
    methylation = torch.from_numpy(np.array(methylation_feature,dtype='float32'))

    #---compile training set and test set
    # Batch sizes equal the node counts, so each loader yields a single batch.
    drug_set = Data.DataLoader(dataset=GraphDataset(graphs_dict=drug_data),collate_fn=collate,batch_size=nb_drugs,shuffle=False)
    cellline_set = Data.DataLoader(dataset=Data.TensorDataset(mutation,gexpr,methylation),batch_size=nb_celllines,shuffle=False)

    if use_independent_testset:
        labelled = allpairs
        split_mask = cmask(len(allpairs), 0.1, 666)
    else:
        labelled = allpairs[cmask(len(allpairs), 0.1, 666)]
        split_mask = cmask(len(labelled), 0.2, 66)

    # Boolean indexing copies, so the in-place shifts below never touch `labelled`.
    train = labelled[split_mask][:, 0:3]
    test = labelled[~split_mask][:, 0:3]
    # Shift drug node ids back to column indices 0 .. nb_drugs-1.
    train[:, 1] -= nb_celllines
    test[:, 1] -= nb_celllines
    train_mask = coo_matrix((np.ones(train.shape[0], dtype=bool), (train[:, 0], train[:, 1])),
                            shape=(nb_celllines, nb_drugs)).toarray()
    test_mask = coo_matrix((np.ones(test.shape[0], dtype=bool), (test[:, 0], test[:, 1])),
                           shape=(nb_celllines, nb_drugs)).toarray()
    train_mask = torch.from_numpy(train_mask).view(-1)
    test_mask = torch.from_numpy(test_mask).view(-1)

    # Dense 0/1 label matrix: 1 where the pair is labelled positive.
    pos_edge = labelled[labelled[:, 2] == 1, 0:2]
    pos_edge[:, 1] -= nb_celllines
    label_pos = coo_matrix((np.ones(pos_edge.shape[0]), (pos_edge[:, 0], pos_edge[:, 1])),
                           shape=(nb_celllines, nb_drugs)).toarray()
    label_pos = torch.from_numpy(label_pos).type(torch.FloatTensor).view(-1)

    # Training edges keep the global node numbering and are listed both ways.
    train_edge = labelled[split_mask]
    train_edge = np.vstack((train_edge, train_edge[:, [1, 0, 2]]))

    return drug_set,cellline_set,train_edge,label_pos,train_mask,test_mask,atom_shape
103 |
--------------------------------------------------------------------------------
/prog/graphCDR-ccle.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import pandas as pd
3 | import rdkit
4 | from rdkit import Chem
5 | import deepchem as dc
6 | import time
7 | from model import *
8 | from data_process import process
9 | import argparse
10 | from my_utiils import *
11 | from data_load import dataload
12 |
parser = argparse.ArgumentParser(description='Drug_response_pre')
# alph / beta weight the two model.loss terms in train(); the supervised
# BCE term gets the remaining weight 1 - alph - beta.
parser.add_argument('--alph', dest='alph', type=float, default=0.30, help='')
parser.add_argument('--beta', dest='beta', type=float, default=0.30, help='')
parser.add_argument('--epoch', dest='epoch', type=int, default=200, help='')
parser.add_argument('--hidden_channels', dest='hidden_channels', type=int, default=256, help='')
parser.add_argument('--output_channels', dest='output_channels', type=int, default=100, help='')
args = parser.parse_args()
start_time = time.time()
#--------cell line feature input
Genomic_mutation_file = '../data/Celline/genomic_mutation_34673_demap_features.csv'
Gene_expression_file = '../data/Celline/genomic_expression_561celllines_697genes_demap_features.csv'
Methylation_file = '../data/Celline/genomic_methylation_561celllines_808genes_demap_features.csv'
gexpr_feature = pd.read_csv(Gene_expression_file,sep=',',header=0,index_col=[0])
mutation_feature = pd.read_csv(Genomic_mutation_file,sep=',',header=0,index_col=[0])
# Align mutation rows with the expression table's cell lines.
mutation_feature = mutation_feature.loc[list(gexpr_feature.index)]
methylation_feature = pd.read_csv(Methylation_file,sep=',',header=0,index_col=[0])
assert methylation_feature.shape[0]==gexpr_feature.shape[0]==mutation_feature.shape[0]

#--------drug feature input
drug_smiles_file='../data/CCLE/CCLE_smiles.csv'
drug=pd.read_csv(drug_smiles_file, sep=',',header=0)
drug_feature = {}
featurizer = dc.feat.ConvMolFeaturizer()
for pubchem_id, smiles in zip(drug['pubchem'], drug['isosmiles']):
    mol=Chem.MolFromSmiles(smiles)
    if mol is None:
        # RDKit returns None for SMILES it cannot parse; fail with a clear message.
        raise ValueError('Cannot parse SMILES for PubChem id %s: %r' % (pubchem_id, smiles))
    X = featurizer.featurize(mol)
    # Third slot is a placeholder; downstream code only uses the first two entries.
    drug_feature[str(pubchem_id)]=[X[0].get_atom_features(),X[0].get_adjacency_list(),1]

#--------responses input
response='../data/CCLE/CCLE_response.csv'
datar=pd.read_csv(response, sep=',',header=0)
data_idx = []
thred=0.8
for depmap_id, pubchem_id, z_score in zip(datar['DepMap_ID'],datar['pubchem'],datar['Z_SCORE']):
    t=1 if z_score>thred else -1
    data_idx.append((depmap_id,str(pubchem_id),t))
#----eliminate ambiguity responses
# Descending sort puts label 1 before -1 for the same (cell line, drug), so
# keeping the first occurrence prefers the positive label. A set makes the
# duplicate check O(1) per pair instead of a scan over all pairs seen so far.
data_sort=sorted(data_idx, key=(lambda x: [x[0], x[1], x[2]]), reverse=True)
seen_pairs = set()
data_new = []
for record in data_sort:
    pair = (record[0], record[1])
    if pair not in seen_pairs:
        seen_pairs.add(pair)
        data_new.append(record)
nb_celllines = len(set([item[0] for item in data_new]))
nb_drugs = len(set([item[1] for item in data_new]))
print('All %d pairs across %d cell lines and %d drugs.'%(len(data_new),nb_celllines,nb_drugs))

drug_set,cellline_set,train_edge,label_pos,train_mask,test_mask,atom_shape = process(drug_feature, mutation_feature, gexpr_feature, methylation_feature, data_new, nb_celllines, nb_drugs)

model = GraphCDR(hidden_channels=args.hidden_channels, encoder=Encoder(args.output_channels, args.hidden_channels), summary=Summary(args.output_channels, args.hidden_channels),
                 feat=NodeRepresentation(atom_shape,gexpr_feature.shape[-1],methylation_feature.shape[-1],args.output_channels),index=nb_celllines)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0)
myloss = nn.BCELoss()
67 |
def train():
    """Run one optimisation pass over the paired loaders and print the summed loss.

    The objective mixes the supervised BCE on the training pairs (weight
    ``1 - alph - beta``) with the two ``model.loss`` terms (weights ``alph``
    and ``beta``).
    """
    model.train()
    running_loss = 0
    for drug, cell in zip(drug_set, cellline_set):
        optimizer.zero_grad()
        outputs = model(drug.x, drug.edge_index, drug.batch, cell[0], cell[1], cell[2], train_edge)
        pos_z, neg_z, summary_pos, summary_neg, pos_adj = outputs
        dgi_pos = model.loss(pos_z, neg_z, summary_pos)
        dgi_neg = model.loss(neg_z, pos_z, summary_neg)
        pos_loss = myloss(pos_adj[train_mask], label_pos[train_mask])
        loss = (1 - args.alph - args.beta) * pos_loss + args.alph * dgi_pos + args.beta * dgi_neg
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print('train loss: ', str(round(running_loss, 4)))
82 |
def test():
    """Evaluate on the held-out pairs; print and return (AUC, AUPR, F1, ACC)."""
    model.eval()
    with torch.no_grad():
        for drug, cell in zip(drug_set, cellline_set):
            # Only the last output (predicted adjacency) is needed here.
            *_, pre_adj = model(drug.x, drug.edge_index, drug.batch, cell[0], cell[1], cell[2], train_edge)
        loss_temp = myloss(pre_adj[test_mask], label_pos[test_mask])
    yp = pre_adj[test_mask].detach().numpy()
    ytest = label_pos[test_mask].detach().numpy()
    scores = metrics_graph(ytest, yp)
    AUC, AUPR, F1, ACC = scores
    print('test loss: ', str(round(loss_temp.item(), 4)))
    print('test auc: ' + str(round(AUC, 4)) + ' test aupr: ' + str(round(AUPR, 4)) +
          ' test f1: ' + str(round(F1, 4)) + ' test acc: ' + str(round(ACC, 4)))
    return AUC, AUPR, F1, ACC
96 |
#------main
# Train for args.epoch epochs and remember the metrics of the epoch with the
# highest AUC as reported by test().
final_AUC = final_AUPR = final_F1 = final_ACC = 0
for epoch in range(args.epoch):
    print('\nepoch: ' + str(epoch))
    train()
    AUC, AUPR, F1, ACC = test()
    if AUC > final_AUC:
        final_AUC, final_AUPR, final_F1, final_ACC = AUC, AUPR, F1, ACC
elapsed = time.time() - start_time
separator = '---------------------------------------'
print(separator)
print('Elapsed time: ', round(elapsed, 4))
print('Final_AUC: ' + str(round(final_AUC, 4)) + ' Final_AUPR: ' + str(round(final_AUPR, 4)) +
      ' Final_F1: ' + str(round(final_F1, 4)) + ' Final_ACC: ' + str(round(final_ACC, 4)))
print(separator)
111 |
--------------------------------------------------------------------------------
/prog/graphCDR.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import time
3 | from model import *
4 | from data_process import process
5 | import argparse
6 | from my_utiils import *
7 | from data_load import dataload
8 |
# ---- command-line options --------------------------------------------------
# alph / beta weight the two model.loss terms in train(); the supervised BCE
# term gets the remaining weight 1 - alph - beta.
parser = argparse.ArgumentParser(description='Drug_response_pre')
parser.add_argument('--alph', type=float, default=0.30, help='')
parser.add_argument('--beta', type=float, default=0.30, help='')
parser.add_argument('--epoch', type=int, default=350, help='')
parser.add_argument('--hidden_channels', type=int, default=256, help='')
parser.add_argument('--output_channels', type=int, default=100, help='')
args = parser.parse_args()
start_time = time.time()

#------data files
Drug_info_file = '../data/Drug/1.Drug_listMon Jun 24 09_00_55 2019.csv'
IC50_threds_file = '../data/Drug/drug_threshold.txt'
Drug_feature_file = '../data/Drug/drug_graph_feat'
Cell_line_info_file = '../data/Celline/Cell_lines_annotations.txt'
Genomic_mutation_file = '../data/Celline/genomic_mutation_34673_demap_features.csv'
Cancer_response_exp_file = '../data/Celline/GDSC_IC50.csv'
Gene_expression_file = '../data/Celline/genomic_expression_561celllines_697genes_demap_features.csv'
Methylation_file = '../data/Celline/genomic_methylation_561celllines_808genes_demap_features.csv'

#-------bio-feature extraction
loaded = dataload(
    Drug_info_file, IC50_threds_file, Drug_feature_file, Cell_line_info_file,
    Genomic_mutation_file, Cancer_response_exp_file, Gene_expression_file,
    Methylation_file)
(drug_feature, mutation_feature, gexpr_feature, methylation_feature,
 data_new, nb_celllines, nb_drugs) = loaded

#-------split train and test sets
processed = process(drug_feature, mutation_feature, gexpr_feature,
                    methylation_feature, data_new, nb_celllines, nb_drugs)
(drug_set, cellline_set, train_edge, label_pos,
 train_mask, test_mask, atom_shape) = processed

#-------model, optimiser and supervised loss
# Sub-modules are built in the same order as the original keyword arguments
# (encoder, summary, feat) so random initialisation is consumed identically.
encoder = Encoder(args.output_channels, args.hidden_channels)
summary = Summary(args.output_channels, args.hidden_channels)
feat = NodeRepresentation(atom_shape, gexpr_feature.shape[-1],
                          methylation_feature.shape[-1], args.output_channels)
model = GraphCDR(hidden_channels=args.hidden_channels, encoder=encoder,
                 summary=summary, feat=feat, index=nb_celllines)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0)
myloss = nn.BCELoss()
36 |
def train():
    """Run one optimisation pass over the paired loaders and print the summed loss.

    The objective mixes the supervised BCE on the training pairs (weight
    ``1 - alph - beta``) with the two ``model.loss`` terms (weights ``alph``
    and ``beta``).
    """
    model.train()
    running_loss = 0
    for drug, cell in zip(drug_set, cellline_set):
        optimizer.zero_grad()
        outputs = model(drug.x, drug.edge_index, drug.batch, cell[0], cell[1], cell[2], train_edge)
        pos_z, neg_z, summary_pos, summary_neg, pos_adj = outputs
        dgi_pos = model.loss(pos_z, neg_z, summary_pos)
        dgi_neg = model.loss(neg_z, pos_z, summary_neg)
        pos_loss = myloss(pos_adj[train_mask], label_pos[train_mask])
        loss = (1 - args.alph - args.beta) * pos_loss + args.alph * dgi_pos + args.beta * dgi_neg
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print('train loss: ', str(round(running_loss, 4)))
51 |
def test():
    """Evaluate on the held-out pairs; print and return (AUC, AUPR, F1, ACC)."""
    model.eval()
    with torch.no_grad():
        for drug, cell in zip(drug_set, cellline_set):
            # Only the last output (predicted adjacency) is needed here.
            *_, pre_adj = model(drug.x, drug.edge_index, drug.batch, cell[0], cell[1], cell[2], train_edge)
        loss_temp = myloss(pre_adj[test_mask], label_pos[test_mask])
    yp = pre_adj[test_mask].detach().numpy()
    ytest = label_pos[test_mask].detach().numpy()
    scores = metrics_graph(ytest, yp)
    AUC, AUPR, F1, ACC = scores
    print('test loss: ', str(round(loss_temp.item(), 4)))
    print('test auc: ' + str(round(AUC, 4)) + ' test aupr: ' + str(round(AUPR, 4)) +
          ' test f1: ' + str(round(F1, 4)) + ' test acc: ' + str(round(ACC, 4)))
    return AUC, AUPR, F1, ACC
65 |
#------main
# Train for args.epoch epochs and remember the metrics of the epoch with the
# highest AUC as reported by test().
final_AUC = final_AUPR = final_F1 = final_ACC = 0
for epoch in range(args.epoch):
    print('\nepoch: ' + str(epoch))
    train()
    AUC, AUPR, F1, ACC = test()
    if AUC > final_AUC:
        final_AUC, final_AUPR, final_F1, final_ACC = AUC, AUPR, F1, ACC
elapsed = time.time() - start_time
separator = '---------------------------------------'
print(separator)
print('Elapsed time: ', round(elapsed, 4))
print('Final_AUC: ' + str(round(final_AUC, 4)) + ' Final_AUPR: ' + str(round(final_AUPR, 4)) +
      ' Final_F1: ' + str(round(final_F1, 4)) + ' Final_ACC: ' + str(round(final_ACC, 4)))
print(separator)
80 |
--------------------------------------------------------------------------------
/prog/graphset.py:
--------------------------------------------------------------------------------
1 | from torch_geometric.data import InMemoryDataset, Batch
2 | from torch_geometric import data as DATA
3 | import torch
class GraphDataset(InMemoryDataset):
    """In-memory list of molecular graphs built directly from Python data.

    Nothing is downloaded or read from / written to disk: the download and
    processing hooks of the base class are overridden with no-ops and the
    graphs are created from ``graphs_dict`` in the constructor.

    Args:
        root, transform, pre_transform: Forwarded to ``InMemoryDataset``.
        dataset, dttype: Only used to form ``processed_file_names``.
        graphs_dict: Iterable of ``(features, edge_index)`` pairs, one per
            graph. ``None`` (the default) yields an empty dataset.
    """

    def __init__(self, root='.', dataset='davis', transform=None, pre_transform=None, graphs_dict=None, dttype=None):
        super(GraphDataset, self).__init__(root, transform, pre_transform)
        self.dataset = dataset
        self.dttype = dttype
        self.process(graphs_dict)

    @property
    def raw_file_names(self):
        # No raw files are used.
        pass

    @property
    def processed_file_names(self):
        return [self.dataset + f'_data_{self.dttype}.pt']

    def download(self):
        pass

    def _download(self):
        pass

    def _process(self):
        # Overridden so the base class does not trigger on-disk processing.
        pass

    def process(self, graphs_dict=None):
        """Convert ``(features, edge_index)`` pairs into ``Data`` objects."""
        if graphs_dict is None:
            # Previously the default argument crashed with a TypeError here.
            graphs_dict = []
        data_list = []
        for data_mol in graphs_dict:
            features, edge_index = data_mol[0], data_mol[1]
            GCNData = DATA.Data(x=torch.Tensor(features), edge_index=torch.LongTensor(edge_index))
            data_list.append(GCNData)
        self.data = data_list

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]
43 |
def collate(data_list):
    """Merge a sequence of graph ``Data`` objects into a single ``Batch``."""
    return Batch.from_data_list(list(data_list))
--------------------------------------------------------------------------------
/prog/model.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch.nn.functional as F
3 | from torch_geometric.nn import global_max_pool as gmp, global_mean_pool
4 | from base_model.GCNConv import GCNConv
5 | from base_model.SGConv import SGConv
6 | from torch.nn import Parameter
7 | from my_utiils import *
8 |
9 | EPS = 1e-15
class NodeRepresentation(nn.Module):
    """Encode drugs (molecular graphs) and cell lines (three omics) as nodes.

    Args:
        gcn_layer: Input atom-feature dimension of the first graph convolution.
        dim_gexp: Number of gene-expression features per cell line.
        dim_methy: Number of methylation features per cell line.
        output: Embedding size produced for both drugs and cell lines.
        units_list: Hidden sizes of the drug graph convolutions.
        use_relu, use_bn: Stored but not consulted by ``forward``.
        use_GMP: Global max pooling over atoms if True, mean pooling otherwise.
        use_mutation, use_gexpr, use_methylation: Enable the respective omics
            branch. ``forward`` supports disabling at most one of them.

    NOTE: the hidden graph layers are now held in ``nn.ModuleList`` so their
    parameters are registered (optimised, moved by ``.to()``, switched by
    ``.eval()`` and saved in ``state_dict``). Checkpoints written while they
    lived in plain lists lack the ``graph_conv.*`` / ``graph_bn.*`` keys.
    """

    def __init__(self, gcn_layer, dim_gexp, dim_methy, output, units_list=(256, 256, 256), use_relu=True, use_bn=True,
                 use_GMP=True, use_mutation=True, use_gexpr=True, use_methylation=True):
        super(NodeRepresentation, self).__init__()
        torch.manual_seed(0)
        units_list = list(units_list)  # private copy; the default is immutable
        # -------drug layers
        self.use_relu = use_relu
        self.use_bn = use_bn
        self.units_list = units_list
        self.use_GMP = use_GMP
        self.use_mutation = use_mutation
        self.use_gexpr = use_gexpr
        self.use_methylation = use_methylation
        self.conv1 = SGConv(gcn_layer, units_list[0])
        self.batch_conv1 = nn.BatchNorm1d(units_list[0])
        # ModuleList (not a plain list) so these layers are registered sub-modules.
        self.graph_conv = nn.ModuleList()
        self.graph_bn = nn.ModuleList()
        for i in range(len(units_list) - 1):
            self.graph_conv.append(SGConv(units_list[i], units_list[i + 1]))
            self.graph_bn.append(nn.BatchNorm1d((units_list[i + 1])))
        self.conv_end = SGConv(units_list[-1], output)
        self.batch_end = nn.BatchNorm1d(output)
        # --------cell line layers (three omics)
        # -------gexp_layer
        self.fc_gexp1 = nn.Linear(dim_gexp, 256)
        self.batch_gexp1 = nn.BatchNorm1d(256)
        self.fc_gexp2 = nn.Linear(256, output)
        # -------methy_layer
        self.fc_methy1 = nn.Linear(dim_methy, 256)
        self.batch_methy1 = nn.BatchNorm1d(256)
        self.fc_methy2 = nn.Linear(256, output)
        # -------mut_layer
        self.cov1 = nn.Conv2d(1, 50, (1, 700), stride=(1, 5))
        self.cov2 = nn.Conv2d(50, 30, (1, 5), stride=(1, 2))
        self.fla_mut = nn.Flatten()
        # 2010 is the flattened size after the conv/pool stack; it is tied to
        # the mutation feature width (presumably the 34673-column table) -- TODO confirm.
        self.fc_mut = nn.Linear(2010, output)
        # ------Concatenate_three omics
        # Input width follows the number of enabled omics branches
        # (3 * 100 = 300 with the defaults, as before).
        n_omics = int(bool(use_mutation)) + int(bool(use_gexpr)) + int(bool(use_methylation))
        self.fcat = nn.Linear(n_omics * output, output)
        # Normalises the stacked (cell line + drug) embeddings of width `output`.
        self.batchc = nn.BatchNorm1d(output)
        self.reset_para()

    def reset_para(self):
        """Xavier-initialise every Conv2d/Linear weight and zero its bias."""
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
        return

    def forward(self, drug_feature, drug_adj, ibatch, mutation_data, gexpr_data, methylation_data):
        """Return cell-line embeddings stacked on top of drug embeddings.

        ``ibatch`` assigns each atom row of ``drug_feature`` to its molecule
        for the global pooling step. Rows of the result are all cell lines
        first, then all drugs.
        """
        # -----drug representation
        x_drug = self.conv1(drug_feature, drug_adj)
        x_drug = F.relu(x_drug)
        x_drug = self.batch_conv1(x_drug)
        for i in range(len(self.units_list) - 1):
            x_drug = self.graph_conv[i](x_drug, drug_adj)
            x_drug = F.relu(x_drug)
            x_drug = self.graph_bn[i](x_drug)
        x_drug = self.conv_end(x_drug, drug_adj)
        x_drug = F.relu(x_drug)
        x_drug = self.batch_end(x_drug)
        # Pool atom embeddings into one vector per molecule.
        if self.use_GMP:
            x_drug = gmp(x_drug, ibatch)
        else:
            x_drug = global_mean_pool(x_drug, ibatch)

        # -----cell line representation
        # -----mutation representation
        if self.use_mutation:
            x_mutation = torch.tanh(self.cov1(mutation_data))
            x_mutation = F.max_pool2d(x_mutation, (1, 5))
            x_mutation = F.relu(self.cov2(x_mutation))
            x_mutation = F.max_pool2d(x_mutation, (1, 10))
            x_mutation = self.fla_mut(x_mutation)
            x_mutation = F.relu(self.fc_mut(x_mutation))

        # ----gexpr representation
        if self.use_gexpr:
            x_gexpr = torch.tanh(self.fc_gexp1(gexpr_data))
            x_gexpr = self.batch_gexp1(x_gexpr)
            x_gexpr = F.relu(self.fc_gexp2(x_gexpr))

        # ----methylation representation
        if self.use_methylation:
            x_methylation = torch.tanh(self.fc_methy1(methylation_data))
            x_methylation = self.batch_methy1(x_methylation)
            x_methylation = F.relu(self.fc_methy2(x_methylation))

        # ------Concatenate representations of three omics
        if self.use_gexpr==False:
            x_cell = torch.cat((x_mutation, x_methylation), 1)
        elif self.use_mutation==False:
            x_cell = torch.cat((x_gexpr, x_methylation), 1)
        elif self.use_methylation == False:
            x_cell = torch.cat((x_mutation, x_gexpr), 1)
        else:
            x_cell = torch.cat((x_mutation, x_gexpr, x_methylation), 1)
        x_cell = F.relu(self.fcat(x_cell))

        #combine representations of cell line and drug
        x_all = torch.cat((x_cell, x_drug), 0)
        x_all = self.batchc(x_all)
        return x_all
116 |
class Encoder(nn.Module):
    """Single-layer graph encoder: one GCNConv followed by a PReLU.

    NOTE(review): ``cached=True`` is passed to GCNConv while GraphCDR.forward
    calls this encoder with two different edge sets; if this GCNConv caches
    its normalised adjacency, confirm the second call does not reuse the
    cache built by the first.
    """

    def __init__(self, in_channels, hidden_channels):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels, cached=True)
        # One learnable negative slope per hidden channel.
        self.prelu1 = nn.PReLU(hidden_channels)

    def forward(self, x, edge_index):
        """Return PReLU(GCNConv(x, edge_index))."""
        return self.prelu1(self.conv1(x, edge_index))
130 |
class Summary(nn.Module):
    """Attention-style readout that condenses node embeddings into one vector.

    A single linear layer scores every node from the concatenation of its two
    feature sets; the tanh-squashed scores are normalised with a softmax over
    the nodes and used as weights for a weighted sum of ``xn``.

    Args:
        ino: number of features per row of ``xo``.
        inn: number of features per row of ``xn``.
    """

    def __init__(self, ino, inn):
        super(Summary, self).__init__()
        self.fc1 = nn.Linear(ino + inn, 1)

    def forward(self, xo, xn):
        """Return the weighted sum of the rows of ``xn``.

        Args:
            xo: tensor with one row per node and ``ino`` columns.
            xn: tensor with one row per node and ``inn`` columns.

        Returns:
            1-D tensor of length ``inn``.
        """
        score = self.fc1(torch.cat((xo, xn), 1))
        # Squeeze only the trailing singleton dimension. A bare squeeze()
        # also removes the node dimension when there is a single node, which
        # yields a 0-d tensor that torch.matmul rejects.
        score = torch.tanh(torch.squeeze(score, -1))
        # Softmax over nodes; tanh bounds the scores to [-1, 1], so exp() is
        # safe without the usual max-subtraction.
        weights = torch.exp(score)
        weights = weights / weights.sum()
        return torch.matmul(weights, xn)
142 |
143 |
class GraphCDR(nn.Module):
    """Contrastive cell-line/drug response model.

    Combines a node-attribute module (``feat``), a graph ``encoder`` and a
    ``summary`` readout. The forward pass encodes the same node attributes
    over the positive-edge graph and the negative-edge ("corrupted") graph,
    and predicts a cell-line x drug score matrix from the positive
    embeddings.

    Args:
        hidden_channels: size of the square discriminator weight matrix.
        encoder: module called as ``encoder(features, edge_index)``.
        summary: module called as ``summary(features, embeddings)``.
        feat: module producing the stacked node-attribute matrix.
        index: row offset splitting the node matrix; rows before it are
            treated as cell lines, the remaining rows as drugs.
    """

    def __init__(self, hidden_channels, encoder, summary, feat, index):
        super().__init__()
        self.hidden_channels = hidden_channels
        self.encoder = encoder
        self.summary = summary
        self.feat = feat
        self.index = index
        # Bilinear weight used by discriminate().
        self.weight = Parameter(torch.Tensor(hidden_channels, hidden_channels))
        self.act = nn.Sigmoid()
        # Projections of the layer-0 attributes; the input size 100 is
        # hard-coded -- presumably the feature width produced by `feat`.
        self.fc = nn.Linear(100, 10)
        self.fd = nn.Linear(100, 10)
        self.reset_parameters()

    def reset_parameters(self):
        """Reset encoder and summary, then re-initialise the weight matrix and all Linear layers."""
        reset(self.encoder)
        reset(self.summary)
        glorot(self.weight)
        linear_layers = (mod for mod in self.modules() if isinstance(mod, nn.Linear))
        for layer in linear_layers:
            nn.init.xavier_uniform_(layer.weight)
            if layer.bias is None:
                continue
            nn.init.zeros_(layer.bias)

    def forward(self, drug_feature, drug_adj, ibatch, mutation_data, gexpr_data, methylation_data, edge):
        """Encode both graphs and predict the response matrix.

        ``edge`` is a NumPy array whose first two columns hold node indices
        and whose third column is the label (1 for the CDR graph, -1 for the
        corrupted graph).

        Returns:
            ``(pos_z, neg_z, summary_pos, summary_neg, scores)`` where
            ``scores`` is the flattened sigmoid of the cell x drug inner
            products.
        """
        # --- CDR graph edges and corrupted CDR graph edges
        labels = edge[:, 2]
        pos_edge = torch.from_numpy(edge[labels == 1, 0:2].T)
        neg_edge = torch.from_numpy(edge[labels == -1, 0:2].T)

        # --- cell + drug node attributes
        feature = self.feat(drug_feature, drug_adj, ibatch, mutation_data, gexpr_data, methylation_data)

        # --- node embeddings on the CDR graph and on the corrupted graph
        pos_z = self.encoder(feature, pos_edge)
        neg_z = self.encoder(feature, neg_edge)

        # --- graph-level embeddings (summaries)
        summary_pos = self.summary(feature, pos_z)
        summary_neg = self.summary(feature, neg_z)

        # --- concatenate encoder-layer embeddings with projected layer-0 attributes
        split = self.index
        cell_repr = torch.cat((pos_z[:split], torch.sigmoid(self.fc(feature[:split]))), 1)
        drug_repr = torch.cat((pos_z[split:], torch.sigmoid(self.fd(feature[split:]))), 1)

        # --- inner product between every cell line and every drug
        pos_adj = self.act(torch.matmul(cell_repr, drug_repr.t()))
        return pos_z, neg_z, summary_pos, summary_neg, pos_adj.view(-1)

    def discriminate(self, z, summary, sigmoid=True):
        """Score embeddings ``z`` against ``summary`` as ``z @ (weight @ summary)``, optionally through a sigmoid."""
        value = torch.matmul(z, torch.matmul(self.weight, summary))
        if sigmoid:
            return torch.sigmoid(value)
        return value

    def loss(self, pos_z, neg_z, summary):
        """Return the contrastive loss: positives should score high, negatives low, against ``summary``."""
        pos_prob = self.discriminate(pos_z, summary, sigmoid=True)
        neg_prob = self.discriminate(neg_z, summary, sigmoid=True)
        # EPS keeps log() finite when a probability saturates.
        pos_loss = -torch.log(pos_prob + EPS).mean()
        neg_loss = -torch.log(1 - neg_prob + EPS).mean()
        return pos_loss + neg_loss

    def __repr__(self):
        return f'{self.__class__.__name__}({self.hidden_channels})'
206 |
--------------------------------------------------------------------------------
/prog/my_utiils.py:
--------------------------------------------------------------------------------
1 | import math
2 | import torch
3 | import numpy as np
4 | from sklearn.metrics import roc_auc_score,precision_recall_curve,accuracy_score
def uniform(size, tensor):
    """Fill ``tensor`` in place from U(-1/sqrt(size), 1/sqrt(size)); do nothing when it is None."""
    if tensor is None:
        return
    limit = 1.0 / math.sqrt(size)
    tensor.data.uniform_(-limit, limit)
def kaiming_uniform(tensor, fan, a):
    """Fill ``tensor`` in place from U(-b, b) with b = sqrt(6 / ((1 + a^2) * fan)); do nothing when it is None."""
    if tensor is None:
        return
    gain_sq = 1 + a**2
    limit = math.sqrt(6 / (gain_sq * fan))
    tensor.data.uniform_(-limit, limit)
def glorot(tensor):
    """Fill ``tensor`` in place with Glorot/Xavier-uniform values based on its last two dimensions; do nothing when it is None."""
    if tensor is None:
        return
    fan_sum = tensor.size(-2) + tensor.size(-1)
    limit = math.sqrt(6.0 / fan_sum)
    tensor.data.uniform_(-limit, limit)
def glorot_orthogonal(tensor, scale):
    """Orthogonally initialise ``tensor`` in place, then rescale it.

    After the call the variance of the tensor equals
    ``scale / (tensor.size(-2) + tensor.size(-1))``. Does nothing when
    ``tensor`` is None.
    """
    if tensor is None:
        return
    torch.nn.init.orthogonal_(tensor.data)
    fan_sum = tensor.size(-2) + tensor.size(-1)
    # Dividing by the current variance normalises it before applying the
    # target scale; `scale` becomes a tensor here.
    scale /= fan_sum * tensor.var()
    tensor.data *= scale.sqrt()
def zeros(tensor):
    """Set every element of ``tensor`` to 0 in place; do nothing when it is None."""
    if tensor is None:
        return
    tensor.data.fill_(0)
def ones(tensor):
    """Set every element of ``tensor`` to 1 in place; do nothing when it is None."""
    if tensor is None:
        return
    tensor.data.fill_(1)
def normal(tensor, mean, std):
    """Fill ``tensor`` in place with samples from N(mean, std^2); do nothing when it is None."""
    if tensor is None:
        return
    tensor.data.normal_(mean, std)
def reset(nn):
    """Call ``reset_parameters()`` on a module's direct children, or on the module itself.

    If ``nn`` exposes ``children()`` and has at least one child, each direct
    child that defines ``reset_parameters`` is reset (``nn`` itself is not).
    Otherwise ``nn`` is reset if it defines ``reset_parameters``. ``None`` is
    ignored.
    """
    if nn is None:
        return
    kids = list(nn.children()) if hasattr(nn, 'children') else []
    # Fall back to the object itself when it is a leaf.
    targets = kids if len(kids) > 0 else [nn]
    for item in targets:
        if hasattr(item, 'reset_parameters'):
            item.reset_parameters()
41 |
def metrics_graph(yt, yp):
    """Compute AUC, AUPR and the best-threshold F1 / accuracy for binary predictions.

    AUC and the precision-recall curve come from scikit-learn. F1 and
    accuracy are evaluated at up to 999 candidate thresholds taken at evenly
    spaced ranks among the distinct predicted scores; the values at the
    threshold with the highest F1 are reported.

    Plain ndarrays are used instead of ``np.mat`` (removed in NumPy 2.0).

    Args:
        yt: 1-D array-like of true labels (0/1).
        yp: 1-D array-like of predicted scores, same length as ``yt``.

    Returns:
        Tuple ``(auc, aupr, f1_score, accuracy)``.
    """
    precision, recall, _ = precision_recall_curve(yt, yp)
    # np.trapz was renamed np.trapezoid in NumPy 2.0; use whichever exists.
    trapezoid = getattr(np, "trapezoid", None) or np.trapz
    # The curve is returned with recall in decreasing order, so the raw
    # integral is negative; flip the sign to get the area.
    aupr = -trapezoid(precision, recall)
    auc = roc_auc_score(yt, yp)

    # --- F1 and accuracy at the F1-optimal threshold
    real_score = np.asarray(yt).ravel()
    predict_score = np.asarray(yp).ravel()
    n_samples = real_score.size

    # Candidate thresholds: distinct scores sampled at ranks k/1000, k=1..999.
    distinct_scores = np.unique(predict_score)
    ranks = np.int32(distinct_scores.size * np.arange(1, 1000) / 1000)
    thresholds = distinct_scores[ranks]

    # Row i holds the 0/1 predictions obtained with thresholds[i].
    predicted = (predict_score[np.newaxis, :] >= thresholds[:, np.newaxis]).astype(float)

    TP = predicted.dot(real_score)
    FP = predicted.sum(axis=1) - TP
    FN = real_score.sum() - TP
    TN = n_samples - TP - FP - FN

    # n + TP - TN == 2*TP + FP + FN, i.e. the usual F1 denominator.
    f1_score_list = 2 * TP / (n_samples + TP - TN)
    accuracy_list = (TP + TN) / n_samples

    best = np.argmax(f1_score_list)
    return auc, aupr, f1_score_list[best], accuracy_list[best]
76 |
--------------------------------------------------------------------------------